Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions patterns/core/encoding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Patterns that flag invisible or direction-altering Unicode characters.
# Entries without their own `severity` presumably fall back to
# `default_severity` — confirm against the pattern loader.
category: encoding
default_severity: HIGH
patterns:
# Bidirectional control characters: U+202E (right-to-left override),
# U+202D (left-to-right override), U+202C (pop directional formatting),
# U+200F (right-to-left mark), U+200E (left-to-right mark).
- id: PI040
name: unicode-rtl-override
pattern: "\\x{202E}|\\x{202D}|\\x{202C}|\\x{200F}|\\x{200E}"
description: "Unicode direction override character — can hide text direction"
remediation: "Remove Unicode direction override characters. These make text visually misleading."
tags: [encoding, unicode]
# Any single zero-width character: U+200B (zero width space), U+FEFF (zero
# width no-break space), U+200C (zero width non-joiner), U+200D (zero width
# joiner).
# NOTE(review): U+FEFF is also the byte order mark, so a file that begins
# with a BOM would match — confirm whether that is intended.
- id: PI041
name: zero-width-chars
pattern: "\\x{200B}|\\x{FEFF}|\\x{200C}|\\x{200D}"
description: "Zero-width characters — can hide instructions invisible to humans"
remediation: "Remove zero-width characters. These can hide instructions that LLMs still process."
tags: [encoding, unicode]
# Three or more consecutive characters drawn from the same four code points
# as PI041; raised to CRITICAL via the per-entry `severity`.
- id: PI042
name: zero-width-sequence
pattern: "[\\x{200B}\\x{200C}\\x{200D}\\x{FEFF}]{3,}"
severity: CRITICAL
description: "Sequence of zero-width characters — likely encoded hidden instruction"
remediation: "Remove zero-width character sequence. This is almost certainly an injection attempt."
tags: [encoding, unicode, steganography]
40 changes: 40 additions & 0 deletions patterns/core/exfiltration.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Patterns that flag attempts to send data out or to extract the system
# prompt / instructions.
# NOTE(review): none of the patterns in this file carry an inline `(?i)`
# flag. Unless the scanner compiles them case-insensitively, capitalised
# variants such as "Send this to https://..." would not match — confirm.
category: exfiltration
default_severity: CRITICAL
patterns:
- id: PI020
name: send-to-url
pattern: "send\\s+(the\\s+following|this|it|data|response)\\s+to\\s+https?://"
description: "Data exfiltration to external URL"
remediation: "Remove data sending instruction. Data should only flow through authorized channels."
tags: [exfiltration, url]
- id: PI021
name: post-system-prompt
pattern: "POST\\s+your\\s+system\\s+prompt"
description: "System prompt exfiltration attempt"
remediation: "Remove system prompt extraction instruction."
tags: [exfiltration]
- id: PI022
name: output-instructions
pattern: "output\\s+your\\s+(full\\s+)?instructions"
description: "Instruction extraction attempt"
remediation: "Remove instruction extraction request."
tags: [exfiltration]
- id: PI023
name: repeat-system-message
pattern: "repeat\\s+your\\s+system\\s+message"
description: "System message extraction"
remediation: "Remove system message extraction request."
tags: [exfiltration]
- id: PI024
name: original-instructions
pattern: "what\\s+were\\s+your\\s+original\\s+instructions"
description: "Original instruction probing"
remediation: "Remove instruction probing."
tags: [exfiltration]
# The only entry here with its own `severity`: HIGH instead of the file's
# CRITICAL default, matching the remediation text's "may be legitimate".
- id: PI025
name: fetch-url
pattern: "fetch\\s+https?://[^\\s]+"
severity: HIGH
description: "External URL fetch request"
remediation: "Review URL fetch — may be legitimate or exfiltration vector."
tags: [exfiltration, url]
34 changes: 34 additions & 0 deletions patterns/core/instruction-injection.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Patterns that flag injected instructions and authority impersonation.
category: instruction_injection
default_severity: HIGH
patterns:
# NOTE(review): `^` only anchors at line starts if the scanner matches line
# by line or enables multi-line mode — confirm, otherwise this fires only
# at the very start of the content.
- id: PI010
name: new-task-injection
pattern: "(?i)^\\s*new\\s+task\\s*:"
description: "Injected task instruction"
remediation: "Remove injected task. Legitimate tasks should use proper skill format."
tags: [injection]
# Both brackets are optional and `\s*` allows zero spaces, so with `(?i)`
# this also matches plain prose such as "system prompt".
# NOTE(review): that is broad for a CRITICAL finding — confirm intended.
- id: PI011
name: system-override
pattern: "(?i)\\[?SYSTEM\\s*(OVERRIDE|PROMPT|MESSAGE)\\]?"
severity: CRITICAL
description: "System-level instruction injection"
remediation: "Remove system override. Only actual system prompts should use SYSTEM prefix."
tags: [injection]
# NOTE(review): PI012-PI014 have no `(?i)` flag, unlike PI010/PI011 —
# confirm whether case-sensitive matching is intended.
- id: PI012
name: hidden-html-instruction
pattern: "<!--\\s*(HIDDEN|SECRET|INJECT)"
description: "Hidden instruction in HTML comment"
remediation: "Remove hidden instructions from HTML comments."
tags: [injection, html]
- id: PI013
name: real-purpose
pattern: "your\\s+real\\s+purpose\\s+is"
description: "Purpose redefinition attempt"
remediation: "Remove purpose redefinition."
tags: [injection]
- id: PI014
name: developer-wants
pattern: "the\\s+developer\\s+wants\\s+you\\s+to"
description: "Authority impersonation via developer claim"
remediation: "Remove false developer attribution."
tags: [injection, impersonation]
58 changes: 58 additions & 0 deletions patterns/core/jailbreak.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Patterns that flag well-known jailbreak phrasings.
# NOTE(review): only PI031, PI035 and PI038 carry an inline `(?i)` flag; the
# remaining patterns are case-sensitive unless the scanner adds the flag
# itself — confirm which behaviour is intended.
category: jailbreak
default_severity: HIGH
patterns:
- id: PI030
name: developer-mode
pattern: "developer\\s+mode\\s+(enabled|activated|on)"
description: "Developer mode jailbreak"
remediation: "Remove developer mode activation."
tags: [jailbreak]
- id: PI031
name: dan-mode
pattern: "(?i)\\bDAN\\s+mode\\b"
description: "DAN (Do Anything Now) jailbreak"
remediation: "Remove DAN mode reference."
tags: [jailbreak]
- id: PI032
name: no-content-policy
pattern: "you\\s+have\\s+no\\s+content\\s+policy"
description: "Content policy removal attempt"
remediation: "Remove content policy override."
tags: [jailbreak]
- id: PI033
name: no-restrictions
pattern: "pretend\\s+you\\s+have\\s+no\\s+restrictions"
description: "Restriction removal via pretense"
remediation: "Remove restriction bypass."
tags: [jailbreak]
- id: PI034
name: hypothetical-scenario
pattern: "in\\s+this\\s+hypothetical\\s+scenario\\s+where\\s+you\\s+can"
description: "Hypothetical scenario jailbreak"
remediation: "Remove hypothetical bypass framing."
tags: [jailbreak]
# Raised to CRITICAL via the per-entry `severity`.
- id: PI035
name: jailbreak-prompt
pattern: "(?i)\\bjailbreak\\s+prompt\\b"
severity: CRITICAL
description: "Explicit jailbreak prompt reference"
remediation: "Remove jailbreak prompt."
tags: [jailbreak]
- id: PI036
name: override-safety
pattern: "override\\s+your\\s+safety"
description: "Safety override attempt"
remediation: "Remove safety override instruction."
tags: [jailbreak]
- id: PI037
name: bypass-content-filter
pattern: "bypass\\s+(the\\s+)?content\\s+filter"
description: "Content filter bypass attempt"
remediation: "Remove content filter bypass."
tags: [jailbreak]
- id: PI038
name: unlimited-mode
pattern: "(?i)unlimited\\s+mode"
description: "Unrestricted mode activation"
remediation: "Remove unlimited mode activation."
tags: [jailbreak]
46 changes: 46 additions & 0 deletions patterns/core/role-override.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Patterns that flag attempts to replace the agent's role or instructions.
# NOTE(review): none of the patterns in this file carry an inline `(?i)`
# flag. Unless the scanner compiles them case-insensitively, "Ignore all
# previous instructions" (capitalised) would not match — confirm.
category: role_override
default_severity: CRITICAL
patterns:
- id: PI001
name: ignore-previous-instructions
pattern: "ignore\\s+(all\\s+)?previous\\s+instructions"
description: "Attempts to override agent instructions"
remediation: "Remove instruction override text. If documenting attacks, use code blocks."
tags: [role-override]
- id: PI002
name: ignore-prior-context
pattern: "ignore\\s+(all\\s+)?prior\\s+context"
description: "Attempts to discard prior context"
remediation: "Remove context override text."
tags: [role-override]
# Matches "you are now" followed by any word, so ordinary sentences such as
# "you are now ready" also match; lowered to HIGH via the per-entry
# `severity`.
- id: PI003
name: you-are-now
pattern: "you\\s+are\\s+now\\s+\\w+"
severity: HIGH
description: "Role reassignment attempt"
remediation: "Remove role override. Use explicit skill scoping instead."
tags: [role-override]
# `.*` lets any amount of text sit between "act as" and "without
# restrictions".
- id: PI004
name: act-as-without-restrictions
pattern: "act\\s+as\\s+.*without\\s+restrictions"
description: "Unrestricted role assignment"
remediation: "Remove unrestricted role assignment."
tags: [role-override]
- id: PI005
name: new-primary-directive
pattern: "your\\s+new\\s+primary\\s+directive"
description: "Directive replacement attempt"
remediation: "Remove directive override."
tags: [role-override]
- id: PI006
name: forget-everything
pattern: "forget\\s+everything\\s+you\\s+know"
description: "Memory wipe attempt"
remediation: "Remove memory wipe instruction."
tags: [role-override]
- id: PI007
name: disregard-training
pattern: "disregard\\s+your\\s+(training|instructions|programming)"
description: "Training override attempt"
remediation: "Remove training override."
tags: [role-override]
45 changes: 44 additions & 1 deletion src/allowlist.rs
Original file line number Diff line number Diff line change
@@ -1 +1,44 @@
// TODO: implement
use regex::Regex;
use std::collections::HashMap;
use std::sync::OnceLock;

/// Process-wide cache for the compiled suppression-directive regex.
static SUPPRESSION_RE: OnceLock<Regex> = OnceLock::new();

/// Return the shared regex that recognises `injection-scanner:ignore`
/// directives, compiling it on first use.
///
/// Capture group 1 holds the comma-separated list of `PI<digits>` ids that
/// follows the directive keyword.
fn suppression_regex() -> &'static Regex {
    const DIRECTIVE_PATTERN: &str = r"injection-scanner:ignore\s+(PI\d+(?:\s*,\s*PI\d+)*)";

    // The pattern is a fixed literal, so a compile failure here is a bug.
    SUPPRESSION_RE.get_or_init(|| Regex::new(DIRECTIVE_PATTERN).unwrap())
}

/// Parse inline suppressions from content.
///
/// Scans each line for `injection-scanner:ignore PI001[, PI002 ...]`
/// directives (typically written as
/// `<!-- injection-scanner:ignore PI001 -->`) and returns a map of
/// `line_number -> Vec<pattern_id>`. Line numbers are 1-based.
///
/// Every directive on a line is honoured: when a line carries more than one
/// directive, their ids are merged into that line's list in the order they
/// appear. Lines without a directive get no entry in the map.
pub fn parse_suppressions(content: &str) -> HashMap<usize, Vec<String>> {
    let re = suppression_regex();
    let mut suppressions: HashMap<usize, Vec<String>> = HashMap::new();

    for (index, line) in content.lines().enumerate() {
        // `captures_iter` visits every directive on the line; a single
        // `captures` call would silently drop all but the first one.
        for caps in re.captures_iter(line) {
            suppressions
                .entry(index + 1)
                .or_default()
                .extend(caps[1].split(',').map(|id| id.trim().to_string()));
        }
    }

    suppressions
}

/// Report whether `pattern_id` is suppressed on `line`.
///
/// Suppression is per line and per pattern: the result is `true` only when
/// `suppressions` has an entry for exactly this line number and that entry
/// contains exactly this id. A missing line entry yields `false`.
pub fn is_suppressed(
    suppressions: &HashMap<usize, Vec<String>>,
    line: usize,
    pattern_id: &str,
) -> bool {
    match suppressions.get(&line) {
        Some(ids) => ids.iter().any(|id| id.as_str() == pattern_id),
        None => false,
    }
}
76 changes: 75 additions & 1 deletion src/patterns/mod.rs
Original file line number Diff line number Diff line change
@@ -1 +1,75 @@
// TODO: implement embedded pattern registry
use crate::pattern::{PatternCategory, PatternError};

// Core pattern definitions, embedded into the binary at compile time.
// `include_str!` resolves each path relative to this source file.
const ROLE_OVERRIDE_YAML: &str = include_str!("../../patterns/core/role-override.yaml");
const INSTRUCTION_YAML: &str = include_str!("../../patterns/core/instruction-injection.yaml");
const EXFILTRATION_YAML: &str = include_str!("../../patterns/core/exfiltration.yaml");
const JAILBREAK_YAML: &str = include_str!("../../patterns/core/jailbreak.yaml");
const ENCODING_YAML: &str = include_str!("../../patterns/core/encoding.yaml");

/// Parse every pattern category that is compiled into the binary.
///
/// The YAML sources come from `include_str!` constants, so no files are read
/// at runtime. Categories are returned in a fixed order: role override,
/// instruction injection, exfiltration, jailbreak, encoding.
///
/// # Errors
///
/// Returns `PatternError::ParseError` carrying the deserializer's message
/// for the first source that fails to parse.
pub fn load_embedded_patterns() -> Result<Vec<PatternCategory>, PatternError> {
    let sources = [
        ROLE_OVERRIDE_YAML,
        INSTRUCTION_YAML,
        EXFILTRATION_YAML,
        JAILBREAK_YAML,
        ENCODING_YAML,
    ];

    let mut categories = Vec::with_capacity(sources.len());
    for source in sources.iter() {
        let category = serde_yaml::from_str::<PatternCategory>(source)
            .map_err(|e| PatternError::ParseError(e.to_string()))?;
        categories.push(category);
    }

    Ok(categories)
}

/// Load additional patterns from an external directory.
///
/// Every direct child of `dir` whose extension is `yaml` or `yml` is read
/// and parsed as one `PatternCategory`; other entries are ignored and
/// subdirectories are not descended into. Returns an empty `Vec` if the
/// directory does not exist, allowing optional community pattern overlays.
///
/// Files are processed in sorted path order, so the returned categories are
/// in the same order on every platform and filesystem.
///
/// # Errors
///
/// Returns `PatternError::ParseError` if the directory cannot be listed, or
/// if a matching file cannot be read or parsed. File-level errors are
/// prefixed with the offending path.
pub fn load_external_patterns(dir: &std::path::Path) -> Result<Vec<PatternCategory>, PatternError> {
    if !dir.exists() {
        return Ok(Vec::new());
    }

    let mut paths = Vec::new();
    for entry in std::fs::read_dir(dir).map_err(|e| PatternError::ParseError(e.to_string()))? {
        let path = entry
            .map_err(|e| PatternError::ParseError(e.to_string()))?
            .path();
        if path
            .extension()
            .is_some_and(|ext| ext == "yaml" || ext == "yml")
        {
            paths.push(path);
        }
    }

    // `read_dir` yields entries in a platform- and filesystem-dependent
    // order; sorting keeps the resulting category order reproducible.
    paths.sort();

    let mut categories = Vec::with_capacity(paths.len());
    for path in &paths {
        let content = std::fs::read_to_string(path)
            .map_err(|e| PatternError::ParseError(format!("{}: {}", path.display(), e)))?;
        let category: PatternCategory = serde_yaml::from_str(&content)
            .map_err(|e| PatternError::ParseError(format!("{}: {}", path.display(), e)))?;
        categories.push(category);
    }

    Ok(categories)
}

/// Load the embedded pattern set and, optionally, an external overlay.
///
/// This is the primary entry point for pattern loading. When `external_dir`
/// is `Some`, the categories found there are appended after the embedded
/// ones; they extend the embedded set and never replace it. When it is
/// `None`, only the embedded categories are returned.
///
/// # Errors
///
/// Propagates the `PatternError` from either loader.
pub fn load_all_patterns(
    external_dir: Option<&std::path::Path>,
) -> Result<Vec<PatternCategory>, PatternError> {
    let embedded = load_embedded_patterns()?;

    let Some(dir) = external_dir else {
        return Ok(embedded);
    };

    let mut combined = embedded;
    combined.extend(load_external_patterns(dir)?);
    Ok(combined)
}
50 changes: 49 additions & 1 deletion src/reporter.rs
Original file line number Diff line number Diff line change
@@ -1 +1,49 @@
// TODO: implement
use crate::pattern::ScanReport;

/// Render scan reports as plain text for a human reader.
///
/// Each report that has findings contributes a header line with its file
/// name followed by one line per match (line number, severity, message,
/// remediation and pattern id). Reports without findings are omitted.
///
/// The output always ends with a summary: either a fixed "nothing found"
/// message when the per-severity counters across all reports add up to zero,
/// or the total and the per-severity breakdown.
pub fn format_text(reports: &[ScanReport]) -> String {
    let mut text = String::new();

    for report in reports.iter().filter(|r| r.has_findings()) {
        text.push_str(&format!("\n{}\n", report.file));

        for finding in &report.matches {
            let row = format!(
                " :{} {} {} — {} ({})\n",
                finding.line,
                finding.severity,
                finding.message,
                finding.remediation,
                finding.pattern_id
            );
            text.push_str(&row);
        }
    }

    // Accumulate all four severity counters in a single pass.
    let (critical, high, medium, low) = reports.iter().fold(
        (0usize, 0usize, 0usize, 0usize),
        |(critical, high, medium, low), report| {
            (
                critical + report.critical_count,
                high + report.high_count,
                medium + report.medium_count,
                low + report.low_count,
            )
        },
    );
    let total = critical + high + medium + low;

    if total != 0 {
        text.push_str(&format!(
            "\n{} finding(s): {} critical, {} high, {} medium, {} low\n",
            total, critical, high, medium, low
        ));
    } else {
        text.push_str("No injection patterns detected.\n");
    }

    text
}

/// Serialize scan reports as pretty-printed JSON.
///
/// # Errors
///
/// Returns the underlying `serde_json::Error` (rather than an `anyhow`
/// error) so callers can handle serialization failures precisely.
pub fn format_json(reports: &[ScanReport]) -> Result<String, serde_json::Error> {
    let rendered = serde_json::to_string_pretty(reports)?;
    Ok(rendered)
}
Loading
Loading