From c7ca19388100107445967209c76cab74d6f4cedc Mon Sep 17 00:00:00 2001 From: tyagian Date: Sun, 3 May 2026 10:08:47 -0400 Subject: [PATCH 1/2] fix secret --- pyproject.toml | 4 +- sentinelguard/__init__.py | 2 +- sentinelguard/scanners/prompt/secrets.py | 147 +++++++++++++++++------ 3 files changed, 117 insertions(+), 36 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e24371d..4306d2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "sentinelguard" -version = "0.0.5" +version = "0.0.6" description = "A comprehensive, production-ready LLM security and guardrails framework" readme = "README.md" license = {text = "MIT"} @@ -32,6 +32,8 @@ dependencies = [ "pyyaml>=6.0", "tiktoken>=0.5.0", "regex>=2023.0", + # Secrets detection + "detect-secrets>=1.4.0", # PII detection (Presidio) "presidio-analyzer>=2.2.0", "presidio-anonymizer>=2.2.0", diff --git a/sentinelguard/__init__.py b/sentinelguard/__init__.py index 4895bd7..b8a8890 100644 --- a/sentinelguard/__init__.py +++ b/sentinelguard/__init__.py @@ -36,7 +36,7 @@ guard = SentinelGuard(config=config) """ -__version__ = "0.0.5" +__version__ = "0.0.6" __author__ = "SentinelGuard Contributors" from sentinelguard.core.guard import SentinelGuard diff --git a/sentinelguard/scanners/prompt/secrets.py b/sentinelguard/scanners/prompt/secrets.py index 8e061b4..a00957f 100644 --- a/sentinelguard/scanners/prompt/secrets.py +++ b/sentinelguard/scanners/prompt/secrets.py @@ -1,20 +1,25 @@ """Secrets detection scanner. -Detects API keys, tokens, passwords, and other credentials in text -using three detection methods: -1. Vendor-specific patterns (AWS, GitHub, Stripe, etc.) -2. Generic keyword + value patterns (any password=, key=, token=, etc.) -3. High-entropy string detection (catches unknown/custom secrets) +Detects API keys, tokens, passwords, and other credentials in text. + +Detection methods (in order): +1. detect-secrets library (Yelp) — industry-standard secrets detection with + 20+ built-in plugins (AWS, GitHub, Stripe, high-entropy, keyword, etc.) +2. Vendor-specific regex patterns (fallback if detect-secrets unavailable) +3. Generic keyword + value patterns (password=, key=, token=, etc.) """ from __future__ import annotations +import logging import math import re from typing import Any, ClassVar, Dict, List, Optional from sentinelguard.core.scanner import PromptScanner, RiskLevel, ScanResult, register_scanner +logger = logging.getLogger(__name__) + # ── Vendor-specific patterns ── VENDOR_PATTERNS = { "aws_access_key": re.compile(r"(? float: class SecretsScanner(PromptScanner): """Detects API keys, tokens, passwords, and credentials. - Uses three detection methods: - 1. Vendor-specific patterns (AWS, GitHub, Stripe, etc.) - 2. Generic keyword patterns (password=, key=, token=, etc.) - 3. High-entropy string detection (catches unknown secrets) + Primary detection via detect-secrets (Yelp) with 20+ plugins. + Falls back to built-in regex patterns if detect-secrets is unavailable. Args: threshold: Score threshold (0.0-1.0). Default 0.5. @@ -145,47 +145,126 @@ def __init__( super().__init__(threshold=threshold, **kwargs) self.secret_types = secret_types self.detect_entropy = detect_entropy + self._detect_secrets_available = None + + def _try_detect_secrets(self, text: str) -> tuple[Dict[str, int], List[Dict[str, Any]]]: + """Use detect-secrets library for primary detection.""" + if self._detect_secrets_available is False: + return {}, [] + + try: + from detect_secrets import settings + from detect_secrets.core.scan import scan_line + from detect_secrets.settings import transient_settings + except ImportError: + self._detect_secrets_available = False + logger.debug("detect-secrets not installed, using built-in patterns") + return {}, [] + + self._detect_secrets_available = True + found: Dict[str, int] = {} + matches: List[Dict[str, Any]] = [] + + # Scan each line with detect-secrets + with transient_settings({"plugins_used": [ + {"name": "ArtifactoryDetector"}, + {"name": "AWSKeyDetector"}, + {"name": "AzureStorageKeyDetector"}, + {"name": "BasicAuthDetector"}, + {"name": "CloudantDetector"}, + {"name": "DiscordBotTokenDetector"}, + {"name": "GitHubTokenDetector"}, + {"name": "HexHighEntropyString", "limit": 3.0}, + {"name": "Base64HighEntropyString", "limit": 4.5}, + {"name": "IbmCloudIamDetector"}, + {"name": "IbmCosHmacDetector"}, + {"name": "JwtTokenDetector"}, + {"name": "KeywordDetector"}, + {"name": "MailchimpDetector"}, + {"name": "NpmDetector"}, + {"name": "PrivateKeyDetector"}, + {"name": "SendGridDetector"}, + {"name": "SlackDetector"}, + {"name": "SoftlayerDetector"}, + {"name": "SquareOAuthDetector"}, + {"name": "StripeDetector"}, + {"name": "TwilioKeyDetector"}, + ]}): + for line_num, line in enumerate(text.splitlines()): + for secret in scan_line(line): + secret_type = secret.type + found[secret_type] = found.get(secret_type, 0) + 1 + # Calculate position in original text + line_start = sum(len(l) + 1 for l in text.splitlines()[:line_num]) + secret_val = secret.secret_value or "" + try: + val_start = line_start + line.index(secret_val) + except ValueError: + val_start = line_start + matches.append({ + "type": secret_type, + "start": val_start, + "end": val_start + len(secret_val), + "text": secret_val, + }) + + return found, matches def scan(self, text: str, **kwargs: Any) -> ScanResult: found_secrets: Dict[str, int] = {} - secret_matches: List[Dict[str, Any]] = [] # position + value info for sanitization + secret_matches: List[Dict[str, Any]] = [] - # Method 1: Vendor-specific patterns + # Method 1: detect-secrets library (primary) + ds_found, ds_matches = self._try_detect_secrets(text) + found_secrets.update(ds_found) + secret_matches.extend(ds_matches) + + # Method 2: Built-in vendor-specific patterns (supplement detect-secrets) for secret_type, pattern in VENDOR_PATTERNS.items(): if self.secret_types and secret_type not in self.secret_types: continue for match in pattern.finditer(text): - found_secrets[secret_type] = found_secrets.get(secret_type, 0) + 1 - secret_matches.append({ - "type": secret_type, - "start": match.start(), - "end": match.end(), - "text": match.group(0), - }) - - # Method 2: Generic keyword patterns + # Avoid duplicates with detect-secrets findings + match_text = match.group(0) + already_found = any( + m["text"] == match_text or + (m["start"] <= match.start() < m["end"]) + for m in secret_matches + ) + if not already_found: + found_secrets[secret_type] = found_secrets.get(secret_type, 0) + 1 + secret_matches.append({ + "type": secret_type, + "start": match.start(), + "end": match.end(), + "text": match.group(0), + }) + + # Method 3: Generic keyword patterns (supplement) for secret_type, pattern in KEYWORD_PATTERNS.items(): if self.secret_types and secret_type not in self.secret_types: continue for match in pattern.finditer(text): - found_secrets[secret_type] = found_secrets.get(secret_type, 0) + 1 - # For keyword patterns, the value is in group(1) or group(2) try: value = match.group(2) if match.lastindex and match.lastindex >= 2 else match.group(1) val_start = match.start() + match.group(0).index(value) + except (IndexError, ValueError): + value = match.group(0) + val_start = match.start() + # Avoid duplicates + already_found = any( + m["text"] == value or + (m["start"] <= val_start < m["end"]) + for m in secret_matches + ) + if not already_found: + found_secrets[secret_type] = found_secrets.get(secret_type, 0) + 1 secret_matches.append({ "type": secret_type, "start": val_start, "end": val_start + len(value), "text": value, }) - except (IndexError, ValueError): - secret_matches.append({ - "type": secret_type, - "start": match.start(), - "end": match.end(), - "text": match.group(0), - }) # Method 3: High-entropy string detection if self.detect_entropy: From a4a3b585e81f3afe66f60d0259234d851efd34cd Mon Sep 17 00:00:00 2001 From: tyagian Date: Sun, 3 May 2026 13:35:42 -0400 Subject: [PATCH 2/2] fix secret handle --- sentinelguard/scanners/prompt/secrets.py | 84 ++++++++++++++---------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/sentinelguard/scanners/prompt/secrets.py b/sentinelguard/scanners/prompt/secrets.py index a00957f..23386be 100644 --- a/sentinelguard/scanners/prompt/secrets.py +++ b/sentinelguard/scanners/prompt/secrets.py @@ -153,9 +153,10 @@ def _try_detect_secrets(self, text: str) -> tuple[Dict[str, int], List[Dict[str, return {}, [] try: - from detect_secrets import settings - from detect_secrets.core.scan import scan_line + from detect_secrets.core.secrets_collection import SecretsCollection from detect_secrets.settings import transient_settings + import tempfile + import os except ImportError: self._detect_secrets_available = False logger.debug("detect-secrets not installed, using built-in patterns") @@ -165,48 +166,63 @@ def _try_detect_secrets(self, text: str) -> tuple[Dict[str, int], List[Dict[str, found: Dict[str, int] = {} matches: List[Dict[str, Any]] = [] - # Scan each line with detect-secrets - with transient_settings({"plugins_used": [ - {"name": "ArtifactoryDetector"}, - {"name": "AWSKeyDetector"}, - {"name": "AzureStorageKeyDetector"}, - {"name": "BasicAuthDetector"}, - {"name": "CloudantDetector"}, - {"name": "DiscordBotTokenDetector"}, - {"name": "GitHubTokenDetector"}, - {"name": "HexHighEntropyString", "limit": 3.0}, - {"name": "Base64HighEntropyString", "limit": 4.5}, - {"name": "IbmCloudIamDetector"}, - {"name": "IbmCosHmacDetector"}, - {"name": "JwtTokenDetector"}, - {"name": "KeywordDetector"}, - {"name": "MailchimpDetector"}, - {"name": "NpmDetector"}, - {"name": "PrivateKeyDetector"}, - {"name": "SendGridDetector"}, - {"name": "SlackDetector"}, - {"name": "SoftlayerDetector"}, - {"name": "SquareOAuthDetector"}, - {"name": "StripeDetector"}, - {"name": "TwilioKeyDetector"}, - ]}): - for line_num, line in enumerate(text.splitlines()): - for secret in scan_line(line): + # Write text to temp file for full-context scanning + temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt") + temp_file.write(text.encode("utf-8")) + temp_file.close() + + try: + secrets = SecretsCollection() + with transient_settings({"plugins_used": [ + {"name": "ArtifactoryDetector"}, + {"name": "AWSKeyDetector"}, + {"name": "AzureStorageKeyDetector"}, + {"name": "BasicAuthDetector"}, + {"name": "CloudantDetector"}, + {"name": "DiscordBotTokenDetector"}, + {"name": "GitHubTokenDetector"}, + {"name": "IbmCloudIamDetector"}, + {"name": "IbmCosHmacDetector"}, + {"name": "JwtTokenDetector"}, + {"name": "KeywordDetector"}, + {"name": "MailchimpDetector"}, + {"name": "NpmDetector"}, + {"name": "PrivateKeyDetector"}, + {"name": "SendGridDetector"}, + {"name": "SlackDetector"}, + {"name": "SoftlayerDetector"}, + {"name": "SquareOAuthDetector"}, + {"name": "StripeDetector"}, + {"name": "TwilioKeyDetector"}, + {"name": "Base64HighEntropyString", "limit": 4.5}, + {"name": "HexHighEntropyString", "limit": 3.0}, + ]}): + secrets.scan_file(str(temp_file.name)) + + for file in secrets.files: + for secret in secrets[file]: + if secret.secret_value is None: + continue + secret_val = secret.secret_value + if len(secret_val) < 6: + continue + secret_type = secret.type found[secret_type] = found.get(secret_type, 0) + 1 - # Calculate position in original text - line_start = sum(len(l) + 1 for l in text.splitlines()[:line_num]) - secret_val = secret.secret_value or "" + + # Find position in original text try: - val_start = line_start + line.index(secret_val) + val_start = text.index(secret_val) except ValueError: - val_start = line_start + continue matches.append({ "type": secret_type, "start": val_start, "end": val_start + len(secret_val), "text": secret_val, }) + finally: + os.remove(temp_file.name) return found, matches