semcod
diff --git a/.gitignore b/.gitignore
Lines changed: 2 additions & 0 deletions
diff --git a/.redsl/history.jsonl b/.redsl/history.jsonl
Lines changed: 56 additions & 0 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 28 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 6 additions & 6 deletions
diff --git a/VERSION b/VERSION
Lines changed: 1 addition & 1 deletion
diff --git a/examples/privacy/ml/02_hybrid_system.py b/examples/privacy/ml/02_hybrid_system.py
Lines changed: 33 additions & 229 deletions
@@ -227,3 +227,5 @@ marimo/_static/
 marimo/_lsp/
 __marimo__/
 .aider*
+.redsl/
+*.bak
@@ -1,5 +1,33 @@
 # Changelog
 
+## [Unreleased]
+
+## [0.1.58] - 2026-04-20
+
+### Docs
+- Update README.md
+- Update redsl_refactor_plan.md
+- Update redsl_refactor_report.md
+
+### Test
+- Update tests/test_anonymization_context.py
+- Update tests/test_project_anonymizer.py
+- Update tests/test_project_deanonymizer.py
+
+### Other
+- Update .gitignore
+- Update .redsl/history.jsonl
+- Update examples/privacy/ml/02_hybrid_system.py
+- Update examples/privacy/ml/02_hybrid_system.py.bak
+- Update llx/commands/fix.py
+- Update llx/commands/fix.py.bak
+- Update llx/pyqual_plugins/detect_secrets.py
+- Update llx/tools/ai_tools_manager.py
+- Update llx/utils/aider.py
+- Update llx/utils/formatting.py
+- ... and 5 more files
+
+
 All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
@@ -3,20 +3,20 @@
 **Intelligent LLM model router driven by real code metrics.**
 
 [![PyPI](https://img.shields.io/pypi/v/llx)](https://pypi.org/project/llx/)
-[![Version](https://img.shields.io/badge/version-0.1.57-blue)](https://pypi.org/project/llx/)
+[![Version](https://img.shields.io/badge/version-0.1.58-blue)](https://pypi.org/project/llx/)
 [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
 [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://python.org)
 
 
 ## AI Cost Tracking
 
-![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.1.31-blue) ![Python](https://img.shields.io/badge/python-3.9+-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
-![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-23.9h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
+![PyPI](https://img.shields.io/badge/pypi-costs-blue) ![Version](https://img.shields.io/badge/version-0.1.58-blue) ![Python](https://img.shields.io/badge/python-3.10%2B-blue) ![License](https://img.shields.io/badge/license-Apache--2.0-green)
+![AI Cost](https://img.shields.io/badge/AI%20Cost-$7.50-orange) ![Human Time](https://img.shields.io/badge/Human%20Time-24.9h-blue) ![Model](https://img.shields.io/badge/Model-openrouter%2Fqwen%2Fqwen3--coder--next-lightgrey)
 
-- 🤖 **LLM usage:** $7.5000 (70 commits)
-- 👤 **Human dev:** ~$2387 (23.9h @ $100/h, 30min dedup)
+- 🤖 **LLM usage:** $7.5000 (71 commits)
+- 👤 **Human dev:** ~$2487 (24.9h @ $100/h, 30min dedup)
 
-Generated on 2026-04-19 using [openrouter/qwen/qwen3-coder-next](https://openrouter.ai/qwen/qwen3-coder-next)
+Generated on 2026-04-20 using [openrouter/qwen/qwen3-coder-next](https://openrouter.ai/qwen/qwen3-coder-next)
 
 ---
 
 
@@ -1 +1 @@
-0.1.57
+0.1.58
@@ -102,9 +102,9 @@ def detect_ml_context(self, text: str) -> list[DetectionResult]:
 
         # Context patterns
         context_patterns = [
-            (r'(password|passwd|pwd)\s*[=:]\s*["\']?([^"\'\s]{6,})', 'password_context'),
-            (r'(secret|key|token)\s*[=:]\s*["\']?([A-Za-z0-9!@#$%^&*]{8,})', 'secret_context'),
-            (r'(api[_-]?key)\s*[=:]\s*["\']?([A-Za-z0-9_-]{16,})', 'api_key_context'),
+            # These are raw strings, so the whitespace class must be written
+            # with a single backslash: r'\\s' would match a literal backslash
+            # followed by the letter "s" and the patterns would never fire on
+            # ordinary `name = "value"` assignments.
+            (r'(password|passwd|pwd)\s*[=:]\s*["\']?([^"\'\s]{6,})', 'password_context'),
+            (r'(secret|key|token)\s*[=:]\s*["\']?([A-Za-z0-9!@#$%^&*]{8,})', 'secret_context'),
+            (r'(api[_-]?key)\s*[=:]\s*["\']?([A-Za-z0-9_-]{16,})', 'api_key_context'),
         ]
 
         for pattern, ctx_type in context_patterns:
@@ -191,237 +191,41 @@ def _merge_results(self, results: list[DetectionResult]) -> list[DetectionResult
         merged.append(current)
         return merged
 
-    def hybrid_anonymize(self, text: str) -> tuple[str, list[DetectionResult], dict]:
-        """Anonymize using hybrid approach."""
-        detections = self.hybrid_detect(text)
-        
-        # Sort by position descending for replacement
-        sorted_detections = sorted(detections, key=lambda d: d.position[0], reverse=True)
-        
+    def sort_detections(self, detections: list[DetectionResult]) -> list[DetectionResult]:
+        """Return a new list of detections ordered by start offset, highest first.
+
+        The descending order is the one used when substituting masks into the
+        text. The list passed in is not modified.
+        """
+        ordered = list(detections)
+        ordered.sort(key=lambda item: item.position[0], reverse=True)
+        return ordered
+    
+    def create_anonymization_mask(self, detection: DetectionResult, index: int) -> str:
+        """Build the placeholder token that replaces a detection in the text.
+
+        Args:
+            detection: Detection to mask; its ``detected_by`` and
+                ``pattern_type`` are upper-cased into the token.
+            index: Sequence number, rendered zero-padded to four digits.
+
+        Returns:
+            ``[REGEX_<PATTERN>_<NNNN>]`` when the detection came from the
+            regex detector, otherwise ``[ML_<DETECTOR>_<PATTERN>_<NNNN>]``.
+        """
+        pattern = detection.pattern_type.upper()
+        if detection.detected_by == 'regex':
+            return f"[REGEX_{pattern}_{index:04d}]"
+        # Keep the pattern type in ML masks too, matching the token format the
+        # code produced before this helper was extracted; without it the mask
+        # no longer says what kind of secret was found.
+        return f"[ML_{detection.detected_by.upper()}_{pattern}_{index:04d}]"
+    
+    def perform_anonymization(self, text: str, detections: list[DetectionResult]) -> tuple[str, dict]:
+        """Replace each detected span in ``text`` with a mask token.
+
+        Args:
+            text: Text whose detected spans are to be masked.
+            detections: Detections to mask, processed in the order given.
+                ``hybrid_anonymize`` passes them sorted by start offset,
+                highest first.
+
+        Returns:
+            ``(anonymized, mapping)`` where ``mapping`` maps every mask token
+            to the original text it replaced.
+        """
         anonymized = text
         mapping = {}
-        stats = {'regex': 0, 'ml_entropy': 0, 'ml_context': 0, 'ml_semantic': 0}
-        
-        for i, detection in enumerate(sorted_detections):
+        for i, detection in enumerate(detections):
             original = detection.text
-            
-            # Create mask
-            if detection.detected_by == 'regex':
-                mask = f"[REGEX_{detection.pattern_type.upper()}_{i:04d}]"
-            else:
-                mask = f"[ML_{detection.detected_by.upper()}_{detection.pattern_type.upper()}_{i:04d}]"
-            
-            # Replace in text
+            mask = self.create_anonymization_mask(detection, i)
+            # Offsets are applied to the partially rewritten string, so spans
+            # have to be handled back to front: a replacement that changes the
+            # length then only moves text located after the spans still pending.
             start, end = detection.position
             anonymized = anonymized[:start] + mask + anonymized[end:]
-            
             mapping[mask] = original
-            stats[detection.detected_by] = stats.get(detection.detected_by, 0) + 1
-        
-        return anonymized, detections, mapping, stats
-
-
-def create_test_scenarios() -> dict[str, str]:
-    """Create test scenarios with various sensitive data."""
-    
-    return {
-        "scenario_1_mixed": """
-# Mixed sensitive data types
-DATABASE_URL = "postgresql://admin:SuperSecret123!@db.internal.com:5432/myapp"
-STRIPE_KEY = "sk_live_EXAMPLE_DUMMY_KEY_NOT_REAL"
-AWS_KEY = "AKIAIOSFODNN7EXAMPLE"
-email = "admin@company.com"
-phone = "+1 555 123 4567"
-password = "MyStr0ng!P@ssw0rd2024"
-secret_token = "x9k#mP2$vL8@nQ4*wJ7&cR3^hF5(bN6"
-encryption_key = "a1b2c3d4e5f6789012345678901234567890abcdef1234567890abcdef123456"
-""",
-        
-        "scenario_2_code": """
-class PaymentService:
-    def __init__(self):
-        self.api_key = "sk_live_EXAMPLE_DUMMY_KEY_NOT_REAL"
-        self.db_password = "p@ssw0rd!#2024Secure"
-        self.jwt_secret = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9"
-        self.session_token = "a1b2c3d4e5f6789012345678901234567890abcdef"
-    
-    def connect(self):
-        conn_str = "postgresql://user:Secret123!@localhost/db"
-        return conn_str
-""",
-        
-        "scenario_3_config": """
-[database]
-host = prod-db-01.internal.company.com
-password = AnotherSecretPassword456!
-encryption_key = AES256-KEY-HERE-VERY-SECRET
-
-[api_keys]
-stripe = sk_live_EXAMPLE_DUMMY_KEY_NOT_REAL
-aws_access = AKIAIOSFODNN7EXAMPLE
-aws_secret = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
-
-[tokens]
-session = x9k#mP2$vL8@nQ4*wJ7&cR3^hF5(bN6
-nonce = 7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9069
-""",
-        
-        "scenario_4_edge_cases": """
-# Edge cases that challenge detection
-# 1. Password in URL
-url = "https://user:pass123@api.example.com/data"
-
-# 2. High entropy but readable
-readable_random = "correct-horse-battery-staple"  # diceware style
-
-# 3. Base64 encoded secret
-b64_secret = "d2Vha2J1dHN0cm9uZ3Bhc3N3b3JkMTIz"
-
-# 4. Very long random
-long_random = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0u1v2w3x4y5z6"
-
-# 5. Short but high entropy (should NOT be detected)
-short_high = "aB3$"
-
-# 6. Normal text that might trigger
-normal_text = "This is just a normal sentence with some words"
-""",
-    }
-
-
-def main():
-    print("=" * 80)
-    print("LLX Privacy: Hybrid ML + Regex Anonymization System")
-    print("=" * 80)
-    
-    hybrid = HybridAnonymizer()
-    scenarios = create_test_scenarios()
-    
-    # Compare detection methods
-    print("\n1. DETECTION METHOD COMPARISON")
-    print("-" * 60)
-    
-    for scenario_name, text in list(scenarios.items())[:2]:
-        print(f"\nScenario: {scenario_name}")
-        print("-" * 40)
-        
-        # Regex only
-        regex_findings = hybrid.regex_anon.scan(text)
-        regex_count = sum(len(v) for v in regex_findings.values())
-        print(f"Regex detection: {regex_count} items")
-        
-        # ML only
-        ml_results = hybrid.detect_ml_entropy(text) + hybrid.detect_ml_context(text)
-        ml_count = len(ml_results)
-        print(f"ML detection: {ml_count} items")
-        
-        # Hybrid
-        hybrid_results = hybrid.hybrid_detect(text)
-        print(f"Hybrid detection: {len(hybrid_results)} items")
-        
-        # Show breakdown
-        by_method = {}
-        for r in hybrid_results:
-            by_method[r.detected_by] = by_method.get(r.detected_by, 0) + 1
-        
-        print("  Breakdown:", end="")
-        for method, count in by_method.items():
-            print(f" {method}={count}", end="")
-        print()
-    
-    # Full anonymization example
-    print("\n2. FULL HYBRID ANONYMIZATION")
-    print("-" * 60)
-    
-    test_text = scenarios['scenario_1_mixed']
-    
-    print("Original text:")
-    print(test_text)
-    
-    anon_text, detections, mapping, stats = hybrid.hybrid_anonymize(test_text)
-    
-    print("\nAnonymized text:")
-    print(anon_text)
-    
-    print(f"\nDetection statistics:")
-    for method, count in stats.items():
-        print(f"  {method}: {count} items")
-    
-    print(f"\nMapping ({len(mapping)} items):")
-    for mask, original in list(mapping.items())[:5]:
-        print(f"  {mask} ← {original}")
-    
-    # Edge cases
-    print("\n3. EDGE CASE ANALYSIS")
-    print("-" * 60)
-    
-    edge_cases = scenarios['scenario_4_edge_cases']
-    edge_results = hybrid.hybrid_detect(edge_cases)
-    
-    print("Edge case results:")
-    for result in edge_results:
-        print(f"  {result.pattern_type:<25} detected_by={result.detected_by:<12} "
-              f"conf={result.confidence:.2f} text='{result.text[:30]}...'")
-    
-    # Project-level hybrid
-    print("\n4. PROJECT-LEVEL HYBRID ANONYMIZATION")
-    print("-" * 60)
+        return anonymized, mapping
 
-    with tempfile.TemporaryDirectory() as tmpdir:
-        project_path = Path(tmpdir) / "hybrid_project"
-        project_path.mkdir()
-        
-        # Create project files
-        (project_path / "config.py").write_text(scenarios['scenario_1_mixed'])
-        (project_path / "services.py").write_text(scenarios['scenario_2_code'])
-        (project_path / "settings.ini").write_text(scenarios['scenario_3_config'])
-        
-        print(f"Created project with 3 files")
-        
-        # Hybrid approach on project
-        ctx = AnonymizationContext(project_path=project_path)
-        
-        # First pass: regex-based via ProjectAnonymizer
-        project_anon = ProjectAnonymizer(ctx)
-        result = project_anon.anonymize_project()
-        
-        # Second pass: ML-based on content
-        ml_hybrid = HybridAnonymizer()
-        
-        total_ml_findings = 0
-        for file_path, content in result.files.items():
-            if file_path.endswith(('.py', '.ini', '.txt')):
-                ml_results = ml_hybrid.hybrid_detect(content)
-                total_ml_findings += len(ml_results)
-        
-        print(f"Project anonymization:")
-        print(f"  Regex-based: {len(ctx.variables)} variables, {len(ctx.functions)} functions")
-        print(f"  ML-based findings: {total_ml_findings} high-entropy/contextual items")
-        
-        # Show combined result
-        sample_file = list(result.files.keys())[0]
-        print(f"\nSample output ({sample_file}):")
-        print(result.files[sample_file][:500])
-    
-    # Comparison table
-    print("\n5. METHOD COMPARISON SUMMARY")
-    print("-" * 60)
-    print(f"{'Method':<20} {'Strengths':<35} {'Limitations'}")
-    print("-" * 80)
-    print(f"{'Regex-only':<20} {'Known patterns, fast, precise':<35} {'Misses unknown/random strings'}")
-    print(f"{'ML-entropy':<20} {'Random strings, high entropy':<35} {'May flag legitimate code'}")
-    print(f"{'ML-context':<20} {'Contextual passwords':<35} {'Requires context analysis'}")
-    print(f"{'Hybrid':<20} {'Maximum coverage, best of both':<35} {'Slightly more complex'}")
+    def collect_anonymization_stats(self, detections: list[DetectionResult]) -> dict:
+        """Count detections per detector.
+
+        Args:
+            detections: Detections to tally; only ``detected_by`` is read.
+
+        Returns:
+            Mapping of detector name to number of detections. The four known
+            detector names are always present (zero when unused); any other
+            ``detected_by`` value gets its own entry instead of raising
+            ``KeyError``.
+        """
+        stats = {'regex': 0, 'ml_entropy': 0, 'ml_context': 0, 'ml_semantic': 0}
+        for detection in detections:
+            source = detection.detected_by
+            # .get keeps the tally working for detector names not pre-seeded above.
+            stats[source] = stats.get(source, 0) + 1
+        return stats
 
-    print("\n" + "=" * 80)
-    print("Hybrid system advantages:")
-    print("  ✓ Regex catches known patterns with high precision")
-    print("  ✓ ML catches random passwords/keys regex misses")
-    print("  ✓ Context analysis finds passwords in code patterns")
-    print("  ✓ Merging avoids duplicate detections")
-    print("  ✓ Best coverage for both structured and unstructured secrets")
-    print("=" * 80)
-
-
-if __name__ == "__main__":
-    main()
+    def hybrid_anonymize(self, text: str) -> tuple[str, list[DetectionResult], dict, dict]:
+        """Anonymize ``text`` using the detections from ``hybrid_detect``.
+
+        Args:
+            text: Text to anonymize.
+
+        Returns:
+            A 4-tuple ``(anonymized, detections, mapping, stats)``:
+            the masked text, the detections in the order ``hybrid_detect``
+            returned them, the mask-to-original mapping, and the per-detector
+            counts.
+        """
+        detections = self.hybrid_detect(text)
+        sorted_detections = self.sort_detections(detections)
+        anonymized, mapping = self.perform_anonymization(text, sorted_detections)
+        stats = self.collect_anonymization_stats(detections)
+        # The mapping is part of the result: it is the only record of which
+        # original value each mask stands for, and callers unpack four values.
+        return anonymized, detections, mapping, stats