From 01f84143e29a99bca138a569222efde25225252c Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Sun, 3 May 2026 12:33:07 -0400 Subject: [PATCH 1/3] feat: add Python structural scanning support Wires tree-sitter-python into the structural scan pass and ships ten Django/Flask/FastAPI/idiomatic-Python pattern rules covering middleware, RBAC, ABAC, ownership, and feature-gate categories. `has_perm` is explicitly excluded from the ABAC permission-check rule so the same call doesn't double-surface. Also reorganizes the README so Supported languages and Installation appear right after How it works (before the deeper Deep mode walkthrough) and updates them to reflect Python now being structurally supported in v0.1. --- Cargo.lock | 11 ++ Cargo.toml | 1 + README.md | 58 +++---- rules/python/django-permission-required.toml | 58 +++++++ rules/python/django-user-passes-test.toml | 45 +++++ rules/python/fastapi-depends.toml | 52 ++++++ rules/python/feature-gate-check.toml | 41 +++++ rules/python/has-perm-call.toml | 49 ++++++ rules/python/has-role-call.toml | 45 +++++ rules/python/login-required-decorator.toml | 53 ++++++ rules/python/ownership-check.toml | 53 ++++++ rules/python/permission-check-call.toml | 69 ++++++++ rules/python/role-check-conditional.toml | 59 +++++++ src/rules/embedded.rs | 41 +++++ src/scanner/discovery.rs | 33 +++- src/scanner/matcher.rs | 172 +++++++++++++++++++ src/scanner/parser.rs | 21 ++- 17 files changed, 821 insertions(+), 40 deletions(-) create mode 100644 rules/python/django-permission-required.toml create mode 100644 rules/python/django-user-passes-test.toml create mode 100644 rules/python/fastapi-depends.toml create mode 100644 rules/python/feature-gate-check.toml create mode 100644 rules/python/has-perm-call.toml create mode 100644 rules/python/has-role-call.toml create mode 100644 rules/python/login-required-decorator.toml create mode 100644 rules/python/ownership-check.toml create mode 100644 rules/python/permission-check-call.toml create 
mode 100644 rules/python/role-check-conditional.toml diff --git a/Cargo.lock b/Cargo.lock index 0674dfe..a71db19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1751,6 +1751,16 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" +[[package]] +name = "tree-sitter-python" +version = "0.23.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-typescript" version = "0.23.2" @@ -2394,6 +2404,7 @@ dependencies = [ "tree-sitter", "tree-sitter-java", "tree-sitter-javascript", + "tree-sitter-python", "tree-sitter-typescript", "url", ] diff --git a/Cargo.toml b/Cargo.toml index dd1ece2..59fd46e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ tree-sitter = "0.24" tree-sitter-typescript = "0.23" tree-sitter-java = "0.23" tree-sitter-javascript = "0.23" +tree-sitter-python = "0.23" ignore = "0.4" sha2 = "0.10" regex = "1" diff --git a/README.md b/README.md index 41e5d25..3fb38f6 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Sift through your codebase for embedded authorization logic. Extract it into Policy as Code (PaC) — [Rego](https://www.openpolicyagent.org/docs/latest/policy-language/) for [OPA](https://www.openpolicyagent.org/) today, with other engines (e.g. Cedar) on the roadmap. -> **Status:** v0.1 — structural scanning ready for TypeScript, JavaScript, and Java. `--deep` (LLM-assisted) mode functional via any OpenAI-compatible endpoint or MCP-capable agent host. +> **Status:** v0.1 — structural scanning ready for TypeScript, JavaScript, Java, and Python. `--deep` (LLM-assisted) mode functional via any OpenAI-compatible endpoint or MCP-capable agent host. ## What is zift? @@ -23,7 +23,34 @@ zift report . # detailed findings report 1. 
**Structural scan** (tree-sitter) — fast, deterministic, zero-cost. Finds known authorization patterns: role checks, permission guards, auth middleware, security annotations. -2. **Semantic scan** (`--deep`, opt-in) — sends candidate code regions to an LLM that classifies authorization logic the structural pass missed or misjudged. Useful for business rules that implicitly encode access control, and for languages where structural support hasn't shipped yet (Python, Go, etc.). +2. **Semantic scan** (`--deep`, opt-in) — sends candidate code regions to an LLM that classifies authorization logic the structural pass missed or misjudged. Useful for business rules that implicitly encode access control, and for languages where structural support hasn't shipped yet (Go, etc.). + +## Supported languages + +| Language | Structural | Deep (cold-region) | Framework hints (deep) | +|----------|-----------|---------------------|------------------------| +| TypeScript / JavaScript | yes (v0.1) | yes (v0.1) | Express, NestJS, Next.js | +| Java | yes (v0.1) | yes (v0.1) | Spring Security, Jakarta Security | +| Python | yes (v0.1) | yes (v0.1) | Django, Flask, FastAPI | +| Go | planned (v0.2) | yes (v0.1) | Gin, Echo | +| C# | planned (v0.3) | yes (v0.1) | ASP.NET Core | +| Kotlin | planned (v0.3) | yes (v0.1) | Spring (Kotlin) | +| Ruby | planned (v0.3) | yes (v0.1) | Rails | +| PHP | planned (v0.3) | yes (v0.1) | Laravel | + +Deep mode walks the full source tree by extension and detects auth-y function names with regex — so it produces useful results in any language well before structural support lands. + +## Installation + +### Cargo + +```bash +cargo install --git https://github.com/EnforceAuth/zift +``` + +### Binary download + +Prebuilt binaries for Linux (x86_64), macOS (x86_64 and arm64), and Windows (x86_64) are available from [Releases](https://github.com/EnforceAuth/zift/releases). 
## Deep mode (`--deep`) @@ -189,33 +216,6 @@ echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion": You should see a single line back with `serverInfo.name == "zift"` and capability flags for tools/resources. Then call `tools/list` to see the seven tool descriptors. -## Supported languages - -| Language | Structural | Deep (cold-region) | Framework hints (deep) | -|----------|-----------|---------------------|------------------------| -| TypeScript / JavaScript | yes (v0.1) | yes (v0.1) | Express, NestJS, Next.js | -| Java | yes (v0.1) | yes (v0.1) | Spring Security, Jakarta Security | -| Python | planned (v0.2) | yes (v0.1) | Django, Flask, FastAPI | -| Go | planned (v0.2) | yes (v0.1) | Gin, Echo | -| C# | planned (v0.3) | yes (v0.1) | ASP.NET Core | -| Kotlin | planned (v0.3) | yes (v0.1) | Spring (Kotlin) | -| Ruby | planned (v0.3) | yes (v0.1) | Rails | -| PHP | planned (v0.3) | yes (v0.1) | Laravel | - -Deep mode walks the full source tree by extension and detects auth-y function names with regex — so it produces useful results in any language well before structural support lands. - -## Installation - -### Cargo - -```bash -cargo install --git https://github.com/EnforceAuth/zift -``` - -### Binary download - -Prebuilt binaries for Linux (x86_64), macOS (x86_64 and arm64), and Windows (x86_64) are available from [Releases](https://github.com/EnforceAuth/zift/releases). 
- ## License Apache-2.0 diff --git a/rules/python/django-permission-required.toml b/rules/python/django-permission-required.toml new file mode 100644 index 0000000..582f686 --- /dev/null +++ b/rules/python/django-permission-required.toml @@ -0,0 +1,58 @@ +[rule] +id = "py-django-permission-required" +languages = ["python"] +category = "middleware" +confidence = "high" +description = "Django @permission_required decorator (with permission codename argument)" +# Matches both bare and module-qualified forms of the decorator: +# @permission_required('app.delete_user') +# @django.contrib.auth.decorators.permission_required('app.delete_user') +# The decorator's call function is captured at the rightmost identifier, so +# arbitrarily deep import paths still bind to `decorator_name`. +query = """ +(decorator + (call + function: [ + (identifier) @decorator_name + (attribute attribute: (identifier) @decorator_name) + ] + arguments: (argument_list + (string (string_content) @perm_name))) +) @match +""" + +[rule.predicates.decorator_name] +eq = "permission_required" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + "{{perm_name}}" in input.user.permissions +} +""" + +[[rule.tests]] +input = """ +@permission_required('app.delete_user') +def delete_user(request, id): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@django.contrib.auth.decorators.permission_required('app.delete_user') +def delete_user(request, id): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@cache_page(60) +def index(request): + pass +""" +expect_match = false diff --git a/rules/python/django-user-passes-test.toml b/rules/python/django-user-passes-test.toml new file mode 100644 index 0000000..1b9b79e --- /dev/null +++ b/rules/python/django-user-passes-test.toml @@ -0,0 +1,45 @@ +[rule] +id = "py-django-user-passes-test" +languages = ["python"] +category = "middleware" +confidence = "medium" +description = "Django @user_passes_test decorator 
(custom predicate gate)" +# The decorator wraps a predicate function/lambda — we can detect the +# decorator but the actual rule encoded inside the predicate needs human +# review or deep-mode analysis. Confidence is `medium` for that reason. +query = """ +(decorator + (call + function: [ + (identifier) @decorator_name + (attribute attribute: (identifier) @decorator_name) + ]) +) @match +""" + +[rule.predicates.decorator_name] +eq = "user_passes_test" + +[[rule.tests]] +input = """ +@user_passes_test(lambda u: u.is_admin) +def view(request): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@django.contrib.auth.decorators.user_passes_test(is_staff_check) +def admin_view(request): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@cache_control(max_age=60) +def view(request): + pass +""" +expect_match = false diff --git a/rules/python/fastapi-depends.toml b/rules/python/fastapi-depends.toml new file mode 100644 index 0000000..7170d98 --- /dev/null +++ b/rules/python/fastapi-depends.toml @@ -0,0 +1,52 @@ +[rule] +id = "py-fastapi-depends" +languages = ["python"] +category = "middleware" +confidence = "medium" +description = "FastAPI Depends(...) used as a parameter default (dependency-injection auth gate)" +# Matches both `token: str = Depends(...)` (typed) and `token = Depends(...)` +# (untyped) parameter defaults. `Depends` is FastAPI's idiomatic way to wire +# auth dependencies (`oauth2_scheme`, `get_current_user`, `require_role`), +# but it's also used for non-auth dependency injection — confidence is +# `medium` and the rego template is intentionally a stub for human review. 
+query = """ +[ + (typed_default_parameter + value: (call + function: (identifier) @fn_name)) @match + (default_parameter + value: (call + function: (identifier) @fn_name)) @match +] +""" + +[rule.predicates.fn_name] +eq = "Depends" + +[[rule.tests]] +input = """ +def read_items(token: str = Depends(oauth2_scheme)): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +def read_items(token = Depends(get_current_user)): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +def read_items(token: str = "default"): + pass +""" +expect_match = false + +[[rule.tests]] +input = """ +def factory(builder = Builder()): + pass +""" +expect_match = false diff --git a/rules/python/feature-gate-check.toml b/rules/python/feature-gate-check.toml new file mode 100644 index 0000000..143a520 --- /dev/null +++ b/rules/python/feature-gate-check.toml @@ -0,0 +1,41 @@ +[rule] +id = "py-feature-gate-check" +languages = ["python"] +category = "feature_gate" +confidence = "medium" +description = "Feature flag or plan-based gating in Python" +# Mirrors the Java/TS feature-gate rules. Only the literal-string form is +# captured; dynamic feature keys (Features.BETA_DASHBOARD or a variable) +# can be added later if false negatives surface. 
+query = """ +(call + function: (attribute + attribute: (identifier) @method) + arguments: (argument_list + (string (string_content) @feature)) +) @match +""" + +[rule.predicates.method] +match = "^(has_feature|is_feature_enabled|check_feature|has_plan|is_plan_active)$" + +[[rule.tests]] +input = """ +if feature_flags.has_feature("advanced-analytics"): + enable() +""" +expect_match = true + +[[rule.tests]] +input = """ +if subscription.has_plan("pro"): + enable() +""" +expect_match = true + +[[rule.tests]] +input = """ +if validator.is_enabled("field"): + validate() +""" +expect_match = false diff --git a/rules/python/has-perm-call.toml b/rules/python/has-perm-call.toml new file mode 100644 index 0000000..d00c9c2 --- /dev/null +++ b/rules/python/has-perm-call.toml @@ -0,0 +1,49 @@ +[rule] +id = "py-has-perm-call" +languages = ["python"] +category = "rbac" +confidence = "high" +description = "Django-style permission check (request.user.has_perm / has_perms)" +# Matches `.has_perm("app.codename")` and the bulk variant +# `.has_perms(["app.a", "app.b"])`. Only the literal-string form is +# captured today; dynamic permission codes will surface via deep mode. 
+query = """ +(call + function: (attribute + attribute: (identifier) @method) + arguments: (argument_list + (string (string_content) @perm_name)) +) @match +""" + +[rule.predicates.method] +match = "^(has_perm|has_perms)$" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + "{{perm_name}}" in input.user.permissions +} +""" + +[[rule.tests]] +input = """ +if request.user.has_perm('app.delete_user'): + delete_user() +""" +expect_match = true + +[[rule.tests]] +input = """ +if user.has_perm('blog.add_post'): + create() +""" +expect_match = true + +[[rule.tests]] +input = """ +result.has_value('foo') +""" +expect_match = false diff --git a/rules/python/has-role-call.toml b/rules/python/has-role-call.toml new file mode 100644 index 0000000..0bdcfdb --- /dev/null +++ b/rules/python/has-role-call.toml @@ -0,0 +1,45 @@ +[rule] +id = "py-has-role-call" +languages = ["python"] +category = "rbac" +confidence = "high" +description = "Bare role-checking function call (e.g. 
has_role(\"admin\"))" +query = """ +(call + function: (identifier) @fn_name + arguments: (argument_list + (string (string_content) @role_value)) +) @match +""" + +[rule.predicates.fn_name] +match = "^(has_role|check_role|is_role|require_role|require_roles)$" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + input.user.role in {"{{role_value}}"} +} +""" + +[[rule.tests]] +input = """ +if has_role("manager"): + approve_request() +""" +expect_match = true + +[[rule.tests]] +input = """ +require_role("admin") +""" +expect_match = true + +[[rule.tests]] +input = """ +if has_value("manager"): + process() +""" +expect_match = false diff --git a/rules/python/login-required-decorator.toml b/rules/python/login-required-decorator.toml new file mode 100644 index 0000000..53834ac --- /dev/null +++ b/rules/python/login-required-decorator.toml @@ -0,0 +1,53 @@ +[rule] +id = "py-login-required-decorator" +languages = ["python"] +category = "middleware" +confidence = "high" +description = "@login_required marker decorator (Django, Flask-Login, or similar)" +# Matches the marker form (no call/parens) of @login_required as either a bare +# identifier or a single-level attribute reference (e.g. @flask_login.login_required). +# The call form (@login_required(redirect_field_name='...')) is intentionally a +# distinct AST shape and would need a separate rule if observed in the wild. 
+query = """ +(decorator + [ + (identifier) @decorator_name + (attribute attribute: (identifier) @decorator_name) + ] +) @match +""" + +[rule.predicates.decorator_name] +eq = "login_required" + +[[rule.tests]] +input = """ +@login_required +def my_view(request): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@flask_login.login_required +def index(): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@app.route('/') +def index(): + pass +""" +expect_match = false + +[[rule.tests]] +input = """ +@staticmethod +def helper(): + pass +""" +expect_match = false diff --git a/rules/python/ownership-check.toml b/rules/python/ownership-check.toml new file mode 100644 index 0000000..ece9106 --- /dev/null +++ b/rules/python/ownership-check.toml @@ -0,0 +1,53 @@ +[rule] +id = "py-ownership-check" +languages = ["python"] +category = "ownership" +confidence = "medium" +description = "Resource ownership comparison (e.g. resource.owner_id == user.id)" +# Catches `X.owner_attr == Y.id_attr`, where both attribute names must satisfy the predicates below. +# Confidence is `medium` — the snake_case naming convention makes false +# positives less likely than in TS/Java, but unrelated identity-shaped +# comparisons can still slip through. 
+query = """ +(comparison_operator + (attribute attribute: (identifier) @left_prop) + operators: "==" + (attribute attribute: (identifier) @right_prop) +) @match +""" + +[rule.predicates.left_prop] +match = "(?i)^(owner_id|user_id|created_by|author_id|owner|account_id)$" + +[rule.predicates.right_prop] +match = "(?i)^(id|user_id|sub|account_id)$" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + input.resource.owner == input.user.id +} +""" + +[[rule.tests]] +input = """ +if resource.owner_id == user.id: + allow_edit() +""" +expect_match = true + +[[rule.tests]] +input = """ +if post.author_id == request.user.id: + edit() +""" +expect_match = true + +[[rule.tests]] +input = """ +if a.score == b.score: + tie() +""" +expect_match = false diff --git a/rules/python/permission-check-call.toml b/rules/python/permission-check-call.toml new file mode 100644 index 0000000..8cacb6d --- /dev/null +++ b/rules/python/permission-check-call.toml @@ -0,0 +1,69 @@ +[rule] +id = "py-permission-check-call" +languages = ["python"] +category = "abac" +confidence = "high" +description = "Permission or capability check method call (e.g. user.can(\"delete\"))" +# Catches the broader ABAC family: `can`, `cannot`, `is_allowed`, `allows`, +# `denies`, `has_access`, `check_access`, `check_permission`. Django's +# `has_perm`/`has_perms` are intentionally excluded — they have a dedicated +# RBAC-flavored rule (`py-has-perm-call`) so the same call doesn't surface +# as both abac and rbac findings. 
+query = """ +(call + function: (attribute + attribute: (identifier) @method @method_excl) + arguments: (argument_list + (string (string_content) @permission)) +) @match +""" + +[rule.predicates.method] +match = "^(can|cannot|has_permission|check_permission|is_allowed|allows|denies|has_access|check_access)$" + +[rule.predicates.method_excl] +not_match = "^(has_perm|has_perms)$" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + input.action == "{{permission}}" +} +""" + +[[rule.tests]] +input = """ +if user.can("delete"): + delete_resource() +""" +expect_match = true + +[[rule.tests]] +input = """ +if policy.is_allowed("read"): + read() +""" +expect_match = true + +[[rule.tests]] +input = """ +if account.has_access("billing"): + show_billing() +""" +expect_match = true + +[[rule.tests]] +input = """ +if user.has_perm("blog.add_post"): + create_post() +""" +expect_match = false + +[[rule.tests]] +input = """ +if cache.has_value("foo"): + use_it() +""" +expect_match = false diff --git a/rules/python/role-check-conditional.toml b/rules/python/role-check-conditional.toml new file mode 100644 index 0000000..ef2a28d --- /dev/null +++ b/rules/python/role-check-conditional.toml @@ -0,0 +1,59 @@ +[rule] +id = "py-role-check-conditional" +languages = ["python"] +category = "rbac" +confidence = "high" +description = "Direct role comparison in a conditional (e.g. user.role == \"admin\")" +# Captures `. == \"value\"`. The `not_match` on `role_value` +# filters out chat/LLM-message roles ("assistant", "user", "system", +# "tool", "function") that share the same shape but aren't auth. 
+query = """ +(comparison_operator + (attribute attribute: (identifier) @prop) + operators: "==" + (string (string_content) @role_value) +) @match +""" + +[rule.predicates.prop] +match = "^(role|roles|user_role|user_type|account_type)$" + +[rule.predicates.role_value] +not_match = "^(assistant|user|system|tool|function)$" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + input.user.role == "{{role_value}}" +} +""" + +[[rule.tests]] +input = """ +if user.role == "admin": + delete_user() +""" +expect_match = true + +[[rule.tests]] +input = """ +if account.account_type == "enterprise": + enable() +""" +expect_match = true + +[[rule.tests]] +input = """ +if user.name == "admin": + greet() +""" +expect_match = false + +[[rule.tests]] +input = """ +if msg.role == "assistant": + process_response() +""" +expect_match = false diff --git a/src/rules/embedded.rs b/src/rules/embedded.rs index 1dceb0b..729c9c5 100644 --- a/src/rules/embedded.rs +++ b/src/rules/embedded.rs @@ -137,6 +137,47 @@ const EMBEDDED_RULES: &[(&str, &str)] = &[ "java-custom-authz-call", include_str!("../../rules/java/custom-authz-call.toml"), ), + // -- Python -- + ( + "py-django-permission-required", + include_str!("../../rules/python/django-permission-required.toml"), + ), + ( + "py-login-required-decorator", + include_str!("../../rules/python/login-required-decorator.toml"), + ), + ( + "py-django-user-passes-test", + include_str!("../../rules/python/django-user-passes-test.toml"), + ), + ( + "py-has-perm-call", + include_str!("../../rules/python/has-perm-call.toml"), + ), + ( + "py-fastapi-depends", + include_str!("../../rules/python/fastapi-depends.toml"), + ), + ( + "py-role-check-conditional", + include_str!("../../rules/python/role-check-conditional.toml"), + ), + ( + "py-has-role-call", + include_str!("../../rules/python/has-role-call.toml"), + ), + ( + "py-permission-check-call", + include_str!("../../rules/python/permission-check-call.toml"), + ), + ( + 
"py-ownership-check", + include_str!("../../rules/python/ownership-check.toml"), + ), + ( + "py-feature-gate-check", + include_str!("../../rules/python/feature-gate-check.toml"), + ), ]; pub fn load_embedded_rules() -> Result> { diff --git a/src/scanner/discovery.rs b/src/scanner/discovery.rs index f9c5826..156f6e8 100644 --- a/src/scanner/discovery.rs +++ b/src/scanner/discovery.rs @@ -22,13 +22,14 @@ pub fn detect_language(path: &Path) -> Option<(Language, bool)> { "js" | "mjs" | "cjs" => Some((Language::JavaScript, false)), "jsx" => Some((Language::JavaScript, true)), "java" => Some((Language::Java, false)), + "py" | "pyi" => Some((Language::Python, false)), _ => None, } } /// Extension → language map covering **all** languages in the [`Language`] -/// enum, including those without structural parser support yet (Python, Go, -/// C#, Kotlin, Ruby, PHP). Used by the deep (semantic) scan, which can run +/// enum, including those without structural parser support yet (Go, C#, +/// Kotlin, Ruby, PHP). Used by the deep (semantic) scan, which can run /// regex-based cold-region detection on any language regardless of grammar /// availability. pub fn detect_language_for_deep(path: &Path) -> Option<(Language, bool)> { @@ -162,10 +163,22 @@ mod tests { ); } + #[test] + fn detect_python_extensions() { + assert_eq!( + detect_language(Path::new("foo.py")), + Some((Language::Python, false)) + ); + assert_eq!( + detect_language(Path::new("foo.pyi")), + Some((Language::Python, false)) + ); + } + #[test] fn detect_unknown_extension() { assert_eq!(detect_language(Path::new("foo.rs")), None); - assert_eq!(detect_language(Path::new("foo.py")), None); + assert_eq!(detect_language(Path::new("foo.go")), None); } #[test] @@ -232,12 +245,14 @@ mod tests { } #[test] - fn structural_detect_language_does_not_pick_up_python() { - // Sanity: the structural detector must NOT include Python — otherwise - // the structural pass would try to parse files for which it has no - // grammar. 
The deep detector picks them up; the structural one doesn't. - assert_eq!(detect_language(Path::new("foo.py")), None); + fn structural_detect_language_does_not_pick_up_unsupported_languages() { + // Sanity: the structural detector must NOT include languages without + // a wired-up tree-sitter grammar — otherwise the structural pass + // would try to parse files it can't handle. The deep detector picks + // them up; the structural one doesn't. assert_eq!(detect_language(Path::new("foo.go")), None); + assert_eq!(detect_language(Path::new("Foo.cs")), None); + assert_eq!(detect_language(Path::new("foo.rb")), None); } #[test] @@ -248,7 +263,7 @@ mod tests { fs::write(dir.path().join("c.go"), "package main\n").unwrap(); let structural = discover_files(dir.path(), &[], &[]); - assert_eq!(structural.len(), 1, "structural sees only TS"); + assert_eq!(structural.len(), 2, "structural sees TS + Python"); let deep = discover_files_for_deep(dir.path(), &[], &[]); assert_eq!(deep.len(), 3, "deep sees TS + Python + Go"); diff --git a/src/scanner/matcher.rs b/src/scanner/matcher.rs index 1d5ec26..ad872bb 100644 --- a/src/scanner/matcher.rs +++ b/src/scanner/matcher.rs @@ -823,6 +823,178 @@ public class MyService implements Serializable { ); } + // -- Python rule tests -- + + fn parse_and_match_python(source: &str, rule_toml: &str) -> Vec { + let rule = rules::parse_rule_for_test(rule_toml); + let mut ts_parser = tree_sitter::Parser::new(); + let lang = Language::Python; + let ts_lang = parser::get_language(lang, false).unwrap(); + let tree = parser::parse_source(&mut ts_parser, source.as_bytes(), lang, false).unwrap(); + let compiled = compile_rule(&rule, &ts_lang).unwrap(); + execute_query( + &compiled, + &tree, + source.as_bytes(), + Path::new("test.py"), + lang, + ) + .unwrap() + } + + #[test] + fn py_django_permission_required_matches() { + let findings = parse_and_match_python( + "@permission_required('app.delete_user')\ndef delete_user(request, id):\n pass\n", + 
include_str!("../../rules/python/django-permission-required.toml"), + ); + assert!(!findings.is_empty(), "should match @permission_required"); + assert_eq!(findings[0].category, crate::types::AuthCategory::Middleware); + } + + #[test] + fn py_django_permission_required_qualified_matches() { + let findings = parse_and_match_python( + "@django.contrib.auth.decorators.permission_required('app.delete_user')\ndef delete_user(request, id):\n pass\n", + include_str!("../../rules/python/django-permission-required.toml"), + ); + assert!( + !findings.is_empty(), + "should match module-qualified @permission_required" + ); + } + + #[test] + fn py_login_required_decorator_matches() { + let findings = parse_and_match_python( + "@login_required\ndef my_view(request):\n pass\n", + include_str!("../../rules/python/login-required-decorator.toml"), + ); + assert!(!findings.is_empty(), "should match bare @login_required"); + } + + #[test] + fn py_login_required_decorator_no_false_positive_on_unrelated_decorator() { + let findings = parse_and_match_python( + "@staticmethod\ndef helper():\n pass\n", + include_str!("../../rules/python/login-required-decorator.toml"), + ); + assert!(findings.is_empty(), "should not match @staticmethod"); + } + + #[test] + fn py_has_perm_call_matches() { + let findings = parse_and_match_python( + "if request.user.has_perm('app.delete_user'):\n delete_user()\n", + include_str!("../../rules/python/has-perm-call.toml"), + ); + assert!(!findings.is_empty(), "should match request.user.has_perm()"); + assert_eq!(findings[0].category, crate::types::AuthCategory::Rbac); + } + + #[test] + fn py_fastapi_depends_typed_default_matches() { + let findings = parse_and_match_python( + "def read_items(token: str = Depends(oauth2_scheme)):\n pass\n", + include_str!("../../rules/python/fastapi-depends.toml"), + ); + assert!(!findings.is_empty(), "should match Depends() typed default"); + } + + #[test] + fn py_fastapi_depends_untyped_default_matches() { + let findings = 
parse_and_match_python( + "def read_items(token = Depends(get_current_user)):\n pass\n", + include_str!("../../rules/python/fastapi-depends.toml"), + ); + assert!( + !findings.is_empty(), + "should match Depends() untyped default" + ); + } + + #[test] + fn py_role_check_conditional_matches() { + let findings = parse_and_match_python( + "if user.role == \"admin\":\n delete_user()\n", + include_str!("../../rules/python/role-check-conditional.toml"), + ); + assert!(!findings.is_empty(), "should match user.role == \"admin\""); + assert_eq!(findings[0].category, crate::types::AuthCategory::Rbac); + } + + #[test] + fn py_role_check_conditional_excludes_chat_message_role() { + let findings = parse_and_match_python( + "if msg.role == \"assistant\":\n process_response()\n", + include_str!("../../rules/python/role-check-conditional.toml"), + ); + assert!( + findings.is_empty(), + "should not match LLM chat message role" + ); + } + + #[test] + fn py_has_role_call_matches() { + let findings = parse_and_match_python( + "if has_role(\"manager\"):\n approve_request()\n", + include_str!("../../rules/python/has-role-call.toml"), + ); + assert!(!findings.is_empty(), "should match has_role()"); + } + + #[test] + fn py_permission_check_call_matches() { + let findings = parse_and_match_python( + "if user.can(\"delete\"):\n delete_resource()\n", + include_str!("../../rules/python/permission-check-call.toml"), + ); + assert!(!findings.is_empty(), "should match user.can()"); + assert_eq!(findings[0].category, crate::types::AuthCategory::Abac); + } + + #[test] + fn py_permission_check_call_excludes_django_has_perm() { + // `has_perm` belongs to py-has-perm-call (rbac); it must not also + // surface here as abac, otherwise the same call produces two + // findings with conflicting categories. 
+ let findings = parse_and_match_python( + "if user.has_perm(\"blog.add_post\"):\n create_post()\n", + include_str!("../../rules/python/permission-check-call.toml"), + ); + assert!( + findings.is_empty(), + "permission-check-call must not duplicate has_perm (covered by py-has-perm-call)" + ); + } + + #[test] + fn py_ownership_check_matches() { + let findings = parse_and_match_python( + "if resource.owner_id == user.id:\n allow_edit()\n", + include_str!("../../rules/python/ownership-check.toml"), + ); + assert!( + !findings.is_empty(), + "should match owner_id == user.id ownership check" + ); + assert_eq!(findings[0].category, crate::types::AuthCategory::Ownership); + } + + #[test] + fn py_feature_gate_matches() { + let findings = parse_and_match_python( + "if feature_flags.has_feature(\"advanced\"):\n enable()\n", + include_str!("../../rules/python/feature-gate-check.toml"), + ); + assert!(!findings.is_empty(), "should match has_feature()"); + assert_eq!( + findings[0].category, + crate::types::AuthCategory::FeatureGate + ); + } + // -- cross_predicates tests (synthetic rules) -- /// A synthetic rule shaped like ownership-check: two getters in an diff --git a/src/scanner/parser.rs b/src/scanner/parser.rs index 3a5fc69..7139776 100644 --- a/src/scanner/parser.rs +++ b/src/scanner/parser.rs @@ -14,6 +14,7 @@ pub fn get_language(lang: Language, is_tsx_jsx: bool) -> Result Ok(tree_sitter_typescript::LANGUAGE_TSX.into()), (Language::JavaScript, _) => Ok(tree_sitter_javascript::LANGUAGE.into()), (Language::Java, _) => Ok(tree_sitter_java::LANGUAGE.into()), + (Language::Python, _) => Ok(tree_sitter_python::LANGUAGE.into()), _ => Err(ZiftError::General(format!( "language {lang:?} not yet supported" ))), @@ -77,9 +78,25 @@ mod tests { assert!(is_language_supported(Language::Java)); } + #[test] + fn parse_python() { + let mut parser = tree_sitter::Parser::new(); + let source = b"def is_admin(user):\n return user.role == 'admin'\n"; + let tree = parse_source(&mut parser, 
source, Language::Python, false).unwrap(); + assert!(!tree.root_node().has_error()); + } + + #[test] + fn python_is_supported() { + assert!(is_language_supported(Language::Python)); + } + #[test] fn unsupported_language_returns_error() { - assert!(get_language(Language::Python, false).is_err()); - assert!(!is_language_supported(Language::Python)); + // Go has no structural grammar wired up yet — kept as the canary + // that `unsupported_language_returns_error` keeps testing what its + // name says it does. + assert!(get_language(Language::Go, false).is_err()); + assert!(!is_language_supported(Language::Go)); } } From fef2e275a21e8f022171ca43018e383f1b24a9a9 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Sun, 3 May 2026 14:54:20 -0400 Subject: [PATCH 2/3] refactor(rules/python): tighten and expand rule coverage from review - login-required: also match the call form (@login_required(redirect_field_name='login_url')) - feature-gate: add property-comparison branch (user.plan == "X") alongside the existing method-call branch (flags.has_feature("X")) - role-check-conditional: also match the `is` operator alongside `==`; `is not` and `!=` stay excluded (they invert the allow-style template) - permission-check-call: drop deny-style methods (`cannot`, `denies`) whose semantics invert the allow-style rego stub; remove redundant @method_excl capture (the anchored regex already excludes has_perm/s) - fastapi-depends: clarify why no rego template is emitted - deep/candidate.rs: drop Python from the "no structural support" list in the module docstring Adds 4 unit tests and 6 inline TOML rule tests for the new shapes. 
--- rules/python/fastapi-depends.toml | 3 +- rules/python/feature-gate-check.toml | 54 +++++++++++++++++----- rules/python/login-required-decorator.toml | 33 +++++++++++-- rules/python/permission-check-call.toml | 21 +++++---- rules/python/role-check-conditional.toml | 20 ++++++-- src/deep/candidate.rs | 4 +- src/scanner/matcher.rs | 54 ++++++++++++++++++++++ 7 files changed, 156 insertions(+), 33 deletions(-) diff --git a/rules/python/fastapi-depends.toml b/rules/python/fastapi-depends.toml index 7170d98..f8b14b8 100644 --- a/rules/python/fastapi-depends.toml +++ b/rules/python/fastapi-depends.toml @@ -8,7 +8,8 @@ description = "FastAPI Depends(...) used as a parameter default (dependency-inje # (untyped) parameter defaults. `Depends` is FastAPI's idiomatic way to wire # auth dependencies (`oauth2_scheme`, `get_current_user`, `require_role`), # but it's also used for non-auth dependency injection — confidence is -# `medium` and the rego template is intentionally a stub for human review. +# `medium` and we intentionally emit no rego template (the wrapped callable +# is what encodes the policy, and that needs human or deep-mode review). query = """ [ (typed_default_parameter diff --git a/rules/python/feature-gate-check.toml b/rules/python/feature-gate-check.toml index 143a520..61acd94 100644 --- a/rules/python/feature-gate-check.toml +++ b/rules/python/feature-gate-check.toml @@ -4,20 +4,31 @@ languages = ["python"] category = "feature_gate" confidence = "medium" description = "Feature flag or plan-based gating in Python" -# Mirrors the Java/TS feature-gate rules. Only the literal-string form is -# captured; dynamic feature keys (Features.BETA_DASHBOARD or a variable) -# can be added later if false negatives surface. +# Covers two shapes: +# 1. method call: `flags.has_feature("X")` (mirrors the Java rule) +# 2. 
property comparison: `user.plan == "enterprise"` (mirrors the TS rule) +# Both branches share the captures `@gate_key` (method or property name) +# and `@gate_value` (feature/plan literal) so a single anchored regex +# in the predicate keeps the call and comparison shapes selective. +# Only the literal-string form is captured; dynamic feature keys +# (Features.BETA_DASHBOARD or a variable) can be added later if false +# negatives surface. query = """ -(call - function: (attribute - attribute: (identifier) @method) - arguments: (argument_list - (string (string_content) @feature)) -) @match +[ + (call + function: (attribute + attribute: (identifier) @gate_key) + arguments: (argument_list + (string (string_content) @gate_value))) @match + (comparison_operator + (attribute attribute: (identifier) @gate_key) + operators: ["==" "is"] + (string (string_content) @gate_value)) @match +] """ -[rule.predicates.method] -match = "^(has_feature|is_feature_enabled|check_feature|has_plan|is_plan_active)$" +[rule.predicates.gate_key] +match = "^(has_feature|is_feature_enabled|check_feature|has_plan|is_plan_active|plan|tier|subscription|license|edition|feature_flag|feature)$" [[rule.tests]] input = """ @@ -33,9 +44,30 @@ if subscription.has_plan("pro"): """ expect_match = true +[[rule.tests]] +input = """ +if user.plan == "enterprise": + enable_advanced_feature() +""" +expect_match = true + +[[rule.tests]] +input = """ +if account.tier == "pro": + upgrade() +""" +expect_match = true + [[rule.tests]] input = """ if validator.is_enabled("field"): validate() """ expect_match = false + +[[rule.tests]] +input = """ +if user.role == "admin": + delete() +""" +expect_match = false diff --git a/rules/python/login-required-decorator.toml b/rules/python/login-required-decorator.toml index 53834ac..526a0f5 100644 --- a/rules/python/login-required-decorator.toml +++ b/rules/python/login-required-decorator.toml @@ -3,16 +3,23 @@ id = "py-login-required-decorator" languages = ["python"] category = 
"middleware" confidence = "high" -description = "@login_required marker decorator (Django, Flask-Login, or similar)" -# Matches the marker form (no call/parens) of @login_required as either a bare -# identifier or a single-level attribute reference (e.g. @flask_login.login_required). -# The call form (@login_required(redirect_field_name='...')) is intentionally a -# distinct AST shape and would need a separate rule if observed in the wild. +description = "@login_required decorator (Django, Flask-Login, or similar)" +# Matches both forms of @login_required: +# - marker form: @login_required +# - call form: @login_required(redirect_field_name='login_url') +# Each form is allowed as either a bare identifier or a single-level +# attribute reference (e.g. @flask_login.login_required). The decorator's +# rightmost identifier is captured as `decorator_name`. query = """ (decorator [ (identifier) @decorator_name (attribute attribute: (identifier) @decorator_name) + (call + function: [ + (identifier) @decorator_name + (attribute attribute: (identifier) @decorator_name) + ]) ] ) @match """ @@ -36,6 +43,22 @@ def index(): """ expect_match = true +[[rule.tests]] +input = """ +@login_required(redirect_field_name='login_url') +def my_view(request): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@django.contrib.auth.decorators.login_required(login_url='/accounts/login/') +def my_view(request): + pass +""" +expect_match = true + [[rule.tests]] input = """ @app.route('/') diff --git a/rules/python/permission-check-call.toml b/rules/python/permission-check-call.toml index 8cacb6d..49541c9 100644 --- a/rules/python/permission-check-call.toml +++ b/rules/python/permission-check-call.toml @@ -4,25 +4,26 @@ languages = ["python"] category = "abac" confidence = "high" description = "Permission or capability check method call (e.g. 
user.can(\"delete\"))" -# Catches the broader ABAC family: `can`, `cannot`, `is_allowed`, `allows`, -# `denies`, `has_access`, `check_access`, `check_permission`. Django's -# `has_perm`/`has_perms` are intentionally excluded — they have a dedicated -# RBAC-flavored rule (`py-has-perm-call`) so the same call doesn't surface -# as both abac and rbac findings. +# Catches the allow-style ABAC family: `can`, `is_allowed`, `allows`, +# `has_access`, `check_access`, `has_permission`, `check_permission`. +# Deny-style methods (`cannot`, `denies`) are intentionally excluded — +# the rego template below is allow-style, and emitting it for a deny +# call would invert the semantics. Deep mode picks up those calls. +# Django's `has_perm`/`has_perms` are also excluded — they have a +# dedicated RBAC-flavored rule (`py-has-perm-call`) so the same call +# doesn't surface as both abac and rbac findings. The anchored +# alternation below already excludes both groups. query = """ (call function: (attribute - attribute: (identifier) @method @method_excl) + attribute: (identifier) @method) arguments: (argument_list (string (string_content) @permission)) ) @match """ [rule.predicates.method] -match = "^(can|cannot|has_permission|check_permission|is_allowed|allows|denies|has_access|check_access)$" - -[rule.predicates.method_excl] -not_match = "^(has_perm|has_perms)$" +match = "^(can|has_permission|check_permission|is_allowed|allows|has_access|check_access)$" [rule.rego_template] template = """ diff --git a/rules/python/role-check-conditional.toml b/rules/python/role-check-conditional.toml index ef2a28d..4bda20c 100644 --- a/rules/python/role-check-conditional.toml +++ b/rules/python/role-check-conditional.toml @@ -4,13 +4,18 @@ languages = ["python"] category = "rbac" confidence = "high" description = "Direct role comparison in a conditional (e.g. user.role == \"admin\")" -# Captures `. == \"value\"`. 
The `not_match` on `role_value` -# filters out chat/LLM-message roles ("assistant", "user", "system", -# "tool", "function") that share the same shape but aren't auth. +# Captures `<obj>.<prop> {==, is} \"value\"`. Both `==` (value equality) +# and `is` (identity equality, an anti-pattern for strings but seen in +# real code via interned literals) are matched. `is not` and `!=` are +# intentionally excluded: their semantics are inverted relative to the +# allow-style rego template emitted below. +# The `not_match` on `role_value` filters out chat/LLM-message roles +# ("assistant", "user", "system", "tool", "function") that share the +# same shape but aren't auth. query = """ (comparison_operator (attribute attribute: (identifier) @prop) - operators: "==" + operators: ["==" "is"] (string (string_content) @role_value) ) @match """ @@ -44,6 +49,13 @@ if account.account_type == "enterprise": """ expect_match = true +[[rule.tests]] +input = """ +if user.role is "admin": + delete_user() +""" +expect_match = true + [[rule.tests]] input = """ if user.name == "admin": diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs index 67388ed..b37f9f8 100644 --- a/src/deep/candidate.rs +++ b/src/deep/candidate.rs @@ -9,8 +9,8 @@ //! 2. **Cold regions** — file regions discovered by regex over auth-y //! function names. Capped at 30% of `max_candidates` so escalations get //! priority. Runs on **all** languages in the [`Language`] enum, including -//! those without structural parser support (Python, Go, etc.) — see -//! plans/todo/01-pr1-deep-http-transport.md §6 for rationale. +//! those without structural parser support (Go, C#, Kotlin, Ruby, PHP) — +//! see plans/todo/01-pr1-deep-http-transport.md §6 for rationale. //! //! Candidates are sorted deterministically by `(file, line_start)`. 
diff --git a/src/scanner/matcher.rs b/src/scanner/matcher.rs index ad872bb..0a0c94a 100644 --- a/src/scanner/matcher.rs +++ b/src/scanner/matcher.rs @@ -882,6 +882,18 @@ public class MyService implements Serializable { assert!(findings.is_empty(), "should not match @staticmethod"); } + #[test] + fn py_login_required_decorator_call_form_matches() { + let findings = parse_and_match_python( + "@login_required(redirect_field_name='login_url')\ndef my_view(request):\n pass\n", + include_str!("../../rules/python/login-required-decorator.toml"), + ); + assert!( + !findings.is_empty(), + "should match @login_required(...) call form" + ); + } + #[test] fn py_has_perm_call_matches() { let findings = parse_and_match_python( @@ -935,6 +947,18 @@ public class MyService implements Serializable { ); } + #[test] + fn py_role_check_conditional_is_operator_matches() { + let findings = parse_and_match_python( + "if user.role is \"admin\":\n delete_user()\n", + include_str!("../../rules/python/role-check-conditional.toml"), + ); + assert!( + !findings.is_empty(), + "should match `is` operator (string identity equality)" + ); + } + #[test] fn py_has_role_call_matches() { let findings = parse_and_match_python( @@ -995,6 +1019,36 @@ public class MyService implements Serializable { ); } + #[test] + fn py_feature_gate_property_comparison_matches() { + let findings = parse_and_match_python( + "if user.plan == \"enterprise\":\n enable_advanced()\n", + include_str!("../../rules/python/feature-gate-check.toml"), + ); + assert!( + !findings.is_empty(), + "should match property comparison shape (user.plan == ...)" + ); + assert_eq!( + findings[0].category, + crate::types::AuthCategory::FeatureGate + ); + } + + #[test] + fn py_feature_gate_property_comparison_excludes_role() { + // `role` is not a feature-gate key; this should be picked up by + // py-role-check-conditional, not py-feature-gate-check. 
+ let findings = parse_and_match_python( + "if user.role == \"admin\":\n delete()\n", + include_str!("../../rules/python/feature-gate-check.toml"), + ); + assert!( + findings.is_empty(), + "feature-gate must not match role-style property comparisons" + ); + } + // -- cross_predicates tests (synthetic rules) -- /// A synthetic rule shaped like ownership-check: two getters in an From 53ebe99959c69da8ee3058218bc90f27f9ba04b7 Mon Sep 17 00:00:00 2001 From: Brad Anderson Date: Sun, 3 May 2026 15:57:17 -0400 Subject: [PATCH 3/3] fix: address PR review feedback on Python structural support - has-perm-call.toml: extend tree-sitter query to also capture list-form `has_perms(["app.a", "app.b"])` calls; previously only single-string args were captured even though the predicate matched `has_perms`. Add inline test for the bulk variant and update the comment block. - discovery.rs: harden `discover_for_deep_picks_up_extra_languages` to assert the discovered language sets (HashSet) instead of just `len()`, so wrong-language-mix regressions can't slip through. --- rules/python/has-perm-call.toml | 19 +++++++++++++++---- src/scanner/discovery.rs | 16 ++++++++++++++-- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/rules/python/has-perm-call.toml b/rules/python/has-perm-call.toml index d00c9c2..0e47a69 100644 --- a/rules/python/has-perm-call.toml +++ b/rules/python/has-perm-call.toml @@ -4,15 +4,19 @@ languages = ["python"] category = "rbac" confidence = "high" description = "Django-style permission check (request.user.has_perm / has_perms)" -# Matches `.has_perm("app.codename")` and the bulk variant -# `.has_perms(["app.a", "app.b"])`. Only the literal-string form is -# captured today; dynamic permission codes will surface via deep mode. +# Matches both `.has_perm("app.codename")` and the bulk variant +# `.has_perms(["app.a", "app.b"])` when permission codes are literal +# strings. Dynamic (non-literal) permission codes will surface via deep mode. 
query = """ (call function: (attribute attribute: (identifier) @method) arguments: (argument_list - (string (string_content) @perm_name)) + [ + (string (string_content) @perm_name) + (list + (string (string_content) @perm_name)) + ]) ) @match """ @@ -42,6 +46,13 @@ if user.has_perm('blog.add_post'): """ expect_match = true +[[rule.tests]] +input = """ +if request.user.has_perms(['blog.add_post', 'blog.change_post']): + bulk_edit() +""" +expect_match = true + [[rule.tests]] input = """ result.has_value('foo') diff --git a/src/scanner/discovery.rs b/src/scanner/discovery.rs index 156f6e8..f4d4252 100644 --- a/src/scanner/discovery.rs +++ b/src/scanner/discovery.rs @@ -257,15 +257,27 @@ mod tests { #[test] fn discover_for_deep_picks_up_extra_languages() { + use std::collections::HashSet; + let dir = tempfile::tempdir().unwrap(); fs::write(dir.path().join("a.ts"), "let x = 1;").unwrap(); fs::write(dir.path().join("b.py"), "x = 1\n").unwrap(); fs::write(dir.path().join("c.go"), "package main\n").unwrap(); let structural = discover_files(dir.path(), &[], &[]); - assert_eq!(structural.len(), 2, "structural sees TS + Python"); + let structural_langs: HashSet<_> = structural.iter().map(|f| f.language).collect(); + assert_eq!( + structural_langs, + HashSet::from([Language::TypeScript, Language::Python]), + "structural should include only TS + Python", + ); let deep = discover_files_for_deep(dir.path(), &[], &[]); - assert_eq!(deep.len(), 3, "deep sees TS + Python + Go"); + let deep_langs: HashSet<_> = deep.iter().map(|f| f.language).collect(); + assert_eq!( + deep_langs, + HashSet::from([Language::TypeScript, Language::Python, Language::Go]), + "deep should include TS + Python + Go", + ); } }