diff --git a/Cargo.lock b/Cargo.lock index 0674dfe..a71db19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1751,6 +1751,16 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" +[[package]] +name = "tree-sitter-python" +version = "0.23.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-typescript" version = "0.23.2" @@ -2394,6 +2404,7 @@ dependencies = [ "tree-sitter", "tree-sitter-java", "tree-sitter-javascript", + "tree-sitter-python", "tree-sitter-typescript", "url", ] diff --git a/Cargo.toml b/Cargo.toml index dd1ece2..59fd46e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ tree-sitter = "0.24" tree-sitter-typescript = "0.23" tree-sitter-java = "0.23" tree-sitter-javascript = "0.23" +tree-sitter-python = "0.23" ignore = "0.4" sha2 = "0.10" regex = "1" diff --git a/README.md b/README.md index 41e5d25..3fb38f6 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Sift through your codebase for embedded authorization logic. Extract it into Policy as Code (PaC) — [Rego](https://www.openpolicyagent.org/docs/latest/policy-language/) for [OPA](https://www.openpolicyagent.org/) today, with other engines (e.g. Cedar) on the roadmap. -> **Status:** v0.1 — structural scanning ready for TypeScript, JavaScript, and Java. `--deep` (LLM-assisted) mode functional via any OpenAI-compatible endpoint or MCP-capable agent host. +> **Status:** v0.1 — structural scanning ready for TypeScript, JavaScript, Java, and Python. `--deep` (LLM-assisted) mode functional via any OpenAI-compatible endpoint or MCP-capable agent host. ## What is zift? @@ -23,7 +23,34 @@ zift report . # detailed findings report 1. **Structural scan** (tree-sitter) — fast, deterministic, zero-cost. 
Finds known authorization patterns: role checks, permission guards, auth middleware, security annotations. -2. **Semantic scan** (`--deep`, opt-in) — sends candidate code regions to an LLM that classifies authorization logic the structural pass missed or misjudged. Useful for business rules that implicitly encode access control, and for languages where structural support hasn't shipped yet (Python, Go, etc.). +2. **Semantic scan** (`--deep`, opt-in) — sends candidate code regions to an LLM that classifies authorization logic the structural pass missed or misjudged. Useful for business rules that implicitly encode access control, and for languages where structural support hasn't shipped yet (Go, etc.). + +## Supported languages + +| Language | Structural | Deep (cold-region) | Framework hints (deep) | +|----------|-----------|---------------------|------------------------| +| TypeScript / JavaScript | yes (v0.1) | yes (v0.1) | Express, NestJS, Next.js | +| Java | yes (v0.1) | yes (v0.1) | Spring Security, Jakarta Security | +| Python | yes (v0.1) | yes (v0.1) | Django, Flask, FastAPI | +| Go | planned (v0.2) | yes (v0.1) | Gin, Echo | +| C# | planned (v0.3) | yes (v0.1) | ASP.NET Core | +| Kotlin | planned (v0.3) | yes (v0.1) | Spring (Kotlin) | +| Ruby | planned (v0.3) | yes (v0.1) | Rails | +| PHP | planned (v0.3) | yes (v0.1) | Laravel | + +Deep mode walks the full source tree by extension and detects auth-y function names with regex — so it produces useful results in any language well before structural support lands. + +## Installation + +### Cargo + +```bash +cargo install --git https://github.com/EnforceAuth/zift +``` + +### Binary download + +Prebuilt binaries for Linux (x86_64), macOS (x86_64 and arm64), and Windows (x86_64) are available from [Releases](https://github.com/EnforceAuth/zift/releases). 
## Deep mode (`--deep`) @@ -189,33 +216,6 @@ echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion": You should see a single line back with `serverInfo.name == "zift"` and capability flags for tools/resources. Then call `tools/list` to see the seven tool descriptors. -## Supported languages - -| Language | Structural | Deep (cold-region) | Framework hints (deep) | -|----------|-----------|---------------------|------------------------| -| TypeScript / JavaScript | yes (v0.1) | yes (v0.1) | Express, NestJS, Next.js | -| Java | yes (v0.1) | yes (v0.1) | Spring Security, Jakarta Security | -| Python | planned (v0.2) | yes (v0.1) | Django, Flask, FastAPI | -| Go | planned (v0.2) | yes (v0.1) | Gin, Echo | -| C# | planned (v0.3) | yes (v0.1) | ASP.NET Core | -| Kotlin | planned (v0.3) | yes (v0.1) | Spring (Kotlin) | -| Ruby | planned (v0.3) | yes (v0.1) | Rails | -| PHP | planned (v0.3) | yes (v0.1) | Laravel | - -Deep mode walks the full source tree by extension and detects auth-y function names with regex — so it produces useful results in any language well before structural support lands. - -## Installation - -### Cargo - -```bash -cargo install --git https://github.com/EnforceAuth/zift -``` - -### Binary download - -Prebuilt binaries for Linux (x86_64), macOS (x86_64 and arm64), and Windows (x86_64) are available from [Releases](https://github.com/EnforceAuth/zift/releases). 
- ## License Apache-2.0 diff --git a/rules/python/django-permission-required.toml b/rules/python/django-permission-required.toml new file mode 100644 index 0000000..582f686 --- /dev/null +++ b/rules/python/django-permission-required.toml @@ -0,0 +1,58 @@ +[rule] +id = "py-django-permission-required" +languages = ["python"] +category = "middleware" +confidence = "high" +description = "Django @permission_required decorator (with permission codename argument)" +# Matches both bare and module-qualified forms of the decorator: +# @permission_required('app.delete_user') +# @django.contrib.auth.decorators.permission_required('app.delete_user') +# The decorator's call function is captured at the rightmost identifier, so +# arbitrarily deep import paths still bind to `decorator_name`. +query = """ +(decorator + (call + function: [ + (identifier) @decorator_name + (attribute attribute: (identifier) @decorator_name) + ] + arguments: (argument_list + (string (string_content) @perm_name))) +) @match +""" + +[rule.predicates.decorator_name] +eq = "permission_required" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + "{{perm_name}}" in input.user.permissions +} +""" + +[[rule.tests]] +input = """ +@permission_required('app.delete_user') +def delete_user(request, id): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@django.contrib.auth.decorators.permission_required('app.delete_user') +def delete_user(request, id): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@cache_page(60) +def index(request): + pass +""" +expect_match = false diff --git a/rules/python/django-user-passes-test.toml b/rules/python/django-user-passes-test.toml new file mode 100644 index 0000000..1b9b79e --- /dev/null +++ b/rules/python/django-user-passes-test.toml @@ -0,0 +1,45 @@ +[rule] +id = "py-django-user-passes-test" +languages = ["python"] +category = "middleware" +confidence = "medium" +description = "Django @user_passes_test decorator 
(custom predicate gate)" +# The decorator wraps a predicate function/lambda — we can detect the +# decorator but the actual rule encoded inside the predicate needs human +# review or deep-mode analysis. Confidence is `medium` for that reason. +query = """ +(decorator + (call + function: [ + (identifier) @decorator_name + (attribute attribute: (identifier) @decorator_name) + ]) +) @match +""" + +[rule.predicates.decorator_name] +eq = "user_passes_test" + +[[rule.tests]] +input = """ +@user_passes_test(lambda u: u.is_admin) +def view(request): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@django.contrib.auth.decorators.user_passes_test(is_staff_check) +def admin_view(request): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@cache_control(max_age=60) +def view(request): + pass +""" +expect_match = false diff --git a/rules/python/fastapi-depends.toml b/rules/python/fastapi-depends.toml new file mode 100644 index 0000000..f8b14b8 --- /dev/null +++ b/rules/python/fastapi-depends.toml @@ -0,0 +1,53 @@ +[rule] +id = "py-fastapi-depends" +languages = ["python"] +category = "middleware" +confidence = "medium" +description = "FastAPI Depends(...) used as a parameter default (dependency-injection auth gate)" +# Matches both `token: str = Depends(...)` (typed) and `token = Depends(...)` +# (untyped) parameter defaults. `Depends` is FastAPI's idiomatic way to wire +# auth dependencies (`oauth2_scheme`, `get_current_user`, `require_role`), +# but it's also used for non-auth dependency injection — confidence is +# `medium` and we intentionally emit no rego template (the wrapped callable +# is what encodes the policy, and that needs human or deep-mode review). 
+query = """ +[ + (typed_default_parameter + value: (call + function: (identifier) @fn_name)) @match + (default_parameter + value: (call + function: (identifier) @fn_name)) @match +] +""" + +[rule.predicates.fn_name] +eq = "Depends" + +[[rule.tests]] +input = """ +def read_items(token: str = Depends(oauth2_scheme)): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +def read_items(token = Depends(get_current_user)): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +def read_items(token: str = "default"): + pass +""" +expect_match = false + +[[rule.tests]] +input = """ +def factory(builder = Builder()): + pass +""" +expect_match = false diff --git a/rules/python/feature-gate-check.toml b/rules/python/feature-gate-check.toml new file mode 100644 index 0000000..61acd94 --- /dev/null +++ b/rules/python/feature-gate-check.toml @@ -0,0 +1,73 @@ +[rule] +id = "py-feature-gate-check" +languages = ["python"] +category = "feature_gate" +confidence = "medium" +description = "Feature flag or plan-based gating in Python" +# Covers two shapes: +# 1. method call: `flags.has_feature("X")` (mirrors the Java rule) +# 2. property comparison: `user.plan == "enterprise"` (mirrors the TS rule) +# Both branches share the captures `@gate_key` (method or property name) +# and `@gate_value` (feature/plan literal) so a single anchored regex +# in the predicate keeps the call and comparison shapes selective. +# Only the literal-string form is captured; dynamic feature keys +# (Features.BETA_DASHBOARD or a variable) can be added later if false +# negatives surface. 
+query = """ +[ + (call + function: (attribute + attribute: (identifier) @gate_key) + arguments: (argument_list + (string (string_content) @gate_value))) @match + (comparison_operator + (attribute attribute: (identifier) @gate_key) + operators: ["==" "is"] + (string (string_content) @gate_value)) @match +] +""" + +[rule.predicates.gate_key] +match = "^(has_feature|is_feature_enabled|check_feature|has_plan|is_plan_active|plan|tier|subscription|license|edition|feature_flag|feature)$" + +[[rule.tests]] +input = """ +if feature_flags.has_feature("advanced-analytics"): + enable() +""" +expect_match = true + +[[rule.tests]] +input = """ +if subscription.has_plan("pro"): + enable() +""" +expect_match = true + +[[rule.tests]] +input = """ +if user.plan == "enterprise": + enable_advanced_feature() +""" +expect_match = true + +[[rule.tests]] +input = """ +if account.tier == "pro": + upgrade() +""" +expect_match = true + +[[rule.tests]] +input = """ +if validator.is_enabled("field"): + validate() +""" +expect_match = false + +[[rule.tests]] +input = """ +if user.role == "admin": + delete() +""" +expect_match = false diff --git a/rules/python/has-perm-call.toml b/rules/python/has-perm-call.toml new file mode 100644 index 0000000..0e47a69 --- /dev/null +++ b/rules/python/has-perm-call.toml @@ -0,0 +1,60 @@ +[rule] +id = "py-has-perm-call" +languages = ["python"] +category = "rbac" +confidence = "high" +description = "Django-style permission check (request.user.has_perm / has_perms)" +# Matches both `.has_perm("app.codename")` and the bulk variant +# `.has_perms(["app.a", "app.b"])` when permission codes are literal +# strings. Dynamic (non-literal) permission codes will surface via deep mode. 
+query = """ +(call + function: (attribute + attribute: (identifier) @method) + arguments: (argument_list + [ + (string (string_content) @perm_name) + (list + (string (string_content) @perm_name)) + ]) +) @match +""" + +[rule.predicates.method] +match = "^(has_perm|has_perms)$" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + "{{perm_name}}" in input.user.permissions +} +""" + +[[rule.tests]] +input = """ +if request.user.has_perm('app.delete_user'): + delete_user() +""" +expect_match = true + +[[rule.tests]] +input = """ +if user.has_perm('blog.add_post'): + create() +""" +expect_match = true + +[[rule.tests]] +input = """ +if request.user.has_perms(['blog.add_post', 'blog.change_post']): + bulk_edit() +""" +expect_match = true + +[[rule.tests]] +input = """ +result.has_value('foo') +""" +expect_match = false diff --git a/rules/python/has-role-call.toml b/rules/python/has-role-call.toml new file mode 100644 index 0000000..0bdcfdb --- /dev/null +++ b/rules/python/has-role-call.toml @@ -0,0 +1,45 @@ +[rule] +id = "py-has-role-call" +languages = ["python"] +category = "rbac" +confidence = "high" +description = "Bare role-checking function call (e.g. 
has_role(\"admin\"))" +query = """ +(call + function: (identifier) @fn_name + arguments: (argument_list + (string (string_content) @role_value)) +) @match +""" + +[rule.predicates.fn_name] +match = "^(has_role|check_role|is_role|require_role|require_roles)$" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + input.user.role in {"{{role_value}}"} +} +""" + +[[rule.tests]] +input = """ +if has_role("manager"): + approve_request() +""" +expect_match = true + +[[rule.tests]] +input = """ +require_role("admin") +""" +expect_match = true + +[[rule.tests]] +input = """ +if has_value("manager"): + process() +""" +expect_match = false diff --git a/rules/python/login-required-decorator.toml b/rules/python/login-required-decorator.toml new file mode 100644 index 0000000..526a0f5 --- /dev/null +++ b/rules/python/login-required-decorator.toml @@ -0,0 +1,76 @@ +[rule] +id = "py-login-required-decorator" +languages = ["python"] +category = "middleware" +confidence = "high" +description = "@login_required decorator (Django, Flask-Login, or similar)" +# Matches both forms of @login_required: +# - marker form: @login_required +# - call form: @login_required(redirect_field_name='login_url') +# Each form is allowed as either a bare identifier or an attribute +# reference of any depth (e.g. @flask_login.login_required). The decorator's +# rightmost identifier is captured as `decorator_name`. 
+query = """ +(decorator + [ + (identifier) @decorator_name + (attribute attribute: (identifier) @decorator_name) + (call + function: [ + (identifier) @decorator_name + (attribute attribute: (identifier) @decorator_name) + ]) + ] +) @match +""" + +[rule.predicates.decorator_name] +eq = "login_required" + +[[rule.tests]] +input = """ +@login_required +def my_view(request): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@flask_login.login_required +def index(): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@login_required(redirect_field_name='login_url') +def my_view(request): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@django.contrib.auth.decorators.login_required(login_url='/accounts/login/') +def my_view(request): + pass +""" +expect_match = true + +[[rule.tests]] +input = """ +@app.route('/') +def index(): + pass +""" +expect_match = false + +[[rule.tests]] +input = """ +@staticmethod +def helper(): + pass +""" +expect_match = false diff --git a/rules/python/ownership-check.toml b/rules/python/ownership-check.toml new file mode 100644 index 0000000..ece9106 --- /dev/null +++ b/rules/python/ownership-check.toml @@ -0,0 +1,53 @@ +[rule] +id = "py-ownership-check" +languages = ["python"] +category = "ownership" +confidence = "medium" +description = "Resource ownership comparison (e.g. resource.owner_id == user.id)" +# Catches `<resource>.<owner_field> == <principal>.<id_field>`. +# Confidence is `medium` — the snake_case naming convention makes false +# positives less likely than in TS/Java, but unrelated identity-shaped +# comparisons can still slip through. 
+query = """ +(comparison_operator + (attribute attribute: (identifier) @left_prop) + operators: "==" + (attribute attribute: (identifier) @right_prop) +) @match +""" + +[rule.predicates.left_prop] +match = "(?i)^(owner_id|user_id|created_by|author_id|owner|account_id)$" + +[rule.predicates.right_prop] +match = "(?i)^(id|user_id|sub|account_id)$" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + input.resource.owner == input.user.id +} +""" + +[[rule.tests]] +input = """ +if resource.owner_id == user.id: + allow_edit() +""" +expect_match = true + +[[rule.tests]] +input = """ +if post.author_id == request.user.id: + edit() +""" +expect_match = true + +[[rule.tests]] +input = """ +if a.score == b.score: + tie() +""" +expect_match = false diff --git a/rules/python/permission-check-call.toml b/rules/python/permission-check-call.toml new file mode 100644 index 0000000..49541c9 --- /dev/null +++ b/rules/python/permission-check-call.toml @@ -0,0 +1,70 @@ +[rule] +id = "py-permission-check-call" +languages = ["python"] +category = "abac" +confidence = "high" +description = "Permission or capability check method call (e.g. user.can(\"delete\"))" +# Catches the allow-style ABAC family: `can`, `is_allowed`, `allows`, +# `has_access`, `check_access`, `has_permission`, `check_permission`. +# Deny-style methods (`cannot`, `denies`) are intentionally excluded — +# the rego template below is allow-style, and emitting it for a deny +# call would invert the semantics. Deep mode picks up those calls. +# Django's `has_perm`/`has_perms` are also excluded — they have a +# dedicated RBAC-flavored rule (`py-has-perm-call`) so the same call +# doesn't surface as both abac and rbac findings. The anchored +# alternation below already excludes both groups. 
+query = """ +(call + function: (attribute + attribute: (identifier) @method) + arguments: (argument_list + (string (string_content) @permission)) +) @match +""" + +[rule.predicates.method] +match = "^(can|has_permission|check_permission|is_allowed|allows|has_access|check_access)$" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + input.action == "{{permission}}" +} +""" + +[[rule.tests]] +input = """ +if user.can("delete"): + delete_resource() +""" +expect_match = true + +[[rule.tests]] +input = """ +if policy.is_allowed("read"): + read() +""" +expect_match = true + +[[rule.tests]] +input = """ +if account.has_access("billing"): + show_billing() +""" +expect_match = true + +[[rule.tests]] +input = """ +if user.has_perm("blog.add_post"): + create_post() +""" +expect_match = false + +[[rule.tests]] +input = """ +if cache.has_value("foo"): + use_it() +""" +expect_match = false diff --git a/rules/python/role-check-conditional.toml b/rules/python/role-check-conditional.toml new file mode 100644 index 0000000..4bda20c --- /dev/null +++ b/rules/python/role-check-conditional.toml @@ -0,0 +1,71 @@ +[rule] +id = "py-role-check-conditional" +languages = ["python"] +category = "rbac" +confidence = "high" +description = "Direct role comparison in a conditional (e.g. user.role == \"admin\")" +# Captures `<object>.<role_prop> {==, is} \"value\"`. Both `==` (value equality) +# and `is` (identity equality, an anti-pattern for strings but seen in +# real code via interned literals) are matched. `is not` and `!=` are +# intentionally excluded: their semantics are inverted relative to the +# allow-style rego template emitted below. +# The `not_match` on `role_value` filters out chat/LLM-message roles +# ("assistant", "user", "system", "tool", "function") that share the +# same shape but aren't auth. 
+query = """ +(comparison_operator + (attribute attribute: (identifier) @prop) + operators: ["==" "is"] + (string (string_content) @role_value) +) @match +""" + +[rule.predicates.prop] +match = "^(role|roles|user_role|user_type|account_type)$" + +[rule.predicates.role_value] +not_match = "^(assistant|user|system|tool|function)$" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + input.user.role == "{{role_value}}" +} +""" + +[[rule.tests]] +input = """ +if user.role == "admin": + delete_user() +""" +expect_match = true + +[[rule.tests]] +input = """ +if account.account_type == "enterprise": + enable() +""" +expect_match = true + +[[rule.tests]] +input = """ +if user.role is "admin": + delete_user() +""" +expect_match = true + +[[rule.tests]] +input = """ +if user.name == "admin": + greet() +""" +expect_match = false + +[[rule.tests]] +input = """ +if msg.role == "assistant": + process_response() +""" +expect_match = false diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs index 67388ed..b37f9f8 100644 --- a/src/deep/candidate.rs +++ b/src/deep/candidate.rs @@ -9,8 +9,8 @@ //! 2. **Cold regions** — file regions discovered by regex over auth-y //! function names. Capped at 30% of `max_candidates` so escalations get //! priority. Runs on **all** languages in the [`Language`] enum, including -//! those without structural parser support (Python, Go, etc.) — see -//! plans/todo/01-pr1-deep-http-transport.md §6 for rationale. +//! those without structural parser support (Go, C#, Kotlin, Ruby, PHP) — +//! see plans/todo/01-pr1-deep-http-transport.md §6 for rationale. //! //! Candidates are sorted deterministically by `(file, line_start)`. 
diff --git a/src/rules/embedded.rs b/src/rules/embedded.rs index 1dceb0b..729c9c5 100644 --- a/src/rules/embedded.rs +++ b/src/rules/embedded.rs @@ -137,6 +137,47 @@ const EMBEDDED_RULES: &[(&str, &str)] = &[ "java-custom-authz-call", include_str!("../../rules/java/custom-authz-call.toml"), ), + // -- Python -- + ( + "py-django-permission-required", + include_str!("../../rules/python/django-permission-required.toml"), + ), + ( + "py-login-required-decorator", + include_str!("../../rules/python/login-required-decorator.toml"), + ), + ( + "py-django-user-passes-test", + include_str!("../../rules/python/django-user-passes-test.toml"), + ), + ( + "py-has-perm-call", + include_str!("../../rules/python/has-perm-call.toml"), + ), + ( + "py-fastapi-depends", + include_str!("../../rules/python/fastapi-depends.toml"), + ), + ( + "py-role-check-conditional", + include_str!("../../rules/python/role-check-conditional.toml"), + ), + ( + "py-has-role-call", + include_str!("../../rules/python/has-role-call.toml"), + ), + ( + "py-permission-check-call", + include_str!("../../rules/python/permission-check-call.toml"), + ), + ( + "py-ownership-check", + include_str!("../../rules/python/ownership-check.toml"), + ), + ( + "py-feature-gate-check", + include_str!("../../rules/python/feature-gate-check.toml"), + ), ]; pub fn load_embedded_rules() -> Result> { diff --git a/src/scanner/discovery.rs b/src/scanner/discovery.rs index f9c5826..f4d4252 100644 --- a/src/scanner/discovery.rs +++ b/src/scanner/discovery.rs @@ -22,13 +22,14 @@ pub fn detect_language(path: &Path) -> Option<(Language, bool)> { "js" | "mjs" | "cjs" => Some((Language::JavaScript, false)), "jsx" => Some((Language::JavaScript, true)), "java" => Some((Language::Java, false)), + "py" | "pyi" => Some((Language::Python, false)), _ => None, } } /// Extension → language map covering **all** languages in the [`Language`] -/// enum, including those without structural parser support yet (Python, Go, -/// C#, Kotlin, Ruby, PHP). 
Used by the deep (semantic) scan, which can run +/// enum, including those without structural parser support yet (Go, C#, +/// Kotlin, Ruby, PHP). Used by the deep (semantic) scan, which can run /// regex-based cold-region detection on any language regardless of grammar /// availability. pub fn detect_language_for_deep(path: &Path) -> Option<(Language, bool)> { @@ -162,10 +163,22 @@ mod tests { ); } + #[test] + fn detect_python_extensions() { + assert_eq!( + detect_language(Path::new("foo.py")), + Some((Language::Python, false)) + ); + assert_eq!( + detect_language(Path::new("foo.pyi")), + Some((Language::Python, false)) + ); + } + #[test] fn detect_unknown_extension() { assert_eq!(detect_language(Path::new("foo.rs")), None); - assert_eq!(detect_language(Path::new("foo.py")), None); + assert_eq!(detect_language(Path::new("foo.go")), None); } #[test] @@ -232,25 +245,39 @@ mod tests { } #[test] - fn structural_detect_language_does_not_pick_up_python() { - // Sanity: the structural detector must NOT include Python — otherwise - // the structural pass would try to parse files for which it has no - // grammar. The deep detector picks them up; the structural one doesn't. - assert_eq!(detect_language(Path::new("foo.py")), None); + fn structural_detect_language_does_not_pick_up_unsupported_languages() { + // Sanity: the structural detector must NOT include languages without + // a wired-up tree-sitter grammar — otherwise the structural pass + // would try to parse files it can't handle. The deep detector picks + // them up; the structural one doesn't. 
assert_eq!(detect_language(Path::new("foo.go")), None); + assert_eq!(detect_language(Path::new("Foo.cs")), None); + assert_eq!(detect_language(Path::new("foo.rb")), None); } #[test] fn discover_for_deep_picks_up_extra_languages() { + use std::collections::HashSet; + let dir = tempfile::tempdir().unwrap(); fs::write(dir.path().join("a.ts"), "let x = 1;").unwrap(); fs::write(dir.path().join("b.py"), "x = 1\n").unwrap(); fs::write(dir.path().join("c.go"), "package main\n").unwrap(); let structural = discover_files(dir.path(), &[], &[]); - assert_eq!(structural.len(), 1, "structural sees only TS"); + let structural_langs: HashSet<_> = structural.iter().map(|f| f.language).collect(); + assert_eq!( + structural_langs, + HashSet::from([Language::TypeScript, Language::Python]), + "structural should include only TS + Python", + ); let deep = discover_files_for_deep(dir.path(), &[], &[]); - assert_eq!(deep.len(), 3, "deep sees TS + Python + Go"); + let deep_langs: HashSet<_> = deep.iter().map(|f| f.language).collect(); + assert_eq!( + deep_langs, + HashSet::from([Language::TypeScript, Language::Python, Language::Go]), + "deep should include TS + Python + Go", + ); } } diff --git a/src/scanner/matcher.rs b/src/scanner/matcher.rs index 1d5ec26..0a0c94a 100644 --- a/src/scanner/matcher.rs +++ b/src/scanner/matcher.rs @@ -823,6 +823,232 @@ public class MyService implements Serializable { ); } + // -- Python rule tests -- + + fn parse_and_match_python(source: &str, rule_toml: &str) -> Vec { + let rule = rules::parse_rule_for_test(rule_toml); + let mut ts_parser = tree_sitter::Parser::new(); + let lang = Language::Python; + let ts_lang = parser::get_language(lang, false).unwrap(); + let tree = parser::parse_source(&mut ts_parser, source.as_bytes(), lang, false).unwrap(); + let compiled = compile_rule(&rule, &ts_lang).unwrap(); + execute_query( + &compiled, + &tree, + source.as_bytes(), + Path::new("test.py"), + lang, + ) + .unwrap() + } + + #[test] + fn 
py_django_permission_required_matches() { + let findings = parse_and_match_python( + "@permission_required('app.delete_user')\ndef delete_user(request, id):\n pass\n", + include_str!("../../rules/python/django-permission-required.toml"), + ); + assert!(!findings.is_empty(), "should match @permission_required"); + assert_eq!(findings[0].category, crate::types::AuthCategory::Middleware); + } + + #[test] + fn py_django_permission_required_qualified_matches() { + let findings = parse_and_match_python( + "@django.contrib.auth.decorators.permission_required('app.delete_user')\ndef delete_user(request, id):\n pass\n", + include_str!("../../rules/python/django-permission-required.toml"), + ); + assert!( + !findings.is_empty(), + "should match module-qualified @permission_required" + ); + } + + #[test] + fn py_login_required_decorator_matches() { + let findings = parse_and_match_python( + "@login_required\ndef my_view(request):\n pass\n", + include_str!("../../rules/python/login-required-decorator.toml"), + ); + assert!(!findings.is_empty(), "should match bare @login_required"); + } + + #[test] + fn py_login_required_decorator_no_false_positive_on_unrelated_decorator() { + let findings = parse_and_match_python( + "@staticmethod\ndef helper():\n pass\n", + include_str!("../../rules/python/login-required-decorator.toml"), + ); + assert!(findings.is_empty(), "should not match @staticmethod"); + } + + #[test] + fn py_login_required_decorator_call_form_matches() { + let findings = parse_and_match_python( + "@login_required(redirect_field_name='login_url')\ndef my_view(request):\n pass\n", + include_str!("../../rules/python/login-required-decorator.toml"), + ); + assert!( + !findings.is_empty(), + "should match @login_required(...) 
call form" + ); + } + + #[test] + fn py_has_perm_call_matches() { + let findings = parse_and_match_python( + "if request.user.has_perm('app.delete_user'):\n delete_user()\n", + include_str!("../../rules/python/has-perm-call.toml"), + ); + assert!(!findings.is_empty(), "should match request.user.has_perm()"); + assert_eq!(findings[0].category, crate::types::AuthCategory::Rbac); + } + + #[test] + fn py_fastapi_depends_typed_default_matches() { + let findings = parse_and_match_python( + "def read_items(token: str = Depends(oauth2_scheme)):\n pass\n", + include_str!("../../rules/python/fastapi-depends.toml"), + ); + assert!(!findings.is_empty(), "should match Depends() typed default"); + } + + #[test] + fn py_fastapi_depends_untyped_default_matches() { + let findings = parse_and_match_python( + "def read_items(token = Depends(get_current_user)):\n pass\n", + include_str!("../../rules/python/fastapi-depends.toml"), + ); + assert!( + !findings.is_empty(), + "should match Depends() untyped default" + ); + } + + #[test] + fn py_role_check_conditional_matches() { + let findings = parse_and_match_python( + "if user.role == \"admin\":\n delete_user()\n", + include_str!("../../rules/python/role-check-conditional.toml"), + ); + assert!(!findings.is_empty(), "should match user.role == \"admin\""); + assert_eq!(findings[0].category, crate::types::AuthCategory::Rbac); + } + + #[test] + fn py_role_check_conditional_excludes_chat_message_role() { + let findings = parse_and_match_python( + "if msg.role == \"assistant\":\n process_response()\n", + include_str!("../../rules/python/role-check-conditional.toml"), + ); + assert!( + findings.is_empty(), + "should not match LLM chat message role" + ); + } + + #[test] + fn py_role_check_conditional_is_operator_matches() { + let findings = parse_and_match_python( + "if user.role is \"admin\":\n delete_user()\n", + include_str!("../../rules/python/role-check-conditional.toml"), + ); + assert!( + !findings.is_empty(), + "should match `is` 
operator (string identity equality)" + ); + } + + #[test] + fn py_has_role_call_matches() { + let findings = parse_and_match_python( + "if has_role(\"manager\"):\n approve_request()\n", + include_str!("../../rules/python/has-role-call.toml"), + ); + assert!(!findings.is_empty(), "should match has_role()"); + } + + #[test] + fn py_permission_check_call_matches() { + let findings = parse_and_match_python( + "if user.can(\"delete\"):\n delete_resource()\n", + include_str!("../../rules/python/permission-check-call.toml"), + ); + assert!(!findings.is_empty(), "should match user.can()"); + assert_eq!(findings[0].category, crate::types::AuthCategory::Abac); + } + + #[test] + fn py_permission_check_call_excludes_django_has_perm() { + // `has_perm` belongs to py-has-perm-call (rbac); it must not also + // surface here as abac, otherwise the same call produces two + // findings with conflicting categories. + let findings = parse_and_match_python( + "if user.has_perm(\"blog.add_post\"):\n create_post()\n", + include_str!("../../rules/python/permission-check-call.toml"), + ); + assert!( + findings.is_empty(), + "permission-check-call must not duplicate has_perm (covered by py-has-perm-call)" + ); + } + + #[test] + fn py_ownership_check_matches() { + let findings = parse_and_match_python( + "if resource.owner_id == user.id:\n allow_edit()\n", + include_str!("../../rules/python/ownership-check.toml"), + ); + assert!( + !findings.is_empty(), + "should match owner_id == user.id ownership check" + ); + assert_eq!(findings[0].category, crate::types::AuthCategory::Ownership); + } + + #[test] + fn py_feature_gate_matches() { + let findings = parse_and_match_python( + "if feature_flags.has_feature(\"advanced\"):\n enable()\n", + include_str!("../../rules/python/feature-gate-check.toml"), + ); + assert!(!findings.is_empty(), "should match has_feature()"); + assert_eq!( + findings[0].category, + crate::types::AuthCategory::FeatureGate + ); + } + + #[test] + fn 
py_feature_gate_property_comparison_matches() { + let findings = parse_and_match_python( + "if user.plan == \"enterprise\":\n enable_advanced()\n", + include_str!("../../rules/python/feature-gate-check.toml"), + ); + assert!( + !findings.is_empty(), + "should match property comparison shape (user.plan == ...)" + ); + assert_eq!( + findings[0].category, + crate::types::AuthCategory::FeatureGate + ); + } + + #[test] + fn py_feature_gate_property_comparison_excludes_role() { + // `role` is not a feature-gate key; this should be picked up by + // py-role-check-conditional, not py-feature-gate-check. + let findings = parse_and_match_python( + "if user.role == \"admin\":\n delete()\n", + include_str!("../../rules/python/feature-gate-check.toml"), + ); + assert!( + findings.is_empty(), + "feature-gate must not match role-style property comparisons" + ); + } + // -- cross_predicates tests (synthetic rules) -- /// A synthetic rule shaped like ownership-check: two getters in an diff --git a/src/scanner/parser.rs b/src/scanner/parser.rs index 3a5fc69..7139776 100644 --- a/src/scanner/parser.rs +++ b/src/scanner/parser.rs @@ -14,6 +14,7 @@ pub fn get_language(lang: Language, is_tsx_jsx: bool) -> Result Ok(tree_sitter_typescript::LANGUAGE_TSX.into()), (Language::JavaScript, _) => Ok(tree_sitter_javascript::LANGUAGE.into()), (Language::Java, _) => Ok(tree_sitter_java::LANGUAGE.into()), + (Language::Python, _) => Ok(tree_sitter_python::LANGUAGE.into()), _ => Err(ZiftError::General(format!( "language {lang:?} not yet supported" ))), @@ -77,9 +78,25 @@ mod tests { assert!(is_language_supported(Language::Java)); } + #[test] + fn parse_python() { + let mut parser = tree_sitter::Parser::new(); + let source = b"def is_admin(user):\n return user.role == 'admin'\n"; + let tree = parse_source(&mut parser, source, Language::Python, false).unwrap(); + assert!(!tree.root_node().has_error()); + } + + #[test] + fn python_is_supported() { + assert!(is_language_supported(Language::Python)); + } 
+ #[test] fn unsupported_language_returns_error() { - assert!(get_language(Language::Python, false).is_err()); - assert!(!is_language_supported(Language::Python)); + // Go has no structural grammar wired up yet — kept as the canary + // that `unsupported_language_returns_error` keeps testing what its + // name says it does. + assert!(get_language(Language::Go, false).is_err()); + assert!(!is_language_supported(Language::Go)); } }