diff --git a/architecture/security-policy.md b/architecture/security-policy.md index e5f179dc1..5811a03dd 100644 --- a/architecture/security-policy.md +++ b/architecture/security-policy.md @@ -36,6 +36,27 @@ Ordinary network traffic follows this order: Explicit deny and hardening checks win over allow rules. If no rule matches, the request is denied. +## Host Wildcards + +Network endpoint `host` patterns accept a `*` wildcard inside the first DNS +label only. The OPA runtime matches with a `.` label boundary, so a single `*` +never spans dots; only the recursive `**` form does. The validator enforces the same boundary so that policy load +fails fast instead of silently mismatching at the proxy. + +| Pattern | Accepted | Example match | Notes | +|---|---|---|---| +| `*.example.com` | Yes | `api.example.com` | Single first label of any value. | +| `**.example.com` | Yes | `a.b.example.com` | Recursive wildcard as the entire first label. | +| `*-aiplatform.googleapis.com` | Yes | `us-central1-aiplatform.googleapis.com` | Intra-label wildcard inside the first DNS label. | +| `*` or `**` | No | — | Matches every host. | +| `*.com`, `**.com` | No | — | TLD wildcards (`labels <= 2`). | +| `foo.*.example.com` | No | — | Wildcard outside the first DNS label. | +| `foo**.example.com` | No | — | Recursive `**` mixed inside a label; allowed only as the entire first label. | + +Validation rejects the disallowed patterns at policy load time with a message +that names the offending host. Exact hosts and IP addresses do not use this +path. 
/// Checks the wildcard usage of a network endpoint `host` pattern.
///
/// Hosts that contain no `*` are ignored. For a wildcard host, at most one
/// message (prefixed with `loc`) is appended to `errors`, for the first rule
/// that the pattern breaks:
///
/// 1. `*` or `**` on its own, which would match every host.
/// 2. A `*` in any DNS label after the first (`foo.*.example.com`).
/// 3. `**` combined with other characters in the first label
///    (`foo**.example.com`); `**` is only allowed as the entire first label.
/// 4. Two or fewer non-empty labels (`*.com`, `**.com`, `foo*`, `*.com.`).
///
/// Empty labels, as produced by a trailing or doubled dot, are not counted for
/// rule 4, so `*.com.` is rejected exactly like `*.com`.
fn validate_host_wildcard(errors: &mut Vec<String>, loc: &str, host: &str) {
    if !host.contains('*') {
        return;
    }

    if host == "*" || host == "**" {
        errors.push(format!(
            "{loc}: host wildcard '{host}' matches all hosts; use specific patterns like '*.example.com'"
        ));
        return;
    }

    // Everything after the first `.` is "the remaining labels"; a `*` anywhere
    // in there is a wildcard outside the first DNS label.
    let (first_label, remaining) = host.split_once('.').unwrap_or((host, ""));
    if remaining.contains('*') {
        errors.push(format!(
            "{loc}: host wildcard may only appear in the first DNS label, got '{host}'"
        ));
        return;
    }
    if first_label.contains("**") && first_label != "**" {
        errors.push(format!(
            "{loc}: recursive host wildcard '**' is only allowed as the entire first DNS label, got '{host}'"
        ));
        return;
    }

    // Reject TLD or single-label wildcards. They are accepted by the policy
    // engine but silently fail at the proxy layer (see #787). Only non-empty
    // labels count, otherwise a trailing dot (`*.com.`) or doubled dot
    // (`*..com`) would pad the label count past the threshold.
    let real_labels = host.split('.').filter(|label| !label.is_empty()).count();
    if real_labels <= 2 {
        errors.push(format!(
            "{loc}: TLD wildcard '{host}' is not allowed; \
             use subdomain wildcards like '*.example.com' instead"
        ));
    }
}
/// A `*` placed in a label other than the first (`foo.*.example.com`) must be
/// reported with the "first DNS label" error.
#[test]
fn validate_wildcard_host_mid_label_error() {
    let endpoint = serde_json::json!({
        "host": "foo.*.example.com",
        "port": 443
    });
    let policy = serde_json::json!({
        "network_policies": {
            "test": {
                "endpoints": [endpoint],
                "binaries": []
            }
        }
    });

    let (errors, _) = validate_l7_policies(&policy);

    let flagged = errors.iter().any(|msg| msg.contains("first DNS label"));
    assert!(
        flagged,
        "Mid-label wildcard should be rejected, got errors: {errors:?}"
    );
}
"endpoints": [{ + "host": "foo**.example.com", + "port": 443 + }], + "binaries": [] + } + } + }); + let (errors, _warnings) = validate_l7_policies(&data); + assert!( + errors.iter().any(|e| e.contains("recursive host wildcard")), + "Recursive intra-label wildcard should be rejected, got errors: {errors:?}" ); } @@ -1443,6 +1498,54 @@ mod tests { ); } + #[test] + fn validate_wildcard_host_double_star_valid_no_error() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "**.example.com", + "port": 443 + }], + "binaries": [] + } + } + }); + let (errors, warnings) = validate_l7_policies(&data); + assert!( + errors.is_empty(), + "**.example.com should be valid, got errors: {errors:?}" + ); + assert!( + warnings.is_empty(), + "**.example.com should not warn, got warnings: {warnings:?}" + ); + } + + #[test] + fn validate_wildcard_host_intra_label_valid_no_error() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "*-aiplatform.googleapis.com", + "port": 443 + }], + "binaries": [] + } + } + }); + let (errors, warnings) = validate_l7_policies(&data); + assert!( + errors.is_empty(), + "*-aiplatform.googleapis.com should be valid, got errors: {errors:?}" + ); + assert!( + warnings.is_empty(), + "*-aiplatform.googleapis.com should not warn, got warnings: {warnings:?}" + ); + } + #[test] fn validate_port_and_ports_mutually_exclusive() { let data = serde_json::json!({ diff --git a/crates/openshell-sandbox/src/opa.rs b/crates/openshell-sandbox/src/opa.rs index 5897679a0..7578acd1f 100644 --- a/crates/openshell-sandbox/src/opa.rs +++ b/crates/openshell-sandbox/src/opa.rs @@ -3739,6 +3739,69 @@ network_policies: assert!(!decision.allowed, "Wildcard host on wrong port should deny"); } + #[test] + fn wildcard_host_intra_label_matches() { + // First-label intra-label wildcard: `*` matches the variable prefix + // within a single DNS label. 
Locks validator/runtime alignment for + // the pattern accepted by `validate_host_wildcard`. + let data = r#" +network_policies: + intra_label: + name: intra_label + endpoints: + - { host: "*-aiplatform.googleapis.com", port: 443 } + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "us-central1-aiplatform.googleapis.com".into(), + port: 443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input).unwrap(); + assert!( + decision.allowed, + "*-aiplatform.googleapis.com should match us-central1-aiplatform.googleapis.com: {}", + decision.reason + ); + } + + #[test] + fn wildcard_host_intra_label_does_not_cross_dot() { + // `glob.match(..., ["."])` treats `.` as a label boundary that `*` + // cannot cross. `*-aiplatform.googleapis.com` must not match a host + // whose first label is `us-central1` and where `aiplatform` is a + // separate label. 
+ let data = r#" +network_policies: + intra_label: + name: intra_label + endpoints: + - { host: "*-aiplatform.googleapis.com", port: 443 } + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "us-central1.aiplatform.googleapis.com".into(), + port: 443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input).unwrap(); + assert!( + !decision.allowed, + "*-aiplatform.googleapis.com must NOT match us-central1.aiplatform.googleapis.com \ + (would cross a `.` boundary)" + ); + } + #[test] fn wildcard_host_multi_port() { let data = r#" diff --git a/docs/reference/policy-schema.mdx b/docs/reference/policy-schema.mdx index a98e8087c..7a0d0e962 100644 --- a/docs/reference/policy-schema.mdx +++ b/docs/reference/policy-schema.mdx @@ -152,7 +152,7 @@ Each endpoint defines a reachable destination and optional inspection rules. | Field | Type | Required | Description | |---|---|---|---| -| `host` | string | Yes | Hostname or IP address. Supports wildcards: `*.example.com` matches any subdomain. | +| `host` | string | Yes | Hostname or IP address. Supports a `*` wildcard inside the first DNS label only: `*.example.com`, `**.example.com`, and intra-label patterns like `*-aiplatform.googleapis.com` are accepted; bare `*`/`**`, TLD wildcards (`*.com`), and wildcards outside the first label are rejected at load time. | | `port` | integer | Yes | TCP port number. | | `path` | string | No | Optional HTTP path glob used to select between L7 endpoints that share the same host and port. Empty means all paths. Use this when REST and GraphQL live under the same host, such as `/repos/**` and `/graphql`. | | `protocol` | string | No | Set to `rest` for HTTP method/path inspection or `graphql` for GraphQL operation inspection. Omit for TCP passthrough. |