From ff6dcb3abdef81803dc3a4f97a84ce1d3942ca05 Mon Sep 17 00:00:00 2001 From: Taylor Mutch Date: Fri, 15 May 2026 15:07:16 -0700 Subject: [PATCH] feat(auth): add SPIFFE supervisor authentication Signed-off-by: Taylor Mutch --- .../skills/debug-openshell-cluster/SKILL.md | 16 ++ .agents/skills/helm-dev-environment/SKILL.md | 17 ++ .markdownlint-cli2.jsonc | 1 + Cargo.lock | 272 +++++++++--------- Cargo.toml | 10 +- architecture/gateway.md | 13 +- crates/openshell-cli/Cargo.toml | 2 +- crates/openshell-core/Cargo.toml | 3 +- crates/openshell-core/build.rs | 7 +- crates/openshell-core/src/config.rs | 35 +++ crates/openshell-core/src/lib.rs | 4 +- crates/openshell-core/src/sandbox_env.rs | 12 + .../openshell-driver-kubernetes/src/config.rs | 21 ++ .../openshell-driver-kubernetes/src/driver.rs | 257 ++++++++++++++--- .../openshell-driver-kubernetes/src/main.rs | 24 ++ crates/openshell-sandbox/Cargo.toml | 3 +- crates/openshell-sandbox/src/grpc_client.rs | 181 ++++++++++-- crates/openshell-server/Cargo.toml | 3 +- crates/openshell-server/src/auth/mod.rs | 1 + crates/openshell-server/src/auth/spiffe.rs | 212 ++++++++++++++ crates/openshell-server/src/cli.rs | 11 +- crates/openshell-server/src/config_file.rs | 24 +- crates/openshell-server/src/lib.rs | 12 + crates/openshell-server/src/multiplex.rs | 33 +-- crates/openshell-tui/Cargo.toml | 2 +- deploy/helm/openshell/README.md | 12 + .../helm/openshell/ci/values-spire-stack.yaml | 22 ++ deploy/helm/openshell/ci/values-spire.yaml | 11 + deploy/helm/openshell/skaffold.yaml | 21 ++ deploy/helm/openshell/templates/_helpers.tpl | 8 + .../openshell/templates/gateway-config.yaml | 15 + .../helm/openshell/templates/statefulset.yaml | 13 + .../openshell/tests/gateway_config_test.yaml | 27 ++ deploy/helm/openshell/values.yaml | 9 + docs/kubernetes/access-control.mdx | 8 + 35 files changed, 1085 insertions(+), 237 deletions(-) create mode 100644 crates/openshell-server/src/auth/spiffe.rs create mode 100644 
deploy/helm/openshell/ci/values-spire-stack.yaml create mode 100644 deploy/helm/openshell/ci/values-spire.yaml diff --git a/.agents/skills/debug-openshell-cluster/SKILL.md b/.agents/skills/debug-openshell-cluster/SKILL.md index 6b7f8e6b2..a72751bfb 100644 --- a/.agents/skills/debug-openshell-cluster/SKILL.md +++ b/.agents/skills/debug-openshell-cluster/SKILL.md @@ -126,6 +126,22 @@ If the gateway exits with `failed to read sandbox JWT signing key from `sandbox-jwt` secret at `/etc/openshell-jwt`. The sandbox JWT mount is required even when local Helm values disable TLS. +If `server.spiffe.enabled=true`, the sandbox JWT ConfigMap block and +`sandbox-jwt` StatefulSet mount are intentionally omitted. Instead verify that +SPIRE is installed, the CSI driver is available, and the gateway pod mounts the +SPIFFE Workload API socket: + +```bash +helm -n openshell get values openshell | grep -E 'spiffe|trustDomain|workloadApiSocketPath' +kubectl get pods -A | grep -E 'spire|spiffe' +kubectl -n openshell get statefulset openshell -o yaml | grep -E 'spiffe-workload-api|csi.spiffe.io' +``` + +Sandbox pods in SPIFFE mode should have `openshell.io/sandbox-id` and +`openshell.io/spiffe-id` annotations, an `openshell.ai/managed-by=openshell` +label, and supervisor env vars `OPENSHELL_SPIFFE_WORKLOAD_API_SOCKET`, +`OPENSHELL_SPIFFE_AUDIENCE`, and `OPENSHELL_SPIFFE_ID`. + Check the image references currently used by the gateway deployment: ```bash diff --git a/.agents/skills/helm-dev-environment/SKILL.md b/.agents/skills/helm-dev-environment/SKILL.md index 623efb2e6..410981707 100644 --- a/.agents/skills/helm-dev-environment/SKILL.md +++ b/.agents/skills/helm-dev-environment/SKILL.md @@ -169,6 +169,21 @@ To remove Keycloak: mise run keycloak:k8s:teardown ``` +### SPIRE / SPIFFE Sandbox Identity + +Skaffold can install SPIRE with the SPIFFE hardened Helm charts. To activate +SPIFFE JWT-SVID supervisor authentication: + +1. 
Uncomment the `spire-crds` and `spire` releases in `deploy/helm/openshell/skaffold.yaml` +2. Uncomment `#- ci/values-spire.yaml` in the OpenShell release values files +3. Redeploy: `mise run helm:skaffold:run` + +`ci/values-spire-stack.yaml` configures the local SPIRE trust domain as +`openshell.local` and adds a `ClusterSPIFFEID` that maps sandbox pod +annotations to `spiffe://openshell.local/openshell/sandbox/<sandbox-id>`. +OpenShell mounts the SPIFFE CSI Workload API socket at +`/spiffe-workload-api/spire-agent.sock`. + --- ## Cluster Lifecycle (suspend/resume) @@ -196,6 +211,8 @@ mise run helm:k3s:status | `deploy/helm/openshell/ci/values-cert-manager.yaml` | cert-manager PKI overlay (opt-in; disables pkiInitJob) | | `deploy/helm/openshell/ci/values-gateway.yaml` | Envoy Gateway GRPCRoute + Gateway overlay | | `deploy/helm/openshell/ci/values-keycloak.yaml` | Keycloak OIDC overlay | +| `deploy/helm/openshell/ci/values-spire.yaml` | SPIFFE/SPIRE sandbox supervisor auth overlay | +| `deploy/helm/openshell/ci/values-spire-stack.yaml` | SPIRE hardened chart values for local dev | | `deploy/helm/openshell/ci/values-tls-disabled.yaml` | Lint-only: TLS + auth disabled (reverse-proxy edge termination) | | `deploy/kube/manifests/envoy-gateway-openshell.yaml` | GatewayClass for Envoy Gateway (`mise run helm:gateway:apply`) | | `tasks/scripts/helm-k3s-local.sh` | k3d cluster create/delete/start/stop/status | diff --git a/.markdownlint-cli2.jsonc b/.markdownlint-cli2.jsonc index 30cf48849..125df0f81 100644 --- a/.markdownlint-cli2.jsonc +++ b/.markdownlint-cli2.jsonc @@ -16,6 +16,7 @@ ".claude/**", ".opencode/**", ".github/**", + "architecture/plans/**", "**/node_modules/**", "target/**", ".pytest_cache/**", diff --git a/Cargo.lock b/Cargo.lock index 57e7afb78..46142f201 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -156,6 +156,15 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "arc-swap" +version = "1.9.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" +dependencies = [ + "rustversion", +] + [[package]] name = "argon2" version = "0.5.3" @@ -217,28 +226,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "async-stream" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" -dependencies = [ - "async-stream-impl", - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "async-stream-impl" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - [[package]] name = "async-trait" version = "0.1.89" @@ -303,40 +290,13 @@ dependencies = [ "fs_extra", ] -[[package]] -name = "axum" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" -dependencies = [ - "async-trait", - "axum-core 0.4.5", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "itoa", - "matchit 0.7.3", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper", - "tower 0.5.3", - "tower-layer", - "tower-service", -] - [[package]] name = "axum" version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" dependencies = [ - "axum-core 0.5.6", + "axum-core", "base64 0.22.1", "bytes", "form_urlencoded", @@ -347,7 +307,7 @@ dependencies = [ "hyper", "hyper-util", "itoa", - "matchit 0.8.4", + "matchit", "memchr", "mime", "percent-encoding", @@ -366,26 +326,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "axum-core" -version = "0.4.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "mime", - "pin-project-lite", - "rustversion", - "sync_wrapper", - "tower-layer", - "tower-service", -] - [[package]] name = "axum-core" version = "0.5.6" @@ -1855,19 +1795,13 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap 2.14.0", + "indexmap", "slab", "tokio", "tokio-util", "tracing", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.14.5" @@ -2149,7 +2083,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.3", + "socket2", "tokio", "tower-service", "tracing", @@ -2328,16 +2262,6 @@ dependencies = [ "quote", ] -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - [[package]] name = "indexmap" version = "2.14.0" @@ -2957,12 +2881,6 @@ dependencies = [ "regex-automata", ] -[[package]] -name = "matchit" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" - [[package]] name = "matchit" version = "0.8.4" @@ -3011,7 +2929,7 @@ dependencies = [ "http-body-util", "hyper", "hyper-util", - "indexmap 2.14.0", + "indexmap", "ipnet", "metrics", "metrics-util", @@ -3465,7 +3383,8 @@ dependencies = [ "tempfile", "thiserror 2.0.18", "tonic", - "tonic-build", + "tonic-prost", + "tonic-prost-build", "url", ] @@ -3662,6 +3581,7 @@ dependencies = [ "serde_yml", "sha1 0.10.6", "sha2 
0.10.9", + "spiffe", "temp-env", "tempfile", "thiserror 2.0.18", @@ -3683,7 +3603,7 @@ version = "0.0.0" dependencies = [ "anyhow", "async-trait", - "axum 0.8.9", + "axum", "bytes", "clap", "futures", @@ -3726,6 +3646,7 @@ dependencies = [ "serde", "serde_json", "sha2 0.10.9", + "spiffe", "sqlx", "tempfile", "thiserror 2.0.18", @@ -4010,12 +3931,13 @@ dependencies = [ [[package]] name = "petgraph" -version = "0.7.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", - "indexmap 2.14.0", + "hashbrown 0.15.5", + "indexmap", ] [[package]] @@ -4253,9 +4175,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -4263,19 +4185,20 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", "itertools 0.14.0", "log", "multimap", - "once_cell", "petgraph", "prettyplease", "prost", "prost-types", + "pulldown-cmark", + "pulldown-cmark-to-cmark", "regex", "syn 2.0.117", "tempfile", @@ -4283,9 +4206,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = 
"27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools 0.14.0", @@ -4296,9 +4219,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ "prost", ] @@ -4312,6 +4235,26 @@ dependencies = [ "autotools", ] +[[package]] +name = "pulldown-cmark" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad" +dependencies = [ + "bitflags", + "memchr", + "unicase", +] + +[[package]] +name = "pulldown-cmark-to-cmark" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50793def1b900256624a709439404384204a5dc3a6ec580281bfaac35e882e90" +dependencies = [ + "pulldown-cmark", +] + [[package]] name = "quanta" version = "0.12.6" @@ -4340,7 +4283,7 @@ dependencies = [ "quinn-udp", "rustc-hash 2.1.2", "rustls", - "socket2 0.6.3", + "socket2", "thiserror 2.0.18", "tokio", "tracing", @@ -4378,7 +4321,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.3", + "socket2", "tracing", "windows-sys 0.60.2", ] @@ -5234,7 +5177,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.14.0", + "indexmap", "itoa", "ryu", "serde", @@ -5247,7 +5190,7 @@ version = "0.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59e2dd588bf1597a252c3b920e0143eb99b0f76e4e082f4c92ce34fbc9e71ddd" dependencies = [ - "indexmap 2.14.0", + "indexmap", "itoa", "libyml", "memchr", @@ -5430,22 +5373,40 @@ dependencies = [ [[package]] name = "socket2" 
-version = "0.5.10" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] -name = "socket2" -version = "0.6.3" +name = "spiffe" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "6d3f9e45e9e53f03cb452fe0f050101a9280ff4f4214e326037bc8275284d906" dependencies = [ - "libc", - "windows-sys 0.61.2", + "arc-swap", + "base64ct", + "fastrand", + "futures", + "hyper-util", + "log", + "prost", + "prost-types", + "serde", + "serde_json", + "thiserror 2.0.18", + "time", + "tokio", + "tokio-util", + "tonic", + "tonic-prost", + "tower 0.5.3", + "tracing", + "url", + "zeroize", ] [[package]] @@ -5508,7 +5469,7 @@ dependencies = [ "futures-util", "hashbrown 0.15.5", "hashlink", - "indexmap 2.14.0", + "indexmap", "log", "memchr", "once_cell", @@ -6074,7 +6035,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.3", + "socket2", "tokio-macros", "windows-sys 0.61.2", ] @@ -6180,7 +6141,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.14.0", + "indexmap", "serde", "serde_spanned", "toml_datetime", @@ -6196,13 +6157,12 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" [[package]] name = "tonic" -version = "0.12.3" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +checksum = "ac2a5518c70fa84342385732db33fb3f44bc4cc748936eb5833d2df34d6445ef" dependencies = [ - 
"async-stream", "async-trait", - "axum 0.7.9", + "axum", "base64 0.22.1", "bytes", "h2", @@ -6214,14 +6174,13 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost", "rustls-native-certs", - "rustls-pemfile", - "socket2 0.5.10", + "socket2", + "sync_wrapper", "tokio", "tokio-rustls", "tokio-stream", - "tower 0.4.13", + "tower 0.5.3", "tower-layer", "tower-service", "tracing", @@ -6229,9 +6188,32 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.12.3" +version = "0.14.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c68f61875ac5293cf72e6c8cf0158086428c82c37229e98c840878f1706b0322" +dependencies = [ + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tonic-prost" +version = "0.14.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50849f68853be452acf590cde0b146665b8d507b3b8af17261df47e02c209ea0" +dependencies = [ + "bytes", + "prost", + "tonic", +] + +[[package]] +name = "tonic-prost-build" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" +checksum = "654e5643eff75d7f8c99197ce1440ed19a3474eada74c12bbac488b2cafdae27" dependencies = [ "prettyplease", "proc-macro2", @@ -6239,6 +6221,8 @@ dependencies = [ "prost-types", "quote", "syn 2.0.117", + "tempfile", + "tonic-build", ] [[package]] @@ -6249,11 +6233,8 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", - "indexmap 1.9.3", "pin-project", "pin-project-lite", - "rand 0.8.6", - "slab", "tokio", "tokio-util", "tower-layer", @@ -6269,9 +6250,12 @@ checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", + "indexmap", "pin-project-lite", + "slab", "sync_wrapper", "tokio", + "tokio-util", "tower-layer", "tower-service", 
"tracing", @@ -6758,7 +6742,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ "anyhow", - "indexmap 2.14.0", + "indexmap", "wasm-encoder", "wasmparser", ] @@ -6784,7 +6768,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ "bitflags", "hashbrown 0.15.5", - "indexmap 2.14.0", + "indexmap", "semver", ] @@ -7349,7 +7333,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", "heck", - "indexmap 2.14.0", + "indexmap", "prettyplease", "syn 2.0.117", "wasm-metadata", @@ -7380,7 +7364,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", "bitflags", - "indexmap 2.14.0", + "indexmap", "log", "serde", "serde_derive", @@ -7399,7 +7383,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" dependencies = [ "anyhow", "id-arena", - "indexmap 2.14.0", + "indexmap", "log", "semver", "serde", @@ -7613,7 +7597,7 @@ dependencies = [ "flate2", "getrandom 0.4.2", "hmac", - "indexmap 2.14.0", + "indexmap", "lzma-rust2", "memchr", "pbkdf2", diff --git a/Cargo.toml b/Cargo.toml index 3fea379a2..ef7cb9f30 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,10 +17,11 @@ repository = "https://github.com/NVIDIA/OpenShell" tokio = { version = "1.43", features = ["full"] } # gRPC/Protobuf -tonic = "0.12" -tonic-build = "0.12" -prost = "0.13" -prost-types = "0.13" +tonic = "0.14" +tonic-prost = "0.14" +tonic-prost-build = "0.14" +prost = "0.14" +prost-types = "0.14" # HTTP server axum = { version = "0.8", features = ["ws"] } @@ -86,6 +87,7 @@ sha2 = "0.10" rand = "0.9" jsonwebtoken = "9" getrandom = "0.3" +spiffe = { version = "0.15", default-features = false, features = ["workload-api-jwt", "tracing"] } # Filesystem embedding include_dir = "0.7" diff --git a/architecture/gateway.md 
b/architecture/gateway.md index e9cbe187d..8a3ab2370 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -41,10 +41,15 @@ Supported auth modes: | Plaintext | Local development or a trusted reverse proxy boundary. | | Cloudflare JWT | Edge-authenticated deployments where Cloudflare Access supplies identity. | | OIDC | Bearer-token auth for users, with browser PKCE or client credentials login. | - -Sandbox supervisor RPCs authenticate with either mTLS material or a sandbox -secret depending on the runtime and deployment mode. User-facing mutations are -authorized by role policy when OIDC or edge identity is enabled. +| SPIFFE JWT-SVID | Sandbox supervisor authentication through a local SPIFFE Workload API implementation such as SPIRE. | + +Sandbox supervisor RPCs authenticate with mTLS material plus a sandbox workload +identity. Kubernetes deployments can use either the gateway-minted JWT +bootstrap path or SPIFFE JWT-SVIDs. In SPIFFE mode, the supervisor fetches a +JWT-SVID from the SPIFFE Workload API and the gateway validates it through its +own local Workload API socket, then maps `spiffe://<trust-domain>/openshell/sandbox/<sandbox-id>` +to `Principal::Sandbox`. User-facing mutations are authorized by role policy +when OIDC or edge identity is enabled. 
## API Surface diff --git a/crates/openshell-cli/Cargo.toml b/crates/openshell-cli/Cargo.toml index 7dc0c0f22..dedd1f460 100644 --- a/crates/openshell-cli/Cargo.toml +++ b/crates/openshell-cli/Cargo.toml @@ -29,7 +29,7 @@ prost-types = { workspace = true } tokio = { workspace = true } # gRPC client -tonic = { workspace = true, features = ["tls", "tls-native-roots"] } +tonic = { workspace = true, features = ["tls-native-roots"] } # CLI clap = { workspace = true } diff --git a/crates/openshell-core/Cargo.toml b/crates/openshell-core/Cargo.toml index b03fb1494..014bf0dc7 100644 --- a/crates/openshell-core/Cargo.toml +++ b/crates/openshell-core/Cargo.toml @@ -14,6 +14,7 @@ repository.workspace = true prost = { workspace = true } prost-types = { workspace = true } tonic = { workspace = true } +tonic-prost = { workspace = true } thiserror = { workspace = true } miette = { workspace = true } serde = { workspace = true } @@ -28,7 +29,7 @@ ipnet = "2" dev-settings = [] [build-dependencies] -tonic-build = { workspace = true } +tonic-prost-build = { workspace = true } protobuf-src = { workspace = true } [dev-dependencies] diff --git a/crates/openshell-core/build.rs b/crates/openshell-core/build.rs index 7613c8754..98011c3c4 100644 --- a/crates/openshell-core/build.rs +++ b/crates/openshell-core/build.rs @@ -40,11 +40,12 @@ fn main() -> Result<(), Box> { collect_proto_files(&proto_root, &mut proto_files)?; proto_files.sort(); - // Configure tonic-build - tonic_build::configure() + // Configure tonic/prost protobuf code generation. 
+ let include_paths = [proto_root]; + tonic_prost_build::configure() .build_server(true) .build_client(true) - .compile_protos(&proto_files, &[proto_root.as_path()])?; + .compile_protos(&proto_files, &include_paths)?; Ok(()) } diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index 4505226c3..f8b6add49 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -212,6 +212,12 @@ pub struct Config { #[serde(default)] pub gateway_jwt: Option, + /// SPIFFE Workload API configuration. When `Some`, the gateway validates + /// sandbox JWT-SVIDs through the local SPIFFE implementation and maps + /// matching SPIFFE IDs to sandbox principals. + #[serde(default)] + pub spiffe: Option, + /// Database URL for persistence. pub database_url: String, @@ -347,6 +353,26 @@ pub struct GatewayJwtConfig { pub ttl_secs: u64, } +/// SPIFFE-based sandbox identity configuration. +/// +/// The gateway uses the local SPIFFE Workload API to validate JWT-SVIDs +/// presented by sandbox supervisors. Supervisors request those JWT-SVIDs +/// for the configured audience and use SPIFFE IDs shaped as +/// `spiffe://<trust_domain><sandbox_id_prefix><sandbox_id>`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SpiffeConfig { + /// Path to the SPIFFE Workload API UNIX socket. + pub workload_api_socket_path: PathBuf, + /// Trust domain accepted for sandbox SPIFFE IDs. + pub trust_domain: String, + /// Audience expected in sandbox JWT-SVIDs. + #[serde(default = "default_spiffe_audience")] + pub audience: String, + /// Path prefix, below the trust domain, that precedes the sandbox UUID. 
+ #[serde(default = "default_spiffe_sandbox_id_prefix")] + pub sandbox_id_prefix: String, +} + fn default_gateway_id() -> String { "openshell".to_string() } @@ -355,6 +381,14 @@ const fn default_sandbox_token_ttl_secs() -> u64 { 86_400 } +fn default_spiffe_audience() -> String { + "openshell-gateway".to_string() +} + +fn default_spiffe_sandbox_id_prefix() -> String { + "/openshell/sandbox/".to_string() +} + fn default_roles_claim() -> String { "realm_access.roles".to_string() } @@ -379,6 +413,7 @@ impl Config { tls, oidc: None, gateway_jwt: None, + spiffe: None, database_url: String::new(), compute_drivers: vec![], ssh_session_ttl_secs: default_ssh_session_ttl_secs(), diff --git a/crates/openshell-core/src/lib.rs b/crates/openshell-core/src/lib.rs index f58405bcb..4032e4659 100644 --- a/crates/openshell-core/src/lib.rs +++ b/crates/openshell-core/src/lib.rs @@ -26,7 +26,9 @@ pub mod sandbox_env; pub mod settings; pub mod time; -pub use config::{ComputeDriverKind, Config, GatewayJwtConfig, OidcConfig, TlsConfig}; +pub use config::{ + ComputeDriverKind, Config, GatewayJwtConfig, OidcConfig, SpiffeConfig, TlsConfig, +}; pub use error::{ComputeDriverError, Error, Result}; pub use metadata::{ObjectId, ObjectLabels, ObjectName}; diff --git a/crates/openshell-core/src/sandbox_env.rs b/crates/openshell-core/src/sandbox_env.rs index b367e450c..6d89d3c36 100644 --- a/crates/openshell-core/src/sandbox_env.rs +++ b/crates/openshell-core/src/sandbox_env.rs @@ -53,3 +53,15 @@ pub const SANDBOX_TOKEN_FILE: &str = "OPENSHELL_SANDBOX_TOKEN_FILE"; /// writes and rotates this file; the supervisor exchanges its contents /// for a gateway JWT at startup and on refresh. pub const K8S_SA_TOKEN_FILE: &str = "OPENSHELL_K8S_SA_TOKEN_FILE"; + +/// Filesystem path to the SPIFFE Workload API UNIX socket. +/// +/// When set, the supervisor fetches a JWT-SVID from the local Workload API +/// and presents that token directly to the gateway. 
+pub const SPIFFE_WORKLOAD_API_SOCKET: &str = "OPENSHELL_SPIFFE_WORKLOAD_API_SOCKET"; + +/// Audience requested when fetching a SPIFFE JWT-SVID. +pub const SPIFFE_AUDIENCE: &str = "OPENSHELL_SPIFFE_AUDIENCE"; + +/// Optional exact SPIFFE ID requested from the Workload API. +pub const SPIFFE_ID: &str = "OPENSHELL_SPIFFE_ID"; diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index 8e4275ab7..cb11d3aa5 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -73,6 +73,16 @@ pub struct KubernetesComputeConfig { /// this token within a few seconds of pod start, so any value at /// the floor is sufficient. Default 3600. pub sa_token_ttl_secs: i64, + /// SPIFFE Workload API socket path mounted into sandbox pods. Empty + /// disables SPIFFE identity material and keeps the `ServiceAccount` + /// bootstrap path active. + pub spiffe_workload_api_socket_path: String, + /// SPIFFE trust domain used for sandbox identities. + pub spiffe_trust_domain: String, + /// Audience requested by sandbox supervisors for JWT-SVIDs. + pub spiffe_audience: String, + /// Path prefix under the trust domain before the sandbox UUID. + pub spiffe_sandbox_id_prefix: String, } /// Lower bound enforced by kubelet for projected SA tokens. 
@@ -102,6 +112,10 @@ impl Default for KubernetesComputeConfig { host_gateway_ip: String::new(), enable_user_namespaces: false, sa_token_ttl_secs: 3600, + spiffe_workload_api_socket_path: String::new(), + spiffe_trust_domain: String::new(), + spiffe_audience: "openshell-gateway".to_string(), + spiffe_sandbox_id_prefix: "/openshell/sandbox/".to_string(), } } } @@ -119,6 +133,13 @@ impl KubernetesComputeConfig { .clamp(MIN_SA_TOKEN_TTL_SECS, MAX_SA_TOKEN_TTL_SECS) } } + + #[must_use] + pub fn spiffe_enabled(&self) -> bool { + !self.spiffe_workload_api_socket_path.trim().is_empty() + && !self.spiffe_trust_domain.trim().is_empty() + && !self.spiffe_audience.trim().is_empty() + } } fn default_sandbox_image() -> String { diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index da9ade3eb..33df91ada 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -74,6 +74,7 @@ const SANDBOX_MANAGED_LABEL: &str = "openshell.ai/managed-by"; const SANDBOX_MANAGED_VALUE: &str = "openshell"; const GPU_RESOURCE_NAME: &str = "nvidia.com/gpu"; const GPU_RESOURCE_QUANTITY: &str = "1"; +const SPIFFE_WORKLOAD_API_VOLUME_NAME: &str = "spiffe-workload-api"; // --------------------------------------------------------------------------- // Default workspace persistence (temporary — will be replaced by snapshotting) @@ -328,6 +329,11 @@ impl KubernetesComputeDriver { host_gateway_ip: &self.config.host_gateway_ip, enable_user_namespaces: self.config.enable_user_namespaces, sa_token_ttl_secs: self.config.effective_sa_token_ttl_secs(), + spiffe_enabled: self.config.spiffe_enabled(), + spiffe_workload_api_socket_path: &self.config.spiffe_workload_api_socket_path, + spiffe_trust_domain: &self.config.spiffe_trust_domain, + spiffe_audience: &self.config.spiffe_audience, + spiffe_sandbox_id_prefix: &self.config.spiffe_sandbox_id_prefix, }; obj.data = 
sandbox_to_k8s_spec(sandbox.spec.as_ref(), ¶ms); let api = self.api(); @@ -1059,6 +1065,11 @@ struct SandboxPodParams<'a> { /// Lifetime (seconds) of the projected `ServiceAccount` token used /// for the bootstrap `IssueSandboxToken` exchange. sa_token_ttl_secs: i64, + spiffe_enabled: bool, + spiffe_workload_api_socket_path: &'a str, + spiffe_trust_domain: &'a str, + spiffe_audience: &'a str, + spiffe_sandbox_id_prefix: &'a str, } impl Default for SandboxPodParams<'_> { @@ -1077,6 +1088,11 @@ impl Default for SandboxPodParams<'_> { host_gateway_ip: "", enable_user_namespaces: false, sa_token_ttl_secs: 3600, + spiffe_enabled: false, + spiffe_workload_api_socket_path: "", + spiffe_trust_domain: "", + spiffe_audience: "openshell-gateway", + spiffe_sandbox_id_prefix: "/openshell/sandbox/", } } } @@ -1167,8 +1183,25 @@ fn sandbox_template_to_k8s( params: &SandboxPodParams<'_>, ) -> serde_json::Value { let mut metadata = serde_json::Map::new(); - if !template.labels.is_empty() { - metadata.insert("labels".to_string(), serde_json::json!(template.labels)); + let mut pod_labels = template + .labels + .iter() + .map(|(key, value)| (key.clone(), serde_json::Value::String(value.clone()))) + .collect::>(); + if params.spiffe_enabled { + pod_labels.insert( + SANDBOX_MANAGED_LABEL.to_string(), + serde_json::Value::String(SANDBOX_MANAGED_VALUE.to_string()), + ); + if !params.sandbox_id.is_empty() { + pod_labels.insert( + SANDBOX_ID_LABEL.to_string(), + serde_json::Value::String(params.sandbox_id.to_string()), + ); + } + } + if !pod_labels.is_empty() { + metadata.insert("labels".to_string(), serde_json::Value::Object(pod_labels)); } // Carry the sandbox UUID as a pod annotation so the gateway can resolve // a projected SA token claim (pod name + uid) back to a sandbox identity @@ -1187,6 +1220,12 @@ fn sandbox_template_to_k8s( serde_json::Value::String(params.sandbox_id.to_string()), ); } + if params.spiffe_enabled { + pod_annotations.insert( + "openshell.io/spiffe-id".to_string(), 
+ serde_json::Value::String(sandbox_spiffe_id(params)), + ); + } if !pod_annotations.is_empty() { metadata.insert( "annotations".to_string(), @@ -1257,6 +1296,7 @@ fn sandbox_template_to_k8s( params.grpc_endpoint, params.ssh_socket_path, !params.client_tls_secret_name.is_empty(), + spiffe_env(params), ); container.insert("env".to_string(), serde_json::Value::Array(env)); @@ -1278,9 +1318,10 @@ fn sandbox_template_to_k8s( }), ); - // Mount client TLS secret for mTLS to the server, plus the projected - // ServiceAccount token used to bootstrap the sandbox's gateway JWT - // via `IssueSandboxToken`. + // Mount client TLS secret for mTLS to the server, plus exactly one + // sandbox identity source: SPIFFE Workload API socket when configured, + // otherwise a projected ServiceAccount token for the gateway-JWT + // bootstrap path. let mut volume_mounts: Vec = Vec::new(); if !params.client_tls_secret_name.is_empty() { volume_mounts.push(serde_json::json!({ @@ -1289,11 +1330,19 @@ fn sandbox_template_to_k8s( "readOnly": true })); } - volume_mounts.push(serde_json::json!({ - "name": "openshell-sa-token", - "mountPath": "/var/run/secrets/openshell", - "readOnly": true, - })); + if params.spiffe_enabled { + volume_mounts.push(serde_json::json!({ + "name": SPIFFE_WORKLOAD_API_VOLUME_NAME, + "mountPath": spiffe_socket_mount_path(params.spiffe_workload_api_socket_path), + "readOnly": true, + })); + } else { + volume_mounts.push(serde_json::json!({ + "name": "openshell-sa-token", + "mountPath": "/var/run/secrets/openshell", + "readOnly": true, + })); + } container.insert( "volumeMounts".to_string(), serde_json::Value::Array(volume_mounts), @@ -1316,23 +1365,33 @@ fn sandbox_template_to_k8s( "secret": { "secretName": params.client_tls_secret_name, "defaultMode": 256 } })); } - // Projected ServiceAccountToken volume — kubelet writes a short-lived - // audience-bound JWT into /var/run/secrets/openshell/token and rotates - // it automatically. 
The supervisor exchanges this for a gateway-minted - // JWT via `IssueSandboxToken` once at startup. - volumes.push(serde_json::json!({ - "name": "openshell-sa-token", - "projected": { - "sources": [{ - "serviceAccountToken": { - "audience": "openshell-gateway", - "expirationSeconds": params.sa_token_ttl_secs, - "path": "token" - } - }], - "defaultMode": 256 - } - })); + if params.spiffe_enabled { + volumes.push(serde_json::json!({ + "name": SPIFFE_WORKLOAD_API_VOLUME_NAME, + "csi": { + "driver": "csi.spiffe.io", + "readOnly": true + } + })); + } else { + // Projected ServiceAccountToken volume — kubelet writes a short-lived + // audience-bound JWT into /var/run/secrets/openshell/token and rotates + // it automatically. The supervisor exchanges this for a gateway-minted + // JWT via `IssueSandboxToken` once at startup. + volumes.push(serde_json::json!({ + "name": "openshell-sa-token", + "projected": { + "sources": [{ + "serviceAccountToken": { + "audience": "openshell-gateway", + "expirationSeconds": params.sa_token_ttl_secs, + "path": "token" + } + }], + "defaultMode": 256 + } + })); + } spec.insert("volumes".to_string(), serde_json::Value::Array(volumes)); // Add hostAliases so sandbox pods can reach the Docker host. 
@@ -1444,6 +1503,7 @@ fn build_env_list( grpc_endpoint: &str, ssh_socket_path: &str, tls_enabled: bool, + spiffe_env: Option, ) -> Vec { let mut env = existing_env.cloned().unwrap_or_default(); apply_env_map(&mut env, template_environment); @@ -1455,6 +1515,7 @@ fn build_env_list( grpc_endpoint, ssh_socket_path, tls_enabled, + spiffe_env, ); env } @@ -1477,6 +1538,7 @@ fn apply_required_env( grpc_endpoint: &str, ssh_socket_path: &str, tls_enabled: bool, + spiffe_env: Option, ) { upsert_env(env, openshell_core::sandbox_env::SANDBOX_ID, sandbox_id); upsert_env(env, openshell_core::sandbox_env::SANDBOX, sandbox_name); @@ -1512,14 +1574,79 @@ fn apply_required_env( "/etc/openshell-tls/client/tls.key", ); } - // Projected ServiceAccount token written by kubelet (see the volume - // definition in `sandbox_template_to_k8s`). The supervisor reads this - // and exchanges it for a gateway-minted JWT via `IssueSandboxToken`. - upsert_env( - env, - openshell_core::sandbox_env::K8S_SA_TOKEN_FILE, - "/var/run/secrets/openshell/token", - ); + if let Some(spiffe) = spiffe_env { + upsert_env( + env, + openshell_core::sandbox_env::SPIFFE_WORKLOAD_API_SOCKET, + &spiffe.socket_path, + ); + upsert_env( + env, + openshell_core::sandbox_env::SPIFFE_AUDIENCE, + &spiffe.audience, + ); + upsert_env(env, openshell_core::sandbox_env::SPIFFE_ID, &spiffe.id); + } else { + // Projected ServiceAccount token written by kubelet (see the volume + // definition in `sandbox_template_to_k8s`). The supervisor reads this + // and exchanges it for a gateway-minted JWT via `IssueSandboxToken`. 
+ upsert_env( + env, + openshell_core::sandbox_env::K8S_SA_TOKEN_FILE, + "/var/run/secrets/openshell/token", + ); + } +} + +#[derive(Clone)] +struct SpiffeEnv { + socket_path: String, + audience: String, + id: String, +} + +fn spiffe_env(params: &SandboxPodParams<'_>) -> Option { + params.spiffe_enabled.then(|| SpiffeEnv { + socket_path: params.spiffe_workload_api_socket_path.to_string(), + audience: params.spiffe_audience.to_string(), + id: sandbox_spiffe_id(params), + }) +} + +fn sandbox_spiffe_id(params: &SandboxPodParams<'_>) -> String { + format!( + "spiffe://{}{}{}", + params + .spiffe_trust_domain + .trim() + .trim_start_matches("spiffe://") + .trim_end_matches('/'), + normalize_spiffe_path_prefix(params.spiffe_sandbox_id_prefix), + params.sandbox_id, + ) +} + +fn normalize_spiffe_path_prefix(prefix: &str) -> String { + let trimmed = prefix.trim(); + let with_leading = if trimmed.starts_with('/') { + trimmed.to_string() + } else { + format!("/{trimmed}") + }; + if with_leading.ends_with('/') { + with_leading + } else { + format!("{with_leading}/") + } +} + +fn spiffe_socket_mount_path(socket_path: &str) -> String { + std::path::Path::new(socket_path) + .parent() + .and_then(std::path::Path::to_str) + .filter(|path| !path.is_empty()) + .unwrap_or("/spiffe-workload-api") + .to_string() } fn upsert_env(env: &mut Vec, name: &str, value: &str) { @@ -1939,6 +2066,7 @@ mod tests { "https://endpoint:8080", "0.0.0.0:2222", true, // tls_enabled + None, ); // Extract the TLS-related env vars @@ -2491,6 +2619,65 @@ mod tests { ); } + #[test] + fn spiffe_mode_mounts_csi_socket_and_sets_identity_env() { + let params = SandboxPodParams { + sandbox_id: "sandbox-123", + sandbox_name: "sandbox", + spiffe_enabled: true, + spiffe_workload_api_socket_path: "/spiffe-workload-api/spire-agent.sock", + spiffe_trust_domain: "openshell.local", + spiffe_audience: "openshell-gateway", + spiffe_sandbox_id_prefix: "/openshell/sandbox/", + ..SandboxPodParams::default() + }; + let 
pod_template = sandbox_template_to_k8s( + &SandboxTemplate::default(), + false, + &std::collections::HashMap::new(), + true, + ¶ms, + ); + + let env = pod_template["spec"]["containers"][0]["env"] + .as_array() + .expect("env"); + assert!(env.iter().any(|e| { + e["name"] == openshell_core::sandbox_env::SPIFFE_WORKLOAD_API_SOCKET + && e["value"] == "/spiffe-workload-api/spire-agent.sock" + })); + assert!(env.iter().any(|e| { + e["name"] == openshell_core::sandbox_env::SPIFFE_ID + && e["value"] == "spiffe://openshell.local/openshell/sandbox/sandbox-123" + })); + assert!( + !env.iter() + .any(|e| e["name"] == openshell_core::sandbox_env::K8S_SA_TOKEN_FILE), + "SPIFFE mode must not expose the ServiceAccount bootstrap token" + ); + + let volumes = pod_template["spec"]["volumes"].as_array().expect("volumes"); + assert!(volumes.iter().any(|volume| { + volume["name"] == SPIFFE_WORKLOAD_API_VOLUME_NAME + && volume["csi"]["driver"] == "csi.spiffe.io" + })); + assert!( + !volumes + .iter() + .any(|volume| volume["name"] == "openshell-sa-token"), + "SPIFFE mode must not mount the ServiceAccount token volume" + ); + + assert_eq!( + pod_template["metadata"]["annotations"]["openshell.io/spiffe-id"], + serde_json::json!("spiffe://openshell.local/openshell/sandbox/sandbox-123") + ); + assert_eq!( + pod_template["metadata"]["labels"][SANDBOX_MANAGED_LABEL], + serde_json::json!(SANDBOX_MANAGED_VALUE) + ); + } + #[test] fn platform_config_bool_extracts_value() { let template = SandboxTemplate { diff --git a/crates/openshell-driver-kubernetes/src/main.rs b/crates/openshell-driver-kubernetes/src/main.rs index ac500e650..0f843a0d1 100644 --- a/crates/openshell-driver-kubernetes/src/main.rs +++ b/crates/openshell-driver-kubernetes/src/main.rs @@ -75,6 +75,26 @@ struct Args { /// gateway clamps values outside `[600, 86400]`. Default 3600. 
#[arg(long, env = "OPENSHELL_K8S_SA_TOKEN_TTL_SECS", default_value_t = 3600)] sa_token_ttl_secs: i64, + + #[arg(long, env = "OPENSHELL_SPIFFE_WORKLOAD_API_SOCKET")] + spiffe_workload_api_socket_path: Option, + + #[arg(long, env = "OPENSHELL_SPIFFE_TRUST_DOMAIN")] + spiffe_trust_domain: Option, + + #[arg( + long, + env = "OPENSHELL_SPIFFE_AUDIENCE", + default_value = "openshell-gateway" + )] + spiffe_audience: String, + + #[arg( + long, + env = "OPENSHELL_SPIFFE_SANDBOX_ID_PREFIX", + default_value = "/openshell/sandbox/" + )] + spiffe_sandbox_id_prefix: String, } #[tokio::main] @@ -101,6 +121,10 @@ async fn main() -> Result<()> { host_gateway_ip: args.host_gateway_ip.unwrap_or_default(), enable_user_namespaces: args.enable_user_namespaces, sa_token_ttl_secs: args.sa_token_ttl_secs, + spiffe_workload_api_socket_path: args.spiffe_workload_api_socket_path.unwrap_or_default(), + spiffe_trust_domain: args.spiffe_trust_domain.unwrap_or_default(), + spiffe_audience: args.spiffe_audience, + spiffe_sandbox_id_prefix: args.spiffe_sandbox_id_prefix, }) .await .into_diagnostic()?; diff --git a/crates/openshell-sandbox/Cargo.toml b/crates/openshell-sandbox/Cargo.toml index 29919ede4..be74827f2 100644 --- a/crates/openshell-sandbox/Cargo.toml +++ b/crates/openshell-sandbox/Cargo.toml @@ -24,8 +24,9 @@ openshell-router = { path = "../openshell-router" } tokio = { workspace = true } # gRPC -tonic = { workspace = true, features = ["channel", "tls"] } +tonic = { workspace = true, features = ["channel", "tls-native-roots"] } tokio-stream = { workspace = true } +spiffe = { workspace = true } # CLI clap = { workspace = true } diff --git a/crates/openshell-sandbox/src/grpc_client.rs b/crates/openshell-sandbox/src/grpc_client.rs index 934dff2b5..dafd6901b 100644 --- a/crates/openshell-sandbox/src/grpc_client.rs +++ b/crates/openshell-sandbox/src/grpc_client.rs @@ -4,22 +4,24 @@ //! gRPC client for fetching sandbox policy, provider environment, and inference //! 
route bundles from `OpenShell` server. //! -//! Every request carries a gateway-minted JWT in the `Authorization` header -//! (PR 3 of the per-sandbox identity series; see issue #1354). The token is -//! resolved at startup from one of three sources: +//! Every request carries a sandbox bearer credential in the `Authorization` +//! header. The token is resolved at startup from one of four sources: //! //! 1. `OPENSHELL_SANDBOX_TOKEN` — raw JWT in the env (test harness path). //! 2. `OPENSHELL_SANDBOX_TOKEN_FILE` — file containing the JWT (Docker / //! Podman / VM drivers write this to a bundle file at sandbox-create //! time). -//! 3. `OPENSHELL_K8S_SA_TOKEN_FILE` — projected `ServiceAccount` JWT; the +//! 3. `OPENSHELL_SPIFFE_WORKLOAD_API_SOCKET` — local SPIFFE Workload API +//! socket; the supervisor fetches a JWT-SVID and presents it directly. +//! 4. `OPENSHELL_K8S_SA_TOKEN_FILE` — projected `ServiceAccount` JWT; the //! supervisor exchanges it for a gateway JWT via `IssueSandboxToken` //! once at startup. //! -//! The resolved gateway JWT is held in process memory thereafter and +//! The resolved bearer credential is held in process memory thereafter and //! injected on every outbound call by [`AuthInterceptor`]. use std::collections::HashMap; +use std::path::PathBuf; use std::sync::{Arc, OnceLock, RwLock}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; @@ -32,6 +34,7 @@ use openshell_core::proto::{ UpdateConfigRequest, inference_client::InferenceClient, open_shell_client::OpenShellClient, }; use openshell_core::sandbox_env; +use spiffe::{SpiffeId, WorkloadApiClient}; use tonic::Status; use tonic::metadata::AsciiMetadataValue; use tonic::service::interceptor::InterceptedService; @@ -54,6 +57,25 @@ static TOKEN_SLOT: OnceLock = OnceLock::new(); /// One-shot guard so the refresh loop spawns at most once per process. 
static REFRESH_SPAWNED: OnceLock<()> = OnceLock::new(); +#[derive(Clone, Debug)] +enum RefreshMode { + GatewayJwt, + Spiffe(SpiffeTokenSource), +} + +#[derive(Clone, Debug)] +struct SpiffeTokenSource { + socket_path: PathBuf, + audience: String, + spiffe_id: Option, +} + +#[derive(Debug)] +struct AcquiredToken { + token: String, + refresh_mode: RefreshMode, +} + fn install_token_slot(token: &str) -> Result { let bearer = AsciiMetadataValue::try_from(format!("Bearer {token}")) .into_diagnostic() @@ -171,16 +193,31 @@ async fn connect_channel(endpoint: &str) -> Result { let slot = if let Some(existing) = TOKEN_SLOT.get() { existing.clone() } else { - let token = acquire_sandbox_token(endpoint, &channel).await?; - install_token_slot(&token)? + let acquired = acquire_sandbox_token(endpoint, &channel).await?; + let slot = install_token_slot(&acquired.token)?; + if REFRESH_SPAWNED.set(()).is_ok() { + match acquired.refresh_mode { + RefreshMode::GatewayJwt => { + let refresh_channel = InterceptedService::new( + channel.clone(), + AuthInterceptor::new(slot.clone()), + ); + let refresh_slot = slot.clone(); + tokio::spawn(async move { + refresh_token_loop(refresh_channel, refresh_slot).await; + }); + } + RefreshMode::Spiffe(source) => { + let refresh_slot = slot.clone(); + tokio::spawn(async move { + refresh_spiffe_token_loop(source, refresh_slot).await; + }); + } + } + } + slot }; - let intercepted = InterceptedService::new(channel, AuthInterceptor::new(slot.clone())); - if REFRESH_SPAWNED.set(()).is_ok() { - let refresh_channel = intercepted.clone(); - tokio::spawn(async move { - refresh_token_loop(refresh_channel, slot).await; - }); - } + let intercepted = InterceptedService::new(channel, AuthInterceptor::new(slot)); Ok(intercepted) } @@ -190,12 +227,15 @@ async fn connect_channel(endpoint: &str) -> Result { /// actual network call lives inside this function only on the K8s /// bootstrap path, which uses `plain_channel` to call `IssueSandboxToken` /// once before the 
steady-state Bearer-authenticated channel is built. -async fn acquire_sandbox_token(endpoint: &str, plain_channel: &Channel) -> Result { +async fn acquire_sandbox_token(endpoint: &str, plain_channel: &Channel) -> Result { if let Ok(t) = std::env::var(sandbox_env::SANDBOX_TOKEN) && !t.is_empty() { debug!(source = "env", "loaded sandbox token"); - return Ok(t); + return Ok(AcquiredToken { + token: t, + refresh_mode: RefreshMode::GatewayJwt, + }); } if let Ok(path) = std::env::var(sandbox_env::SANDBOX_TOKEN_FILE) @@ -205,7 +245,24 @@ async fn acquire_sandbox_token(endpoint: &str, plain_channel: &Channel) -> Resul .into_diagnostic() .wrap_err_with(|| format!("failed to read sandbox token from {path}"))?; debug!(source = "file", path = %path, "loaded sandbox token"); - return Ok(contents.trim().to_string()); + return Ok(AcquiredToken { + token: contents.trim().to_string(), + refresh_mode: RefreshMode::GatewayJwt, + }); + } + + if let Some(source) = spiffe_token_source_from_env()? { + info!( + socket = %source.socket_path.display(), + audience = %source.audience, + spiffe_id = source.spiffe_id.as_deref().unwrap_or(""), + "fetching SPIFFE JWT-SVID for sandbox gateway authentication" + ); + let token = fetch_spiffe_jwt_svid(&source).await?; + return Ok(AcquiredToken { + token, + refresh_mode: RefreshMode::Spiffe(source), + }); } if let Ok(sa_path) = std::env::var(sandbox_env::K8S_SA_TOKEN_FILE) @@ -233,17 +290,79 @@ async fn acquire_sandbox_token(endpoint: &str, plain_channel: &Channel) -> Resul .await .into_diagnostic() .wrap_err("IssueSandboxToken bootstrap exchange failed")?; - return Ok(resp.into_inner().token); + return Ok(AcquiredToken { + token: resp.into_inner().token, + refresh_mode: RefreshMode::GatewayJwt, + }); } Err(miette::miette!( - "no sandbox token source available — set one of {}, {}, or {}", + "no sandbox token source available — set one of {}, {}, {}, or {}", sandbox_env::SANDBOX_TOKEN, sandbox_env::SANDBOX_TOKEN_FILE, + 
sandbox_env::SPIFFE_WORKLOAD_API_SOCKET, sandbox_env::K8S_SA_TOKEN_FILE, )) } +fn spiffe_token_source_from_env() -> Result> { + let Ok(socket_path) = std::env::var(sandbox_env::SPIFFE_WORKLOAD_API_SOCKET) else { + return Ok(None); + }; + if socket_path.trim().is_empty() { + return Ok(None); + } + let audience = std::env::var(sandbox_env::SPIFFE_AUDIENCE) + .unwrap_or_else(|_| "openshell-gateway".to_string()); + if audience.trim().is_empty() { + return Err(miette::miette!( + "{} must not be empty when {} is set", + sandbox_env::SPIFFE_AUDIENCE, + sandbox_env::SPIFFE_WORKLOAD_API_SOCKET, + )); + } + let spiffe_id = std::env::var(sandbox_env::SPIFFE_ID) + .ok() + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()); + Ok(Some(SpiffeTokenSource { + socket_path: PathBuf::from(socket_path), + audience, + spiffe_id, + })) +} + +async fn fetch_spiffe_jwt_svid(source: &SpiffeTokenSource) -> Result { + let endpoint = spiffe_workload_api_endpoint(&source.socket_path); + let client = WorkloadApiClient::connect_to(&endpoint) + .await + .into_diagnostic() + .wrap_err_with(|| { + format!("failed to connect to SPIFFE Workload API endpoint {endpoint}") + })?; + let requested_spiffe_id = source + .spiffe_id + .as_deref() + .map(SpiffeId::try_from) + .transpose() + .into_diagnostic() + .wrap_err("invalid SPIFFE ID requested for JWT-SVID")?; + client + .fetch_jwt_token([source.audience.as_str()], requested_spiffe_id.as_ref()) + .await + .into_diagnostic() + .wrap_err("SPIFFE FetchJWTSVID failed") +} + +fn spiffe_workload_api_endpoint(path: &std::path::Path) -> String { + let path = path.to_string_lossy(); + if path.starts_with("unix:") || path.starts_with("tcp:") { + path.into_owned() + } else { + format!("unix:{path}") + } +} + /// Build an authenticated channel for direct external use (e.g. the /// long-lived `supervisor_session` control stream). 
pub async fn connect_channel_pub(endpoint: &str) -> Result { @@ -285,6 +404,30 @@ async fn refresh_token_loop(channel: AuthedChannel, slot: TokenSlot) { } } +async fn refresh_spiffe_token_loop(source: SpiffeTokenSource, slot: TokenSlot) { + loop { + let sleep = compute_refresh_delay(&slot); + tokio::time::sleep(sleep).await; + match fetch_spiffe_jwt_svid(&source).await { + Ok(new_token) => match AsciiMetadataValue::try_from(format!("Bearer {new_token}")) { + Ok(value) => { + if let Ok(mut guard) = slot.write() { + *guard = value; + info!("rotated SPIFFE JWT-SVID in-place"); + } + } + Err(e) => { + warn!(error = %e, "refreshed SPIFFE JWT-SVID contained invalid header bytes"); + } + }, + Err(err) => { + warn!(error = %err, "SPIFFE FetchJWTSVID failed; will retry"); + tokio::time::sleep(Duration::from_secs(60)).await; + } + } + } +} + /// Compute the next refresh delay: 80 % of the time remaining until the /// current token's `exp`, plus up to 10 % jitter, floored at 60 s and /// capped at 12 h. 
If the token can't be parsed (legacy/non-JWT bearer) diff --git a/crates/openshell-server/Cargo.toml b/crates/openshell-server/Cargo.toml index fa19ab526..82920e1e1 100644 --- a/crates/openshell-server/Cargo.toml +++ b/crates/openshell-server/Cargo.toml @@ -33,7 +33,7 @@ k8s-openapi = { workspace = true } tokio = { workspace = true } # gRPC -tonic = { workspace = true, features = ["channel", "tls"] } +tonic = { workspace = true, features = ["channel", "tls-native-roots"] } prost = { workspace = true } prost-types = { workspace = true } @@ -82,6 +82,7 @@ uuid = { workspace = true } hmac = "0.12" sha2 = { workspace = true } jsonwebtoken = { workspace = true } +spiffe = { workspace = true } async-trait = "0.1" url = { workspace = true } hex = "0.4" diff --git a/crates/openshell-server/src/auth/mod.rs b/crates/openshell-server/src/auth/mod.rs index d4c6978af..cff5508ad 100644 --- a/crates/openshell-server/src/auth/mod.rs +++ b/crates/openshell-server/src/auth/mod.rs @@ -18,5 +18,6 @@ pub mod oidc; pub mod principal; pub mod revocation; pub mod sandbox_jwt; +pub mod spiffe; pub use http::router; diff --git a/crates/openshell-server/src/auth/spiffe.rs b/crates/openshell-server/src/auth/spiffe.rs new file mode 100644 index 000000000..c942a44b4 --- /dev/null +++ b/crates/openshell-server/src/auth/spiffe.rs @@ -0,0 +1,212 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! SPIFFE JWT-SVID authentication for sandbox supervisors. +//! +//! The gateway does not validate SPIFFE JWT-SVID signatures itself. Instead it +//! delegates validation to the local SPIFFE Workload API, keeping algorithm and +//! bundle handling inside the configured SPIFFE implementation. 
+ +use super::authenticator::Authenticator; +use super::principal::{Principal, SandboxIdentitySource, SandboxPrincipal}; +use async_trait::async_trait; +use openshell_core::SpiffeConfig; +use spiffe::{JwtSvid, WorkloadApiClient}; +use std::path::Path; +use tonic::Status; +use tracing::{debug, info, warn}; + +/// Authenticator backed by the SPIFFE Workload API `ValidateJWTSVID` RPC. +pub struct SpiffeAuthenticator { + client: WorkloadApiClient, + config: SpiffeConfig, +} + +impl std::fmt::Debug for SpiffeAuthenticator { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SpiffeAuthenticator") + .field("socket", &self.config.workload_api_socket_path) + .field("trust_domain", &self.config.trust_domain) + .field("audience", &self.config.audience) + .field("sandbox_id_prefix", &self.config.sandbox_id_prefix) + .finish_non_exhaustive() + } +} + +impl SpiffeAuthenticator { + pub async fn new(config: SpiffeConfig) -> Result { + let endpoint = workload_api_endpoint(&config.workload_api_socket_path); + let client = WorkloadApiClient::connect_to(&endpoint) + .await + .map_err(|e| { + format!("failed to connect to SPIFFE Workload API endpoint {endpoint}: {e}") + })?; + info!( + socket = %config.workload_api_socket_path.display(), + trust_domain = %config.trust_domain, + audience = %config.audience, + "SPIFFE JWT-SVID sandbox authenticator enabled" + ); + Ok(Self { client, config }) + } + + #[allow(clippy::result_large_err)] + async fn validate_bearer(&self, token: &str) -> Result, Status> { + let Some(candidate_id) = candidate_spiffe_id(token) else { + return Ok(None); + }; + if parse_sandbox_id_from_spiffe_id( + &candidate_id, + &self.config.trust_domain, + &self.config.sandbox_id_prefix, + ) + .is_none() + { + return Ok(None); + } + + let svid = self + .client + .validate_jwt_token(&self.config.audience, token) + .await + .map_err(|status| { + debug!(error = %status, "SPIFFE JWT-SVID validation failed"); + Status::unauthenticated("invalid 
SPIFFE JWT-SVID") + })?; + + self.principal_from_validated_svid(&svid) + } + + #[allow(clippy::result_large_err)] + fn principal_from_validated_svid(&self, svid: &JwtSvid) -> Result, Status> { + let spiffe_id = svid.spiffe_id().to_string(); + let Some(sandbox_id) = parse_sandbox_id_from_spiffe_id( + &spiffe_id, + &self.config.trust_domain, + &self.config.sandbox_id_prefix, + ) else { + warn!( + spiffe_id = %spiffe_id, + trust_domain = %self.config.trust_domain, + prefix = %self.config.sandbox_id_prefix, + "validated SPIFFE ID is outside the configured sandbox identity namespace" + ); + return Err(Status::permission_denied( + "SPIFFE ID is not authorized as an OpenShell sandbox", + )); + }; + + Ok(Some(Principal::Sandbox(SandboxPrincipal { + sandbox_id, + source: SandboxIdentitySource::SpiffeSvid { spiffe_id }, + trust_domain: Some(self.config.trust_domain.clone()), + }))) + } +} + +#[async_trait] +impl Authenticator for SpiffeAuthenticator { + async fn authenticate( + &self, + headers: &http::HeaderMap, + _path: &str, + ) -> Result, Status> { + let Some(token) = headers + .get("authorization") + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.strip_prefix("Bearer ")) + else { + return Ok(None); + }; + self.validate_bearer(token).await + } +} + +fn parse_sandbox_id_from_spiffe_id( + spiffe_id: &str, + trust_domain: &str, + sandbox_id_prefix: &str, +) -> Option { + let trust_domain = trust_domain.trim().trim_start_matches("spiffe://"); + let prefix = format!( + "spiffe://{}{}", + trust_domain.trim_end_matches('/'), + normalize_spiffe_path_prefix(sandbox_id_prefix) + ); + let sandbox_id = spiffe_id.strip_prefix(&prefix)?; + (!sandbox_id.is_empty() && !sandbox_id.contains('/')).then(|| sandbox_id.to_string()) +} + +/// Normalizes `prefix` to start and end with `/` so IDs only match on a path-segment boundary. +fn normalize_spiffe_path_prefix(prefix: &str) -> String { + let trimmed = prefix.trim(); + let lead = if trimmed.starts_with('/') { "" } else { "/" }; + let with_leading = format!("{lead}{trimmed}"); + let tail = if with_leading.ends_with('/') { "" } else { "/" }; + format!("{with_leading}{tail}") +} + +fn candidate_spiffe_id(jwt: &str) -> Option { +
JwtSvid::parse_insecure(jwt) + .ok() + .map(|svid| svid.spiffe_id().to_string()) +} + +fn workload_api_endpoint(path: &Path) -> String { + let path = path.to_string_lossy(); + if path.starts_with("unix:") || path.starts_with("tcp:") { + path.into_owned() + } else { + format!("unix:{path}") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_sandbox_id_from_configured_spiffe_id() { + assert_eq!( + parse_sandbox_id_from_spiffe_id( + "spiffe://openshell.local/openshell/sandbox/abc", + "openshell.local", + "/openshell/sandbox/", + ) + .as_deref(), + Some("abc") + ); + } + + #[test] + fn rejects_spiffe_id_outside_sandbox_namespace() { + assert!( + parse_sandbox_id_from_spiffe_id( + "spiffe://openshell.local/ns/openshell/sa/default", + "openshell.local", + "/openshell/sandbox/", + ) + .is_none() + ); + assert!( + parse_sandbox_id_from_spiffe_id( + "spiffe://other.local/openshell/sandbox/abc", + "openshell.local", + "/openshell/sandbox/", + ) + .is_none() + ); + } + + #[test] + fn prefixes_plain_socket_paths_as_unix_endpoints() { + assert_eq!( + workload_api_endpoint(Path::new("/spiffe-workload-api/spire-agent.sock")), + "unix:/spiffe-workload-api/spire-agent.sock" + ); + assert_eq!( + workload_api_endpoint(Path::new("unix:/tmp/spire.sock")), + "unix:/tmp/spire.sock" + ); + } +} diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index 913da706d..7d54872de 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -343,6 +343,12 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> { { config.gateway_jwt = Some(jwt); } + if let Some(spiffe) = file + .as_ref() + .and_then(|f| f.openshell.gateway.spiffe.clone()) + { + config.spiffe = Some(spiffe); + } let vm_config = build_vm_config(file.as_ref())?; let docker_config = build_docker_config(file.as_ref())?; @@ -359,8 +365,11 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> { if 
has_oidc { info!("OIDC authentication enabled"); } + if config.spiffe.is_some() { + info!("SPIFFE sandbox authentication enabled"); + } - if !has_client_ca && !has_oidc { + if !has_client_ca && !has_oidc && config.spiffe.is_none() { warn!( "Neither mTLS (--tls-client-ca) nor OIDC (--oidc-issuer) is configured — \ the gateway has no authentication mechanism" diff --git a/crates/openshell-server/src/config_file.rs b/crates/openshell-server/src/config_file.rs index eddcd2615..dd9f8badf 100644 --- a/crates/openshell-server/src/config_file.rs +++ b/crates/openshell-server/src/config_file.rs @@ -25,7 +25,7 @@ use std::net::SocketAddr; use std::path::{Path, PathBuf}; use openshell_core::config::ComputeDriverKind; -use openshell_core::{GatewayJwtConfig, OidcConfig, TlsConfig}; +use openshell_core::{GatewayJwtConfig, OidcConfig, SpiffeConfig, TlsConfig}; use serde::{Deserialize, Serialize}; /// Latest schema version this build understands. @@ -140,6 +140,8 @@ pub struct GatewayFileSection { pub oidc: Option, #[serde(default)] pub gateway_jwt: Option, + #[serde(default)] + pub spiffe: Option, // ── Disallowed-in-file fields ──────────────────────────────────────── // @@ -255,6 +257,10 @@ fn inheritable_keys(driver: ComputeDriverKind) -> &'static [&'static str] { "host_gateway_ip", "enable_user_namespaces", "sa_token_ttl_secs", + "spiffe_workload_api_socket_path", + "spiffe_trust_domain", + "spiffe_audience", + "spiffe_sandbox_id_prefix", ], ComputeDriverKind::Docker => &[ "sandbox_namespace", @@ -290,6 +296,22 @@ fn gateway_inherited_value(g: &GatewayFileSection, key: &str) -> Option g.host_gateway_ip.as_deref().map(string_value), "enable_user_namespaces" => g.enable_user_namespaces.map(toml::Value::Boolean), "sa_token_ttl_secs" => g.sa_token_ttl_secs.map(toml::Value::Integer), + "spiffe_workload_api_socket_path" => g + .spiffe + .as_ref() + .map(|spiffe| path_value(&spiffe.workload_api_socket_path)), + "spiffe_trust_domain" => g + .spiffe + .as_ref() + .map(|spiffe| 
string_value(&spiffe.trust_domain)), + "spiffe_audience" => g + .spiffe + .as_ref() + .map(|spiffe| string_value(&spiffe.audience)), + "spiffe_sandbox_id_prefix" => g + .spiffe + .as_ref() + .map(|spiffe| string_value(&spiffe.sandbox_id_prefix)), "guest_tls_ca" => g.guest_tls_ca.as_deref().map(path_value), "guest_tls_cert" => g.guest_tls_cert.as_deref().map(path_value), "guest_tls_key" => g.guest_tls_key.as_deref().map(path_value), diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 3fa5b313b..7f4b5f7ba 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -115,6 +115,10 @@ pub struct ServerState { /// presenting a freshly minted token are recognized. pub sandbox_jwt_authenticator: Option>, + /// Authenticator that validates SPIFFE JWT-SVIDs through the local SPIFFE + /// Workload API and maps authorized SPIFFE IDs to sandbox principals. + pub spiffe_authenticator: Option>, + /// Optional K8s `ServiceAccount` authenticator that backs the /// `IssueSandboxToken` bootstrap path. Only present when the gateway /// runs in-cluster. @@ -169,6 +173,7 @@ impl ServerState { oidc_cache, sandbox_jwt_issuer: None, sandbox_jwt_authenticator: None, + spiffe_authenticator: None, k8s_sa_authenticator: None, sandbox_jwt_revocation: Arc::new(auth::revocation::RevocationSet::new()), } @@ -295,6 +300,13 @@ pub async fn run_server( state.sandbox_jwt_authenticator = Some(Arc::new(authenticator)); } + if let Some(ref spiffe) = config.spiffe { + let authenticator = auth::spiffe::SpiffeAuthenticator::new(spiffe.clone()) + .await + .map_err(|e| Error::config(format!("SPIFFE initialization failed: {e}")))?; + state.spiffe_authenticator = Some(Arc::new(authenticator)); + } + // K8s ServiceAccount bootstrap authenticator. 
Only constructed when // the gateway is running in-cluster (kubelet provides the API host // env var) and has a sandbox JWT issuer to mint replacements against; diff --git a/crates/openshell-server/src/multiplex.rs b/crates/openshell-server/src/multiplex.rs index 567df2272..69a06a644 100644 --- a/crates/openshell-server/src/multiplex.rs +++ b/crates/openshell-server/src/multiplex.rs @@ -266,10 +266,13 @@ where /// other path; only present when the gateway runs in-cluster. /// 2. `SandboxJwtAuthenticator` — validates gateway-minted JWTs. Recognized /// via a distinctive `kid` so non-matching Bearer tokens fall through. -/// 3. `OidcAuthenticator` — validates user Bearer tokens against the +/// 3. `SpiffeAuthenticator` — validates SPIFFE JWT-SVIDs through the +/// local SPIFFE Workload API and maps sandbox SPIFFE IDs to +/// `Principal::Sandbox`. +/// 4. `OidcAuthenticator` — validates user Bearer tokens against the /// configured OIDC issuer. Returns `Unauthenticated` for missing /// Bearer headers so non-OIDC clients can't sneak through. -/// 4. `PermissiveUserAuthenticator` — installed only when no OIDC is +/// 5. `PermissiveUserAuthenticator` — installed only when no OIDC is /// configured (singleplayer / helm-dev). Catches anything the /// sandbox authenticators didn't claim and produces a synthetic /// user principal, preserving the pre-PR-1 "no OIDC = open" posture. 
@@ -285,6 +288,9 @@ fn build_authenticator_chain(state: &ServerState) -> Option if let Some(jwt) = state.sandbox_jwt_authenticator.clone() { authenticators.push(jwt); } + if let Some(spiffe) = state.spiffe_authenticator.clone() { + authenticators.push(spiffe); + } if let Some(cache) = state.oidc_cache.clone() { authenticators.push(Arc::new(OidcAuthenticator::new(cache))); } else if !authenticators.is_empty() { @@ -347,19 +353,13 @@ impl AuthGrpcRouter { } } -fn status_response(status: tonic::Status) -> Response { - let response = status.into_http(); - let (parts, body) = response.into_parts(); - let body = tonic::body::BoxBody::new(body); - Response::from_parts(parts, body) +fn status_response(status: tonic::Status) -> Response { + status.into_http() } impl tower::Service> for AuthGrpcRouter where - S: tower::Service, Response = Response> - + Clone - + Send - + 'static, + S: tower::Service, Response = Response> + Clone + Send + 'static, S::Future: Send, S::Error: Send + Into>, B: Send + 'static, @@ -896,7 +896,7 @@ mod tests { } impl Service> for PrincipalRecorder { - type Response = Response; + type Response = Response; type Error = std::convert::Infallible; type Future = Pin> + Send>>; @@ -907,14 +907,7 @@ mod tests { fn call(&mut self, req: Request) -> Self::Future { let principal = req.extensions().get::().cloned(); *self.recorded.lock().unwrap() = principal; - Box::pin(async move { - let body = tonic::body::BoxBody::new( - Full::new(Bytes::new()) - .map_err(|never| match never {}) - .boxed_unsync(), - ); - Ok(Response::new(body)) - }) + Box::pin(async move { Ok(Response::new(tonic::body::Body::empty())) }) } } diff --git a/crates/openshell-tui/Cargo.toml b/crates/openshell-tui/Cargo.toml index b0ac0c7ca..723528cd7 100644 --- a/crates/openshell-tui/Cargo.toml +++ b/crates/openshell-tui/Cargo.toml @@ -21,7 +21,7 @@ ratatui = { workspace = true } crossterm = { workspace = true } terminal-colorsaurus = { workspace = true } tokio = { workspace = true } -tonic = { 
workspace = true, features = ["tls"] } +tonic = { workspace = true, features = ["tls-native-roots"] } miette = { workspace = true } owo-colors = { workspace = true } serde = { workspace = true } diff --git a/deploy/helm/openshell/README.md b/deploy/helm/openshell/README.md index cc856731d..f27fd5da1 100644 --- a/deploy/helm/openshell/README.md +++ b/deploy/helm/openshell/README.md @@ -52,6 +52,8 @@ See [`values.yaml`](values.yaml) for configurable values. Selected overlays: - [`ci/values-gateway.yaml`](ci/values-gateway.yaml) — gateway-only configuration - [`ci/values-cert-manager.yaml`](ci/values-cert-manager.yaml) — cert-manager integration - [`ci/values-keycloak.yaml`](ci/values-keycloak.yaml) — Keycloak OIDC integration +- [`ci/values-spire.yaml`](ci/values-spire.yaml) — SPIFFE/SPIRE sandbox supervisor authentication +- [`ci/values-spire-stack.yaml`](ci/values-spire-stack.yaml) — SPIRE hardened chart values for local development ## PKI bootstrap @@ -70,3 +72,13 @@ The Job is idempotent: Disable with `--set pkiInitJob.enabled=false` when bringing your own PKI (cert-manager, external CA, or pre-created Secrets). See `certManager.*` in `values.yaml` for the cert-manager alternative. + +## SPIFFE/SPIRE sandbox identity + +Set `server.spiffe.enabled=true` to use SPIFFE JWT-SVIDs for sandbox supervisor +authentication instead of gateway-minted sandbox JWTs. The chart mounts the +SPIFFE CSI Workload API socket into the gateway pod and configures sandbox pods +to request `spiffe://<trust-domain>/openshell/sandbox/<sandbox-id>` JWT-SVIDs. + +For local development, uncomment the SPIRE Helm releases in `skaffold.yaml` and +add `ci/values-spire.yaml` to the OpenShell release values files.
diff --git a/deploy/helm/openshell/ci/values-spire-stack.yaml b/deploy/helm/openshell/ci/values-spire-stack.yaml new file mode 100644 index 000000000..018b441d6 --- /dev/null +++ b/deploy/helm/openshell/ci/values-spire-stack.yaml @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# SPIRE hardened chart values for the local Helm dev environment. +global: + spire: + clusterName: openshell-dev + trustDomain: openshell.local + +spire-server: + controllerManager: + identities: + clusterSPIFFEIDs: + openshell-sandboxes: + enabled: true + spiffeIDTemplate: 'spiffe://{{ .TrustDomain }}/openshell/sandbox/{{ index .PodMeta.Annotations "openshell.io/sandbox-id" }}' + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: openshell + podSelector: + matchLabels: + openshell.ai/managed-by: openshell diff --git a/deploy/helm/openshell/ci/values-spire.yaml b/deploy/helm/openshell/ci/values-spire.yaml new file mode 100644 index 000000000..cf4dd1104 --- /dev/null +++ b/deploy/helm/openshell/ci/values-spire.yaml @@ -0,0 +1,11 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# OpenShell overlay for local SPIRE-backed supervisor authentication. +server: + spiffe: + enabled: true + trustDomain: openshell.local + audience: openshell-gateway + workloadApiSocketPath: /spiffe-workload-api/spire-agent.sock + sandboxIdPrefix: /openshell/sandbox/ diff --git a/deploy/helm/openshell/skaffold.yaml b/deploy/helm/openshell/skaffold.yaml index 779211877..9905c4b10 100644 --- a/deploy/helm/openshell/skaffold.yaml +++ b/deploy/helm/openshell/skaffold.yaml @@ -79,6 +79,24 @@ deploy: # # wait ensures Gateway API CRDs are registered before the openshell # # release attempts to create Gateway and HTTPRoute resources. 
# wait: true + # SPIRE — installs SPIRE Server, Agent, Controller Manager, CSI Driver, + # and OIDC Discovery Provider using the SPIFFE hardened charts. + # Uncomment both releases and ci/values-spire.yaml below to use + # SPIFFE JWT-SVIDs for sandbox supervisor authentication. + #- name: spire-crds + # repo: https://spiffe.github.io/helm-charts-hardened/ + # remoteChart: spire-crds + # namespace: spire + # createNamespace: true + # wait: true + #- name: spire + # repo: https://spiffe.github.io/helm-charts-hardened/ + # remoteChart: spire + # namespace: spire + # createNamespace: true + # valuesFiles: + # - ci/values-spire-stack.yaml + # wait: true - name: openshell chartPath: . namespace: openshell @@ -95,6 +113,9 @@ deploy: #- ci/values-keycloak.yaml # To enable the Gateway API HTTPRoute (requires Envoy Gateway above): #- ci/values-gateway.yaml + # To enable SPIFFE/SPIRE sandbox supervisor authentication (requires + # the spire-crds and spire releases above): + #- ci/values-spire.yaml setValueTemplates: image.repository: '{{.IMAGE_REPO_openshell_gateway}}' image.tag: '{{.IMAGE_TAG_openshell_gateway}}' diff --git a/deploy/helm/openshell/templates/_helpers.tpl b/deploy/helm/openshell/templates/_helpers.tpl index 00925d2d3..7f3fe4cc2 100644 --- a/deploy/helm/openshell/templates/_helpers.tpl +++ b/deploy/helm/openshell/templates/_helpers.tpl @@ -125,3 +125,11 @@ init-container {{- printf "%s://%s.%s.svc.cluster.local:%d" $scheme (include "openshell.fullname" .) .Release.Namespace (int .Values.service.port) -}} {{- end -}} {{- end }} + +{{/* +Directory mounted for the SPIFFE Workload API CSI volume. The socket itself +lives at server.spiffe.workloadApiSocketPath. 
+*/}} +{{- define "openshell.spiffeWorkloadApiMountPath" -}} +{{- dir .Values.server.spiffe.workloadApiSocketPath -}} +{{- end }} diff --git a/deploy/helm/openshell/templates/gateway-config.yaml b/deploy/helm/openshell/templates/gateway-config.yaml index 302a5806f..3dc92e095 100644 --- a/deploy/helm/openshell/templates/gateway-config.yaml +++ b/deploy/helm/openshell/templates/gateway-config.yaml @@ -64,12 +64,20 @@ data: {{- end }} {{- end }} + {{- if .Values.server.spiffe.enabled }} + [openshell.gateway.spiffe] + workload_api_socket_path = {{ .Values.server.spiffe.workloadApiSocketPath | quote }} + trust_domain = {{ .Values.server.spiffe.trustDomain | quote }} + audience = {{ .Values.server.spiffe.audience | quote }} + sandbox_id_prefix = {{ .Values.server.spiffe.sandboxIdPrefix | quote }} + {{- else }} [openshell.gateway.gateway_jwt] signing_key_path = "/etc/openshell-jwt/signing.pem" public_key_path = "/etc/openshell-jwt/public.pem" kid_path = "/etc/openshell-jwt/kid" gateway_id = {{ .Values.server.sandboxJwt.gatewayId | default (include "openshell.fullname" .) | quote }} ttl_secs = {{ .Values.server.sandboxJwt.ttlSecs | default 86400 }} + {{- end }} {{- if .Values.server.oidc.issuer }} @@ -94,7 +102,14 @@ data: [openshell.drivers.kubernetes] grpc_endpoint = {{ include "openshell.grpcEndpoint" . | quote }} supervisor_sideload_method = {{ include "openshell.supervisorSideloadMethod" . 
| quote }} + {{- if .Values.server.spiffe.enabled }} + spiffe_workload_api_socket_path = {{ .Values.server.spiffe.workloadApiSocketPath | quote }} + spiffe_trust_domain = {{ .Values.server.spiffe.trustDomain | quote }} + spiffe_audience = {{ .Values.server.spiffe.audience | quote }} + spiffe_sandbox_id_prefix = {{ .Values.server.spiffe.sandboxIdPrefix | quote }} + {{- else }} sa_token_ttl_secs = {{ .Values.server.sandboxJwt.k8sSaTokenTtlSecs | default 3600 }} + {{- end }} {{- if .Values.server.sandboxImagePullPolicy }} image_pull_policy = {{ .Values.server.sandboxImagePullPolicy | quote }} {{- end }} diff --git a/deploy/helm/openshell/templates/statefulset.yaml b/deploy/helm/openshell/templates/statefulset.yaml index 5dd4f1caf..47d8555cc 100644 --- a/deploy/helm/openshell/templates/statefulset.yaml +++ b/deploy/helm/openshell/templates/statefulset.yaml @@ -75,9 +75,15 @@ spec: - name: gateway-config mountPath: /etc/openshell readOnly: true + {{- if .Values.server.spiffe.enabled }} + - name: spiffe-workload-api + mountPath: {{ include "openshell.spiffeWorkloadApiMountPath" . | quote }} + readOnly: true + {{- else }} - name: sandbox-jwt mountPath: /etc/openshell-jwt readOnly: true + {{- end }} {{- if not .Values.server.disableTls }} - name: tls-cert mountPath: /etc/openshell-tls/server @@ -134,10 +140,17 @@ spec: - name: gateway-config configMap: name: {{ include "openshell.fullname" . 
}}-config + {{- if .Values.server.spiffe.enabled }} + - name: spiffe-workload-api + csi: + driver: csi.spiffe.io + readOnly: true + {{- else }} - name: sandbox-jwt secret: secretName: {{ .Values.server.sandboxJwt.signingSecretName | default (printf "%s-jwt-keys" (include "openshell.fullname" .)) }} defaultMode: 0400 + {{- end }} {{- if not .Values.server.disableTls }} - name: tls-cert secret: diff --git a/deploy/helm/openshell/tests/gateway_config_test.yaml b/deploy/helm/openshell/tests/gateway_config_test.yaml index 2d464b8e6..c8856900d 100644 --- a/deploy/helm/openshell/tests/gateway_config_test.yaml +++ b/deploy/helm/openshell/tests/gateway_config_test.yaml @@ -66,3 +66,30 @@ tests: - matchRegex: path: data["gateway.toml"] pattern: 'server_sans\s*=\s*\["openshell", "\*\.dev\.openshell\.localhost"\]' + + - it: renders SPIFFE sandbox auth instead of gateway JWT when enabled + set: + server.spiffe.enabled: true + template: templates/gateway-config.yaml + asserts: + - matchRegex: + path: data["gateway.toml"] + pattern: '\[openshell\.gateway\.spiffe\]' + - matchRegex: + path: data["gateway.toml"] + pattern: 'spiffe_workload_api_socket_path\s*=\s*"/spiffe-workload-api/spire-agent\.sock"' + - notMatchRegex: + path: data["gateway.toml"] + pattern: '\[openshell\.gateway\.gateway_jwt\]' + + - it: mounts the SPIFFE Workload API socket when SPIFFE is enabled + set: + server.spiffe.enabled: true + template: templates/statefulset.yaml + asserts: + - matchRegex: + path: spec.template.spec.volumes[1].name + pattern: '^spiffe-workload-api$' + - matchRegex: + path: spec.template.spec.volumes[1].csi.driver + pattern: '^csi\.spiffe\.io$' diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index 54323068b..623422b74 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -146,6 +146,15 @@ server: # values outside [600, 86400]. 
Default 3600 — generous, since the # supervisor consumes the token within seconds of pod start. k8sSaTokenTtlSecs: 3600 + # SPIFFE/SPIRE sandbox identity. When enabled, sandbox supervisors fetch a + # JWT-SVID from the SPIFFE Workload API and present it directly to the + # gateway instead of bootstrapping a gateway-minted sandbox JWT. + spiffe: + enabled: false + trustDomain: openshell.local + audience: openshell-gateway + workloadApiSocketPath: /spiffe-workload-api/spire-agent.sock + sandboxIdPrefix: /openshell/sandbox/ # OIDC (OpenID Connect) configuration for JWT-based authentication. # When issuer is set, the server validates Bearer tokens on gRPC requests. oidc: diff --git a/docs/kubernetes/access-control.mdx b/docs/kubernetes/access-control.mdx index d66dc528d..0c53d0ace 100644 --- a/docs/kubernetes/access-control.mdx +++ b/docs/kubernetes/access-control.mdx @@ -19,6 +19,14 @@ The Helm chart always generates mTLS certificates at install time. The gateway u For how the CLI resolves gateways and stores credentials, refer to [Gateway Authentication](/reference/gateway-auth). +## Sandbox Supervisor Identity + +Kubernetes sandbox supervisors authenticate back to the gateway as sandbox workloads. By default, the gateway mints its own sandbox JWTs and Kubernetes sandboxes bootstrap them with a projected ServiceAccount token. + +Set `server.spiffe.enabled=true` to use SPIFFE JWT-SVIDs instead. In this mode, sandbox pods mount the SPIFFE CSI Workload API socket, request a JWT-SVID for `server.spiffe.audience`, and present that token directly to the gateway. The gateway validates the token through its local SPIFFE Workload API socket and accepts SPIFFE IDs under `spiffe://<trust-domain>/openshell/sandbox/`. + +SPIFFE mode requires a SPIFFE implementation such as SPIRE and a `ClusterSPIFFEID` that assigns per-sandbox IDs from the pod's `openshell.io/sandbox-id` annotation. + ## OIDC User Authentication Set `server.oidc.issuer` to enable OIDC.
The gateway validates the `Authorization: Bearer <token>` header on every request against the issuer's JWKS endpoint.