From 504c75c0307d987843d3d58ab844e283a34923b2 Mon Sep 17 00:00:00 2001 From: Ryan Angilly Date: Tue, 12 May 2026 14:58:07 -0600 Subject: [PATCH 1/3] feat(sandbox): add GPU count support Signed-off-by: Ryan Angilly --- architecture/compute-runtimes.md | 5 +- crates/openshell-cli/src/main.rs | 41 ++++++ crates/openshell-cli/src/run.rs | 13 +- .../sandbox_create_lifecycle_integration.rs | 55 ++++++- crates/openshell-driver-docker/src/tests.rs | 1 + crates/openshell-driver-kubernetes/README.md | 8 +- .../openshell-driver-kubernetes/src/driver.rs | 136 ++++++++++++++++-- crates/openshell-server/src/compute/mod.rs | 26 +++- .../openshell-server/src/grpc/validation.rs | 30 ++++ docs/kubernetes/setup.mdx | 1 + docs/sandboxes/manage-sandboxes.mdx | 10 ++ proto/compute_driver.proto | 3 + proto/openshell.proto | 3 + 13 files changed, 310 insertions(+), 22 deletions(-) diff --git a/architecture/compute-runtimes.md b/architecture/compute-runtimes.md index 9ab512f98..b048af605 100644 --- a/architecture/compute-runtimes.md +++ b/architecture/compute-runtimes.md @@ -22,7 +22,7 @@ Each runtime receives a sandbox spec from the gateway and is responsible for: |---|---|---|---| | Docker | Local development with Docker available. | Container plus nested sandbox namespace. | Uses host networking so loopback gateway endpoints work from the supervisor. | | Podman | Rootless or single-machine deployments. | Container plus nested sandbox namespace. | Uses the Podman REST API, OCI image volumes, and CDI GPU devices when available. | -| Kubernetes | Cluster deployment through Helm. | Pod plus nested sandbox namespace. | Uses Kubernetes API objects, service accounts, secrets, PVC-backed workspace storage, and GPU resources. | +| Kubernetes | Cluster deployment through Helm. | Pod plus nested sandbox namespace. | Uses Kubernetes API objects, service accounts, secrets, PVC-backed workspace storage, and `nvidia.com/gpu` limits for GPU requests. | | VM | Experimental microVM isolation. 
| Per-sandbox libkrun VM. | Gateway spawns `openshell-driver-vm` as a subprocess over a private, state-local Unix socket. | VM runtime state paths are derived only from driver-validated sandbox IDs @@ -62,7 +62,8 @@ users. Custom sandbox images must include the agent runtime and any system dependencies, but they should not need to include the gateway. GPU-capable images must include the user-space libraries required by the workload. The -runtime still owns GPU device injection. +runtime still owns GPU device injection or resource scheduling. Kubernetes maps +GPU counts to pod `nvidia.com/gpu` limits when the cluster exposes that resource. ## Deployment Shape diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 9cffb243b..8e5d54faa 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -1084,6 +1084,13 @@ enum SandboxCommands { #[arg(long)] gpu: bool, + /// Request a specific number of GPUs. + /// + /// This implies --gpu. Kubernetes-backed gateways schedule pods with + /// the corresponding nvidia.com/gpu resource limit. + #[arg(long, value_name = "COUNT", conflicts_with = "gpu_device", value_parser = clap::value_parser!(u32).range(1..))] + gpu_count: Option, + /// Target a driver-specific GPU device. Docker and Podman use CDI device IDs /// (for example "nvidia.com/gpu=0"); VM uses a PCI BDF or index. /// Only valid with --gpu. When omitted with --gpu, the driver uses its default GPU selection. 
@@ -2364,6 +2371,7 @@ async fn main() -> Result<()> { no_keep, editor, gpu, + gpu_count, gpu_device, providers, policy, @@ -2430,6 +2438,7 @@ async fn main() -> Result<()> { upload_spec.as_ref(), keep, gpu, + gpu_count, gpu_device.as_deref(), editor, &providers, @@ -3821,4 +3830,36 @@ mod tests { other => panic!("expected service delete command, got: {other:?}"), } } + + #[test] + fn sandbox_create_gpu_count_rejects_zero() { + let result = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu-count", "0"]); + assert!( + result.is_err(), + "sandbox create --gpu-count 0 should be rejected" + ); + let err = result.unwrap_err().to_string(); + assert!(err.contains("--gpu-count")); + } + + #[test] + fn sandbox_create_gpu_count_conflicts_with_gpu_device() { + let result = Cli::try_parse_from([ + "openshell", + "sandbox", + "create", + "--gpu", + "--gpu-count", + "2", + "--gpu-device", + "nvidia.com/gpu=0", + ]); + assert!( + result.is_err(), + "sandbox create should reject combining --gpu-count with --gpu-device" + ); + let err = result.unwrap_err().to_string(); + assert!(err.contains("--gpu-count")); + assert!(err.contains("--gpu-device")); + } } diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 3205b8f68..5e5a48ac5 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -1467,6 +1467,7 @@ pub async fn sandbox_create( upload: Option<&(String, Option, bool)>, keep: bool, gpu: bool, + gpu_count: Option, gpu_device: Option<&str>, editor: Option, providers: &[String], @@ -1483,6 +1484,14 @@ pub async fn sandbox_create( "--editor cannot be used with a trailing command; use `openshell sandbox connect --editor ...` after the sandbox is ready" )); } + if gpu_count == Some(0) { + return Err(miette::miette!("--gpu-count must be greater than zero")); + } + if gpu_count.is_some() && gpu_device.is_some() { + return Err(miette::miette!( + "--gpu-count cannot be combined with --gpu-device" + )); + } // Check port 
availability *before* creating the sandbox so we don't // leave an orphaned sandbox behind when the forward would fail. @@ -1518,7 +1527,8 @@ pub async fn sandbox_create( } None => None, }; - let requested_gpu = gpu || image.as_deref().is_some_and(image_requests_gpu); + let requested_gpu = + gpu || gpu_count.is_some() || image.as_deref().is_some_and(image_requests_gpu); let inferred_types: Vec = inferred_provider_type(command).into_iter().collect(); let configured_providers = ensure_required_providers( @@ -1539,6 +1549,7 @@ pub async fn sandbox_create( let request = CreateSandboxRequest { spec: Some(SandboxSpec { gpu: requested_gpu, + gpu_count: gpu_count.unwrap_or_default(), gpu_device: gpu_device.unwrap_or_default().to_string(), policy, providers: configured_providers, diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index 6e7d66d11..dfdf975a4 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -84,6 +84,7 @@ impl Drop for EnvVarGuard { #[derive(Clone, Default)] struct SandboxState { deleted_names: Arc>>>, + created_requests: Arc>>, } #[derive(Clone, Default)] @@ -107,7 +108,9 @@ impl OpenShell for TestOpenShell { &self, request: tonic::Request, ) -> Result, Status> { - let name = request.into_inner().name; + let request = request.into_inner(); + let name = request.name.clone(); + self.state.created_requests.lock().await.push(request); let sandbox_name = if name.is_empty() { "test-sandbox".to_string() } else { @@ -648,6 +651,10 @@ async fn deleted_names(server: &TestServer) -> Vec> { server.openshell.state.deleted_names.lock().await.clone() } +async fn created_requests(server: &TestServer) -> Vec { + server.openshell.state.created_requests.lock().await.clone() +} + fn test_tls(server: &TestServer) -> TlsOptions { server.tls.with_gateway_name("openshell") } @@ 
-671,6 +678,7 @@ async fn sandbox_create_keeps_command_sessions_by_default() { false, None, None, + None, &[], None, None, @@ -691,6 +699,47 @@ async fn sandbox_create_keeps_command_sessions_by_default() { ); } +#[tokio::test] +async fn sandbox_create_gpu_count_sets_gpu_intent_and_count() { + let server = run_server().await; + let fake_ssh_dir = tempfile::tempdir().unwrap(); + let xdg_dir = tempfile::tempdir().unwrap(); + let _env = test_env(&fake_ssh_dir, &xdg_dir); + let tls = test_tls(&server); + install_fake_ssh(&fake_ssh_dir); + + run::sandbox_create( + &server.endpoint, + Some("gpu-count"), + None, + "openshell", + None, + true, + false, + Some(2), + None, + None, + &[], + None, + None, + &["echo".to_string(), "OK".to_string()], + Some(false), + Some(false), + &HashMap::new(), + &tls, + ) + .await + .expect("sandbox create should succeed"); + + let requests = created_requests(&server).await; + let spec = requests + .last() + .and_then(|request| request.spec.as_ref()) + .expect("create request should include a spec"); + assert!(spec.gpu); + assert_eq!(spec.gpu_count, 2); +} + #[tokio::test] async fn sandbox_create_deletes_command_sessions_with_no_keep() { let server = run_server().await; @@ -710,6 +759,7 @@ async fn sandbox_create_deletes_command_sessions_with_no_keep() { false, None, None, + None, &[], None, None, @@ -752,6 +802,7 @@ async fn sandbox_create_deletes_shell_sessions_with_no_keep() { false, None, None, + None, &[], None, None, @@ -794,6 +845,7 @@ async fn sandbox_create_keeps_sandbox_with_hidden_keep_flag() { false, None, None, + None, &[], None, None, @@ -836,6 +888,7 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() { false, None, None, + None, &[], None, Some(openshell_core::forward::ForwardSpec::new(forward_port)), diff --git a/crates/openshell-driver-docker/src/tests.rs b/crates/openshell-driver-docker/src/tests.rs index df68d39d6..f5155fd13 100644 --- a/crates/openshell-driver-docker/src/tests.rs +++ 
b/crates/openshell-driver-docker/src/tests.rs @@ -33,6 +33,7 @@ fn test_sandbox() -> DriverSandbox { }), gpu: false, gpu_device: String::new(), + gpu_count: 0, }), status: None, } diff --git a/crates/openshell-driver-kubernetes/README.md b/crates/openshell-driver-kubernetes/README.md index 4a8a8f76b..6202e69f6 100644 --- a/crates/openshell-driver-kubernetes/README.md +++ b/crates/openshell-driver-kubernetes/README.md @@ -44,6 +44,8 @@ pods do not need direct external ingress for SSH. ## GPU Support When a sandbox requests GPU support, the driver checks node allocatable capacity -for `nvidia.com/gpu` and requests one GPU resource in the workload spec. The -sandbox image must provide the user-space libraries needed by the agent -workload. +for `nvidia.com/gpu`. A request with only `gpu=true` asks for one GPU. A request +with `gpu_count > 0` sets the pod resource limit to that count. The cluster must +expose `nvidia.com/gpu` through the NVIDIA device plugin or an equivalent +device-plugin implementation, and the sandbox image must provide the user-space +libraries needed by the agent workload. 
diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 56b73447a..a434731c3 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -194,7 +194,10 @@ impl KubernetesComputeDriver { } pub async fn validate_sandbox_create(&self, sandbox: &Sandbox) -> Result<(), tonic::Status> { - let gpu_requested = sandbox.spec.as_ref().is_some_and(|spec| spec.gpu); + let gpu_requested = sandbox + .spec + .as_ref() + .is_some_and(|spec| spec.gpu || spec.gpu_count > 0); if gpu_requested && !self.has_gpu_capacity().await.map_err(|err| { tonic::Status::internal(format!("check GPU node capacity failed: {err}")) @@ -1011,7 +1014,14 @@ fn sandbox_to_k8s_spec( if let Some(template) = spec.template.as_ref() { root.insert( "podTemplate".to_string(), - sandbox_template_to_k8s(template, spec.gpu, &pod_env, inject_workspace, params), + sandbox_template_to_k8s( + template, + spec.gpu, + spec.gpu_count, + &pod_env, + inject_workspace, + params, + ), ); if !template.agent_socket_path.is_empty() { root.insert( @@ -1044,6 +1054,7 @@ fn sandbox_to_k8s_spec( sandbox_template_to_k8s( &SandboxTemplate::default(), spec.is_some_and(|s| s.gpu), + spec.map_or(0, |s| s.gpu_count), &pod_env, inject_workspace, params, @@ -1059,6 +1070,7 @@ fn sandbox_to_k8s_spec( fn sandbox_template_to_k8s( template: &SandboxTemplate, gpu: bool, + gpu_count: u32, spec_environment: &std::collections::HashMap, inject_workspace: bool, params: &SandboxPodParams<'_>, @@ -1091,7 +1103,7 @@ fn sandbox_template_to_k8s( if use_user_namespaces { spec.insert("hostUsers".to_string(), serde_json::json!(false)); - if gpu { + if gpu || gpu_count > 0 { warn!( "GPU sandbox with user namespaces enabled — \ NVIDIA device plugin compatibility is unverified" @@ -1169,7 +1181,7 @@ fn sandbox_template_to_k8s( ); } - if let Some(resources) = container_resources(template, gpu) { + if let Some(resources) = 
container_resources(template, gpu, gpu_count) { container.insert("resources".to_string(), resources); } spec.insert( @@ -1225,7 +1237,11 @@ fn sandbox_template_to_k8s( result } -fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option { +fn container_resources( + template: &SandboxTemplate, + gpu: bool, + gpu_count: u32, +) -> Option { // Start from the raw resources passthrough in platform_config (preserves // custom resource types like GPU limits that users set via the public API // Struct), then overlay the typed DriverResourceRequirements on top. @@ -1247,8 +1263,11 @@ fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option 0 { + let quantity = gpu_count.to_string(); + apply_gpu_limit(&mut resources, &quantity); + } else if gpu { + apply_gpu_limit(&mut resources, GPU_RESOURCE_QUANTITY); } if resources.as_object().is_some_and(serde_json::Map::is_empty) { None @@ -1257,10 +1276,10 @@ fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option Value { + Value { + kind: Some(Kind::StringValue(value.to_string())), + } + } + + fn struct_value(fields: impl IntoIterator) -> Value { + Value { + kind: Some(Kind::StructValue(Struct { + fields: fields + .into_iter() + .map(|(key, value)| (key.to_string(), value)) + .collect(), + })), + } + } + #[test] fn apply_required_env_always_injects_ssh_handshake_secret() { let mut env = Vec::new(); @@ -1788,6 +1821,7 @@ mod tests { sandbox_template_to_k8s( &SandboxTemplate::default(), true, + 0, &std::collections::HashMap::new(), true, ¶ms, @@ -1804,6 +1838,26 @@ mod tests { ); } + #[test] + fn gpu_count_sandbox_sets_requested_gpu_limit() { + let pod_template = { + let params = SandboxPodParams::default(); + sandbox_template_to_k8s( + &SandboxTemplate::default(), + true, + 4, + &std::collections::HashMap::new(), + true, + ¶ms, + ) + }; + + assert_eq!( + pod_template["spec"]["containers"][0]["resources"]["limits"][GPU_RESOURCE_NAME], + serde_json::json!("4") + ); + } + #[test] fn 
gpu_sandbox_uses_template_runtime_class_name_when_set() { let template = SandboxTemplate { @@ -1824,6 +1878,7 @@ mod tests { sandbox_template_to_k8s( &template, true, + 0, &std::collections::HashMap::new(), true, ¶ms, @@ -1856,6 +1911,7 @@ mod tests { sandbox_template_to_k8s( &template, false, + 0, &std::collections::HashMap::new(), true, ¶ms, @@ -1884,6 +1940,7 @@ mod tests { sandbox_template_to_k8s( &template, true, + 0, &std::collections::HashMap::new(), true, ¶ms, @@ -1898,6 +1955,49 @@ mod tests { ); } + #[test] + fn gpu_count_preserves_existing_resource_limits() { + use openshell_core::proto::compute::v1::DriverResourceRequirements; + + let template = SandboxTemplate { + resources: Some(DriverResourceRequirements { + cpu_limit: "2".to_string(), + ..Default::default() + }), + platform_config: Some(Struct { + fields: std::iter::once(( + "resources_raw".to_string(), + struct_value([( + "limits", + struct_value([ + ("example.com/custom", string_value("7")), + (GPU_RESOURCE_NAME, string_value("2")), + ]), + )]), + )) + .collect(), + }), + ..SandboxTemplate::default() + }; + + let pod_template = { + let params = SandboxPodParams::default(); + sandbox_template_to_k8s( + &template, + true, + 4, + &std::collections::HashMap::new(), + true, + ¶ms, + ) + }; + + let limits = &pod_template["spec"]["containers"][0]["resources"]["limits"]; + assert_eq!(limits["cpu"], serde_json::json!("2")); + assert_eq!(limits["example.com/custom"], serde_json::json!("7")); + assert_eq!(limits[GPU_RESOURCE_NAME], serde_json::json!("4")); + } + #[test] fn host_aliases_injected_when_gateway_ip_set() { let pod_template = { @@ -1908,6 +2008,7 @@ mod tests { sandbox_template_to_k8s( &SandboxTemplate::default(), false, + 0, &std::collections::HashMap::new(), true, ¶ms, @@ -1933,6 +2034,7 @@ mod tests { sandbox_template_to_k8s( &SandboxTemplate::default(), false, + 0, &std::collections::HashMap::new(), true, ¶ms, @@ -1956,6 +2058,7 @@ mod tests { sandbox_template_to_k8s( &template, false, + 0, 
&std::collections::HashMap::new(), true, ¶ms, @@ -2095,6 +2198,7 @@ mod tests { let pod_template = sandbox_template_to_k8s( &SandboxTemplate::default(), false, + 0, &std::collections::HashMap::new(), false, // user provided custom VCTs ¶ms, @@ -2133,6 +2237,7 @@ mod tests { sandbox_template_to_k8s( &SandboxTemplate::default(), false, + 0, &std::collections::HashMap::new(), true, ¶ms, @@ -2198,6 +2303,7 @@ mod tests { let pod_template = sandbox_template_to_k8s( &template, false, + 0, &std::collections::HashMap::new(), true, ¶ms, @@ -2236,6 +2342,7 @@ mod tests { let pod_template = sandbox_template_to_k8s( &template, false, + 0, &std::collections::HashMap::new(), true, ¶ms, @@ -2262,6 +2369,7 @@ mod tests { sandbox_template_to_k8s( &SandboxTemplate::default(), false, + 0, &std::collections::HashMap::new(), true, ¶ms, diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index d2fd34011..38120d67b 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -1132,6 +1132,7 @@ fn driver_sandbox_spec_from_public(spec: &SandboxSpec) -> DriverSandboxSpec { .map(driver_sandbox_template_from_public), gpu: spec.gpu, gpu_device: spec.gpu_device.clone(), + gpu_count: spec.gpu_count, } } @@ -1491,7 +1492,8 @@ fn derive_phase(status: Option<&DriverSandboxStatus>) -> SandboxPhase { } fn rewrite_user_facing_conditions(status: &mut Option, spec: Option<&SandboxSpec>) { - let gpu_requested = spec.is_some_and(|sandbox_spec| sandbox_spec.gpu); + let gpu_requested = + spec.is_some_and(|sandbox_spec| sandbox_spec.gpu || sandbox_spec.gpu_count > 0); if !gpu_requested { return; } @@ -2008,6 +2010,28 @@ mod tests { assert!(build_platform_config(&template).is_none()); } + #[test] + fn driver_sandbox_spec_from_public_maps_gpu_count() { + let public = SandboxSpec { + gpu: true, + gpu_count: 4, + ..Default::default() + }; + + let driver = driver_sandbox_spec_from_public(&public); + + 
assert!(driver.gpu); + assert_eq!(driver.gpu_count, 4); + } + + #[test] + fn driver_sandbox_spec_from_public_preserves_default_gpu_count() { + let driver = driver_sandbox_spec_from_public(&SandboxSpec::default()); + + assert!(!driver.gpu); + assert_eq!(driver.gpu_count, 0); + } + #[test] fn build_platform_config_preserves_non_typed_resource_fields() { let template = SandboxTemplate { diff --git a/crates/openshell-server/src/grpc/validation.rs b/crates/openshell-server/src/grpc/validation.rs index 160b7e031..518babb20 100644 --- a/crates/openshell-server/src/grpc/validation.rs +++ b/crates/openshell-server/src/grpc/validation.rs @@ -131,6 +131,13 @@ pub(super) fn validate_sandbox_spec( validate_sandbox_template(tmpl)?; } + // --- GPU allocation mode --- + if spec.gpu_count > 0 && !spec.gpu_device.is_empty() { + return Err(Status::invalid_argument( + "gpu_count cannot be combined with gpu_device", + )); + } + // --- spec.policy serialized size --- if let Some(ref policy) = spec.policy { let size = policy.encoded_len(); @@ -674,6 +681,29 @@ mod tests { assert!(validate_sandbox_spec("gpu-sandbox", &spec).is_ok()); } + #[test] + fn validate_sandbox_spec_accepts_gpu_count() { + let spec = SandboxSpec { + gpu_count: 4, + ..Default::default() + }; + assert!(validate_sandbox_spec("gpu-count-sandbox", &spec).is_ok()); + } + + #[test] + fn validate_sandbox_spec_rejects_gpu_count_with_gpu_device() { + let spec = SandboxSpec { + gpu: true, + gpu_count: 4, + gpu_device: "nvidia.com/gpu=0".to_string(), + ..Default::default() + }; + let err = validate_sandbox_spec("gpu-count-sandbox", &spec).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("gpu_count")); + assert!(err.message().contains("gpu_device")); + } + #[test] fn validate_sandbox_spec_accepts_empty_defaults() { assert!(validate_sandbox_spec("", &default_spec()).is_ok()); diff --git a/docs/kubernetes/setup.mdx b/docs/kubernetes/setup.mdx index 067e0d122..4adccecde 100644 --- 
a/docs/kubernetes/setup.mdx +++ b/docs/kubernetes/setup.mdx @@ -25,6 +25,7 @@ Make sure the following are in place before you install. | Agent Sandbox controller and CRDs | Yes | Install before the OpenShell chart. Refer to [Install Agent Sandbox](#install-agent-sandbox). | | cert-manager | No | Refer to [Managing Certificates](/kubernetes/managing-certificates). Use cert-manager only if you prefer it over the built-in PKI job. | | Kubernetes Gateway API | No | Refer to [Ingress](/kubernetes/ingress). Use it only for external access without port-forwarding. | +| NVIDIA device plugin or equivalent | No | Required only for GPU sandboxes. The cluster must expose allocatable `nvidia.com/gpu` resources for `--gpu` and `--gpu-count`. | ## Install Agent Sandbox diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx index 3eddecec5..eddbe210b 100644 --- a/docs/sandboxes/manage-sandboxes.mdx +++ b/docs/sandboxes/manage-sandboxes.mdx @@ -35,6 +35,16 @@ To request GPU resources, add `--gpu`: openshell sandbox create --gpu -- claude ``` +To request multiple GPUs on a Kubernetes-backed gateway, set a GPU count: + +```shell +openshell sandbox create --gpu-count 4 -- claude +``` + +GPU counts schedule Kubernetes sandbox pods with an `nvidia.com/gpu` resource +limit. Use `--gpu-device` only when you need driver-specific device selection; +it cannot be combined with `--gpu-count`. + For Docker-backed sandboxes, GPU injection uses Docker CDI. If you enable Docker CDI after the gateway starts, restart the gateway so OpenShell can detect the updated Docker daemon capability. diff --git a/proto/compute_driver.proto b/proto/compute_driver.proto index 3c4308f3f..603f5e954 100644 --- a/proto/compute_driver.proto +++ b/proto/compute_driver.proto @@ -90,6 +90,9 @@ message DriverSandboxSpec { // (e.g. "0", "1"). When empty with gpu=true, the driver assigns the // first available GPU. 
string gpu_device = 10; + // Number of NVIDIA GPUs requested for this sandbox. 0 means unspecified; + // when gpu=true and gpu_count=0, drivers keep their existing default. + uint32 gpu_count = 11; } // Driver-owned runtime template consumed by the compute platform. diff --git a/proto/openshell.proto b/proto/openshell.proto index bb2ce6cec..f2c17f451 100644 --- a/proto/openshell.proto +++ b/proto/openshell.proto @@ -256,6 +256,9 @@ message SandboxSpec { // (e.g. "0", "1"). When empty with gpu=true, the driver assigns the // first available GPU. string gpu_device = 10; + // Number of NVIDIA GPUs requested for this sandbox. 0 means unspecified; + // when gpu=true and gpu_count=0, drivers keep their existing default. + uint32 gpu_count = 11; } // Public sandbox template mapped onto compute-driver template inputs. From 3dd8d5a0c050028eb306d10945a43661b68c9eb7 Mon Sep 17 00:00:00 2001 From: Ryan Angilly Date: Tue, 12 May 2026 19:38:33 -0600 Subject: [PATCH 2/3] feat(sandbox): add resource JSON passthrough Signed-off-by: Ryan Angilly --- architecture/compute-runtimes.md | 3 +- crates/openshell-cli/src/main.rs | 46 ++-- crates/openshell-cli/src/run.rs | 198 ++++++++++++++++-- .../sandbox_create_lifecycle_integration.rs | 92 +++++++- crates/openshell-driver-docker/src/tests.rs | 1 - crates/openshell-driver-kubernetes/README.md | 11 +- .../openshell-driver-kubernetes/src/driver.rs | 72 +------ crates/openshell-server/src/compute/mod.rs | 26 +-- .../openshell-server/src/grpc/validation.rs | 30 --- docs/sandboxes/manage-sandboxes.mdx | 14 +- proto/compute_driver.proto | 3 - proto/openshell.proto | 3 - 12 files changed, 334 insertions(+), 165 deletions(-) diff --git a/architecture/compute-runtimes.md b/architecture/compute-runtimes.md index b048af605..11f6de22a 100644 --- a/architecture/compute-runtimes.md +++ b/architecture/compute-runtimes.md @@ -63,7 +63,8 @@ Custom sandbox images must include the agent runtime and any system dependencies, but they should not need to 
include the gateway. GPU-capable images must include the user-space libraries required by the workload. The runtime still owns GPU device injection or resource scheduling. Kubernetes maps -GPU counts to pod `nvidia.com/gpu` limits when the cluster exposes that resource. +template resource limits such as `nvidia.com/gpu` into the sandbox pod when the +cluster exposes those resources. ## Deployment Shape diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 8e5d54faa..d1bc7ca40 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -1088,9 +1088,17 @@ enum SandboxCommands { /// /// This implies --gpu. Kubernetes-backed gateways schedule pods with /// the corresponding nvidia.com/gpu resource limit. - #[arg(long, value_name = "COUNT", conflicts_with = "gpu_device", value_parser = clap::value_parser!(u32).range(1..))] + #[arg(long, value_name = "COUNT", value_parser = clap::value_parser!(u32).range(1..))] gpu_count: Option, + /// Set compute resource requests and limits as JSON. + /// + /// The JSON must be an object with the same shape as + /// SandboxTemplate.resources, for example: + /// {"requests":{"cpu":"2"},"limits":{"cpu":"16"}} + #[arg(long, value_name = "JSON")] + resources_json: Option, + /// Target a driver-specific GPU device. Docker and Podman use CDI device IDs /// (for example "nvidia.com/gpu=0"); VM uses a PCI BDF or index. /// Only valid with --gpu. When omitted with --gpu, the driver uses its default GPU selection. 
@@ -2372,6 +2380,7 @@ async fn main() -> Result<()> { editor, gpu, gpu_count, + resources_json, gpu_device, providers, policy, @@ -2439,6 +2448,7 @@ async fn main() -> Result<()> { keep, gpu, gpu_count, + resources_json.as_deref(), gpu_device.as_deref(), editor, &providers, @@ -3843,23 +3853,27 @@ mod tests { } #[test] - fn sandbox_create_gpu_count_conflicts_with_gpu_device() { - let result = Cli::try_parse_from([ + fn sandbox_create_resources_json_parses() { + let cli = Cli::try_parse_from([ "openshell", "sandbox", "create", - "--gpu", - "--gpu-count", - "2", - "--gpu-device", - "nvidia.com/gpu=0", - ]); - assert!( - result.is_err(), - "sandbox create should reject combining --gpu-count with --gpu-device" - ); - let err = result.unwrap_err().to_string(); - assert!(err.contains("--gpu-count")); - assert!(err.contains("--gpu-device")); + "--resources-json", + r#"{"requests":{"cpu":"2"},"limits":{"cpu":"16"}}"#, + ]) + .expect("sandbox create --resources-json should parse"); + + match cli.command { + Some(Commands::Sandbox { + command: Some(SandboxCommands::Create { resources_json, .. }), + .. + }) => { + assert_eq!( + resources_json.as_deref(), + Some(r#"{"requests":{"cpu":"2"},"limits":{"cpu":"16"}}"#) + ); + } + other => panic!("expected sandbox create command, got: {other:?}"), + } } } diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 5e5a48ac5..07ee6fd69 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -67,6 +67,8 @@ pub use openshell_core::forward::{ find_forward_by_port, list_forwards, stop_forward, stop_forwards_for_sandbox, }; +const NVIDIA_GPU_RESOURCE_NAME: &str = "nvidia.com/gpu"; + /// Convert a sandbox phase integer to a human-readable string. 
fn phase_name(phase: i32) -> &'static str { match SandboxPhase::try_from(phase) { @@ -1457,6 +1459,93 @@ async fn finalize_sandbox_create_session( session_result } +fn parse_sandbox_resources_json( + resources_json: Option<&str>, + gpu_count: Option, +) -> Result> { + let mut resources = match resources_json { + Some(raw) => { + if raw.trim().is_empty() { + return Err(miette::miette!("--resources-json cannot be empty")); + } + + let value: serde_json::Value = serde_json::from_str(raw) + .into_diagnostic() + .wrap_err("invalid --resources-json JSON")?; + let serde_json::Value::Object(fields) = value else { + return Err(miette::miette!("--resources-json must be a JSON object")); + }; + + Some(json_object_to_prost_struct(fields)) + } + None => None, + }; + + if let Some(count) = gpu_count { + if count == 0 { + return Err(miette::miette!("--gpu-count must be greater than zero")); + } + inject_gpu_count_resource(resources.get_or_insert_with(Default::default), count); + } + + Ok(resources) +} + +fn json_object_to_prost_struct( + fields: serde_json::Map, +) -> prost_types::Struct { + prost_types::Struct { + fields: fields + .into_iter() + .map(|(key, value)| (key, json_value_to_prost_value(value))) + .collect(), + } +} + +fn json_value_to_prost_value(value: serde_json::Value) -> prost_types::Value { + use prost_types::{ListValue, Value, value::Kind}; + + let kind = match value { + serde_json::Value::Null => Kind::NullValue(0), + serde_json::Value::Bool(value) => Kind::BoolValue(value), + serde_json::Value::Number(value) => Kind::NumberValue(value.as_f64().unwrap_or_default()), + serde_json::Value::String(value) => Kind::StringValue(value), + serde_json::Value::Array(values) => Kind::ListValue(ListValue { + values: values.into_iter().map(json_value_to_prost_value).collect(), + }), + serde_json::Value::Object(fields) => Kind::StructValue(json_object_to_prost_struct(fields)), + }; + + Value { kind: Some(kind) } +} + +fn inject_gpu_count_resource(resources: &mut 
prost_types::Struct, gpu_count: u32) { + use prost_types::{Struct, Value, value::Kind}; + + let limits_kind = &mut resources + .fields + .entry("limits".to_string()) + .or_insert_with(|| Value { + kind: Some(Kind::StructValue(Struct::default())), + }) + .kind; + + if !matches!(limits_kind, Some(Kind::StructValue(_))) { + *limits_kind = Some(Kind::StructValue(Struct::default())); + } + + let Some(Kind::StructValue(limits)) = limits_kind.as_mut() else { + unreachable!("limits kind was normalized to a StructValue"); + }; + + limits.fields.insert( + NVIDIA_GPU_RESOURCE_NAME.to_string(), + Value { + kind: Some(Kind::StringValue(gpu_count.to_string())), + }, + ); +} + /// Create a sandbox with default settings. #[allow(clippy::too_many_arguments, clippy::implicit_hasher)] // user-facing CLI command; default hasher is fine pub async fn sandbox_create( @@ -1468,6 +1557,7 @@ pub async fn sandbox_create( keep: bool, gpu: bool, gpu_count: Option, + resources_json: Option<&str>, gpu_device: Option<&str>, editor: Option, providers: &[String], @@ -1484,14 +1574,7 @@ pub async fn sandbox_create( "--editor cannot be used with a trailing command; use `openshell sandbox connect --editor ...` after the sandbox is ready" )); } - if gpu_count == Some(0) { - return Err(miette::miette!("--gpu-count must be greater than zero")); - } - if gpu_count.is_some() && gpu_device.is_some() { - return Err(miette::miette!( - "--gpu-count cannot be combined with --gpu-device" - )); - } + let resources = parse_sandbox_resources_json(resources_json, gpu_count)?; // Check port availability *before* creating the sandbox so we don't // leave an orphaned sandbox behind when the forward would fail. 
@@ -1541,15 +1624,19 @@ pub async fn sandbox_create( let policy = load_sandbox_policy(policy)?; - let template = image.map(|img| SandboxTemplate { - image: img, - ..SandboxTemplate::default() - }); + let template = if image.is_some() || resources.is_some() { + Some(SandboxTemplate { + image: image.unwrap_or_default(), + resources, + ..SandboxTemplate::default() + }) + } else { + None + }; let request = CreateSandboxRequest { spec: Some(SandboxSpec { gpu: requested_gpu, - gpu_count: gpu_count.unwrap_or_default(), gpu_device: gpu_device.unwrap_or_default().to_string(), policy, providers: configured_providers, @@ -6028,9 +6115,9 @@ mod tests { gateway_env_override_warning, gateway_select_with, gateway_type_label, git_sync_files, http_health_check, image_requests_gpu, import_local_package_mtls_bundle, inferred_provider_type, package_managed_tls_dirs, parse_cli_setting_value, - parse_credential_pairs, plaintext_gateway_is_remote, provisioning_timeout_message, - ready_false_condition_message, resolve_from, sandbox_should_persist, - service_expose_status_error, service_url_for_gateway, + parse_credential_pairs, parse_sandbox_resources_json, plaintext_gateway_is_remote, + provisioning_timeout_message, ready_false_condition_message, resolve_from, + sandbox_should_persist, service_expose_status_error, service_url_for_gateway, }; use crate::TEST_ENV_LOCK; use hyper::StatusCode; @@ -6048,6 +6135,85 @@ mod tests { Provider, SandboxCondition, SandboxStatus, datamodel::v1::ObjectMeta, }; + fn prost_struct_field<'a>( + value: &'a prost_types::Struct, + field: &str, + ) -> &'a prost_types::Struct { + let value = value + .fields + .get(field) + .and_then(|value| value.kind.as_ref()) + .unwrap_or_else(|| panic!("expected {field} field")); + let prost_types::value::Kind::StructValue(value) = value else { + panic!("expected {field} to be an object"); + }; + value + } + + fn prost_string_field(value: &prost_types::Struct, field: &str) -> String { + let value = value + .fields + 
.get(field) + .and_then(|value| value.kind.as_ref()) + .unwrap_or_else(|| panic!("expected {field} field")); + let prost_types::value::Kind::StringValue(value) = value else { + panic!("expected {field} to be a string"); + }; + value.clone() + } + + #[test] + fn parse_sandbox_resources_json_rejects_empty_string() { + let err = parse_sandbox_resources_json(Some(""), None) + .expect_err("empty resources JSON should fail"); + + assert!(err.to_string().contains("--resources-json")); + assert!(err.to_string().contains("empty")); + } + + #[test] + fn parse_sandbox_resources_json_rejects_invalid_json() { + let err = parse_sandbox_resources_json(Some("{bad"), None) + .expect_err("invalid resources JSON should fail"); + + assert!(err.to_string().contains("invalid --resources-json JSON")); + } + + #[test] + fn parse_sandbox_resources_json_rejects_non_object_json() { + let err = parse_sandbox_resources_json(Some(r#"["cpu"]"#), None) + .expect_err("non-object resources JSON should fail"); + + assert!(err.to_string().contains("--resources-json")); + assert!(err.to_string().contains("JSON object")); + } + + #[test] + fn parse_sandbox_resources_json_injects_gpu_count_resource() { + let resources = parse_sandbox_resources_json( + Some(r#"{"requests":{"cpu":"2"},"limits":{"cpu":"16"}}"#), + Some(4), + ) + .expect("resources should parse") + .expect("resources should be present"); + + let requests = prost_struct_field(&resources, "requests"); + let limits = prost_struct_field(&resources, "limits"); + + assert_eq!(prost_string_field(requests, "cpu"), "2"); + assert_eq!(prost_string_field(limits, "cpu"), "16"); + assert_eq!(prost_string_field(limits, "nvidia.com/gpu"), "4"); + } + + #[test] + fn parse_sandbox_resources_json_rejects_zero_gpu_count() { + let err = + parse_sandbox_resources_json(None, Some(0)).expect_err("zero GPU count should fail"); + + assert!(err.to_string().contains("--gpu-count")); + assert!(err.to_string().contains("greater than zero")); + } + struct EnvVarGuard { 
key: &'static str, original: Option, diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index dfdf975a4..861c8c011 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -659,6 +659,30 @@ fn test_tls(server: &TestServer) -> TlsOptions { server.tls.with_gateway_name("openshell") } +fn struct_field<'a>(value: &'a prost_types::Struct, field: &str) -> &'a prost_types::Struct { + let value = value + .fields + .get(field) + .and_then(|value| value.kind.as_ref()) + .unwrap_or_else(|| panic!("expected {field} field")); + let prost_types::value::Kind::StructValue(value) = value else { + panic!("expected {field} to be an object"); + }; + value +} + +fn string_field(value: &prost_types::Struct, field: &str) -> String { + let value = value + .fields + .get(field) + .and_then(|value| value.kind.as_ref()) + .unwrap_or_else(|| panic!("expected {field} field")); + let prost_types::value::Kind::StringValue(value) = value else { + panic!("expected {field} to be a string"); + }; + value.clone() +} + #[tokio::test] async fn sandbox_create_keeps_command_sessions_by_default() { let server = run_server().await; @@ -679,6 +703,7 @@ async fn sandbox_create_keeps_command_sessions_by_default() { None, None, None, + None, &[], None, None, @@ -700,7 +725,7 @@ async fn sandbox_create_keeps_command_sessions_by_default() { } #[tokio::test] -async fn sandbox_create_gpu_count_sets_gpu_intent_and_count() { +async fn sandbox_create_gpu_count_sets_gpu_intent_and_resources() { let server = run_server().await; let fake_ssh_dir = tempfile::tempdir().unwrap(); let xdg_dir = tempfile::tempdir().unwrap(); @@ -719,6 +744,7 @@ async fn sandbox_create_gpu_count_sets_gpu_intent_and_count() { Some(2), None, None, + None, &[], None, None, @@ -737,7 +763,65 @@ async fn 
sandbox_create_gpu_count_sets_gpu_intent_and_count() { .and_then(|request| request.spec.as_ref()) .expect("create request should include a spec"); assert!(spec.gpu); - assert_eq!(spec.gpu_count, 2); + + let resources = spec + .template + .as_ref() + .and_then(|template| template.resources.as_ref()) + .expect("gpu count should create template resources"); + let limits = struct_field(resources, "limits"); + assert_eq!(string_field(limits, "nvidia.com/gpu"), "2"); +} + +#[tokio::test] +async fn sandbox_create_resources_json_sets_template_resources() { + let server = run_server().await; + let fake_ssh_dir = tempfile::tempdir().unwrap(); + let xdg_dir = tempfile::tempdir().unwrap(); + let _env = test_env(&fake_ssh_dir, &xdg_dir); + let tls = test_tls(&server); + install_fake_ssh(&fake_ssh_dir); + + run::sandbox_create( + &server.endpoint, + Some("resources-json"), + None, + "openshell", + None, + true, + false, + None, + Some(r#"{"requests":{"cpu":"2"},"limits":{"cpu":"16"}}"#), + None, + None, + &[], + None, + None, + &["echo".to_string(), "OK".to_string()], + Some(false), + Some(false), + &HashMap::new(), + &tls, + ) + .await + .expect("sandbox create should succeed"); + + let requests = created_requests(&server).await; + let spec = requests + .last() + .and_then(|request| request.spec.as_ref()) + .expect("create request should include a spec"); + assert!(!spec.gpu); + + let resources = spec + .template + .as_ref() + .and_then(|template| template.resources.as_ref()) + .expect("resources JSON should create template resources"); + let requests = struct_field(resources, "requests"); + let limits = struct_field(resources, "limits"); + assert_eq!(string_field(requests, "cpu"), "2"); + assert_eq!(string_field(limits, "cpu"), "16"); } #[tokio::test] @@ -760,6 +844,7 @@ async fn sandbox_create_deletes_command_sessions_with_no_keep() { None, None, None, + None, &[], None, None, @@ -803,6 +888,7 @@ async fn sandbox_create_deletes_shell_sessions_with_no_keep() { None, None, 
None, + None, &[], None, None, @@ -846,6 +932,7 @@ async fn sandbox_create_keeps_sandbox_with_hidden_keep_flag() { None, None, None, + None, &[], None, None, @@ -889,6 +976,7 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() { None, None, None, + None, &[], None, Some(openshell_core::forward::ForwardSpec::new(forward_port)), diff --git a/crates/openshell-driver-docker/src/tests.rs b/crates/openshell-driver-docker/src/tests.rs index f5155fd13..df68d39d6 100644 --- a/crates/openshell-driver-docker/src/tests.rs +++ b/crates/openshell-driver-docker/src/tests.rs @@ -33,7 +33,6 @@ fn test_sandbox() -> DriverSandbox { }), gpu: false, gpu_device: String::new(), - gpu_count: 0, }), status: None, } diff --git a/crates/openshell-driver-kubernetes/README.md b/crates/openshell-driver-kubernetes/README.md index 6202e69f6..f2324b757 100644 --- a/crates/openshell-driver-kubernetes/README.md +++ b/crates/openshell-driver-kubernetes/README.md @@ -44,8 +44,9 @@ pods do not need direct external ingress for SSH. ## GPU Support When a sandbox requests GPU support, the driver checks node allocatable capacity -for `nvidia.com/gpu`. A request with only `gpu=true` asks for one GPU. A request -with `gpu_count > 0` sets the pod resource limit to that count. The cluster must -expose `nvidia.com/gpu` through the NVIDIA device plugin or an equivalent -device-plugin implementation, and the sandbox image must provide the user-space -libraries needed by the agent workload. +for `nvidia.com/gpu`. A request with only `gpu=true` asks for one GPU unless the +template already provides an explicit `limits["nvidia.com/gpu"]` value through +the resource passthrough. The cluster must expose `nvidia.com/gpu` through the +NVIDIA device plugin or an equivalent device-plugin implementation, and the +sandbox image must provide the user-space libraries needed by the agent +workload. 
diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index a434731c3..5b2fa8984 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -194,10 +194,7 @@ impl KubernetesComputeDriver { } pub async fn validate_sandbox_create(&self, sandbox: &Sandbox) -> Result<(), tonic::Status> { - let gpu_requested = sandbox - .spec - .as_ref() - .is_some_and(|spec| spec.gpu || spec.gpu_count > 0); + let gpu_requested = sandbox.spec.as_ref().is_some_and(|spec| spec.gpu); if gpu_requested && !self.has_gpu_capacity().await.map_err(|err| { tonic::Status::internal(format!("check GPU node capacity failed: {err}")) @@ -1014,14 +1011,7 @@ fn sandbox_to_k8s_spec( if let Some(template) = spec.template.as_ref() { root.insert( "podTemplate".to_string(), - sandbox_template_to_k8s( - template, - spec.gpu, - spec.gpu_count, - &pod_env, - inject_workspace, - params, - ), + sandbox_template_to_k8s(template, spec.gpu, &pod_env, inject_workspace, params), ); if !template.agent_socket_path.is_empty() { root.insert( @@ -1054,7 +1044,6 @@ fn sandbox_to_k8s_spec( sandbox_template_to_k8s( &SandboxTemplate::default(), spec.is_some_and(|s| s.gpu), - spec.map_or(0, |s| s.gpu_count), &pod_env, inject_workspace, params, @@ -1070,7 +1059,6 @@ fn sandbox_to_k8s_spec( fn sandbox_template_to_k8s( template: &SandboxTemplate, gpu: bool, - gpu_count: u32, spec_environment: &std::collections::HashMap, inject_workspace: bool, params: &SandboxPodParams<'_>, @@ -1103,7 +1091,7 @@ fn sandbox_template_to_k8s( if use_user_namespaces { spec.insert("hostUsers".to_string(), serde_json::json!(false)); - if gpu || gpu_count > 0 { + if gpu { warn!( "GPU sandbox with user namespaces enabled — \ NVIDIA device plugin compatibility is unverified" @@ -1181,7 +1169,7 @@ fn sandbox_template_to_k8s( ); } - if let Some(resources) = container_resources(template, gpu, gpu_count) { + if let Some(resources) = 
container_resources(template, gpu) { container.insert("resources".to_string(), resources); } spec.insert( @@ -1237,11 +1225,7 @@ fn sandbox_template_to_k8s( result } -fn container_resources( - template: &SandboxTemplate, - gpu: bool, - gpu_count: u32, -) -> Option { +fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option { // Start from the raw resources passthrough in platform_config (preserves // custom resource types like GPU limits that users set via the public API // Struct), then overlay the typed DriverResourceRequirements on top. @@ -1263,10 +1247,7 @@ fn container_resources( apply("limits", "memory", &req.memory_limit); } - if gpu_count > 0 { - let quantity = gpu_count.to_string(); - apply_gpu_limit(&mut resources, &quantity); - } else if gpu { + if gpu { apply_gpu_limit(&mut resources, GPU_RESOURCE_QUANTITY); } if resources.as_object().is_some_and(serde_json::Map::is_empty) { @@ -1290,7 +1271,9 @@ fn apply_gpu_limit(resources: &mut serde_json::Value, quantity: &str) { return apply_gpu_limit(resources, quantity); }; - limits_obj.insert(GPU_RESOURCE_NAME.to_string(), serde_json::json!(quantity)); + limits_obj + .entry(GPU_RESOURCE_NAME.to_string()) + .or_insert_with(|| serde_json::json!(quantity)); } #[allow(clippy::too_many_arguments)] @@ -1821,7 +1804,6 @@ mod tests { sandbox_template_to_k8s( &SandboxTemplate::default(), true, - 0, &std::collections::HashMap::new(), true, ¶ms, @@ -1838,26 +1820,6 @@ mod tests { ); } - #[test] - fn gpu_count_sandbox_sets_requested_gpu_limit() { - let pod_template = { - let params = SandboxPodParams::default(); - sandbox_template_to_k8s( - &SandboxTemplate::default(), - true, - 4, - &std::collections::HashMap::new(), - true, - ¶ms, - ) - }; - - assert_eq!( - pod_template["spec"]["containers"][0]["resources"]["limits"][GPU_RESOURCE_NAME], - serde_json::json!("4") - ); - } - #[test] fn gpu_sandbox_uses_template_runtime_class_name_when_set() { let template = SandboxTemplate { @@ -1878,7 +1840,6 @@ mod tests { 
sandbox_template_to_k8s( &template, true, - 0, &std::collections::HashMap::new(), true, ¶ms, @@ -1911,7 +1872,6 @@ mod tests { sandbox_template_to_k8s( &template, false, - 0, &std::collections::HashMap::new(), true, ¶ms, @@ -1940,7 +1900,6 @@ mod tests { sandbox_template_to_k8s( &template, true, - 0, &std::collections::HashMap::new(), true, ¶ms, @@ -1956,7 +1915,7 @@ mod tests { } #[test] - fn gpu_count_preserves_existing_resource_limits() { + fn gpu_sandbox_preserves_explicit_gpu_resource_limit() { use openshell_core::proto::compute::v1::DriverResourceRequirements; let template = SandboxTemplate { @@ -1985,7 +1944,6 @@ mod tests { sandbox_template_to_k8s( &template, true, - 4, &std::collections::HashMap::new(), true, ¶ms, @@ -1995,7 +1953,7 @@ mod tests { let limits = &pod_template["spec"]["containers"][0]["resources"]["limits"]; assert_eq!(limits["cpu"], serde_json::json!("2")); assert_eq!(limits["example.com/custom"], serde_json::json!("7")); - assert_eq!(limits[GPU_RESOURCE_NAME], serde_json::json!("4")); + assert_eq!(limits[GPU_RESOURCE_NAME], serde_json::json!("2")); } #[test] @@ -2008,7 +1966,6 @@ mod tests { sandbox_template_to_k8s( &SandboxTemplate::default(), false, - 0, &std::collections::HashMap::new(), true, ¶ms, @@ -2034,7 +1991,6 @@ mod tests { sandbox_template_to_k8s( &SandboxTemplate::default(), false, - 0, &std::collections::HashMap::new(), true, ¶ms, @@ -2058,7 +2014,6 @@ mod tests { sandbox_template_to_k8s( &template, false, - 0, &std::collections::HashMap::new(), true, ¶ms, @@ -2198,7 +2153,6 @@ mod tests { let pod_template = sandbox_template_to_k8s( &SandboxTemplate::default(), false, - 0, &std::collections::HashMap::new(), false, // user provided custom VCTs ¶ms, @@ -2237,7 +2191,6 @@ mod tests { sandbox_template_to_k8s( &SandboxTemplate::default(), false, - 0, &std::collections::HashMap::new(), true, ¶ms, @@ -2303,7 +2256,6 @@ mod tests { let pod_template = sandbox_template_to_k8s( &template, false, - 0, &std::collections::HashMap::new(), 
true, ¶ms, @@ -2342,7 +2294,6 @@ mod tests { let pod_template = sandbox_template_to_k8s( &template, false, - 0, &std::collections::HashMap::new(), true, ¶ms, @@ -2369,7 +2320,6 @@ mod tests { sandbox_template_to_k8s( &SandboxTemplate::default(), false, - 0, &std::collections::HashMap::new(), true, ¶ms, diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 38120d67b..d2fd34011 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -1132,7 +1132,6 @@ fn driver_sandbox_spec_from_public(spec: &SandboxSpec) -> DriverSandboxSpec { .map(driver_sandbox_template_from_public), gpu: spec.gpu, gpu_device: spec.gpu_device.clone(), - gpu_count: spec.gpu_count, } } @@ -1492,8 +1491,7 @@ fn derive_phase(status: Option<&DriverSandboxStatus>) -> SandboxPhase { } fn rewrite_user_facing_conditions(status: &mut Option, spec: Option<&SandboxSpec>) { - let gpu_requested = - spec.is_some_and(|sandbox_spec| sandbox_spec.gpu || sandbox_spec.gpu_count > 0); + let gpu_requested = spec.is_some_and(|sandbox_spec| sandbox_spec.gpu); if !gpu_requested { return; } @@ -2010,28 +2008,6 @@ mod tests { assert!(build_platform_config(&template).is_none()); } - #[test] - fn driver_sandbox_spec_from_public_maps_gpu_count() { - let public = SandboxSpec { - gpu: true, - gpu_count: 4, - ..Default::default() - }; - - let driver = driver_sandbox_spec_from_public(&public); - - assert!(driver.gpu); - assert_eq!(driver.gpu_count, 4); - } - - #[test] - fn driver_sandbox_spec_from_public_preserves_default_gpu_count() { - let driver = driver_sandbox_spec_from_public(&SandboxSpec::default()); - - assert!(!driver.gpu); - assert_eq!(driver.gpu_count, 0); - } - #[test] fn build_platform_config_preserves_non_typed_resource_fields() { let template = SandboxTemplate { diff --git a/crates/openshell-server/src/grpc/validation.rs b/crates/openshell-server/src/grpc/validation.rs index 518babb20..160b7e031 100644 --- 
a/crates/openshell-server/src/grpc/validation.rs +++ b/crates/openshell-server/src/grpc/validation.rs @@ -131,13 +131,6 @@ pub(super) fn validate_sandbox_spec( validate_sandbox_template(tmpl)?; } - // --- GPU allocation mode --- - if spec.gpu_count > 0 && !spec.gpu_device.is_empty() { - return Err(Status::invalid_argument( - "gpu_count cannot be combined with gpu_device", - )); - } - // --- spec.policy serialized size --- if let Some(ref policy) = spec.policy { let size = policy.encoded_len(); @@ -681,29 +674,6 @@ mod tests { assert!(validate_sandbox_spec("gpu-sandbox", &spec).is_ok()); } - #[test] - fn validate_sandbox_spec_accepts_gpu_count() { - let spec = SandboxSpec { - gpu_count: 4, - ..Default::default() - }; - assert!(validate_sandbox_spec("gpu-count-sandbox", &spec).is_ok()); - } - - #[test] - fn validate_sandbox_spec_rejects_gpu_count_with_gpu_device() { - let spec = SandboxSpec { - gpu: true, - gpu_count: 4, - gpu_device: "nvidia.com/gpu=0".to_string(), - ..Default::default() - }; - let err = validate_sandbox_spec("gpu-count-sandbox", &spec).unwrap_err(); - assert_eq!(err.code(), Code::InvalidArgument); - assert!(err.message().contains("gpu_count")); - assert!(err.message().contains("gpu_device")); - } - #[test] fn validate_sandbox_spec_accepts_empty_defaults() { assert!(validate_sandbox_spec("", &default_spec()).is_ok()); diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx index eddbe210b..0ded3ddf3 100644 --- a/docs/sandboxes/manage-sandboxes.mdx +++ b/docs/sandboxes/manage-sandboxes.mdx @@ -42,8 +42,18 @@ openshell sandbox create --gpu-count 4 -- claude ``` GPU counts schedule Kubernetes sandbox pods with an `nvidia.com/gpu` resource -limit. Use `--gpu-device` only when you need driver-specific device selection; -it cannot be combined with `--gpu-count`. +limit. 
+ +To pass generic resource requests and limits through to the sandbox template, +use `--resources-json` with a JSON object: + +```shell +openshell sandbox create \ + --resources-json '{"requests":{"cpu":"2"},"limits":{"cpu":"16"}}' +``` + +The JSON is forwarded as `SandboxTemplate.resources`. Use Kubernetes resource +quantity strings for CPU, memory, and extended resources. For Docker-backed sandboxes, GPU injection uses Docker CDI. If you enable Docker CDI after the gateway starts, restart the gateway so OpenShell can detect the diff --git a/proto/compute_driver.proto b/proto/compute_driver.proto index 603f5e954..3c4308f3f 100644 --- a/proto/compute_driver.proto +++ b/proto/compute_driver.proto @@ -90,9 +90,6 @@ message DriverSandboxSpec { // (e.g. "0", "1"). When empty with gpu=true, the driver assigns the // first available GPU. string gpu_device = 10; - // Number of NVIDIA GPUs requested for this sandbox. 0 means unspecified; - // when gpu=true and gpu_count=0, drivers keep their existing default. - uint32 gpu_count = 11; } // Driver-owned runtime template consumed by the compute platform. diff --git a/proto/openshell.proto b/proto/openshell.proto index f2c17f451..bb2ce6cec 100644 --- a/proto/openshell.proto +++ b/proto/openshell.proto @@ -256,9 +256,6 @@ message SandboxSpec { // (e.g. "0", "1"). When empty with gpu=true, the driver assigns the // first available GPU. string gpu_device = 10; - // Number of NVIDIA GPUs requested for this sandbox. 0 means unspecified; - // when gpu=true and gpu_count=0, drivers keep their existing default. - uint32 gpu_count = 11; } // Public sandbox template mapped onto compute-driver template inputs. 
From 9c167294be5013752924437f60d5e90ec32511ae Mon Sep 17 00:00:00 2001 From: Ryan Angilly Date: Wed, 13 May 2026 10:07:32 -0600 Subject: [PATCH 3/3] refactor(sandbox): add ResourceSpec for sandbox resources Signed-off-by: Ryan Angilly --- architecture/compute-runtimes.md | 4 +- crates/openshell-cli/src/main.rs | 100 +++++-- crates/openshell-cli/src/run.rs | 257 +++++++----------- .../sandbox_create_lifecycle_integration.rs | 79 ++---- crates/openshell-driver-docker/src/tests.rs | 1 + crates/openshell-driver-kubernetes/README.md | 13 +- .../openshell-driver-kubernetes/src/driver.rs | 48 +++- crates/openshell-server/src/compute/mod.rs | 117 +++++++- .../openshell-server/src/grpc/validation.rs | 192 ++++++++++++- docs/sandboxes/manage-sandboxes.mdx | 21 +- proto/compute_driver.proto | 4 + proto/openshell.proto | 22 ++ 12 files changed, 592 insertions(+), 266 deletions(-) diff --git a/architecture/compute-runtimes.md b/architecture/compute-runtimes.md index 11f6de22a..af12291a8 100644 --- a/architecture/compute-runtimes.md +++ b/architecture/compute-runtimes.md @@ -63,8 +63,8 @@ Custom sandbox images must include the agent runtime and any system dependencies, but they should not need to include the gateway. GPU-capable images must include the user-space libraries required by the workload. The runtime still owns GPU device injection or resource scheduling. Kubernetes maps -template resource limits such as `nvidia.com/gpu` into the sandbox pod when the -cluster exposes those resources. +portable `SandboxSpec.resources` CPU, memory, and GPU count requirements into +pod resource requests and limits when the cluster exposes those resources. 
## Deployment Shape diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index d1bc7ca40..542a57ab7 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -1030,6 +1030,7 @@ enum DoctorCommands { } #[derive(Subcommand, Debug)] +#[allow(clippy::large_enum_variant)] enum SandboxCommands { /// Create a sandbox. #[command(help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] @@ -1088,21 +1089,33 @@ enum SandboxCommands { /// /// This implies --gpu. Kubernetes-backed gateways schedule pods with /// the corresponding nvidia.com/gpu resource limit. - #[arg(long, value_name = "COUNT", value_parser = clap::value_parser!(u32).range(1..))] + #[arg(long, value_name = "COUNT", value_parser = clap::value_parser!(u32).range(1..), conflicts_with = "gpu_device")] gpu_count: Option, - /// Set compute resource requests and limits as JSON. - /// - /// The JSON must be an object with the same shape as - /// SandboxTemplate.resources, for example: - /// {"requests":{"cpu":"2"},"limits":{"cpu":"16"}} - #[arg(long, value_name = "JSON")] - resources_json: Option, + /// Minimum CPU cores requested, e.g. "500m" or "2". + #[arg(long, value_name = "QUANTITY")] + cpu_request: Option, + + /// Maximum CPU cores allowed, e.g. "2" or "4". + #[arg(long, value_name = "QUANTITY")] + cpu_limit: Option, + + /// Minimum memory requested, e.g. "512Mi" or "4Gi". + #[arg(long, value_name = "QUANTITY")] + memory_request: Option, + + /// Maximum memory allowed, e.g. "1Gi" or "8Gi". + #[arg(long, value_name = "QUANTITY")] + memory_limit: Option, + + /// Driver-specific resource configuration as KEY=VALUE. + #[arg(long = "resource-config", value_name = "KEY=VALUE")] + resource_config: Vec, /// Target a driver-specific GPU device. Docker and Podman use CDI device IDs /// (for example "nvidia.com/gpu=0"); VM uses a PCI BDF or index. /// Only valid with --gpu. When omitted with --gpu, the driver uses its default GPU selection. 
- #[arg(long, requires = "gpu")] + #[arg(long, requires = "gpu", conflicts_with = "gpu_count")] gpu_device: Option, /// Provider names to attach to this sandbox. @@ -2380,7 +2393,11 @@ async fn main() -> Result<()> { editor, gpu, gpu_count, - resources_json, + cpu_request, + cpu_limit, + memory_request, + memory_limit, + resource_config, gpu_device, providers, policy, @@ -2447,8 +2464,14 @@ async fn main() -> Result<()> { upload_spec.as_ref(), keep, gpu, - gpu_count, - resources_json.as_deref(), + run::SandboxResourceArgs { + cpu_request: cpu_request.as_deref(), + cpu_limit: cpu_limit.as_deref(), + memory_request: memory_request.as_deref(), + memory_limit: memory_limit.as_deref(), + gpu_count, + driver_config: &resource_config, + }, gpu_device.as_deref(), editor, &providers, @@ -3853,27 +3876,66 @@ mod tests { } #[test] - fn sandbox_create_resources_json_parses() { + fn sandbox_create_resource_spec_flags_parse() { let cli = Cli::try_parse_from([ "openshell", "sandbox", "create", - "--resources-json", - r#"{"requests":{"cpu":"2"},"limits":{"cpu":"16"}}"#, + "--cpu-request", + "2", + "--cpu-limit", + "4", + "--memory-request", + "8Gi", + "--memory-limit", + "16Gi", + "--resource-config", + "kubernetes.resource-name=nvidia.com/gpu", ]) - .expect("sandbox create --resources-json should parse"); + .expect("sandbox create resource flags should parse"); match cli.command { Some(Commands::Sandbox { - command: Some(SandboxCommands::Create { resources_json, .. }), + command: + Some(SandboxCommands::Create { + cpu_request, + cpu_limit, + memory_request, + memory_limit, + resource_config, + .. + }), .. 
}) => { + assert_eq!(cpu_request.as_deref(), Some("2")); + assert_eq!(cpu_limit.as_deref(), Some("4")); + assert_eq!(memory_request.as_deref(), Some("8Gi")); + assert_eq!(memory_limit.as_deref(), Some("16Gi")); assert_eq!( - resources_json.as_deref(), - Some(r#"{"requests":{"cpu":"2"},"limits":{"cpu":"16"}}"#) + resource_config, + vec!["kubernetes.resource-name=nvidia.com/gpu".to_string()] ); } other => panic!("expected sandbox create command, got: {other:?}"), } } + + #[test] + fn sandbox_create_gpu_count_conflicts_with_gpu_device() { + let result = Cli::try_parse_from([ + "openshell", + "sandbox", + "create", + "--gpu", + "--gpu-count", + "2", + "--gpu-device", + "nvidia.com/gpu=0", + ]); + + assert!( + result.is_err(), + "sandbox create should reject combining --gpu-count and --gpu-device" + ); + } } diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 07ee6fd69..f4d931445 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -36,7 +36,7 @@ use openshell_core::proto::{ LintProviderProfilesRequest, ListProviderProfilesRequest, ListProvidersRequest, ListSandboxPoliciesRequest, ListSandboxProvidersRequest, ListSandboxesRequest, ListServicesRequest, PolicySource, PolicyStatus, Provider, ProviderProfile, - ProviderProfileDiagnostic, ProviderProfileImportItem, RejectDraftChunkRequest, + ProviderProfileDiagnostic, ProviderProfileImportItem, RejectDraftChunkRequest, ResourceSpec, RevokeSshSessionRequest, Sandbox, SandboxPhase, SandboxPolicy, SandboxSpec, SandboxTemplate, ServiceEndpointResponse, SetClusterInferenceRequest, SettingScope, SettingValue, TcpForwardFrame, TcpForwardInit, TcpRelayTarget, UpdateConfigRequest, UpdateProviderRequest, @@ -67,8 +67,6 @@ pub use openshell_core::forward::{ find_forward_by_port, list_forwards, stop_forward, stop_forwards_for_sandbox, }; -const NVIDIA_GPU_RESOURCE_NAME: &str = "nvidia.com/gpu"; - /// Convert a sandbox phase integer to a human-readable string. 
 fn phase_name(phase: i32) -> &'static str {
     match SandboxPhase::try_from(phase) {
@@ -1459,91 +1457,52 @@ async fn finalize_sandbox_create_session(
     session_result
 }
 
-fn parse_sandbox_resources_json(
-    resources_json: Option<&str>,
-    gpu_count: Option<u32>,
-) -> Result<Option<prost_types::Struct>> {
-    let mut resources = match resources_json {
-        Some(raw) => {
-            if raw.trim().is_empty() {
-                return Err(miette::miette!("--resources-json cannot be empty"));
-            }
-
-            let value: serde_json::Value = serde_json::from_str(raw)
-                .into_diagnostic()
-                .wrap_err("invalid --resources-json JSON")?;
-            let serde_json::Value::Object(fields) = value else {
-                return Err(miette::miette!("--resources-json must be a JSON object"));
-            };
+#[derive(Clone, Copy, Debug, Default)]
+pub struct SandboxResourceArgs<'a> {
+    pub cpu_request: Option<&'a str>,
+    pub cpu_limit: Option<&'a str>,
+    pub memory_request: Option<&'a str>,
+    pub memory_limit: Option<&'a str>,
+    pub gpu_count: Option<u32>,
+    pub driver_config: &'a [String],
+}
 
-            Some(json_object_to_prost_struct(fields))
-        }
-        None => None,
+fn build_resource_spec(args: SandboxResourceArgs<'_>) -> Result<Option<ResourceSpec>> {
+    let mut resources = ResourceSpec {
+        driver_config: parse_key_value_pairs(args.driver_config, "--resource-config")?,
+        ..ResourceSpec::default()
     };
 
-    if let Some(count) = gpu_count {
+    if let Some(value) = args.cpu_request {
+        resources.cpu_request = value.to_string();
+    }
+    if let Some(value) = args.cpu_limit {
+        resources.cpu_limit = value.to_string();
+    }
+    if let Some(value) = args.memory_request {
+        resources.memory_request = value.to_string();
+    }
+    if let Some(value) = args.memory_limit {
+        resources.memory_limit = value.to_string();
+    }
+    if let Some(count) = args.gpu_count {
         if count == 0 {
             return Err(miette::miette!("--gpu-count must be greater than zero"));
         }
-        inject_gpu_count_resource(resources.get_or_insert_with(Default::default), count);
-    }
-
-    Ok(resources)
-}
-
-fn json_object_to_prost_struct(
-    fields: serde_json::Map<String, serde_json::Value>,
-) -> 
prost_types::Struct { - fields: fields - .into_iter() - .map(|(key, value)| (key, json_value_to_prost_value(value))) - .collect(), + resources.gpu_count = count; } -} - -fn json_value_to_prost_value(value: serde_json::Value) -> prost_types::Value { - use prost_types::{ListValue, Value, value::Kind}; - - let kind = match value { - serde_json::Value::Null => Kind::NullValue(0), - serde_json::Value::Bool(value) => Kind::BoolValue(value), - serde_json::Value::Number(value) => Kind::NumberValue(value.as_f64().unwrap_or_default()), - serde_json::Value::String(value) => Kind::StringValue(value), - serde_json::Value::Array(values) => Kind::ListValue(ListValue { - values: values.into_iter().map(json_value_to_prost_value).collect(), - }), - serde_json::Value::Object(fields) => Kind::StructValue(json_object_to_prost_struct(fields)), - }; - Value { kind: Some(kind) } -} - -fn inject_gpu_count_resource(resources: &mut prost_types::Struct, gpu_count: u32) { - use prost_types::{Struct, Value, value::Kind}; - - let limits_kind = &mut resources - .fields - .entry("limits".to_string()) - .or_insert_with(|| Value { - kind: Some(Kind::StructValue(Struct::default())), - }) - .kind; - - if !matches!(limits_kind, Some(Kind::StructValue(_))) { - *limits_kind = Some(Kind::StructValue(Struct::default())); + if resources.cpu_request.is_empty() + && resources.cpu_limit.is_empty() + && resources.memory_request.is_empty() + && resources.memory_limit.is_empty() + && resources.gpu_count == 0 + && resources.driver_config.is_empty() + { + Ok(None) + } else { + Ok(Some(resources)) } - - let Some(Kind::StructValue(limits)) = limits_kind.as_mut() else { - unreachable!("limits kind was normalized to a StructValue"); - }; - - limits.fields.insert( - NVIDIA_GPU_RESOURCE_NAME.to_string(), - Value { - kind: Some(Kind::StringValue(gpu_count.to_string())), - }, - ); } /// Create a sandbox with default settings. 
@@ -1556,8 +1515,7 @@ pub async fn sandbox_create( upload: Option<&(String, Option, bool)>, keep: bool, gpu: bool, - gpu_count: Option, - resources_json: Option<&str>, + resources: SandboxResourceArgs<'_>, gpu_device: Option<&str>, editor: Option, providers: &[String], @@ -1574,7 +1532,12 @@ pub async fn sandbox_create( "--editor cannot be used with a trailing command; use `openshell sandbox connect --editor ...` after the sandbox is ready" )); } - let resources = parse_sandbox_resources_json(resources_json, gpu_count)?; + let resources = build_resource_spec(resources)?; + if resources.as_ref().is_some_and(|spec| spec.gpu_count > 0) && gpu_device.is_some() { + return Err(miette::miette!( + "--gpu-count cannot be combined with --gpu-device" + )); + } // Check port availability *before* creating the sandbox so we don't // leave an orphaned sandbox behind when the forward would fail. @@ -1610,8 +1573,9 @@ pub async fn sandbox_create( } None => None, }; - let requested_gpu = - gpu || gpu_count.is_some() || image.as_deref().is_some_and(image_requests_gpu); + let requested_gpu = gpu + || resources.as_ref().is_some_and(|spec| spec.gpu_count > 0) + || image.as_deref().is_some_and(image_requests_gpu); let inferred_types: Vec = inferred_provider_type(command).into_iter().collect(); let configured_providers = ensure_required_providers( @@ -1624,10 +1588,9 @@ pub async fn sandbox_create( let policy = load_sandbox_policy(policy)?; - let template = if image.is_some() || resources.is_some() { + let template = if image.is_some() { Some(SandboxTemplate { image: image.unwrap_or_default(), - resources, ..SandboxTemplate::default() }) } else { @@ -1640,6 +1603,7 @@ pub async fn sandbox_create( gpu_device: gpu_device.unwrap_or_default().to_string(), policy, providers: configured_providers, + resources, template, ..SandboxSpec::default() }), @@ -6109,15 +6073,15 @@ fn format_timestamp_ms(ms: i64) -> String { #[cfg(test)] mod tests { use super::{ - TlsOptions, 
dockerfile_sources_supported_for_gateway, format_endpoint, - format_gateway_select_header, format_gateway_select_items, - format_provider_attachment_table, gateway_add, gateway_auth_label, - gateway_env_override_warning, gateway_select_with, gateway_type_label, git_sync_files, - http_health_check, image_requests_gpu, import_local_package_mtls_bundle, + SandboxResourceArgs, TlsOptions, build_resource_spec, + dockerfile_sources_supported_for_gateway, format_endpoint, format_gateway_select_header, + format_gateway_select_items, format_provider_attachment_table, gateway_add, + gateway_auth_label, gateway_env_override_warning, gateway_select_with, gateway_type_label, + git_sync_files, http_health_check, image_requests_gpu, import_local_package_mtls_bundle, inferred_provider_type, package_managed_tls_dirs, parse_cli_setting_value, - parse_credential_pairs, parse_sandbox_resources_json, plaintext_gateway_is_remote, - provisioning_timeout_message, ready_false_condition_message, resolve_from, - sandbox_should_persist, service_expose_status_error, service_url_for_gateway, + parse_credential_pairs, plaintext_gateway_is_remote, provisioning_timeout_message, + ready_false_condition_message, resolve_from, sandbox_should_persist, + service_expose_status_error, service_url_for_gateway, }; use crate::TEST_ENV_LOCK; use hyper::StatusCode; @@ -6135,83 +6099,54 @@ mod tests { Provider, SandboxCondition, SandboxStatus, datamodel::v1::ObjectMeta, }; - fn prost_struct_field<'a>( - value: &'a prost_types::Struct, - field: &str, - ) -> &'a prost_types::Struct { - let value = value - .fields - .get(field) - .and_then(|value| value.kind.as_ref()) - .unwrap_or_else(|| panic!("expected {field} field")); - let prost_types::value::Kind::StructValue(value) = value else { - panic!("expected {field} to be an object"); - }; - value - } - - fn prost_string_field(value: &prost_types::Struct, field: &str) -> String { - let value = value - .fields - .get(field) - .and_then(|value| value.kind.as_ref()) - 
.unwrap_or_else(|| panic!("expected {field} field")); - let prost_types::value::Kind::StringValue(value) = value else { - panic!("expected {field} to be a string"); - }; - value.clone() - } - - #[test] - fn parse_sandbox_resources_json_rejects_empty_string() { - let err = parse_sandbox_resources_json(Some(""), None) - .expect_err("empty resources JSON should fail"); - - assert!(err.to_string().contains("--resources-json")); - assert!(err.to_string().contains("empty")); - } - #[test] - fn parse_sandbox_resources_json_rejects_invalid_json() { - let err = parse_sandbox_resources_json(Some("{bad"), None) - .expect_err("invalid resources JSON should fail"); - - assert!(err.to_string().contains("invalid --resources-json JSON")); - } - - #[test] - fn parse_sandbox_resources_json_rejects_non_object_json() { - let err = parse_sandbox_resources_json(Some(r#"["cpu"]"#), None) - .expect_err("non-object resources JSON should fail"); + fn build_resource_spec_sets_typed_fields_and_driver_config() { + let driver_config = vec!["kubernetes.resource-name=nvidia.com/mig-1g.5gb".to_string()]; + let resources = build_resource_spec(SandboxResourceArgs { + cpu_request: Some("2"), + cpu_limit: Some("4"), + memory_request: Some("8Gi"), + memory_limit: Some("16Gi"), + gpu_count: Some(4), + driver_config: &driver_config, + }) + .expect("resources should parse") + .expect("resource spec should be present"); - assert!(err.to_string().contains("--resources-json")); - assert!(err.to_string().contains("JSON object")); + assert_eq!(resources.cpu_request, "2"); + assert_eq!(resources.cpu_limit, "4"); + assert_eq!(resources.memory_request, "8Gi"); + assert_eq!(resources.memory_limit, "16Gi"); + assert_eq!(resources.gpu_count, 4); + assert_eq!( + resources.driver_config.get("kubernetes.resource-name"), + Some(&"nvidia.com/mig-1g.5gb".to_string()) + ); } #[test] - fn parse_sandbox_resources_json_injects_gpu_count_resource() { - let resources = parse_sandbox_resources_json( - 
Some(r#"{"requests":{"cpu":"2"},"limits":{"cpu":"16"}}"#), - Some(4), - ) - .expect("resources should parse") - .expect("resources should be present"); - - let requests = prost_struct_field(&resources, "requests"); - let limits = prost_struct_field(&resources, "limits"); + fn build_resource_spec_rejects_zero_gpu_count() { + let err = build_resource_spec(SandboxResourceArgs { + gpu_count: Some(0), + ..SandboxResourceArgs::default() + }) + .expect_err("zero GPU count should fail"); - assert_eq!(prost_string_field(requests, "cpu"), "2"); - assert_eq!(prost_string_field(limits, "cpu"), "16"); - assert_eq!(prost_string_field(limits, "nvidia.com/gpu"), "4"); + assert!(err.to_string().contains("--gpu-count")); + assert!(err.to_string().contains("greater than zero")); } #[test] - fn parse_sandbox_resources_json_rejects_zero_gpu_count() { - let err = - parse_sandbox_resources_json(None, Some(0)).expect_err("zero GPU count should fail"); + fn build_resource_spec_rejects_invalid_driver_config() { + let driver_config = vec!["missing-separator".to_string()]; + let err = build_resource_spec(SandboxResourceArgs { + driver_config: &driver_config, + ..SandboxResourceArgs::default() + }) + .expect_err("invalid driver config should fail"); - assert!(err.to_string().contains("--gpu-count")); - assert!(err.to_string().contains("greater than zero")); + assert!(err.to_string().contains("--resource-config")); + assert!(err.to_string().contains("KEY=VALUE")); } struct EnvVarGuard { diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index 861c8c011..4b6ec4451 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -659,30 +659,6 @@ fn test_tls(server: &TestServer) -> TlsOptions { server.tls.with_gateway_name("openshell") } -fn struct_field<'a>(value: &'a prost_types::Struct, field: &str) -> &'a 
prost_types::Struct { - let value = value - .fields - .get(field) - .and_then(|value| value.kind.as_ref()) - .unwrap_or_else(|| panic!("expected {field} field")); - let prost_types::value::Kind::StructValue(value) = value else { - panic!("expected {field} to be an object"); - }; - value -} - -fn string_field(value: &prost_types::Struct, field: &str) -> String { - let value = value - .fields - .get(field) - .and_then(|value| value.kind.as_ref()) - .unwrap_or_else(|| panic!("expected {field} field")); - let prost_types::value::Kind::StringValue(value) = value else { - panic!("expected {field} to be a string"); - }; - value.clone() -} - #[tokio::test] async fn sandbox_create_keeps_command_sessions_by_default() { let server = run_server().await; @@ -700,8 +676,7 @@ async fn sandbox_create_keeps_command_sessions_by_default() { None, true, false, - None, - None, + run::SandboxResourceArgs::default(), None, None, &[], @@ -741,8 +716,10 @@ async fn sandbox_create_gpu_count_sets_gpu_intent_and_resources() { None, true, false, - Some(2), - None, + run::SandboxResourceArgs { + gpu_count: Some(2), + ..run::SandboxResourceArgs::default() + }, None, None, &[], @@ -765,16 +742,14 @@ async fn sandbox_create_gpu_count_sets_gpu_intent_and_resources() { assert!(spec.gpu); let resources = spec - .template + .resources .as_ref() - .and_then(|template| template.resources.as_ref()) - .expect("gpu count should create template resources"); - let limits = struct_field(resources, "limits"); - assert_eq!(string_field(limits, "nvidia.com/gpu"), "2"); + .expect("gpu count should create resource spec"); + assert_eq!(resources.gpu_count, 2); } #[tokio::test] -async fn sandbox_create_resources_json_sets_template_resources() { +async fn sandbox_create_resource_spec_sets_resource_requirements() { let server = run_server().await; let fake_ssh_dir = tempfile::tempdir().unwrap(); let xdg_dir = tempfile::tempdir().unwrap(); @@ -784,14 +759,19 @@ async fn 
sandbox_create_resources_json_sets_template_resources() { run::sandbox_create( &server.endpoint, - Some("resources-json"), + Some("resource-spec"), None, "openshell", None, true, false, - None, - Some(r#"{"requests":{"cpu":"2"},"limits":{"cpu":"16"}}"#), + run::SandboxResourceArgs { + cpu_request: Some("2"), + cpu_limit: Some("4"), + memory_request: Some("8Gi"), + memory_limit: Some("16Gi"), + ..run::SandboxResourceArgs::default() + }, None, None, &[], @@ -814,14 +794,13 @@ async fn sandbox_create_resources_json_sets_template_resources() { assert!(!spec.gpu); let resources = spec - .template + .resources .as_ref() - .and_then(|template| template.resources.as_ref()) - .expect("resources JSON should create template resources"); - let requests = struct_field(resources, "requests"); - let limits = struct_field(resources, "limits"); - assert_eq!(string_field(requests, "cpu"), "2"); - assert_eq!(string_field(limits, "cpu"), "16"); + .expect("resource flags should create resource spec"); + assert_eq!(resources.cpu_request, "2"); + assert_eq!(resources.cpu_limit, "4"); + assert_eq!(resources.memory_request, "8Gi"); + assert_eq!(resources.memory_limit, "16Gi"); } #[tokio::test] @@ -841,8 +820,7 @@ async fn sandbox_create_deletes_command_sessions_with_no_keep() { None, false, false, - None, - None, + run::SandboxResourceArgs::default(), None, None, &[], @@ -885,8 +863,7 @@ async fn sandbox_create_deletes_shell_sessions_with_no_keep() { None, false, false, - None, - None, + run::SandboxResourceArgs::default(), None, None, &[], @@ -929,8 +906,7 @@ async fn sandbox_create_keeps_sandbox_with_hidden_keep_flag() { None, true, false, - None, - None, + run::SandboxResourceArgs::default(), None, None, &[], @@ -973,8 +949,7 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() { None, false, false, - None, - None, + run::SandboxResourceArgs::default(), None, None, &[], diff --git a/crates/openshell-driver-docker/src/tests.rs b/crates/openshell-driver-docker/src/tests.rs index 
df68d39d6..f3453f78f 100644
--- a/crates/openshell-driver-docker/src/tests.rs
+++ b/crates/openshell-driver-docker/src/tests.rs
@@ -357,6 +357,7 @@ fn docker_resource_limits_rejects_requests() {
             cpu_limit: String::new(),
             memory_request: String::new(),
             memory_limit: String::new(),
+            ..DriverResourceRequirements::default()
         }),
         platform_config: None,
     };
diff --git a/crates/openshell-driver-kubernetes/README.md b/crates/openshell-driver-kubernetes/README.md
index f2324b757..b1e133c86 100644
--- a/crates/openshell-driver-kubernetes/README.md
+++ b/crates/openshell-driver-kubernetes/README.md
@@ -44,9 +44,10 @@ pods do not need direct external ingress for SSH.
 ## GPU Support
 
 When a sandbox requests GPU support, the driver checks node allocatable capacity
-for `nvidia.com/gpu`. A request with only `gpu=true` asks for one GPU unless the
-template already provides an explicit `limits["nvidia.com/gpu"]` value through
-the resource passthrough. The cluster must expose `nvidia.com/gpu` through the
-NVIDIA device plugin or an equivalent device-plugin implementation, and the
-sandbox image must provide the user-space libraries needed by the agent
-workload.
+for `nvidia.com/gpu`. A resource spec with `gpu_count` maps to
+`limits["nvidia.com/gpu"] = "<count>"`. A request with only `gpu=true` asks for
+one GPU unless the template already provides an explicit
+`limits["nvidia.com/gpu"]` value through the resource passthrough. The cluster
+must expose `nvidia.com/gpu` through the NVIDIA device plugin or an equivalent
+device-plugin implementation, and the sandbox image must provide the user-space
+libraries needed by the agent workload.
diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 5b2fa8984..b3dac55b9 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -1245,10 +1245,13 @@ fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option 0 { + set_gpu_limit(&mut resources, &req.gpu_count.to_string(), true); + } } if gpu { - apply_gpu_limit(&mut resources, GPU_RESOURCE_QUANTITY); + set_gpu_limit(&mut resources, GPU_RESOURCE_QUANTITY, false); } if resources.as_object().is_some_and(serde_json::Map::is_empty) { None @@ -1257,10 +1260,10 @@ fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option DriverSandboxSpec { DriverSandboxSpec { log_level: spec.log_level.clone(), environment: spec.environment.clone(), - template: spec - .template - .as_ref() - .map(driver_sandbox_template_from_public), - gpu: spec.gpu, + template: driver_sandbox_template_from_public( + spec.template.as_ref(), + spec.resources.as_ref(), + ), + gpu: spec.gpu || spec.resources.as_ref().is_some_and(|r| r.gpu_count > 0), gpu_device: spec.gpu_device.clone(), } } -fn driver_sandbox_template_from_public(template: &SandboxTemplate) -> DriverSandboxTemplate { - DriverSandboxTemplate { +fn driver_sandbox_template_from_public( + template: Option<&SandboxTemplate>, + resources: Option<&ResourceSpec>, +) -> Option { + let driver_resources = + driver_resources_from_public(resources, template.and_then(|t| t.resources.as_ref())); + + if template.is_none() && driver_resources.is_none() { + return None; + } + + let default_template = SandboxTemplate::default(); + let template = template.unwrap_or(&default_template); + + Some(DriverSandboxTemplate { image: template.image.clone(), agent_socket_path: template.agent_socket.clone(), labels: template.labels.clone(), environment: template.environment.clone(), - resources: extract_typed_resources(&template.resources), + resources: 
driver_resources, platform_config: build_platform_config(template), + }) +} + +fn driver_resources_from_public( + resources: Option<&ResourceSpec>, + template_resources: Option<&prost_types::Struct>, +) -> Option { + let mut req = extract_typed_resources(template_resources).unwrap_or_default(); + + if let Some(resources) = resources { + if !resources.cpu_request.is_empty() { + req.cpu_request.clone_from(&resources.cpu_request); + } + if !resources.cpu_limit.is_empty() { + req.cpu_limit.clone_from(&resources.cpu_limit); + } + if !resources.memory_request.is_empty() { + req.memory_request.clone_from(&resources.memory_request); + } + if !resources.memory_limit.is_empty() { + req.memory_limit.clone_from(&resources.memory_limit); + } + if resources.gpu_count > 0 { + req.gpu_count = resources.gpu_count; + } + if !resources.driver_config.is_empty() { + req.driver_config.clone_from(&resources.driver_config); + } + } + + if req.cpu_request.is_empty() + && req.cpu_limit.is_empty() + && req.memory_request.is_empty() + && req.memory_limit.is_empty() + && req.gpu_count == 0 + && req.driver_config.is_empty() + { + None + } else { + Some(req) } } @@ -1152,7 +1205,7 @@ fn driver_sandbox_template_from_public(template: &SandboxTemplate) -> DriverSand /// with the Kubernetes limits/requests shape. We pull out the well-known /// keys into the typed `DriverResourceRequirements` message. 
fn extract_typed_resources( - resources: &Option, + resources: Option<&prost_types::Struct>, ) -> Option { fn get_quantity(s: &prost_types::Struct, section: &str, key: &str) -> String { s.fields @@ -1168,13 +1221,14 @@ fn extract_typed_resources( .unwrap_or_default() } - let s = resources.as_ref()?; + let s = resources?; let req = DriverResourceRequirements { cpu_request: get_quantity(s, "requests", "cpu"), cpu_limit: get_quantity(s, "limits", "cpu"), memory_request: get_quantity(s, "requests", "memory"), memory_limit: get_quantity(s, "limits", "memory"), + ..DriverResourceRequirements::default() }; // Return None when all fields are empty so drivers can distinguish @@ -1183,6 +1237,8 @@ fn extract_typed_resources( && req.cpu_limit.is_empty() && req.memory_request.is_empty() && req.memory_limit.is_empty() + && req.gpu_count == 0 + && req.driver_config.is_empty() { None } else { @@ -2098,6 +2154,43 @@ mod tests { assert!(resources_raw.fields.contains_key("opaque_cpu")); } + #[test] + fn driver_sandbox_spec_maps_resource_spec_to_driver_resources() { + let spec = SandboxSpec { + resources: Some(ResourceSpec { + cpu_request: "2".to_string(), + cpu_limit: "4".to_string(), + memory_request: "8Gi".to_string(), + memory_limit: "16Gi".to_string(), + gpu_count: 4, + driver_config: std::iter::once(( + "kubernetes.resource-name".to_string(), + "nvidia.com/gpu".to_string(), + )) + .collect(), + }), + ..Default::default() + }; + + let driver_spec = driver_sandbox_spec_from_public(&spec); + assert!(driver_spec.gpu); + + let resources = driver_spec + .template + .as_ref() + .and_then(|template| template.resources.as_ref()) + .expect("resource spec should create driver template resources"); + assert_eq!(resources.cpu_request, "2"); + assert_eq!(resources.cpu_limit, "4"); + assert_eq!(resources.memory_request, "8Gi"); + assert_eq!(resources.memory_limit, "16Gi"); + assert_eq!(resources.gpu_count, 4); + assert_eq!( + resources.driver_config.get("kubernetes.resource-name"), + 
Some(&"nvidia.com/gpu".to_string()) + ); + } + #[test] fn rewrite_user_facing_conditions_rewrites_gpu_unschedulable_message() { let mut status = Some(SandboxStatus { diff --git a/crates/openshell-server/src/grpc/validation.rs b/crates/openshell-server/src/grpc/validation.rs index 160b7e031..424042435 100644 --- a/crates/openshell-server/src/grpc/validation.rs +++ b/crates/openshell-server/src/grpc/validation.rs @@ -9,7 +9,8 @@ #![allow(clippy::result_large_err)] // Validation returns Result<_, Status> use openshell_core::proto::{ - ExecSandboxRequest, Provider, SandboxPolicy as ProtoSandboxPolicy, SandboxTemplate, + ExecSandboxRequest, Provider, ResourceSpec, SandboxPolicy as ProtoSandboxPolicy, + SandboxTemplate, }; use prost::Message; use tonic::Status; @@ -21,6 +22,8 @@ use super::{ MAX_TEMPLATE_STRUCT_SIZE, }; +const NVIDIA_GPU_RESOURCE_NAME: &str = "nvidia.com/gpu"; + // --------------------------------------------------------------------------- // Exec request validation // --------------------------------------------------------------------------- @@ -130,6 +133,20 @@ pub(super) fn validate_sandbox_spec( if let Some(ref tmpl) = spec.template { validate_sandbox_template(tmpl)?; } + if let Some(ref resources) = spec.resources { + validate_resource_spec(resources)?; + validate_resource_template_conflicts(resources, spec.template.as_ref())?; + } + if spec + .resources + .as_ref() + .is_some_and(|resources| resources.gpu_count > 0) + && !spec.gpu_device.is_empty() + { + return Err(Status::invalid_argument( + "resources.gpu_count cannot be combined with gpu_device", + )); + } // --- spec.policy serialized size --- if let Some(ref policy) = spec.policy { @@ -144,6 +161,104 @@ pub(super) fn validate_sandbox_spec( Ok(()) } +fn validate_resource_spec(resources: &ResourceSpec) -> Result<(), Status> { + for (field, value) in [ + ("resources.cpu_request", &resources.cpu_request), + ("resources.cpu_limit", &resources.cpu_limit), + ("resources.memory_request", 
&resources.memory_request), + ("resources.memory_limit", &resources.memory_limit), + ] { + if value.len() > MAX_MAP_VALUE_LEN { + return Err(Status::invalid_argument(format!( + "{field} exceeds maximum length ({} > {MAX_MAP_VALUE_LEN})", + value.len() + ))); + } + } + + validate_string_map( + &resources.driver_config, + MAX_TEMPLATE_MAP_ENTRIES, + MAX_MAP_KEY_LEN, + MAX_MAP_VALUE_LEN, + "resources.driver_config", + ) +} + +fn validate_resource_template_conflicts( + resources: &ResourceSpec, + template: Option<&SandboxTemplate>, +) -> Result<(), Status> { + let Some(template_resources) = template.and_then(|template| template.resources.as_ref()) else { + return Ok(()); + }; + + for (field, section, key, expected) in [ + ( + "resources.cpu_request", + "requests", + "cpu", + resources.cpu_request.as_str(), + ), + ( + "resources.cpu_limit", + "limits", + "cpu", + resources.cpu_limit.as_str(), + ), + ( + "resources.memory_request", + "requests", + "memory", + resources.memory_request.as_str(), + ), + ( + "resources.memory_limit", + "limits", + "memory", + resources.memory_limit.as_str(), + ), + ] { + if !expected.is_empty() + && template_resource_string(template_resources, section, key) + .is_some_and(|actual| actual != expected) + { + return Err(Status::invalid_argument(format!( + "{field} conflicts with template.resources.{section}.{key}" + ))); + } + } + + if resources.gpu_count > 0 + && template_resource_string(template_resources, "limits", NVIDIA_GPU_RESOURCE_NAME) + .is_some_and(|actual| actual != resources.gpu_count.to_string()) + { + return Err(Status::invalid_argument( + "resources.gpu_count conflicts with template.resources.limits.nvidia.com/gpu", + )); + } + + Ok(()) +} + +fn template_resource_string<'a>( + resources: &'a prost_types::Struct, + section: &str, + key: &str, +) -> Option<&'a str> { + resources + .fields + .get(section) + .and_then(|value| match value.kind.as_ref() { + Some(prost_types::value::Kind::StructValue(section)) => 
section.fields.get(key), + _ => None, + }) + .and_then(|value| match value.kind.as_ref() { + Some(prost_types::value::Kind::StringValue(value)) => Some(value.as_str()), + _ => None, + }) +} + /// Validate template-level field sizes. fn validate_sandbox_template(tmpl: &SandboxTemplate) -> Result<(), Status> { // String fields. @@ -674,6 +789,81 @@ mod tests { assert!(validate_sandbox_spec("gpu-sandbox", &spec).is_ok()); } + #[test] + fn validate_sandbox_spec_accepts_resource_spec() { + let spec = SandboxSpec { + resources: Some(ResourceSpec { + cpu_limit: "4".to_string(), + memory_limit: "8Gi".to_string(), + gpu_count: 2, + driver_config: std::iter::once(( + "kubernetes.resource-name".to_string(), + "nvidia.com/gpu".to_string(), + )) + .collect(), + ..Default::default() + }), + ..Default::default() + }; + + assert!(validate_sandbox_spec("resource-sandbox", &spec).is_ok()); + } + + #[test] + fn validate_sandbox_spec_rejects_gpu_count_with_gpu_device() { + let spec = SandboxSpec { + gpu_device: "nvidia.com/gpu=0".to_string(), + resources: Some(ResourceSpec { + gpu_count: 2, + ..Default::default() + }), + ..Default::default() + }; + + let err = validate_sandbox_spec("resource-sandbox", &spec).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("gpu_count")); + assert!(err.message().contains("gpu_device")); + } + + #[test] + fn validate_sandbox_spec_rejects_conflicting_template_resources() { + use prost_types::{Struct, Value, value::Kind}; + + let spec = SandboxSpec { + resources: Some(ResourceSpec { + cpu_limit: "4".to_string(), + ..Default::default() + }), + template: Some(SandboxTemplate { + resources: Some(Struct { + fields: std::iter::once(( + "limits".to_string(), + Value { + kind: Some(Kind::StructValue(Struct { + fields: std::iter::once(( + "cpu".to_string(), + Value { + kind: Some(Kind::StringValue("2".to_string())), + }, + )) + .collect(), + })), + }, + )) + .collect(), + }), + ..Default::default() + }), + 
..Default::default() + }; + + let err = validate_sandbox_spec("resource-sandbox", &spec).unwrap_err(); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("resources.cpu_limit")); + assert!(err.message().contains("template.resources.limits.cpu")); + } + #[test] fn validate_sandbox_spec_accepts_empty_defaults() { assert!(validate_sandbox_spec("", &default_spec()).is_ok()); diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx index 0ded3ddf3..5ec45b3bd 100644 --- a/docs/sandboxes/manage-sandboxes.mdx +++ b/docs/sandboxes/manage-sandboxes.mdx @@ -42,18 +42,27 @@ openshell sandbox create --gpu-count 4 -- claude ``` GPU counts schedule Kubernetes sandbox pods with an `nvidia.com/gpu` resource -limit. +limit. `--gpu-count` cannot be combined with `--gpu-device` because count-based +scheduling and driver-specific device selection are different allocation modes. -To pass generic resource requests and limits through to the sandbox template, -use `--resources-json` with a JSON object: +To set portable CPU and memory resource requests or limits, use the resource +flags: ```shell openshell sandbox create \ - --resources-json '{"requests":{"cpu":"2"},"limits":{"cpu":"16"}}' + --cpu-request 2 \ + --cpu-limit 4 \ + --memory-request 8Gi \ + --memory-limit 16Gi \ + -- claude ``` -The JSON is forwarded as `SandboxTemplate.resources`. Use Kubernetes resource -quantity strings for CPU, memory, and extended resources. +Resource values use Kubernetes-style quantity strings. Kubernetes-backed +gateways map these fields into pod `resources.requests` and +`resources.limits`. + +For driver-specific resource settings, pass opaque key-value entries with +`--resource-config KEY=VALUE`. The selected driver interprets those entries. For Docker-backed sandboxes, GPU injection uses Docker CDI. 
If you enable Docker CDI after the gateway starts, restart the gateway so OpenShell can detect the
diff --git a/proto/compute_driver.proto b/proto/compute_driver.proto
index 3c4308f3f..6ed42cf4a 100644
--- a/proto/compute_driver.proto
+++ b/proto/compute_driver.proto
@@ -131,6 +131,10 @@ message DriverResourceRequirements {
   string memory_request = 3;
   // Maximum memory allowed (e.g. "512Mi", "8Gi").
   string memory_limit = 4;
+  // Number of GPUs requested. A value of 0 means no explicit GPU count.
+  uint32 gpu_count = 5;
+  // Opaque key-value configuration interpreted by the selected driver.
+  map<string, string> driver_config = 100;
 }
 
 // Raw status observed directly from the compute platform.
diff --git a/proto/openshell.proto b/proto/openshell.proto
index bb2ce6cec..5af21851a 100644
--- a/proto/openshell.proto
+++ b/proto/openshell.proto
@@ -256,6 +256,28 @@ message SandboxSpec {
   // (e.g. "0", "1"). When empty with gpu=true, the driver assigns the
   // first available GPU.
   string gpu_device = 10;
+  // Portable resource requirements for the sandbox workload.
+  ResourceSpec resources = 11;
+}
+
+// Portable resource requirements for a sandbox workload.
+//
+// Values use Kubernetes-style quantity strings (e.g. "500m", "2", "4Gi")
+// because they are a well-known, widely-adopted notation. Drivers for
+// non-Kubernetes platforms must parse these strings into their native units.
+message ResourceSpec {
+  // Minimum CPU cores requested (e.g. "500m", "2").
+  string cpu_request = 1;
+  // Maximum CPU cores allowed (e.g. "500m", "4").
+  string cpu_limit = 2;
+  // Minimum memory requested (e.g. "256Mi", "4Gi").
+  string memory_request = 3;
+  // Maximum memory allowed (e.g. "512Mi", "8Gi").
+  string memory_limit = 4;
+  // Number of GPUs requested. A value of 0 means no explicit GPU count.
+  uint32 gpu_count = 5;
+  // Opaque key-value configuration interpreted by the selected driver.
+  map<string, string> driver_config = 100;
+}