6 changes: 4 additions & 2 deletions architecture/compute-runtimes.md
@@ -22,7 +22,7 @@ Each runtime receives a sandbox spec from the gateway and is responsible for:
|---|---|---|---|
| Docker | Local development with Docker available. | Container plus nested sandbox namespace. | Uses host networking so loopback gateway endpoints work from the supervisor. |
| Podman | Rootless or single-machine deployments. | Container plus nested sandbox namespace. | Uses the Podman REST API, OCI image volumes, and CDI GPU devices when available. |
| Kubernetes | Cluster deployment through Helm. | Pod plus nested sandbox namespace. | Uses Kubernetes API objects, service accounts, secrets, PVC-backed workspace storage, and GPU resources. |
| Kubernetes | Cluster deployment through Helm. | Pod plus nested sandbox namespace. | Uses Kubernetes API objects, service accounts, secrets, PVC-backed workspace storage, and `nvidia.com/gpu` limits for GPU requests. |
| VM | Experimental microVM isolation. | Per-sandbox libkrun VM. | Gateway spawns `openshell-driver-vm` as a subprocess over a private, state-local Unix socket. |

VM runtime state paths are derived only from driver-validated sandbox IDs
@@ -62,7 +62,9 @@ users.
Custom sandbox images must include the agent runtime and any system
dependencies, but they should not need to include the gateway. GPU-capable
images must include the user-space libraries required by the workload. The
runtime still owns GPU device injection.
runtime still owns GPU device injection or resource scheduling. Kubernetes maps
portable `SandboxSpec.resources` CPU, memory, and GPU count requirements into
pod resource requests and limits when the cluster exposes those resources.
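The mapping described above can be sketched in Rust. This is a hypothetical illustration, not the project's actual driver code: the function name and signature are assumptions, and it relies on the Kubernetes convention that extended resources such as GPUs are scheduled via `limits` under the device plugin's resource name (here `nvidia.com/gpu`).

```rust
use std::collections::BTreeMap;

/// Hypothetical sketch: translate portable resource fields into
/// Kubernetes-style `requests`/`limits` maps. Empty strings mean
/// "not requested" and are omitted, matching the spec's defaults.
fn to_k8s_resources(
    cpu_request: &str,
    cpu_limit: &str,
    memory_request: &str,
    memory_limit: &str,
    gpu_count: u32,
) -> (BTreeMap<String, String>, BTreeMap<String, String>) {
    let mut requests = BTreeMap::new();
    let mut limits = BTreeMap::new();
    if !cpu_request.is_empty() {
        requests.insert("cpu".to_string(), cpu_request.to_string());
    }
    if !cpu_limit.is_empty() {
        limits.insert("cpu".to_string(), cpu_limit.to_string());
    }
    if !memory_request.is_empty() {
        requests.insert("memory".to_string(), memory_request.to_string());
    }
    if !memory_limit.is_empty() {
        limits.insert("memory".to_string(), memory_limit.to_string());
    }
    // Kubernetes treats GPUs as extended resources: they are requested
    // through `limits` only, never through `requests` on their own.
    if gpu_count > 0 {
        limits.insert("nvidia.com/gpu".to_string(), gpu_count.to_string());
    }
    (requests, limits)
}
```

A cluster without the device plugin would simply fail to schedule such a pod, which is why the doc hedges with "when the cluster exposes those resources".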

## Deployment Shape

119 changes: 118 additions & 1 deletion crates/openshell-cli/src/main.rs
@@ -1030,6 +1030,7 @@ enum DoctorCommands {
}

#[derive(Subcommand, Debug)]
#[allow(clippy::large_enum_variant)]
enum SandboxCommands {
/// Create a sandbox.
#[command(help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")]
@@ -1084,10 +1085,37 @@ enum SandboxCommands {
#[arg(long)]
gpu: bool,

/// Request a specific number of GPUs.
///
/// This implies --gpu. Kubernetes-backed gateways schedule pods with
/// the corresponding nvidia.com/gpu resource limit.
#[arg(long, value_name = "COUNT", value_parser = clap::value_parser!(u32).range(1..), conflicts_with = "gpu_device")]
gpu_count: Option<u32>,

/// Minimum CPU cores requested, e.g. "500m" or "2".
#[arg(long, value_name = "QUANTITY")]
cpu_request: Option<String>,

/// Maximum CPU cores allowed, e.g. "2" or "4".
#[arg(long, value_name = "QUANTITY")]
cpu_limit: Option<String>,

/// Minimum memory requested, e.g. "512Mi" or "4Gi".
#[arg(long, value_name = "QUANTITY")]
memory_request: Option<String>,

/// Maximum memory allowed, e.g. "1Gi" or "8Gi".
#[arg(long, value_name = "QUANTITY")]
memory_limit: Option<String>,

/// Driver-specific resource configuration as KEY=VALUE.
#[arg(long = "resource-config", value_name = "KEY=VALUE")]
resource_config: Vec<String>,
Member:

Although this PR is said to resolve #1338 (GPU counts for k8s), doing so through a generic resources-json flag doesn't map well to other drivers. Would it make sense to separate the handling of `--gpu-count` (also added in #1156) from a more generic option such as this? If I understood correctly, this was added to ALSO set the CPU and memory resource requests, and pulling this into its own issue/PR with clearer expectations across the different drivers may make things easier to reason about.

At the very least we should define the behaviour when a user specifies a --resources-json but the driver handling the sandbox creation doesn't support it.
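One concrete way to pin down the behaviour this comment asks for is to reject, rather than silently ignore, driver-namespaced keys that target a different driver. The sketch below is speculative: it assumes a `driver.key` naming convention for `--resource-config` keys, which this PR does not formally define.

```rust
use std::collections::BTreeMap;

/// Hypothetical policy sketch: fail sandbox creation when a resource
/// config key is namespaced to a driver other than the one handling
/// the request, so user intent is never dropped silently.
fn check_driver_config(
    active_driver: &str,
    known_drivers: &[&str],
    config: &BTreeMap<String, String>,
) -> Result<(), String> {
    for key in config.keys() {
        // Keys like "kubernetes.resource-name" carry a driver prefix.
        if let Some((prefix, _)) = key.split_once('.') {
            if prefix != active_driver && known_drivers.contains(&prefix) {
                return Err(format!(
                    "resource config key `{key}` targets driver `{prefix}`, \
                     but this sandbox is handled by `{active_driver}`"
                ));
            }
        }
    }
    Ok(())
}
```

An alternative is to warn and proceed; erroring is stricter but makes misconfiguration visible at create time.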


/// Target a driver-specific GPU device. Docker and Podman use CDI device IDs
/// (for example "nvidia.com/gpu=0"); VM uses a PCI BDF or index.
/// Only valid with --gpu. When omitted with --gpu, the driver uses its default GPU selection.
#[arg(long, requires = "gpu")]
#[arg(long, requires = "gpu", conflicts_with = "gpu_count")]
gpu_device: Option<String>,

/// Provider names to attach to this sandbox.
@@ -2364,6 +2392,12 @@ async fn main() -> Result<()> {
no_keep,
editor,
gpu,
gpu_count,
cpu_request,
cpu_limit,
memory_request,
memory_limit,
resource_config,
gpu_device,
providers,
policy,
@@ -2430,6 +2464,14 @@
upload_spec.as_ref(),
keep,
gpu,
run::SandboxResourceArgs {
cpu_request: cpu_request.as_deref(),
cpu_limit: cpu_limit.as_deref(),
memory_request: memory_request.as_deref(),
memory_limit: memory_limit.as_deref(),
gpu_count,
driver_config: &resource_config,
},
gpu_device.as_deref(),
editor,
&providers,
@@ -3821,4 +3863,79 @@ mod tests {
other => panic!("expected service delete command, got: {other:?}"),
}
}

#[test]
fn sandbox_create_gpu_count_rejects_zero() {
let result = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu-count", "0"]);
assert!(
result.is_err(),
"sandbox create --gpu-count 0 should be rejected"
);
let err = result.unwrap_err().to_string();
assert!(err.contains("--gpu-count"));
}

#[test]
fn sandbox_create_resource_spec_flags_parse() {
let cli = Cli::try_parse_from([
"openshell",
"sandbox",
"create",
"--cpu-request",
"2",
"--cpu-limit",
"4",
"--memory-request",
"8Gi",
"--memory-limit",
"16Gi",
"--resource-config",
"kubernetes.resource-name=nvidia.com/gpu",
])
.expect("sandbox create resource flags should parse");

match cli.command {
Some(Commands::Sandbox {
command:
Some(SandboxCommands::Create {
cpu_request,
cpu_limit,
memory_request,
memory_limit,
resource_config,
..
}),
..
}) => {
assert_eq!(cpu_request.as_deref(), Some("2"));
assert_eq!(cpu_limit.as_deref(), Some("4"));
assert_eq!(memory_request.as_deref(), Some("8Gi"));
assert_eq!(memory_limit.as_deref(), Some("16Gi"));
assert_eq!(
resource_config,
vec!["kubernetes.resource-name=nvidia.com/gpu".to_string()]
);
}
other => panic!("expected sandbox create command, got: {other:?}"),
}
}

#[test]
fn sandbox_create_gpu_count_conflicts_with_gpu_device() {
let result = Cli::try_parse_from([
"openshell",
"sandbox",
"create",
"--gpu",
"--gpu-count",
"2",
"--gpu-device",
"nvidia.com/gpu=0",
]);

assert!(
result.is_err(),
"sandbox create should reject combining --gpu-count and --gpu-device"
);
}
}
134 changes: 123 additions & 11 deletions crates/openshell-cli/src/run.rs
@@ -36,7 +36,7 @@ use openshell_core::proto::{
LintProviderProfilesRequest, ListProviderProfilesRequest, ListProvidersRequest,
ListSandboxPoliciesRequest, ListSandboxProvidersRequest, ListSandboxesRequest,
ListServicesRequest, PolicySource, PolicyStatus, Provider, ProviderProfile,
ProviderProfileDiagnostic, ProviderProfileImportItem, RejectDraftChunkRequest,
ProviderProfileDiagnostic, ProviderProfileImportItem, RejectDraftChunkRequest, ResourceSpec,
RevokeSshSessionRequest, Sandbox, SandboxPhase, SandboxPolicy, SandboxSpec, SandboxTemplate,
ServiceEndpointResponse, SetClusterInferenceRequest, SettingScope, SettingValue,
TcpForwardFrame, TcpForwardInit, TcpRelayTarget, UpdateConfigRequest, UpdateProviderRequest,
@@ -1457,6 +1457,54 @@ async fn finalize_sandbox_create_session(
session_result
}

#[derive(Clone, Copy, Debug, Default)]
pub struct SandboxResourceArgs<'a> {
pub cpu_request: Option<&'a str>,
pub cpu_limit: Option<&'a str>,
pub memory_request: Option<&'a str>,
pub memory_limit: Option<&'a str>,
pub gpu_count: Option<u32>,
pub driver_config: &'a [String],
}

fn build_resource_spec(args: SandboxResourceArgs<'_>) -> Result<Option<ResourceSpec>> {
let mut resources = ResourceSpec {
driver_config: parse_key_value_pairs(args.driver_config, "--resource-config")?,
..ResourceSpec::default()
};

if let Some(value) = args.cpu_request {
resources.cpu_request = value.to_string();
}
if let Some(value) = args.cpu_limit {
resources.cpu_limit = value.to_string();
}
if let Some(value) = args.memory_request {
resources.memory_request = value.to_string();
}
if let Some(value) = args.memory_limit {
resources.memory_limit = value.to_string();
}
if let Some(count) = args.gpu_count {
if count == 0 {
return Err(miette::miette!("--gpu-count must be greater than zero"));
}
resources.gpu_count = count;
}

if resources.cpu_request.is_empty()
&& resources.cpu_limit.is_empty()
&& resources.memory_request.is_empty()
&& resources.memory_limit.is_empty()
&& resources.gpu_count == 0
&& resources.driver_config.is_empty()
{
Ok(None)
} else {
Ok(Some(resources))
}
}

/// Create a sandbox with default settings.
#[allow(clippy::too_many_arguments, clippy::implicit_hasher)] // user-facing CLI command; default hasher is fine
pub async fn sandbox_create(
Expand All @@ -1467,6 +1515,7 @@ pub async fn sandbox_create(
upload: Option<&(String, Option<String>, bool)>,
keep: bool,
gpu: bool,
resources: SandboxResourceArgs<'_>,
gpu_device: Option<&str>,
Comment on lines 1517 to 1519
Member:

My thinking was that we would add a ResourcesSpec type that would include all of these instead of continuously extending the list. This spec would include specific fields as well as opaque config to be interpreted by the driver (a key-value map). I have some local changes to this effect, but haven't found the "correct" shape yet. Pulling in your requirements may help here.

Collaborator (author):

Works for me. Thank you. Updates to PR incoming.
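The shape this thread converges on (typed fields for portable resources plus an opaque key-value map interpreted per driver) could look roughly like the sketch below. Field names mirror the diff's `ResourceSpec`, but the type itself is illustrative, not the project's generated proto definition.

```rust
use std::collections::BTreeMap;

/// Sketch of the proposed spec shape: typed, portable resource fields
/// alongside an opaque map that each driver interprets on its own.
/// Empty strings and a zero count mean "unset", matching the diff.
#[derive(Clone, Debug, Default, PartialEq)]
struct ResourceSpecSketch {
    cpu_request: String,
    cpu_limit: String,
    memory_request: String,
    memory_limit: String,
    gpu_count: u32,
    driver_config: BTreeMap<String, String>,
}

impl ResourceSpecSketch {
    /// A fully-default spec carries no information and can be elided
    /// from the create request, as `build_resource_spec` does by
    /// returning `None`.
    fn is_empty(&self) -> bool {
        *self == Self::default()
    }
}
```

Deriving `PartialEq` and comparing against `Default` keeps the emptiness check from drifting as fields are added, which is one way to avoid the field-by-field check in `build_resource_spec`.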

editor: Option<Editor>,
providers: &[String],
Expand All @@ -1483,6 +1532,12 @@ pub async fn sandbox_create(
"--editor cannot be used with a trailing command; use `openshell sandbox connect <name> --editor ...` after the sandbox is ready"
));
}
let resources = build_resource_spec(resources)?;
if resources.as_ref().is_some_and(|spec| spec.gpu_count > 0) && gpu_device.is_some() {
return Err(miette::miette!(
"--gpu-count cannot be combined with --gpu-device"
));
}

// Check port availability *before* creating the sandbox so we don't
// leave an orphaned sandbox behind when the forward would fail.
@@ -1518,7 +1573,9 @@ }
}
None => None,
};
let requested_gpu = gpu || image.as_deref().is_some_and(image_requests_gpu);
let requested_gpu = gpu
|| resources.as_ref().is_some_and(|spec| spec.gpu_count > 0)
|| image.as_deref().is_some_and(image_requests_gpu);

let inferred_types: Vec<String> = inferred_provider_type(command).into_iter().collect();
let configured_providers = ensure_required_providers(
@@ -1531,17 +1588,22 @@

let policy = load_sandbox_policy(policy)?;

let template = image.map(|img| SandboxTemplate {
image: img,
..SandboxTemplate::default()
});
let template = if image.is_some() {
Some(SandboxTemplate {
image: image.unwrap_or_default(),
..SandboxTemplate::default()
})
} else {
None
};

let request = CreateSandboxRequest {
spec: Some(SandboxSpec {
gpu: requested_gpu,
gpu_device: gpu_device.unwrap_or_default().to_string(),
policy,
providers: configured_providers,
resources,
template,
..SandboxSpec::default()
}),
@@ -6011,11 +6073,11 @@ fn format_timestamp_ms(ms: i64) -> String {
#[cfg(test)]
mod tests {
use super::{
TlsOptions, dockerfile_sources_supported_for_gateway, format_endpoint,
format_gateway_select_header, format_gateway_select_items,
format_provider_attachment_table, gateway_add, gateway_auth_label,
gateway_env_override_warning, gateway_select_with, gateway_type_label, git_sync_files,
http_health_check, image_requests_gpu, import_local_package_mtls_bundle,
SandboxResourceArgs, TlsOptions, build_resource_spec,
dockerfile_sources_supported_for_gateway, format_endpoint, format_gateway_select_header,
format_gateway_select_items, format_provider_attachment_table, gateway_add,
gateway_auth_label, gateway_env_override_warning, gateway_select_with, gateway_type_label,
git_sync_files, http_health_check, image_requests_gpu, import_local_package_mtls_bundle,
inferred_provider_type, package_managed_tls_dirs, parse_cli_setting_value,
parse_credential_pairs, plaintext_gateway_is_remote, provisioning_timeout_message,
ready_false_condition_message, resolve_from, sandbox_should_persist,
@@ -6037,6 +6099,56 @@
Provider, SandboxCondition, SandboxStatus, datamodel::v1::ObjectMeta,
};

#[test]
fn build_resource_spec_sets_typed_fields_and_driver_config() {
let driver_config = vec!["kubernetes.resource-name=nvidia.com/mig-1g.5gb".to_string()];
let resources = build_resource_spec(SandboxResourceArgs {
cpu_request: Some("2"),
cpu_limit: Some("4"),
memory_request: Some("8Gi"),
memory_limit: Some("16Gi"),
gpu_count: Some(4),
driver_config: &driver_config,
})
.expect("resources should parse")
.expect("resource spec should be present");

assert_eq!(resources.cpu_request, "2");
assert_eq!(resources.cpu_limit, "4");
assert_eq!(resources.memory_request, "8Gi");
assert_eq!(resources.memory_limit, "16Gi");
assert_eq!(resources.gpu_count, 4);
assert_eq!(
resources.driver_config.get("kubernetes.resource-name"),
Some(&"nvidia.com/mig-1g.5gb".to_string())
);
}

#[test]
fn build_resource_spec_rejects_zero_gpu_count() {
let err = build_resource_spec(SandboxResourceArgs {
gpu_count: Some(0),
..SandboxResourceArgs::default()
})
.expect_err("zero GPU count should fail");

assert!(err.to_string().contains("--gpu-count"));
assert!(err.to_string().contains("greater than zero"));
}

#[test]
fn build_resource_spec_rejects_invalid_driver_config() {
let driver_config = vec!["missing-separator".to_string()];
let err = build_resource_spec(SandboxResourceArgs {
driver_config: &driver_config,
..SandboxResourceArgs::default()
})
.expect_err("invalid driver config should fail");

assert!(err.to_string().contains("--resource-config"));
assert!(err.to_string().contains("KEY=VALUE"));
}

struct EnvVarGuard {
key: &'static str,
original: Option<String>,