From 3ab05fbbd7f449c05f8500907482915bca0bd976 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Fri, 15 May 2026 10:47:49 -0700 Subject: [PATCH 1/3] fix(vm): restore sandboxes after gateway restart Signed-off-by: Drew Newberry --- Cargo.lock | 1 + architecture/compute-runtimes.md | 2 +- crates/openshell-driver-vm/Cargo.toml | 1 + crates/openshell-driver-vm/README.md | 7 + crates/openshell-driver-vm/src/driver.rs | 436 ++++++++++++++++++++- docs/reference/sandbox-compute-drivers.mdx | 2 + e2e/rust/Cargo.toml | 2 +- e2e/rust/e2e-vm.sh | 63 ++- e2e/rust/tests/gateway_resume.rs | 119 ++++-- 9 files changed, 569 insertions(+), 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cba681774..87adc5e2b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3548,6 +3548,7 @@ dependencies = [ "openshell-core", "openshell-vfio", "polling", + "prost", "prost-types", "serde", "serde_json", diff --git a/architecture/compute-runtimes.md b/architecture/compute-runtimes.md index 9f1e1c37a..d79b7366d 100644 --- a/architecture/compute-runtimes.md +++ b/architecture/compute-runtimes.md @@ -29,7 +29,7 @@ reason strings. | Docker | Local development with Docker available. | Container plus nested sandbox namespace. | Uses host networking so loopback gateway endpoints work from the supervisor. | | Podman | Rootless or single-machine deployments. | Container plus nested sandbox namespace. | Uses the Podman REST API, OCI image volumes, and CDI GPU devices when available. | | Kubernetes | Cluster deployment through Helm. | Pod plus nested sandbox namespace. | Uses Kubernetes API objects, service accounts, secrets, PVC-backed workspace storage, and GPU resources. | -| VM | Experimental microVM isolation. | Per-sandbox libkrun VM. | Gateway spawns `openshell-driver-vm` as a subprocess over a private, state-local Unix socket. 
The VM driver boots a cached bootstrap `rootfs.ext4`, prepares requested OCI images inside a bootstrap VM with `umoci`, attaches the prepared image disk read-only, and gives each sandbox a writable `overlay.ext4` for merged-root changes and runtime material. | +| VM | Experimental microVM isolation. | Per-sandbox libkrun VM. | Gateway spawns `openshell-driver-vm` as a subprocess over a private, state-local Unix socket. The VM driver boots a cached bootstrap `rootfs.ext4`, prepares requested OCI images inside a bootstrap VM with `umoci`, attaches the prepared image disk read-only, and gives each sandbox a writable `overlay.ext4` for merged-root changes and runtime material. The driver persists each accepted launch request beside the overlay and restarts those VMs on driver startup without recreating the overlay. | Per-sandbox CPU and memory values currently enter the driver layer through template resource limits. Docker and Podman apply them as runtime limits. diff --git a/crates/openshell-driver-vm/Cargo.toml b/crates/openshell-driver-vm/Cargo.toml index c13d904a6..fb1964415 100644 --- a/crates/openshell-driver-vm/Cargo.toml +++ b/crates/openshell-driver-vm/Cargo.toml @@ -25,6 +25,7 @@ openshell-vfio = { path = "../openshell-vfio" } bollard = { version = "0.20", features = ["ssh"] } tokio = { workspace = true } tonic = { workspace = true, features = ["transport"] } +prost = { workspace = true } prost-types = { workspace = true } futures = { workspace = true } tokio-stream = { workspace = true, features = ["net"] } diff --git a/crates/openshell-driver-vm/README.md b/crates/openshell-driver-vm/README.md index e9900f3bb..f581d8766 100644 --- a/crates/openshell-driver-vm/README.md +++ b/crates/openshell-driver-vm/README.md @@ -189,6 +189,13 @@ the overlay while cached image disks remain unchanged. The overlay disk must be large enough to hold the compressed payload, unpacked rootfs, and sandbox writes during the first prepare. 
+The driver also writes the accepted `DriverSandbox` launch request to +`/sandboxes//sandbox.pb`. If the gateway restarts, it starts a +new VM driver process; that process scans the sandbox state directories, +restarts each persisted VM launcher, and preserves any existing `overlay.ext4` +instead of cloning a fresh overlay template. If a restart happened before the +overlay was created, the driver creates it during the resume attempt. + ## Logs and debugging Raise log verbosity for both processes: diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index e3b98fbd5..ad5625e61 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -39,11 +39,14 @@ use openshell_core::proto::compute::v1::{ compute_driver_server::ComputeDriver, watch_sandboxes_event, }; use openshell_vfio::SysfsRoot; +use prost::Message; use sha2::{Digest, Sha256}; use std::collections::{HashMap, HashSet}; use std::fs; use std::io::Read; use std::net::Ipv4Addr; +#[cfg(unix)] +use std::os::unix::fs::PermissionsExt; use std::path::{Component, Path, PathBuf}; use std::pin::Pin; use std::process::Stdio; @@ -102,6 +105,7 @@ const IMAGE_CACHE_ROOTFS_IMAGE: &str = "rootfs.ext4"; const OVERLAY_TEMPLATE_CACHE_DIR: &str = "overlay-templates"; const OVERLAY_TEMPLATE_CACHE_LAYOUT_VERSION: &str = "sandbox-overlay-ext4-v1"; const SANDBOX_OVERLAY_IMAGE: &str = "overlay.ext4"; +const SANDBOX_REQUEST_FILE: &str = "sandbox.pb"; const GUEST_IMAGE_CONFIG_DIR: &str = "openshell-image"; const GUEST_IMAGE_OCI_LAYOUT_DIR: &str = "oci"; const GUEST_IMAGE_OCI_REF: &str = "openshell"; @@ -278,6 +282,12 @@ struct SandboxRecord { deleting: bool, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum OverlayPreparation { + Fresh, + PreserveExisting, +} + #[derive(Clone)] pub struct VmDriver { config: VmDriverConfig, @@ -306,14 +316,12 @@ impl VmDriver { } let state_root = sandboxes_root_dir(&config.state_dir); - 
tokio::fs::create_dir_all(&state_root) - .await - .map_err(|err| { - format!( - "failed to create state dir '{}': {err}", - state_root.display() - ) - })?; + create_private_dir_all(&state_root).await.map_err(|err| { + format!( + "failed to create state dir '{}': {err}", + state_root.display() + ) + })?; let image_cache_root = image_cache_root_dir(&config.state_dir); tokio::fs::create_dir_all(&image_cache_root) .await @@ -349,7 +357,7 @@ impl VmDriver { ))); let (events, _) = broadcast::channel(WATCH_BUFFER); - Ok(Self { + let driver = Self { config, launcher_bin, registry: Arc::new(Mutex::new(HashMap::new())), @@ -357,7 +365,9 @@ impl VmDriver { events, gpu_inventory, subnet_allocator, - }) + }; + driver.restore_persisted_sandboxes().await; + Ok(driver) } #[must_use] @@ -441,12 +451,21 @@ impl VmDriver { } }; - if let Err(err) = tokio::fs::create_dir_all(&state_dir).await { + if let Err(err) = create_private_dir_all(&state_dir).await { let mut registry = self.registry.lock().await; registry.remove(&sandbox.id); return Err(Status::internal(format!("create state dir failed: {err}"))); } + if let Err(err) = write_sandbox_request(&state_dir, sandbox).await { + let mut registry = self.registry.lock().await; + registry.remove(&sandbox.id); + let _ = tokio::fs::remove_dir_all(&state_dir).await; + return Err(Status::internal(format!( + "write sandbox resume metadata failed: {err}" + ))); + } + self.publish_platform_event( sandbox.id.clone(), platform_event( @@ -470,6 +489,7 @@ impl VmDriver { image_ref_for_task, state_dir_for_task, tls_paths, + OverlayPreparation::Fresh, ) .await; }); @@ -494,14 +514,23 @@ impl VmDriver { image_ref: String, state_dir: PathBuf, tls_paths: Option, + overlay_preparation: OverlayPreparation, ) { let sandbox_id = sandbox.id.clone(); if let Err(err) = self - .provision_sandbox_inner(sandbox, image_ref, state_dir.clone(), tls_paths) + .provision_sandbox_inner( + sandbox, + image_ref, + state_dir.clone(), + tls_paths, + overlay_preparation, + ) 
.await { if err.code() == tonic::Code::Cancelled { - let _ = tokio::fs::remove_dir_all(&state_dir).await; + if overlay_preparation == OverlayPreparation::Fresh { + let _ = tokio::fs::remove_dir_all(&state_dir).await; + } return; } @@ -510,8 +539,14 @@ impl VmDriver { error = %err.message(), "vm driver: sandbox provisioning failed" ); - self.fail_provisioning(&sandbox_id, &state_dir, "ProvisioningFailed", err.message()) - .await; + self.fail_provisioning( + &sandbox_id, + &state_dir, + "ProvisioningFailed", + err.message(), + overlay_preparation == OverlayPreparation::Fresh, + ) + .await; } } @@ -522,6 +557,7 @@ impl VmDriver { image_ref: String, state_dir: PathBuf, tls_paths: Option, + overlay_preparation: OverlayPreparation, ) -> Result<(), Status> { self.ensure_provisioning_active(&sandbox.id).await?; self.publish_platform_event( @@ -559,7 +595,7 @@ impl VmDriver { ), ); if let Err(err) = self - .prepare_runtime_overlay(&overlay_disk, tls_paths.as_ref()) + .prepare_runtime_overlay(&overlay_disk, tls_paths.as_ref(), overlay_preparation) .await { return Err(Status::internal(format!( @@ -856,6 +892,158 @@ impl VmDriver { snapshots } + async fn restore_persisted_sandboxes(&self) { + let state_root = sandboxes_root_dir(&self.config.state_dir); + let mut entries = match tokio::fs::read_dir(&state_root).await { + Ok(entries) => entries, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return, + Err(err) => { + warn!( + state_root = %state_root.display(), + error = %err, + "vm driver: failed to scan persisted sandboxes" + ); + return; + } + }; + + loop { + let entry = match entries.next_entry().await { + Ok(Some(entry)) => entry, + Ok(None) => break, + Err(err) => { + warn!( + state_root = %state_root.display(), + error = %err, + "vm driver: failed to continue scanning persisted sandboxes" + ); + break; + } + }; + let state_dir = entry.path(); + let is_dir = match entry.file_type().await { + Ok(file_type) => file_type.is_dir(), + Err(err) => { + warn!( + 
state_dir = %state_dir.display(), + error = %err, + "vm driver: failed to inspect persisted sandbox state dir" + ); + continue; + } + }; + if !is_dir { + continue; + } + + let request_path = state_dir.join(SANDBOX_REQUEST_FILE); + let sandbox = match read_sandbox_request(&request_path).await { + Ok(sandbox) => sandbox, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue, + Err(err) => { + warn!( + state_dir = %state_dir.display(), + error = %err, + "vm driver: failed to read persisted sandbox request" + ); + continue; + } + }; + + if let Err(status) = + validate_restored_sandbox_state(&self.config.state_dir, &state_dir, &sandbox) + { + warn!( + sandbox_id = %sandbox.id, + state_dir = %state_dir.display(), + error = %status.message(), + "vm driver: ignoring invalid persisted sandbox state" + ); + continue; + } + + self.restore_persisted_sandbox(sandbox, state_dir).await; + } + } + + async fn restore_persisted_sandbox(&self, sandbox: Sandbox, state_dir: PathBuf) { + let Some(image_ref) = self.resolved_sandbox_image(&sandbox) else { + warn!( + sandbox_id = %sandbox.id, + sandbox_name = %sandbox.name, + "vm driver: cannot restore persisted sandbox without image" + ); + return; + }; + let tls_paths = match self.config.tls_paths() { + Ok(paths) => paths, + Err(err) => { + warn!( + sandbox_id = %sandbox.id, + sandbox_name = %sandbox.name, + error = %err, + "vm driver: cannot restore persisted sandbox TLS configuration" + ); + return; + } + }; + + let snapshot = sandbox_snapshot(&sandbox, provisioning_condition(), false); + { + let mut registry = self.registry.lock().await; + if registry.contains_key(&sandbox.id) { + return; + } + registry.insert( + sandbox.id.clone(), + SandboxRecord { + snapshot: snapshot.clone(), + state_dir: state_dir.clone(), + process: None, + provisioning_task: None, + gpu_bdf: None, + deleting: false, + }, + ); + } + + self.publish_platform_event( + sandbox.id.clone(), + platform_event( + "vm", + "Normal", + "Restoring", + 
"Restoring persisted VM sandbox after driver restart".to_string(), + ), + ); + self.publish_snapshot(snapshot); + + let driver = self.clone(); + let sandbox_id = sandbox.id.clone(); + let task = tokio::spawn(async move { + driver + .provision_sandbox( + sandbox, + image_ref, + state_dir, + tls_paths, + OverlayPreparation::PreserveExisting, + ) + .await; + }); + + let mut registry = self.registry.lock().await; + if let Some(record) = registry.get_mut(&sandbox_id) { + if record.deleting { + task.abort(); + } else { + record.provisioning_task = Some(task); + } + } else { + task.abort(); + } + } + fn release_gpu_and_subnet(&self, sandbox_id: &str) { if let Some(inventory) = self.gpu_inventory.as_ref() && let Ok(mut inv) = inventory.lock() @@ -916,6 +1104,7 @@ impl VmDriver { state_dir: &Path, reason: &str, message: &str, + remove_state: bool, ) { self.release_gpu_and_subnet(sandbox_id); let snapshot = { @@ -937,7 +1126,9 @@ impl VmDriver { Some(record.snapshot.clone()) }; - let _ = tokio::fs::remove_dir_all(state_dir).await; + if remove_state { + let _ = tokio::fs::remove_dir_all(state_dir).await; + } self.publish_platform_event( sandbox_id.to_string(), platform_event( @@ -999,6 +1190,7 @@ impl VmDriver { &self, overlay_disk: &Path, tls_paths: Option<&VmDriverTlsPaths>, + preparation: OverlayPreparation, ) -> Result<(), String> { let tls_materials = match tls_paths { Some(paths) => Some(read_guest_tls_materials(paths).await?), @@ -1028,10 +1220,12 @@ impl VmDriver { } tokio::task::spawn_blocking(move || { - create_sandbox_overlay_image_from_template( + prepare_sandbox_overlay_image( &template_path, &overlay_disk, tls_materials.as_ref(), + preparation, + overlay_size_bytes, ) }) .await @@ -3313,6 +3507,21 @@ fn sandboxes_root_dir(root: &Path) -> PathBuf { root.join("sandboxes") } +async fn create_private_dir_all(path: &Path) -> Result<(), std::io::Error> { + tokio::fs::create_dir_all(path).await?; + restrict_owner_only_dir(path).await +} + +#[cfg(unix)] +async fn 
restrict_owner_only_dir(path: &Path) -> Result<(), std::io::Error> { + tokio::fs::set_permissions(path, fs::Permissions::from_mode(0o700)).await +} + +#[cfg(not(unix))] +async fn restrict_owner_only_dir(_path: &Path) -> Result<(), std::io::Error> { + Ok(()) +} + #[allow(clippy::result_large_err)] fn sandbox_state_dir(root: &Path, sandbox_id: &str) -> Result { validate_sandbox_id(sandbox_id)?; @@ -3550,6 +3759,63 @@ async fn write_sandbox_image_metadata( Ok(()) } +async fn write_sandbox_request(state_dir: &Path, sandbox: &Sandbox) -> Result<(), std::io::Error> { + restrict_owner_only_dir(state_dir).await?; + write_private_file( + &state_dir.join(SANDBOX_REQUEST_FILE), + sandbox.encode_to_vec(), + ) + .await +} + +async fn read_sandbox_request(path: &Path) -> Result { + let bytes = tokio::fs::read(path).await?; + Sandbox::decode(bytes.as_slice()).map_err(|err| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("decode persisted sandbox request: {err}"), + ) + }) +} + +async fn write_private_file(path: &Path, bytes: Vec) -> Result<(), std::io::Error> { + tokio::fs::write(path, bytes).await?; + restrict_owner_read_write(path).await +} + +#[cfg(unix)] +async fn restrict_owner_read_write(path: &Path) -> Result<(), std::io::Error> { + tokio::fs::set_permissions(path, fs::Permissions::from_mode(0o600)).await +} + +#[cfg(not(unix))] +async fn restrict_owner_read_write(_path: &Path) -> Result<(), std::io::Error> { + Ok(()) +} + +#[allow(clippy::result_large_err)] +fn validate_restored_sandbox_state( + root: &Path, + state_dir: &Path, + sandbox: &Sandbox, +) -> Result<(), Status> { + validate_sandbox_id(&sandbox.id)?; + validate_sandbox_state_dir(root, state_dir)?; + let Some(dir_name) = state_dir.file_name().and_then(|name| name.to_str()) else { + return Err(Status::internal(format!( + "sandbox state path has no valid directory name: {}", + state_dir.display() + ))); + }; + if dir_name != sandbox.id { + return Err(Status::internal(format!( + "sandbox state 
dir '{}' does not match persisted sandbox id '{}'", + dir_name, sandbox.id + ))); + } + Ok(()) +} + #[derive(Debug, Clone)] struct GuestTlsMaterials { ca: Vec, @@ -3663,6 +3929,48 @@ fn create_sandbox_overlay_image_from_template( Ok(()) } +fn prepare_sandbox_overlay_image( + template_path: &Path, + overlay_disk: &Path, + tls_materials: Option<&GuestTlsMaterials>, + preparation: OverlayPreparation, + expected_size_bytes: u64, +) -> Result<(), String> { + if preparation == OverlayPreparation::PreserveExisting { + match fs::metadata(overlay_disk) { + Ok(metadata) if metadata.is_file() && metadata.len() == expected_size_bytes => { + if let Some(tls) = tls_materials { + inject_guest_tls_materials(overlay_disk, tls)?; + } + return Ok(()); + } + Ok(metadata) if metadata.is_file() => { + return Err(format!( + "existing overlay disk '{}' has size {}, expected {}", + overlay_disk.display(), + metadata.len(), + expected_size_bytes + )); + } + Ok(_) => { + return Err(format!( + "existing overlay path '{}' is not a file", + overlay_disk.display() + )); + } + Err(err) if err.kind() == std::io::ErrorKind::NotFound => {} + Err(err) => { + return Err(format!( + "stat overlay disk {}: {err}", + overlay_disk.display() + )); + } + } + } + + create_sandbox_overlay_image_from_template(template_path, overlay_disk, tls_materials) +} + fn inject_guest_tls_materials( overlay_disk: &Path, materials: &GuestTlsMaterials, @@ -4328,6 +4636,98 @@ mod tests { ); } + #[tokio::test] + async fn sandbox_request_metadata_round_trips_for_resume() { + let base = unique_temp_dir(); + let state_dir = base.join("sandboxes").join("sandbox-123"); + std::fs::create_dir_all(&state_dir).unwrap(); + let sandbox = Sandbox { + id: "sandbox-123".to_string(), + name: "resume-sandbox".to_string(), + namespace: "vm-dev".to_string(), + spec: Some(SandboxSpec { + environment: HashMap::from([("KEY".to_string(), "value".to_string())]), + template: Some(SandboxTemplate { + image: 
"ghcr.io/example/sandbox:latest".to_string(), + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }; + + write_sandbox_request(&state_dir, &sandbox) + .await + .expect("write sandbox request"); + let restored = read_sandbox_request(&state_dir.join(SANDBOX_REQUEST_FILE)) + .await + .expect("read sandbox request"); + + assert_eq!(restored, sandbox); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt as _; + + let dir_mode = std::fs::metadata(&state_dir).unwrap().permissions().mode() & 0o777; + let file_mode = std::fs::metadata(state_dir.join(SANDBOX_REQUEST_FILE)) + .unwrap() + .permissions() + .mode() + & 0o777; + assert_eq!(dir_mode, 0o700); + assert_eq!(file_mode, 0o600); + } + validate_restored_sandbox_state(&base, &state_dir, &restored) + .expect("restored state should validate"); + + let _ = std::fs::remove_dir_all(base); + } + + #[test] + fn prepare_sandbox_overlay_preserves_existing_overlay_on_resume() { + let base = unique_temp_dir(); + std::fs::create_dir_all(&base).unwrap(); + let template = base.join("template.ext4"); + let overlay = base.join("overlay.ext4"); + std::fs::write(&template, b"fresh-overlay").unwrap(); + std::fs::write(&overlay, b"saved-overlay").unwrap(); + + prepare_sandbox_overlay_image( + &template, + &overlay, + None, + OverlayPreparation::PreserveExisting, + "saved-overlay".len() as u64, + ) + .expect("preserve existing overlay"); + + assert_eq!(std::fs::read(&overlay).unwrap(), b"saved-overlay"); + + let _ = std::fs::remove_dir_all(base); + } + + #[test] + fn prepare_sandbox_overlay_creates_missing_overlay_on_resume() { + let base = unique_temp_dir(); + std::fs::create_dir_all(&base).unwrap(); + let template = base.join("template.ext4"); + let overlay = base.join("overlay.ext4"); + std::fs::write(&template, b"fresh-overlay").unwrap(); + + prepare_sandbox_overlay_image( + &template, + &overlay, + None, + OverlayPreparation::PreserveExisting, + "fresh-overlay".len() as u64, + ) + .expect("create 
missing overlay"); + + assert_eq!(std::fs::read(&overlay).unwrap(), b"fresh-overlay"); + + let _ = std::fs::remove_dir_all(base); + } + #[test] fn overlay_upper_path_targets_overlay_upperdir() { assert_eq!( diff --git a/docs/reference/sandbox-compute-drivers.mdx b/docs/reference/sandbox-compute-drivers.mdx index 43b7fb81e..9055799c3 100644 --- a/docs/reference/sandbox-compute-drivers.mdx +++ b/docs/reference/sandbox-compute-drivers.mdx @@ -69,6 +69,8 @@ The VM driver boots a cached immutable bootstrap ext4 root disk. When the reques VM sandbox creation follows the same progress model as Kubernetes-backed sandboxes. The gateway accepts the sandbox, then the VM driver publishes watch events while it resolves the image, prepares or reuses the bootstrap and prepared image caches, creates the writable overlay, and starts the VM launcher. +On gateway restart, the gateway starts a fresh VM driver process. The driver scans its state directory for accepted sandbox launch records, restarts those VMs, and reuses each sandbox's existing `overlay.ext4` so files written inside the sandbox remain available after the supervisor reconnects. + For maintainer-level implementation details, refer to the [VM driver README](https://github.com/NVIDIA/OpenShell/blob/main/crates/openshell-driver-vm/README.md). ### Enable the VM Driver diff --git a/e2e/rust/Cargo.toml b/e2e/rust/Cargo.toml index 89a75967a..2f25b3054 100644 --- a/e2e/rust/Cargo.toml +++ b/e2e/rust/Cargo.toml @@ -43,7 +43,7 @@ required-features = ["e2e-docker"] [[test]] name = "gateway_resume" path = "tests/gateway_resume.rs" -required-features = ["e2e-docker"] +required-features = ["e2e"] [[test]] name = "websocket_conformance" diff --git a/e2e/rust/e2e-vm.sh b/e2e/rust/e2e-vm.sh index 73be90536..097914d25 100755 --- a/e2e/rust/e2e-vm.sh +++ b/e2e/rust/e2e-vm.sh @@ -29,9 +29,9 @@ # 3. On macOS, codesigns the VM driver (libkrun needs the # `com.apple.security.hypervisor` entitlement). # 4. 
Starts the gateway with `--drivers vm --disable-tls -# --db-url sqlite::memory:` on a random -# free port, waits for `Server listening`, then runs the -# cluster-agnostic Rust smoke test. +# --disable-gateway-auth --db-url sqlite:/gateway.db` on a random +# free port, waits for `Server listening`, then runs the selected +# Rust e2e test (`smoke` by default). # 5. Tears the gateway down and (on failure) preserves the gateway # log and every VM serial console log for post-mortem. # @@ -42,9 +42,12 @@ set -euo pipefail ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +source "${ROOT}/e2e/support/gateway-common.sh" + COMPRESSED_DIR="${ROOT}/target/vm-runtime-compressed" GATEWAY_BIN="${ROOT}/target/debug/openshell-gateway" DRIVER_BIN="${ROOT}/target/debug/openshell-driver-vm" +E2E_TEST="${OPENSHELL_E2E_VM_TEST:-smoke}" # The VM driver places `compute-driver.sock` under --vm-driver-state-dir. # AF_UNIX SUN_LEN is 104 bytes on macOS (108 on Linux), so paths anchored @@ -110,23 +113,31 @@ RUN_STATE_DIR="${STATE_DIR_ROOT}/os-vm-e2e-${HOST_PORT}-$$" mkdir -p "${RUN_STATE_DIR}" GATEWAY_LOG="$(mktemp /tmp/openshell-gateway-e2e.XXXXXX)" +GATEWAY_PID_FILE="${RUN_STATE_DIR}/gateway.pid" +GATEWAY_ARGS_FILE="${RUN_STATE_DIR}/gateway.args" +GATEWAY_DB="${RUN_STATE_DIR}/gateway.db" # ── Cleanup (trap) ─────────────────────────────────────────────────── cleanup() { local exit_code=$? - if [ -n "${GATEWAY_PID:-}" ] && kill -0 "${GATEWAY_PID}" 2>/dev/null; then - echo "Stopping openshell-gateway (pid ${GATEWAY_PID})..." + local gateway_pid="${GATEWAY_PID:-}" + if [ -f "${GATEWAY_PID_FILE:-}" ]; then + gateway_pid="$(cat "${GATEWAY_PID_FILE}" 2>/dev/null || true)" + fi + + if [ -n "${gateway_pid}" ] && kill -0 "${gateway_pid}" 2>/dev/null; then + echo "Stopping openshell-gateway (pid ${gateway_pid})..." # SIGTERM first; gateway drops ManagedDriverProcess which SIGKILLs # the driver and removes the UDS. Wait briefly, then force-kill. 
- kill -TERM "${GATEWAY_PID}" 2>/dev/null || true + kill -TERM "${gateway_pid}" 2>/dev/null || true for _ in 1 2 3 4 5 6 7 8 9 10; do - kill -0 "${GATEWAY_PID}" 2>/dev/null || break + kill -0 "${gateway_pid}" 2>/dev/null || break sleep 0.5 done - kill -KILL "${GATEWAY_PID}" 2>/dev/null || true - wait "${GATEWAY_PID}" 2>/dev/null || true + kill -KILL "${gateway_pid}" 2>/dev/null || true + wait "${gateway_pid}" 2>/dev/null || true fi # On failure, keep the VM console log for debugging. We deliberately @@ -173,16 +184,22 @@ echo "==> Starting openshell-gateway on 127.0.0.1:${HOST_PORT} (state: ${RUN_STA # (192.168.127.1) does NOT forward arbitrary host ports. The driver also # rewrites loopback URLs to this hostname as a safety net, so this matches # what the guest will actually see and aligns with `tasks/scripts/gateway-vm.sh`. -"${GATEWAY_BIN}" \ - --drivers vm \ - --disable-tls \ - --db-url 'sqlite::memory:' \ - --port "${HOST_PORT}" \ - --grpc-endpoint "http://host.containers.internal:${HOST_PORT}" \ - --driver-dir "${ROOT}/target/debug" \ - --vm-driver-state-dir "${RUN_STATE_DIR}" \ +GATEWAY_ARGS=( + --drivers vm + --disable-tls + --disable-gateway-auth + --db-url "sqlite:${GATEWAY_DB}?mode=rwc" + --port "${HOST_PORT}" + --grpc-endpoint "http://host.containers.internal:${HOST_PORT}" + --driver-dir "${ROOT}/target/debug" + --vm-driver-state-dir "${RUN_STATE_DIR}" +) +e2e_write_gateway_args_file "${GATEWAY_ARGS_FILE}" "${GATEWAY_ARGS[@]}" + +"${GATEWAY_BIN}" "${GATEWAY_ARGS[@]}" \ >"${GATEWAY_LOG}" 2>&1 & GATEWAY_PID=$! 
+printf '%s\n' "${GATEWAY_PID}" >"${GATEWAY_PID_FILE}" # ── Wait for gateway readiness ─────────────────────────────────────── # @@ -216,6 +233,12 @@ echo "==> Gateway ready after ${elapsed}s" export OPENSHELL_GATEWAY_ENDPOINT="http://127.0.0.1:${HOST_PORT}" export OPENSHELL_E2E_EXPECT_VM_OVERLAY=1 +export OPENSHELL_E2E_DRIVER="vm" +e2e_export_gateway_restart_metadata \ + "${GATEWAY_BIN}" \ + "${GATEWAY_ARGS_FILE}" \ + "${GATEWAY_LOG}" \ + "${GATEWAY_PID_FILE}" # The VM driver creates each sandbox VM from a cached read-only ext4 root disk # plus a writable overlay disk. The guest's sandbox supervisor then initializes @@ -223,11 +246,11 @@ export OPENSHELL_E2E_EXPECT_VM_OVERLAY=1 # preparation; allow 180s for slower CI runners. export OPENSHELL_PROVISION_TIMEOUT="${SANDBOX_PROVISION_TIMEOUT}" -echo "==> Running e2e smoke test (endpoint: ${OPENSHELL_GATEWAY_ENDPOINT})" +echo "==> Running e2e ${E2E_TEST} test (endpoint: ${OPENSHELL_GATEWAY_ENDPOINT})" cargo test \ --manifest-path "${ROOT}/e2e/rust/Cargo.toml" \ --features e2e \ - --test smoke \ + --test "${E2E_TEST}" \ -- --nocapture -echo "==> Smoke test passed." +echo "==> ${E2E_TEST} test passed." diff --git a/e2e/rust/tests/gateway_resume.rs b/e2e/rust/tests/gateway_resume.rs index ded961859..745f20850 100644 --- a/e2e/rust/tests/gateway_resume.rs +++ b/e2e/rust/tests/gateway_resume.rs @@ -3,11 +3,11 @@ #![cfg(feature = "e2e")] -//! E2E coverage for resuming Docker sandboxes after a standalone gateway restart. +//! E2E coverage for resuming sandboxes after a standalone gateway restart. //! -//! This intentionally targets the Docker-driver gateway started by -//! `e2e/with-docker-gateway.sh`. Existing-endpoint E2E runs do not own the -//! gateway process, so they skip this restart-only coverage. +//! This intentionally targets managed local gateways started by e2e wrapper +//! scripts. Existing-endpoint E2E runs do not own the gateway process, so they +//! skip this restart-only coverage. 
use std::process::{Command, Stdio}; use std::time::{Duration, Instant}; @@ -20,6 +20,7 @@ use tokio::time::sleep; const MANAGED_BY_LABEL_FILTER: &str = "label=openshell.ai/managed-by=openshell"; const READY_MARKER: &str = "gateway-resume-ready"; +const RESUME_FILE: &str = "/sandbox/gateway-resume-state"; const SANDBOX_NAMESPACE_LABEL: &str = "openshell.ai/sandbox-namespace"; const SANDBOX_NAME_LABEL: &str = "openshell.ai/sandbox-name"; @@ -77,6 +78,46 @@ async fn sandbox_names() -> Result, String> { .collect()) } +async fn wait_for_sandbox_exec_contains( + sandbox_name: &str, + command: &[&str], + expected: &str, + timeout: Duration, +) -> Result<(), String> { + let start = Instant::now(); + let mut last_output: String; + + loop { + let mut cmd = openshell_cmd(); + cmd.args(["sandbox", "exec", "--name", sandbox_name, "--no-tty", "--"]) + .args(command) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + match cmd.output().await { + Ok(output) => { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + last_output = strip_ansi(&format!("{stdout}{stderr}")); + if output.status.success() && last_output.contains(expected) { + return Ok(()); + } + } + Err(err) => { + last_output = format!("failed to spawn openshell sandbox exec: {err}"); + } + } + + if start.elapsed() > timeout { + return Err(format!( + "sandbox '{sandbox_name}' exec did not produce '{expected}' within {}s. 
Last output:\n{last_output}", + timeout.as_secs() + )); + } + sleep(Duration::from_secs(2)).await; + } +} + fn sandbox_container_id(namespace: &str, sandbox_name: &str) -> Result { let namespace_filter = format!("label={SANDBOX_NAMESPACE_LABEL}={namespace}"); let sandbox_name_filter = format!("label={SANDBOX_NAME_LABEL}={sandbox_name}"); @@ -169,51 +210,72 @@ async fn wait_for_container_running( } #[tokio::test] -async fn docker_gateway_restart_resumes_running_sandbox() { +async fn gateway_restart_resumes_running_sandbox() { let Some(gateway) = ManagedGateway::from_env().expect("load managed e2e gateway metadata") else { eprintln!("Skipping gateway resume test: e2e gateway is not managed by this test run"); return; }; - let Some(namespace) = std::env::var("OPENSHELL_E2E_DOCKER_NETWORK_NAME") - .ok() - .filter(|value| !value.trim().is_empty()) - else { - eprintln!("Skipping gateway resume test: Docker e2e namespace is unavailable"); - return; + + let is_vm = std::env::var("OPENSHELL_E2E_DRIVER").as_deref() == Ok("vm"); + let docker_namespace = if is_vm { + None + } else { + let namespace = std::env::var("OPENSHELL_E2E_DOCKER_NETWORK_NAME") + .ok() + .filter(|value| !value.trim().is_empty()); + if namespace.is_none() { + eprintln!("Skipping gateway resume test: Docker e2e namespace is unavailable"); + return; + } + namespace }; wait_for_healthy(Duration::from_secs(30)) .await .expect("gateway should start healthy"); + let script = format!( + "echo before-restart > {RESUME_FILE}; echo {READY_MARKER}; while true; do sleep 1; done" + ); let mut sandbox = SandboxGuard::create_keep( - &[ - "sh", - "-c", - "echo gateway-resume-ready; while true; do sleep 1; done", - ], + &["sh", "-lc", &script], READY_MARKER, ) .await .expect("create long-running sandbox"); - wait_for_container_running(&namespace, &sandbox.name, true, Duration::from_secs(60)) + let before_restart = sandbox + .exec(&["cat", RESUME_FILE]) .await - .expect("sandbox container should be running before gateway 
restart"); + .expect("read sandbox state before restart"); + assert!( + before_restart.contains("before-restart"), + "sandbox state was not written before restart:\n{before_restart}" + ); + + if let Some(namespace) = docker_namespace.as_deref() { + wait_for_container_running(namespace, &sandbox.name, true, Duration::from_secs(60)) + .await + .expect("sandbox container should be running before gateway restart"); + } gateway.stop().expect("stop e2e gateway"); - wait_for_container_running(&namespace, &sandbox.name, false, Duration::from_secs(120)) - .await - .expect("gateway shutdown should stop managed Docker sandboxes"); + if let Some(namespace) = docker_namespace.as_deref() { + wait_for_container_running(namespace, &sandbox.name, false, Duration::from_secs(120)) + .await + .expect("gateway shutdown should stop managed Docker sandboxes"); + } gateway.start().expect("restart e2e gateway"); wait_for_healthy(Duration::from_secs(120)) .await .expect("gateway should become healthy after restart"); - wait_for_container_running(&namespace, &sandbox.name, true, Duration::from_secs(120)) - .await - .expect("gateway startup should resume the Docker sandbox container"); + if let Some(namespace) = docker_namespace.as_deref() { + wait_for_container_running(namespace, &sandbox.name, true, Duration::from_secs(120)) + .await + .expect("gateway startup should resume the Docker sandbox container"); + } let names = sandbox_names().await.expect("list sandboxes after restart"); assert!( @@ -222,5 +284,14 @@ async fn docker_gateway_restart_resumes_running_sandbox() { sandbox.name ); + wait_for_sandbox_exec_contains( + &sandbox.name, + &["cat", RESUME_FILE], + "before-restart", + Duration::from_secs(240), + ) + .await + .expect("sandbox should become ready again with its state preserved"); + sandbox.cleanup().await; } From a7c22c4fa8aad2244722abb80a2900801ea23085 Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Fri, 15 May 2026 11:03:50 -0700 Subject: [PATCH 2/3] test(vm): gate gateway 
resume coverage Signed-off-by: Drew Newberry --- e2e/rust/Cargo.toml | 8 +- e2e/rust/e2e-vm.sh | 5 +- e2e/rust/tests/gateway_resume.rs | 53 ++++----- e2e/rust/tests/vm_gateway_resume.rs | 176 ++++++++++++++++++++++++++++ 4 files changed, 206 insertions(+), 36 deletions(-) create mode 100644 e2e/rust/tests/vm_gateway_resume.rs diff --git a/e2e/rust/Cargo.toml b/e2e/rust/Cargo.toml index 2f25b3054..03ad930b4 100644 --- a/e2e/rust/Cargo.toml +++ b/e2e/rust/Cargo.toml @@ -29,6 +29,7 @@ e2e-docker-gpu = ["e2e-docker", "e2e-gpu"] e2e-kubernetes = ["e2e"] e2e-podman = ["e2e", "e2e-host-gateway"] e2e-podman-gpu = ["e2e-podman", "e2e-gpu"] +e2e-vm = ["e2e"] [[test]] name = "custom_image" @@ -43,7 +44,12 @@ required-features = ["e2e-docker"] [[test]] name = "gateway_resume" path = "tests/gateway_resume.rs" -required-features = ["e2e"] +required-features = ["e2e-docker"] + +[[test]] +name = "vm_gateway_resume" +path = "tests/vm_gateway_resume.rs" +required-features = ["e2e-vm"] [[test]] name = "websocket_conformance" diff --git a/e2e/rust/e2e-vm.sh b/e2e/rust/e2e-vm.sh index 097914d25..5d42bc781 100755 --- a/e2e/rust/e2e-vm.sh +++ b/e2e/rust/e2e-vm.sh @@ -48,6 +48,7 @@ COMPRESSED_DIR="${ROOT}/target/vm-runtime-compressed" GATEWAY_BIN="${ROOT}/target/debug/openshell-gateway" DRIVER_BIN="${ROOT}/target/debug/openshell-driver-vm" E2E_TEST="${OPENSHELL_E2E_VM_TEST:-smoke}" +E2E_FEATURES="${OPENSHELL_E2E_VM_FEATURES:-e2e-vm}" # The VM driver places `compute-driver.sock` under --vm-driver-state-dir. # AF_UNIX SUN_LEN is 104 bytes on macOS (108 on Linux), so paths anchored @@ -246,10 +247,10 @@ e2e_export_gateway_restart_metadata \ # preparation; allow 180s for slower CI runners. 
export OPENSHELL_PROVISION_TIMEOUT="${SANDBOX_PROVISION_TIMEOUT}" -echo "==> Running e2e ${E2E_TEST} test (endpoint: ${OPENSHELL_GATEWAY_ENDPOINT})" +echo "==> Running e2e ${E2E_TEST} test (features: ${E2E_FEATURES}, endpoint: ${OPENSHELL_GATEWAY_ENDPOINT})" cargo test \ --manifest-path "${ROOT}/e2e/rust/Cargo.toml" \ - --features e2e \ + --features "${E2E_FEATURES}" \ --test "${E2E_TEST}" \ -- --nocapture diff --git a/e2e/rust/tests/gateway_resume.rs b/e2e/rust/tests/gateway_resume.rs index 745f20850..e3a2e6664 100644 --- a/e2e/rust/tests/gateway_resume.rs +++ b/e2e/rust/tests/gateway_resume.rs @@ -3,11 +3,11 @@ #![cfg(feature = "e2e")] -//! E2E coverage for resuming sandboxes after a standalone gateway restart. +//! E2E coverage for resuming Docker sandboxes after a standalone gateway restart. //! -//! This intentionally targets managed local gateways started by e2e wrapper -//! scripts. Existing-endpoint E2E runs do not own the gateway process, so they -//! skip this restart-only coverage. +//! This intentionally targets the Docker-driver gateway started by +//! `e2e/with-docker-gateway.sh`. Existing-endpoint E2E runs do not own the +//! gateway process, so they skip this restart-only coverage. 
use std::process::{Command, Stdio}; use std::time::{Duration, Instant}; @@ -210,25 +210,18 @@ async fn wait_for_container_running( } #[tokio::test] -async fn gateway_restart_resumes_running_sandbox() { +async fn docker_gateway_restart_resumes_running_sandbox() { let Some(gateway) = ManagedGateway::from_env().expect("load managed e2e gateway metadata") else { eprintln!("Skipping gateway resume test: e2e gateway is not managed by this test run"); return; }; - - let is_vm = std::env::var("OPENSHELL_E2E_DRIVER").as_deref() == Ok("vm"); - let docker_namespace = if is_vm { - None - } else { - let namespace = std::env::var("OPENSHELL_E2E_DOCKER_NETWORK_NAME") - .ok() - .filter(|value| !value.trim().is_empty()); - if namespace.is_none() { - eprintln!("Skipping gateway resume test: Docker e2e namespace is unavailable"); - return; - } - namespace + let Some(namespace) = std::env::var("OPENSHELL_E2E_DOCKER_NETWORK_NAME") + .ok() + .filter(|value| !value.trim().is_empty()) + else { + eprintln!("Skipping gateway resume test: Docker e2e namespace is unavailable"); + return; }; wait_for_healthy(Duration::from_secs(30)) @@ -254,28 +247,22 @@ async fn gateway_restart_resumes_running_sandbox() { "sandbox state was not written before restart:\n{before_restart}" ); - if let Some(namespace) = docker_namespace.as_deref() { - wait_for_container_running(namespace, &sandbox.name, true, Duration::from_secs(60)) - .await - .expect("sandbox container should be running before gateway restart"); - } + wait_for_container_running(&namespace, &sandbox.name, true, Duration::from_secs(60)) + .await + .expect("sandbox container should be running before gateway restart"); gateway.stop().expect("stop e2e gateway"); - if let Some(namespace) = docker_namespace.as_deref() { - wait_for_container_running(namespace, &sandbox.name, false, Duration::from_secs(120)) - .await - .expect("gateway shutdown should stop managed Docker sandboxes"); - } + wait_for_container_running(&namespace, &sandbox.name, false, 
Duration::from_secs(120)) + .await + .expect("gateway shutdown should stop managed Docker sandboxes"); gateway.start().expect("restart e2e gateway"); wait_for_healthy(Duration::from_secs(120)) .await .expect("gateway should become healthy after restart"); - if let Some(namespace) = docker_namespace.as_deref() { - wait_for_container_running(namespace, &sandbox.name, true, Duration::from_secs(120)) - .await - .expect("gateway startup should resume the Docker sandbox container"); - } + wait_for_container_running(&namespace, &sandbox.name, true, Duration::from_secs(120)) + .await + .expect("gateway startup should resume the Docker sandbox container"); let names = sandbox_names().await.expect("list sandboxes after restart"); assert!( diff --git a/e2e/rust/tests/vm_gateway_resume.rs b/e2e/rust/tests/vm_gateway_resume.rs new file mode 100644 index 000000000..488be681a --- /dev/null +++ b/e2e/rust/tests/vm_gateway_resume.rs @@ -0,0 +1,176 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![cfg(feature = "e2e-vm")] + +//! VM-specific E2E coverage for resuming sandboxes after a standalone gateway +//! restart. +//! +//! This test is gated behind the `e2e-vm` feature because it requires the VM +//! driver runtime prepared by `e2e/rust/e2e-vm.sh`. 
+ +use std::process::Stdio; +use std::time::{Duration, Instant}; + +use openshell_e2e::harness::binary::openshell_cmd; +use openshell_e2e::harness::gateway::ManagedGateway; +use openshell_e2e::harness::output::strip_ansi; +use openshell_e2e::harness::sandbox::SandboxGuard; +use tokio::time::sleep; + +const READY_MARKER: &str = "vm-gateway-resume-ready"; +const RESUME_FILE: &str = "/sandbox/vm-gateway-resume-state"; + +async fn run_cli(args: &[&str]) -> (String, i32) { + let mut cmd = openshell_cmd(); + cmd.args(args).stdout(Stdio::piped()).stderr(Stdio::piped()); + + let output = cmd.output().await.expect("spawn openshell"); + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let combined = format!("{stdout}{stderr}"); + let code = output.status.code().unwrap_or(-1); + (combined, code) +} + +async fn wait_for_healthy(timeout: Duration) -> Result<(), String> { + let start = Instant::now(); + let mut last_output: String; + + loop { + let (output, code) = run_cli(&["status"]).await; + let clean = strip_ansi(&output); + let lower = clean.to_lowercase(); + if code == 0 + && (lower.contains("healthy") + || lower.contains("running") + || lower.contains("connected")) + { + return Ok(()); + } + last_output = clean; + + if start.elapsed() > timeout { + return Err(format!( + "gateway did not become healthy within {}s. 
Last output:\n{last_output}",
+ timeout.as_secs()
+ ));
+ }
+ sleep(Duration::from_secs(2)).await;
+ }
+}
+
+async fn sandbox_names() -> Result<Vec<String>, String> {
+ let (output, code) = run_cli(&["sandbox", "list", "--names"]).await;
+ let clean = strip_ansi(&output);
+ if code != 0 {
+ return Err(format!("sandbox list failed (exit {code}):\n{clean}"));
+ }
+
+ Ok(clean
+ .lines()
+ .map(str::trim)
+ .filter(|line| !line.is_empty())
+ .map(ToOwned::to_owned)
+ .collect())
+}
+
+async fn wait_for_sandbox_exec_contains(
+ sandbox_name: &str,
+ command: &[&str],
+ expected: &str,
+ timeout: Duration,
+) -> Result<(), String> {
+ let start = Instant::now();
+ let mut last_output: String;
+
+ loop {
+ let mut cmd = openshell_cmd();
+ cmd.args(["sandbox", "exec", "--name", sandbox_name, "--no-tty", "--"])
+ .args(command)
+ .stdout(Stdio::piped())
+ .stderr(Stdio::piped());
+
+ match cmd.output().await {
+ Ok(output) => {
+ let stdout = String::from_utf8_lossy(&output.stdout);
+ let stderr = String::from_utf8_lossy(&output.stderr);
+ last_output = strip_ansi(&format!("{stdout}{stderr}"));
+ if output.status.success() && last_output.contains(expected) {
+ return Ok(());
+ }
+ }
+ Err(err) => {
+ last_output = format!("failed to spawn openshell sandbox exec: {err}");
+ }
+ }
+
+ if start.elapsed() > timeout {
+ return Err(format!(
+ "sandbox '{sandbox_name}' exec did not produce '{expected}' within {}s. 
Last output:\n{last_output}", + timeout.as_secs() + )); + } + sleep(Duration::from_secs(2)).await; + } +} + +#[tokio::test] +async fn vm_gateway_restart_resumes_running_sandbox() { + if std::env::var("OPENSHELL_E2E_DRIVER").as_deref() != Ok("vm") { + eprintln!("Skipping VM gateway resume test: e2e driver is not vm"); + return; + } + let Some(gateway) = ManagedGateway::from_env().expect("load managed e2e gateway metadata") + else { + eprintln!("Skipping VM gateway resume test: e2e gateway is not managed by this test run"); + return; + }; + + wait_for_healthy(Duration::from_secs(30)) + .await + .expect("gateway should start healthy"); + + let script = format!( + "echo before-restart > {RESUME_FILE}; echo {READY_MARKER}; while true; do sleep 1; done" + ); + let mut sandbox = SandboxGuard::create_keep( + &["sh", "-lc", &script], + READY_MARKER, + ) + .await + .expect("create long-running VM sandbox"); + + let before_restart = sandbox + .exec(&["cat", RESUME_FILE]) + .await + .expect("read VM sandbox state before restart"); + assert!( + before_restart.contains("before-restart"), + "VM sandbox state was not written before restart:\n{before_restart}" + ); + + gateway.stop().expect("stop e2e gateway"); + gateway.start().expect("restart e2e gateway"); + wait_for_healthy(Duration::from_secs(120)) + .await + .expect("gateway should become healthy after restart"); + + let names = sandbox_names().await.expect("list sandboxes after restart"); + assert!( + names.contains(&sandbox.name), + "sandbox '{}' should still be listed after gateway restart. 
Names: {names:?}", + sandbox.name + ); + + wait_for_sandbox_exec_contains( + &sandbox.name, + &["cat", RESUME_FILE], + "before-restart", + Duration::from_secs(240), + ) + .await + .expect("VM sandbox should become ready again with its overlay state preserved"); + + sandbox.cleanup().await; +} From 6f4e7c51638e1df332e6d1d001990b1fce75ce9f Mon Sep 17 00:00:00 2001 From: Drew Newberry Date: Fri, 15 May 2026 13:47:11 -0700 Subject: [PATCH 3/3] ci(vm): restore missing umoci runtime input --- .github/workflows/driver-vm-linux.yml | 22 +++----------- .github/workflows/driver-vm-macos.yml | 22 +++----------- tasks/scripts/vm/_lib.sh | 32 +++++++++++++++++++++ tasks/scripts/vm/compress-vm-runtime.sh | 17 +++++++++++ tasks/scripts/vm/download-kernel-runtime.sh | 14 +-------- 5 files changed, 58 insertions(+), 49 deletions(-) diff --git a/.github/workflows/driver-vm-linux.yml b/.github/workflows/driver-vm-linux.yml index 4c39baeeb..8ad4073ca 100644 --- a/.github/workflows/driver-vm-linux.yml +++ b/.github/workflows/driver-vm-linux.yml @@ -134,24 +134,10 @@ jobs: run: | set -euo pipefail COMPRESSED_DIR="${PWD}/target/vm-runtime-compressed" - mkdir -p "$COMPRESSED_DIR" - - EXTRACT_DIR=$(mktemp -d) - zstd -d "runtime-download/vm-runtime-${{ matrix.platform }}.tar.zst" --stdout \ - | tar -xf - -C "$EXTRACT_DIR" - - echo "Extracted runtime files:" - ls -lah "$EXTRACT_DIR" - - for file in "$EXTRACT_DIR"/*; do - [ -f "$file" ] || continue - name=$(basename "$file") - [ "$name" = "provenance.json" ] && continue - zstd -19 -f -q -T0 -o "${COMPRESSED_DIR}/${name}.zst" "$file" - done - - echo "Staged compressed runtime artifacts:" - ls -lah "$COMPRESSED_DIR" + VM_RUNTIME_TARBALL="${PWD}/runtime-download/vm-runtime-${{ matrix.platform }}.tar.zst" \ + VM_RUNTIME_PLATFORM="${{ matrix.platform }}" \ + OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="$COMPRESSED_DIR" \ + tasks/scripts/vm/compress-vm-runtime.sh - name: Build bundled supervisor run: | diff --git a/.github/workflows/driver-vm-macos.yml 
b/.github/workflows/driver-vm-macos.yml index f096d06ec..915e007c9 100644 --- a/.github/workflows/driver-vm-macos.yml +++ b/.github/workflows/driver-vm-macos.yml @@ -165,24 +165,10 @@ jobs: run: | set -euo pipefail COMPRESSED_DIR="${PWD}/target/vm-runtime-compressed-macos" - mkdir -p "$COMPRESSED_DIR" - - EXTRACT_DIR=$(mktemp -d) - zstd -d "runtime-download/vm-runtime-darwin-aarch64.tar.zst" --stdout \ - | tar -xf - -C "$EXTRACT_DIR" - - echo "Extracted darwin runtime files:" - ls -lah "$EXTRACT_DIR" - - for file in "$EXTRACT_DIR"/*; do - [ -f "$file" ] || continue - name=$(basename "$file") - [ "$name" = "provenance.json" ] && continue - zstd -19 -f -q -T0 -o "${COMPRESSED_DIR}/${name}.zst" "$file" - done - - echo "Staged macOS compressed runtime artifacts:" - ls -lah "$COMPRESSED_DIR" + VM_RUNTIME_TARBALL="${PWD}/runtime-download/vm-runtime-darwin-aarch64.tar.zst" \ + VM_RUNTIME_PLATFORM="darwin-aarch64" \ + OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="$COMPRESSED_DIR" \ + tasks/scripts/vm/compress-vm-runtime.sh - name: Download bundled supervisor uses: actions/download-artifact@v4 diff --git a/tasks/scripts/vm/_lib.sh b/tasks/scripts/vm/_lib.sh index c77778fb4..f6d12d3e6 100755 --- a/tasks/scripts/vm/_lib.sh +++ b/tasks/scripts/vm/_lib.sh @@ -74,6 +74,38 @@ download_umoci_binary() { return 1 } +# Map a VM runtime platform to the Linux guest umoci architecture. +# Usage: umoci_guest_arch_for_platform +umoci_guest_arch_for_platform() { + local platform="$1" + + case "$platform" in + linux-aarch64|darwin-aarch64) echo "arm64" ;; + linux-x86_64) echo "amd64" ;; + *) + echo "Error: Unsupported platform for umoci guest binary: ${platform}" >&2 + return 1 + ;; + esac +} + +# Ensure an extracted runtime directory contains the guest umoci binary. 
+# Usage: ensure_umoci_for_platform +ensure_umoci_for_platform() { + local runtime_dir="$1" + local platform="$2" + local version="$3" + + if [ -f "${runtime_dir}/umoci" ]; then + return 0 + fi + + local guest_arch + guest_arch="$(umoci_guest_arch_for_platform "$platform")" + echo " Runtime tarball has no umoci" + download_umoci_binary "${runtime_dir}/umoci" "$version" "$guest_arch" +} + # ── Compression helpers ───────────────────────────────────────────────── # Compress a single file with zstd level 19, reporting sizes. diff --git a/tasks/scripts/vm/compress-vm-runtime.sh b/tasks/scripts/vm/compress-vm-runtime.sh index 324e2cfaf..598dc5505 100755 --- a/tasks/scripts/vm/compress-vm-runtime.sh +++ b/tasks/scripts/vm/compress-vm-runtime.sh @@ -127,6 +127,23 @@ if [ -n "${VM_RUNTIME_TARBALL:-}" ]; then # Extract tarball contents zstd -d "${VM_RUNTIME_TARBALL}" --stdout | tar -xf - -C "$WORK_DIR" + VM_RUNTIME_PLATFORM="${VM_RUNTIME_PLATFORM:-}" + if [ -z "$VM_RUNTIME_PLATFORM" ]; then + case "$(basename "$VM_RUNTIME_TARBALL")" in + vm-runtime-darwin-aarch64.tar.zst) VM_RUNTIME_PLATFORM="darwin-aarch64" ;; + vm-runtime-linux-aarch64.tar.zst) VM_RUNTIME_PLATFORM="linux-aarch64" ;; + vm-runtime-linux-x86_64.tar.zst) VM_RUNTIME_PLATFORM="linux-x86_64" ;; + esac + fi + if [ ! -f "${WORK_DIR}/umoci" ]; then + if [ -z "$VM_RUNTIME_PLATFORM" ]; then + echo "Error: VM_RUNTIME_TARBALL has no umoci and platform could not be inferred." >&2 + echo " Set VM_RUNTIME_PLATFORM to linux-aarch64, linux-x86_64, or darwin-aarch64." 
>&2 + exit 1 + fi + ensure_umoci_for_platform "$WORK_DIR" "$VM_RUNTIME_PLATFORM" "$UMOCI_VERSION" + fi + echo " Extracted files:" ls -lah "$WORK_DIR" diff --git a/tasks/scripts/vm/download-kernel-runtime.sh b/tasks/scripts/vm/download-kernel-runtime.sh index a345a7ddd..2e83d0fb3 100755 --- a/tasks/scripts/vm/download-kernel-runtime.sh +++ b/tasks/scripts/vm/download-kernel-runtime.sh @@ -111,19 +111,7 @@ mkdir -p "$EXTRACT_DIR" zstd -d "${DOWNLOAD_DIR}/${TARBALL_NAME}" --stdout | tar -xf - -C "$EXTRACT_DIR" -if [ ! -f "${EXTRACT_DIR}/umoci" ]; then - UMOCI_GUEST_ARCH="" - case "$PLATFORM" in - linux-aarch64|darwin-aarch64) UMOCI_GUEST_ARCH="arm64" ;; - linux-x86_64) UMOCI_GUEST_ARCH="amd64" ;; - *) - echo "Error: Unsupported platform for umoci guest binary: ${PLATFORM}" >&2 - exit 1 - ;; - esac - echo " Runtime tarball has no umoci" - download_umoci_binary "${EXTRACT_DIR}/umoci" "${UMOCI_VERSION}" "${UMOCI_GUEST_ARCH}" -fi +ensure_umoci_for_platform "$EXTRACT_DIR" "$PLATFORM" "$UMOCI_VERSION" echo " Extracted files:" ls -lah "$EXTRACT_DIR"