From 4930555250c25f7c736ca9ec2f7eda243b775d91 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Tue, 12 May 2026 05:21:16 +0000 Subject: [PATCH 1/6] fix(cli): cp-style sandbox download and workspace-boundary check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `openshell sandbox download` produced a directory at the host destination instead of writing the source bytes as a regular file, and did not refuse sandbox-side paths that resolved outside `/sandbox` (only `creds.json` was rejected, by filesystem policy rather than by an explicit check). Mirrors the cp-style design from the upload-side fixes (#667 / #694 and #885 / #952): destination resolution now respects trailing slashes and existing-directory destinations, single-file sources land as regular files via a staged extraction and atomic rename, and the function delegates the SSH tar stream to a shared helper. Adds an explicit lexical workspace-boundary check on the sandbox-side source — required on the download side because filesystem policy alone is not a substitute for an explicit check when crossing the trust boundary in the leak direction. 
Signed-off-by: Tinson Lai --- crates/openshell-cli/Cargo.toml | 1 + crates/openshell-cli/src/main.rs | 8 +- crates/openshell-cli/src/run.rs | 8 +- crates/openshell-cli/src/ssh.rs | 514 +++++++++++++++++++++++++++++-- 4 files changed, 489 insertions(+), 42 deletions(-) diff --git a/crates/openshell-cli/Cargo.toml b/crates/openshell-cli/Cargo.toml index 21068ad99..d98c6d788 100644 --- a/crates/openshell-cli/Cargo.toml +++ b/crates/openshell-cli/Cargo.toml @@ -58,6 +58,7 @@ anyhow = { workspace = true } # File archiving (tar-over-SSH sync) tar = "0.4" +tempfile = "3" # OIDC/Auth oauth2 = "5" diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index e370d1f27..19c9e95ca 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -2374,12 +2374,8 @@ async fn main() -> Result<()> { let ctx = resolve_gateway(&cli.gateway, &cli.gateway_endpoint)?; let mut tls = tls.with_gateway_name(&ctx.name); apply_auth(&mut tls, &ctx.name); - let local_dest = std::path::Path::new(dest.as_deref().unwrap_or(".")); - eprintln!( - "Downloading sandbox:{} -> {}", - sandbox_path, - local_dest.display() - ); + let local_dest = dest.as_deref().unwrap_or("."); + eprintln!("Downloading sandbox:{sandbox_path} -> {local_dest}"); run::sandbox_sync_down(&ctx.endpoint, &name, &sandbox_path, local_dest, &tls) .await?; eprintln!("{} Download complete", "✓".green().bold()); diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 2797bd66c..de9c40879 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -2200,12 +2200,8 @@ pub async fn sandbox_sync_command( eprintln!("{} Sync complete", "✓".green().bold()); } (None, Some(sandbox_path)) => { - let local_dest = Path::new(dest.unwrap_or(".")); - eprintln!( - "Syncing sandbox:{} -> {}", - sandbox_path, - local_dest.display() - ); + let local_dest = dest.unwrap_or("."); + eprintln!("Syncing sandbox:{sandbox_path} -> {local_dest}"); 
sandbox_sync_down(server, name, sandbox_path, local_dest, tls).await?; eprintln!("{} Sync complete", "✓".green().bold()); } diff --git a/crates/openshell-cli/src/ssh.rs b/crates/openshell-cli/src/ssh.rs index e99e6ee15..a0776113c 100644 --- a/crates/openshell-cli/src/ssh.rs +++ b/crates/openshell-cli/src/ssh.rs @@ -605,6 +605,91 @@ fn split_sandbox_path(path: &str) -> (&str, &str) { } } +/// Writable root inside every sandbox. Used as the boundary for path-traversal +/// checks on sandbox-side source paths in download flows. +const SANDBOX_WORKSPACE_ROOT: &str = "/sandbox"; + +/// Lexically normalise a POSIX-style absolute path by resolving `.` and `..` +/// components, collapsing repeated separators, and stripping any trailing +/// slash. Returns `None` if the input is empty or relative — the caller is +/// expected to reject those before reaching this helper. +/// +/// This is *lexical* only: it does not consult the filesystem and so cannot +/// follow symlinks. That trade-off is intentional — the function is used +/// client-side to refuse obvious path-traversal attempts before issuing the +/// SSH command. Symlink-based escapes inside the sandbox must be addressed +/// server-side. +fn lexical_normalise_absolute_path(path: &str) -> Option { + if !path.starts_with('/') { + return None; + } + let mut stack: Vec<&str> = Vec::new(); + for component in path.split('/') { + match component { + "" | "." => {} + ".." => { + stack.pop(); + } + other => stack.push(other), + } + } + if stack.is_empty() { + return Some("/".to_string()); + } + let mut out = String::with_capacity(path.len()); + for component in stack { + out.push('/'); + out.push_str(component); + } + Some(out) +} + +/// Validate that a sandbox-side source path passed to `sandbox download` +/// resolves under the sandbox writable root. +/// +/// Returns the normalised, traversal-resolved path on success. Refuses any +/// path that escapes `/sandbox` (e.g. 
`/etc/passwd`, `/sandbox/../etc/passwd`) +/// with a user-facing error. +fn validate_sandbox_source_path(path: &str) -> Result { + if path.is_empty() { + return Err(miette::miette!("sandbox source path is empty")); + } + let normalised = lexical_normalise_absolute_path(path) + .ok_or_else(|| miette::miette!("sandbox source path must be absolute (got '{path}')"))?; + let inside_workspace = normalised == SANDBOX_WORKSPACE_ROOT + || normalised.starts_with(&format!("{SANDBOX_WORKSPACE_ROOT}/")); + if !inside_workspace { + return Err(miette::miette!( + "sandbox source path '{path}' is outside the sandbox workspace ({SANDBOX_WORKSPACE_ROOT})" + )); + } + Ok(normalised) +} + +/// Resolve the host-side target path for a downloaded *file*, following +/// `cp`-style semantics. +/// +/// - If `dest_str` ends with `/` or already exists as a directory, the file is +/// placed inside it as `/`. +/// - Otherwise `dest_str` is treated as the exact file path to write. +/// +/// `dest_exists_as_dir` is taken as a parameter (rather than queried inside) +/// so this function stays pure and unit-testable; the caller performs the +/// filesystem check. +fn resolve_file_download_target( + dest_str: &str, + source_basename: &str, + dest_exists_as_dir: bool, +) -> PathBuf { + let trailing_slash = dest_str.ends_with('/'); + let dest_path = Path::new(dest_str); + if trailing_slash || dest_exists_as_dir { + dest_path.join(source_basename) + } else { + dest_path.to_path_buf() + } +} + /// Push a list of files from a local directory into a sandbox using tar-over-SSH. /// /// Files are streamed as a tar archive to `ssh ... tar xf - -C ` on @@ -730,41 +815,105 @@ fn file_list_archive_prefix(local_path: &Path) -> Option { } } +/// Run a small command on the sandbox over SSH and capture its stdout. +/// +/// Used by the download flow to probe whether the source path is a regular +/// file or a directory before streaming the tar archive. 
Stderr is inherited +/// so the user still sees any diagnostic output from ssh itself. +async fn ssh_run_capture_stdout(session: &SshSessionConfig, command: &str) -> Result { + let mut ssh = ssh_base_command(&session.proxy_command); + ssh.arg("-T") + .arg("-o") + .arg("RequestTTY=no") + .arg("sandbox") + .arg(command) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()); + let output = tokio::task::spawn_blocking(move || ssh.output()) + .await + .into_diagnostic()? + .into_diagnostic()?; + if !output.status.success() { + return Err(miette::miette!( + "ssh probe exited with status {}", + output.status + )); + } + Ok(String::from_utf8_lossy(&output.stdout).trim().to_string()) +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum SandboxSourceKind { + File, + Directory, +} + +/// Probe the sandbox-side source path. The path is assumed to have already +/// been validated by `validate_sandbox_source_path`. +async fn probe_sandbox_source_kind( + session: &SshSessionConfig, + sandbox_path: &str, +) -> Result { + let probe_cmd = format!( + "if [ -d {path} ]; then printf dir; elif [ -e {path} ]; then printf file; else printf missing; fi", + path = shell_escape(sandbox_path), + ); + let kind = ssh_run_capture_stdout(session, &probe_cmd).await?; + match kind.as_str() { + "dir" => Ok(SandboxSourceKind::Directory), + "file" => Ok(SandboxSourceKind::File), + "missing" => Err(miette::miette!( + "sandbox source path '{sandbox_path}' does not exist" + )), + other => Err(miette::miette!( + "unexpected probe output for sandbox source path '{sandbox_path}': '{other}'" + )), + } +} + /// Pull a path from a sandbox to a local destination using tar-over-SSH. +/// +/// Follows `cp`-style semantics for the destination: +/// +/// - If the source is a single file: +/// - When `dest` ends with `/` or already exists as a directory on the host, +/// the file lands at `/`. +/// - Otherwise `dest` is taken to be the exact file path to write. 
+/// - If the source is a directory, its contents are extracted into `dest` +/// (creating `dest` if it does not yet exist). This preserves prior +/// behaviour for the directory-source case. +/// +/// The sandbox source path is also subjected to a workspace-boundary check +/// before any SSH command is issued; paths that lexically resolve outside +/// `/sandbox` are refused. pub async fn sandbox_sync_down( server: &str, name: &str, sandbox_path: &str, - local_path: &Path, + dest: &str, tls: &TlsOptions, ) -> Result<()> { + let sandbox_path = validate_sandbox_source_path(sandbox_path)?; let session = ssh_session_config(server, name, tls).await?; + let kind = probe_sandbox_source_kind(&session, &sandbox_path).await?; - // Build tar command. When the sandbox path is a directory we tar its - // *contents* (using `-C .`) so the caller gets the files directly - // without an extra wrapper directory. For a single file we split into - // the parent directory and the filename. - let sandbox_path_clean = sandbox_path.trim_end_matches('/'); - - let tar_cmd = format!( - "if [ -d {path} ]; then tar cf - -C {path} .; else tar cf - -C {parent} {name}; fi", - path = shell_escape(sandbox_path_clean), - parent = shell_escape( - sandbox_path_clean - .rfind('/') - .map_or(".", |pos| if pos == 0 { - "/" - } else { - &sandbox_path_clean[..pos] - }) - ), - name = shell_escape( - sandbox_path_clean - .rfind('/') - .map_or(sandbox_path_clean, |pos| &sandbox_path_clean[pos + 1..]) - ), - ); + match kind { + SandboxSourceKind::File => sandbox_sync_down_file(&session, &sandbox_path, dest).await, + SandboxSourceKind::Directory => { + sandbox_sync_down_directory(&session, &sandbox_path, dest).await + } + } +} +/// Stream a tar archive from the sandbox and extract it into a fresh +/// destination directory. The source is always wrapped on the sandbox side so +/// the host can pick a basename when needed. 
+async fn stream_sandbox_tar( + session: &SshSessionConfig, + tar_cmd: String, + extract_into: &Path, +) -> Result<()> { let mut ssh = ssh_base_command(&session.proxy_command); ssh.arg("-T") .arg("-o") @@ -781,14 +930,11 @@ pub async fn sandbox_sync_down( .take() .ok_or_else(|| miette::miette!("failed to open stdout for ssh process"))?; - let local_path = local_path.to_path_buf(); + let extract_into = extract_into.to_path_buf(); tokio::task::spawn_blocking(move || -> Result<()> { - fs::create_dir_all(&local_path) - .into_diagnostic() - .wrap_err("failed to create local destination directory")?; let mut archive = tar::Archive::new(stdout); archive - .unpack(&local_path) + .unpack(&extract_into) .into_diagnostic() .wrap_err("failed to extract tar archive from sandbox")?; Ok(()) @@ -806,10 +952,115 @@ pub async fn sandbox_sync_down( "ssh tar create exited with status {status}" )); } + Ok(()) +} + +async fn sandbox_sync_down_file( + session: &SshSessionConfig, + sandbox_path: &str, + dest: &str, +) -> Result<()> { + let (parent, basename) = split_sandbox_path(sandbox_path); + let dest_exists_as_dir = fs::symlink_metadata(Path::new(dest)).is_ok_and(|m| m.is_dir()); + let final_path = resolve_file_download_target(dest, basename, dest_exists_as_dir); + + let staging_parent = final_path + .parent() + .ok_or_else(|| miette::miette!("destination '{}' has no parent directory", dest))?; + fs::create_dir_all(staging_parent) + .into_diagnostic() + .wrap_err_with(|| { + format!( + "failed to create local destination directory '{}'", + staging_parent.display() + ) + })?; + + let staging = tempfile::TempDir::new_in(staging_parent) + .into_diagnostic() + .wrap_err("failed to create download staging directory")?; + + let tar_cmd = format!( + "tar cf - -C {parent} {name}", + parent = shell_escape(parent), + name = shell_escape(basename), + ); + stream_sandbox_tar(session, tar_cmd, staging.path()).await?; + + place_downloaded_file(staging.path(), basename, &final_path).wrap_err_with(|| 
{ + format!( + "failed to place downloaded file at '{}'", + final_path.display() + ) + })?; + Ok(()) +} + +/// Move a single file extracted by `stream_sandbox_tar` into its final +/// position on the host. +/// +/// `staging_dir` must contain a single regular-file entry named +/// `source_basename` (the wrapper produced by `tar cf - -C `). +/// The entry is renamed onto `final_path`, atomically when `staging_dir` is +/// on the same filesystem. Refuses to overwrite an existing directory at +/// `final_path` to match `cp` behaviour. +fn place_downloaded_file( + staging_dir: &Path, + source_basename: &str, + final_path: &Path, +) -> Result<()> { + let staged_file = staging_dir.join(source_basename); + let staged_meta = fs::symlink_metadata(&staged_file) + .into_diagnostic() + .wrap_err("downloaded archive did not contain the expected entry")?; + if !staged_meta.is_file() { + return Err(miette::miette!( + "downloaded entry '{source_basename}' is not a regular file" + )); + } + + if let Ok(existing) = fs::symlink_metadata(final_path) + && existing.is_dir() + { + return Err(miette::miette!( + "cannot overwrite directory '{}' with downloaded file", + final_path.display() + )); + } + fs::rename(&staged_file, final_path) + .into_diagnostic() + .wrap_err_with(|| format!("failed to rename into '{}'", final_path.display()))?; Ok(()) } +async fn sandbox_sync_down_directory( + session: &SshSessionConfig, + sandbox_path: &str, + dest: &str, +) -> Result<()> { + let dest_path = Path::new(dest); + if let Ok(existing) = fs::symlink_metadata(dest_path) + && !existing.is_dir() + { + return Err(miette::miette!( + "cannot extract directory '{sandbox_path}' over non-directory destination '{}'", + dest_path.display() + )); + } + fs::create_dir_all(dest_path) + .into_diagnostic() + .wrap_err_with(|| { + format!( + "failed to create local destination directory '{}'", + dest_path.display() + ) + })?; + + let tar_cmd = format!("tar cf - -C {path} .", path = shell_escape(sandbox_path)); + 
stream_sandbox_tar(session, tar_cmd, dest_path).await +} + /// Run the SSH proxy, connecting stdin/stdout to the gateway. pub async fn sandbox_ssh_proxy( gateway_url: &str, @@ -1239,6 +1490,209 @@ mod tests { assert_eq!(split_sandbox_path("/a/b/c/d.txt"), ("/a/b/c", "d.txt")); } + #[test] + fn lexical_normalise_resolves_dot_and_dotdot_segments() { + assert_eq!( + lexical_normalise_absolute_path("/sandbox/./a"), + Some("/sandbox/a".to_string()) + ); + assert_eq!( + lexical_normalise_absolute_path("/sandbox/sub/../a"), + Some("/sandbox/a".to_string()) + ); + assert_eq!( + lexical_normalise_absolute_path("/sandbox/../etc/passwd"), + Some("/etc/passwd".to_string()) + ); + assert_eq!( + lexical_normalise_absolute_path("//sandbox///foo//"), + Some("/sandbox/foo".to_string()) + ); + assert_eq!(lexical_normalise_absolute_path("/"), Some("/".to_string())); + } + + #[test] + fn lexical_normalise_refuses_relative_paths() { + assert_eq!(lexical_normalise_absolute_path(""), None); + assert_eq!(lexical_normalise_absolute_path("sandbox/a"), None); + assert_eq!(lexical_normalise_absolute_path("./a"), None); + } + + #[test] + fn validate_sandbox_source_path_accepts_workspace_paths() { + assert_eq!( + validate_sandbox_source_path("/sandbox/file.txt").unwrap(), + "/sandbox/file.txt" + ); + assert_eq!( + validate_sandbox_source_path("/sandbox/.openclaw/workspace/hello.txt").unwrap(), + "/sandbox/.openclaw/workspace/hello.txt" + ); + assert_eq!( + validate_sandbox_source_path("/sandbox").unwrap(), + "/sandbox" + ); + assert_eq!( + validate_sandbox_source_path("/sandbox/").unwrap(), + "/sandbox" + ); + assert_eq!( + validate_sandbox_source_path("/sandbox/sub/../file").unwrap(), + "/sandbox/file" + ); + } + + #[test] + fn validate_sandbox_source_path_rejects_traversal_and_escapes() { + let traversal = validate_sandbox_source_path("/etc/passwd").unwrap_err(); + assert!( + format!("{traversal}").contains("outside the sandbox workspace"), + "unexpected error: {traversal}" + ); + + let 
parent_escape = validate_sandbox_source_path("/sandbox/../etc/passwd").unwrap_err(); + assert!( + format!("{parent_escape}").contains("outside the sandbox workspace"), + "unexpected error: {parent_escape}" + ); + + let prefix_only = validate_sandbox_source_path("/sandboxed/secrets").unwrap_err(); + assert!( + format!("{prefix_only}").contains("outside the sandbox workspace"), + "unexpected error: {prefix_only}" + ); + + let empty = validate_sandbox_source_path("").unwrap_err(); + assert!(format!("{empty}").contains("empty")); + + let relative = validate_sandbox_source_path("sandbox/file").unwrap_err(); + assert!(format!("{relative}").contains("must be absolute")); + } + + #[test] + fn resolve_file_download_target_writes_to_dest_when_not_a_directory() { + assert_eq!( + resolve_file_download_target("/tmp/out.txt", "hello.txt", false), + PathBuf::from("/tmp/out.txt") + ); + } + + #[test] + fn resolve_file_download_target_places_inside_existing_directory() { + assert_eq!( + resolve_file_download_target("/tmp", "hello.txt", true), + PathBuf::from("/tmp/hello.txt") + ); + } + + #[test] + fn resolve_file_download_target_honors_trailing_slash() { + assert_eq!( + resolve_file_download_target("/tmp/newdir/", "hello.txt", false), + PathBuf::from("/tmp/newdir/hello.txt") + ); + } + + fn build_single_file_archive(entry_path: &str, bytes: &[u8]) -> Vec { + let mut buf = Vec::new(); + { + let mut builder = tar::Builder::new(&mut buf); + let mut header = tar::Header::new_gnu(); + header.set_path(entry_path).expect("set tar entry path"); + header.set_size(bytes.len() as u64); + header.set_mode(0o644); + header.set_entry_type(tar::EntryType::Regular); + header.set_cksum(); + builder.append(&header, bytes).expect("append tar entry"); + builder.finish().expect("finish tar archive"); + } + buf + } + + fn unpack_into(archive_bytes: &[u8], staging: &Path) { + let mut archive = tar::Archive::new(std::io::Cursor::new(archive_bytes)); + archive.unpack(staging).expect("unpack archive"); + } 
+ + #[test] + fn place_downloaded_file_writes_regular_file_at_dest() { + let workdir = tempfile::tempdir().expect("create workdir"); + let staging = workdir.path().join("staging"); + fs::create_dir_all(&staging).expect("create staging"); + let archive = build_single_file_archive("hello.txt", b"trust me"); + unpack_into(&archive, &staging); + + let dest = workdir.path().join("out.txt"); + place_downloaded_file(&staging, "hello.txt", &dest).expect("place file"); + + let meta = fs::symlink_metadata(&dest).expect("stat dest"); + assert!(meta.is_file(), "dest must be a regular file, got {meta:?}"); + assert_eq!(fs::read(&dest).expect("read dest"), b"trust me"); + } + + #[test] + fn place_downloaded_file_refuses_to_clobber_existing_directory() { + let workdir = tempfile::tempdir().expect("create workdir"); + let staging = workdir.path().join("staging"); + fs::create_dir_all(&staging).expect("create staging"); + let archive = build_single_file_archive("hello.txt", b"trust me"); + unpack_into(&archive, &staging); + + let dest = workdir.path().join("conflict-dir"); + fs::create_dir(&dest).expect("create conflict dir"); + + let err = place_downloaded_file(&staging, "hello.txt", &dest) + .expect_err("expected directory-clobber refusal"); + assert!( + format!("{err}").contains("cannot overwrite directory"), + "unexpected error: {err}" + ); + assert!( + fs::symlink_metadata(&dest).expect("stat dest").is_dir(), + "dest should remain a directory after refusal" + ); + } + + #[test] + fn download_full_pipeline_lands_file_at_exact_dest_path() { + let workdir = tempfile::tempdir().expect("create workdir"); + let staging_parent = workdir.path(); + let archive = build_single_file_archive("hello.txt", b"trust me"); + + let dest_str = staging_parent.join("out.txt"); + let dest_str = dest_str.to_str().unwrap(); + let final_path = resolve_file_download_target(dest_str, "hello.txt", false); + assert_eq!(final_path, Path::new(dest_str)); + + let staging = 
tempfile::TempDir::new_in(staging_parent).expect("staging dir"); + unpack_into(&archive, staging.path()); + place_downloaded_file(staging.path(), "hello.txt", &final_path).expect("place"); + + let meta = fs::symlink_metadata(&final_path).expect("stat final"); + assert!(meta.is_file(), "expected regular file, got {meta:?}"); + assert_eq!(fs::read(&final_path).expect("read final"), b"trust me"); + } + + #[test] + fn download_full_pipeline_places_inside_existing_directory_destination() { + let workdir = tempfile::tempdir().expect("create workdir"); + let archive = build_single_file_archive("hello.txt", b"trust me"); + + let dest_dir = workdir.path().join("out-dir"); + fs::create_dir(&dest_dir).expect("create dest dir"); + let dest_str = dest_dir.to_str().unwrap(); + let final_path = resolve_file_download_target(dest_str, "hello.txt", true); + assert_eq!(final_path, dest_dir.join("hello.txt")); + + let staging = tempfile::TempDir::new_in(workdir.path()).expect("staging dir"); + unpack_into(&archive, staging.path()); + place_downloaded_file(staging.path(), "hello.txt", &final_path).expect("place"); + + let meta = fs::symlink_metadata(&final_path).expect("stat final"); + assert!(meta.is_file()); + assert_eq!(fs::read(&final_path).expect("read final"), b"trust me"); + } + #[test] fn directory_upload_prefix_uses_basename_for_named_directories() { assert_eq!( From 7b69bcc418b80348f634edca32a7483b0e1092f1 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 13 May 2026 09:33:41 +0000 Subject: [PATCH 2/6] fix(cli): canonicalise sandbox download path remotely and quote tar member The lexical check in `validate_sandbox_source_path` clears any path whose components stay under `/sandbox` after `.` and `..` are folded out, but it cannot see symlinks. 
A workspace-relative symlink such as `/sandbox/etc-link -> /etc` lets `openshell sandbox download /sandbox/etc-link/passwd /tmp/passwd` slip through, then `tar -C /sandbox/etc-link passwd` follows the link on the sandbox side and streams `/etc/passwd` back to the host. Add `canonicalize_sandbox_source_path` which runs `realpath -e --` over SSH and rejects the result unless it still equals `/sandbox` or starts with `/sandbox/`. The lexical guard stays as a cheap fail-fast for obviously-bad inputs, and the rest of the download flow now uses the canonical path everywhere a tar invocation or probe might dereference a symlink. While editing the single-file branch, also add the `--` separator before the basename in `tar cf - -C -- `. Without it, a sandbox file whose basename starts with `-` (e.g. `--checkpoint-action=...`) is parsed by GNU tar as an option rather than a member, which at best fails the wrap and at worst triggers tar option behaviour. The directory branch passes literal `.` and is unaffected. Cover both fixes with pure-function tests: `is_under_sandbox_workspace` gets accept/reject pairs (the same predicate the post-realpath validator uses) and the new `build_single_file_tar_cmd` helper asserts the `--` separator and shell escaping survive any future refactor. Signed-off-by: Tinson Lai --- crates/openshell-cli/src/ssh.rs | 114 +++++++++++++++++++++++++++++--- 1 file changed, 104 insertions(+), 10 deletions(-) diff --git a/crates/openshell-cli/src/ssh.rs b/crates/openshell-cli/src/ssh.rs index a0776113c..aef53e825 100644 --- a/crates/openshell-cli/src/ssh.rs +++ b/crates/openshell-cli/src/ssh.rs @@ -648,17 +648,20 @@ fn lexical_normalise_absolute_path(path: &str) -> Option { /// resolves under the sandbox writable root. /// /// Returns the normalised, traversal-resolved path on success. Refuses any -/// path that escapes `/sandbox` (e.g. `/etc/passwd`, `/sandbox/../etc/passwd`) -/// with a user-facing error. +/// path that lexically escapes `/sandbox` (e.g. 
`/etc/passwd`, +/// `/sandbox/../etc/passwd`) with a user-facing error. +/// +/// This is a lexical guard only — it does not follow symlinks. Call +/// `canonicalize_sandbox_source_path` after this on any path that will be +/// passed to a subsequent SSH I/O operation, so a symlink such as +/// `/sandbox/etc-link -> /etc` cannot leak files outside the workspace. fn validate_sandbox_source_path(path: &str) -> Result { if path.is_empty() { return Err(miette::miette!("sandbox source path is empty")); } let normalised = lexical_normalise_absolute_path(path) .ok_or_else(|| miette::miette!("sandbox source path must be absolute (got '{path}')"))?; - let inside_workspace = normalised == SANDBOX_WORKSPACE_ROOT - || normalised.starts_with(&format!("{SANDBOX_WORKSPACE_ROOT}/")); - if !inside_workspace { + if !is_under_sandbox_workspace(&normalised) { return Err(miette::miette!( "sandbox source path '{path}' is outside the sandbox workspace ({SANDBOX_WORKSPACE_ROOT})" )); @@ -666,6 +669,41 @@ fn validate_sandbox_source_path(path: &str) -> Result { Ok(normalised) } +/// Pure helper: is `path` equal to `/sandbox` or a descendant of it? +fn is_under_sandbox_workspace(path: &str) -> bool { + path == SANDBOX_WORKSPACE_ROOT || path.starts_with(&format!("{SANDBOX_WORKSPACE_ROOT}/")) +} + +/// Resolve every symlink in `sandbox_path` on the sandbox side and refuse the +/// result if it lands outside `/sandbox`. +/// +/// The lexical guard in `validate_sandbox_source_path` cannot see symlinks; a +/// path such as `/sandbox/etc-link/passwd` (where `etc-link -> /etc`) clears +/// the lexical check but would still leak `/etc/passwd` once `tar -C` follows +/// the link. Canonicalising on the remote side and re-validating closes that +/// gap. The returned canonical path is what the caller should hand to probe +/// and tar invocations. 
+async fn canonicalize_sandbox_source_path( + session: &SshSessionConfig, + sandbox_path: &str, +) -> Result { + let canonical_cmd = format!("realpath -e -- {path}", path = shell_escape(sandbox_path)); + let canonical = ssh_run_capture_stdout(session, &canonical_cmd) + .await + .wrap_err_with(|| format!("failed to canonicalise sandbox source path '{sandbox_path}'"))?; + if canonical.is_empty() { + return Err(miette::miette!( + "sandbox source path '{sandbox_path}' does not exist" + )); + } + if !is_under_sandbox_workspace(&canonical) { + return Err(miette::miette!( + "sandbox source path '{sandbox_path}' resolves to '{canonical}', outside the sandbox workspace ({SANDBOX_WORKSPACE_ROOT})" + )); + } + Ok(canonical) +} + /// Resolve the host-side target path for a downloaded *file*, following /// `cp`-style semantics. /// @@ -896,6 +934,7 @@ pub async fn sandbox_sync_down( ) -> Result<()> { let sandbox_path = validate_sandbox_source_path(sandbox_path)?; let session = ssh_session_config(server, name, tls).await?; + let sandbox_path = canonicalize_sandbox_source_path(&session, &sandbox_path).await?; let kind = probe_sandbox_source_kind(&session, &sandbox_path).await?; match kind { @@ -955,6 +994,20 @@ async fn stream_sandbox_tar( Ok(()) } +/// Build the `tar cf - -C -- ` command used to wrap a +/// single sandbox-side file for download. +/// +/// The trailing `--` is required: a sandbox-side file whose basename starts +/// with `-` (e.g. `--checkpoint-action=...`) would otherwise be parsed by GNU +/// tar as an option rather than a member to archive. 
+fn build_single_file_tar_cmd(parent: &str, basename: &str) -> String { + format!( + "tar cf - -C {parent} -- {name}", + parent = shell_escape(parent), + name = shell_escape(basename), + ) +} + async fn sandbox_sync_down_file( session: &SshSessionConfig, sandbox_path: &str, @@ -980,11 +1033,7 @@ async fn sandbox_sync_down_file( .into_diagnostic() .wrap_err("failed to create download staging directory")?; - let tar_cmd = format!( - "tar cf - -C {parent} {name}", - parent = shell_escape(parent), - name = shell_escape(basename), - ); + let tar_cmd = build_single_file_tar_cmd(parent, basename); stream_sandbox_tar(session, tar_cmd, staging.path()).await?; place_downloaded_file(staging.path(), basename, &final_path).wrap_err_with(|| { @@ -1569,6 +1618,51 @@ mod tests { assert!(format!("{relative}").contains("must be absolute")); } + #[test] + fn is_under_sandbox_workspace_accepts_root_and_descendants() { + assert!(is_under_sandbox_workspace("/sandbox")); + assert!(is_under_sandbox_workspace("/sandbox/file")); + assert!(is_under_sandbox_workspace("/sandbox/sub/nested")); + } + + #[test] + fn is_under_sandbox_workspace_rejects_outside_paths_and_prefix_collisions() { + assert!(!is_under_sandbox_workspace("/etc/passwd")); + assert!(!is_under_sandbox_workspace("/sandboxed/secrets")); + assert!(!is_under_sandbox_workspace("/")); + assert!(!is_under_sandbox_workspace("")); + } + + #[test] + fn build_single_file_tar_cmd_inserts_double_dash_before_basename() { + // Without `--`, a basename such as `--checkpoint-action=...` would be + // parsed by GNU tar as an option. Guard the wire format against this + // regression. 
+ let cmd = build_single_file_tar_cmd("/sandbox", "--checkpoint-action=exec=id"); + assert!( + cmd.contains(" -- "), + "expected `--` separator in tar command, got: {cmd}" + ); + assert!( + cmd.ends_with(&shell_escape("--checkpoint-action=exec=id")), + "expected basename at end of tar command, got: {cmd}" + ); + } + + #[test] + fn build_single_file_tar_cmd_escapes_parent_and_basename() { + let cmd = build_single_file_tar_cmd("/sandbox/with space", "name with space"); + assert!(cmd.contains(" -- "), "missing `--` separator: {cmd}"); + assert!( + cmd.contains(&shell_escape("/sandbox/with space")), + "parent not shell-escaped: {cmd}" + ); + assert!( + cmd.contains(&shell_escape("name with space")), + "basename not shell-escaped: {cmd}" + ); + } + #[test] fn resolve_file_download_target_writes_to_dest_when_not_a_directory() { assert_eq!( From 39979e6310a5857d08729b41f630d490b3d0bc25 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 13 May 2026 10:34:16 +0000 Subject: [PATCH 3/6] chore(cli): drop redundant tempfile dev-dependency entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `tempfile` was promoted from `[dev-dependencies]` to `[dependencies]` once the cp-style single-file download path started using `TempDir::new_in` from production code. The original `[dev-dependencies]` line stayed behind by accident — entries under `[dependencies]` are already available to test compilation, so the second declaration is dead weight. 
Signed-off-by: Tinson Lai --- crates/openshell-cli/Cargo.toml | 1 - crates/openshell-cli/src/ssh.rs | 60 ++++++++++++++++----------------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/crates/openshell-cli/Cargo.toml b/crates/openshell-cli/Cargo.toml index d98c6d788..7dc0c0f22 100644 --- a/crates/openshell-cli/Cargo.toml +++ b/crates/openshell-cli/Cargo.toml @@ -92,6 +92,5 @@ rcgen = { version = "0.13", features = ["crypto", "pem"] } reqwest = { workspace = true } serde_json = { workspace = true } temp-env = "0.3" -tempfile = "3" tokio-stream = { workspace = true } url = { workspace = true } diff --git a/crates/openshell-cli/src/ssh.rs b/crates/openshell-cli/src/ssh.rs index aef53e825..5db4c74d6 100644 --- a/crates/openshell-cli/src/ssh.rs +++ b/crates/openshell-cli/src/ssh.rs @@ -609,7 +609,7 @@ fn split_sandbox_path(path: &str) -> (&str, &str) { /// checks on sandbox-side source paths in download flows. const SANDBOX_WORKSPACE_ROOT: &str = "/sandbox"; -/// Lexically normalise a POSIX-style absolute path by resolving `.` and `..` +/// Lexically clean a POSIX-style absolute path by resolving `.` and `..` /// components, collapsing repeated separators, and stripping any trailing /// slash. Returns `None` if the input is empty or relative — the caller is /// expected to reject those before reaching this helper. @@ -619,7 +619,7 @@ const SANDBOX_WORKSPACE_ROOT: &str = "/sandbox"; /// client-side to refuse obvious path-traversal attempts before issuing the /// SSH command. Symlink-based escapes inside the sandbox must be addressed /// server-side. -fn lexical_normalise_absolute_path(path: &str) -> Option { +fn lexical_clean_absolute_path(path: &str) -> Option { if !path.starts_with('/') { return None; } @@ -647,26 +647,26 @@ fn lexical_normalise_absolute_path(path: &str) -> Option { /// Validate that a sandbox-side source path passed to `sandbox download` /// resolves under the sandbox writable root. 
 ///
-/// Returns the normalised, traversal-resolved path on success. Refuses any
+/// Returns the cleaned, traversal-resolved path on success. Refuses any
 /// path that lexically escapes `/sandbox` (e.g. `/etc/passwd`,
 /// `/sandbox/../etc/passwd`) with a user-facing error.
 ///
 /// This is a lexical guard only — it does not follow symlinks. Call
-/// `canonicalize_sandbox_source_path` after this on any path that will be
-/// passed to a subsequent SSH I/O operation, so a symlink such as
+/// `resolve_sandbox_source_path` after this on any path that will be passed
+/// to a subsequent SSH I/O operation, so a symlink such as
 /// `/sandbox/etc-link -> /etc` cannot leak files outside the workspace.
 fn validate_sandbox_source_path(path: &str) -> Result<String> {
     if path.is_empty() {
         return Err(miette::miette!("sandbox source path is empty"));
     }
-    let normalised = lexical_normalise_absolute_path(path)
+    let cleaned = lexical_clean_absolute_path(path)
         .ok_or_else(|| miette::miette!("sandbox source path must be absolute (got '{path}')"))?;
-    if !is_under_sandbox_workspace(&normalised) {
+    if !is_under_sandbox_workspace(&cleaned) {
         return Err(miette::miette!(
             "sandbox source path '{path}' is outside the sandbox workspace ({SANDBOX_WORKSPACE_ROOT})"
         ));
     }
-    Ok(normalised)
+    Ok(cleaned)
 }
 
 /// Pure helper: is `path` equal to `/sandbox` or a descendant of it?
@@ -680,28 +680,28 @@ fn is_under_sandbox_workspace(path: &str) -> bool {
 /// The lexical guard in `validate_sandbox_source_path` cannot see symlinks; a
 /// path such as `/sandbox/etc-link/passwd` (where `etc-link -> /etc`) clears
 /// the lexical check but would still leak `/etc/passwd` once `tar -C` follows
-/// the link. Canonicalising on the remote side and re-validating closes that
-/// gap. The returned canonical path is what the caller should hand to probe
-/// and tar invocations.
-async fn canonicalize_sandbox_source_path(
+/// the link. Resolving symlinks on the remote side and re-validating closes
+/// that gap. The returned fully-resolved path is what the caller should hand
+/// to probe and tar invocations.
+async fn resolve_sandbox_source_path(
     session: &SshSessionConfig,
     sandbox_path: &str,
 ) -> Result<String> {
-    let canonical_cmd = format!("realpath -e -- {path}", path = shell_escape(sandbox_path));
-    let canonical = ssh_run_capture_stdout(session, &canonical_cmd)
+    let resolve_cmd = format!("realpath -e -- {path}", path = shell_escape(sandbox_path));
+    let resolved = ssh_run_capture_stdout(session, &resolve_cmd)
         .await
-        .wrap_err_with(|| format!("failed to canonicalise sandbox source path '{sandbox_path}'"))?;
-    if canonical.is_empty() {
+        .wrap_err_with(|| format!("failed to resolve sandbox source path '{sandbox_path}'"))?;
+    if resolved.is_empty() {
         return Err(miette::miette!(
             "sandbox source path '{sandbox_path}' does not exist"
         ));
     }
-    if !is_under_sandbox_workspace(&canonical) {
+    if !is_under_sandbox_workspace(&resolved) {
         return Err(miette::miette!(
-            "sandbox source path '{sandbox_path}' resolves to '{canonical}', outside the sandbox workspace ({SANDBOX_WORKSPACE_ROOT})"
+            "sandbox source path '{sandbox_path}' resolves to '{resolved}', outside the sandbox workspace ({SANDBOX_WORKSPACE_ROOT})"
         ));
     }
-    Ok(canonical)
+    Ok(resolved)
 }
 
 /// Resolve the host-side target path for a downloaded *file*, following
@@ -934,7 +934,7 @@ pub async fn sandbox_sync_down(
 ) -> Result<()> {
     let sandbox_path = validate_sandbox_source_path(sandbox_path)?;
     let session = ssh_session_config(server, name, tls).await?;
-    let sandbox_path = canonicalize_sandbox_source_path(&session, &sandbox_path).await?;
+    let sandbox_path = resolve_sandbox_source_path(&session, &sandbox_path).await?;
     let kind = probe_sandbox_source_kind(&session, &sandbox_path).await?;
 
     match kind {
@@ -1540,31 +1540,31 @@ mod tests {
     }
 
     #[test]
-    fn lexical_normalise_resolves_dot_and_dotdot_segments() {
+    fn lexical_clean_resolves_dot_and_dotdot_segments() {
         assert_eq!(
-            lexical_normalise_absolute_path("/sandbox/./a"),
+
lexical_clean_absolute_path("/sandbox/./a"), Some("/sandbox/a".to_string()) ); assert_eq!( - lexical_normalise_absolute_path("/sandbox/sub/../a"), + lexical_clean_absolute_path("/sandbox/sub/../a"), Some("/sandbox/a".to_string()) ); assert_eq!( - lexical_normalise_absolute_path("/sandbox/../etc/passwd"), + lexical_clean_absolute_path("/sandbox/../etc/passwd"), Some("/etc/passwd".to_string()) ); assert_eq!( - lexical_normalise_absolute_path("//sandbox///foo//"), + lexical_clean_absolute_path("//sandbox///foo//"), Some("/sandbox/foo".to_string()) ); - assert_eq!(lexical_normalise_absolute_path("/"), Some("/".to_string())); + assert_eq!(lexical_clean_absolute_path("/"), Some("/".to_string())); } #[test] - fn lexical_normalise_refuses_relative_paths() { - assert_eq!(lexical_normalise_absolute_path(""), None); - assert_eq!(lexical_normalise_absolute_path("sandbox/a"), None); - assert_eq!(lexical_normalise_absolute_path("./a"), None); + fn lexical_clean_refuses_relative_paths() { + assert_eq!(lexical_clean_absolute_path(""), None); + assert_eq!(lexical_clean_absolute_path("sandbox/a"), None); + assert_eq!(lexical_clean_absolute_path("./a"), None); } #[test] From e15848d24341dff4543ee094243f09ef6e35716c Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 13 May 2026 11:33:58 +0000 Subject: [PATCH 4/6] test(cli): add E2E coverage for sandbox download paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds four end-to-end tests for `openshell sandbox download` so the happy path and both Codex regressions have explicit coverage: * `sandbox_download_file_only` — seeds a single file via the new `SandboxGuard::exec` harness helper and pulls it back, exercising `sandbox_sync_down_file` without an upload phase. * `sandbox_download_directory_only` — seeds a small directory tree and pulls it recursively, exercising `sandbox_sync_down_directory`. 
* `sandbox_download_rejects_symlinks_pointing_outside_workspace` —
  regression for the high-severity finding. Plants three escape shapes
  (`/sandbox/etc-link -> /etc`, `/sandbox/passwd-link -> /etc/passwd`, and a
  path with a symlinked component `/sandbox/etc-link/passwd`) and asserts
  all three are refused with a workspace-boundary error before any tar
  streams. Also checks the host destination stays empty so a silent leak
  would surface.

* `sandbox_download_handles_dash_leading_basename` — regression for the
  medium-severity finding. Seeds a file named `--checkpoint-action=evil`
  and verifies it downloads with byte-for-byte contents, proving the `--`
  separator stops tar parsing the basename as an option.

The harness gains a single new method, `SandboxGuard::exec`, that wraps
`openshell sandbox exec --name <name> --no-tty -- <argv...>`. Existing
tests preferred the upload path to populate sandbox state, but the
regression cases need to plant filesystem state the upload flow cannot
produce (symlinks, files whose basenames clap would otherwise reject).

Signed-off-by: Tinson Lai
---
 e2e/rust/src/harness/sandbox.rs |  46 ++++++++
 e2e/rust/tests/sync.rs          | 185 ++++++++++++++++++++++++++++++++
 2 files changed, 231 insertions(+)

diff --git a/e2e/rust/src/harness/sandbox.rs b/e2e/rust/src/harness/sandbox.rs
index 278c16f53..cdb461494 100644
--- a/e2e/rust/src/harness/sandbox.rs
+++ b/e2e/rust/src/harness/sandbox.rs
@@ -349,6 +349,52 @@ impl SandboxGuard {
         Ok(combined)
     }
 
+    /// Run a one-shot command inside the sandbox via `openshell sandbox exec`.
+    ///
+    /// Used by tests that need to pre-populate sandbox-side state (create
+    /// files, symlinks, directories) without going through the upload flow.
+    /// Stdout and stderr are captured and returned together; PTY allocation
+    /// is disabled so the call is suitable for non-interactive setups.
+    ///
+    /// # Arguments
+    ///
+    /// * `argv` — Command and arguments to execute (passed after `--`).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the CLI exits non-zero.
+    pub async fn exec(&self, argv: &[&str]) -> Result<String, String> {
+        let mut cmd = openshell_cmd();
+        cmd.arg("sandbox")
+            .arg("exec")
+            .arg("--name")
+            .arg(&self.name)
+            .arg("--no-tty")
+            .arg("--");
+        for arg in argv {
+            cmd.arg(arg);
+        }
+        cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
+
+        let output = cmd
+            .output()
+            .await
+            .map_err(|e| format!("failed to spawn openshell exec: {e}"))?;
+
+        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+        let combined = format!("{stdout}{stderr}");
+
+        if !output.status.success() {
+            return Err(format!(
+                "sandbox exec failed (exit {:?}):\n{combined}",
+                output.status.code()
+            ));
+        }
+
+        Ok(combined)
+    }
+
     /// Download files from the sandbox via `openshell sandbox download`.
     ///
     /// # Arguments
diff --git a/e2e/rust/tests/sync.rs b/e2e/rust/tests/sync.rs
index 60abb42b4..56a7af020 100644
--- a/e2e/rust/tests/sync.rs
+++ b/e2e/rust/tests/sync.rs
@@ -352,3 +352,188 @@ async fn upload_single_file_from_git_repo_only_uploads_that_file() {
 
     guard.cleanup().await;
 }
+
+/// Pre-populate a single file on the sandbox side via `sandbox exec` and pull
+/// it back with `sandbox download`. Exercises `sandbox_sync_down_file` in
+/// isolation (no upload phase).
+#[tokio::test] +async fn sandbox_download_file_only() { + let mut guard = + SandboxGuard::create_keep(&["sh", "-c", "echo Ready && sleep infinity"], "Ready") + .await + .expect("sandbox create --keep"); + + guard + .exec(&["sh", "-c", "printf greeting-payload > /sandbox/greeting.txt"]) + .await + .expect("seed greeting.txt inside the sandbox"); + + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let dest = tmpdir.path().join("out"); + fs::create_dir_all(&dest).expect("create download dir"); + let dest_str = dest.to_str().expect("dest path is UTF-8"); + + guard + .download("/sandbox/greeting.txt", dest_str) + .await + .expect("download greeting.txt"); + + let actual = fs::read_to_string(dest.join("greeting.txt")).expect("read greeting.txt"); + assert_eq!( + actual, "greeting-payload", + "downloaded file content mismatch" + ); + + guard.cleanup().await; +} + +/// Pre-populate a small directory tree on the sandbox side via `sandbox exec` +/// and pull it recursively with `sandbox download`. Exercises +/// `sandbox_sync_down_directory` in isolation (no upload phase). 
+#[tokio::test] +async fn sandbox_download_directory_only() { + let mut guard = + SandboxGuard::create_keep(&["sh", "-c", "echo Ready && sleep infinity"], "Ready") + .await + .expect("sandbox create --keep"); + + guard + .exec(&[ + "sh", + "-c", + "mkdir -p /sandbox/tree/sub \ + && printf top-level > /sandbox/tree/root.txt \ + && printf nested > /sandbox/tree/sub/child.txt", + ]) + .await + .expect("seed directory tree inside the sandbox"); + + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let dest = tmpdir.path().join("out"); + fs::create_dir_all(&dest).expect("create download dir"); + let dest_str = dest.to_str().expect("dest path is UTF-8"); + + guard + .download("/sandbox/tree", dest_str) + .await + .expect("download directory tree"); + + let root = fs::read_to_string(dest.join("root.txt")).expect("read root.txt"); + assert_eq!(root, "top-level", "root.txt content mismatch"); + let child = fs::read_to_string(dest.join("sub/child.txt")).expect("read sub/child.txt"); + assert_eq!(child, "nested", "sub/child.txt content mismatch"); + + guard.cleanup().await; +} + +/// Regression for the Codex high-severity finding: `validate_sandbox_source_path` +/// is purely lexical, so a sandbox-side symlink that escapes `/sandbox` could +/// slip through and let `tar -C` follow the link into the host filesystem. +/// `resolve_sandbox_source_path` re-validates after `realpath -e` resolves +/// every component. Cover both shapes from the Codex example: the symlink as +/// the full source (directory case) and the symlink as a component of the +/// source path (file case). 
+#[tokio::test] +async fn sandbox_download_rejects_symlinks_pointing_outside_workspace() { + let mut guard = + SandboxGuard::create_keep(&["sh", "-c", "echo Ready && sleep infinity"], "Ready") + .await + .expect("sandbox create --keep"); + + guard + .exec(&[ + "sh", + "-c", + "ln -s /etc /sandbox/etc-link && ln -s /etc/passwd /sandbox/passwd-link", + ]) + .await + .expect("plant escape symlinks inside the sandbox"); + + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let dest = tmpdir.path().join("out"); + fs::create_dir_all(&dest).expect("create download dir"); + let dest_str = dest.to_str().expect("dest path is UTF-8"); + + // Directory-source symlink: /sandbox/etc-link -> /etc + let err = guard + .download("/sandbox/etc-link", dest_str) + .await + .expect_err("download of a directory symlink to /etc must be refused"); + assert!( + err.contains("outside the sandbox workspace"), + "expected workspace-boundary error for directory symlink, got: {err}" + ); + + // File-source symlink: /sandbox/passwd-link -> /etc/passwd + let err = guard + .download("/sandbox/passwd-link", dest_str) + .await + .expect_err("download of a file symlink to /etc/passwd must be refused"); + assert!( + err.contains("outside the sandbox workspace"), + "expected workspace-boundary error for file symlink, got: {err}" + ); + + // Path with a symlinked component: /sandbox/etc-link/passwd + let err = guard + .download("/sandbox/etc-link/passwd", dest_str) + .await + .expect_err("download via a symlinked path component must be refused"); + assert!( + err.contains("outside the sandbox workspace"), + "expected workspace-boundary error for symlinked component, got: {err}" + ); + + // Sanity check: the host destination should still be empty — no file from + // /etc should have leaked through on any of the three attempts. 
+ assert!( + !dest.join("passwd").exists() && !dest.join("etc-link").exists(), + "no file should have been written when the source resolves outside the workspace" + ); + + guard.cleanup().await; +} + +/// Regression for the Codex medium-severity finding: the single-file tar +/// invocation must wrap the basename behind a `--` separator, otherwise a +/// sandbox-side filename whose basename starts with `--` (e.g. created by a +/// malicious agent inside the sandbox) is parsed by GNU tar as an option +/// rather than a member to archive. +#[tokio::test] +async fn sandbox_download_handles_dash_leading_basename() { + let mut guard = + SandboxGuard::create_keep(&["sh", "-c", "echo Ready && sleep infinity"], "Ready") + .await + .expect("sandbox create --keep"); + + // Quoting the redirect target keeps the leading dashes intact across the + // shell parse; the sandbox-side filesystem ends up with a basename that + // would be parsed as an option by an unprotected tar invocation. + guard + .exec(&[ + "sh", + "-c", + "printf dash-payload > '/sandbox/--checkpoint-action=evil'", + ]) + .await + .expect("seed dash-leading file inside the sandbox"); + + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let dest = tmpdir.path().join("out"); + fs::create_dir_all(&dest).expect("create download dir"); + let dest_str = dest.to_str().expect("dest path is UTF-8"); + + guard + .download("/sandbox/--checkpoint-action=evil", dest_str) + .await + .expect("download dash-leading basename"); + + let actual = + fs::read_to_string(dest.join("--checkpoint-action=evil")).expect("read dash-leading file"); + assert_eq!( + actual, "dash-payload", + "dash-leading file content mismatch — tar may have parsed the basename as an option" + ); + + guard.cleanup().await; +} From 5633febefeec6c538aa4c884e3820a455154ad3b Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Wed, 13 May 2026 11:36:08 +0000 Subject: [PATCH 5/6] docs(cli): note cp-style destination and workspace boundary on download 
`openshell sandbox download` now refuses any source path that resolves
outside `/sandbox` (lexically or through a symlink), and the single-file
branch follows `cp`-style placement rules. Document both so users hitting
the workspace-boundary error or the "land at exact path vs land inside
dir" decision can self-serve.

Signed-off-by: Tinson Lai
---
 docs/sandboxes/manage-sandboxes.mdx | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx
index b43a6846a..a1f69bb6a 100644
--- a/docs/sandboxes/manage-sandboxes.mdx
+++ b/docs/sandboxes/manage-sandboxes.mdx
@@ -235,6 +235,10 @@ Download files from the sandbox to your host:
 openshell sandbox download my-sandbox /sandbox/output ./local
 ```
+When the sandbox-side source is a single file, the destination follows `cp`-style placement: if the destination already exists as a directory or ends with `/`, the file lands inside it as `<dest>/<basename>`; otherwise the file is written at the exact destination path.
+
+The CLI only allows sandbox-side sources that resolve inside the writable workspace (`/sandbox`). Paths that escape lexically (`/etc/passwd`, `/sandbox/../etc/passwd`) and paths that escape through a symlink (`/sandbox/etc-link` pointing at `/etc`) are both refused before any data is transferred.
+
 You can also upload files at creation time with the `--upload` flag on `openshell sandbox create`.

From 1f636f823c219630bed551950907538b428d6df2 Mon Sep 17 00:00:00 2001
From: Tinson Lai
Date: Wed, 13 May 2026 12:19:28 +0000
Subject: [PATCH 6/6] test(cli): tolerate miette line-wrapping in
 symlink-rejection assertions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`sandbox_download_rejects_symlinks_pointing_outside_workspace` matched the
substring `"outside the sandbox workspace"` against the CLI's error output.
The check is correct in spirit but fragile in practice: miette wraps long single-line errors across multiple lines at terminal width and inserts a `│ ` continuation marker, so the substring breaks across the wrap and the test fails even though the canonicalisation step correctly refused the source. Split the match into three short markers — `resolves to`, `outside the`, `sandbox workspace` — each guaranteed to fit on one wrapped line. The combined check is stricter than the original (it now requires evidence that the remote `realpath -e` step actually ran, not just the boundary refusal) without inheriting the wrap fragility. Signed-off-by: Tinson Lai --- e2e/rust/tests/sync.rs | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/e2e/rust/tests/sync.rs b/e2e/rust/tests/sync.rs index 56a7af020..a42447865 100644 --- a/e2e/rust/tests/sync.rs +++ b/e2e/rust/tests/sync.rs @@ -426,6 +426,21 @@ async fn sandbox_download_directory_only() { guard.cleanup().await; } +/// Assert that an error string carries the two markers of a remote-side +/// canonicalisation refusal: `resolves to` (proving `realpath -e` ran) and +/// `outside the` + `sandbox workspace` (proving the boundary check fired). +/// +/// miette renders the long single-line error wrapped across multiple lines +/// at terminal width and inserts a `│ ` continuation marker, so a single +/// long substring like `"outside the sandbox workspace"` is fragile — +/// match the short markers individually instead. 
+fn assert_resolves_outside_workspace(err: &str, label: &str) { + assert!( + err.contains("resolves to") && err.contains("outside the") && err.contains("sandbox workspace"), + "expected resolves-outside-workspace error for {label}, got: {err}" + ); +} + /// Regression for the Codex high-severity finding: `validate_sandbox_source_path` /// is purely lexical, so a sandbox-side symlink that escapes `/sandbox` could /// slip through and let `tar -C` follow the link into the host filesystem. @@ -459,30 +474,21 @@ async fn sandbox_download_rejects_symlinks_pointing_outside_workspace() { .download("/sandbox/etc-link", dest_str) .await .expect_err("download of a directory symlink to /etc must be refused"); - assert!( - err.contains("outside the sandbox workspace"), - "expected workspace-boundary error for directory symlink, got: {err}" - ); + assert_resolves_outside_workspace(&err, "directory symlink"); // File-source symlink: /sandbox/passwd-link -> /etc/passwd let err = guard .download("/sandbox/passwd-link", dest_str) .await .expect_err("download of a file symlink to /etc/passwd must be refused"); - assert!( - err.contains("outside the sandbox workspace"), - "expected workspace-boundary error for file symlink, got: {err}" - ); + assert_resolves_outside_workspace(&err, "file symlink"); // Path with a symlinked component: /sandbox/etc-link/passwd let err = guard .download("/sandbox/etc-link/passwd", dest_str) .await .expect_err("download via a symlinked path component must be refused"); - assert!( - err.contains("outside the sandbox workspace"), - "expected workspace-boundary error for symlinked component, got: {err}" - ); + assert_resolves_outside_workspace(&err, "symlinked component"); // Sanity check: the host destination should still be empty — no file from // /etc should have leaked through on any of the three attempts.