diff --git a/crates/openshell-cli/Cargo.toml b/crates/openshell-cli/Cargo.toml index 21068ad99..7dc0c0f22 100644 --- a/crates/openshell-cli/Cargo.toml +++ b/crates/openshell-cli/Cargo.toml @@ -58,6 +58,7 @@ anyhow = { workspace = true } # File archiving (tar-over-SSH sync) tar = "0.4" +tempfile = "3" # OIDC/Auth oauth2 = "5" @@ -91,6 +92,5 @@ rcgen = { version = "0.13", features = ["crypto", "pem"] } reqwest = { workspace = true } serde_json = { workspace = true } temp-env = "0.3" -tempfile = "3" tokio-stream = { workspace = true } url = { workspace = true } diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index e370d1f27..19c9e95ca 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -2374,12 +2374,8 @@ async fn main() -> Result<()> { let ctx = resolve_gateway(&cli.gateway, &cli.gateway_endpoint)?; let mut tls = tls.with_gateway_name(&ctx.name); apply_auth(&mut tls, &ctx.name); - let local_dest = std::path::Path::new(dest.as_deref().unwrap_or(".")); - eprintln!( - "Downloading sandbox:{} -> {}", - sandbox_path, - local_dest.display() - ); + let local_dest = dest.as_deref().unwrap_or("."); + eprintln!("Downloading sandbox:{sandbox_path} -> {local_dest}"); run::sandbox_sync_down(&ctx.endpoint, &name, &sandbox_path, local_dest, &tls) .await?; eprintln!("{} Download complete", "✓".green().bold()); diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 2797bd66c..de9c40879 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -2200,12 +2200,8 @@ pub async fn sandbox_sync_command( eprintln!("{} Sync complete", "✓".green().bold()); } (None, Some(sandbox_path)) => { - let local_dest = Path::new(dest.unwrap_or(".")); - eprintln!( - "Syncing sandbox:{} -> {}", - sandbox_path, - local_dest.display() - ); + let local_dest = dest.unwrap_or("."); + eprintln!("Syncing sandbox:{sandbox_path} -> {local_dest}"); sandbox_sync_down(server, name, 
sandbox_path, local_dest, tls).await?; eprintln!("{} Sync complete", "✓".green().bold()); } diff --git a/crates/openshell-cli/src/ssh.rs b/crates/openshell-cli/src/ssh.rs index e99e6ee15..5db4c74d6 100644 --- a/crates/openshell-cli/src/ssh.rs +++ b/crates/openshell-cli/src/ssh.rs @@ -605,6 +605,129 @@ fn split_sandbox_path(path: &str) -> (&str, &str) { } } +/// Writable root inside every sandbox. Used as the boundary for path-traversal +/// checks on sandbox-side source paths in download flows. +const SANDBOX_WORKSPACE_ROOT: &str = "/sandbox"; + +/// Lexically clean a POSIX-style absolute path by resolving `.` and `..` +/// components, collapsing repeated separators, and stripping any trailing +/// slash. Returns `None` if the input is empty or relative — the caller is +/// expected to reject those before reaching this helper. +/// +/// This is *lexical* only: it does not consult the filesystem and so cannot +/// follow symlinks. That trade-off is intentional — the function is used +/// client-side to refuse obvious path-traversal attempts before issuing the +/// SSH command. Symlink-based escapes inside the sandbox must be addressed +/// server-side. +fn lexical_clean_absolute_path(path: &str) -> Option<String> { + if !path.starts_with('/') { + return None; + } + let mut stack: Vec<&str> = Vec::new(); + for component in path.split('/') { + match component { + "" | "." => {} + ".." => { + stack.pop(); + } + other => stack.push(other), + } + } + if stack.is_empty() { + return Some("/".to_string()); + } + let mut out = String::with_capacity(path.len()); + for component in stack { + out.push('/'); + out.push_str(component); + } + Some(out) +} + +/// Validate that a sandbox-side source path passed to `sandbox download` +/// resolves under the sandbox writable root. +/// +/// Returns the cleaned, traversal-resolved path on success. Refuses any +/// path that lexically escapes `/sandbox` (e.g. `/etc/passwd`, +/// `/sandbox/../etc/passwd`) with a user-facing error. 
+/// +/// This is a lexical guard only — it does not follow symlinks. Call +/// `resolve_sandbox_source_path` after this on any path that will be passed +/// to a subsequent SSH I/O operation, so a symlink such as +/// `/sandbox/etc-link -> /etc` cannot leak files outside the workspace. +fn validate_sandbox_source_path(path: &str) -> Result<String> { + if path.is_empty() { + return Err(miette::miette!("sandbox source path is empty")); + } + let cleaned = lexical_clean_absolute_path(path) + .ok_or_else(|| miette::miette!("sandbox source path must be absolute (got '{path}')"))?; + if !is_under_sandbox_workspace(&cleaned) { + return Err(miette::miette!( + "sandbox source path '{path}' is outside the sandbox workspace ({SANDBOX_WORKSPACE_ROOT})" + )); + } + Ok(cleaned) +} + +/// Pure helper: is `path` equal to `/sandbox` or a descendant of it? +fn is_under_sandbox_workspace(path: &str) -> bool { + path == SANDBOX_WORKSPACE_ROOT || path.starts_with(&format!("{SANDBOX_WORKSPACE_ROOT}/")) +} + +/// Resolve every symlink in `sandbox_path` on the sandbox side and refuse the +/// result if it lands outside `/sandbox`. +/// +/// The lexical guard in `validate_sandbox_source_path` cannot see symlinks; a +/// path such as `/sandbox/etc-link/passwd` (where `etc-link -> /etc`) clears +/// the lexical check but would still leak `/etc/passwd` once `tar -C` follows +/// the link. Resolving symlinks on the remote side and re-validating closes +/// that gap. The returned fully-resolved path is what the caller should hand +/// to probe and tar invocations. 
+async fn resolve_sandbox_source_path( + session: &SshSessionConfig, + sandbox_path: &str, +) -> Result<String> { + let resolve_cmd = format!("realpath -e -- {path}", path = shell_escape(sandbox_path)); + let resolved = ssh_run_capture_stdout(session, &resolve_cmd) + .await + .wrap_err_with(|| format!("failed to resolve sandbox source path '{sandbox_path}'"))?; + if resolved.is_empty() { + return Err(miette::miette!( + "sandbox source path '{sandbox_path}' does not exist" + )); + } + if !is_under_sandbox_workspace(&resolved) { + return Err(miette::miette!( + "sandbox source path '{sandbox_path}' resolves to '{resolved}', outside the sandbox workspace ({SANDBOX_WORKSPACE_ROOT})" + )); + } + Ok(resolved) +} + +/// Resolve the host-side target path for a downloaded *file*, following +/// `cp`-style semantics. +/// +/// - If `dest_str` ends with `/` or already exists as a directory, the file is +/// placed inside it as `<dest_str>/<source_basename>`. +/// - Otherwise `dest_str` is treated as the exact file path to write. +/// +/// `dest_exists_as_dir` is taken as a parameter (rather than queried inside) +/// so this function stays pure and unit-testable; the caller performs the +/// filesystem check. +fn resolve_file_download_target( + dest_str: &str, + source_basename: &str, + dest_exists_as_dir: bool, +) -> PathBuf { + let trailing_slash = dest_str.ends_with('/'); + let dest_path = Path::new(dest_str); + if trailing_slash || dest_exists_as_dir { + dest_path.join(source_basename) + } else { + dest_path.to_path_buf() + } +} + /// Push a list of files from a local directory into a sandbox using tar-over-SSH. /// /// Files are streamed as a tar archive to `ssh ... tar xf - -C <dest>` on @@ -730,41 +853,106 @@ fn file_list_archive_prefix(local_path: &Path) -> Option { } } +/// Run a small command on the sandbox over SSH and capture its stdout. +/// +/// Used by the download flow to probe whether the source path is a regular +/// file or a directory before streaming the tar archive.
Stderr is inherited +/// so the user still sees any diagnostic output from ssh itself. +async fn ssh_run_capture_stdout(session: &SshSessionConfig, command: &str) -> Result<String> { + let mut ssh = ssh_base_command(&session.proxy_command); + ssh.arg("-T") + .arg("-o") + .arg("RequestTTY=no") + .arg("sandbox") + .arg(command) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()); + let output = tokio::task::spawn_blocking(move || ssh.output()) + .await + .into_diagnostic()? + .into_diagnostic()?; + if !output.status.success() { + return Err(miette::miette!( + "ssh probe exited with status {}", + output.status + )); + } + Ok(String::from_utf8_lossy(&output.stdout).trim().to_string()) +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum SandboxSourceKind { + File, + Directory, +} + +/// Probe the sandbox-side source path. The path is assumed to have already +/// been validated by `validate_sandbox_source_path`. +async fn probe_sandbox_source_kind( + session: &SshSessionConfig, + sandbox_path: &str, +) -> Result<SandboxSourceKind> { + let probe_cmd = format!( + "if [ -d {path} ]; then printf dir; elif [ -e {path} ]; then printf file; else printf missing; fi", + path = shell_escape(sandbox_path), + ); + let kind = ssh_run_capture_stdout(session, &probe_cmd).await?; + match kind.as_str() { + "dir" => Ok(SandboxSourceKind::Directory), + "file" => Ok(SandboxSourceKind::File), + "missing" => Err(miette::miette!( + "sandbox source path '{sandbox_path}' does not exist" + )), + other => Err(miette::miette!( + "unexpected probe output for sandbox source path '{sandbox_path}': '{other}'" + )), + } +} + +/// Pull a path from a sandbox to a local destination using tar-over-SSH. +/// +/// Follows `cp`-style semantics for the destination: +/// +/// - If the source is a single file: +/// - When `dest` ends with `/` or already exists as a directory on the host, +/// the file lands at `<dest>/<basename>`. +/// - Otherwise `dest` is taken to be the exact file path to write. 
+/// - If the source is a directory, its contents are extracted into `dest` +/// (creating `dest` if it does not yet exist). This preserves prior +/// behaviour for the directory-source case. +/// +/// The sandbox source path is also subjected to a workspace-boundary check +/// before any SSH command is issued; paths that lexically resolve outside +/// `/sandbox` are refused. pub async fn sandbox_sync_down( server: &str, name: &str, sandbox_path: &str, - local_path: &Path, + dest: &str, tls: &TlsOptions, ) -> Result<()> { + let sandbox_path = validate_sandbox_source_path(sandbox_path)?; let session = ssh_session_config(server, name, tls).await?; + let sandbox_path = resolve_sandbox_source_path(&session, &sandbox_path).await?; + let kind = probe_sandbox_source_kind(&session, &sandbox_path).await?; - // Build tar command. When the sandbox path is a directory we tar its - // *contents* (using `-C .`) so the caller gets the files directly - // without an extra wrapper directory. For a single file we split into - // the parent directory and the filename. - let sandbox_path_clean = sandbox_path.trim_end_matches('/'); - - let tar_cmd = format!( - "if [ -d {path} ]; then tar cf - -C {path} .; else tar cf - -C {parent} {name}; fi", - path = shell_escape(sandbox_path_clean), - parent = shell_escape( - sandbox_path_clean - .rfind('/') - .map_or(".", |pos| if pos == 0 { - "/" - } else { - &sandbox_path_clean[..pos] - }) - ), - name = shell_escape( - sandbox_path_clean - .rfind('/') - .map_or(sandbox_path_clean, |pos| &sandbox_path_clean[pos + 1..]) - ), - ); + match kind { + SandboxSourceKind::File => sandbox_sync_down_file(&session, &sandbox_path, dest).await, + SandboxSourceKind::Directory => { + sandbox_sync_down_directory(&session, &sandbox_path, dest).await + } + } +} +/// Stream a tar archive from the sandbox and extract it into a fresh +/// destination directory. The source is always wrapped on the sandbox side so +/// the host can pick a basename when needed. 
+async fn stream_sandbox_tar( + session: &SshSessionConfig, + tar_cmd: String, + extract_into: &Path, +) -> Result<()> { let mut ssh = ssh_base_command(&session.proxy_command); ssh.arg("-T") .arg("-o") @@ -781,14 +969,11 @@ pub async fn sandbox_sync_down( .take() .ok_or_else(|| miette::miette!("failed to open stdout for ssh process"))?; - let local_path = local_path.to_path_buf(); + let extract_into = extract_into.to_path_buf(); tokio::task::spawn_blocking(move || -> Result<()> { - fs::create_dir_all(&local_path) - .into_diagnostic() - .wrap_err("failed to create local destination directory")?; let mut archive = tar::Archive::new(stdout); archive - .unpack(&local_path) + .unpack(&extract_into) .into_diagnostic() .wrap_err("failed to extract tar archive from sandbox")?; Ok(()) @@ -806,10 +991,125 @@ pub async fn sandbox_sync_down( "ssh tar create exited with status {status}" )); } + Ok(()) +} +/// Build the `tar cf - -C <parent> -- <basename>` command used to wrap a +/// single sandbox-side file for download. +/// +/// The trailing `--` is required: a sandbox-side file whose basename starts +/// with `-` (e.g. `--checkpoint-action=...`) would otherwise be parsed by GNU +/// tar as an option rather than a member to archive. 
+fn build_single_file_tar_cmd(parent: &str, basename: &str) -> String { + format!( + "tar cf - -C {parent} -- {name}", + parent = shell_escape(parent), + name = shell_escape(basename), + ) +} + +async fn sandbox_sync_down_file( + session: &SshSessionConfig, + sandbox_path: &str, + dest: &str, +) -> Result<()> { + let (parent, basename) = split_sandbox_path(sandbox_path); + let dest_exists_as_dir = fs::symlink_metadata(Path::new(dest)).is_ok_and(|m| m.is_dir()); + let final_path = resolve_file_download_target(dest, basename, dest_exists_as_dir); + + let staging_parent = final_path + .parent() + .ok_or_else(|| miette::miette!("destination '{}' has no parent directory", dest))?; + fs::create_dir_all(staging_parent) + .into_diagnostic() + .wrap_err_with(|| { + format!( + "failed to create local destination directory '{}'", + staging_parent.display() + ) + })?; + + let staging = tempfile::TempDir::new_in(staging_parent) + .into_diagnostic() + .wrap_err("failed to create download staging directory")?; + + let tar_cmd = build_single_file_tar_cmd(parent, basename); + stream_sandbox_tar(session, tar_cmd, staging.path()).await?; + + place_downloaded_file(staging.path(), basename, &final_path).wrap_err_with(|| { + format!( + "failed to place downloaded file at '{}'", + final_path.display() + ) + })?; Ok(()) } +/// Move a single file extracted by `stream_sandbox_tar` into its final +/// position on the host. +/// +/// `staging_dir` must contain a single regular-file entry named +/// `source_basename` (the wrapper produced by `tar cf - -C <parent> -- <basename>`). +/// The entry is renamed onto `final_path`, atomically when `staging_dir` is +/// on the same filesystem. Refuses to overwrite an existing directory at +/// `final_path` to match `cp` behaviour. 
+fn place_downloaded_file( + staging_dir: &Path, + source_basename: &str, + final_path: &Path, +) -> Result<()> { + let staged_file = staging_dir.join(source_basename); + let staged_meta = fs::symlink_metadata(&staged_file) + .into_diagnostic() + .wrap_err("downloaded archive did not contain the expected entry")?; + if !staged_meta.is_file() { + return Err(miette::miette!( + "downloaded entry '{source_basename}' is not a regular file" + )); + } + + if let Ok(existing) = fs::symlink_metadata(final_path) + && existing.is_dir() + { + return Err(miette::miette!( + "cannot overwrite directory '{}' with downloaded file", + final_path.display() + )); + } + + fs::rename(&staged_file, final_path) + .into_diagnostic() + .wrap_err_with(|| format!("failed to rename into '{}'", final_path.display()))?; + Ok(()) +} + +async fn sandbox_sync_down_directory( + session: &SshSessionConfig, + sandbox_path: &str, + dest: &str, +) -> Result<()> { + let dest_path = Path::new(dest); + if let Ok(existing) = fs::symlink_metadata(dest_path) + && !existing.is_dir() + { + return Err(miette::miette!( + "cannot extract directory '{sandbox_path}' over non-directory destination '{}'", + dest_path.display() + )); + } + fs::create_dir_all(dest_path) + .into_diagnostic() + .wrap_err_with(|| { + format!( + "failed to create local destination directory '{}'", + dest_path.display() + ) + })?; + + let tar_cmd = format!("tar cf - -C {path} .", path = shell_escape(sandbox_path)); + stream_sandbox_tar(session, tar_cmd, dest_path).await +} + /// Run the SSH proxy, connecting stdin/stdout to the gateway. 
pub async fn sandbox_ssh_proxy( gateway_url: &str, @@ -1239,6 +1539,254 @@ mod tests { assert_eq!(split_sandbox_path("/a/b/c/d.txt"), ("/a/b/c", "d.txt")); } + #[test] + fn lexical_clean_resolves_dot_and_dotdot_segments() { + assert_eq!( + lexical_clean_absolute_path("/sandbox/./a"), + Some("/sandbox/a".to_string()) + ); + assert_eq!( + lexical_clean_absolute_path("/sandbox/sub/../a"), + Some("/sandbox/a".to_string()) + ); + assert_eq!( + lexical_clean_absolute_path("/sandbox/../etc/passwd"), + Some("/etc/passwd".to_string()) + ); + assert_eq!( + lexical_clean_absolute_path("//sandbox///foo//"), + Some("/sandbox/foo".to_string()) + ); + assert_eq!(lexical_clean_absolute_path("/"), Some("/".to_string())); + } + + #[test] + fn lexical_clean_refuses_relative_paths() { + assert_eq!(lexical_clean_absolute_path(""), None); + assert_eq!(lexical_clean_absolute_path("sandbox/a"), None); + assert_eq!(lexical_clean_absolute_path("./a"), None); + } + + #[test] + fn validate_sandbox_source_path_accepts_workspace_paths() { + assert_eq!( + validate_sandbox_source_path("/sandbox/file.txt").unwrap(), + "/sandbox/file.txt" + ); + assert_eq!( + validate_sandbox_source_path("/sandbox/.openclaw/workspace/hello.txt").unwrap(), + "/sandbox/.openclaw/workspace/hello.txt" + ); + assert_eq!( + validate_sandbox_source_path("/sandbox").unwrap(), + "/sandbox" + ); + assert_eq!( + validate_sandbox_source_path("/sandbox/").unwrap(), + "/sandbox" + ); + assert_eq!( + validate_sandbox_source_path("/sandbox/sub/../file").unwrap(), + "/sandbox/file" + ); + } + + #[test] + fn validate_sandbox_source_path_rejects_traversal_and_escapes() { + let traversal = validate_sandbox_source_path("/etc/passwd").unwrap_err(); + assert!( + format!("{traversal}").contains("outside the sandbox workspace"), + "unexpected error: {traversal}" + ); + + let parent_escape = validate_sandbox_source_path("/sandbox/../etc/passwd").unwrap_err(); + assert!( + format!("{parent_escape}").contains("outside the sandbox workspace"), 
+ "unexpected error: {parent_escape}" + ); + + let prefix_only = validate_sandbox_source_path("/sandboxed/secrets").unwrap_err(); + assert!( + format!("{prefix_only}").contains("outside the sandbox workspace"), + "unexpected error: {prefix_only}" + ); + + let empty = validate_sandbox_source_path("").unwrap_err(); + assert!(format!("{empty}").contains("empty")); + + let relative = validate_sandbox_source_path("sandbox/file").unwrap_err(); + assert!(format!("{relative}").contains("must be absolute")); + } + + #[test] + fn is_under_sandbox_workspace_accepts_root_and_descendants() { + assert!(is_under_sandbox_workspace("/sandbox")); + assert!(is_under_sandbox_workspace("/sandbox/file")); + assert!(is_under_sandbox_workspace("/sandbox/sub/nested")); + } + + #[test] + fn is_under_sandbox_workspace_rejects_outside_paths_and_prefix_collisions() { + assert!(!is_under_sandbox_workspace("/etc/passwd")); + assert!(!is_under_sandbox_workspace("/sandboxed/secrets")); + assert!(!is_under_sandbox_workspace("/")); + assert!(!is_under_sandbox_workspace("")); + } + + #[test] + fn build_single_file_tar_cmd_inserts_double_dash_before_basename() { + // Without `--`, a basename such as `--checkpoint-action=...` would be + // parsed by GNU tar as an option. Guard the wire format against this + // regression. 
+ let cmd = build_single_file_tar_cmd("/sandbox", "--checkpoint-action=exec=id"); + assert!( + cmd.contains(" -- "), + "expected `--` separator in tar command, got: {cmd}" + ); + assert!( + cmd.ends_with(&shell_escape("--checkpoint-action=exec=id")), + "expected basename at end of tar command, got: {cmd}" + ); + } + + #[test] + fn build_single_file_tar_cmd_escapes_parent_and_basename() { + let cmd = build_single_file_tar_cmd("/sandbox/with space", "name with space"); + assert!(cmd.contains(" -- "), "missing `--` separator: {cmd}"); + assert!( + cmd.contains(&shell_escape("/sandbox/with space")), + "parent not shell-escaped: {cmd}" + ); + assert!( + cmd.contains(&shell_escape("name with space")), + "basename not shell-escaped: {cmd}" + ); + } + + #[test] + fn resolve_file_download_target_writes_to_dest_when_not_a_directory() { + assert_eq!( + resolve_file_download_target("/tmp/out.txt", "hello.txt", false), + PathBuf::from("/tmp/out.txt") + ); + } + + #[test] + fn resolve_file_download_target_places_inside_existing_directory() { + assert_eq!( + resolve_file_download_target("/tmp", "hello.txt", true), + PathBuf::from("/tmp/hello.txt") + ); + } + + #[test] + fn resolve_file_download_target_honors_trailing_slash() { + assert_eq!( + resolve_file_download_target("/tmp/newdir/", "hello.txt", false), + PathBuf::from("/tmp/newdir/hello.txt") + ); + } + + fn build_single_file_archive(entry_path: &str, bytes: &[u8]) -> Vec<u8> { + let mut buf = Vec::new(); + { + let mut builder = tar::Builder::new(&mut buf); + let mut header = tar::Header::new_gnu(); + header.set_path(entry_path).expect("set tar entry path"); + header.set_size(bytes.len() as u64); + header.set_mode(0o644); + header.set_entry_type(tar::EntryType::Regular); + header.set_cksum(); + builder.append(&header, bytes).expect("append tar entry"); + builder.finish().expect("finish tar archive"); + } + buf + } + + fn unpack_into(archive_bytes: &[u8], staging: &Path) { + let mut archive = 
tar::Archive::new(std::io::Cursor::new(archive_bytes)); + archive.unpack(staging).expect("unpack archive"); + } + + #[test] + fn place_downloaded_file_writes_regular_file_at_dest() { + let workdir = tempfile::tempdir().expect("create workdir"); + let staging = workdir.path().join("staging"); + fs::create_dir_all(&staging).expect("create staging"); + let archive = build_single_file_archive("hello.txt", b"trust me"); + unpack_into(&archive, &staging); + + let dest = workdir.path().join("out.txt"); + place_downloaded_file(&staging, "hello.txt", &dest).expect("place file"); + + let meta = fs::symlink_metadata(&dest).expect("stat dest"); + assert!(meta.is_file(), "dest must be a regular file, got {meta:?}"); + assert_eq!(fs::read(&dest).expect("read dest"), b"trust me"); + } + + #[test] + fn place_downloaded_file_refuses_to_clobber_existing_directory() { + let workdir = tempfile::tempdir().expect("create workdir"); + let staging = workdir.path().join("staging"); + fs::create_dir_all(&staging).expect("create staging"); + let archive = build_single_file_archive("hello.txt", b"trust me"); + unpack_into(&archive, &staging); + + let dest = workdir.path().join("conflict-dir"); + fs::create_dir(&dest).expect("create conflict dir"); + + let err = place_downloaded_file(&staging, "hello.txt", &dest) + .expect_err("expected directory-clobber refusal"); + assert!( + format!("{err}").contains("cannot overwrite directory"), + "unexpected error: {err}" + ); + assert!( + fs::symlink_metadata(&dest).expect("stat dest").is_dir(), + "dest should remain a directory after refusal" + ); + } + + #[test] + fn download_full_pipeline_lands_file_at_exact_dest_path() { + let workdir = tempfile::tempdir().expect("create workdir"); + let staging_parent = workdir.path(); + let archive = build_single_file_archive("hello.txt", b"trust me"); + + let dest_str = staging_parent.join("out.txt"); + let dest_str = dest_str.to_str().unwrap(); + let final_path = resolve_file_download_target(dest_str, 
"hello.txt", false); + assert_eq!(final_path, Path::new(dest_str)); + + let staging = tempfile::TempDir::new_in(staging_parent).expect("staging dir"); + unpack_into(&archive, staging.path()); + place_downloaded_file(staging.path(), "hello.txt", &final_path).expect("place"); + + let meta = fs::symlink_metadata(&final_path).expect("stat final"); + assert!(meta.is_file(), "expected regular file, got {meta:?}"); + assert_eq!(fs::read(&final_path).expect("read final"), b"trust me"); + } + + #[test] + fn download_full_pipeline_places_inside_existing_directory_destination() { + let workdir = tempfile::tempdir().expect("create workdir"); + let archive = build_single_file_archive("hello.txt", b"trust me"); + + let dest_dir = workdir.path().join("out-dir"); + fs::create_dir(&dest_dir).expect("create dest dir"); + let dest_str = dest_dir.to_str().unwrap(); + let final_path = resolve_file_download_target(dest_str, "hello.txt", true); + assert_eq!(final_path, dest_dir.join("hello.txt")); + + let staging = tempfile::TempDir::new_in(workdir.path()).expect("staging dir"); + unpack_into(&archive, staging.path()); + place_downloaded_file(staging.path(), "hello.txt", &final_path).expect("place"); + + let meta = fs::symlink_metadata(&final_path).expect("stat final"); + assert!(meta.is_file()); + assert_eq!(fs::read(&final_path).expect("read final"), b"trust me"); + } + #[test] fn directory_upload_prefix_uses_basename_for_named_directories() { assert_eq!( diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx index b43a6846a..a1f69bb6a 100644 --- a/docs/sandboxes/manage-sandboxes.mdx +++ b/docs/sandboxes/manage-sandboxes.mdx @@ -235,6 +235,10 @@ Download files from the sandbox to your host: openshell sandbox download my-sandbox /sandbox/output ./local ``` +When the sandbox-side source is a single file, the destination follows `cp`-style placement: if the destination already exists as a directory or ends with `/`, the file lands inside it as `/`; 
otherwise the file is written at the exact destination path. + +The CLI only allows sandbox-side sources that resolve inside the writable workspace (`/sandbox`). Paths that escape lexically (`/etc/passwd`, `/sandbox/../etc/passwd`) and paths that escape through a symlink (`/sandbox/etc-link` pointing at `/etc`) are both refused before any data is transferred. + You can also upload files at creation time with the `--upload` flag on `openshell sandbox create`. diff --git a/e2e/rust/src/harness/sandbox.rs b/e2e/rust/src/harness/sandbox.rs index 278c16f53..cdb461494 100644 --- a/e2e/rust/src/harness/sandbox.rs +++ b/e2e/rust/src/harness/sandbox.rs @@ -349,6 +349,52 @@ impl SandboxGuard { Ok(combined) } + /// Run a one-shot command inside the sandbox via `openshell sandbox exec`. + /// + /// Used by tests that need to pre-populate sandbox-side state (create + /// files, symlinks, directories) without going through the upload flow. + /// Stdout and stderr are captured and returned together; PTY allocation + /// is disabled so the call is suitable for non-interactive setups. + /// + /// # Arguments + /// + /// * `argv` — Command and arguments to execute (passed after `--`). + /// + /// # Errors + /// + /// Returns an error if the CLI exits non-zero. 
+ pub async fn exec(&self, argv: &[&str]) -> Result<String, String> { + let mut cmd = openshell_cmd(); + cmd.arg("sandbox") + .arg("exec") + .arg("--name") + .arg(&self.name) + .arg("--no-tty") + .arg("--"); + for arg in argv { + cmd.arg(arg); + } + cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); + + let output = cmd + .output() + .await + .map_err(|e| format!("failed to spawn openshell exec: {e}"))?; + + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + let stderr = String::from_utf8_lossy(&output.stderr).to_string(); + let combined = format!("{stdout}{stderr}"); + + if !output.status.success() { + return Err(format!( + "sandbox exec failed (exit {:?}):\n{combined}", + output.status.code() + )); + } + + Ok(combined) + } + /// Download files from the sandbox via `openshell sandbox download`. /// /// # Arguments diff --git a/e2e/rust/tests/sync.rs b/e2e/rust/tests/sync.rs index 60abb42b4..a42447865 100644 --- a/e2e/rust/tests/sync.rs +++ b/e2e/rust/tests/sync.rs @@ -352,3 +352,194 @@ async fn upload_single_file_from_git_repo_only_uploads_that_file() { guard.cleanup().await; } + +/// Pre-populate a single file on the sandbox side via `sandbox exec` and pull +/// it back with `sandbox download`. Exercises `sandbox_sync_down_file` in +/// isolation (no upload phase). 
+#[tokio::test] +async fn sandbox_download_file_only() { + let mut guard = + SandboxGuard::create_keep(&["sh", "-c", "echo Ready && sleep infinity"], "Ready") + .await + .expect("sandbox create --keep"); + + guard + .exec(&["sh", "-c", "printf greeting-payload > /sandbox/greeting.txt"]) + .await + .expect("seed greeting.txt inside the sandbox"); + + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let dest = tmpdir.path().join("out"); + fs::create_dir_all(&dest).expect("create download dir"); + let dest_str = dest.to_str().expect("dest path is UTF-8"); + + guard + .download("/sandbox/greeting.txt", dest_str) + .await + .expect("download greeting.txt"); + + let actual = fs::read_to_string(dest.join("greeting.txt")).expect("read greeting.txt"); + assert_eq!( + actual, "greeting-payload", + "downloaded file content mismatch" + ); + + guard.cleanup().await; +} + +/// Pre-populate a small directory tree on the sandbox side via `sandbox exec` +/// and pull it recursively with `sandbox download`. Exercises +/// `sandbox_sync_down_directory` in isolation (no upload phase). 
+#[tokio::test] +async fn sandbox_download_directory_only() { + let mut guard = + SandboxGuard::create_keep(&["sh", "-c", "echo Ready && sleep infinity"], "Ready") + .await + .expect("sandbox create --keep"); + + guard + .exec(&[ + "sh", + "-c", + "mkdir -p /sandbox/tree/sub \ + && printf top-level > /sandbox/tree/root.txt \ + && printf nested > /sandbox/tree/sub/child.txt", + ]) + .await + .expect("seed directory tree inside the sandbox"); + + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let dest = tmpdir.path().join("out"); + fs::create_dir_all(&dest).expect("create download dir"); + let dest_str = dest.to_str().expect("dest path is UTF-8"); + + guard + .download("/sandbox/tree", dest_str) + .await + .expect("download directory tree"); + + let root = fs::read_to_string(dest.join("root.txt")).expect("read root.txt"); + assert_eq!(root, "top-level", "root.txt content mismatch"); + let child = fs::read_to_string(dest.join("sub/child.txt")).expect("read sub/child.txt"); + assert_eq!(child, "nested", "sub/child.txt content mismatch"); + + guard.cleanup().await; +} + +/// Assert that an error string carries the two markers of a remote-side +/// canonicalisation refusal: `resolves to` (proving `realpath -e` ran) and +/// `outside the` + `sandbox workspace` (proving the boundary check fired). +/// +/// miette renders the long single-line error wrapped across multiple lines +/// at terminal width and inserts a `│ ` continuation marker, so a single +/// long substring like `"outside the sandbox workspace"` is fragile — +/// match the short markers individually instead. 
+fn assert_resolves_outside_workspace(err: &str, label: &str) { + assert!( + err.contains("resolves to") && err.contains("outside the") && err.contains("sandbox workspace"), + "expected resolves-outside-workspace error for {label}, got: {err}" + ); +} + +/// Regression for the Codex high-severity finding: `validate_sandbox_source_path` +/// is purely lexical, so a sandbox-side symlink that escapes `/sandbox` could +/// slip through and let `tar -C` follow the link into the host filesystem. +/// `resolve_sandbox_source_path` re-validates after `realpath -e` resolves +/// every component. Cover both shapes from the Codex example: the symlink as +/// the full source (directory case) and the symlink as a component of the +/// source path (file case). +#[tokio::test] +async fn sandbox_download_rejects_symlinks_pointing_outside_workspace() { + let mut guard = + SandboxGuard::create_keep(&["sh", "-c", "echo Ready && sleep infinity"], "Ready") + .await + .expect("sandbox create --keep"); + + guard + .exec(&[ + "sh", + "-c", + "ln -s /etc /sandbox/etc-link && ln -s /etc/passwd /sandbox/passwd-link", + ]) + .await + .expect("plant escape symlinks inside the sandbox"); + + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let dest = tmpdir.path().join("out"); + fs::create_dir_all(&dest).expect("create download dir"); + let dest_str = dest.to_str().expect("dest path is UTF-8"); + + // Directory-source symlink: /sandbox/etc-link -> /etc + let err = guard + .download("/sandbox/etc-link", dest_str) + .await + .expect_err("download of a directory symlink to /etc must be refused"); + assert_resolves_outside_workspace(&err, "directory symlink"); + + // File-source symlink: /sandbox/passwd-link -> /etc/passwd + let err = guard + .download("/sandbox/passwd-link", dest_str) + .await + .expect_err("download of a file symlink to /etc/passwd must be refused"); + assert_resolves_outside_workspace(&err, "file symlink"); + + // Path with a symlinked component: 
/sandbox/etc-link/passwd + let err = guard + .download("/sandbox/etc-link/passwd", dest_str) + .await + .expect_err("download via a symlinked path component must be refused"); + assert_resolves_outside_workspace(&err, "symlinked component"); + + // Sanity check: the host destination should still be empty — no file from + // /etc should have leaked through on any of the three attempts. + assert!( + !dest.join("passwd").exists() && !dest.join("etc-link").exists(), + "no file should have been written when the source resolves outside the workspace" + ); + + guard.cleanup().await; +} + +/// Regression for the Codex medium-severity finding: the single-file tar +/// invocation must wrap the basename behind a `--` separator, otherwise a +/// sandbox-side filename whose basename starts with `--` (e.g. created by a +/// malicious agent inside the sandbox) is parsed by GNU tar as an option +/// rather than a member to archive. +#[tokio::test] +async fn sandbox_download_handles_dash_leading_basename() { + let mut guard = + SandboxGuard::create_keep(&["sh", "-c", "echo Ready && sleep infinity"], "Ready") + .await + .expect("sandbox create --keep"); + + // Quoting the redirect target keeps the leading dashes intact across the + // shell parse; the sandbox-side filesystem ends up with a basename that + // would be parsed as an option by an unprotected tar invocation. 
+ guard + .exec(&[ + "sh", + "-c", + "printf dash-payload > '/sandbox/--checkpoint-action=evil'", + ]) + .await + .expect("seed dash-leading file inside the sandbox"); + + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let dest = tmpdir.path().join("out"); + fs::create_dir_all(&dest).expect("create download dir"); + let dest_str = dest.to_str().expect("dest path is UTF-8"); + + guard + .download("/sandbox/--checkpoint-action=evil", dest_str) + .await + .expect("download dash-leading basename"); + + let actual = + fs::read_to_string(dest.join("--checkpoint-action=evil")).expect("read dash-leading file"); + assert_eq!( + actual, "dash-payload", + "dash-leading file content mismatch — tar may have parsed the basename as an option" + ); + + guard.cleanup().await; +}