From e65720f4f1f103f1095d90a6790f9c9a09dd31e7 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Fri, 15 May 2026 11:03:00 -0500 Subject: [PATCH 1/2] chore: remove SSH handshake secret residuals and fix agent memory Removes three artifacts left behind by the #1274 removal of OPENSHELL_SSH_HANDSHAKE_SECRET, then corrects a sweep of stale and inaccurate notes in .claude/agent-memory/arch-doc-writer/MEMORY.md that were discovered during the audit. Artifact removal (Refs OS-174): - openshell.spec: stale comment claiming init-gateway-env.sh generates an SSH handshake secret - e2e/with-podman-gateway.sh: dead podman secret rm for openshell-handshake-, which has not been created since #1274 Agent memory corrections: - ssh_tunnel.rs no longer exists; replaced by ssh_sessions.rs - Object types list was missing service_endpoint and provider_profile - Pre-exec chain now includes harden_child_process() and uses the linux::prepare()/enforce() two-phase pattern on Linux - CLI SSH function list had nonexistent sandbox_rsync; corrected to actual exported functions - ExecSandbox is in grpc/sandbox.rs (not grpc.rs) and operates over a supervisor relay DuplexStream, not a direct TCP connection - resolve_ssh_gateway() moved to openshell-core/src/forward.rs - SSH transport note rewritten: NSSH1 is an OCSF-only label (not a live protocol preface); actual path is ForwardTcp -> DuplexStream -> RelayStream -> Unix socket; access gated by CreateSshSession token; TLS follows endpoint scheme (https:// = mTLS, http:// = plaintext; Podman driver does not yet inject mTLS client materials) - CLI flag note was self-contradictory; corrected to --gateway-endpoint with resolution priority chain --- .claude/agent-memory/arch-doc-writer/MEMORY.md | 16 ++++++++-------- e2e/with-podman-gateway.sh | 1 - openshell.spec | 4 ++-- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.claude/agent-memory/arch-doc-writer/MEMORY.md b/.claude/agent-memory/arch-doc-writer/MEMORY.md index 4e5781a59..1fce46001 
100644 --- a/.claude/agent-memory/arch-doc-writer/MEMORY.md +++ b/.claude/agent-memory/arch-doc-writer/MEMORY.md @@ -11,7 +11,7 @@ - Proxy: `crates/openshell-sandbox/src/proxy.rs` - Policy crate: `crates/openshell-policy/src/lib.rs` (YAML<->proto conversion, validation, restrictive default) - Server multiplex: `crates/openshell-server/src/multiplex.rs` -- SSH tunnel: `crates/openshell-server/src/ssh_tunnel.rs` +- SSH sessions: `crates/openshell-server/src/ssh_sessions.rs` (session persistence, reaper) - Sandbox SSH server: `crates/openshell-sandbox/src/ssh.rs` - Providers: `crates/openshell-providers/src/providers/` (per-provider modules) - Bootstrap: `crates/openshell-bootstrap/src/lib.rs` (cluster lifecycle) @@ -27,7 +27,7 @@ - OPA baked-in rules: `include_str!("../data/sandbox-policy.rego")` in opa.rs - Policy loading: gRPC mode (OPENSHELL_SANDBOX_ID + OPENSHELL_ENDPOINT) or file mode (--policy-rules + --policy-data) - Env vars: sandbox uses OPENSHELL_* prefix (e.g., OPENSHELL_SANDBOX_ID, OPENSHELL_ENDPOINT, OPENSHELL_POLICY_RULES) -- CLI flag: `--openshell-endpoint` (NOT `--openshell-endpoint`) +- CLI flag: `--gateway-endpoint` (direct URL to gateway); resolution: --gateway-endpoint > --gateway > OPENSHELL_GATEWAY env > active_gateway file - Provider env injection: both entrypoint process (tokio Command) and SSH shell (std Command) - Cluster bootstrap: `sandbox_create_with_bootstrap()` auto-deploys when no cluster exists (main.rs ~line 632) - CLI cluster resolution: --cluster flag > OPENSHELL_CLUSTER env > active cluster file @@ -50,9 +50,9 @@ - Persistence: single `objects` table, protobuf payloads, Store enum dispatches SQLite vs Postgres by URL prefix - Persistence CRUD: upsert ON CONFLICT (id) not (object_type, id); list ORDER BY created_at_ms ASC, name ASC (not id!) 
- --db-url has no code default; Helm values.yaml sets `sqlite:/var/openshell/openshell.db` -- Object types: "sandbox", "provider", "ssh_session", "inference_route" -- each implements ObjectType/ObjectId/ObjectName +- Object types: "sandbox", "provider", "ssh_session", "inference_route", "service_endpoint", "provider_profile" -- each implements ObjectType/ObjectId/ObjectName - Config: `openshell_core::Config` in `crates/openshell-core/src/config.rs`, all flags have env var fallbacks -- SSH handshake: "NSSH1" preface + HMAC-SHA256, used in both exec proxy (grpc.rs) and tunnel gateway (ssh_tunnel.rs) +- SSH transport: CLI opens ForwardTcp gRPC stream (gated by CreateSshSession short-lived token), gateway relays via DuplexStream to supervisor RelayStream, supervisor connects to sandbox russh server over root-only Unix socket (/run/openshell/ssh.sock); channel uses mTLS when https:// endpoint configured, plaintext when http:// (Podman driver does not yet inject mTLS client materials). NSSH1 appears only in openshell-ocsf examples/tests, not on any live code path. 
- Phase derivation: transient reasons (ReconcilerError, DependenciesNotReady) -> Provisioning; all others -> Error - Broadcast bus buffer sizes: SandboxWatchBus=128, TracingLogBus=1024, PlatformEventBus=1024 - Sandbox CRD: `agents.x-k8s.io/v1alpha1/Sandbox`, labels: `openshell.ai/sandbox-id`, `openshell.ai/managed-by` @@ -75,13 +75,13 @@ - DNS solution in cluster-entrypoint.sh: iptables DNAT proxy (NOT host-gateway resolv.conf) ## Sandbox Connect Details -- CLI SSH module: `crates/openshell-cli/src/ssh.rs` (sandbox_connect, sandbox_exec, sandbox_rsync, sandbox_ssh_proxy) +- CLI SSH module: `crates/openshell-cli/src/ssh.rs` (sandbox_connect, sandbox_connect_editor, sandbox_forward, sandbox_exec, sandbox_sync_up_files, sandbox_sync_up, sandbox_sync_down, sandbox_ssh_proxy, sandbox_ssh_proxy_by_name) - Re-exported from run.rs: `pub use crate::ssh::{...}` for backward compat - ssh-proxy subcommand: `Commands::SshProxy` in main.rs (~line 139) -- Gateway loopback resolution: `resolve_ssh_gateway()` in ssh.rs -- overrides loopback with cluster endpoint host -- ExecSandbox gRPC: uses single-use TCP proxy + russh client in grpc.rs +- Gateway loopback resolution: `resolve_ssh_gateway()` in `crates/openshell-core/src/forward.rs:439` -- overrides loopback with cluster endpoint host; imported by ssh.rs and tui +- ExecSandbox gRPC: uses single-use TCP proxy + russh client in `grpc/sandbox.rs` (handle_exec_sandbox -> stream_exec_over_relay); operates over a relay DuplexStream through the supervisor session, not a direct TCP connection - PTY I/O: 3 std::threads (writer, reader, exit) with reader_done sync for SSH protocol ordering -- SSH daemon: russh server, ephemeral Ed25519 key, pre_exec: setsid -> TIOCSCTTY -> setns -> drop_privileges -> sandbox::apply +- SSH daemon: russh server, ephemeral Ed25519 key, pre_exec: setsid -> TIOCSCTTY -> setns -> drop_privileges -> harden_child_process -> sandbox::linux::enforce(prepared) [Linux] / sandbox::apply [non-Linux]; 
sandbox::linux::prepare() runs before fork ## Policy Reload Details - Poll loop: `run_policy_poll_loop()` in lib.rs, spawned after child process, gRPC mode only diff --git a/e2e/with-podman-gateway.sh b/e2e/with-podman-gateway.sh index fd4d1ffe9..c7b5ceff7 100755 --- a/e2e/with-podman-gateway.sh +++ b/e2e/with-podman-gateway.sh @@ -100,7 +100,6 @@ cleanup() { podman_cmd rm -f "${id}" >/dev/null 2>&1 || true if [ -n "${sandbox_id}" ] && [ "${sandbox_id}" != "" ]; then podman_cmd volume rm -f "openshell-sandbox-${sandbox_id}-workspace" >/dev/null 2>&1 || true - podman_cmd secret rm "openshell-handshake-${sandbox_id}" >/dev/null 2>&1 || true fi done fi diff --git a/openshell.spec b/openshell.spec index 966bd5205..a45cee323 100644 --- a/openshell.spec +++ b/openshell.spec @@ -153,8 +153,8 @@ Type=exec # %%S expands to $XDG_STATE_HOME (~/.local/state) in user units. ExecStartPre=%{_libexecdir}/openshell/init-pki.sh %%S/openshell/tls -# Auto-generate gateway.env (SSH handshake secret + commented config -# reference) on first start if not present. +# Auto-generate gateway.env (commented config reference) on first +# start if not present. # %%E expands to $XDG_CONFIG_HOME (~/.config) in user units. ExecStartPre=%{_libexecdir}/openshell/init-gateway-env.sh %%E/openshell/gateway.env EnvironmentFile=-%%E/openshell/gateway.env From de77b99e1b62bd5e9fd241a284a4328dca10b9a0 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Fri, 15 May 2026 12:26:58 -0500 Subject: [PATCH 2/2] fix(vm): collapse nested if blocks to satisfy clippy::collapsible_if Three nested if blocks in connect_local_container_engine() were flagged by clippy after #1370. Collapse to single if-let chains using && as suggested. 
--- crates/openshell-driver-vm/src/driver.rs | 28 +++++++++++------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 4cea6be1a..61e5e0ee7 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -1522,25 +1522,23 @@ fn parse_registry_reference(image_ref: &str) -> Result { /// `DOCKER_HOST`). If Docker is unavailable, falls back to the Podman /// socket, which exposes a Docker-compatible API. async fn connect_local_container_engine() -> Option { - if let Ok(docker) = Docker::connect_with_local_defaults() { - if docker.ping().await.is_ok() { - return Some(docker); - } + if let Ok(docker) = Docker::connect_with_local_defaults() + && docker.ping().await.is_ok() + { + return Some(docker); } let podman_socket = podman_socket_path(); - if podman_socket.exists() { - if let Ok(docker) = + if podman_socket.exists() + && let Ok(docker) = Docker::connect_with_unix(podman_socket.to_str()?, 120, bollard::API_DEFAULT_VERSION) - { - if docker.ping().await.is_ok() { - info!( - socket = %podman_socket.display(), - "vm driver: connected to Podman (Docker-compatible API)" - ); - return Some(docker); - } - } + && docker.ping().await.is_ok() + { + info!( + socket = %podman_socket.display(), + "vm driver: connected to Podman (Docker-compatible API)" + ); + return Some(docker); } None