Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions architecture/gateway.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ health, metrics, or tunnel routes. The plaintext service router also rejects
browser requests whose Fetch Metadata, Origin, or Referer headers indicate a
cross-origin or sibling-subdomain request.

Dedicated health listeners expose `/healthz` (process liveness only) and
`/readyz` (dependency-aware readiness). Readiness probes include a DB
connectivity check bounded by a configurable timeout
(`--readiness-db-timeout-secs` / `OPENSHELL_READINESS_DB_TIMEOUT_SECS`,
default 1 second, minimum 1) so operators can tune pod eviction sensitivity
without changing liveness behavior.

Supported auth modes:

| Mode | Use |
Expand Down
37 changes: 37 additions & 0 deletions crates/openshell-core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ pub const DEFAULT_STOP_TIMEOUT_SECS: u32 = 10;
/// Default allowed clock skew for SSH handshake validation, in seconds.
pub const DEFAULT_SSH_HANDSHAKE_SKEW_SECS: u64 = 300;

/// Default timeout for gateway DB readiness probes, in seconds.
pub const DEFAULT_READINESS_DATABASE_TIMEOUT_SECS: u64 = 1;

/// Default Podman bridge network name.
pub const DEFAULT_NETWORK_NAME: &str = "openshell";

Expand Down Expand Up @@ -197,6 +200,13 @@ pub struct Config {
#[serde(default)]
pub metrics_bind_address: Option<SocketAddr>,

/// Timeout for database readiness probes, in seconds.
///
/// Used by `/readyz` (and `/health`, which aliases readiness) to bound
/// `Store::ping` checks before returning an unhealthy dependency status.
#[serde(default = "default_readiness_database_timeout_secs")]
pub readiness_database_timeout_secs: u64,

/// Additional bind addresses that serve the same multiplexed gRPC/HTTP
/// surface as `bind_address`.
///
Expand Down Expand Up @@ -415,6 +425,7 @@ impl Config {
bind_address: default_bind_address(),
health_bind_address: None,
metrics_bind_address: None,
readiness_database_timeout_secs: default_readiness_database_timeout_secs(),
extra_bind_addresses: Vec::new(),
log_level: default_log_level(),
tls,
Expand Down Expand Up @@ -458,6 +469,13 @@ impl Config {
self
}

/// Set the database readiness probe timeout in seconds.
///
/// Builder-style setter: takes `self` by value, overwrites
/// `readiness_database_timeout_secs` with `secs`, and returns the updated
/// config. The value is stored exactly as given; this method performs no
/// range check (NOTE(review): the CLI flag rejects 0, this setter does not).
#[must_use]
pub const fn with_readiness_database_timeout_secs(mut self, secs: u64) -> Self {
self.readiness_database_timeout_secs = secs;
self
}

/// Append an extra listener address to the multiplex service.
///
/// Duplicate entries (matching `bind_address` or any existing entry) are
Expand Down Expand Up @@ -711,6 +729,10 @@ const fn default_ssh_handshake_skew_secs() -> u64 {
DEFAULT_SSH_HANDSHAKE_SKEW_SECS
}

/// Serde default for `Config::readiness_database_timeout_secs`.
///
/// Returns `DEFAULT_READINESS_DATABASE_TIMEOUT_SECS`.
const fn default_readiness_database_timeout_secs() -> u64 {
DEFAULT_READINESS_DATABASE_TIMEOUT_SECS
}

/// Default SSH session TTL, in seconds: 24 hours.
const fn default_ssh_session_ttl_secs() -> u64 {
    const SECS_PER_HOUR: u64 = 60 * 60;
    24 * SECS_PER_HOUR
}
Expand Down Expand Up @@ -764,6 +786,15 @@ mod tests {
assert!(cfg.health_bind_address.is_none());
}

#[test]
fn config_new_sets_default_readiness_database_timeout() {
    // A freshly constructed config must carry the crate-level default.
    let expected = super::DEFAULT_READINESS_DATABASE_TIMEOUT_SECS;
    let actual = Config::new(None).readiness_database_timeout_secs;
    assert_eq!(actual, expected);
}

#[test]
fn service_routing_allows_loopback_plaintext_http_by_default() {
let cfg = Config::new(None);
Expand Down Expand Up @@ -817,6 +848,12 @@ mod tests {
assert_eq!(cfg.health_bind_address, Some(addr));
}

#[test]
fn config_with_readiness_database_timeout_sets_value() {
    // The builder must store exactly the value it was handed.
    let timeout_secs = 9;
    let cfg = Config::new(None).with_readiness_database_timeout_secs(timeout_secs);
    assert_eq!(cfg.readiness_database_timeout_secs, timeout_secs);
}

#[test]
fn detect_driver_returns_none_without_k8s_env_or_binaries() {
// When KUBERNETES_SERVICE_HOST is not set and no docker/podman binaries
Expand Down
1 change: 1 addition & 0 deletions crates/openshell-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ nix = { workspace = true }

[features]
dev-settings = ["openshell-core/dev-settings"]
test-support = []

[dev-dependencies]
hyper-rustls = { version = "0.27", default-features = false, features = ["native-tokio", "http1", "tls12", "logging", "ring", "webpki-tokio"] }
Expand Down
43 changes: 41 additions & 2 deletions crates/openshell-server/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use clap::{ArgAction, Command, CommandFactory, FromArgMatches, Parser};
use miette::{IntoDiagnostic, Result};
use openshell_core::ComputeDriverKind;
use openshell_core::config::{
DEFAULT_DOCKER_NETWORK_NAME, DEFAULT_SERVER_PORT, DEFAULT_SSH_HANDSHAKE_SKEW_SECS,
DEFAULT_SSH_PORT,
DEFAULT_DOCKER_NETWORK_NAME, DEFAULT_READINESS_DATABASE_TIMEOUT_SECS, DEFAULT_SERVER_PORT,
DEFAULT_SSH_HANDSHAKE_SKEW_SECS, DEFAULT_SSH_PORT,
};
use std::net::{IpAddr, SocketAddr};
use std::path::PathBuf;
Expand Down Expand Up @@ -62,6 +62,15 @@ struct RunArgs {
#[arg(long, default_value_t = 0, env = "OPENSHELL_METRICS_PORT")]
metrics_port: u16,

/// Timeout in seconds for database readiness probes (`/readyz`, `/health`).
#[arg(
long,
default_value_t = DEFAULT_READINESS_DATABASE_TIMEOUT_SECS,
env = "OPENSHELL_READINESS_DB_TIMEOUT_SECS",
value_parser = clap::value_parser!(u64).range(1..)
)]
readiness_db_timeout_secs: u64,

/// Log level (trace, debug, info, warn, error).
#[arg(long, default_value = "info", env = "OPENSHELL_LOG_LEVEL")]
log_level: String,
Expand Down Expand Up @@ -413,6 +422,7 @@ async fn run_from_args(args: RunArgs) -> Result<()> {
.with_ssh_gateway_port(args.ssh_gateway_port)
.with_sandbox_ssh_port(args.sandbox_ssh_port)
.with_ssh_handshake_skew_secs(args.ssh_handshake_skew_secs)
.with_readiness_database_timeout_secs(args.readiness_db_timeout_secs)
.with_server_sans(args.server_sans)
.with_loopback_service_http(args.enable_loopback_service_http);

Expand Down Expand Up @@ -579,6 +589,35 @@ mod tests {
assert_eq!(cli.run.bind_address, IpAddr::V4(Ipv4Addr::LOCALHOST));
}

#[test]
fn command_parses_readiness_db_timeout_from_flag() {
    // Hold ENV_LOCK for the whole test; recover the guard if the lock was
    // poisoned by a panic elsewhere.
    let _lock = match ENV_LOCK.lock() {
        Ok(held) => held,
        Err(poisoned) => poisoned.into_inner(),
    };
    // NOTE(review): `EnvVarGuard::remove` presumably unsets the variable for
    // the guard's lifetime so only the explicit flag supplies the value.
    let _guard = EnvVarGuard::remove("OPENSHELL_READINESS_DB_TIMEOUT_SECS");

    let argv = [
        "openshell-gateway",
        "--db-url",
        "sqlite::memory:",
        "--readiness-db-timeout-secs",
        "9",
    ];
    let parsed = Cli::try_parse_from(argv).unwrap();
    assert_eq!(parsed.run.readiness_db_timeout_secs, 9);
}

#[test]
fn command_reads_readiness_db_timeout_from_env() {
    // Hold ENV_LOCK for the whole test; recover the guard if the lock was
    // poisoned by a panic elsewhere.
    let _lock = match ENV_LOCK.lock() {
        Ok(held) => held,
        Err(poisoned) => poisoned.into_inner(),
    };
    let _guard = EnvVarGuard::set("OPENSHELL_READINESS_DB_TIMEOUT_SECS", "11");

    // No --readiness-db-timeout-secs flag is passed, so the parsed value is
    // expected to come from the environment variable.
    let argv = ["openshell-gateway", "--db-url", "sqlite::memory:"];
    let parsed = Cli::try_parse_from(argv).unwrap();
    assert_eq!(parsed.run.readiness_db_timeout_secs, 11);
}

#[test]
fn command_reads_bind_address_from_env() {
let _lock = ENV_LOCK
Expand Down
Loading
Loading