diff --git a/Cargo.toml b/Cargo.toml index 205e34a..ef4848a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ libc = "0.2" [dev-dependencies] anyhow = "1.0" +proptest = "1.11.0" rand = "0.10" uuid = "1.10" diff --git a/src/cmdext.rs b/src/cmdext.rs index 2fabe33..66ff08f 100644 --- a/src/cmdext.rs +++ b/src/cmdext.rs @@ -4,6 +4,7 @@ //! //! - File descriptor passing //! - Changing to a file-descriptor relative directory +//! - Systemd socket activation fd passing use cap_std::fs::Dir; use cap_std::io_lifetimes; @@ -11,17 +12,221 @@ use cap_tempfile::cap_std; use io_lifetimes::OwnedFd; use rustix::fd::{AsFd, FromRawFd, IntoRawFd}; use rustix::io::FdFlags; +use std::collections::BTreeSet; +use std::ffi::CString; use std::os::fd::AsRawFd; use std::os::unix::process::CommandExt; use std::sync::Arc; +/// The file descriptor number at which systemd passes the first socket. +/// See `sd_listen_fds(3)`. +const SD_LISTEN_FDS_START: i32 = 3; + +/// A validated name for a systemd socket-activation file descriptor. +/// +/// Names appear in the `LISTEN_FDNAMES` environment variable as +/// colon-separated values. The constructor validates that the name +/// conforms to systemd's `fdname_is_valid()` rules: at most 255 +/// printable ASCII characters, excluding `:`. +/// +/// ``` +/// use cap_std_ext::cmdext::SystemdFdName; +/// let name = SystemdFdName::new("varlink"); +/// ``` +#[derive(Debug, Clone, Copy)] +pub struct SystemdFdName<'a>(&'a str); + +impl<'a> SystemdFdName<'a> { + /// Create a new `SystemdFdName`, panicking if `name` is invalid. + /// + /// # Panics + /// + /// Panics if `name` is longer than 255 bytes or contains any + /// character that is not printable ASCII (i.e. control characters, + /// DEL, non-ASCII bytes, or `:`). + pub const fn new(name: &'a str) -> Self { + assert!( + name.len() <= 255, + "systemd fd name must be at most 255 characters" + ); + let bytes = name.as_bytes(); + let mut i = 0; + while i < bytes.len() { + let b = bytes[i]; + assert!( + b >= b' ' && b < 127 && b != b':', + "systemd fd name must only contain printable ASCII characters except ':'" + ); + i += 1; + } + Self(name) + } + + /// Return the name as a string slice. + pub fn as_str(&self) -> &'a str { + self.0 + } +} + +/// File descriptor allocator for child processes. +/// +/// Collects fd assignments and optional systemd socket-activation +/// configuration, then applies them all at once via +/// [`CapStdExtCommandExt::take_fds`]. +/// +/// - [`new_systemd_fds`](Self::new_systemd_fds) creates an allocator +/// with systemd socket-activation fds at 3, 4, … (`SD_LISTEN_FDS_START`). +/// - [`take_fd`](Self::take_fd) auto-assigns the next fd above all +/// previously assigned ones (minimum 3). +/// - [`take_fd_n`](Self::take_fd_n) places an fd at an explicit number, +/// panicking on overlap. +/// +/// ```no_run +/// # use std::sync::Arc; +/// # use cap_std_ext::cmdext::{CmdFds, CapStdExtCommandExt, SystemdFdName}; +/// # let varlink_fd: Arc = todo!(); +/// # let extra_fd: Arc = todo!(); +/// let mut cmd = std::process::Command::new("myservice"); +/// let mut fds = CmdFds::new_systemd_fds([(varlink_fd, SystemdFdName::new("varlink"))]); +/// let extra_n = fds.take_fd(extra_fd); +/// cmd.take_fds(fds); +/// ``` +#[derive(Debug)] +pub struct CmdFds { + taken: BTreeSet, + fds: Vec<(i32, Arc)>, + /// Pre-built CStrings for the systemd env vars, set by new_systemd_fds. + systemd_env: Option<(CString, CString)>, +} + +impl Default for CmdFds { + fn default() -> Self { + Self::new() + } +} + +impl CmdFds { + /// Create a new fd allocator. + pub fn new() -> Self { + Self { + taken: BTreeSet::new(), + fds: Vec::new(), + systemd_env: None, + } + } + + /// Create a new fd allocator with systemd socket-activation fds. + /// + /// Each `(fd, name)` pair is assigned a consecutive fd number starting + /// at `SD_LISTEN_FDS_START` (3). The `LISTEN_PID`, `LISTEN_FDS`, and + /// `LISTEN_FDNAMES` environment variables will be set in the child + /// when [`CapStdExtCommandExt::take_fds`] is called. + /// + /// Additional (non-systemd) fds can be registered afterwards via + /// [`take_fd`](Self::take_fd) or [`take_fd_n`](Self::take_fd_n). + /// + /// [sd_listen_fds]: https://www.freedesktop.org/software/systemd/man/latest/sd_listen_fds.html + pub fn new_systemd_fds<'a>( + fds: impl IntoIterator, SystemdFdName<'a>)>, + ) -> Self { + let mut this = Self::new(); + this.register_systemd_fds(fds); + this + } + + /// Compute the next fd number above everything already taken + /// (minimum `SD_LISTEN_FDS_START`). + fn next_fd(&self) -> i32 { + self.taken + .last() + .map(|n| n.checked_add(1).expect("fd number overflow")) + .unwrap_or(SD_LISTEN_FDS_START) + } + + fn insert_fd(&mut self, n: i32) { + let inserted = self.taken.insert(n); + assert!(inserted, "fd {n} is already assigned"); + } + + /// Register a file descriptor at the next available fd number. + /// + /// Returns the fd number that will be assigned in the child. + /// Call [`CapStdExtCommandExt::take_fds`] to apply. + pub fn take_fd(&mut self, fd: Arc) -> i32 { + let n = self.next_fd(); + self.insert_fd(n); + self.fds.push((n, fd)); + n + } + + /// Register a file descriptor at a specific fd number. + /// + /// Call [`CapStdExtCommandExt::take_fds`] to apply. + /// + /// # Panics + /// + /// Panics if `target` has already been assigned. + pub fn take_fd_n(&mut self, fd: Arc, target: i32) -> &mut Self { + self.insert_fd(target); + self.fds.push((target, fd)); + self + } + + fn register_systemd_fds<'a>( + &mut self, + fds: impl IntoIterator, SystemdFdName<'a>)>, + ) { + let mut n_fds: i32 = 0; + let mut names = Vec::new(); + for (fd, name) in fds { + let target = SD_LISTEN_FDS_START + .checked_add(n_fds) + .expect("too many fds"); + self.insert_fd(target); + self.fds.push((target, fd)); + names.push(name.as_str()); + n_fds = n_fds.checked_add(1).expect("too many fds"); + } + + let fd_count = CString::new(n_fds.to_string()).unwrap(); + // SAFETY: SystemdFdName guarantees no NUL bytes. + let fd_names = CString::new(names.join(":")).unwrap(); + self.systemd_env = Some((fd_count, fd_names)); + } +} + /// Extension trait for [`std::process::Command`]. /// /// [`cap_std::fs::Dir`]: https://docs.rs/cap-std/latest/cap_std/fs/struct.Dir.html pub trait CapStdExtCommandExt { - /// Pass a file descriptor into the target process. + /// Pass a file descriptor into the target process at a specific fd number. + /// + /// # Deprecated + /// + /// Use [`CmdFds`] with [`take_fds`](Self::take_fds) instead. This method + /// registers an independent `pre_exec` hook per call, which means + /// multiple `take_fd_n` calls (or mixing with `take_fds`) can clobber + /// each other when a source fd's raw number equals another mapping's + /// target. `take_fds` handles this correctly with atomic fd shuffling. + #[deprecated = "Use CmdFds with take_fds() instead"] fn take_fd_n(&mut self, fd: Arc, target: i32) -> &mut Self; + /// Apply a [`CmdFds`] to this command, passing all registered file + /// descriptors and (if configured) setting up the systemd + /// socket-activation environment. + /// + /// # Important: Do not use `Command::env()` with systemd fds + /// + /// When systemd socket-activation environment variables are configured + /// (via [`CmdFds::new_systemd_fds`]), they are set using `setenv(3)` in + /// a `pre_exec` hook. If `Command::env()` is also called, Rust will + /// build an `envp` array that replaces the process environment, causing + /// the `LISTEN_*` variables set by the hook to be lost. `Command::envs()` + /// is equally problematic. If you need to set additional environment + /// variables alongside systemd fds, set them via `pre_exec` + `setenv` + /// as well. + fn take_fds(&mut self, fds: CmdFds) -> &mut Self; + /// Use the given directory as the current working directory for the process. fn cwd_dir(&mut self, dir: Dir) -> &mut Self; @@ -39,7 +244,27 @@ pub trait CapStdExtCommandExt { fn lifecycle_bind_to_parent_thread(&mut self) -> &mut Self; } +/// Wrapper around `libc::setenv` that checks the return value. +/// +/// # Safety +/// +/// Must only be called in a single-threaded context (e.g. after `fork()` +/// and before `exec()`). #[allow(unsafe_code)] +unsafe fn check_setenv( + key: *const std::ffi::c_char, + val: *const std::ffi::c_char, +) -> std::io::Result<()> { + // SAFETY: Caller guarantees we are in a single-threaded context + // with valid nul-terminated C strings. + if unsafe { libc::setenv(key, val, 1) } != 0 { + return Err(std::io::Error::last_os_error()); + } + Ok(()) +} + +#[allow(unsafe_code)] +#[allow(deprecated)] impl CapStdExtCommandExt for std::process::Command { fn take_fd_n(&mut self, fd: Arc, target: i32) -> &mut Self { unsafe { @@ -62,6 +287,59 @@ impl CapStdExtCommandExt for std::process::Command { self } + fn take_fds(&mut self, fds: CmdFds) -> &mut Self { + // Use a single pre_exec hook that handles all fd shuffling atomically. + // This avoids the problem where separate hooks clobber each other when + // a source fd number equals a target fd number from a different mapping. + unsafe { + self.pre_exec(move || { + // Dup each source fd to a temporary location above all + // targets, so that no dup2() in step 2 can clobber a source. + let safe_min = fds + .fds + .iter() + .map(|(t, _)| *t) + .max() + .unwrap_or(0) + .checked_add(1) + .expect("fd number overflow"); + let mut safe_copies: Vec<(i32, OwnedFd)> = Vec::new(); + for (target, fd) in &fds.fds { + let copy = rustix::io::fcntl_dupfd_cloexec(fd, safe_min)?; + safe_copies.push((*target, copy)); + } + + // Place each fd at its target via dup2. + // We use raw dup2 to avoid fabricating an OwnedFd for a + // target number we don't yet own (which would be unsound + // if dup2 failed — the OwnedFd drop would close a wrong fd). + for (target, copy) in safe_copies { + // SAFETY: target is a non-negative fd number that dup2 + // will atomically (re)open; we don't own it beforehand. + let r = libc::dup2(copy.as_raw_fd(), target); + if r < 0 { + return Err(std::io::Error::last_os_error()); + } + // `copy` drops here, closing the temporary fd. + } + + // Handle systemd env vars, if configured + if let Some((ref fd_count, ref fd_names)) = fds.systemd_env { + let pid = rustix::process::getpid(); + let pid_dec = rustix::path::DecInt::new(pid.as_raw_nonzero().get()); + // SAFETY: After fork() and before exec(), the child is + // single-threaded, so setenv (which is not thread-safe) + // is safe to call here. + check_setenv(c"LISTEN_PID".as_ptr(), pid_dec.as_c_str().as_ptr())?; + check_setenv(c"LISTEN_FDS".as_ptr(), fd_count.as_ptr())?; + check_setenv(c"LISTEN_FDNAMES".as_ptr(), fd_names.as_ptr())?; + } + Ok(()) + }); + } + self + } + fn cwd_dir(&mut self, dir: Dir) -> &mut Self { unsafe { self.pre_exec(move || { @@ -92,6 +370,7 @@ mod tests { use super::*; use std::sync::Arc; + #[allow(deprecated)] #[test] fn test_take_fdn() -> anyhow::Result<()> { // Pass srcfd == destfd and srcfd != destfd diff --git a/src/lib.rs b/src/lib.rs index e78618f..1ef22b0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -42,7 +42,7 @@ pub(crate) fn escape_attempt() -> io::Error { /// Prelude, intended for glob import. pub mod prelude { #[cfg(not(windows))] - pub use super::cmdext::CapStdExtCommandExt; + pub use super::cmdext::{CapStdExtCommandExt, CmdFds, SystemdFdName}; pub use super::dirext::CapStdExtDirExt; #[cfg(feature = "fs_utf8")] pub use super::dirext::CapStdExtDirExtUtf8; diff --git a/tests/it/main.rs b/tests/it/main.rs index 5589650..4887cf6 100644 --- a/tests/it/main.rs +++ b/tests/it/main.rs @@ -5,7 +5,7 @@ use cap_std::fs::PermissionsExt; use cap_std::fs::{Dir, File, Permissions}; use cap_std_ext::cap_std; #[cfg(not(windows))] -use cap_std_ext::cmdext::CapStdExtCommandExt; +use cap_std_ext::cmdext::{CapStdExtCommandExt, CmdFds, SystemdFdName}; use cap_std_ext::dirext::{CapStdExtDirExt, WalkConfiguration}; #[cfg(any(target_os = "android", target_os = "linux"))] use cap_std_ext::RootDir; @@ -16,26 +16,52 @@ use std::cmp::Ordering; use std::ffi::OsStr; use std::io::Write; use std::ops::ControlFlow; +#[cfg(not(windows))] +use std::os::fd::AsRawFd; use std::path::{Path, PathBuf}; use std::{process::Command, sync::Arc}; -#[test] +/// Create a pipe, write `content` to it, and return the read end wrapped in `Arc`. #[cfg(not(windows))] -fn take_fd() -> Result<()> { - let mut c = Command::new("/bin/bash"); - c.arg("-c"); - c.arg("wc -c <&5"); +fn make_pipe(content: &str) -> Result> { let (r, w) = rustix::pipe::pipe()?; let r = Arc::new(r); let mut w: File = w.into(); - c.take_fd_n(r, 5); - write!(w, "hello world")?; + write!(w, "{content}")?; drop(w); + Ok(r) +} + +/// Run a bash script, return its stdout split into lines. +/// +/// `setup` is called on the [`Command`] before spawning, so callers can +/// attach fds or configure the child. +#[cfg(not(windows))] +fn run_bash_piped(script: &str, setup: impl FnOnce(&mut Command)) -> Result> { + let mut c = Command::new("/bin/bash"); + c.arg("-c").arg(script); c.stdout(std::process::Stdio::piped()); - let s = c.output()?; - assert!(s.status.success()); - let out = String::from_utf8_lossy(&s.stdout); - assert_eq!(out.trim(), "11"); + setup(&mut c); + let out = c.output()?; + assert!( + out.status.success(), + "child failed: {}", + String::from_utf8_lossy(&out.stderr) + ); + let stdout = String::from_utf8_lossy(&out.stdout); + Ok(stdout.lines().map(String::from).collect()) +} + +#[test] +#[cfg(not(windows))] +#[allow(deprecated)] +fn take_fd() -> Result<()> { + let r = make_pipe("hello world")?; + let lines = run_bash_piped("wc -c <&5", |c| { + c.take_fd_n(r, 5); + })?; + assert_eq!(lines.len(), 1); + assert_eq!(lines[0].trim(), "11"); Ok(()) } @@ -860,3 +886,189 @@ fn test_lifecycle_bind_to_parent_thread() -> Result<()> { Ok(()) } + +#[test] +#[cfg(not(windows))] +fn test_pass_systemd_fds() -> Result<()> { + let r = make_pipe("sd-activate-test")?; + + // The child: verify LISTEN_PID matches $$, print the other vars, read fd 3. + let script = r#" +test "$LISTEN_PID" = "$$" || { echo "LISTEN_PID=$LISTEN_PID but $$=$$" >&2; exit 1; } +printf '%s\n' "$LISTEN_FDS" "$LISTEN_FDNAMES" +cat <&3 +"#; + let fds = CmdFds::new_systemd_fds([(r, SystemdFdName::new("myproto"))]); + let lines = run_bash_piped(script, |c| { + c.take_fds(fds); + })?; + assert_eq!(lines.len(), 3, "unexpected output: {lines:?}"); + assert_eq!(lines[0], "1"); + assert_eq!(lines[1], "myproto"); + assert_eq!(lines[2], "sd-activate-test"); + + Ok(()) +} + +#[test] +#[cfg(not(windows))] +fn test_systemd_fds_then_take_fd() -> Result<()> { + // Systemd fds at 3 and 4, then auto-assigned take_fd gets 5. + let r1 = make_pipe("first")?; + let r2 = make_pipe("second")?; + let r_extra = make_pipe("extra")?; + + let script = r#" +printf '%s\n' "$LISTEN_FDS" "$LISTEN_FDNAMES" +cat <&3 +printf '\n' +cat <&4 +printf '\n' +cat <&5 +"#; + let mut fds = CmdFds::new_systemd_fds([ + (r1, SystemdFdName::new("alpha")), + (r2, SystemdFdName::new("beta")), + ]); + let extra_n = fds.take_fd(r_extra); + assert_eq!(extra_n, 5); + let lines = run_bash_piped(script, |c| { + c.take_fds(fds); + })?; + assert_eq!(lines.len(), 5, "unexpected output: {lines:?}"); + assert_eq!(lines[0], "2"); + assert_eq!(lines[1], "alpha:beta"); + assert_eq!(lines[2], "first"); + assert_eq!(lines[3], "second"); + assert_eq!(lines[4], "extra"); + + Ok(()) +} + +#[test] +#[cfg(not(windows))] +fn test_cmd_fds_take_fd_n_then_systemd() -> Result<()> { + // Reserve fd 10 explicitly, then systemd fds at 3 — no conflict. + let r_explicit = make_pipe("explicit")?; + let r_sd = make_pipe("systemd")?; + + let script = r#" +cat <&10 +printf '\n' +printf '%s\n' "$LISTEN_FDS" "$LISTEN_FDNAMES" +cat <&3 +"#; + let mut fds = CmdFds::new_systemd_fds([(r_sd, SystemdFdName::new("varlink"))]); + fds.take_fd_n(r_explicit, 10); + let lines = run_bash_piped(script, |c| { + c.take_fds(fds); + })?; + assert_eq!(lines.len(), 4, "unexpected output: {lines:?}"); + assert_eq!(lines[0], "explicit"); + assert_eq!(lines[1], "1"); + assert_eq!(lines[2], "varlink"); + assert_eq!(lines[3], "systemd"); + + Ok(()) +} + +/// Try to dup `fd` so its raw fd number equals `raw`. +/// Returns `Ok(Some(dup))` on success, `Ok(None)` if the slot is occupied. +/// Uses F_DUPFD starting at `raw`; if the kernel returns a higher number, +/// we close it and give up rather than fighting for the slot. +#[cfg(not(windows))] +fn try_fd_at_raw(fd: &rustix::fd::OwnedFd, raw: i32) -> Result> { + let dup = rustix::io::fcntl_dupfd_cloexec(fd, raw)?; + if dup.as_raw_fd() == raw { + Ok(Some(dup)) + } else { + // Slot was occupied; drop the dup (closes it) and signal failure. + Ok(None) + } +} + +#[cfg(not(windows))] +mod proptest_fd { + use super::*; + use cap_std_ext::cmdext::{CapStdExtCommandExt, CmdFds}; + use proptest::prelude::*; + + /// Generate a Vec of distinct target fd numbers in [3, 15]. + fn fd_targets(max_count: usize) -> impl Strategy> { + proptest::collection::btree_set(3..=15i32, 1..=max_count) + .prop_map(|s| s.into_iter().collect::>()) + } + + proptest! { + #![proptest_config(ProptestConfig::with_cases(80))] + + /// For each generated set of target fds, create pipes with unique + /// content, force some source fds into positions that collide with + /// other targets (the worst case), then verify every target gets + /// the right content through the child process. + #[test] + fn fd_shuffle_no_clobber(targets in fd_targets(5)) { + // Build pipes with content "fdN" for each target N. + let mut entries: Vec<(i32, Arc)> = Vec::new(); + + for (i, &tgt) in targets.iter().enumerate() { + let pipe_r = make_pipe(&format!("fd{tgt}")).unwrap(); + + // For odd-indexed entries, try to force the source fd's raw + // number to equal the *next* entry's target (wrapping around). + // This manufactures exactly the collision that broke CI. + if i % 2 == 1 { + let collide_with = targets[(i + 1) % targets.len()]; + if let Ok(Some(moved)) = try_fd_at_raw(&pipe_r, collide_with) { + entries.push((tgt, Arc::new(moved))); + continue; + } + } + entries.push((tgt, pipe_r)); + } + + // Build a bash script that reads from each target fd and prints + // "targetN:content". + let mut script = String::new(); + for &tgt in &targets { + script.push_str(&format!( + "printf '{tgt}:'; cat <&{tgt}; printf '\\n'\n" + )); + } + + let mut fds = CmdFds::new(); + for (tgt, fd) in entries { + fds.take_fd_n(fd, tgt); + } + let lines = run_bash_piped(&script, |c| { c.take_fds(fds); }).unwrap(); + + // Verify each line is "N:fdN". + let n_targets = targets.len(); + prop_assert_eq!( + lines.len(), n_targets, + "expected {} lines, got {}: {:?}", n_targets, lines.len(), lines + ); + for (line, &tgt) in lines.iter().zip(targets.iter()) { + let expected = format!("{tgt}:fd{tgt}"); + prop_assert_eq!( + line, &expected, + "wrong content for target fd {}", tgt + ); + } + } + } +} + +#[test] +#[cfg(not(windows))] +#[should_panic(expected = "fd 3 is already assigned")] +fn test_cmd_fds_overlap_panics() { + let (r1, _w1) = rustix::pipe::pipe().unwrap(); + let (r2, _w2) = rustix::pipe::pipe().unwrap(); + let r1 = Arc::new(r1); + let r2 = Arc::new(r2); + + let mut fds = CmdFds::new_systemd_fds([(r1, SystemdFdName::new("x"))]); + // This should panic: fd 3 is already taken by systemd. + fds.take_fd_n(r2, 3); +}