diff --git a/Cargo.lock b/Cargo.lock index cdc5f451..2645ee12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -690,6 +690,7 @@ dependencies = [ "bincode", "clap", "console", + "crc32fast", "debugid", "exec-harness", "futures", diff --git a/Cargo.toml b/Cargo.toml index 05cf5993..1ec323b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,7 @@ shell-words = "1.1.0" rmp-serde = "1.3.0" uuid = { version = "1.21.0", features = ["v4"] } which = "8.0.2" +crc32fast = "1.5.0" [target.'cfg(target_os = "linux")'.dependencies] procfs = "0.17.0" diff --git a/src/executor/wall_time/perf/debug_info.rs b/src/executor/wall_time/perf/debug_info.rs index 5109044b..b76078a5 100644 --- a/src/executor/wall_time/perf/debug_info.rs +++ b/src/executor/wall_time/perf/debug_info.rs @@ -1,3 +1,4 @@ +use super::elf_helper::find_debug_file; use super::parse_perf_file::LoadedModule; use crate::executor::wall_time::perf::module_symbols::ModuleSymbols; use crate::prelude::*; @@ -43,7 +44,10 @@ pub trait ModuleDebugInfoExt { } impl ModuleDebugInfoExt for ModuleDebugInfo { - /// Create debug info from existing symbols by looking up file/line in DWARF + /// Create debug info from existing symbols by looking up file/line in DWARF. + /// + /// If the binary has no DWARF sections, tries to find a separate debug file + /// via `.gnu_debuglink` (e.g. installed by `libc6-dbg`). fn from_symbols>( path: P, symbols: &ModuleSymbols, @@ -52,7 +56,25 @@ impl ModuleDebugInfoExt for ModuleDebugInfo { let content = std::fs::read(path.as_ref())?; let object = object::File::parse(&*content)?; - let ctx = Self::create_dwarf_context(&object).context("Failed to create DWARF context")?; + // If the binary has no DWARF, try a separate debug file via .gnu_debuglink + let ctx = if object.section_by_name(".debug_info").is_some() { + Self::create_dwarf_context(&object).context("Failed to create DWARF context")? + } else { + let debug_path = find_debug_file(&object, path.as_ref()).with_context(|| { + format!( + "No DWARF in {:?} and no separate debug file found", + path.as_ref() + ) + })?; + trace!( + "Using separate debug file {debug_path:?} for {:?}", + path.as_ref() + ); + let debug_content = std::fs::read(&debug_path)?; + let debug_object = object::File::parse(&*debug_content)?; + Self::create_dwarf_context(&debug_object) + .context("Failed to create DWARF context from debug file")? + }; let (mut min_addr, mut max_addr) = (None, None); let debug_infos = symbols .symbols() @@ -213,6 +235,31 @@ mod tests { insta::assert_debug_snapshot!(module_debug_info.debug_infos); } + #[rstest::rstest] + #[case::cpp( + "testdata/perf_map/cpp_my_benchmark_stripped.bin", + "testdata/perf_map/cpp_my_benchmark.debug" + )] + #[case::libc("testdata/perf_map/libc.so.6", "testdata/perf_map/libc.so.6.debug")] + fn test_stripped_binary_with_debuglink_resolves_debug_info( + #[case] binary: &str, + #[case] debug_file: &str, + ) { + let (_dir, binary, _debug_file) = super::super::elf_helper::setup_debuglink_tmpdir( + Path::new(binary), + Path::new(debug_file), + ); + + let module_symbols = ModuleSymbols::from_elf(&binary).unwrap(); + assert!(!module_symbols.symbols().is_empty()); + + let module_debug_info = ModuleDebugInfo::from_symbols(&binary, &module_symbols, 0).unwrap(); + assert!( + !module_debug_info.debug_infos.is_empty(), + "DWARF should resolve via .gnu_debuglink" + ); + } + #[test] fn test_ruff_debug_info() { const MODULE_PATH: &str = "testdata/perf_map/ty_walltime"; diff --git a/src/executor/wall_time/perf/elf_helper.rs b/src/executor/wall_time/perf/elf_helper.rs index 8690355a..9c5bb54c 100644 --- a/src/executor/wall_time/perf/elf_helper.rs +++ b/src/executor/wall_time/perf/elf_helper.rs @@ -1,8 +1,10 @@ //! Based on this: https://github.com/mstange/samply/blob/4a5afec57b7c68b37ecde12b5a258de523e89463/samply/src/linux_shared/svma_file_range.rs#L8 use anyhow::Context; +use log::trace; use object::Object; use object::ObjectSegment; +use std::path::{Path, PathBuf}; // A file range in an object file, such as a segment or a section, // for which we know the corresponding Stated Virtual Memory Address (SVMA). @@ -188,3 +190,175 @@ pub fn relative_address_base(object_file: &object::File) -> u64 { pub fn compute_base_avma(base_svma: u64, load_bias: u64) -> u64 { base_svma.wrapping_add(load_bias) } + +const DEFAULT_DEBUG_DIR: &str = "/usr/lib/debug"; + +/// Search for a separate debug info file. +/// +/// Tries two mechanisms in order: +/// 1. **Build-ID path**: `/.build-id//.debug` +/// 2. **`.gnu_debuglink`** with GDB search order and CRC32 validation +/// +/// This is the same order GDB uses (see [Separate Debug Files]). Build-ID is +/// preferred because it's a cryptographic hash of the binary contents, so a +/// match cannot be a false positive — whereas `.gnu_debuglink` matches by +/// filename and relies on a CRC32 check. On Debian/Ubuntu, `*-dbg` and +/// `*-dbgsym` packages install their files under `/usr/lib/debug/.build-id/`, +/// so this path is what actually resolves stripped system libraries in +/// practice. +/// +/// [Separate Debug Files]: https://sourceware.org/gdb/current/onlinedocs/gdb.html/Separate-Debug-Files.html +pub fn find_debug_file(object: &object::File, binary_path: &Path) -> Option { + find_debug_file_in(object, binary_path, Path::new(DEFAULT_DEBUG_DIR)) +} + +fn find_debug_file_in( + object: &object::File, + binary_path: &Path, + debug_dir: &Path, +) -> Option { + if let Some(path) = find_debug_file_by_build_id(object, debug_dir) { + return Some(path); + } + find_debug_file_by_debuglink(object, binary_path, debug_dir) +} + +/// Tries to find a debug file using the build-id. +/// +/// ## How it works +/// +/// For build-id a05cfb6313fe06a13c9b4b5cb86c2069faa3951f, the debug file lives at: +/// ```text +/// /usr/lib/debug/.build-id/a0/5cfb6313fe06a13c9b4b5cb86c2069faa3951f.debug +/// ^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +/// first byte (2 hex chars) as subdir +/// rest as the filename +/// ``` +fn find_debug_file_by_build_id(object: &object::File, debug_dir: &Path) -> Option { + let build_id = object.build_id().ok()??; + if build_id.is_empty() { + return None; + } + + let hex = build_id + .iter() + .map(|b| format!("{b:02x}")) + .collect::(); + let path = debug_dir + .join(".build-id") + .join(&hex[..2]) + .join(format!("{}.debug", &hex[2..])); + + if path.exists() { + return Some(path); + } + + None +} + +fn find_debug_file_by_debuglink( + object: &object::File, + binary_path: &Path, + debug_dir: &Path, +) -> Option { + let (debuglink, expected_crc) = object.gnu_debuglink().ok()??; + let debuglink = std::str::from_utf8(debuglink).ok()?; + let dir = binary_path.parent()?; + + let candidates = [ + dir.join(debuglink), + dir.join(".debug").join(debuglink), + debug_dir + .join(dir.strip_prefix("/").unwrap_or(dir)) + .join(debuglink), + ]; + + candidates.into_iter().find(|p| { + let Ok(content) = std::fs::read(p) else { + return false; + }; + let actual_crc = crc32fast::hash(&content); + if actual_crc != expected_crc { + trace!( + "CRC mismatch for {}: expected {expected_crc:#x}, got {actual_crc:#x}", + p.display() + ); + return false; + } + true + }) +} + +/// Copy `binary` and `debug_file` in a fresh tempdir, renaming the debug +/// file to match the binary's `.gnu_debuglink` basename so `find_debug_file` +/// resolves the pair. +/// +/// Returns `(TempDir, staged_binary, staged_debug_file)`. Keep the `TempDir` +/// alive for the duration of the test — dropping it removes the files. +#[cfg(all(test, target_os = "linux"))] +pub(super) fn setup_debuglink_tmpdir( + binary: &Path, + debug_file: &Path, +) -> (tempfile::TempDir, PathBuf, PathBuf) { + let src = std::fs::read(binary).unwrap(); + let object = object::File::parse(&*src).unwrap(); + let (debuglink, _crc) = object + .gnu_debuglink() + .unwrap() + .expect("binary has no .gnu_debuglink"); + let debuglink = std::str::from_utf8(debuglink).unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let staged_binary = dir.path().join("binary"); + let staged_debug = dir.path().join(debuglink); + std::fs::copy(binary, &staged_binary).unwrap(); + std::fs::copy(debug_file, &staged_debug).unwrap(); + + (dir, staged_binary, staged_debug) +} + +#[cfg(all(test, target_os = "linux"))] +mod tests { + use super::*; + + // The fixtures `testdata/perf_map/libc.so.6` and `libc.so.6.debug` are the + // stripped libc plus its separate debug file from Ubuntu 22.04's `libc6` + // and `libc6-dbg` packages. + const LIBC_PATH: &str = "testdata/perf_map/libc.so.6"; + const LIBC_DEBUG_PATH: &str = "testdata/perf_map/libc.so.6.debug"; + + #[test] + fn test_find_debug_file_by_build_id() { + // Ubuntu's `libc6-dbg` installs its debug file under + // `/usr/lib/debug/.build-id//.debug`. Reproduce that layout + // in a tempdir and confirm we resolve it via the build-id note. + let binary_path = Path::new(LIBC_PATH); + let content = std::fs::read(binary_path).unwrap(); + let object = object::File::parse(&*content).unwrap(); + + let build_id = object.build_id().unwrap().unwrap(); + let hex: String = build_id.iter().map(|b| format!("{b:02x}")).collect(); + + let tmp = tempfile::tempdir().unwrap(); + let debug_file_dir = tmp.path().join(".build-id").join(&hex[..2]); + std::fs::create_dir_all(&debug_file_dir).unwrap(); + + let debug_file_path = debug_file_dir.join(format!("{}.debug", &hex[2..])); + std::fs::copy(LIBC_DEBUG_PATH, &debug_file_path).unwrap(); + + let result = find_debug_file_in(&object, binary_path, tmp.path()); + assert_eq!(result, Some(debug_file_path)); + } + + #[test] + fn test_find_debug_file_by_debuglink() { + let (_dir, binary, debug_file) = + setup_debuglink_tmpdir(Path::new(LIBC_PATH), Path::new(LIBC_DEBUG_PATH)); + let content = std::fs::read(&binary).unwrap(); + let object = object::File::parse(&*content).unwrap(); + + let empty_debug_dir = tempfile::tempdir().unwrap(); + let result = find_debug_file_in(&object, &binary, empty_debug_dir.path()); + assert_eq!(result, Some(debug_file)); + } +} diff --git a/src/executor/wall_time/perf/module_symbols.rs b/src/executor/wall_time/perf/module_symbols.rs index 240a2602..d7575f06 100644 --- a/src/executor/wall_time/perf/module_symbols.rs +++ b/src/executor/wall_time/perf/module_symbols.rs @@ -1,7 +1,9 @@ use crate::executor::wall_time::perf::elf_helper; +use log::trace; use object::{Object, ObjectSymbol, ObjectSymbolTable}; use runner_shared::module_symbols::SYMBOLS_MAP_SUFFIX; use std::{ + collections::HashSet, fmt::Debug, io::{BufWriter, Write}, path::Path, @@ -55,11 +57,8 @@ impl ModuleSymbols { ) } - /// Extract symbols from an ELF file (pid-agnostic, load_bias = 0). - pub fn from_elf>(path: P) -> anyhow::Result { - let content = std::fs::read(path.as_ref())?; - let object = object::File::parse(&*content)?; - + /// Extract raw symbols from an object file's `.symtab` and `.dynsym` tables. + fn extract_symbols_from_object(object: &object::File) -> Vec { let mut symbols = Vec::new(); if let Some(symbol_table) = object.symbol_table() { @@ -82,6 +81,44 @@ impl ModuleSymbols { })); } + symbols + } + + /// Extract symbols from an ELF file (pid-agnostic, load_bias = 0). + /// + /// If the binary has a `.gnu_debuglink` pointing to a separate debug file, + /// symbols from that file are merged in. This provides full symbol coverage + /// for stripped system libraries when debug packages are installed. + pub fn from_elf>(path: P) -> anyhow::Result { + let content = std::fs::read(path.as_ref())?; + let object = object::File::parse(&*content)?; + + let mut symbols = Self::extract_symbols_from_object(&object); + + // Merge symbols from a separate debug file if available + if let Some(debug_path) = elf_helper::find_debug_file(&object, path.as_ref()) { + trace!( + "Merging symbols from debug file {:?} for {:?}", + debug_path, + path.as_ref() + ); + let debug_symbols = std::fs::read(&debug_path).ok().and_then(|c| { + object::File::parse(&*c) + .ok() + .map(|o| Self::extract_symbols_from_object(&o)) + }); + + if let Some(debug_symbols) = debug_symbols { + let existing: HashSet<(u64, String)> = + symbols.iter().map(|s| (s.addr, s.name.clone())).collect(); + symbols.extend( + debug_symbols + .into_iter() + .filter(|s| !existing.contains(&(s.addr, s.name.clone()))), + ); + } + } + // Filter out // - ARM ELF "mapping symbols" (https://github.com/torvalds/linux/blob/9448598b22c50c8a5bb77a9103e2d49f134c9578/tools/perf/util/symbol-elf.c#L1591C1-L1598C4) // - symbols that have en empty name @@ -227,4 +264,45 @@ mod tests { let module_symbols = ModuleSymbols::from_elf(MODULE_PATH).unwrap(); insta::assert_debug_snapshot!(module_symbols); } + + #[test] + fn test_stripped_binary_merges_debug_file_symbols() { + // The stripped binary has only .dynsym, the .debug file has the full .symtab. + // from_elf should merge both via .gnu_debuglink. + let stripped_only = + ModuleSymbols::from_elf("testdata/perf_map/cpp_my_benchmark_stripped.bin").unwrap(); + let full = ModuleSymbols::from_elf("testdata/perf_map/cpp_my_benchmark.bin").unwrap(); + + assert!( + stripped_only.symbols().len() == full.symbols().len(), + "stripped+debug ({}) should have the same number of symbols as the original ({})", + stripped_only.symbols().len(), + full.symbols().len(), + ); + } + + #[test] + fn test_libc_symbols_merge_with_debug_file() { + // libc.so.6 ships with .dynsym populated, so from_elf alone would skip + // the debug file under a naive fallback. Merging must pick up .symtab + // symbols like `_int_malloc` that only live in the debug file — + // this is the coverage needed for full libc symbolication. + let (_dir, binary, _debug_file) = elf_helper::setup_debuglink_tmpdir( + Path::new("testdata/perf_map/libc.so.6"), + Path::new("testdata/perf_map/libc.so.6.debug"), + ); + + let module_symbols = ModuleSymbols::from_elf(&binary).unwrap(); + assert!( + module_symbols.symbols().iter().any(|s| s.name == "malloc"), + "libc dynsym symbol `malloc` should be present" + ); + assert!( + module_symbols + .symbols() + .iter() + .any(|s| s.name == "_int_malloc"), + "internal libc symbol `_int_malloc` should be merged in from the debug file" + ); + } } diff --git a/testdata/perf_map/cpp_my_benchmark.debug b/testdata/perf_map/cpp_my_benchmark.debug new file mode 100755 index 00000000..8ee42f03 --- /dev/null +++ b/testdata/perf_map/cpp_my_benchmark.debug @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad370c4bf2fd6dbdad578077eef9ee171e408245c8f552705c8e16bf9043d623 +size 7751584 diff --git a/testdata/perf_map/cpp_my_benchmark_stripped.bin b/testdata/perf_map/cpp_my_benchmark_stripped.bin new file mode 100755 index 00000000..229ba3d0 --- /dev/null +++ b/testdata/perf_map/cpp_my_benchmark_stripped.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:132d0e7ff1acb6fac1051e85057c291cadd92b9c42a7e1523a5734d93dd9357b +size 455160 diff --git a/testdata/perf_map/libc.so.6 b/testdata/perf_map/libc.so.6 new file mode 100644 index 00000000..6c4747dd --- /dev/null +++ b/testdata/perf_map/libc.so.6 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebaed8bd64291875c908cc6e7f5115ae7bf605a5da56ce3da4028e2fcfb0d8e5 +size 2216304 diff --git a/testdata/perf_map/libc.so.6.debug b/testdata/perf_map/libc.so.6.debug new file mode 100644 index 00000000..3cc0cded --- /dev/null +++ b/testdata/perf_map/libc.so.6.debug @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f84c93df1dffba7b504b2bc3e5dd7996b87e734053f45f4ee1ad2fe266c2fa99 +size 4418528