Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ shell-words = "1.1.0"
rmp-serde = "1.3.0"
uuid = { version = "1.21.0", features = ["v4"] }
which = "8.0.2"
crc32fast = "1.5.0"

[target.'cfg(target_os = "linux")'.dependencies]
procfs = "0.17.0"
Expand Down
51 changes: 49 additions & 2 deletions src/executor/wall_time/perf/debug_info.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use super::elf_helper::find_debug_file;
use super::parse_perf_file::LoadedModule;
use crate::executor::wall_time::perf::module_symbols::ModuleSymbols;
use crate::prelude::*;
Expand Down Expand Up @@ -43,7 +44,10 @@ pub trait ModuleDebugInfoExt {
}

impl ModuleDebugInfoExt for ModuleDebugInfo {
/// Create debug info from existing symbols by looking up file/line in DWARF
/// Create debug info from existing symbols by looking up file/line in DWARF.
///
/// If the binary has no DWARF sections, tries to find a separate debug file
/// via `.gnu_debuglink` (e.g. installed by `libc6-dbg`).
fn from_symbols<P: AsRef<Path>>(
path: P,
symbols: &ModuleSymbols,
Expand All @@ -52,7 +56,25 @@ impl ModuleDebugInfoExt for ModuleDebugInfo {
let content = std::fs::read(path.as_ref())?;
let object = object::File::parse(&*content)?;

let ctx = Self::create_dwarf_context(&object).context("Failed to create DWARF context")?;
// If the binary has no DWARF, try a separate debug file via .gnu_debuglink
let ctx = if object.section_by_name(".debug_info").is_some() {
Self::create_dwarf_context(&object).context("Failed to create DWARF context")?
} else {
let debug_path = find_debug_file(&object, path.as_ref()).with_context(|| {
format!(
"No DWARF in {:?} and no separate debug file found",
path.as_ref()
)
})?;
trace!(
"Using separate debug file {debug_path:?} for {:?}",
path.as_ref()
);
let debug_content = std::fs::read(&debug_path)?;
let debug_object = object::File::parse(&*debug_content)?;
Self::create_dwarf_context(&debug_object)
.context("Failed to create DWARF context from debug file")?
};
let (mut min_addr, mut max_addr) = (None, None);
let debug_infos = symbols
.symbols()
Expand Down Expand Up @@ -213,6 +235,31 @@ mod tests {
insta::assert_debug_snapshot!(module_debug_info.debug_infos);
}

#[rstest::rstest]
#[case::cpp(
    "testdata/perf_map/cpp_my_benchmark_stripped.bin",
    "testdata/perf_map/cpp_my_benchmark.debug"
)]
#[case::libc("testdata/perf_map/libc.so.6", "testdata/perf_map/libc.so.6.debug")]
fn test_stripped_binary_with_debuglink_resolves_debug_info(
    #[case] binary: &str,
    #[case] debug_file: &str,
) {
    // Stage the stripped binary next to its debug file. The temp dir guard must
    // stay bound until the end of the test, otherwise the staged files vanish.
    let (_tmp_guard, staged_binary, _staged_debug) =
        super::super::elf_helper::setup_debuglink_tmpdir(Path::new(binary), Path::new(debug_file));

    // Symbols must be available before debug info can be looked up for them.
    let symbols = ModuleSymbols::from_elf(&staged_binary).unwrap();
    assert!(!symbols.symbols().is_empty());

    // Load bias 0: the lookup is done against the file itself, not a mapping.
    let resolved = ModuleDebugInfo::from_symbols(&staged_binary, &symbols, 0).unwrap();
    assert!(
        !resolved.debug_infos.is_empty(),
        "DWARF should resolve via .gnu_debuglink"
    );
}

#[test]
fn test_ruff_debug_info() {
const MODULE_PATH: &str = "testdata/perf_map/ty_walltime";
Expand Down
174 changes: 174 additions & 0 deletions src/executor/wall_time/perf/elf_helper.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
//! Based on this: https://github.com/mstange/samply/blob/4a5afec57b7c68b37ecde12b5a258de523e89463/samply/src/linux_shared/svma_file_range.rs#L8

use anyhow::Context;
use log::trace;
use object::Object;
use object::ObjectSegment;
use std::path::{Path, PathBuf};

// A file range in an object file, such as a segment or a section,
// for which we know the corresponding Stated Virtual Memory Address (SVMA).
Expand Down Expand Up @@ -188,3 +190,175 @@ pub fn relative_address_base(object_file: &object::File) -> u64 {
pub fn compute_base_avma(base_svma: u64, load_bias: u64) -> u64 {
    // Wrapping addition: the sum is taken modulo 2^64, so an overflowing
    // bias wraps around instead of panicking.
    u64::wrapping_add(base_svma, load_bias)
}

/// Debug directory that `find_debug_file` hands to the lookup helpers as the
/// root for both the `.build-id` tree and the `.gnu_debuglink` fallback path.
const DEFAULT_DEBUG_DIR: &str = "/usr/lib/debug";

/// Search for a separate debug info file.
///
/// Tries two mechanisms in order:
/// 1. **Build-ID path**: `<debug_dir>/.build-id/<XX>/<YYYYYY...>.debug`
/// 2. **`.gnu_debuglink`** with GDB search order and CRC32 validation
Comment thread
not-matthias marked this conversation as resolved.
///
/// This is the same order GDB uses (see [Separate Debug Files]). Build-ID is
/// preferred because it's a cryptographic hash of the binary contents, so a
/// match cannot be a false positive — whereas `.gnu_debuglink` matches by
/// filename and relies on a CRC32 check. On Debian/Ubuntu, `*-dbg` and
/// `*-dbgsym` packages install their files under `/usr/lib/debug/.build-id/`,
/// so this path is what actually resolves stripped system libraries in
/// practice.
///
/// [Separate Debug Files]: https://sourceware.org/gdb/current/onlinedocs/gdb.html/Separate-Debug-Files.html
pub fn find_debug_file(object: &object::File, binary_path: &Path) -> Option<PathBuf> {
    // Production entry point: always search below the default debug directory.
    let default_debug_dir = Path::new(DEFAULT_DEBUG_DIR);
    find_debug_file_in(object, binary_path, default_debug_dir)
}

/// Same lookup as `find_debug_file`, but with the debug directory supplied by
/// the caller.
///
/// The build-id lookup is attempted first; the `.gnu_debuglink` lookup only
/// runs when the build-id lookup yields nothing.
fn find_debug_file_in(
    object: &object::File,
    binary_path: &Path,
    debug_dir: &Path,
) -> Option<PathBuf> {
    find_debug_file_by_build_id(object, debug_dir)
        .or_else(|| find_debug_file_by_debuglink(object, binary_path, debug_dir))
}

/// Tries to find a separate debug file using the object's build-id.
///
/// ## How it works
///
/// For build-id a05cfb6313fe06a13c9b4b5cb86c2069faa3951f, the debug file lives at:
/// ```text
/// /usr/lib/debug/.build-id/a0/5cfb6313fe06a13c9b4b5cb86c2069faa3951f.debug
///                          ^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
///                          first byte (2 hex chars) as subdir
///                             rest as the filename
/// ```
///
/// Returns `None` when the build-id cannot be read, is absent or empty, or when
/// there is no regular file at the derived path below `debug_dir`.
fn find_debug_file_by_build_id(object: &object::File, debug_dir: &Path) -> Option<PathBuf> {
    let build_id = object.build_id().ok()??;
    // An empty build-id has no leading byte to name the subdirectory after;
    // `split_first` yields `None` for it, so no slicing can go out of bounds.
    let (first_byte, rest) = build_id.split_first()?;

    let mut file_name: String = rest.iter().map(|b| format!("{b:02x}")).collect();
    file_name.push_str(".debug");

    let path = debug_dir
        .join(".build-id")
        .join(format!("{first_byte:02x}"))
        .join(file_name);

    // `is_file` rather than `exists`: a directory at this path is not a usable
    // debug file, and reporting it would make callers fail on read instead of
    // falling back to the `.gnu_debuglink` lookup.
    path.is_file().then_some(path)
}

/// Tries to find a separate debug file through the object's `.gnu_debuglink`.
///
/// The link name is looked up, in order, at:
/// 1. `<binary dir>/<debuglink>`
/// 2. `<binary dir>/.debug/<debuglink>`
/// 3. `<debug_dir>/<binary dir without a leading "/">/<debuglink>`
///
/// The first candidate that can be read and whose CRC32 equals the checksum
/// stored with the link is returned. Returns `None` when the debuglink is
/// missing, unreadable or not valid UTF-8, when `binary_path` has no parent,
/// or when no candidate passes the CRC check.
fn find_debug_file_by_debuglink(
    object: &object::File,
    binary_path: &Path,
    debug_dir: &Path,
) -> Option<PathBuf> {
    let (link_name, expected_crc) = object.gnu_debuglink().ok()??;
    let link_name = std::str::from_utf8(link_name).ok()?;
    let binary_dir = binary_path.parent()?;
    // Joining an absolute path would discard `debug_dir`, so drop the root first.
    let rootless_dir = binary_dir.strip_prefix("/").unwrap_or(binary_dir);

    for candidate in [
        binary_dir.join(link_name),
        binary_dir.join(".debug").join(link_name),
        debug_dir.join(rootless_dir).join(link_name),
    ] {
        // Missing or unreadable candidates are skipped silently.
        let Ok(bytes) = std::fs::read(&candidate) else {
            continue;
        };

        let actual_crc = crc32fast::hash(&bytes);
        if actual_crc == expected_crc {
            return Some(candidate);
        }

        trace!(
            "CRC mismatch for {}: expected {expected_crc:#x}, got {actual_crc:#x}",
            candidate.display()
        );
    }

    None
}

/// Stage `binary` and `debug_file` side by side in a fresh tempdir.
///
/// The binary is copied as `binary`; the debug file is copied under the
/// basename recorded in the binary's `.gnu_debuglink`, so that
/// `find_debug_file` resolves the pair.
///
/// Returns `(TempDir, staged_binary, staged_debug_file)`. Keep the `TempDir`
/// alive for the duration of the test — dropping it removes the files.
///
/// Panics if either file cannot be read or copied, or if the binary has no
/// valid UTF-8 `.gnu_debuglink`.
#[cfg(all(test, target_os = "linux"))]
pub(super) fn setup_debuglink_tmpdir(
    binary: &Path,
    debug_file: &Path,
) -> (tempfile::TempDir, PathBuf, PathBuf) {
    let binary_bytes = std::fs::read(binary).unwrap();
    let parsed = object::File::parse(&*binary_bytes).unwrap();
    // Only the link name matters here; the CRC is checked later by the lookup.
    let link_name = parsed
        .gnu_debuglink()
        .unwrap()
        .map(|(name, _crc)| name)
        .expect("binary has no .gnu_debuglink");
    let link_name = std::str::from_utf8(link_name).unwrap();

    let dir = tempfile::tempdir().unwrap();
    let staged_binary = dir.path().join("binary");
    std::fs::copy(binary, &staged_binary).unwrap();

    let staged_debug = dir.path().join(link_name);
    std::fs::copy(debug_file, &staged_debug).unwrap();

    (dir, staged_binary, staged_debug)
}

#[cfg(all(test, target_os = "linux"))]
mod tests {
    use super::*;

    // Fixtures: the stripped libc (`libc.so.6`) and its separate debug file
    // (`libc.so.6.debug`) from Ubuntu 22.04's `libc6` and `libc6-dbg` packages.
    const LIBC_PATH: &str = "testdata/perf_map/libc.so.6";
    const LIBC_DEBUG_PATH: &str = "testdata/perf_map/libc.so.6.debug";

    #[test]
    fn test_find_debug_file_by_build_id() {
        // Rebuild the `<debug_dir>/.build-id/<xx>/<rest>.debug` layout that
        // Ubuntu's `libc6-dbg` uses inside a tempdir, then check that the
        // lookup resolves it from the build-id note.
        let libc = Path::new(LIBC_PATH);
        let libc_bytes = std::fs::read(libc).unwrap();
        let object = object::File::parse(&*libc_bytes).unwrap();

        let build_id_hex = object
            .build_id()
            .unwrap()
            .unwrap()
            .iter()
            .map(|b| format!("{b:02x}"))
            .collect::<String>();
        let (subdir, stem) = build_id_hex.split_at(2);

        let debug_root = tempfile::tempdir().unwrap();
        let expected = debug_root
            .path()
            .join(".build-id")
            .join(subdir)
            .join(format!("{stem}.debug"));
        std::fs::create_dir_all(expected.parent().unwrap()).unwrap();
        std::fs::copy(LIBC_DEBUG_PATH, &expected).unwrap();

        let found = find_debug_file_in(&object, libc, debug_root.path());
        assert_eq!(found, Some(expected));
    }

    #[test]
    fn test_find_debug_file_by_debuglink() {
        let (_tmp_guard, staged_binary, staged_debug) =
            setup_debuglink_tmpdir(Path::new(LIBC_PATH), Path::new(LIBC_DEBUG_PATH));
        let staged_bytes = std::fs::read(&staged_binary).unwrap();
        let object = object::File::parse(&*staged_bytes).unwrap();

        // An empty debug dir guarantees the build-id lookup finds nothing, so
        // the result has to come from the `.gnu_debuglink` path.
        let empty_debug_dir = tempfile::tempdir().unwrap();
        let found = find_debug_file_in(&object, &staged_binary, empty_debug_dir.path());
        assert_eq!(found, Some(staged_debug));
    }
}
88 changes: 83 additions & 5 deletions src/executor/wall_time/perf/module_symbols.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use crate::executor::wall_time::perf::elf_helper;
use log::trace;
use object::{Object, ObjectSymbol, ObjectSymbolTable};
use runner_shared::module_symbols::SYMBOLS_MAP_SUFFIX;
use std::{
collections::HashSet,
fmt::Debug,
io::{BufWriter, Write},
path::Path,
Expand Down Expand Up @@ -55,11 +57,8 @@ impl ModuleSymbols {
)
}

/// Extract symbols from an ELF file (pid-agnostic, load_bias = 0).
pub fn from_elf<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
let content = std::fs::read(path.as_ref())?;
let object = object::File::parse(&*content)?;

/// Extract raw symbols from an object file's `.symtab` and `.dynsym` tables.
fn extract_symbols_from_object(object: &object::File) -> Vec<Symbol> {
let mut symbols = Vec::new();

if let Some(symbol_table) = object.symbol_table() {
Expand All @@ -82,6 +81,44 @@ impl ModuleSymbols {
}));
}

symbols
}

/// Extract symbols from an ELF file (pid-agnostic, load_bias = 0).
///
/// If the binary has a `.gnu_debuglink` pointing to a separate debug file,
/// symbols from that file are merged in. This provides full symbol coverage
/// for stripped system libraries when debug packages are installed.
pub fn from_elf<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
let content = std::fs::read(path.as_ref())?;
let object = object::File::parse(&*content)?;

let mut symbols = Self::extract_symbols_from_object(&object);

// Merge symbols from a separate debug file if available
if let Some(debug_path) = elf_helper::find_debug_file(&object, path.as_ref()) {
trace!(
"Merging symbols from debug file {:?} for {:?}",
debug_path,
path.as_ref()
);
let debug_symbols = std::fs::read(&debug_path).ok().and_then(|c| {
object::File::parse(&*c)
.ok()
.map(|o| Self::extract_symbols_from_object(&o))
});

if let Some(debug_symbols) = debug_symbols {
let existing: HashSet<(u64, String)> =
symbols.iter().map(|s| (s.addr, s.name.clone())).collect();
symbols.extend(
debug_symbols
.into_iter()
.filter(|s| !existing.contains(&(s.addr, s.name.clone()))),
);
}
}
Comment thread
not-matthias marked this conversation as resolved.

// Filter out
// - ARM ELF "mapping symbols" (https://github.com/torvalds/linux/blob/9448598b22c50c8a5bb77a9103e2d49f134c9578/tools/perf/util/symbol-elf.c#L1591C1-L1598C4)
// - symbols that have an empty name
Expand Down Expand Up @@ -227,4 +264,45 @@ mod tests {
let module_symbols = ModuleSymbols::from_elf(MODULE_PATH).unwrap();
insta::assert_debug_snapshot!(module_symbols);
}

#[test]
fn test_stripped_binary_merges_debug_file_symbols() {
    // On its own the stripped binary only carries .dynsym; the full .symtab
    // lives in the .debug file. `from_elf` is expected to follow
    // .gnu_debuglink and merge both, matching the unstripped original.
    let merged =
        ModuleSymbols::from_elf("testdata/perf_map/cpp_my_benchmark_stripped.bin").unwrap();
    let original = ModuleSymbols::from_elf("testdata/perf_map/cpp_my_benchmark.bin").unwrap();

    let counts_match = merged.symbols().len() == original.symbols().len();
    assert!(
        counts_match,
        "stripped+debug ({}) should have the same number of symbols as the original ({})",
        merged.symbols().len(),
        original.symbols().len(),
    );
}

#[test]
fn test_libc_symbols_merge_with_debug_file() {
    // libc.so.6 already has a populated .dynsym, so a "only use the debug file
    // when the binary has no symbols" fallback would never look at it. The
    // merge has to add .symtab-only symbols such as `_int_malloc`, which is
    // what full libc symbolication needs.
    let (_tmp_guard, staged_binary, _staged_debug) = elf_helper::setup_debuglink_tmpdir(
        Path::new("testdata/perf_map/libc.so.6"),
        Path::new("testdata/perf_map/libc.so.6.debug"),
    );

    let module_symbols = ModuleSymbols::from_elf(&staged_binary).unwrap();
    let has_symbol = |wanted: &str| module_symbols.symbols().iter().any(|s| s.name == wanted);

    assert!(
        has_symbol("malloc"),
        "libc dynsym symbol `malloc` should be present"
    );
    assert!(
        has_symbol("_int_malloc"),
        "internal libc symbol `_int_malloc` should be merged in from the debug file"
    );
}
}
3 changes: 3 additions & 0 deletions testdata/perf_map/cpp_my_benchmark.debug
Git LFS file not shown
Loading
Loading