diff --git a/ghostscope-dwarf/src/dwarf_expr/call_site.rs b/ghostscope-dwarf/src/dwarf_expr/call_site.rs new file mode 100644 index 0000000..3a5fa22 --- /dev/null +++ b/ghostscope-dwarf/src/dwarf_expr/call_site.rs @@ -0,0 +1,157 @@ +//! DWARF call-site expression helpers. + +use crate::{ + binary::DwarfReader, + core::ComputeStep, + dwarf_expr::{errors as expr_errors, modes::DwarfExprMode, ExpressionEvaluator}, +}; +use gimli::{Operation, Reader}; + +pub(crate) struct ParsedCallSiteParameter { + pub(crate) callee_register: u16, + pub(crate) caller_value_steps: Vec, +} + +pub(crate) fn target_address( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + entry: &gimli::DebuggingInformationEntry, +) -> Option { + address_attr(dwarf, unit, entry, gimli::constants::DW_AT_call_target) + .or_else(|| target_expr_address(unit, entry)) +} + +pub(crate) fn parameter( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + entry: &gimli::DebuggingInformationEntry, + return_pc: u64, +) -> Option { + let callee_register = target_register(unit, entry)?; + let caller_value_steps = value_steps(dwarf, unit, entry, return_pc)?; + Some(ParsedCallSiteParameter { + callee_register, + caller_value_steps, + }) +} + +fn target_expr_address( + unit: &gimli::Unit, + entry: &gimli::DebuggingInformationEntry, +) -> Option { + let attr = entry.attr(gimli::constants::DW_AT_call_target)?; + let gimli::AttributeValue::Exprloc(expr) = attr.value() else { + return None; + }; + let first = expr_errors::soft_optional( + DwarfExprMode::CallSiteValue, + crate::dwarf_expr::ops::parse_single_op( + expr.0, + unit.encoding(), + "DW_AT_call_target expression", + ), + )?; + match first { + Operation::Address { address } => Some(address), + _ => None, + } +} + +fn target_register( + unit: &gimli::Unit, + entry: &gimli::DebuggingInformationEntry, +) -> Option { + let attr = entry.attr(gimli::constants::DW_AT_location)?; + let gimli::AttributeValue::Exprloc(expr) = attr.value() else { + return None; + }; + let first = expr_errors::soft_optional( + DwarfExprMode::CallSiteValue, + crate::dwarf_expr::ops::parse_single_op( + expr.0, + unit.encoding(), + "DW_AT_location call-site parameter expression", + ), + )?; + match first { + Operation::Register { register } => Some(register.0), + _ => None, + } +} + +fn value_steps( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + entry: &gimli::DebuggingInformationEntry, + return_pc: u64, +) -> Option> { + let expr = [ + gimli::constants::DW_AT_call_value, + gimli::constants::DW_AT_GNU_call_site_value, + ] + .into_iter() + .find_map(|attr_name| { + let attr = entry.attr(attr_name)?; + match attr.value() { + gimli::AttributeValue::Exprloc(expr) => Some(expr), + _ => None, + } + })?; + expr_errors::soft_value( + DwarfExprMode::CallSiteValue, + ExpressionEvaluator::parse_expression_to_steps_in_unit( + expr.0.to_slice().ok().as_deref().unwrap_or(&[]), + expr.0.endian(), + unit, + dwarf, + return_pc, + None, + None, + None, + ), + ) + .or_else(|| call_value_register_fallback(expr, unit.encoding())) +} + +fn call_value_register_fallback( + expr: gimli::Expression, + encoding: gimli::Encoding, +) -> Option> { + let first = expr_errors::soft_optional( + DwarfExprMode::CallSiteValue, + crate::dwarf_expr::ops::parse_single_op( + expr.0, + encoding, + "DW_AT_call_value fallback expression", + ), + )?; + let Operation::EntryValue { expression: inner } = first else { + return None; + }; + let inner_op = expr_errors::soft_optional( + DwarfExprMode::CallSiteValue, + crate::dwarf_expr::ops::parse_single_op( + inner, + encoding, + "DW_AT_call_value fallback entry_value inner expression", + ), + )?; + match inner_op { + Operation::Register { register } => Some(vec![ComputeStep::LoadRegister(register.0)]), + _ => None, + } +} + +fn address_attr( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + entry: &gimli::DebuggingInformationEntry, + attr_name: gimli::DwAt, +) -> Option { + let attr = entry.attr(attr_name)?; + match attr.value() { + gimli::AttributeValue::Addr(addr) => Some(addr), + gimli::AttributeValue::DebugAddrIndex(index) => dwarf.address(unit, index).ok(), + _ => None, + } +} diff --git a/ghostscope-dwarf/src/dwarf_expr/cfa.rs b/ghostscope-dwarf/src/dwarf_expr/cfa.rs new file mode 100644 index 0000000..be4227e --- /dev/null +++ b/ghostscope-dwarf/src/dwarf_expr/cfa.rs @@ -0,0 +1,136 @@ +//! CFA DWARF expression lowering. + +use crate::{ + core::{ComputeStep, MemoryAccessSize, Result}, + dwarf_expr::{errors as expr_errors, modes::DwarfExprMode}, +}; +use anyhow::anyhow; +use gimli::Reader; + +/// Parse CFA DWARF expression operations into a `ComputeStep` sequence. +pub(crate) fn parse_expression(reader: R, encoding: gimli::Encoding) -> Result> +where + R: Reader, +{ + let mut steps = Vec::new(); + + for op in expr_errors::hard( + DwarfExprMode::Cfa, + crate::dwarf_expr::ops::parse_ops(reader, encoding, "CFA expression"), + )? { + match op { + gimli::Operation::Register { register } => { + steps.push(ComputeStep::LoadRegister(register.0)); + } + gimli::Operation::RegisterOffset { + register, offset, .. + } => { + steps.push(ComputeStep::LoadRegister(register.0)); + if offset != 0 { + steps.push(ComputeStep::PushConstant(offset)); + steps.push(ComputeStep::Add); + } + } + gimli::Operation::PlusConstant { value } => { + steps.push(ComputeStep::PushConstant(value as i64)); + steps.push(ComputeStep::Add); + } + gimli::Operation::UnsignedConstant { value } => { + steps.push(ComputeStep::PushConstant(value as i64)); + } + gimli::Operation::SignedConstant { value } => { + steps.push(ComputeStep::PushConstant(value)); + } + gimli::Operation::Deref { size, space, .. } => { + if space { + return Err(anyhow!("unsupported CFA expression operation: {:?}", op)); + } + let size = match size { + 1 => MemoryAccessSize::U8, + 2 => MemoryAccessSize::U16, + 4 => MemoryAccessSize::U32, + 8 => MemoryAccessSize::U64, + _ => { + return Err(anyhow!( + "unsupported CFA expression dereference size {} in operation: {:?}", + size, + op + )) + } + }; + steps.push(ComputeStep::Dereference { size }); + } + gimli::Operation::Plus => steps.push(ComputeStep::Add), + gimli::Operation::Minus => steps.push(ComputeStep::Sub), + gimli::Operation::Mul => steps.push(ComputeStep::Mul), + gimli::Operation::And => steps.push(ComputeStep::And), + gimli::Operation::Or => steps.push(ComputeStep::Or), + gimli::Operation::Xor => steps.push(ComputeStep::Xor), + gimli::Operation::Nop => {} + _ => { + return Err(anyhow!("unsupported CFA expression operation: {:?}", op)); + } + } + } + + Ok(steps) +} + +#[cfg(test)] +mod tests { + use super::parse_expression; + use crate::core::{ComputeStep, MemoryAccessSize}; + use gimli::{EndianSlice, RunTimeEndian}; + + fn test_encoding() -> gimli::Encoding { + gimli::Encoding { + format: gimli::Format::Dwarf32, + version: 4, + address_size: 8, + } + } + + fn parse_test_expr(bytes: &[u8]) -> crate::core::Result> { + parse_expression( + EndianSlice::new(bytes, RunTimeEndian::Little), + test_encoding(), + ) + } + + #[test] + fn cfa_expression_parses_unsigned_constant() { + let steps = parse_test_expr(&[0x10, 0x2a]).expect("DW_OP_constu should parse"); + assert_eq!(steps, vec![ComputeStep::PushConstant(42)]); + } + + #[test] + fn cfa_expression_parses_signed_constant() { + let steps = parse_test_expr(&[0x11, 0x7f]).expect("DW_OP_consts should parse"); + assert_eq!(steps, vec![ComputeStep::PushConstant(-1)]); + } + + #[test] + fn cfa_expression_parses_dereference() { + let steps = parse_test_expr(&[0x70, 0x00, 0x06]).expect("DW_OP_deref should parse"); + assert_eq!( + steps, + vec![ + ComputeStep::LoadRegister(0), + ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }, + ] + ); + } + + #[test] + fn cfa_expression_rejects_unknown_opcode_after_valid_prefix() { + let error = parse_test_expr(&[0x70, 0x00, 0xff]) + .expect_err("unknown CFI expression opcode must not be skipped"); + + assert!( + error.to_string().contains("failed to parse"), + "unexpected error: {error}" + ); + } +} diff --git a/ghostscope-dwarf/src/dwarf_expr/const_eval.rs b/ghostscope-dwarf/src/dwarf_expr/const_eval.rs new file mode 100644 index 0000000..5d99b83 --- /dev/null +++ b/ghostscope-dwarf/src/dwarf_expr/const_eval.rs @@ -0,0 +1,31 @@ +//! Constant-only DWARF expression helpers. + +use crate::{ + binary::DwarfReader, + core::Result, + dwarf_expr::{errors as expr_errors, modes::DwarfExprMode}, +}; + +pub(crate) fn eval_const_offset( + expr: &gimli::Expression, + encoding: gimli::Encoding, +) -> Result> { + let Some(op) = expr_errors::hard( + DwarfExprMode::ConstOffset, + crate::dwarf_expr::ops::parse_single_op( + expr.0.clone(), + encoding, + "constant DWARF expression", + ), + )? + else { + return Ok(None); + }; + + match op { + gimli::Operation::UnsignedConstant { value } => Ok(Some(value)), + gimli::Operation::SignedConstant { value } if value >= 0 => Ok(Some(value as u64)), + gimli::Operation::PlusConstant { value } => Ok(Some(value)), + _ => Ok(None), + } +} diff --git a/ghostscope-dwarf/src/dwarf_expr/entry_value.rs b/ghostscope-dwarf/src/dwarf_expr/entry_value.rs new file mode 100644 index 0000000..30c8aa8 --- /dev/null +++ b/ghostscope-dwarf/src/dwarf_expr/entry_value.rs @@ -0,0 +1,709 @@ +//! DW_OP_entry_value lowering and call-site recovery helpers. + +use crate::{ + binary::DwarfReader, + core::{ComputeStep, EntryValueCase, Result}, + dwarf_expr::{errors as expr_errors, modes::DwarfExprMode}, + index::{BlockIndexBuilder, CfiIndex, FunctionBlocks}, +}; +use gimli::{Operation, Reader}; +use std::collections::BTreeMap; +use tracing::{debug, warn}; + +const ENTRY_VALUE_LOOKUP_WARN_CASES: usize = 16; + +pub(crate) struct LocationContext<'a> { + pub(crate) current_pc: u64, + pub(crate) address_size: u8, + pub(crate) dwarf: Option<&'a gimli::Dwarf>, + pub(crate) function_context: Option<&'a FunctionBlocks>, + pub(crate) cfi_index: Option<&'a CfiIndex>, +} + +pub(crate) enum LoweredEntryValue { + Steps { + steps: Vec, + forces_stack_value: bool, + }, + Optimized, +} + +pub(crate) fn lower_location_entry_value( + expression: R, + encoding: gimli::Encoding, + context: LocationContext<'_>, +) -> Result +where + R: Reader, +{ + let inner_ops = expr_errors::hard( + DwarfExprMode::Location, + crate::dwarf_expr::ops::parse_ops( + expression, + encoding, + "DW_OP_entry_value inner expression", + ), + )?; + if inner_ops.len() != 1 { + debug!("Unsupported EntryValue with {} inner ops", inner_ops.len()); + return Err(anyhow::anyhow!( + "unsupported DW_OP_entry_value with {} inner ops", + inner_ops.len() + )); + } + + match &inner_ops[0] { + Operation::Register { register } => { + match resolve_register( + context.current_pc, + register.0, + context.dwarf, + context.function_context, + context.cfi_index, + ) { + Ok(steps) => Ok(LoweredEntryValue::Steps { + steps, + forces_stack_value: true, + }), + Err(error) => { + debug!( + "DW_OP_entry_value register {} unresolved at 0x{:x}: {}", + register.0, context.current_pc, error + ); + Ok(LoweredEntryValue::Optimized) + } + } + } + Operation::RegisterOffset { + register, offset, .. + } => { + let steps = resolve_register_offset( + context.current_pc, + register.0, + *offset, + context.address_size, + context.dwarf, + context.function_context, + context.cfi_index, + )?; + Ok(LoweredEntryValue::Steps { + steps, + forces_stack_value: false, + }) + } + _ => { + debug!("Unsupported EntryValue inner op: {:?}", inner_ops[0]); + Err(anyhow::anyhow!( + "unsupported DW_OP_entry_value inner op: {:?}", + inner_ops[0] + )) + } + } +} + +pub(crate) fn resolve_register( + current_pc: u64, + register: u16, + dwarf: Option<&gimli::Dwarf>, + function_context: Option<&FunctionBlocks>, + cfi_index: Option<&CfiIndex>, +) -> Result> { + let function_context = function_context + .ok_or_else(|| anyhow::anyhow!("DW_OP_entry_value requires function call-site context"))?; + build_incoming_lookup(current_pc, register, dwarf, function_context, cfi_index).or_else( + |incoming_error| { + recover_register_from_cfi(current_pc, register, cfi_index).map_err(|cfi_error| { + anyhow::anyhow!( + "failed to recover DW_OP_entry_value register {} at 0x{:x}: {}; fallback via CFI also failed: {}", + register, + current_pc, + incoming_error, + cfi_error + ) + }) + }, + ) +} + +fn build_incoming_lookup( + current_pc: u64, + register: u16, + dwarf: Option<&gimli::Dwarf>, + function_context: &FunctionBlocks, + cfi_index: Option<&CfiIndex>, +) -> Result> { + let cfi_index = cfi_index.ok_or_else(|| { + anyhow::anyhow!( + "DW_OP_entry_value register recovery needs CFI at 0x{:x}", + current_pc + ) + })?; + let recovery = cfi_index.recover_caller_frame(current_pc, &[])?; + + let mut cases_by_return_pc = BTreeMap::>::new(); + let parameters = collect_parameter_steps(register, dwarf, function_context); + for (caller_return_pc, caller_value_steps) in parameters { + let value_steps = + materialize_caller_value_steps(&caller_value_steps, current_pc, Some(cfi_index)) + .map_err(|error| { + anyhow::anyhow!( + "failed to materialize incoming call-site parameter for DW_OP_entry_value register {} at 0x{:x} (caller return pc 0x{:x}): {}", + register, + current_pc, + caller_return_pc, + error + ) + })?; + match cases_by_return_pc.entry(caller_return_pc) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(value_steps); + } + std::collections::btree_map::Entry::Occupied(entry) => { + if entry.get() != &value_steps { + return Err(anyhow::anyhow!( + "ambiguous incoming call-site parameter for DW_OP_entry_value register {} at 0x{:x} (caller return pc 0x{:x})", + register, + current_pc, + caller_return_pc + )); + } + } + } + } + + if cases_by_return_pc.is_empty() { + return Err(anyhow::anyhow!( + "no call-site parameter found for DW_OP_entry_value register {} at 0x{:x}", + register, + current_pc + )); + } + + Ok(build_lookup_steps( + recovery.caller_pc_steps, + cases_by_return_pc, + )) +} + +pub(crate) fn collect_parameter_steps( + register: u16, + dwarf: Option<&gimli::Dwarf>, + function_context: &FunctionBlocks, +) -> Vec<(u64, Vec)> { + let indexed_parameters: Vec<_> = function_context + .incoming_entry_value_parameters(register) + .into_iter() + .map(|(caller_return_pc, parameter)| { + (caller_return_pc, parameter.caller_value_steps.clone()) + }) + .collect(); + if !indexed_parameters.is_empty() { + return indexed_parameters; + } + + dwarf + .map(|dwarf| { + BlockIndexBuilder::new(dwarf) + .collect_incoming_entry_value_parameters(function_context, register) + .into_iter() + .map(|(caller_return_pc, parameter)| { + (caller_return_pc, parameter.caller_value_steps) + }) + .collect() + }) + .unwrap_or_default() +} + +pub(crate) fn resolve_register_offset( + current_pc: u64, + register: u16, + offset: i64, + address_size: u8, + dwarf: Option<&gimli::Dwarf>, + function_context: Option<&FunctionBlocks>, + cfi_index: Option<&CfiIndex>, +) -> Result> { + if is_stack_pointer_register(register) { + return recover_stack_pointer_steps(current_pc, offset, address_size, cfi_index); + } + + match resolve_register(current_pc, register, dwarf, function_context, cfi_index) { + Ok(mut steps) => { + append_constant_offset(&mut steps, offset); + Ok(steps) + } + Err(entry_error) => { + let mut steps = + recover_register_from_cfi(current_pc, register, cfi_index).map_err(|cfi_error| { + anyhow::anyhow!( + "failed to recover DW_OP_entry_value base register {} with offset {} at 0x{:x}: {}; fallback via CFI also failed: {}", + register, + offset, + current_pc, + entry_error, + cfi_error + ) + })?; + append_constant_offset(&mut steps, offset); + Ok(steps) + } + } +} + +fn recover_register_from_cfi( + current_pc: u64, + register: u16, + cfi_index: Option<&CfiIndex>, +) -> Result> { + let cfi_index = cfi_index.ok_or_else(|| { + anyhow::anyhow!( + "DW_OP_entry_value register recovery needs CFI at 0x{:x}", + current_pc + ) + })?; + cfi_index + .recover_caller_register_steps(current_pc, register)? + .ok_or_else(|| { + anyhow::anyhow!( + "no entry register recovery rule for DWARF register {} at 0x{:x}", + register, + current_pc + ) + }) +} + +fn recover_stack_pointer_steps( + current_pc: u64, + offset: i64, + address_size: u8, + cfi_index: Option<&CfiIndex>, +) -> Result> { + let cfi_index = cfi_index.ok_or_else(|| { + anyhow::anyhow!( + "DW_OP_entry_value stack-pointer recovery needs CFI at 0x{:x}", + current_pc + ) + })?; + let mut steps = cfa_to_steps(cfi_index.get_cfa_result(current_pc)?); + // This assumes the common x86/x86_64 call-frame convention where the CFA + // observed after the call is `SP_entry + address_size` because the return + // address is stored on the stack. Targets such as AArch64 may define the + // CFA at call entry differently when LR is not pushed, so keep this + // adjustment centralized until entry-SP reconstruction becomes + // target-aware. + append_constant_offset(&mut steps, offset - i64::from(address_size)); + Ok(steps) +} + +pub(crate) fn cfa_to_steps(cfa: crate::core::CfaResult) -> Vec { + match cfa { + crate::core::CfaResult::RegisterPlusOffset { register, offset } => { + let mut steps = vec![ComputeStep::LoadRegister(register)]; + append_constant_offset(&mut steps, offset); + steps + } + crate::core::CfaResult::Expression { steps } => steps, + } +} + +pub(crate) fn append_constant_offset(steps: &mut Vec, offset: i64) { + if offset != 0 { + steps.push(ComputeStep::PushConstant(offset)); + steps.push(ComputeStep::Add); + } +} + +fn is_stack_pointer_register(register: u16) -> bool { + matches!( + ghostscope_platform::register_mapping::dwarf_reg_to_name(register), + Some("RSP" | "ESP" | "SP") + ) +} + +pub(crate) fn build_lookup_steps( + caller_pc_steps: Vec, + cases_by_return_pc: BTreeMap>, +) -> Vec { + let cases: Vec<_> = cases_by_return_pc + .into_iter() + .map(|(caller_return_pc, value_steps)| EntryValueCase { + caller_return_pc, + value_steps, + }) + .collect(); + if cases.len() > ENTRY_VALUE_LOOKUP_WARN_CASES { + warn!( + "DW_OP_entry_value lookup generated {} caller return-pc cases; large fan-in may exceed eBPF verifier limits", + cases.len() + ); + } + vec![ComputeStep::EntryValueLookup { + caller_pc_steps, + cases, + }] +} + +fn materialize_caller_value_steps( + steps: &[ComputeStep], + current_pc: u64, + cfi_index: Option<&CfiIndex>, +) -> Result> { + let mut materialized = Vec::new(); + for step in steps { + match step { + ComputeStep::LoadRegister(register) => { + let cfi_index = cfi_index.ok_or_else(|| { + anyhow::anyhow!( + "DW_OP_entry_value register recovery needs CFI at 0x{:x}", + current_pc + ) + })?; + let recovered = cfi_index + .recover_caller_register_steps(current_pc, *register)? + .ok_or_else(|| { + anyhow::anyhow!( + "no caller register recovery rule for DWARF register {} at 0x{:x}; DW_OP_entry_value can only materialize caller values for registers with unwind recovery, and caller-saved argument registers are often unavailable after the call", + register, + current_pc + ) + })?; + materialized.extend(recovered); + } + other => materialized.push(other.clone()), + } + } + Ok(materialized) +} + +#[cfg(test)] +mod tests { + use super::{ + append_constant_offset, build_lookup_steps, cfa_to_steps, collect_parameter_steps, + resolve_register, + }; + use crate::binary::{dwarf_reader_from_arc, DwarfReader}; + use crate::core::{ComputeStep, EntryValueCase}; + use crate::index::{BlockNode, CallSiteParameter, CallSiteRecord, FunctionBlocks}; + use gimli::constants; + use gimli::write::{ + Address, AttributeValue as WriteAttributeValue, Dwarf as WriteDwarf, EndianVec, + Expression as WriteExpression, LineProgram, Sections, Unit, + }; + use gimli::{Format, LittleEndian, Register}; + use std::sync::Arc; + + fn build_scanned_incoming_entry_value_fixture( + register: u16, + caller_value: u64, + ) -> gimli::Dwarf { + let encoding = gimli::Encoding { + format: Format::Dwarf32, + version: 5, + address_size: 8, + }; + + let mut dwarf = WriteDwarf::new(); + let unit_id = dwarf.units.add(Unit::new(encoding, LineProgram::none())); + let unit = dwarf.units.get_mut(unit_id); + let root = unit.root(); + + let caller_id = unit.add(root, constants::DW_TAG_subprogram); + let caller = unit.get_mut(caller_id); + caller.set( + constants::DW_AT_low_pc, + WriteAttributeValue::Address(Address::Constant(0x1000)), + ); + caller.set(constants::DW_AT_high_pc, WriteAttributeValue::Udata(0x40)); + + let callee_id = unit.add(root, constants::DW_TAG_subprogram); + let callee = unit.get_mut(callee_id); + callee.set( + constants::DW_AT_low_pc, + WriteAttributeValue::Address(Address::Constant(0x1200)), + ); + callee.set(constants::DW_AT_high_pc, WriteAttributeValue::Udata(0x10)); + + let call_site_id = unit.add(caller_id, constants::DW_TAG_call_site); + unit.get_mut(call_site_id).set( + constants::DW_AT_call_target, + WriteAttributeValue::Address(Address::Constant(0x1200)), + ); + unit.get_mut(call_site_id).set( + constants::DW_AT_call_return_pc, + WriteAttributeValue::Address(Address::Constant(0x2018)), + ); + + let param_id = unit.add(call_site_id, constants::DW_TAG_call_site_parameter); + let param = unit.get_mut(param_id); + let mut location = WriteExpression::new(); + location.op_reg(Register(register)); + param.set( + constants::DW_AT_location, + WriteAttributeValue::Exprloc(location), + ); + let mut value = WriteExpression::new(); + value.op_constu(caller_value); + param.set( + constants::DW_AT_call_value, + WriteAttributeValue::Exprloc(value), + ); + + let mut sections = Sections::new(EndianVec::new(LittleEndian)); + dwarf.write(&mut sections).unwrap(); + + let dwarf_sections: gimli::DwarfSections> = gimli::DwarfSections::load(|id| { + Ok::<_, gimli::Error>( + sections + .get(id) + .map(|section| section.slice().to_vec()) + .unwrap_or_default(), + ) + }) + .unwrap(); + + dwarf_sections + .borrow(|section| dwarf_reader_from_arc(Arc::<[u8]>::from(section.as_slice()))) + } + + #[test] + fn ignores_outgoing_call_sites_in_inline_context() { + let mut function = FunctionBlocks { + cu_offset: gimli::DebugInfoOffset(0), + die_offset: gimli::UnitOffset(0), + abs_die_offset: Some(gimli::DebugInfoOffset(0)), + ranges: vec![(0x1000, 0x1040)], + nodes: vec![ + BlockNode { + ranges: vec![], + entry_pc: None, + die_offset: Some(gimli::UnitOffset(0)), + variables: vec![], + children: vec![1], + }, + BlockNode { + ranges: vec![(0x1000, 0x1040)], + entry_pc: Some(0x1000), + die_offset: Some(gimli::UnitOffset(1)), + variables: vec![], + children: vec![], + }, + ], + block_addr_map: std::collections::BTreeMap::new(), + call_sites: std::collections::BTreeMap::new(), + incoming_call_sites: std::collections::BTreeMap::new(), + }; + function.call_sites.insert( + 0x1018, + vec![CallSiteRecord { + cu_offset: gimli::DebugInfoOffset(0), + die_offset: gimli::UnitOffset(1), + return_pc: 0x1018, + call_origin: None, + call_target: None, + parameters: vec![CallSiteParameter { + callee_register: 5, + caller_value_steps: vec![ComputeStep::PushConstant(11)], + }], + }], + ); + function.call_sites.insert( + 0x1030, + vec![CallSiteRecord { + cu_offset: gimli::DebugInfoOffset(0), + die_offset: gimli::UnitOffset(2), + return_pc: 0x1030, + call_origin: None, + call_target: None, + parameters: vec![CallSiteParameter { + callee_register: 5, + caller_value_steps: vec![ComputeStep::PushConstant(22)], + }], + }], + ); + + let error = resolve_register(0x1034, 5, None, Some(&function), None) + .expect_err("inline entry_value must not reuse nested outgoing call-site bindings"); + assert!( + error + .to_string() + .contains("DW_OP_entry_value register recovery needs CFI"), + "unexpected error: {error}" + ); + } + + #[test] + fn ignores_outgoing_call_sites_in_non_inline_context() { + let mut function = FunctionBlocks { + cu_offset: gimli::DebugInfoOffset(0), + die_offset: gimli::UnitOffset(0), + abs_die_offset: Some(gimli::DebugInfoOffset(0)), + ranges: vec![(0x1000, 0x1040)], + nodes: vec![BlockNode { + ranges: vec![], + entry_pc: None, + die_offset: Some(gimli::UnitOffset(0)), + variables: vec![], + children: vec![], + }], + block_addr_map: std::collections::BTreeMap::new(), + call_sites: std::collections::BTreeMap::new(), + incoming_call_sites: std::collections::BTreeMap::new(), + }; + function.call_sites.insert( + 0x1030, + vec![CallSiteRecord { + cu_offset: gimli::DebugInfoOffset(0), + die_offset: gimli::UnitOffset(2), + return_pc: 0x1030, + call_origin: None, + call_target: None, + parameters: vec![CallSiteParameter { + callee_register: 5, + caller_value_steps: vec![ComputeStep::PushConstant(22)], + }], + }], + ); + + let error = resolve_register(0x1034, 5, None, Some(&function), None) + .expect_err("non-inline entry_value must not reuse outgoing call-site bindings"); + assert!( + error + .to_string() + .contains("DW_OP_entry_value register recovery needs CFI"), + "unexpected error: {error}" + ); + } + + #[test] + fn uses_incoming_call_site_lookup_for_non_inline_functions() { + let mut function = FunctionBlocks { + cu_offset: gimli::DebugInfoOffset(0), + die_offset: gimli::UnitOffset(0), + abs_die_offset: Some(gimli::DebugInfoOffset(0)), + ranges: vec![(0x1200, 0x1210)], + nodes: vec![BlockNode { + ranges: vec![], + entry_pc: None, + die_offset: Some(gimli::UnitOffset(0)), + variables: vec![], + children: vec![], + }], + block_addr_map: std::collections::BTreeMap::new(), + call_sites: std::collections::BTreeMap::new(), + incoming_call_sites: std::collections::BTreeMap::new(), + }; + function.incoming_call_sites.insert( + 0x2018, + vec![CallSiteRecord { + cu_offset: gimli::DebugInfoOffset(1), + die_offset: gimli::UnitOffset(3), + return_pc: 0x2018, + call_origin: function.abs_die_offset, + call_target: Some(0x1200), + parameters: vec![CallSiteParameter { + callee_register: 5, + caller_value_steps: vec![ComputeStep::PushConstant(33)], + }], + }], + ); + function.incoming_call_sites.insert( + 0x2030, + vec![CallSiteRecord { + cu_offset: gimli::DebugInfoOffset(2), + die_offset: gimli::UnitOffset(4), + return_pc: 0x2030, + call_origin: function.abs_die_offset, + call_target: Some(0x1200), + parameters: vec![CallSiteParameter { + callee_register: 5, + caller_value_steps: vec![ComputeStep::PushConstant(44)], + }], + }], + ); + + let mut cases_by_return_pc = std::collections::BTreeMap::new(); + for (caller_return_pc, parameter) in function.incoming_entry_value_parameters(5) { + cases_by_return_pc.insert(caller_return_pc, parameter.caller_value_steps.clone()); + } + let steps = build_lookup_steps( + vec![ComputeStep::PushConstant(0xdeadbeef)], + cases_by_return_pc, + ); + assert_eq!( + steps, + vec![ComputeStep::EntryValueLookup { + caller_pc_steps: vec![ComputeStep::PushConstant(0xdeadbeef)], + cases: vec![ + EntryValueCase { + caller_return_pc: 0x2018, + value_steps: vec![ComputeStep::PushConstant(33)], + }, + EntryValueCase { + caller_return_pc: 0x2030, + value_steps: vec![ComputeStep::PushConstant(44)], + }, + ], + }] + ); + } + + #[test] + fn prefers_indexed_incoming_parameters_over_dwarf_scan() { + let mut function = FunctionBlocks { + cu_offset: gimli::DebugInfoOffset(0), + die_offset: gimli::UnitOffset(0), + abs_die_offset: Some(gimli::DebugInfoOffset(0)), + ranges: vec![(0x1200, 0x1210)], + nodes: vec![BlockNode { + ranges: vec![], + entry_pc: None, + die_offset: Some(gimli::UnitOffset(0)), + variables: vec![], + children: vec![], + }], + block_addr_map: std::collections::BTreeMap::new(), + call_sites: std::collections::BTreeMap::new(), + incoming_call_sites: std::collections::BTreeMap::new(), + }; + function.incoming_call_sites.insert( + 0x2018, + vec![CallSiteRecord { + cu_offset: gimli::DebugInfoOffset(1), + die_offset: gimli::UnitOffset(3), + return_pc: 0x2018, + call_origin: function.abs_die_offset, + call_target: Some(0x1200), + parameters: vec![CallSiteParameter { + callee_register: 5, + caller_value_steps: vec![ComputeStep::PushConstant(33)], + }], + }], + ); + let dwarf = build_scanned_incoming_entry_value_fixture(5, 99); + + let parameters = collect_parameter_steps(5, Some(&dwarf), &function); + + assert_eq!( + parameters, + vec![(0x2018, vec![ComputeStep::PushConstant(33)])] + ); + } + + #[test] + fn stack_pointer_offsets_use_cfa_based_entry_sp() { + let mut entry_sp_steps = cfa_to_steps(crate::core::CfaResult::RegisterPlusOffset { + register: 7, + offset: 32, + }); + append_constant_offset(&mut entry_sp_steps, 8 - 8); + + assert_eq!( + entry_sp_steps, + vec![ + ComputeStep::LoadRegister(7), + ComputeStep::PushConstant(32), + ComputeStep::Add, + ] + ); + } +} diff --git a/ghostscope-dwarf/src/dwarf_expr/errors.rs b/ghostscope-dwarf/src/dwarf_expr/errors.rs new file mode 100644 index 0000000..7780215 --- /dev/null +++ b/ghostscope-dwarf/src/dwarf_expr/errors.rs @@ -0,0 +1,106 @@ +//! DWARF expression error policy helpers. + +use crate::{ + core::Result, + dwarf_expr::modes::{DwarfExprMode, ErrorPolicy}, +}; +use tracing::debug; + +pub(crate) fn hard(mode: DwarfExprMode, result: Result) -> Result { + debug_assert_eq!(mode.error_policy(), ErrorPolicy::Hard); + result +} + +pub(crate) fn soft_value(mode: DwarfExprMode, result: Result) -> Option { + debug_assert_eq!(mode.error_policy(), ErrorPolicy::SoftWithFallback); + match result { + Ok(value) => Some(value), + Err(error) => { + debug!( + mode = mode.label(), + error = %error, + "DWARF expression parse failed; trying fallback" + ); + None + } + } +} + +pub(crate) fn soft_optional(mode: DwarfExprMode, result: Result>) -> Option { + debug_assert_eq!(mode.error_policy(), ErrorPolicy::SoftWithFallback); + soft_value(mode, result).flatten() +} + +pub(crate) fn silent_false(mode: DwarfExprMode, result: Result) -> bool { + debug_assert_eq!(mode.error_policy(), ErrorPolicy::SilentFalse); + match result { + Ok(value) => value, + Err(error) => { + debug!( + mode = mode.label(), + error = %error, + "DWARF expression scan failed; treating result as false" + ); + false + } + } +} + +pub(crate) fn downgrade_to_none( + mode: DwarfExprMode, + result: Result, + reason: &'static str, +) -> Option { + match result { + Ok(value) => Some(value), + Err(error) => { + debug!( + mode = mode.label(), + reason, + error = %error, + "DWARF expression error downgraded to None" + ); + None + } + } +} + +pub(crate) fn downgrade_optional_to_none( + mode: DwarfExprMode, + result: Result>, + reason: &'static str, +) -> Option { + downgrade_to_none(mode, result, reason).flatten() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn hard_propagates_errors() { + let result: Result<()> = Err(anyhow::anyhow!("boom")); + assert!(hard(DwarfExprMode::Location, result).is_err()); + } + + #[test] + fn soft_with_fallback_converts_errors_to_none() { + let result: Result = Err(anyhow::anyhow!("boom")); + assert_eq!(soft_value(DwarfExprMode::CallSiteValue, result), None); + } + + #[test] + fn silent_false_converts_errors_to_false() { + let result: Result = Err(anyhow::anyhow!("boom")); + assert!(!silent_false(DwarfExprMode::ScanOnly, result)); + } + + #[test] + fn explicit_downgrade_converts_errors_to_none() { + let result: Result = Err(anyhow::anyhow!("boom")); + assert_eq!( + downgrade_to_none(DwarfExprMode::ConstOffset, result, "test downgrade"), + None + ); + } +} diff --git a/ghostscope-dwarf/src/parser/expression_evaluator.rs b/ghostscope-dwarf/src/dwarf_expr/lower.rs similarity index 67% rename from ghostscope-dwarf/src/parser/expression_evaluator.rs rename to ghostscope-dwarf/src/dwarf_expr/lower.rs index e063864..6406e18 100644 --- a/ghostscope-dwarf/src/parser/expression_evaluator.rs +++ b/ghostscope-dwarf/src/dwarf_expr/lower.rs @@ -4,12 +4,12 @@ use crate::binary::{DwarfEndian, DwarfReader}; use crate::core::{ - ComputeStep, DirectValueResult, EntryValueCase, EvaluationResult, LocationResult, - MemoryAccessSize, Result, + ComputeStep, DirectValueResult, EvaluationResult, LocationResult, MemoryAccessSize, Result, }; -use crate::index::{BlockIndexBuilder, CfiIndex, FunctionBlocks}; +use crate::dwarf_expr::{errors as expr_errors, modes::DwarfExprMode}; +use crate::index::{CfiIndex, FunctionBlocks}; use crate::semantics::{range_contains_pc, resolve_attr_with_unit_origins}; -use gimli::{read::RawLocListEntry, EndianSlice, Expression, Operation, Reader}; +use gimli::{read::RawLocListEntry, EndianSlice, Operation, Reader}; use tracing::{debug, trace, warn}; /// DWARF expression evaluator @@ -17,7 +17,6 @@ pub struct ExpressionEvaluator; impl ExpressionEvaluator { const MAX_IMPLICIT_POINTER_DEPTH: usize = 8; - const ENTRY_VALUE_LOOKUP_WARN_CASES: usize = 16; /// Evaluate a variable's location from its DIE attributes pub fn evaluate_location( @@ -336,20 +335,18 @@ impl ExpressionEvaluator { PrecomputedSteps(Vec), } - let mut expression = Expression(EndianSlice::new(expr_bytes, endian)); let mut operations: Vec> = Vec::new(); let mut has_stack_value = false; // Parse all operations in the expression - while !expression.0.is_empty() { - let offset = expr_bytes.len() - expression.0.len(); - let op = Operation::parse(&mut expression.0, encoding).map_err(|error| { - anyhow::anyhow!( - "failed to parse DWARF expression operation at byte offset {}: {}", - offset, - error - ) - })?; + for op in expr_errors::hard( + DwarfExprMode::Location, + crate::dwarf_expr::ops::parse_ops( + EndianSlice::new(expr_bytes, endian), + encoding, + "DWARF expression", + ), + )? { if matches!(op, Operation::StackValue) { has_stack_value = true; debug!("Found DW_OP_stack_value - this is a computed value"); @@ -359,73 +356,29 @@ impl ExpressionEvaluator { // call-site metadata. This keeps optimized parameters usable // after their entry registers have been clobbered. Operation::EntryValue { expression } => { - let mut inner = *expression; - let mut inner_ops: Vec> = Vec::new(); - let inner_len = inner.len(); - while !inner.is_empty() { - let offset = inner_len - inner.len(); - let iop = Operation::parse(&mut inner, encoding).map_err(|error| { - anyhow::anyhow!( - "failed to parse DW_OP_entry_value inner expression operation at byte offset {}: {}", - offset, - error - ) - })?; - inner_ops.push(iop); - } - if inner_ops.len() == 1 { - match &inner_ops[0] { - Operation::Register { register } => { + match crate::dwarf_expr::entry_value::lower_location_entry_value( + *expression, + encoding, + crate::dwarf_expr::entry_value::LocationContext { + current_pc: address, + address_size: encoding.address_size, + dwarf, + function_context, + cfi_index, + }, + )? { + crate::dwarf_expr::entry_value::LoweredEntryValue::Steps { + steps, + forces_stack_value, + } => { + if forces_stack_value { has_stack_value = true; - match Self::resolve_entry_value_register( - address, - register.0, - dwarf, - function_context, - cfi_index, - ) { - Ok(steps) => { - operations.push(ParsedOperation::PrecomputedSteps(steps)); - } - Err(error) => { - debug!( - "DW_OP_entry_value register {} unresolved at 0x{:x}: {}", - register.0, - address, - error - ); - return Ok(EvaluationResult::Optimized); - } - } - } - Operation::RegisterOffset { - register, offset, .. - } => { - let steps = Self::resolve_entry_value_register_offset( - address, - register.0, - *offset, - encoding.address_size, - dwarf, - function_context, - cfi_index, - )?; - operations.push(ParsedOperation::PrecomputedSteps(steps)); - } - _ => { - debug!("Unsupported EntryValue inner op: {:?}", inner_ops[0]); - return Err(anyhow::anyhow!( - "unsupported DW_OP_entry_value inner op: {:?}", - inner_ops[0] - )); } + operations.push(ParsedOperation::PrecomputedSteps(steps)); + } + crate::dwarf_expr::entry_value::LoweredEntryValue::Optimized => { + return Ok(EvaluationResult::Optimized); } - } else { - debug!("Unsupported EntryValue with {} inner ops", inner_ops.len()); - return Err(anyhow::anyhow!( - "unsupported DW_OP_entry_value with {} inner ops", - inner_ops.len() - )); } } _ => operations.push(ParsedOperation::Operation(op)), @@ -1316,322 +1269,14 @@ impl ExpressionEvaluator { ); Ok(EvaluationResult::Optimized) } - - fn resolve_entry_value_register( - current_pc: u64, - register: u16, - dwarf: Option<&gimli::Dwarf>, - function_context: Option<&FunctionBlocks>, - cfi_index: Option<&CfiIndex>, - ) -> Result> { - let function_context = function_context.ok_or_else(|| { - anyhow::anyhow!("DW_OP_entry_value requires function call-site context") - })?; - Self::build_incoming_entry_value_lookup( - current_pc, - register, - dwarf, - function_context, - cfi_index, - ) - .or_else(|incoming_error| { - Self::recover_entry_register_from_cfi(current_pc, register, cfi_index).map_err( - |cfi_error| { - anyhow::anyhow!( - "failed to recover DW_OP_entry_value register {} at 0x{:x}: {}; fallback via CFI also failed: {}", - register, - current_pc, - incoming_error, - cfi_error - ) - }, - ) - }) - } - - fn build_incoming_entry_value_lookup( - current_pc: u64, - register: u16, - dwarf: Option<&gimli::Dwarf>, - function_context: &FunctionBlocks, - cfi_index: Option<&CfiIndex>, - ) -> Result> { - let cfi_index = cfi_index.ok_or_else(|| { - anyhow::anyhow!( - "DW_OP_entry_value register recovery needs CFI at 0x{:x}", - current_pc - ) - })?; - let recovery = cfi_index.recover_caller_frame(current_pc, &[])?; - - let mut cases_by_return_pc = std::collections::BTreeMap::>::new(); - let parameters = - Self::collect_incoming_entry_value_parameter_steps(register, dwarf, function_context); - for (caller_return_pc, caller_value_steps) in parameters { - let value_steps = Self::materialize_caller_value_steps( - &caller_value_steps, - current_pc, - Some(cfi_index), - ) - .map_err(|error| { - anyhow::anyhow!( - "failed to materialize incoming call-site parameter for DW_OP_entry_value register {} at 0x{:x} (caller return pc 0x{:x}): {}", - register, - current_pc, - caller_return_pc, - error - ) - })?; - match cases_by_return_pc.entry(caller_return_pc) { - std::collections::btree_map::Entry::Vacant(entry) => { - entry.insert(value_steps); - } - std::collections::btree_map::Entry::Occupied(entry) => { - if entry.get() != &value_steps { - return Err(anyhow::anyhow!( - "ambiguous incoming call-site parameter for DW_OP_entry_value register {} at 0x{:x} (caller return pc 0x{:x})", - register, - current_pc, - caller_return_pc - )); - } - } - } - } - - if cases_by_return_pc.is_empty() { - return Err(anyhow::anyhow!( - "no call-site parameter found for DW_OP_entry_value register {} at 0x{:x}", - register, - current_pc - )); - } - - Ok(Self::build_entry_value_lookup_steps( - recovery.caller_pc_steps, - cases_by_return_pc, - )) - } - - fn collect_incoming_entry_value_parameter_steps( - register: u16, - dwarf: Option<&gimli::Dwarf>, - function_context: &FunctionBlocks, - ) -> Vec<(u64, Vec)> { - let indexed_parameters: Vec<_> = function_context - .incoming_entry_value_parameters(register) - .into_iter() - .map(|(caller_return_pc, parameter)| { - (caller_return_pc, parameter.caller_value_steps.clone()) - }) - .collect(); - if !indexed_parameters.is_empty() { - return indexed_parameters; - } - - dwarf - .map(|dwarf| { - BlockIndexBuilder::new(dwarf) - .collect_incoming_entry_value_parameters(function_context, register) - .into_iter() - .map(|(caller_return_pc, parameter)| { - (caller_return_pc, parameter.caller_value_steps) - }) - .collect() - }) - .unwrap_or_default() - } - - fn resolve_entry_value_register_offset( - current_pc: u64, - register: u16, - offset: i64, - address_size: u8, - dwarf: Option<&gimli::Dwarf>, - function_context: Option<&FunctionBlocks>, - cfi_index: Option<&CfiIndex>, - ) -> Result> { - if Self::is_stack_pointer_register(register) { - return Self::recover_entry_stack_pointer_steps( - current_pc, - offset, - address_size, - cfi_index, - ); - } - - match Self::resolve_entry_value_register( - current_pc, - register, - dwarf, - function_context, - cfi_index, - ) { - Ok(mut steps) => { - Self::append_constant_offset(&mut steps, offset); - Ok(steps) - } - Err(entry_error) => { - let mut steps = Self::recover_entry_register_from_cfi( - current_pc, register, cfi_index, - ) - .map_err(|cfi_error| { - anyhow::anyhow!( - "failed to recover DW_OP_entry_value base register {} with offset {} at 0x{:x}: {}; fallback via CFI also failed: {}", - register, - offset, - current_pc, - entry_error, - cfi_error - ) - })?; - Self::append_constant_offset(&mut steps, offset); - Ok(steps) - } - } - } - - fn recover_entry_register_from_cfi( - current_pc: u64, - register: u16, - cfi_index: Option<&CfiIndex>, - ) -> Result> { - let cfi_index = cfi_index.ok_or_else(|| { - anyhow::anyhow!( - "DW_OP_entry_value register recovery needs CFI at 0x{:x}", - current_pc - ) - })?; - cfi_index - .recover_caller_register_steps(current_pc, register)? - .ok_or_else(|| { - anyhow::anyhow!( - "no entry register recovery rule for DWARF register {} at 0x{:x}", - register, - current_pc - ) - }) - } - - fn recover_entry_stack_pointer_steps( - current_pc: u64, - offset: i64, - address_size: u8, - cfi_index: Option<&CfiIndex>, - ) -> Result> { - let cfi_index = cfi_index.ok_or_else(|| { - anyhow::anyhow!( - "DW_OP_entry_value stack-pointer recovery needs CFI at 0x{:x}", - current_pc - ) - })?; - let mut steps = Self::cfa_to_steps(cfi_index.get_cfa_result(current_pc)?); - // This assumes the common x86/x86_64 call-frame convention where the CFA - // observed after the call is `SP_entry + address_size` because the return - // address is stored on the stack. Targets such as AArch64 may define the - // CFA at call entry differently when LR is not pushed, so keep this - // adjustment centralized until entry-SP reconstruction becomes - // target-aware. - Self::append_constant_offset(&mut steps, offset - i64::from(address_size)); - Ok(steps) - } - - fn cfa_to_steps(cfa: crate::core::CfaResult) -> Vec { - match cfa { - crate::core::CfaResult::RegisterPlusOffset { register, offset } => { - let mut steps = vec![ComputeStep::LoadRegister(register)]; - Self::append_constant_offset(&mut steps, offset); - steps - } - crate::core::CfaResult::Expression { steps } => steps, - } - } - - fn append_constant_offset(steps: &mut Vec, offset: i64) { - if offset != 0 { - steps.push(ComputeStep::PushConstant(offset)); - steps.push(ComputeStep::Add); - } - } - - fn is_stack_pointer_register(register: u16) -> bool { - matches!( - ghostscope_platform::register_mapping::dwarf_reg_to_name(register), - Some("RSP" | "ESP" | "SP") - ) - } - - fn build_entry_value_lookup_steps( - caller_pc_steps: Vec, - cases_by_return_pc: std::collections::BTreeMap>, - ) -> Vec { - let cases: Vec<_> = cases_by_return_pc - .into_iter() - .map(|(caller_return_pc, value_steps)| EntryValueCase { - caller_return_pc, - value_steps, - }) - .collect(); - if cases.len() > Self::ENTRY_VALUE_LOOKUP_WARN_CASES { - warn!( - "DW_OP_entry_value lookup generated {} caller return-pc cases; large fan-in may exceed eBPF verifier limits", - cases.len() - ); - } - vec![ComputeStep::EntryValueLookup { - caller_pc_steps, - cases, - }] - } - - fn materialize_caller_value_steps( - steps: &[ComputeStep], - current_pc: u64, - cfi_index: Option<&CfiIndex>, - ) -> Result> { - let mut materialized = Vec::new(); - for step in steps { - match step { - ComputeStep::LoadRegister(register) => { - let cfi_index = cfi_index.ok_or_else(|| { - anyhow::anyhow!( - "DW_OP_entry_value register recovery needs CFI at 0x{:x}", - current_pc - ) - })?; - let recovered = cfi_index - .recover_caller_register_steps(current_pc, *register)? - .ok_or_else(|| { - anyhow::anyhow!( - "no caller register recovery rule for DWARF register {} at 0x{:x}; DW_OP_entry_value can only materialize caller values for registers with unwind recovery, and caller-saved argument registers are often unavailable after the call", - register, - current_pc - ) - })?; - materialized.extend(recovered); - } - other => materialized.push(other.clone()), - } - } - Ok(materialized) - } } #[cfg(test)] mod tests { use super::ExpressionEvaluator; - use crate::binary::{dwarf_reader_from_arc, DwarfReader}; - use crate::core::{ - CfaResult, ComputeStep, DirectValueResult, EntryValueCase, EvaluationResult, LocationResult, - }; - use crate::index::{BlockNode, CallSiteParameter, CallSiteRecord, FunctionBlocks}; + use crate::core::{CfaResult, DirectValueResult, EvaluationResult, LocationResult}; use gimli::constants; - use gimli::write::{ - Address, AttributeValue as WriteAttributeValue, Dwarf as WriteDwarf, EndianVec, - Expression as WriteExpression, LineProgram, Sections, Unit, - }; - use gimli::{Format, LittleEndian, Register, RunTimeEndian}; - use std::sync::Arc; + use gimli::RunTimeEndian; fn test_encoding() -> gimli::Encoding { gimli::Encoding { @@ -1641,79 +1286,6 @@ mod tests { } } - fn build_scanned_incoming_entry_value_fixture( - register: u16, - caller_value: u64, - ) -> gimli::Dwarf { - let encoding = gimli::Encoding { - format: Format::Dwarf32, - version: 5, - address_size: 8, - }; - - let mut dwarf = WriteDwarf::new(); - let unit_id = dwarf.units.add(Unit::new(encoding, LineProgram::none())); - let unit = dwarf.units.get_mut(unit_id); - let root = unit.root(); - - let caller_id = unit.add(root, constants::DW_TAG_subprogram); - let caller = unit.get_mut(caller_id); - caller.set( - constants::DW_AT_low_pc, - WriteAttributeValue::Address(Address::Constant(0x1000)), - ); - caller.set(constants::DW_AT_high_pc, WriteAttributeValue::Udata(0x40)); - - let callee_id = unit.add(root, constants::DW_TAG_subprogram); - let callee = unit.get_mut(callee_id); - callee.set( - constants::DW_AT_low_pc, - WriteAttributeValue::Address(Address::Constant(0x1200)), - ); - callee.set(constants::DW_AT_high_pc, WriteAttributeValue::Udata(0x10)); - - let call_site_id = unit.add(caller_id, constants::DW_TAG_call_site); - unit.get_mut(call_site_id).set( - constants::DW_AT_call_target, - WriteAttributeValue::Address(Address::Constant(0x1200)), - ); - unit.get_mut(call_site_id).set( - constants::DW_AT_call_return_pc, - WriteAttributeValue::Address(Address::Constant(0x2018)), - ); - - let param_id = unit.add(call_site_id, constants::DW_TAG_call_site_parameter); - let param = unit.get_mut(param_id); - let mut location = WriteExpression::new(); - location.op_reg(Register(register)); - param.set( - constants::DW_AT_location, - WriteAttributeValue::Exprloc(location), - ); - let mut value = WriteExpression::new(); - value.op_constu(caller_value); - param.set( - constants::DW_AT_call_value, - WriteAttributeValue::Exprloc(value), - ); - - let mut sections = Sections::new(EndianVec::new(LittleEndian)); - dwarf.write(&mut sections).unwrap(); - - let dwarf_sections: gimli::DwarfSections> = gimli::DwarfSections::load(|id| { - Ok::<_, gimli::Error>( - sections - .get(id) - .map(|section| section.slice().to_vec()) - .unwrap_or_default(), - ) - }) - .unwrap(); - - dwarf_sections - .borrow(|section| dwarf_reader_from_arc(Arc::<[u8]>::from(section.as_slice()))) - } - #[test] fn implicit_pointer_to_static_storage_preserves_absolute_address_semantics() { let result = ExpressionEvaluator::addressable_location_to_pointer_value( @@ -1728,265 +1300,6 @@ mod tests { ); } - #[test] - fn entry_value_ignores_outgoing_call_sites_in_inline_context() { - let mut function = FunctionBlocks { - cu_offset: gimli::DebugInfoOffset(0), - die_offset: gimli::UnitOffset(0), - abs_die_offset: Some(gimli::DebugInfoOffset(0)), - ranges: vec![(0x1000, 0x1040)], - nodes: vec![ - BlockNode { - ranges: vec![], - entry_pc: None, - die_offset: Some(gimli::UnitOffset(0)), - variables: vec![], - children: vec![1], - }, - BlockNode { - ranges: vec![(0x1000, 0x1040)], - entry_pc: Some(0x1000), - die_offset: Some(gimli::UnitOffset(1)), - variables: vec![], - children: vec![], - }, - ], - block_addr_map: std::collections::BTreeMap::new(), - call_sites: std::collections::BTreeMap::new(), - incoming_call_sites: std::collections::BTreeMap::new(), - }; - function.call_sites.insert( - 0x1018, - vec![CallSiteRecord { - cu_offset: gimli::DebugInfoOffset(0), - die_offset: gimli::UnitOffset(1), - return_pc: 0x1018, - call_origin: None, - call_target: None, - parameters: vec![CallSiteParameter { - callee_register: 5, - caller_value_steps: vec![ComputeStep::PushConstant(11)], - }], - }], - ); - function.call_sites.insert( - 0x1030, - vec![CallSiteRecord { - cu_offset: gimli::DebugInfoOffset(0), - die_offset: gimli::UnitOffset(2), - return_pc: 0x1030, - call_origin: None, - call_target: None, - parameters: vec![CallSiteParameter { - callee_register: 5, - caller_value_steps: vec![ComputeStep::PushConstant(22)], - }], - }], - ); - - let error = ExpressionEvaluator::resolve_entry_value_register( - 0x1034, - 5, - None, - Some(&function), - None, - ) - .expect_err("inline entry_value must not reuse nested outgoing call-site bindings"); - assert!( - error - .to_string() - .contains("DW_OP_entry_value register recovery needs CFI"), - "unexpected error: {error}" - ); - } - - #[test] - fn entry_value_ignores_outgoing_call_sites_in_non_inline_context() { - let mut function = FunctionBlocks { - cu_offset: gimli::DebugInfoOffset(0), - die_offset: gimli::UnitOffset(0), - abs_die_offset: Some(gimli::DebugInfoOffset(0)), - ranges: vec![(0x1000, 0x1040)], - nodes: vec![BlockNode { - ranges: vec![], - entry_pc: None, - die_offset: Some(gimli::UnitOffset(0)), - variables: vec![], - children: vec![], - }], - block_addr_map: std::collections::BTreeMap::new(), - call_sites: std::collections::BTreeMap::new(), - incoming_call_sites: std::collections::BTreeMap::new(), - }; - function.call_sites.insert( - 0x1030, - vec![CallSiteRecord { - cu_offset: gimli::DebugInfoOffset(0), - die_offset: gimli::UnitOffset(2), - return_pc: 0x1030, - call_origin: None, - call_target: None, - parameters: vec![CallSiteParameter { - callee_register: 5, - caller_value_steps: vec![ComputeStep::PushConstant(22)], - }], - }], - ); - - let error = ExpressionEvaluator::resolve_entry_value_register( - 0x1034, - 5, - None, - Some(&function), - None, - ) - .expect_err("non-inline entry_value must not reuse outgoing call-site bindings"); - assert!( - error - .to_string() - .contains("DW_OP_entry_value register recovery needs CFI"), - "unexpected error: {error}" - ); - } - - #[test] - fn entry_value_uses_incoming_call_site_lookup_for_non_inline_functions() { - let mut function = FunctionBlocks { - cu_offset: gimli::DebugInfoOffset(0), - die_offset: gimli::UnitOffset(0), - abs_die_offset: Some(gimli::DebugInfoOffset(0)), - ranges: vec![(0x1200, 0x1210)], - nodes: vec![BlockNode { - ranges: vec![], - entry_pc: None, - die_offset: Some(gimli::UnitOffset(0)), - variables: vec![], - children: vec![], - }], - block_addr_map: std::collections::BTreeMap::new(), - call_sites: std::collections::BTreeMap::new(), - incoming_call_sites: std::collections::BTreeMap::new(), - }; - function.incoming_call_sites.insert( - 0x2018, - vec![CallSiteRecord { - cu_offset: gimli::DebugInfoOffset(1), - die_offset: gimli::UnitOffset(3), - return_pc: 0x2018, - call_origin: function.abs_die_offset, - call_target: Some(0x1200), - parameters: vec![CallSiteParameter { - callee_register: 5, - caller_value_steps: vec![ComputeStep::PushConstant(33)], - }], - }], - ); - function.incoming_call_sites.insert( - 0x2030, - vec![CallSiteRecord { - cu_offset: gimli::DebugInfoOffset(2), - die_offset: gimli::UnitOffset(4), - return_pc: 0x2030, - call_origin: function.abs_die_offset, - call_target: Some(0x1200), - parameters: vec![CallSiteParameter { - callee_register: 5, - caller_value_steps: vec![ComputeStep::PushConstant(44)], - }], - }], - ); - - let mut cases_by_return_pc = std::collections::BTreeMap::new(); - for (caller_return_pc, parameter) in function.incoming_entry_value_parameters(5) { - cases_by_return_pc.insert(caller_return_pc, parameter.caller_value_steps.clone()); - } - let steps = ExpressionEvaluator::build_entry_value_lookup_steps( - vec![ComputeStep::PushConstant(0xdeadbeef)], - cases_by_return_pc, - ); - assert_eq!( - steps, - vec![ComputeStep::EntryValueLookup { - caller_pc_steps: vec![ComputeStep::PushConstant(0xdeadbeef)], - cases: vec![ - EntryValueCase { - caller_return_pc: 0x2018, - value_steps: vec![ComputeStep::PushConstant(33)], - }, - EntryValueCase { - caller_return_pc: 0x2030, - value_steps: vec![ComputeStep::PushConstant(44)], - }, - ], - }] - ); - } - - #[test] - fn entry_value_prefers_indexed_incoming_parameters_over_dwarf_scan() { - let mut function = FunctionBlocks { - cu_offset: gimli::DebugInfoOffset(0), - die_offset: gimli::UnitOffset(0), - abs_die_offset: Some(gimli::DebugInfoOffset(0)), - ranges: vec![(0x1200, 0x1210)], - nodes: vec![BlockNode { - ranges: vec![], - entry_pc: None, - die_offset: Some(gimli::UnitOffset(0)), - variables: vec![], - children: vec![], - }], - block_addr_map: std::collections::BTreeMap::new(), - call_sites: std::collections::BTreeMap::new(), - incoming_call_sites: std::collections::BTreeMap::new(), - }; - function.incoming_call_sites.insert( - 0x2018, - vec![CallSiteRecord { - cu_offset: gimli::DebugInfoOffset(1), - die_offset: gimli::UnitOffset(3), - return_pc: 0x2018, - call_origin: function.abs_die_offset, - call_target: Some(0x1200), - parameters: vec![CallSiteParameter { - callee_register: 5, - caller_value_steps: vec![ComputeStep::PushConstant(33)], - }], - }], - ); - let dwarf = build_scanned_incoming_entry_value_fixture(5, 99); - - let parameters = ExpressionEvaluator::collect_incoming_entry_value_parameter_steps( - 5, - Some(&dwarf), - &function, - ); - - assert_eq!( - parameters, - vec![(0x2018, vec![ComputeStep::PushConstant(33)])] - ); - } - - #[test] - fn entry_value_stack_pointer_offsets_use_cfa_based_entry_sp() { - let mut entry_sp_steps = - ExpressionEvaluator::cfa_to_steps(crate::core::CfaResult::RegisterPlusOffset { - register: 7, - offset: 32, - }); - ExpressionEvaluator::append_constant_offset(&mut entry_sp_steps, 8 - 8); - - assert_eq!( - entry_sp_steps, - vec![ - ComputeStep::LoadRegister(7), - ComputeStep::PushConstant(32), - ComputeStep::Add, - ] - ); - } - #[test] fn multi_op_expression_rejects_invalid_opcode_after_valid_prefix() { let expr_bytes = [ diff --git a/ghostscope-dwarf/src/dwarf_expr/mod.rs b/ghostscope-dwarf/src/dwarf_expr/mod.rs new file mode 100644 index 0000000..2172198 --- /dev/null +++ b/ghostscope-dwarf/src/dwarf_expr/mod.rs @@ -0,0 +1,14 @@ +//! Shared DWARF expression parsing, scanning, and lowering utilities. + +pub(crate) mod call_site; +pub(crate) mod cfa; +pub(crate) mod const_eval; +pub(crate) mod entry_value; +pub(crate) mod errors; +pub(crate) mod lower; +pub(crate) mod modes; +pub(crate) mod ops; +pub(crate) mod scan; +pub(crate) mod storage; + +pub(crate) use lower::ExpressionEvaluator; diff --git a/ghostscope-dwarf/src/dwarf_expr/modes.rs b/ghostscope-dwarf/src/dwarf_expr/modes.rs new file mode 100644 index 0000000..249b7ea --- /dev/null +++ b/ghostscope-dwarf/src/dwarf_expr/modes.rs @@ -0,0 +1,74 @@ +//! DWARF expression lowering modes and their error policies. + +/// The consumer-specific context for DWARF expression parsing/lowering. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum DwarfExprMode { + /// Normal DWARF location expressions used to recover variable values. + Location, + /// Caller-side call-site values used as an optional entry-value source. + CallSiteValue, + /// Lightweight scans that only answer a yes/no question. + ScanOnly, + /// CFI/CFA expressions used while recovering caller frames. + Cfa, + /// Constant-only expressions used for member offsets. + ConstOffset, + /// Optional global/static storage-address discovery while indexing. + StorageAddress, +} + +/// How parse/lowering errors should be surfaced for a mode. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum ErrorPolicy { + /// Propagate the error to the caller. + Hard, + /// Treat errors as "not usable" so a caller-owned fallback can run. + SoftWithFallback, + /// Treat errors as `false` for best-effort presence scans. + SilentFalse, +} + +impl DwarfExprMode { + pub(crate) const fn error_policy(self) -> ErrorPolicy { + match self { + Self::Location | Self::Cfa | Self::ConstOffset => ErrorPolicy::Hard, + Self::CallSiteValue | Self::StorageAddress => ErrorPolicy::SoftWithFallback, + Self::ScanOnly => ErrorPolicy::SilentFalse, + } + } + + pub(crate) const fn label(self) -> &'static str { + match self { + Self::Location => "location", + Self::CallSiteValue => "call-site value", + Self::ScanOnly => "scan-only", + Self::Cfa => "CFA", + Self::ConstOffset => "const offset", + Self::StorageAddress => "storage address", + } + } +} + +#[cfg(test)] +mod tests { + use super::{DwarfExprMode, ErrorPolicy}; + + #[test] + fn mode_error_policies_are_explicit() { + assert_eq!(DwarfExprMode::Location.error_policy(), ErrorPolicy::Hard); + assert_eq!(DwarfExprMode::Cfa.error_policy(), ErrorPolicy::Hard); + assert_eq!(DwarfExprMode::ConstOffset.error_policy(), ErrorPolicy::Hard); + assert_eq!( + DwarfExprMode::CallSiteValue.error_policy(), + ErrorPolicy::SoftWithFallback + ); + assert_eq!( + DwarfExprMode::StorageAddress.error_policy(), + ErrorPolicy::SoftWithFallback + ); + assert_eq!( + DwarfExprMode::ScanOnly.error_policy(), + ErrorPolicy::SilentFalse + ); + } +} diff --git a/ghostscope-dwarf/src/dwarf_expr/ops.rs b/ghostscope-dwarf/src/dwarf_expr/ops.rs new file mode 100644 index 0000000..c46970b --- /dev/null +++ b/ghostscope-dwarf/src/dwarf_expr/ops.rs @@ -0,0 +1,62 @@ +//! Shared gimli operation walker for DWARF expressions. + +use crate::core::Result; +use gimli::{Operation, Reader}; + +pub(crate) fn parse_ops( + mut reader: R, + encoding: gimli::Encoding, + context: &str, +) -> Result>> +where + R: Reader, +{ + let total_len = reader.len(); + let mut operations = Vec::new(); + while !reader.is_empty() { + let offset = total_len - reader.len(); + let op = Operation::parse(&mut reader, encoding).map_err(|error| { + anyhow::anyhow!("failed to parse {context} operation at byte offset {offset}: {error}") + })?; + operations.push(op); + } + Ok(operations) +} + +pub(crate) fn parse_single_op( + reader: R, + encoding: gimli::Encoding, + context: &str, +) -> Result>> +where + R: Reader, +{ + let mut operations = parse_ops(reader, encoding, context)?; + if operations.len() > 1 { + return Ok(None); + } + Ok(operations.pop()) +} + +pub(crate) fn any_op( + mut reader: R, + encoding: gimli::Encoding, + context: &str, + mut predicate: F, +) -> Result +where + R: Reader, + F: FnMut(&Operation) -> bool, +{ + let total_len = reader.len(); + while !reader.is_empty() { + let offset = total_len - reader.len(); + let op = Operation::parse(&mut reader, encoding).map_err(|error| { + anyhow::anyhow!("failed to parse {context} operation at byte offset {offset}: {error}") + })?; + if predicate(&op) { + return Ok(true); + } + } + Ok(false) +} diff --git a/ghostscope-dwarf/src/dwarf_expr/scan.rs b/ghostscope-dwarf/src/dwarf_expr/scan.rs new file mode 100644 index 0000000..132c74e --- /dev/null +++ b/ghostscope-dwarf/src/dwarf_expr/scan.rs @@ -0,0 +1,44 @@ +//! Lightweight DWARF expression scans that do not lower expressions. + +use crate::{binary::DwarfReader, core::Result}; + +pub(crate) fn contains_entry_value( + expr: gimli::Expression, + encoding: gimli::Encoding, +) -> Result { + crate::dwarf_expr::ops::any_op(expr.0, encoding, "DWARF expression scan", |op| { + matches!(op, gimli::Operation::EntryValue { .. }) + }) +} + +#[cfg(test)] +mod tests { + use super::contains_entry_value; + use crate::binary::dwarf_reader_from_arc; + use std::sync::Arc; + + fn test_encoding() -> gimli::Encoding { + gimli::Encoding { + format: gimli::Format::Dwarf32, + version: 4, + address_size: 8, + } + } + + fn expr(bytes: &[u8]) -> gimli::Expression { + let data: Arc<[u8]> = Arc::from(bytes); + gimli::Expression(dwarf_reader_from_arc(data)) + } + + #[test] + fn entry_value_scan_stops_after_match() { + let expression = expr(&[0xa3, 0x01, 0x50, 0xff]); + assert!(contains_entry_value(expression, test_encoding()).unwrap()); + } + + #[test] + fn entry_value_scan_errors_before_match() { + let expression = expr(&[0xff, 0xa3, 0x01, 0x50]); + assert!(contains_entry_value(expression, test_encoding()).is_err()); + } +} diff --git a/ghostscope-dwarf/src/dwarf_expr/storage.rs b/ghostscope-dwarf/src/dwarf_expr/storage.rs new file mode 100644 index 0000000..a886c62 --- /dev/null +++ b/ghostscope-dwarf/src/dwarf_expr/storage.rs @@ -0,0 +1,101 @@ +//! Storage-address DWARF expression helpers for index-time discovery. + +use crate::{ + binary::DwarfReader, + dwarf_expr::{errors as expr_errors, modes::DwarfExprMode}, +}; +use gimli::{Operation, Reader}; + +pub(crate) fn absolute_address( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + expr: gimli::Expression, +) -> Option { + let operations = expr_errors::soft_value( + DwarfExprMode::StorageAddress, + crate::dwarf_expr::ops::parse_ops( + expr.0, + unit.encoding(), + "absolute storage address expression", + ), + )?; + + absolute_address_from_ops(&operations, |index| dwarf.address(unit, index).ok()) +} + +fn absolute_address_from_ops(operations: &[Operation], mut resolve_addrx: F) -> Option +where + R: Reader, + F: FnMut(gimli::DebugAddrIndex) -> Option, +{ + match operations { + [Operation::Address { address }] => Some(*address), + // clang/LLVM commonly encodes function-scoped statics in DWARF5 as a + // single `DW_OP_addrx` op. This is still a true storage location, just + // indirected through `.debug_addr`. + [Operation::AddressIndex { index }] => resolve_addrx(*index), + [Operation::UnsignedConstant { value }] => Some(*value), + // Anything more complex may be a computed value or a composite + // location. In particular, `DW_OP_stack_value` means the expression + // yields a value, not a storage address, so treating it as global + // storage would misindex optimized locals. + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::absolute_address_from_ops; + use crate::dwarf_expr::{errors as expr_errors, modes::DwarfExprMode}; + use gimli::{EndianSlice, RunTimeEndian}; + + fn test_encoding() -> gimli::Encoding { + gimli::Encoding { + format: gimli::Format::Dwarf32, + version: 5, + address_size: 8, + } + } + + fn parse_test_expr(bytes: &[u8]) -> Option { + let operations = expr_errors::soft_value( + DwarfExprMode::StorageAddress, + crate::dwarf_expr::ops::parse_ops( + EndianSlice::new(bytes, RunTimeEndian::Little), + test_encoding(), + "test storage address expression", + ), + )?; + absolute_address_from_ops(&operations, |index| Some(0x1000 + index.0 as u64)) + } + + #[test] + fn storage_address_parses_dw_op_addr() { + let address = parse_test_expr(&[0x03, 0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0]); + assert_eq!(address, Some(0x1234_5678)); + } + + #[test] + fn storage_address_parses_dw_op_addrx() { + let address = parse_test_expr(&[gimli::constants::DW_OP_addrx.0, 0x2a]); + assert_eq!(address, Some(0x102a)); + } + + #[test] + fn storage_address_parses_legacy_constu_address() { + let address = parse_test_expr(&[0x10, 0x2a]); + assert_eq!(address, Some(42)); + } + + #[test] + fn storage_address_rejects_stack_value_expression() { + let address = parse_test_expr(&[0x10, 0x2a, gimli::constants::DW_OP_stack_value.0]); + assert_eq!(address, None); + } + + #[test] + fn storage_address_parse_error_downgrades_to_none() { + let address = parse_test_expr(&[0xff]); + assert_eq!(address, None); + } +} diff --git a/ghostscope-dwarf/src/index/block_index.rs b/ghostscope-dwarf/src/index/block_index.rs index 2f4ec4e..3cf7805 100644 --- a/ghostscope-dwarf/src/index/block_index.rs +++ b/ghostscope-dwarf/src/index/block_index.rs @@ -9,10 +9,10 @@ use crate::{ binary::DwarfReader, core::ComputeStep, - parser::{ExpressionEvaluator, RangeExtractor}, + dwarf_expr::call_site, + parser::RangeExtractor, semantics::{ranges_contain_pc, resolve_origin_entry}, }; -use gimli::Reader; use std::collections::BTreeMap; /// Reference to a variable DIE within a unit (minimal info) @@ -590,26 +590,7 @@ impl<'a> BlockIndexBuilder<'a> { unit: &gimli::Unit, entry: &gimli::DebuggingInformationEntry, ) -> Option { - self.resolve_address_attr(unit, entry, gimli::constants::DW_AT_call_target) - .or_else(|| Self::parse_call_target_expr_address(unit, entry)) - } - - fn parse_call_target_expr_address( - unit: &gimli::Unit, - entry: &gimli::DebuggingInformationEntry, - ) -> Option { - let attr = entry.attr(gimli::constants::DW_AT_call_target)?; - let gimli::AttributeValue::Exprloc(mut expr) = attr.value() else { - return None; - }; - let first = gimli::Operation::parse(&mut expr.0, unit.encoding()).ok()?; - if !expr.0.is_empty() { - return None; - } - match first { - gimli::Operation::Address { address } => Some(address), - _ => None, - } + call_site::target_address(self.dwarf, unit, entry) } fn parse_call_site_parameter( @@ -618,88 +599,13 @@ impl<'a> BlockIndexBuilder<'a> { entry: &gimli::DebuggingInformationEntry, return_pc: u64, ) -> Option { - let callee_register = Self::parse_call_site_target_register(unit, entry)?; - let caller_value_steps = self.parse_call_site_value_steps(unit, entry, return_pc)?; + let parsed = call_site::parameter(self.dwarf, unit, entry, return_pc)?; Some(CallSiteParameter { - callee_register, - caller_value_steps, + callee_register: parsed.callee_register, + caller_value_steps: parsed.caller_value_steps, }) } - fn parse_call_site_target_register( - unit: &gimli::Unit, - entry: &gimli::DebuggingInformationEntry, - ) -> Option { - let attr = entry.attr(gimli::constants::DW_AT_location)?; - let gimli::AttributeValue::Exprloc(mut expr) = attr.value() else { - return None; - }; - let first = gimli::Operation::parse(&mut expr.0, unit.encoding()).ok()?; - if !expr.0.is_empty() { - return None; - } - match first { - gimli::Operation::Register { register } => Some(register.0), - _ => None, - } - } - - fn parse_call_site_value_steps( - &self, - unit: &gimli::Unit, - entry: &gimli::DebuggingInformationEntry, - return_pc: u64, - ) -> Option> { - let expr = [ - gimli::constants::DW_AT_call_value, - gimli::constants::DW_AT_GNU_call_site_value, - ] - .into_iter() - .find_map(|attr_name| { - let attr = entry.attr(attr_name)?; - match attr.value() { - gimli::AttributeValue::Exprloc(expr) => Some(expr), - _ => None, - } - })?; - ExpressionEvaluator::parse_expression_to_steps_in_unit( - expr.0.to_slice().ok().as_deref().unwrap_or(&[]), - expr.0.endian(), - unit, - self.dwarf, - return_pc, - None, - None, - None, - ) - .ok() - .or_else(|| Self::lower_entry_value_call_site_register(unit, expr)) - } - - fn lower_entry_value_call_site_register( - unit: &gimli::Unit, - mut expr: gimli::Expression, - ) -> Option> { - let first = gimli::Operation::parse(&mut expr.0, unit.encoding()).ok()?; - if !expr.0.is_empty() { - return None; - } - let gimli::Operation::EntryValue { expression: inner } = first else { - return None; - }; - let mut inner = inner; - let inner_op = gimli::Operation::parse(&mut inner, unit.encoding()).ok()?; - if !inner.is_empty() { - return None; - } - match inner_op { - gimli::Operation::Register { register } => { - Some(vec![ComputeStep::LoadRegister(register.0)]) - } - _ => None, - } - } - fn resolve_address_attr( &self, unit: &gimli::Unit, diff --git a/ghostscope-dwarf/src/index/cfi_index.rs b/ghostscope-dwarf/src/index/cfi_index.rs index 116f7c6..87fed66 100644 --- a/ghostscope-dwarf/src/index/cfi_index.rs +++ b/ghostscope-dwarf/src/index/cfi_index.rs @@ -27,6 +27,8 @@ pub struct CfiIndex { eh_frame_hdr: Option>, /// Base addresses for DWARF sections bases: BaseAddresses, + /// Encoding used when parsing CFI DWARF expressions. + encoding: gimli::Encoding, /// Whether we have eh_frame_hdr for fast lookup has_fast_lookup: bool, } @@ -47,6 +49,12 @@ impl CfiIndex { .parse_object() .context("Failed to parse object file")?; let endian = dwarf_endian_from_object(&object); + let address_size = if object.is_64() { 8 } else { 4 }; + let encoding = gimli::Encoding { + format: gimli::Format::Dwarf32, + version: 4, + address_size, + }; // Load eh_frame section (required) let eh_frame_section = object @@ -81,8 +89,6 @@ impl CfiIndex { .ok_or_else(|| anyhow!("Invalid .eh_frame_hdr range in mapped file"))?; let hdr_section = EhFrameHdr::from(hdr_reader); - // Parse with proper address_size - let address_size = if object.is_64() { 8 } else { 4 }; let mut bases = BaseAddresses::default(); // Set eh_frame_hdr section base @@ -131,6 +137,7 @@ impl CfiIndex { eh_frame, eh_frame_hdr, bases, + encoding, has_fast_lookup, }) } @@ -147,14 +154,8 @@ impl CfiIndex { offset: *offset, }, CfaRule::Expression(expr) => { - // Get the expression bytes from the section let expression = expr.get(&self.eh_frame)?; - // Parse DWARF expression to compute steps - // expression.0 is EndianSlice, get the underlying bytes - use gimli::Reader; - let temp = expression.0.to_slice().ok(); - let expr_bytes = temp.as_deref().unwrap_or(&[]); - let steps = Self::parse_dwarf_expression(expr_bytes)?; + let steps = crate::dwarf_expr::cfa::parse_expression(expression.0, self.encoding)?; CfaResult::Expression { steps } } }; @@ -350,12 +351,8 @@ impl CfiIndex { &self, expr: gimli::UnwindExpression, ) -> Result> { - use gimli::Reader; - let expression = expr.get(&self.eh_frame)?; - let temp = expression.0.to_slice().ok(); - let expr_bytes = temp.as_deref().unwrap_or(&[]); - Self::parse_dwarf_expression(expr_bytes) + crate::dwarf_expr::cfa::parse_expression(expression.0, self.encoding) } fn default_register_rule(register: u16) -> Option> { @@ -367,112 +364,6 @@ impl CfiIndex { } } - /// Parse DWARF expression bytes into ComputeStep sequence - fn parse_dwarf_expression(expr_bytes: &[u8]) -> Result> { - let mut steps = Vec::new(); - let mut pc = 0; - - while pc < expr_bytes.len() { - let opcode = expr_bytes[pc]; - pc += 1; - - match opcode { - // DW_OP_breg0..DW_OP_breg31 - 0x70..=0x8f => { - let register = (opcode - 0x70) as u16; - // Read SLEB128 offset - let (offset, bytes_read) = Self::read_sleb128(&expr_bytes[pc..])?; - pc += bytes_read; - - steps.push(ComputeStep::LoadRegister(register)); - if offset != 0 { - steps.push(ComputeStep::PushConstant(offset)); - steps.push(ComputeStep::Add); - } - } - // DW_OP_plus_uconst - 0x23 => { - let (value, bytes_read) = Self::read_uleb128(&expr_bytes[pc..])?; - pc += bytes_read; - steps.push(ComputeStep::PushConstant(value as i64)); - steps.push(ComputeStep::Add); - } - // DW_OP_lit0..DW_OP_lit31 - 0x30..=0x4f => { - let value = (opcode - 0x30) as i64; - steps.push(ComputeStep::PushConstant(value)); - } - // DW_OP_plus - 0x22 => steps.push(ComputeStep::Add), - // DW_OP_minus - 0x1c => steps.push(ComputeStep::Sub), - // DW_OP_mul - 0x1e => steps.push(ComputeStep::Mul), - // DW_OP_and - 0x1a => steps.push(ComputeStep::And), - // DW_OP_or - 0x21 => steps.push(ComputeStep::Or), - // DW_OP_xor - 0x27 => steps.push(ComputeStep::Xor), - // DW_OP_nop - 0x96 => {} - - _ => { - return Err(anyhow!( - "unsupported DWARF opcode 0x{:02x} in CFA expression at byte offset {}", - opcode, - pc - 1 - )); - } - } - } - - Ok(steps) - } - - /// Read ULEB128 from byte slice - fn read_uleb128(data: &[u8]) -> Result<(u64, usize)> { - let mut result = 0u64; - let mut shift = 0; - let mut bytes_read = 0; - - for &byte in data { - bytes_read += 1; - result |= ((byte & 0x7f) as u64) << shift; - if byte & 0x80 == 0 { - return Ok((result, bytes_read)); - } - shift += 7; - } - - Err(anyhow!("Invalid ULEB128 encoding")) - } - - /// Read SLEB128 from byte slice - fn read_sleb128(data: &[u8]) -> Result<(i64, usize)> { - let mut result = 0i64; - let mut shift = 0; - let mut bytes_read = 0; - let mut byte = 0u8; - - for &b in data { - byte = b; - bytes_read += 1; - result |= ((byte & 0x7f) as i64) << shift; - shift += 7; - if byte & 0x80 == 0 { - break; - } - } - - // Sign extend - if shift < 64 && (byte & 0x40) != 0 { - result |= -(1i64 << shift); - } - - Ok((result, bytes_read)) - } - /// Check if fast lookup is available pub fn has_fast_lookup(&self) -> bool { self.has_fast_lookup @@ -496,22 +387,9 @@ pub struct CfiStats { #[cfg(test)] mod tests { - use super::CfiIndex; - #[test] fn test_cfi_index_creation() { // This would need a real ELF file for testing // For now, just ensure the module compiles } - - #[test] - fn cfa_expression_rejects_unknown_opcode_after_valid_prefix() { - let error = CfiIndex::parse_dwarf_expression(&[0x70, 0x00, 0xff]) - .expect_err("unknown CFI expression opcode must not be skipped"); - - assert!( - error.to_string().contains("unsupported"), - "unexpected error: {error}" - ); - } } diff --git a/ghostscope-dwarf/src/lib.rs b/ghostscope-dwarf/src/lib.rs index bddc048..ff513d0 100644 --- a/ghostscope-dwarf/src/lib.rs +++ b/ghostscope-dwarf/src/lib.rs @@ -8,6 +8,7 @@ pub mod core; // Internal implementation modules pub(crate) mod binary; +pub(crate) mod dwarf_expr; pub(crate) mod index; pub(crate) mod loader; pub(crate) mod objfile; diff --git a/ghostscope-dwarf/src/objfile/access_planner.rs b/ghostscope-dwarf/src/objfile/access_planner.rs index d416ccb..b142eef 100644 --- a/ghostscope-dwarf/src/objfile/access_planner.rs +++ b/ghostscope-dwarf/src/objfile/access_planner.rs @@ -2,12 +2,11 @@ //! requiring full TypeInfo expansion. pub(crate) use crate::semantics::TypeLoc; -use crate::semantics::{ - eval_member_offset_expr, resolve_type_ref_with_origins, strip_typedef_qualified, -}; +use crate::semantics::{resolve_type_ref_with_origins, strip_typedef_qualified}; use crate::{ binary::DwarfReader, core::{attr_u64, EvaluationResult, Result}, + dwarf_expr::{errors as expr_errors, modes::DwarfExprMode}, }; use gimli::Reader; @@ -180,7 +179,13 @@ impl<'dwarf> AccessPlanner<'dwarf> { { match a.value() { gimli::AttributeValue::Exprloc(expr) => { - off = eval_member_offset_expr(&expr) + off = expr_errors::hard( + DwarfExprMode::ConstOffset, + crate::dwarf_expr::const_eval::eval_const_offset( + &expr, + unit_now2.encoding(), + ), + )?; } value => off = attr_u64(value), } diff --git a/ghostscope-dwarf/src/objfile/function_lookup.rs b/ghostscope-dwarf/src/objfile/function_lookup.rs index 3060d18..46f518b 100644 --- a/ghostscope-dwarf/src/objfile/function_lookup.rs +++ b/ghostscope-dwarf/src/objfile/function_lookup.rs @@ -2,6 +2,7 @@ use super::LoadedObjfile; use crate::{ binary::DwarfReader, core::{demangled_name, normalize_demangled_signature, symbol_name_matches_query, Result}, + dwarf_expr::{errors as expr_errors, modes::DwarfExprMode}, index::LightweightIndex, parser::RangeExtractor, semantics::{range_contains_pc, resolve_attr_with_unit_origins, resolve_origin_entry}, @@ -598,15 +599,12 @@ impl LoadedObjfile { fn expression_uses_entry_value( unit: &gimli::Unit, - mut expression: gimli::Expression, + expression: gimli::Expression, ) -> bool { - while let Ok(op) = gimli::Operation::parse(&mut expression.0, unit.encoding()) { - if matches!(op, gimli::Operation::EntryValue { .. }) { - return true; - } - } - - false + expr_errors::silent_false( + DwarfExprMode::ScanOnly, + crate::dwarf_expr::scan::contains_entry_value(expression, unit.encoding()), + ) } pub(crate) fn lookup_function_addresses_any(&self, name: &str) -> Vec { diff --git a/ghostscope-dwarf/src/parser/detailed_parser.rs b/ghostscope-dwarf/src/parser/detailed_parser.rs index e56534a..d0e8c73 100644 --- a/ghostscope-dwarf/src/parser/detailed_parser.rs +++ b/ghostscope-dwarf/src/parser/detailed_parser.rs @@ -8,12 +8,10 @@ use crate::{ binary::DwarfReader, core::{attr_u64, EvaluationResult, Result}, + dwarf_expr::{errors as expr_errors, modes::DwarfExprMode}, index::{CfiIndex, FunctionBlocks}, parser::ExpressionEvaluator, - semantics::{ - eval_member_offset_expr, resolve_name_with_origins, - resolve_type_ref_in_same_unit_with_origins, - }, + semantics::{resolve_name_with_origins, resolve_type_ref_in_same_unit_with_origins}, TypeInfo, }; use gimli::Reader; @@ -315,8 +313,14 @@ impl DetailedParser { if let Some(ml) = ce.attr(gimli::DW_AT_data_member_location) { match ml.value() { gimli::AttributeValue::Exprloc(expr) => { - // Try to eval simple DW_OP_constu / plus_uconst - if let Some(v) = eval_member_offset_expr(&expr) { + if let Some(v) = expr_errors::downgrade_optional_to_none( + DwarfExprMode::ConstOffset, + crate::dwarf_expr::const_eval::eval_const_offset( + &expr, + unit.encoding(), + ), + "shallow member type display", + ) { m_offset = v; } } diff --git a/ghostscope-dwarf/src/parser/fast_parser.rs b/ghostscope-dwarf/src/parser/fast_parser.rs index b459000..01fa286 100644 --- a/ghostscope-dwarf/src/parser/fast_parser.rs +++ b/ghostscope-dwarf/src/parser/fast_parser.rs @@ -899,26 +899,9 @@ impl<'a> DwarfParser<'a> { fn extract_absolute_storage_address_from_expr( &self, unit: &gimli::Unit, - mut expr: gimli::Expression, + expr: gimli::Expression, ) -> Option { - let mut operations = Vec::new(); - while let Ok(op) = gimli::Operation::parse(&mut expr.0, unit.encoding()) { - operations.push(op); - } - - match operations.as_slice() { - [gimli::Operation::Address { address }] => Some(*address), - // clang/LLVM commonly encodes function-scoped statics in DWARF5 as a - // single `DW_OP_addrx` op. This is still a true storage location, just - // indirected through `.debug_addr`. - [gimli::Operation::AddressIndex { index }] => self.dwarf.address(unit, *index).ok(), - [gimli::Operation::UnsignedConstant { value }] => Some(*value), - // Anything more complex may be a computed value or a composite - // location. In particular, `DW_OP_stack_value` means the expression - // yields a value, not a storage address, so treating it as global - // storage would misindex optimized locals. - _ => None, - } + crate::dwarf_expr::storage::absolute_address(self.dwarf, unit, expr) } // Additional helper methods for GDB-style cooked index diff --git a/ghostscope-dwarf/src/parser/mod.rs b/ghostscope-dwarf/src/parser/mod.rs index f5ed665..d7be8d5 100644 --- a/ghostscope-dwarf/src/parser/mod.rs +++ b/ghostscope-dwarf/src/parser/mod.rs @@ -1,7 +1,6 @@ //! Unified DWARF parser - single-pass parsing for all DWARF information pub(crate) mod detailed_parser; -pub(crate) mod expression_evaluator; pub(crate) mod fast_parser; pub(crate) mod range_extractor; // Full type resolver removed; shallow resolver lives in detailed_parser @@ -10,7 +9,7 @@ pub(crate) mod range_extractor; pub use detailed_parser::VariableWithEvaluation; // Internal re-exports for crate use +pub(crate) use crate::dwarf_expr::ExpressionEvaluator; pub(crate) use detailed_parser::DetailedParser; -pub(crate) use expression_evaluator::ExpressionEvaluator; pub(crate) use fast_parser::*; pub(crate) use range_extractor::RangeExtractor; diff --git a/ghostscope-dwarf/src/semantics/expr.rs b/ghostscope-dwarf/src/semantics/expr.rs deleted file mode 100644 index f135075..0000000 --- a/ghostscope-dwarf/src/semantics/expr.rs +++ /dev/null @@ -1,16 +0,0 @@ -use crate::binary::DwarfReader; -use gimli::Reader; - -pub(crate) fn eval_member_offset_expr(expr: &gimli::Expression) -> Option { - let mut reader = expr.0.clone(); - if reader.is_empty() { - return None; - } - - match reader.read_u8().ok()? { - 0x10 => reader.read_uleb128().ok(), // DW_OP_constu - 0x11 => reader.read_sleb128().ok().map(|value| value as u64), // DW_OP_consts - 0x23 => reader.read_uleb128().ok(), // DW_OP_plus_uconst - _ => None, - } -} diff --git a/ghostscope-dwarf/src/semantics/mod.rs b/ghostscope-dwarf/src/semantics/mod.rs index cc6f66c..c38a596 100644 --- a/ghostscope-dwarf/src/semantics/mod.rs +++ b/ghostscope-dwarf/src/semantics/mod.rs @@ -1,9 +1,7 @@ -pub(crate) mod expr; pub(crate) mod origins; pub(crate) mod pc; pub(crate) mod types; -pub(crate) use expr::eval_member_offset_expr; pub(crate) use origins::{ resolve_attr_with_unit_origins, resolve_name_with_origins, resolve_origin_entry, };