From cc35896fd081096ef316d6de89f01cd4a2c46c2b Mon Sep 17 00:00:00 2001 From: swananan Date: Sun, 26 Apr 2026 00:20:46 +0800 Subject: [PATCH] refactor: remove DWARF analyzer raw pointer Store the analyzer as a scoped mutable reference instead of a raw pointer. Move bytecode emission out of the active analyzer borrow so compiler DWARF queries stay borrow-checked. --- ghostscope-compiler/src/ebpf/codegen.rs | 2 +- ghostscope-compiler/src/ebpf/context.rs | 12 +-- ghostscope-compiler/src/ebpf/dwarf_bridge.rs | 99 +++++++++---------- ghostscope-compiler/src/ebpf/expression.rs | 12 +-- .../src/ebpf/helper_functions.rs | 4 +- ghostscope-compiler/src/ebpf/instruction.rs | 2 +- ghostscope-compiler/src/ebpf/variables.rs | 2 +- ghostscope-compiler/src/script/compiler.rs | 75 ++++++++++---- 8 files changed, 115 insertions(+), 93 deletions(-) diff --git a/ghostscope-compiler/src/ebpf/codegen.rs b/ghostscope-compiler/src/ebpf/codegen.rs index 8ffffe36..4b6902fb 100644 --- a/ghostscope-compiler/src/ebpf/codegen.rs +++ b/ghostscope-compiler/src/ebpf/codegen.rs @@ -154,7 +154,7 @@ fn allocate_dynamic_payload_reservations(max_lens: &[usize], available: usize) - reservations } -impl<'ctx> EbpfContext<'ctx> { +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { const UNKNOWN_CHAR_ARRAY_READ_FALLBACK: usize = 256; fn build_errno_i32(&self, ret: IntValue<'ctx>, name: &str) -> Result> { diff --git a/ghostscope-compiler/src/ebpf/context.rs b/ghostscope-compiler/src/ebpf/context.rs index f3eea8e2..e405d39e 100644 --- a/ghostscope-compiler/src/ebpf/context.rs +++ b/ghostscope-compiler/src/ebpf/context.rs @@ -56,7 +56,7 @@ pub enum CodeGenError { pub type Result = std::result::Result; /// eBPF LLVM code generation context -pub struct EbpfContext<'ctx> { +pub struct EbpfContext<'ctx, 'dw> { pub context: &'ctx Context, pub module: Module<'ctx>, pub builder: Builder<'ctx>, @@ -80,7 +80,7 @@ pub struct EbpfContext<'ctx> { pub optimized_out_vars: HashMap, // Optimized out variables pub var_pc_addresses: HashMap, // Variable -> PC address pub variable_context: Option, // Scope validation context - pub process_analyzer: Option<*mut DwarfAnalyzer>, // Multi-module DWARF analyzer + pub process_analyzer: Option<&'dw mut DwarfAnalyzer>, // Multi-module DWARF analyzer pub current_trace_id: Option, // Current trace_id being compiled pub current_compile_time_context: Option, // PC address and module for DWARF queries @@ -122,9 +122,9 @@ pub struct EbpfContext<'ctx> { } // Temporary alias for backward compatibility during refactoring -pub type NewCodeGen<'ctx> = EbpfContext<'ctx>; +pub type NewCodeGen<'ctx, 'dw> = EbpfContext<'ctx, 'dw>; -impl<'ctx> EbpfContext<'ctx> { +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { /// Create a new eBPF code generation context pub fn new( context: &'ctx Context, @@ -287,12 +287,12 @@ impl<'ctx> EbpfContext<'ctx> { pub fn new_with_process_analyzer( context: &'ctx Context, module_name: &str, - process_analyzer: Option<&mut DwarfAnalyzer>, + process_analyzer: Option<&'dw mut DwarfAnalyzer>, trace_id: Option, compile_options: &crate::CompileOptions, ) -> Result { let mut codegen = Self::new(context, module_name, trace_id, compile_options)?; - codegen.process_analyzer = process_analyzer.map(|pa| pa as *const _ as *mut _); + codegen.process_analyzer = process_analyzer; Ok(codegen) } diff --git a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs index 8f1bc751..f4312d45 100644 --- a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs +++ b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs @@ -12,7 +12,7 @@ use ghostscope_process::module_probe; use inkwell::values::{BasicValueEnum, IntValue, PointerValue}; use tracing::{debug, warn}; -impl<'ctx> EbpfContext<'ctx> { +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { /// Compute a stable cookie for a module when per-PID offsets are unavailable (via coordinator). fn fallback_cookie_from_module_path(&self, module_path: &str) -> u64 { module_probe::cookie_for_path(module_path) @@ -20,8 +20,7 @@ impl<'ctx> EbpfContext<'ctx> { /// Compute section code for an address within a module (text=0, rodata=1, data=2, bss=3). fn section_code_for_address(&mut self, module_path: &str, link_addr: u64) -> u8 { - if let Some(analyzer_ptr) = self.process_analyzer { - let analyzer = unsafe { &mut *analyzer_ptr }; + if let Some(analyzer) = self.process_analyzer.as_deref_mut() { if let Some(st) = analyzer.classify_section_for_address(module_path, link_addr) { return match st { ghostscope_dwarf::core::SectionType::Text => 0, @@ -1187,7 +1186,7 @@ impl<'ctx> EbpfContext<'ctx> { // DWARF resolvers see the actual DWARF-based expression tree. // Guard against self-referential or cyclic aliases. fn expand_aliases( - ctx: &crate::ebpf::context::EbpfContext<'_>, + ctx: &crate::ebpf::context::EbpfContext<'_, '_>, e: &crate::script::Expr, visited: &mut std::collections::HashSet, depth: usize, @@ -1310,30 +1309,26 @@ impl<'ctx> EbpfContext<'ctx> { &mut self, var_name: &str, ) -> Result> { - if self.process_analyzer.is_none() { - return Err(CodeGenError::DwarfError( - "No DWARF analyzer available".to_string(), - )); - } - let context = self.get_compile_time_context()?; let pc_address = context.pc_address; - let module_path = &context.module_path; + let module_path = context.module_path.clone(); debug!( "Querying DWARF for variable '{}' at PC 0x{:x} in module '{}'", var_name, pc_address, module_path ); - // Query DWARF analyzer for variable - let analyzer = unsafe { &mut *(self.process_analyzer.unwrap()) }; + let analyzer = self + .process_analyzer + .as_deref_mut() + .ok_or_else(|| CodeGenError::DwarfError("No DWARF analyzer available".to_string()))?; let module_address = ghostscope_dwarf::ModuleAddress::new( std::path::PathBuf::from(module_path.clone()), pc_address, ); - let module_path_owned = module_path.clone(); + let module_path_owned = module_path; let lookup_globals = |analyzer: &mut ghostscope_dwarf::DwarfAnalyzer| -> Result< Option<(std::path::PathBuf, VariableWithEvaluation)>, > { @@ -1649,16 +1644,15 @@ impl<'ctx> EbpfContext<'ctx> { } // Support simple variable base and fall back to global/static lowering if let crate::script::Expr::Variable(base_name) = obj_expr { - let Some(analyzer_ptr) = self.process_analyzer else { - return Err(CodeGenError::DwarfError( - "No DWARF analyzer available".to_string(), - )); - }; - let analyzer = unsafe { &mut *analyzer_ptr }; let ctx = self.get_compile_time_context()?; + let module_path = ctx.module_path.clone(); + let pc_address = ctx.pc_address; + let analyzer = self.process_analyzer.as_deref_mut().ok_or_else(|| { + CodeGenError::DwarfError("No DWARF analyzer available".to_string()) + })?; let module_address = ghostscope_dwarf::ModuleAddress::new( - std::path::PathBuf::from(ctx.module_path.clone()), - ctx.pc_address, + std::path::PathBuf::from(module_path.clone()), + pc_address, ); // Try current module at PC first match analyzer.plan_chain_access(&module_address, base_name, &[field_name.to_string()]) @@ -1673,7 +1667,7 @@ impl<'ctx> EbpfContext<'ctx> { // Strict cross-module chain planning via analyzer API match analyzer .plan_global_chain_access( - &std::path::PathBuf::from(ctx.module_path.clone()), + &std::path::PathBuf::from(module_path.clone()), base_name, &[field_name.to_string()], ) @@ -1696,7 +1690,7 @@ impl<'ctx> EbpfContext<'ctx> { ghostscope_dwarf::core::GlobalVariableInfo, )> = matches .iter() - .filter(|(p, _)| p.to_string_lossy() == ctx.module_path.as_str()) + .filter(|(p, _)| p.to_string_lossy() == module_path.as_str()) .cloned() .collect(); let chosen = if preferred.len() == 1 { @@ -1804,16 +1798,15 @@ impl<'ctx> EbpfContext<'ctx> { } let mut segs: Vec<&str> = Vec::new(); if flatten_chain(array_expr, &mut segs) && !segs.is_empty() { - let Some(analyzer_ptr) = self.process_analyzer else { - return Err(CodeGenError::DwarfError( - "No DWARF analyzer available".to_string(), - )); - }; - let analyzer = unsafe { &mut *analyzer_ptr }; let ctx = self.get_compile_time_context()?; + let module_path = ctx.module_path.clone(); + let pc_address = ctx.pc_address; + let analyzer = self.process_analyzer.as_deref_mut().ok_or_else(|| { + CodeGenError::DwarfError("No DWARF analyzer available".to_string()) + })?; let module_address = ghostscope_dwarf::ModuleAddress::new( - std::path::PathBuf::from(ctx.module_path.clone()), - ctx.pc_address, + std::path::PathBuf::from(module_path), + pc_address, ); let base = segs[0].to_string(); let rest: Vec = segs[1..].iter().map(|s| s.to_string()).collect(); @@ -1929,17 +1922,17 @@ impl<'ctx> EbpfContext<'ctx> { return self.query_dwarf_for_variable(&chain[0]); } // Planner path only; do not fallback. If planning fails, surface an error. - let Some(analyzer_ptr) = self.process_analyzer else { - return Err(CodeGenError::DwarfError( - "No DWARF analyzer available".to_string(), - )); - }; - let analyzer = unsafe { &mut *analyzer_ptr }; let ctx = self.get_compile_time_context()?; + let module_path = ctx.module_path.clone(); + let pc_address = ctx.pc_address; + let analyzer = self + .process_analyzer + .as_deref_mut() + .ok_or_else(|| CodeGenError::DwarfError("No DWARF analyzer available".to_string()))?; // First attempt: current module at current PC (locals/params) let module_address = ghostscope_dwarf::ModuleAddress::new( - std::path::PathBuf::from(ctx.module_path.clone()), - ctx.pc_address, + std::path::PathBuf::from(module_path.clone()), + pc_address, ); match analyzer.plan_chain_access(&module_address, &chain[0], &chain[1..]) { Ok(Some(var)) => return Ok(Some(var)), @@ -1953,11 +1946,7 @@ impl<'ctx> EbpfContext<'ctx> { let base = &chain[0]; let rest = &chain[1..]; match analyzer - .plan_global_chain_access( - &std::path::PathBuf::from(ctx.module_path.clone()), - base, - rest, - ) + .plan_global_chain_access(&std::path::PathBuf::from(module_path.clone()), base, rest) .map_err(|e| CodeGenError::DwarfError(e.to_string()))? { Some((mpath, v)) => { @@ -1975,7 +1964,7 @@ impl<'ctx> EbpfContext<'ctx> { ghostscope_dwarf::core::GlobalVariableInfo, )> = matches .iter() - .filter(|(p, _)| p.to_string_lossy() == ctx.module_path.as_str()) + .filter(|(p, _)| p.to_string_lossy() == module_path.as_str()) .cloned() .collect(); let chosen = if preferred.len() == 1 { @@ -2104,9 +2093,9 @@ impl<'ctx> EbpfContext<'ctx> { } } } - if let Some(analyzer_ptr) = self.process_analyzer { - let analyzer = unsafe { &mut *analyzer_ptr }; - let ctx = self.get_compile_time_context()?; + let ctx = self.get_compile_time_context()?; + let module_path = ctx.module_path.clone(); + if let Some(analyzer) = self.process_analyzer.as_deref_mut() { let mut alias_used: Option = None; for n in candidate_names { // Prefer cross-module definitions first to avoid forward decls with size=0 in current CU @@ -2118,8 +2107,8 @@ impl<'ctx> EbpfContext<'ctx> { } } if upgraded.is_none() { - if let Some(ti) = analyzer - .resolve_struct_type_shallow_by_name_in_module(&ctx.module_path, &n) + if let Some(ti) = + analyzer.resolve_struct_type_shallow_by_name_in_module(&module_path, &n) { if ti.size() > 0 { upgraded = Some(ti); @@ -2135,8 +2124,8 @@ impl<'ctx> EbpfContext<'ctx> { } } if upgraded.is_none() { - if let Some(ti) = analyzer - .resolve_union_type_shallow_by_name_in_module(&ctx.module_path, &n) + if let Some(ti) = + analyzer.resolve_union_type_shallow_by_name_in_module(&module_path, &n) { if ti.size() > 0 { upgraded = Some(ti); @@ -2152,8 +2141,8 @@ impl<'ctx> EbpfContext<'ctx> { } } if upgraded.is_none() { - if let Some(ti) = analyzer - .resolve_enum_type_shallow_by_name_in_module(&ctx.module_path, &n) + if let Some(ti) = + analyzer.resolve_enum_type_shallow_by_name_in_module(&module_path, &n) { if ti.size() > 0 { upgraded = Some(ti); diff --git a/ghostscope-compiler/src/ebpf/expression.rs b/ghostscope-compiler/src/ebpf/expression.rs index 3157ae18..1364fe62 100644 --- a/ghostscope-compiler/src/ebpf/expression.rs +++ b/ghostscope-compiler/src/ebpf/expression.rs @@ -12,7 +12,7 @@ use tracing::debug; // compare cap is provided via compile_options.compare_cap (config: ebpf.compare_cap) -impl<'ctx> EbpfContext<'ctx> { +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { pub(crate) fn get_host_pid_tid_values(&mut self) -> Result<(IntValue<'ctx>, IntValue<'ctx>)> { let i32_type = self.context.i32_type(); let i64_type = self.context.i64_type(); @@ -1361,8 +1361,8 @@ impl<'ctx> EbpfContext<'ctx> { } }; // Accept string on either side: string literal or script string variable - fn extract_script_string<'a>( - this: &mut EbpfContext<'a>, + fn extract_script_string( + this: &mut EbpfContext<'_, '_>, e: &Expr, ) -> Option { match e { @@ -1404,8 +1404,8 @@ impl<'ctx> EbpfContext<'ctx> { )); } // Accept string on either side (literal or script string var) - fn extract_script_string<'a>( - this: &mut EbpfContext<'a>, + fn extract_script_string( + this: &mut EbpfContext<'_, '_>, e: &Expr, ) -> Option { match e { @@ -2222,7 +2222,7 @@ impl<'ctx> EbpfContext<'ctx> { } } -impl<'ctx> EbpfContext<'ctx> { +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { /// Compile comparison between a DWARF-side expression and a script string literal. /// Supports char* and char[N] according to design in string_comparison.md. fn compile_string_comparison( diff --git a/ghostscope-compiler/src/ebpf/helper_functions.rs b/ghostscope-compiler/src/ebpf/helper_functions.rs index fbfb4582..aaa290a8 100644 --- a/ghostscope-compiler/src/ebpf/helper_functions.rs +++ b/ghostscope-compiler/src/ebpf/helper_functions.rs @@ -22,7 +22,7 @@ struct ProbeReadResult<'ctx> { not_found: IntValue<'ctx>, } -impl<'ctx> EbpfContext<'ctx> { +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { fn get_probe_read_scratch_buffer( &mut self, result_size: usize, @@ -603,7 +603,7 @@ impl<'ctx> EbpfContext<'ctx> { .build_pointer_cast(val_ptr, i64_ptr_ty, "val_u64_ptr") .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; let load_field = |idx: u64, - ctx: &mut EbpfContext<'ctx>, + ctx: &mut EbpfContext<'ctx, 'dw>, base: PointerValue<'ctx>| -> Result> { // GEP in i64 element space diff --git a/ghostscope-compiler/src/ebpf/instruction.rs b/ghostscope-compiler/src/ebpf/instruction.rs index cd276212..61a976ae 100644 --- a/ghostscope-compiler/src/ebpf/instruction.rs +++ b/ghostscope-compiler/src/ebpf/instruction.rs @@ -15,7 +15,7 @@ const fn split_pid_tgid(pid_tgid: u64) -> (u32, u32) { ((pid_tgid >> 32) as u32, pid_tgid as u32) } -impl<'ctx> EbpfContext<'ctx> { +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { /// Reserve `size` bytes in the per-CPU accumulation buffer and return a pointer to the /// beginning of the reserved region. On overflow, resets the event offset and returns /// from the eBPF program early (mirrors existing control-flow style used elsewhere). diff --git a/ghostscope-compiler/src/ebpf/variables.rs b/ghostscope-compiler/src/ebpf/variables.rs index 46ab6f16..9b5ce69a 100644 --- a/ghostscope-compiler/src/ebpf/variables.rs +++ b/ghostscope-compiler/src/ebpf/variables.rs @@ -9,7 +9,7 @@ use inkwell::values::BasicValueEnum; use inkwell::AddressSpace; use tracing::{debug, info}; -impl<'ctx> EbpfContext<'ctx> { +impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { /// Register a DWARF alias variable. The value expression is stored and resolved at use time. pub fn set_alias_variable(&mut self, name: &str, expr: crate::script::Expr) { self.alias_vars.insert(name.to_string(), expr); diff --git a/ghostscope-compiler/src/script/compiler.rs b/ghostscope-compiler/src/script/compiler.rs index ddb4e4cb..c3f04081 100644 --- a/ghostscope-compiler/src/script/compiler.rs +++ b/ghostscope-compiler/src/script/compiler.rs @@ -727,6 +727,8 @@ impl<'a> AstCompiler<'a> { // Generate unified eBPF function name using the assigned trace_id let ebpf_function_name = self.generate_unified_function_name(target, assigned_trace_id); + let compile_options = self.compile_options.clone(); + let binary_path_hint = self.binary_path_hint.clone(); info!( "Generating eBPF code for '{}' (function: {})", @@ -792,8 +794,14 @@ impl<'a> AstCompiler<'a> { let module = codegen_new.get_module(); // Generate eBPF bytecode from LLVM module - let ebpf_bytecode = - self.generate_ebpf_bytecode(module, &ebpf_function_name, target, assigned_trace_id)?; + let ebpf_bytecode = Self::generate_ebpf_bytecode( + module, + &ebpf_function_name, + target, + assigned_trace_id, + &compile_options, + binary_path_hint.as_deref(), + )?; // Use the TraceContext returned from compile_program (no need to get it again) @@ -891,13 +899,37 @@ impl<'a> AstCompiler<'a> { format!("gs_{module_hash}_{address_hex}_trace{trace_id}.{extension}") } + fn generate_filename_with_hint( + target: &ResolvedTarget, + trace_id: u32, + extension: &str, + binary_path_hint: Option<&str>, + ) -> String { + let effective_path = if target.binary_path.is_empty() { + binary_path_hint.unwrap_or("unknown") + } else { + target.binary_path.as_str() + }; + let mut hasher = DefaultHasher::new(); + effective_path.hash(&mut hasher); + let module_hash = format!("{:08x}", (hasher.finish() & 0xFFFF_FFFF) as u32); + let address_hex = if let Some(addr) = target.function_address { + format!("{addr:x}") + } else { + "unknown".to_string() + }; + + format!("gs_{module_hash}_{address_hex}_trace{trace_id}.{extension}") + } + /// Generate eBPF bytecode from LLVM module fn generate_ebpf_bytecode( - &mut self, module: &inkwell::module::Module, function_name: &str, target: &ResolvedTarget, assigned_trace_id: u32, + compile_options: &crate::CompileOptions, + binary_path_hint: Option<&str>, ) -> Result, CompileError> { use inkwell::targets::{FileType, Target, TargetTriple}; use inkwell::OptimizationLevel; @@ -912,14 +944,17 @@ impl<'a> AstCompiler<'a> { ); // Save LLVM IR file if requested - if let Some(compile_options) = self.get_compile_options() { - if compile_options.save_llvm_ir { - let filename = self.generate_filename(target, assigned_trace_id, "ll"); - if let Err(e) = std::fs::write(&filename, &llvm_ir) { - warn!("Failed to save LLVM IR to {}: {}", filename, e); - } else { - info!("Saved LLVM IR to: {}", filename); - } + if compile_options.save_llvm_ir { + let filename = Self::generate_filename_with_hint( + target, + assigned_trace_id, + "ll", + binary_path_hint, + ); + if let Err(e) = std::fs::write(&filename, &llvm_ir) { + warn!("Failed to save LLVM IR to {}: {}", filename, e); + } else { + info!("Saved LLVM IR to: {}", filename); } } @@ -1009,19 +1044,17 @@ impl<'a> AstCompiler<'a> { let bytecode = object_code.as_slice().to_vec(); // Save eBPF object file and AST if requested - if let Some(compile_options) = self.get_compile_options() { - if compile_options.save_ebpf { - let filename = self.generate_filename(target, assigned_trace_id, "o"); - if let Err(e) = std::fs::write(&filename, &bytecode) { - warn!("Failed to save eBPF object to {}: {}", filename, e); - } else { - info!("Saved eBPF object to: {}", filename); - } + if compile_options.save_ebpf { + let filename = + Self::generate_filename_with_hint(target, assigned_trace_id, "o", binary_path_hint); + if let Err(e) = std::fs::write(&filename, &bytecode) { + warn!("Failed to save eBPF object to {}: {}", filename, e); + } else { + info!("Saved eBPF object to: {}", filename); } - - // AST has already been saved earlier in generate_ebpf_for_target } + // AST has already been saved earlier in generate_ebpf_for_target Ok(bytecode) }