diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 9799e14..82579dd 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -36,13 +36,22 @@ pub enum DataType { impl DataType { pub fn display_bytes(&self, bytes: &[u8]) -> Option { - // TODO: Attempt to interpret large symbols as arrays of a smaller type, - // fallback to intrepreting it as bytes. - // https://github.com/encounter/objdiff/issues/124 - if self.required_len().is_some_and(|l| bytes.len() != l) { - log::warn!("Failed to display a symbol value for a symbol whose size doesn't match the instruction referencing it."); + if self.required_len().is_some_and(|l| bytes.len() < l) { + log::warn!("Failed to display a symbol value for a symbol whose size is too small for instruction referencing it."); return None; } + let mut bytes = bytes; + if self.required_len().is_some_and(|l| bytes.len() > l) { + // If the symbol's size is larger a single instance of this data type, we take just the + // bytes necessary for one of them in order to display the first element of the array. + bytes = &bytes[0..self.required_len().unwrap()]; + // TODO: Attempt to interpret large symbols as arrays of a smaller type and show all + // elements of the array instead. https://github.com/encounter/objdiff/issues/124 + // However, note that the stride of an array can not always be determined just by the + // data type guessed by the single instruction accessing it. There can also be arrays of + // structs that contain multiple elements of different types, so if other elements after + // the first one were to be displayed in this manner, they may be inaccurate. + } match self { DataType::Int8 => { @@ -86,10 +95,10 @@ impl DataType { } } DataType::Float => { - format!("Float: {}", Endian::read_f32(bytes)) + format!("Float: {:?}f", Endian::read_f32(bytes)) } DataType::Double => { - format!("Double: {}", Endian::read_f64(bytes)) + format!("Double: {:?}", Endian::read_f64(bytes)) } DataType::Bytes => { format!("Bytes: {:#?}", bytes) diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs index 6c487fc..3768a53 100644 --- a/objdiff-core/src/arch/ppc.rs +++ b/objdiff-core/src/arch/ppc.rs @@ -1,4 +1,7 @@ -use std::{borrow::Cow, collections::BTreeMap}; +use std::{ + borrow::Cow, + collections::{BTreeMap, HashMap}, +}; use anyhow::{bail, ensure, Result}; use byteorder::BigEndian; @@ -7,7 +10,7 @@ use object::{ elf, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, RelocationTarget, Symbol, SymbolKind, }; -use ppc750cl::{Argument, InsIter, Opcode, GPR}; +use ppc750cl::{Argument, InsIter, Opcode, ParsedIns, GPR}; use crate::{ arch::{DataType, ObjArch, ProcessCodeResult}, @@ -49,6 +52,8 @@ impl ObjArch for ObjArchPpc { let ins_count = code.len() / 4; let mut ops = Vec::::with_capacity(ins_count); let mut insts = Vec::::with_capacity(ins_count); + let fake_pool_reloc_for_addr = + generate_fake_pool_reloc_for_addr_mapping(address, code, relocations); for (cur_addr, mut ins) in InsIter::new(code, address as u32) { let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); if let Some(reloc) = reloc { @@ -145,7 +150,7 @@ impl ObjArch for ObjArchPpc { size: 4, mnemonic: Cow::Borrowed(simplified.mnemonic), args, - reloc: reloc.cloned(), + reloc: reloc.or(fake_pool_reloc_for_addr.get(&cur_addr)).cloned(), op: ins.op as u16, branch_dest, line, @@ -173,6 +178,7 @@ impl ObjArch for ObjArchPpc { fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> { match flags { RelocationFlags::Elf { r_type } => match r_type { + elf::R_PPC_NONE => Cow::Borrowed("R_PPC_NONE"), // We use this for fake pool relocs elf::R_PPC_ADDR16_LO => Cow::Borrowed("R_PPC_ADDR16_LO"), elf::R_PPC_ADDR16_HI => Cow::Borrowed("R_PPC_ADDR16_HI"), elf::R_PPC_ADDR16_HA => Cow::Borrowed("R_PPC_ADDR16_HA"), @@ -188,26 +194,22 @@ impl ObjArch for ObjArchPpc { } fn guess_data_type(&self, instruction: &ObjIns) -> Option { - // Always shows the first string of the table. Not ideal, but it's really hard to find - // the actual string being referenced. if instruction.reloc.as_ref().is_some_and(|r| r.target.name.starts_with("@stringBase")) { return Some(DataType::String); } - match Opcode::from(instruction.op as u8) { - Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8), - Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16), - Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16), - Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32), - Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float), - Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double), - - Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8), - Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16), - Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32), - Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float), - Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double), - _ => None, + let op = Opcode::from(instruction.op as u8); + if let Some(ty) = guess_data_type_from_load_store_inst_op(op) { + Some(ty) + } else if op == Opcode::Addi { + // Assume that any addi instruction that references a local symbol is loading a string. + // This hack is not ideal and results in tons of false positives where it will show + // garbage strings (e.g. misinterpreting arrays, float literals, etc). + // But not all strings are in the @stringBase pool, so the condition above that checks + // the target symbol name would miss some. + Some(DataType::String) + } else { + None } } @@ -381,3 +383,196 @@ fn make_symbol_ref(symbol: &Symbol) -> Result { let demangled_name = cwdemangle::demangle(&name, &cwdemangle::DemangleOptions::default()); Ok(ExtabSymbolRef { original_index: symbol.index().0, name, demangled_name }) } + +fn guess_data_type_from_load_store_inst_op(inst_op: Opcode) -> Option { + match inst_op { + Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8), + Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16), + Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16), + Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32), + Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float), + Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double), + + Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8), + Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16), + Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32), + Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float), + Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double), + _ => None, + } +} + +// Given an instruction, determine if it could accessing data at the address in a register. +// If so, return the offset added to the register's address, the register containing that address, +// and (optionally) which destination register the address is being copied into. +fn get_offset_and_addr_gpr_for_possible_pool_reference( + opcode: Opcode, + simplified: &ParsedIns, +) -> Option<(i16, GPR, Option)> { + let args = &simplified.args; + if guess_data_type_from_load_store_inst_op(opcode).is_some() { + match (args[1], args[2]) { + (Argument::Offset(offset), Argument::GPR(addr_src_gpr)) => { + // e.g. lwz. Immediate offset. + Some((offset.0, addr_src_gpr, None)) + } + (Argument::GPR(addr_src_gpr), Argument::GPR(_offset_gpr)) => { + // e.g. lwzx. The offset is in a register and was likely calculated from an index. + // Treat the offset as being 0 in this case to show the first element of the array. + // It may be possible to show all elements by figuring out the stride of the array + // from the calculations performed on the index before it's put into offset_gpr, but + // this would be much more complicated, so it's not currently done. + Some((0, addr_src_gpr, None)) + } + _ => None, + } + } else { + // If it's not a load/store instruction, there's two more possibilities we need to handle. + // 1. It could be loading a pointer to a string. + // 2. It could be moving the relocation address plus an offset into a different register to + // load from later. + // If either of these match, we also want to return the destination register that the + // address is being copied into so that we can detect any future references to that new + // register as well. + match (opcode, args[0], args[1], args[2]) { + ( + Opcode::Addi, + Argument::GPR(addr_dst_gpr), + Argument::GPR(addr_src_gpr), + Argument::Simm(simm), + ) => Some((simm.0, addr_src_gpr, Some(addr_dst_gpr))), + ( + Opcode::Or, + Argument::GPR(addr_dst_gpr), + Argument::GPR(addr_src_gpr), + Argument::None, + ) => Some((0, addr_src_gpr, Some(addr_dst_gpr))), // `mr` or `mr.` + _ => None, + } + } +} + +// We create a fake relocation for an instruction, vaguely simulating what the actual relocation +// might have looked like if it wasn't pooled. This is so minimal changes are needed to display +// pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that +// there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense. +// Also, if this instruction is accessing the middle of a symbol instead of the start, we add an +// addend to indicate that. +fn make_fake_pool_reloc(offset: i16, cur_addr: u32, pool_reloc: &ObjReloc) -> Option { + let offset_from_pool = pool_reloc.addend + offset as i64; + let target_address = pool_reloc.target.address.checked_add_signed(offset_from_pool)?; + let orig_section_index = pool_reloc.target.orig_section_index?; + // We also need to create a fake target symbol to go inside our fake relocation. + // This is because we don't have access to list of all symbols in this section, so we can't find + // the real symbol yet. Instead we make a placeholder that has the correct `orig_section_index` + // and `address` fields, and then later on when this information is displayed to the user, we + // can find the real symbol by searching through the object's section's symbols for one that + // contains this address. + let fake_target_symbol = ObjSymbol { + name: "".to_string(), + demangled_name: None, + address: target_address, + section_address: 0, + size: 0, + size_known: false, + kind: Default::default(), + flags: Default::default(), + orig_section_index: Some(orig_section_index), + virtual_address: None, + original_index: None, + bytes: vec![], + }; + // The addend is also fake because we don't know yet if the `target_address` here is the exact + // start of the symbol or if it's in the middle of it. + let fake_addend = 0; + Some(ObjReloc { + flags: RelocationFlags::Elf { r_type: elf::R_PPC_NONE }, + address: cur_addr as u64, + target: fake_target_symbol, + addend: fake_addend, + }) +} + +// Searches through all instructions in a function, determining which registers have the addresses +// of pooled data relocations in them, finding which instructions load data from those addresses, +// and constructing a mapping of the address of that instruction to a "fake pool relocation" that +// simulates what that instruction's relocation would look like if data hadn't been pooled. +// Limitations: This method currently only goes through the instructions in a function in linear +// order, from start to finish. It does *not* follow any branches. This means that it could have +// false positives or false negatives in determining which relocation is currently loaded in which +// register at any given point in the function, as control flow is not respected. +// There are currently no known examples of this method producing inaccurate results in reality, but +// if examples are found, it may be possible to update this method to also follow all branches so +// that it produces more accurate results. +fn generate_fake_pool_reloc_for_addr_mapping( + address: u64, + code: &[u8], + relocations: &[ObjReloc], +) -> HashMap { + let mut active_pool_relocs = HashMap::new(); + let mut pool_reloc_for_addr = HashMap::new(); + for (cur_addr, ins) in InsIter::new(code, address as u32) { + let simplified = ins.simplified(); + let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); + + if let Some(reloc) = reloc { + // This instruction has a real relocation, so it may be a pool load we want to keep + // track of. + let args = &simplified.args; + match (ins.op, args[0], args[1], args[2]) { + ( + Opcode::Addi, + Argument::GPR(addr_dst_gpr), + Argument::GPR(_addr_src_gpr), + Argument::Simm(_simm), + ) => { + active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `addi` + } + ( + Opcode::Ori, + Argument::GPR(addr_dst_gpr), + Argument::GPR(_addr_src_gpr), + Argument::Uimm(_uimm), + ) => { + active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `ori` + } + (Opcode::B, _, _, _) => { + if simplified.mnemonic == "bl" { + // When encountering a function call, clear any active pool relocations from + // the volatile registers (r0, r3-r12), but not the nonvolatile registers. + active_pool_relocs.remove(&0); + for gpr in 3..12 { + active_pool_relocs.remove(&gpr); + } + } + } + _ => {} + } + } else if let Some((offset, addr_src_gpr, addr_dst_gpr)) = + get_offset_and_addr_gpr_for_possible_pool_reference(ins.op, &simplified) + { + // This instruction doesn't have a real relocation, so it may be a reference to one of + // the already-loaded pools. + if let Some(pool_reloc) = active_pool_relocs.get(&addr_src_gpr.0) { + if let Some(fake_pool_reloc) = make_fake_pool_reloc(offset, cur_addr, pool_reloc) { + pool_reloc_for_addr.insert(cur_addr, fake_pool_reloc); + } + if let Some(addr_dst_gpr) = addr_dst_gpr { + // If the address of the pool relocation got copied into another register, we + // need to keep track of it in that register too as future instructions may + // reference the symbol indirectly via this new register, instead of the + // register the symbol's address was originally loaded into. + // For example, the start of the function might `lis` + `addi` the start of the + // ...data pool into r25, and then later the start of a loop will `addi` r25 + // with the offset within the .data section of an array variable into r21. + // Then the body of the loop will `lwzx` one of the array elements from r21. + let mut new_reloc = pool_reloc.clone(); + new_reloc.addend += offset as i64; + active_pool_relocs.insert(addr_dst_gpr.0, new_reloc); + } + } + } + } + + pool_reloc_for_addr +} diff --git a/objdiff-core/src/diff/code.rs b/objdiff-core/src/diff/code.rs index 3d3273a..e645a24 100644 --- a/objdiff-core/src/diff/code.rs +++ b/objdiff-core/src/diff/code.rs @@ -9,7 +9,7 @@ use crate::{ DiffObjConfig, ObjInsArgDiff, ObjInsBranchFrom, ObjInsBranchTo, ObjInsDiff, ObjInsDiffKind, ObjSymbolDiff, }, - obj::{ObjInfo, ObjInsArg, ObjReloc, ObjSymbolFlags, SymbolRef}, + obj::{ObjInfo, ObjInsArg, ObjReloc, ObjSection, ObjSymbol, ObjSymbolFlags, SymbolRef}, }; pub fn process_code_symbol( @@ -21,14 +21,30 @@ pub fn process_code_symbol( let section = section.ok_or_else(|| anyhow!("Code symbol section not found"))?; let code = §ion.data [symbol.section_address as usize..(symbol.section_address + symbol.size) as usize]; - obj.arch.process_code( + let mut res = obj.arch.process_code( symbol.address, code, section.orig_index, §ion.relocations, §ion.line_info, config, - ) + )?; + + for inst in res.insts.iter_mut() { + if let Some(reloc) = &mut inst.reloc { + if reloc.target.size == 0 && reloc.target.name.is_empty() { + // Fake target symbol we added as a placeholder. We need to find the real one. + if let Some(real_target) = + find_symbol_matching_fake_symbol_in_sections(&reloc.target, &obj.sections) + { + reloc.addend = (reloc.target.address - real_target.address) as i64; + reloc.target = real_target; + } + } + } + } + + Ok(res) } pub fn no_diff_code(out: &ProcessCodeResult, symbol_ref: SymbolRef) -> Result { @@ -369,3 +385,16 @@ fn compare_ins( } Ok(result) } + +fn find_symbol_matching_fake_symbol_in_sections( + fake_symbol: &ObjSymbol, + sections: &[ObjSection], +) -> Option { + let orig_section_index = fake_symbol.orig_section_index?; + let section = sections.iter().find(|s| s.orig_index == orig_section_index)?; + let real_symbol = section + .symbols + .iter() + .find(|s| s.size > 0 && (s.address..s.address + s.size).contains(&fake_symbol.address))?; + Some(real_symbol.clone()) +} diff --git a/objdiff-gui/src/views/function_diff.rs b/objdiff-gui/src/views/function_diff.rs index 081a0ab..6a1a880 100644 --- a/objdiff-gui/src/views/function_diff.rs +++ b/objdiff-gui/src/views/function_diff.rs @@ -1,4 +1,4 @@ -use std::default::Default; +use std::{cmp::Ordering, default::Default}; use egui::{text::LayoutJob, Id, Label, Response, RichText, Sense, Widget}; use egui_extras::TableRow; @@ -123,7 +123,15 @@ fn ins_hover_ui( if let Some(reloc) = &ins.reloc { ui.label(format!("Relocation type: {}", obj.arch.display_reloc(reloc.flags))); - ui.colored_label(appearance.highlight_color, format!("Name: {}", reloc.target.name)); + let addend_str = match reloc.addend.cmp(&0i64) { + Ordering::Greater => format!("+{:x}", reloc.addend), + Ordering::Less => format!("-{:x}", -reloc.addend), + _ => "".to_string(), + }; + ui.colored_label( + appearance.highlight_color, + format!("Name: {}{}", reloc.target.name, addend_str), + ); if let Some(orig_section_index) = reloc.target.orig_section_index { if let Some(section) = obj.sections.iter().find(|s| s.orig_index == orig_section_index) @@ -135,18 +143,18 @@ fn ins_hover_ui( } ui.colored_label( appearance.highlight_color, - format!("Address: {:x}", reloc.target.address), + format!("Address: {:x}{}", reloc.target.address, addend_str), ); ui.colored_label( appearance.highlight_color, format!("Size: {:x}", reloc.target.size), ); - if let Some(s) = obj - .arch - .guess_data_type(ins) - .and_then(|ty| obj.arch.display_data_type(ty, &reloc.target.bytes)) - { - ui.colored_label(appearance.highlight_color, s); + if reloc.addend >= 0 && reloc.target.bytes.len() > reloc.addend as usize { + if let Some(s) = obj.arch.guess_data_type(ins).and_then(|ty| { + obj.arch.display_data_type(ty, &reloc.target.bytes[reloc.addend as usize..]) + }) { + ui.colored_label(appearance.highlight_color, s); + } } } else { ui.colored_label(appearance.highlight_color, "Extern".to_string());