From 9e37e40daf7b27e8bd69941dc7cace929bfdb5b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Mon, 18 Sep 2023 10:53:43 -0700 Subject: [PATCH] Add symbol size to Sym type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this change we expose the symbol's size in the form of the newly added Sym::size member, if it is available. Currently the only symbolization source that does not expose the size is kallsyms. Signed-off-by: Daniel Müller --- CHANGELOG.md | 5 +++++ src/dwarf/resolver.rs | 6 +++++- src/elf/parser.rs | 31 +++++++++++++++++-------------- src/elf/resolver.rs | 4 ++-- src/gsym/resolver.rs | 1 + src/ksym.rs | 2 ++ src/resolver.rs | 2 ++ src/symbolize/symbolizer.rs | 6 ++++++ 8 files changed, 40 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3bed8b77b..ed36d125e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +Unreleased +---------- +- Added `size` member to `symbolize::Sym` type + + 0.2.0-alpha.6 ------------- - Fixed potential panic when normalizing an APK ELF file using the C APIs diff --git a/src/dwarf/resolver.rs b/src/dwarf/resolver.rs index cb26afa50..fc5516c5e 100644 --- a/src/dwarf/resolver.rs +++ b/src/dwarf/resolver.rs @@ -142,9 +142,13 @@ impl DwarfResolver { .range .map(|range| range.begin as usize) .unwrap_or(0); + let size = function + .range + .map(|range| usize::try_from(range.end - range.begin).unwrap_or(usize::MAX)); let sym = IntSym { name, addr, + size, lang: language.into(), }; Ok(vec![sym]) @@ -191,7 +195,7 @@ impl DwarfResolver { .range .as_ref() .and_then(|range| range.end.checked_sub(range.begin)) - .map(|size| size as usize) + .map(|size| usize::try_from(size).unwrap_or(usize::MAX)) .unwrap_or(0); let info = SymInfo { name, diff --git a/src/elf/parser.rs b/src/elf/parser.rs index 1e1c1db35..ca23aeb2c 100644 --- a/src/elf/parser.rs +++ b/src/elf/parser.rs @@ -46,7 +46,7 @@ fn find_sym<'mmap>( strtab: &'mmap [u8], addr: 
Addr, st_type: u8, -) -> Result<Option<(&'mmap str, Addr)>> { +) -> Result<Option<(&'mmap str, Addr, usize)>> { match find_match_or_lower_bound_by_key(symtab, addr, |sym| sym.st_value as Addr) { None => Ok(None), Some(idx) => { @@ -68,7 +68,8 @@ fn find_sym<'mmap>( // candidate. let name = symbol_name(strtab, sym)?; let addr = sym.st_value as Addr; - return Ok(Some((name, addr))) + let size = usize::try_from(sym.st_size).unwrap_or(usize::MAX); + return Ok(Some((name, addr, size))) } } Ok(None) @@ -403,7 +404,7 @@ impl ElfParser { Ok(index) } - pub fn find_sym(&self, addr: Addr, st_type: u8) -> Result<Option<(&str, Addr)>> { + pub fn find_sym(&self, addr: Addr, st_type: u8) -> Result<Option<(&str, Addr, usize)>> { let mut cache = self.cache.borrow_mut(); let strtab = cache.ensure_strtab()?; let () = cache.ensure_symtab()?; @@ -479,7 +480,7 @@ impl ElfParser { } #[cfg(test)] - fn pick_symtab_addr(&self) -> (&str, Addr) { + fn pick_symtab_addr(&self) -> (&str, Addr, usize) { let mut cache = self.cache.borrow_mut(); let () = cache.ensure_symtab().unwrap(); let symtab = cache.symtab.as_ref().unwrap(); @@ -490,10 +491,11 @@ impl ElfParser { } let sym = &symtab[idx]; let addr = sym.st_value; + let size = sym.st_size; drop(cache); let sym_name = self.get_symbol_name(idx).unwrap(); - (sym_name, addr as Addr) + (sym_name, addr as Addr, usize::try_from(size).unwrap_or(usize::MAX)) } } @@ -526,12 +528,13 @@ mod tests { let parser = ElfParser::open(bin_name.as_ref()).unwrap(); assert!(parser.find_section(".shstrtab").is_ok()); - let (sym_name, addr) = parser.pick_symtab_addr(); + let (name, addr, size) = parser.pick_symtab_addr(); let sym = parser.find_sym(addr, STT_FUNC).unwrap().unwrap(); - let (sym_name_ret, addr_ret) = sym; + let (name_ret, addr_ret, size_ret) = sym; assert_eq!(addr_ret, addr); - assert_eq!(sym_name_ret, sym_name); + assert_eq!(name_ret, name); + assert_eq!(size_ret, size); } #[test] @@ -543,13 +546,13 @@ mod tests { let parser = ElfParser::open(bin_name.as_ref()).unwrap(); assert!(parser.find_section(".shstrtab").is_ok()); - let (sym_name, addr) = 
parser.pick_symtab_addr(); + let (name, addr, size) = parser.pick_symtab_addr(); - println!("{sym_name}"); + println!("{name}"); let opts = FindAddrOpts::default(); - let addr_r = parser.find_addr(sym_name, &opts).unwrap(); + let addr_r = parser.find_addr(name, &opts).unwrap(); assert_eq!(addr_r.len(), 1); - assert!(addr_r.iter().any(|x| x.addr == addr)); + assert!(addr_r.iter().any(|x| x.addr == addr && x.size == size)); } /// Make sure that we can look up a symbol in an ELF file. @@ -620,7 +623,7 @@ mod tests { let result = find_sym(symtab, strtab, 0x29d00, STT_FUNC) .unwrap() .unwrap(); - assert_eq!(result, ("__libc_init_first", 0x29d00)); + assert_eq!(result, ("__libc_init_first", 0x29d00, 0x5)); // Strictly speaking this address is way outside of the range of // the second symbol (which is only five bytes in size). @@ -632,7 +635,7 @@ mod tests { let result = find_sym(symtab, strtab, 0x29d90, STT_FUNC) .unwrap() .unwrap(); - assert_eq!(result, ("__libc_init_first", 0x29d00)); + assert_eq!(result, ("__libc_init_first", 0x29d00, 0x5)); } let symtab = [ diff --git a/src/elf/resolver.rs b/src/elf/resolver.rs index 97e566f1f..1d902464f 100644 --- a/src/elf/resolver.rs +++ b/src/elf/resolver.rs @@ -58,14 +58,14 @@ impl SymResolver for ElfResolver { #[cfg_attr(feature = "tracing", crate::log::instrument(fields(addr = format_args!("{addr:#x}"))))] fn find_syms(&self, addr: Addr) -> Result<Vec<IntSym<'_>>> { let parser = self.get_parser(); - if let Some((name, addr)) = parser.find_sym(addr, STT_FUNC)? { + if let Some((name, addr, size)) = parser.find_sym(addr, STT_FUNC)? { // ELF does not carry any source code language information. let lang = SrcLang::Unknown; // We found the address in ELF. // TODO: Long term we probably want a different heuristic here, as // there can be valid differences between the two formats // (e.g., DWARF could contain more symbols). 
- return Ok(vec![IntSym { name, addr, lang }]) + return Ok(vec![IntSym { name, addr, size: Some(size), lang }]) } match &self.backend { diff --git a/src/gsym/resolver.rs b/src/gsym/resolver.rs index 7c6e42899..3ea8cee64 100644 --- a/src/gsym/resolver.rs +++ b/src/gsym/resolver.rs @@ -100,6 +100,7 @@ impl SymResolver for GsymResolver<'_> { let sym = IntSym { name, addr: found, + size: Some(usize::try_from(info.size).unwrap_or(usize::MAX)), lang, }; diff --git a/src/ksym.rs b/src/ksym.rs index 38172fbdd..f82078889 100644 --- a/src/ksym.rs +++ b/src/ksym.rs @@ -35,6 +35,8 @@ impl<'ksym> From<&'ksym Ksym> for IntSym<'ksym> { IntSym { name, addr: *addr, + // There is no size information in kallsyms. + size: None, // Kernel symbols don't carry any source code language // information. lang: SrcLang::Unknown, diff --git a/src/resolver.rs b/src/resolver.rs index 941388849..57fbf1892 100644 --- a/src/resolver.rs +++ b/src/resolver.rs @@ -26,6 +26,8 @@ pub(crate) struct IntSym<'src> { pub(crate) name: &'src str, /// The symbol's normalized address. pub(crate) addr: Addr, + /// The symbol's size, if available. + pub(crate) size: Option<usize>, /// The source code language from which the symbol originates. pub(crate) lang: SrcLang, } diff --git a/src/symbolize/symbolizer.rs b/src/symbolize/symbolizer.rs index 3afee1553..3fe6579e3 100644 --- a/src/symbolize/symbolizer.rs +++ b/src/symbolize/symbolizer.rs @@ -90,6 +90,8 @@ pub struct Sym { /// context (which may have been relocated and/or have layout randomizations /// applied). pub offset: usize, + /// The symbol's size, if available. + pub size: Option<usize>, /// The directory in which the source file resides. pub dir: Option<PathBuf>, /// The file that defines the symbol. 
@@ -232,12 +234,14 @@ impl Symbolizer { let IntSym { name, addr: sym_addr, + size: sym_size, lang, } = sym; results.push(Sym { name: self.maybe_demangle(name, lang), addr: sym_addr, offset: addr - sym_addr, + size: sym_size, dir: Some(linfo.dir.to_path_buf()), file: Some(linfo.file.to_os_string()), line: linfo.line, @@ -248,12 +252,14 @@ impl Symbolizer { let IntSym { name, addr: sym_addr, + size: sym_size, lang, } = sym; results.push(Sym { name: self.maybe_demangle(name, lang), addr: sym_addr, offset: addr - sym_addr, + size: sym_size, dir: None, file: None, line: None,