From db0bdebedda93a21432376861f7c0c22cee5f37b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Fri, 22 Sep 2023 09:50:00 -0700 Subject: [PATCH] Read LEB128 as 64 bit values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently all our LEB128 decoding functions return 128 bit values: they are the largest integer type available to us and, barring any heap allocated dynamically sized integers, the fixed size integer type most likely to fit all expressed values. However, working with 128 bit values requires two registers on common 64 bit machines and, in general, Gsym, the only format decoder currently using this functionality, is unlikely to need this data range. Hence, with this change we switch to decoding all those values into 64 bit fixed size integers instead. Signed-off-by: Daniel Müller --- src/gsym/inline.rs | 24 +++++++++--------------- src/gsym/linetab.rs | 18 +++++++++--------- src/gsym/parser.rs | 4 +--- src/util.rs | 22 +++++++++++----------- 4 files changed, 30 insertions(+), 38 deletions(-) diff --git a/src/gsym/inline.rs b/src/gsym/inline.rs index b3db20e0..a6ec38d2 100644 --- a/src/gsym/inline.rs +++ b/src/gsym/inline.rs @@ -23,7 +23,7 @@ impl InlineInfo { lookup_addr: Option, ) -> Result> { let range_cnt = data - .read_u128_leb128() + .read_u64_leb128() .ok_or_invalid_data(|| "failed to read range count from inline information")? .0; let range_cnt = usize::try_from(range_cnt) @@ -39,19 +39,13 @@ impl InlineInfo { if let Some(lookup_addr) = lookup_addr { for i in 0..range_cnt { let offset = data - .read_u128_leb128() + .read_u64_leb128() .ok_or_invalid_data(|| "failed to read offset from inline information")? .0; - let offset = u64::try_from(offset) - .ok() - .ok_or_invalid_data(|| "offset ({}) is too big")?; let size = data - .read_u128_leb128() + .read_u64_leb128() .ok_or_invalid_data(|| "failed to read size from inline information")? 
.0; - let size = u64::try_from(size) - .ok() - .ok_or_invalid_data(|| "size ({}) is too big")?; let start = base_addr + offset; let end = start + size; @@ -67,10 +61,10 @@ impl InlineInfo { } else { for _ in 0..range_cnt { let _offset = data - .read_u128_leb128() + .read_u64_leb128() .ok_or_invalid_data(|| "failed to read offset from inline information")?; let _size = data - .read_u128_leb128() + .read_u64_leb128() .ok_or_invalid_data(|| "failed to read size from inline information")?; } } @@ -85,24 +79,24 @@ impl InlineInfo { let (call_file, call_line) = if lookup_addr.is_some() { let call_file = data - .read_u128_leb128() + .read_u64_leb128() .ok_or_invalid_data(|| "failed to read call file from inline information")? .0; let call_file = u32::try_from(call_file) .ok() .ok_or_invalid_data(|| "call file index ({}) is too big")?; let call_line = data - .read_u128_leb128() + .read_u64_leb128() .ok_or_invalid_data(|| "failed to read call line from inline information")? .0; let call_line = u32::try_from(call_line).unwrap_or(u32::MAX); (Some(call_file), Some(call_line)) } else { let _call_file = data - .read_u128_leb128() + .read_u64_leb128() .ok_or_invalid_data(|| "failed to read call file from inline information")?; let _call_line = data - .read_u128_leb128() + .read_u64_leb128() .ok_or_invalid_data(|| "failed to read call line from inline information")?; (None, None) }; diff --git a/src/gsym/linetab.rs b/src/gsym/linetab.rs index 8d19f2e4..c482ec6e 100644 --- a/src/gsym/linetab.rs +++ b/src/gsym/linetab.rs @@ -48,13 +48,13 @@ impl LineTableHeader { /// /// * `data` - is what [`AddrData::data`] is. 
pub(super) fn parse(data: &mut &[u8]) -> Option { - let (min_delta, _bytes) = data.read_i128_leb128()?; - let (max_delta, _bytes) = data.read_i128_leb128()?; - let (first_line, _bytes) = data.read_u128_leb128()?; + let (min_delta, _bytes) = data.read_i64_leb128()?; + let (max_delta, _bytes) = data.read_i64_leb128()?; + let (first_line, _bytes) = data.read_u64_leb128()?; let header = Self { - min_delta: min_delta as i64, - max_delta: max_delta as i64, + min_delta, + max_delta, first_line: first_line as u32, }; Some(header) @@ -107,18 +107,18 @@ pub fn run_op( match op { END_SEQUENCE => Some(RunResult::End), SET_FILE => { - let (f, _bytes) = ops.read_u128_leb128()?; + let (f, _bytes) = ops.read_u64_leb128()?; ctx.file_idx = f as u32; Some(RunResult::Ok) } ADVANCE_PC => { - let (adv, _bytes) = ops.read_u128_leb128()?; + let (adv, _bytes) = ops.read_u64_leb128()?; ctx.address += adv as Addr; Some(RunResult::NewRow) } ADVANCE_LINE => { - let (adv, _bytes) = ops.read_i128_leb128()?; - ctx.file_line = (ctx.file_line as i64 + adv as i64) as u32; + let (adv, _bytes) = ops.read_i64_leb128()?; + ctx.file_line = (ctx.file_line as i64 + adv) as u32; Some(RunResult::Ok) } // Special operators. diff --git a/src/gsym/parser.rs b/src/gsym/parser.rs index cb1f8bf2..4db6f39f 100644 --- a/src/gsym/parser.rs +++ b/src/gsym/parser.rs @@ -284,9 +284,7 @@ mod tests { gsym_fo.read_to_end(&mut data).unwrap(); - let mut addr_tab = Vec::::new(); - addr_tab.resize(TEST_SIZE * 4, 0); - + let mut addr_tab = vec![0; TEST_SIZE * 4]; let mut values: Vec = (0_u32..(TEST_SIZE as u32)).collect(); let copy_to_addr_tab = |values: &[u32], addr_tab: &mut Vec| { diff --git a/src/util.rs b/src/util.rs index 64f10d54..2357173e 100644 --- a/src/util.rs +++ b/src/util.rs @@ -325,17 +325,17 @@ pub(crate) trait ReadRaw<'data> { self.read_pod::() } - /// Read a `u128` encoded as unsigned variable length little endian base 128 + /// Read a `u64` encoded as unsigned variable length little endian base 128 /// value. 
/// /// The function returns the value read along with the number of bytes /// consumed. - fn read_u128_leb128(&mut self) -> Option<(u128, u8)> { + fn read_u64_leb128(&mut self) -> Option<(u64, u8)> { let mut shift = 0; - let mut value = 0u128; + let mut value = 0u64; while let Some(bytes) = self.read_slice(1) { if let [byte] = bytes { - value |= ((byte & 0b0111_1111) as u128) << shift; + value |= ((byte & 0b0111_1111) as u64) << shift; shift += 7; if (byte & 0b1000_0000) == 0 { return Some((value, shift / 7)) @@ -347,15 +347,15 @@ pub(crate) trait ReadRaw<'data> { None } - /// Read a `u128` encoded as signed variable length little endian base 128 + /// Read an `i64` encoded as signed variable length little endian base 128 /// value. /// /// The function returns the value read along with the number of bytes /// consumed. - fn read_i128_leb128(&mut self) -> Option<(i128, u8)> { - let (value, shift) = self.read_u128_leb128()?; - let sign_bits = 128 - shift * 7; - let value = ((value as i128) << sign_bits) >> sign_bits; + fn read_i64_leb128(&mut self) -> Option<(i64, u8)> { + let (value, shift) = self.read_u64_leb128()?; + let sign_bits = u64::BITS as u8 - shift * 7; + let value = ((value as i64) << sign_bits) >> sign_bits; Some((value, shift)) } } @@ -619,11 +619,11 @@ mod tests { #[test] fn leb128_reading() { let data = [0xf4, 0xf3, 0x75]; - let (v, s) = data.as_slice().read_u128_leb128().unwrap(); + let (v, s) = data.as_slice().read_u64_leb128().unwrap(); assert_eq!(v, 0x1d79f4); assert_eq!(s, 3); - let (v, s) = data.as_slice().read_i128_leb128().unwrap(); + let (v, s) = data.as_slice().read_i64_leb128().unwrap(); assert_eq!(v, -165388); assert_eq!(s, 3); }