diff --git a/Cargo.toml b/Cargo.toml index ba11f32fb..1b0f1ba3f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ include = [ "LICENSE", "README.md", ] -keywords = ["binary", "elf", "mach", "pe", "archive"] +keywords = ["binary", "elf", "mach", "pe", "te", "archive"] license = "MIT" readme = "README.md" repository = "https://github.com/m4b/goblin" @@ -38,7 +38,7 @@ version = "0.12" default_features = false [features] -default = ["std", "elf32", "elf64", "mach32", "mach64", "pe32", "pe64", "archive", "endian_fd"] +default = ["std", "elf32", "elf64", "mach32", "mach64", "pe32", "pe64", "te", "archive", "endian_fd"] std = ["alloc", "scroll/std"] alloc = ["scroll/derive", "log"] endian_fd = ["alloc"] @@ -49,6 +49,7 @@ mach32 = ["alloc", "endian_fd", "archive"] mach64 = ["alloc", "endian_fd", "archive"] pe32 = ["alloc", "endian_fd"] pe64 = ["alloc", "endian_fd"] +te = ["alloc", "endian_fd"] archive = ["alloc"] [badges.travis-ci] diff --git a/README.md b/README.md index 30d30f3c9..e4680c223 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,7 @@ Here are some things you could do with this crate (or help to implement so they * mach32 - 32-bit mach-o `repr(C)` struct defs * pe32 - 32-bit PE `repr(C)` struct defs * pe64 - 64-bit PE `repr(C)` struct defs +* te - Terse Executable (TE) `repr(C)` struct defs * archive - a Unix Archive parser * endian_fd - parses according to the endianness in the binary * std - to allow `no_std` environments diff --git a/src/lib.rs b/src/lib.rs index ec77f93d5..e811de1ea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -229,6 +229,7 @@ pub enum Hint { Mach(HintData), MachFat(usize), PE, + TE, COFF, Archive, Unknown(u64), @@ -236,7 +237,7 @@ pub enum Hint { macro_rules! 
if_everything { ($($i:item)*) => ($( - #[cfg(all(feature = "endian_fd", feature = "elf64", feature = "elf32", feature = "pe64", feature = "pe32", feature = "mach64", feature = "mach32", feature = "archive"))] + #[cfg(all(feature = "endian_fd", feature = "elf64", feature = "elf32", feature = "pe64", feature = "pe32", feature = "te", feature = "mach64", feature = "mach32", feature = "archive"))] $i )*) } @@ -262,6 +263,7 @@ if_everything! { } else { match *&bytes[0..2].pread_with::<u16>(0, LE)? { pe::header::DOS_MAGIC => Ok(Hint::PE), + pe::header::TE_MAGIC => Ok(Hint::TE), pe::header::COFF_MACHINE_X86 | pe::header::COFF_MACHINE_X86_64 | pe::header::COFF_MACHINE_ARM64 => Ok(Hint::COFF), @@ -290,6 +292,8 @@ if_everything! { Elf(elf::Elf<'a>), /// A PE32/PE32+! PE(pe::PE<'a>), + /// A TE! + TE(pe::TE<'a>), /// A COFF COFF(pe::Coff<'a>), /// A 32/64-bit Mach-o binary _OR_ it is a multi-architecture binary container! @@ -309,6 +313,7 @@ if_everything! { Hint::Mach(_) | Hint::MachFat(_) => Ok(Object::Mach(mach::Mach::parse(bytes)?)), Hint::Archive => Ok(Object::Archive(archive::Archive::parse(bytes)?)), Hint::PE => Ok(Object::PE(pe::PE::parse(bytes)?)), + Hint::TE => Ok(Object::TE(pe::TE::parse(bytes)?)), Hint::COFF => Ok(Object::COFF(pe::Coff::parse(bytes)?)), Hint::Unknown(magic) => Ok(Object::Unknown(magic)), } diff --git a/src/pe/debug.rs b/src/pe/debug.rs index 311bc632c..cae77b4be 100644 --- a/src/pe/debug.rs +++ b/src/pe/debug.rs @@ -92,7 +92,7 @@ impl ImageDebugDirectory { ) } - fn parse_with_opts( + pub(crate) fn parse_with_opts( bytes: &[u8], dd: data_directories::DataDirectory, sections: &[section_table::SectionTable], diff --git a/src/pe/header.rs b/src/pe/header.rs index cb3b08faa..62bfe1906 100644 --- a/src/pe/header.rs +++ b/src/pe/header.rs @@ -1,5 +1,5 @@ use crate::error; -use crate::pe::{optional_header, section_table, symbol}; +use crate::pe::{data_directories, optional_header, section_table, symbol}; use crate::strtab; use alloc::vec::Vec; use log::debug; 
@@ -837,6 +837,174 @@ impl ctx::TryIntoCtx for Header {
     }
 }
 
+/// The TE header is a reduced PE32/PE32+ header containing only fields
+/// required for execution in the Platform Initialization
+/// ([PI](https://uefi.org/specs/PI/1.8/V1_Introduction.html)) architecture.
+/// The TE header is described in this specification:
+/// <https://uefi.org/specs/PI/1.8/V1_TE_Image.html#te-header>
+#[cfg(feature = "te")]
+#[repr(C)]
+#[derive(Debug, Default, PartialEq, Copy, Clone, Pread, Pwrite)]
+pub struct TeHeader {
+    /// Te signature, always [TE_MAGIC]
+    pub signature: u16,
+    /// The machine type
+    pub machine: u16,
+    /// The number of sections
+    pub number_of_sections: u8,
+    /// The subsystem
+    pub subsystem: u8,
+    /// the amount of bytes stripped from the header when converting from a
+    /// PE32/PE32+ header to a TE header. Used to resolve addresses
+    pub stripped_size: u16,
+    /// The entry point of the binary
+    pub entry_point: u32,
+    /// The base of the code section
+    pub base_of_code: u32,
+    /// The image base
+    pub image_base: u64,
+    /// The size and address of the relocation directory
+    pub reloc_dir: data_directories::DataDirectory,
+    /// The size and address of the debug directory
+    pub debug_dir: data_directories::DataDirectory,
+}
+
+#[cfg(feature = "te")]
+#[doc(alias("IMAGE_TE_SIGNATURE"))]
+pub const TE_MAGIC: u16 = 0x5a56;
+
+#[cfg(feature = "te")]
+impl TeHeader {
+    /// Parse the TE header from the given bytes.
+    ///
+    /// On success `offset` is advanced past the header, and every address in
+    /// the returned header has been adjusted for the stripped PE headers.
+    ///
+    /// Returns an error if the header cannot be read from `bytes`, or if
+    /// `stripped_size` is smaller than the TE header itself.
+    pub fn parse(bytes: &[u8], offset: &mut usize) -> error::Result<Self> {
+        let mut header: TeHeader = bytes.gread_with(offset, scroll::LE)?;
+        let adj_offset = header.rva_adjustment()?;
+        header.fixup_header(adj_offset);
+        Ok(header)
+    }
+
+    /// Parse the sections from the TE header.
+    ///
+    /// Reads `number_of_sections` section table entries starting at `offset`
+    /// and adjusts their addresses for the stripped PE headers.
+    pub fn sections(
+        &self,
+        bytes: &[u8],
+        offset: &mut usize,
+    ) -> error::Result<Vec<section_table::SectionTable>> {
+        let adj_offset = self.rva_adjustment()?;
+        let nsections = self.number_of_sections as usize;
+
+        // a section table is at least 40 bytes
+        if nsections > bytes.len() / 40 {
+            return Err(error::Error::BufferTooShort(nsections, "sections"));
+        }
+
+        let mut sections = Vec::with_capacity(nsections);
+        for i in 0..nsections {
+            let mut section = section_table::SectionTable::parse(bytes, offset, 0)?;
+            TeHeader::fixup_section(&mut section, adj_offset);
+            debug!("({}) {:#?}", i, section);
+            sections.push(section);
+        }
+        Ok(sections)
+    }
+
+    /// The amount every address in the image must be reduced by:
+    /// `stripped_size` minus the size of the TE header that replaced the
+    /// stripped PE headers.
+    ///
+    /// `stripped_size` is read straight from the file, so the subtraction is
+    /// checked and reported as an error instead of being allowed to underflow.
+    pub(crate) fn rva_adjustment(&self) -> error::Result<u32> {
+        let header_size = core::mem::size_of::<TeHeader>() as u32;
+        u32::from(self.stripped_size)
+            .checked_sub(header_size)
+            .ok_or_else(|| {
+                error::Error::Malformed(alloc::format!(
+                    "TE stripped size ({:#x}) is smaller than the TE header size ({:#x})",
+                    self.stripped_size, header_size
+                ))
+            })
+    }
+
+    // Adjust addresses in the header to account for the stripped size
+    fn fixup_header(&mut self, adj_offset: u32) {
+        debug!(
+            "Entry point fixed up from: 0x{:x} to 0x{:X}",
+            self.entry_point,
+            self.entry_point.wrapping_sub(adj_offset)
+        );
+        self.entry_point = self.entry_point.wrapping_sub(adj_offset);
+
+        debug!(
+            "Base of code fixed up from: 0x{:x} to 0x{:X}",
+            self.base_of_code,
+            self.base_of_code.wrapping_sub(adj_offset)
+        );
+        self.base_of_code = self.base_of_code.wrapping_sub(adj_offset);
+
+        debug!(
+            "Relocation Directory fixed up from: 0x{:x} to 0x{:X}",
+            self.reloc_dir.virtual_address,
+            self.reloc_dir.virtual_address.wrapping_sub(adj_offset)
+        );
+        self.reloc_dir.virtual_address = self.reloc_dir.virtual_address.wrapping_sub(adj_offset);
+
+        debug!(
+            "Debug Directory fixed up from: 0x{:x} to 0x{:X}",
+            self.debug_dir.virtual_address,
+            self.debug_dir.virtual_address.wrapping_sub(adj_offset)
+        );
+        self.debug_dir.virtual_address = self.debug_dir.virtual_address.wrapping_sub(adj_offset);
+    }
+
+    // Adjust addresses in the section to account for the stripped size
+    fn fixup_section(section: &mut section_table::SectionTable, adj_offset: u32) {
+        debug!(
+            "Section virtual address fixed up from: 0x{:X} to 0x{:X}",
+            section.virtual_address,
+            section.virtual_address.wrapping_sub(adj_offset)
+        );
+        section.virtual_address = section.virtual_address.wrapping_sub(adj_offset);
+
+        if section.pointer_to_linenumbers > 0 {
+            debug!(
+                "Section pointer to line numbers fixed up from: 0x{:X} to 0x{:X}",
+                section.pointer_to_linenumbers,
+                section.pointer_to_linenumbers.wrapping_sub(adj_offset)
+            );
+            section.pointer_to_linenumbers =
+                section.pointer_to_linenumbers.wrapping_sub(adj_offset);
+        }
+
+        if section.pointer_to_raw_data > 0 {
+            debug!(
+                "Section pointer to raw data fixed up from: 0x{:X} to 0x{:X}",
+                section.pointer_to_raw_data,
+                section.pointer_to_raw_data.wrapping_sub(adj_offset)
+            );
+            section.pointer_to_raw_data = section.pointer_to_raw_data.wrapping_sub(adj_offset);
+        }
+
+        if section.pointer_to_relocations > 0 {
+            debug!(
+                "Section pointer to relocations fixed up from: 0x{:X} to 0x{:X}",
+                section.pointer_to_relocations,
+                section.pointer_to_relocations.wrapping_sub(adj_offset)
+            );
+            section.pointer_to_relocations =
+                section.pointer_to_relocations.wrapping_sub(adj_offset);
+        }
+    }
+}
+
 /// Convert machine to str representation. Any case of "COFF_UNKNOWN"
 /// should be expected to change to a more specific value.
 pub fn machine_to_str(machine: u16) -> &'static str {
diff --git a/src/pe/mod.rs b/src/pe/mod.rs
index dd9c2c5df..1f2bac7a9 100644
--- a/src/pe/mod.rs
+++ b/src/pe/mod.rs
@@ -467,6 +467,107 @@ impl<'a> ctx::TryIntoCtx for PE<'a> {
     }
 }
 
+/// An analyzed TE binary
+///
+/// A TE binary is a PE/PE32+ binary that has had its header stripped and
+/// re-formatted to the TE specification. This presents a challenge for
+/// parsing, as all relative addresses (RVAs) are not updated to take this into
+/// account, and are thus incorrect. The parsing of a TE must take this into
+/// account by using the [header::TeHeader::stripped_size] field of the TE
+/// header to adjust the RVAs during parsing.
+#[cfg(feature = "te")]
+#[derive(Debug)]
+pub struct TE<'a> {
+    /// The TE header
+    pub header: header::TeHeader,
+    /// A list of the sections in this TE binary
+    pub sections: Vec<section_table::SectionTable>,
+    /// Debug information, contained in the PE header
+    pub debug_data: debug::DebugData<'a>,
+    /// The offset to apply to addresses not parsed by the TE parser
+    /// itself: [header::TeHeader::stripped_size] - size_of::<[header::TeHeader]>()
+    pub rva_offset: usize,
+}
+
+#[cfg(feature = "te")]
+impl<'a> TE<'a> {
+    /// Reads a TE binary from the underlying `bytes`
+    ///
+    /// Returns an error if the header or section table cannot be read, if the
+    /// header's `stripped_size` is smaller than the TE header, or if a
+    /// non-empty debug directory cannot be parsed.
+    pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
+        let opts = &options::ParseOptions {
+            resolve_rva: false,
+            parse_attribute_certificates: false,
+        };
+
+        let mut offset = 0;
+
+        // Parse the TE header and adjust the offsets
+        let header = header::TeHeader::parse(bytes, &mut offset)?;
+        // Checked subtraction: a bogus `stripped_size` is an error, not a panic
+        let adj_offset = header.rva_adjustment()?;
+        let rva_offset = adj_offset as usize;
+
+        // Parse the sections and adjust the offsets
+        let sections = header.sections(bytes, &mut offset)?;
+
+        // Parse the debug data. Must adjust offsets before parsing the image_debug_directory.
+        // A zero-sized debug directory has no entry to read, so the defaults are kept.
+        let mut debug_data = debug::DebugData::default();
+        if header.debug_dir.size != 0 {
+            debug_data.image_debug_directory = debug::ImageDebugDirectory::parse_with_opts(
+                bytes,
+                header.debug_dir,
+                &sections,
+                0,
+                opts,
+            )?;
+            TE::fixup_debug_data(&mut debug_data, adj_offset);
+            debug_data.codeview_pdb70_debug_info = debug::CodeviewPDB70DebugInfo::parse_with_opts(
+                bytes,
+                &debug_data.image_debug_directory,
+                opts,
+            )?;
+        }
+
+        Ok(TE {
+            header,
+            sections,
+            debug_data,
+            rva_offset,
+        })
+    }
+
+    /// Adjust all addresses in the TE binary debug data.
+    fn fixup_debug_data(dd: &mut debug::DebugData, rva_offset: u32) {
+        debug!(
+            "ImageDebugDirectory address of raw data fixed up from: 0x{:X} to 0x{:X}",
+            dd.image_debug_directory.address_of_raw_data,
+            dd.image_debug_directory
+                .address_of_raw_data
+                .wrapping_sub(rva_offset),
+        );
+        dd.image_debug_directory.address_of_raw_data = dd
+            .image_debug_directory
+            .address_of_raw_data
+            .wrapping_sub(rva_offset);
+
+        debug!(
+            "ImageDebugDirectory pointer to raw data fixed up from: 0x{:X} to 0x{:X}",
+            dd.image_debug_directory.pointer_to_raw_data,
+            dd.image_debug_directory
+                .pointer_to_raw_data
+                .wrapping_sub(rva_offset),
+        );
+        dd.image_debug_directory.pointer_to_raw_data = dd
+            .image_debug_directory
+            .pointer_to_raw_data
+            .wrapping_sub(rva_offset);
+    }
+}
+
 /// An analyzed COFF object
 #[derive(Debug)]
 pub struct Coff<'a> {
diff --git a/tests/bins/te/README.md b/tests/bins/te/README.md
new file mode 100644
index 000000000..c60f26333
--- /dev/null
+++ b/tests/bins/te/README.md
@@ -0,0 +1,24 @@
+# TE binaries
+
+Binaries located in this directory are precompiled PE32/PE32+ binaries using a
+terse executable (TE) header as defined in the Platform Initialization (PI)
+specification: [TE](https://uefi.org/specs/PI/1.8/V1_TE_Image.html#te-header).
+These binaries were compiled using the
+[EDK2](https://github.com/tianocore/edk2) build system.
+
+## test_image.te
+
+This binary is a simple Terse executable binary
+
+## test_image_loaded.bin
+
+This binary is the same as `test_image.te`, but it has been loaded by a loader,
+meaning the sections have been placed in the expected address. Please note that
+this particular binary has not been relocated, so no relocations have been
+applied.
+
+## test_image_relocated.bin
+
+This binary is the same as `test_image.te`, but it has been loaded by a loader,
+meaning the sections have been placed in the expected address, and any
+relocations have been applied.
diff --git a/tests/bins/te/test_image.te b/tests/bins/te/test_image.te
new file mode 100644
index 000000000..8ac7c77b1
Binary files /dev/null and b/tests/bins/te/test_image.te differ
diff --git a/tests/bins/te/test_image_loaded.bin b/tests/bins/te/test_image_loaded.bin
new file mode 100644
index 000000000..0463f7bf5
Binary files /dev/null and b/tests/bins/te/test_image_loaded.bin differ
diff --git a/tests/bins/te/test_image_relocated.bin b/tests/bins/te/test_image_relocated.bin
new file mode 100644
index 000000000..123f95d1c
Binary files /dev/null and b/tests/bins/te/test_image_relocated.bin differ
diff --git a/tests/te.rs b/tests/te.rs
new file mode 100644
index 000000000..347131f26
--- /dev/null
+++ b/tests/te.rs
@@ -0,0 +1,104 @@
+#[cfg(test)]
+mod te_tests {
+    use goblin::pe;
+    use goblin::pe::header::machine_to_str;
+    use goblin::pe::section_table::*;
+
+    // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#windows-subsystem
+    const IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER: u8 = 11;
+
+    /// Check the fields parsed from the on-disk TE image against values
+    /// pre-determined to be correct for this specific binary.
+    #[test]
+    fn parse_unloaded_te() {
+        let te = pe::TE::parse(include_bytes!("bins/te/test_image.te"))
+            .expect("Failed to parse TE");
+
+        // Header
+        let hdr = &te.header;
+        assert_eq!(machine_to_str(hdr.machine), "X86_64");
+        assert_eq!(hdr.number_of_sections, 5);
+        assert_eq!(hdr.subsystem, IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER);
+        assert_eq!(hdr.stripped_size, 0x1c8);
+        assert_eq!(hdr.entry_point, 0x10a8);
+        assert_eq!(hdr.base_of_code, 0x0e60);
+        assert_eq!(hdr.image_base, 0x0);
+        assert_eq!(hdr.reloc_dir.virtual_address, 0x6e58);
+        assert_eq!(hdr.reloc_dir.size, 0x0);
+        assert_eq!(hdr.debug_dir.virtual_address, 0x3a64);
+        assert_eq!(hdr.debug_dir.size, 0x54);
+
+        // Section table: for brevity, only the first and last entries are checked
+        let text = &te.sections[0];
+        assert_eq!(String::from_utf8_lossy(&text.name), ".text\0\0\0");
+        assert_eq!(text.virtual_address, 0xe60);
+        assert_eq!(text.virtual_size, 0x17db);
+        assert_eq!(text.pointer_to_linenumbers, 0);
+        assert_eq!(text.pointer_to_raw_data, 0xe60);
+        assert_eq!(text.pointer_to_relocations, 0);
+        let text_flags = IMAGE_SCN_MEM_EXECUTE
+            | IMAGE_SCN_MEM_READ
+            | IMAGE_SCN_MEM_NOT_PAGED
+            | IMAGE_SCN_CNT_CODE;
+        assert_eq!(text.characteristics, text_flags);
+
+        let xdata = &te.sections[4];
+        assert_eq!(String::from_utf8_lossy(&xdata.name), ".xdata\0\0");
+        assert_eq!(xdata.virtual_address, 0x5e60);
+        assert_eq!(xdata.virtual_size, 0x98);
+        let xdata_flags =
+            IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_DISCARDABLE | IMAGE_SCN_CNT_INITIALIZED_DATA;
+        assert_eq!(xdata.characteristics, xdata_flags);
+        assert_eq!(xdata.pointer_to_linenumbers, 0);
+        assert_eq!(xdata.pointer_to_raw_data, 0x5e60);
+        assert_eq!(xdata.pointer_to_relocations, 0);
+
+        // Debug directory
+        let idd = &te.debug_data.image_debug_directory;
+        assert_eq!(idd.size_of_data, 0xab);
+        assert_eq!(idd.address_of_raw_data, 0x3b54);
+        assert_eq!(idd.pointer_to_raw_data, 0x3b54);
+
+        let debug_info = te.debug_data.codeview_pdb70_debug_info.unwrap();
+        assert_eq!(
+            debug_info.signature,
+            [
+                0x70, 0xfb, 0xb5, 0x4b, 0xcf, 0x68, 0x15, 0x42, 0xa1, 0x2b, 0xa5, 0xc5, 0x51, 0x95,
+                0x0a, 0x4a
+            ]
+        );
+        let pdb_path = "c:\\src\\mu_tiano_platforms\\Build\\QemuQ35Pkg\\DEBUG_VS2022\\X64\\QemuQ35Pkg\\RustTerseImageTestDxe\\RustTerseImageTestDxe\\DEBUG\\RustTerseImageTestDxe.pdb\0";
+        assert_eq!(String::from_utf8_lossy(debug_info.filename), pdb_path);
+
+        // Misc matches
+        assert_eq!(hdr.base_of_code, text.virtual_address);
+    }
+
+    /// Verify that parsing of a loaded TE image works.
+    #[test]
+    fn parse_loaded_te() {
+        let te = pe::TE::parse(include_bytes!("bins/te/test_image.te"))
+            .expect("Failed to parse TE");
+        let te_loaded = pe::TE::parse(include_bytes!("bins/te/test_image_loaded.bin"))
+            .expect("Failed to parse TE");
+
+        assert_eq!(te.header, te_loaded.header);
+        assert_eq!(te.sections, te_loaded.sections);
+        assert_eq!(te.debug_data, te_loaded.debug_data);
+    }
+
+    /// Verify that parsing of a relocated TE image works. Raw data should be different due to
+    /// the relocations being applied, but that is outside the scope of goblin.
+    #[test]
+    fn parse_relocated_te() {
+        let te_loaded = pe::TE::parse(include_bytes!("bins/te/test_image_loaded.bin"))
+            .expect("Failed to parse TE");
+        let te_relocated = pe::TE::parse(include_bytes!("bins/te/test_image_relocated.bin"))
+            .expect("Failed to parse TE");
+
+        // Only the image base should be different in the section headers.
+        assert_ne!(te_loaded.header.image_base, te_relocated.header.image_base);
+        assert_eq!(te_loaded.sections, te_relocated.sections);
+        assert_eq!(te_loaded.debug_data, te_relocated.debug_data);
+    }
+}