From e5b71005b76db147c39de9569e1df9a39ba7daec Mon Sep 17 00:00:00 2001 From: Dmitrii - Demenev Date: Sun, 17 Mar 2024 21:17:03 -0600 Subject: [PATCH] pe: add lots of documentation * Documented the DOS header * Partially documented COFF header and added more machine constants * Started documenting optional header * Documented standard fields * Nearly fully documented the PE optional header --- src/pe/characteristic.rs | 41 +++++ src/pe/dll_characteristic.rs | 38 +++++ src/pe/header.rs | 316 +++++++++++++++++++++++++++++++---- src/pe/mod.rs | 2 + src/pe/optional_header.rs | 284 +++++++++++++++++++++++++++++-- src/pe/subsystem.rs | 45 +++++ 6 files changed, 686 insertions(+), 40 deletions(-) create mode 100644 src/pe/dll_characteristic.rs create mode 100644 src/pe/subsystem.rs diff --git a/src/pe/characteristic.rs b/src/pe/characteristic.rs index f5c1fe4e5..801ffcc13 100644 --- a/src/pe/characteristic.rs +++ b/src/pe/characteristic.rs @@ -1,3 +1,6 @@ +//! Constants for flags that indicate attributes of the object or image file. These flags are used in the +//! [`goblin::pe::header::CoffHeader::characteristics`](crate::pe::header::CoffHeader::characteristics) field. + /* type characteristic = | IMAGE_FILE_RELOCS_STRIPPED @@ -73,27 +76,65 @@ let show_type characteristics = else "MANY" (* print all *) */ +/// Image only, Windows CE, and Microsoft Windows NT and later. This indicates that the file does not +/// contain base relocations and must therefore be loaded at its preferred base address. If the base address +/// is not available, the loader reports an error. The default behavior of the linker is to strip base relocations +/// from executable (EXE) files. pub const IMAGE_FILE_RELOCS_STRIPPED: u16 = 0x0001; + +/// Image only. This indicates that the image file is valid and can be run. +/// If this flag is not set, it indicates a linker error. pub const IMAGE_FILE_EXECUTABLE_IMAGE: u16 = 0x0002; + +/// COFF line numbers have been removed. 
This flag is deprecated and should be zero. pub const IMAGE_FILE_LINE_NUMS_STRIPPED: u16 = 0x0004; + +/// COFF symbol table entries for local symbols have been removed. This flag is deprecated and should be zero. pub const IMAGE_FILE_LOCAL_SYMS_STRIPPED: u16 = 0x0008; + +/// Obsolete. Aggressively trim working set. This flag is deprecated for Windows 2000 and later and must be zero. pub const IMAGE_FILE_AGGRESSIVE_WS_TRIM: u16 = 0x0010; + +/// Application can handle > 2-GB addresses. pub const IMAGE_FILE_LARGE_ADDRESS_AWARE: u16 = 0x0020; + +/// This flag is reserved for future use. pub const RESERVED: u16 = 0x0040; + +/// Little endian: the least significant bit (LSB) precedes the most significant bit (MSB) in memory. +/// This flag is deprecated and should be zero. pub const IMAGE_FILE_BYTES_REVERSED_LO: u16 = 0x0080; + +/// Machine is based on a 32-bit-word architecture. pub const IMAGE_FILE_32BIT_MACHINE: u16 = 0x0100; + +/// Debugging information is removed from the image file. pub const IMAGE_FILE_DEBUG_STRIPPED: u16 = 0x0200; + +/// If the image is on removable media, fully load it and copy it to the swap file. pub const IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP: u16 = 0x0400; + +/// If the image is on network media, fully load it and copy it to the swap file. pub const IMAGE_FILE_NET_RUN_FROM_SWAP: u16 = 0x0800; + +/// The image file is a system file, not a user program. pub const IMAGE_FILE_SYSTEM: u16 = 0x1000; + +/// The image file is a dynamic-link library (DLL). Such files are considered executable files for almost all purposes, although they cannot be directly run. pub const IMAGE_FILE_DLL: u16 = 0x2000; + +/// The file should be run only on a uniprocessor machine. pub const IMAGE_FILE_UP_SYSTEM_ONLY: u16 = 0x4000; + +/// Big endian: the MSB precedes the LSB in memory. This flag is deprecated and should be zero. 
pub const IMAGE_FILE_BYTES_REVERSED_HI: u16 = 0x8000; +/// Checks whether the characteristics value indicates that the file is a DLL (dynamically-linked library). pub fn is_dll(characteristics: u16) -> bool { characteristics & IMAGE_FILE_DLL == IMAGE_FILE_DLL } +/// Checks whether the characteristics value indicates that the file is an executable. pub fn is_exe(characteristics: u16) -> bool { characteristics & IMAGE_FILE_EXECUTABLE_IMAGE == IMAGE_FILE_EXECUTABLE_IMAGE } diff --git a/src/pe/dll_characteristic.rs b/src/pe/dll_characteristic.rs new file mode 100644 index 000000000..b63d01d5d --- /dev/null +++ b/src/pe/dll_characteristic.rs @@ -0,0 +1,38 @@ +//! Constants for characteristics of image files. These constants are used in the +//! [`goblin::pe::optional_header::WindowsFields::dll_characteristics`](crate::pe::optional_header::WindowsFields::dll_characteristics) +//! field. +//! +//! The values 0x0001, 0x0002, 0x0004, 0x0008 are reserved for future use and must be zero. + +/// Image can handle a high entropy 64-bit virtual address space. +pub const IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA: u16 = 0x0020; + +/// DLL can be relocated at load time. +pub const IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE: u16 = 0x0040; + +/// Code Integrity checks are enforced. +pub const IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY: u16 = 0x0080; + +/// Image is NX compatible. +pub const IMAGE_DLLCHARACTERISTICS_NX_COMPAT: u16 = 0x0100; + +/// Isolation aware, but do not isolate the image. +pub const IMAGE_DLLCHARACTERISTICS_NO_ISOLATION: u16 = 0x0200; + +/// Does not use structured exception (SE) handling. No SE handler may be called in this image. +pub const IMAGE_DLLCHARACTERISTICS_NO_SEH: u16 = 0x0400; + +/// Do not bind the image. +pub const IMAGE_DLLCHARACTERISTICS_NO_BIND: u16 = 0x0800; + +/// Image must execute in an AppContainer. +pub const IMAGE_DLLCHARACTERISTICS_APPCONTAINER: u16 = 0x1000; + +/// A WDM driver. 
+pub const IMAGE_DLLCHARACTERISTICS_WDM_DRIVER: u16 = 0x2000; + +/// Image supports Control Flow Guard. +pub const IMAGE_DLLCHARACTERISTICS_GUARD_CF: u16 = 0x4000; + +/// Terminal Server aware. +pub const IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE: u16 = 0x8000; diff --git a/src/pe/header.rs b/src/pe/header.rs index b7f52b3e8..cb3b08faa 100644 --- a/src/pe/header.rs +++ b/src/pe/header.rs @@ -8,12 +8,16 @@ use scroll::{ctx, IOread, IOwrite, Pread, Pwrite, SizeWith}; /// In `winnt.h` and `pe.h`, it's `IMAGE_DOS_HEADER`. It's a DOS header present in all PE binaries. /// /// The DOS header is a relic from the MS-DOS era. It used to be useful to display an -/// error message if the binary is run in MS-DOS. +/// error message if the binary is run in MS-DOS by utilizing the DOS stub. /// /// Nowadays, only two fields from /// the DOS header are used on Windows: [`signature` (aka `e_magic`)](DosHeader::signature) /// and [`pe_pointer` (aka `e_lfanew`)](DosHeader::pe_pointer). /// +/// ## Position in a modern PE file +/// +/// The DOS header is located at the beginning of the PE file and is usually followed by the [DosStub]. +/// /// ## Note on the archaic "formatted header" /// /// The subset of the structure spanning from its start to the [`overlay_number` (aka `e_ovno`)](DosHeader::overlay_number) field @@ -381,7 +385,14 @@ impl DosHeader { #[repr(C)] #[derive(Debug, PartialEq, Copy, Clone, Pread, Pwrite)] -/// The DOS stub program which should be executed in DOS mode +/// The DOS stub program which should be executed in DOS mode. It prints the message "This program cannot be run in DOS mode" and exits. +/// +/// ## Position in a modern PE file +/// +/// The [DosStub] is usually located immediately after the [DosHeader] and... +/// +/// * De facto, can be followed by a non-standard ["Rich header"](https://0xrick.github.io/win-internals/pe3/#rich-header). +/// * According to the standard, is followed by the [Header::signature] and then the [CoffHeader]. 
pub struct DosStub(pub [u8; 0x40]); impl Default for DosStub { fn default() -> Self { @@ -396,17 +407,99 @@ impl Default for DosStub { } } -/// COFF Header +/// In `winnt.h`, it's `IMAGE_FILE_HEADER`. COFF Header. +/// +/// Together with the [Header::signature] and the [Header::optional_header], it forms the +/// [`IMAGE_NT_HEADERS`](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_nt_headers32). +/// +/// ## Position in a modern PE file +/// +/// The COFF header is located after the [Header::signature], which in turn is located after the +/// non-standard ["Rich header"](https://0xrick.github.io/win-internals/pe3/#rich-header), if present, +/// and after the [DosStub], according to the standard. +/// +/// COFF header is followed by the [Header::optional_header]. #[repr(C)] #[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +#[doc(alias("IMAGE_FILE_HEADER"))] pub struct CoffHeader { - /// The machine type + /// The architecture type of the computer. An image file can only be run + /// on the specified computer or a system that emulates the specified computer. 
+ /// + /// Can be one of the following values: + /// + /// * [`COFF_MACHINE_UNKNOWN`], + /// * [`COFF_MACHINE_ALPHA`], + /// * [`COFF_MACHINE_ALPHA64`], + /// * [`COFF_MACHINE_AM33`], + /// * [`COFF_MACHINE_X86_64`], + /// * [`COFF_MACHINE_ARM`], + /// * [`COFF_MACHINE_ARM64`], + /// * [`COFF_MACHINE_ARMNT`], + /// * [`COFF_MACHINE_EBC`], + /// * [`COFF_MACHINE_X86`], + /// * [`COFF_MACHINE_IA64`], + /// * [`COFF_MACHINE_LOONGARCH32`], + /// * [`COFF_MACHINE_LOONGARCH64`], + /// * [`COFF_MACHINE_M32R`], + /// * [`COFF_MACHINE_MIPS16`], + /// * [`COFF_MACHINE_MIPSFPU`], + /// * [`COFF_MACHINE_MIPSFPU16`], + /// * [`COFF_MACHINE_POWERPC`], + /// * [`COFF_MACHINE_POWERPCFP`], + /// * [`COFF_MACHINE_R4000`], + /// * [`COFF_MACHINE_RISCV32`], + /// * [`COFF_MACHINE_RISCV64`], + /// * [`COFF_MACHINE_RISCV128`], + /// * [`COFF_MACHINE_SH3`], + /// * [`COFF_MACHINE_SH3DSP`], + /// * [`COFF_MACHINE_SH4`], + /// * [`COFF_MACHINE_SH5`], + /// * [`COFF_MACHINE_THUMB`], + /// * [`COFF_MACHINE_WCEMIPSV2`], + /// + /// or any other value that is not listed here. + /// + /// The constants above are sourced from <https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types>. + /// If there's a missing constant, please open an issue or a pull request. + // TODO: insert the values names with a macro + #[doc(alias("Machine"))] pub machine: u16, + /// The number of sections. This indicates the size of the section table, which immediately follows the headers. + /// Note that the Windows loader limits the number of sections to 96. + /// [Source](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_file_header). + #[doc(alias("NumberOfSections"))] pub number_of_sections: u16, + /// The low 32 bits of the time stamp of the image. This represents the date and time the image was created by the linker. + /// The value is represented in the number of seconds elapsed since midnight (00:00:00), January 1, 1970, Universal + /// Coordinated Time, according to the system clock. 
+ #[doc(alias("TimeDateStamp"))] pub time_date_stamp: u32, + /// The offset of the symbol table, in bytes, or zero if no COFF symbol table exists. + /// + /// Typically, this field is set to 0 because COFF debugging information is deprecated. + /// [Source](https://0xrick.github.io/win-internals/pe4/#file-header-image_file_header). + // TODO: further explain the COFF symbol table. This seems to be a nuanced topic. + #[doc(alias("PointerToSymbolTable"))] pub pointer_to_symbol_table: u32, + /// The number of symbols in the symbol table. + /// + /// Typically, this field is set to 0 because COFF debugging information is deprecated. + /// [Source](https://0xrick.github.io/win-internals/pe4/#file-header-image_file_header). + // Q (JohnScience): Why is the name `number_of_symbol_table` and not `number_of_symbols`? + #[doc(alias("NumberOfSymbols"))] pub number_of_symbol_table: u32, + /// The size of the optional header, in bytes. This value should be zero for object files. + /// + /// The [`goblin::pe::optional_header::OptionalHeader`](crate::pe::optional_header::OptionalHeader) is meant to + /// represent either the 32-bit or the 64-bit optional header. The size of the optional header is used to determine + /// which one it is. + #[doc(alias("SizeOfOptionalHeader"))] pub size_of_optional_header: u16, + /// The [characteristics](https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#characteristics) of the image. + /// + /// The constants for the characteristics are available in the [`goblin::pe::characteristic`](crate::pe::characteristic) module. 
+ #[doc(alias("Characteristics"))] pub characteristics: u16, } @@ -414,55 +507,200 @@ pub const SIZEOF_COFF_HEADER: usize = 20; /// PE\0\0, little endian pub const PE_MAGIC: u32 = 0x0000_4550; pub const SIZEOF_PE_MAGIC: usize = 4; -/// The contents of this field are assumed to be applicable to any machine type + +// Q (JohnScience): doesn't it make sense to move all these constants to a dedicated module +// and then re-export them from here? This way, the module will be more organized. +// +// Also, don't we want to declare them in a macro to remove the boilerplate and make the implementation +// of `machine_to_str` more future-proof and concise? For example, addition of... +// +// * `IMAGE_FILE_MACHINE_LOONGARCH32`, +// * `IMAGE_FILE_MACHINE_LOONGARCH64`, +// * `IMAGE_FILE_MACHINE_ALPHA`, +// * `IMAGE_FILE_MACHINE_ALPHA64` +// +// didn't trigger the exhaustiveness check because there was a necessary default case. +// +// This way, we can also generate a test that would parse <https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types> +// and check that there are no missing constants. + +/// The contents of this field are assumed to be applicable to any machine type. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_UNKNOWN"))] pub const COFF_MACHINE_UNKNOWN: u16 = 0x0; -/// Matsushita AM33 + +/// Alpha AXP, 32-bit address space. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_ALPHA"))] +pub const COFF_MACHINE_ALPHA: u16 = 0x184; + +/// Alpha AXP, 64-bit address space. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_ALPHA64"))] +#[doc(alias("IMAGE_FILE_MACHINE_AXP64"))] +pub const COFF_MACHINE_ALPHA64: u16 = 0x284; + +/// Matsushita AM33. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_AM33"))] pub const COFF_MACHINE_AM33: u16 = 0x1d3; -/// x64 + +/// x64 aka amd64. 
+/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_AMD64"))] +// Q (JohnScience): why is this `COFF_MACHINE_X86_64` and not `COFF_MACHINE_AMD64`? +// Should we deprecate the former and use the latter instead? pub const COFF_MACHINE_X86_64: u16 = 0x8664; -/// ARM little endian + +/// ARM little endian. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_ARM"))] pub const COFF_MACHINE_ARM: u16 = 0x1c0; -/// ARM64 little endian + +/// ARM64 little endian. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_ARM64"))] pub const COFF_MACHINE_ARM64: u16 = 0xaa64; -/// ARM Thumb-2 little endian + +/// ARM Thumb-2 little endian. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_ARMNT"))] pub const COFF_MACHINE_ARMNT: u16 = 0x1c4; -/// EFI byte code + +/// EFI byte code. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_EBC"))] pub const COFF_MACHINE_EBC: u16 = 0xebc; -/// Intel 386 or later processors and compatible processors + +/// Intel 386 or later processors and compatible processors. +/// +/// One of the possible values for [`CoffHeader::machine`]. +// Q (JohnScience): why is this `COFF_MACHINE_X86` and not `COFF_MACHINE_I386`? +// Should we deprecate the former and use the latter instead? +#[doc(alias("IMAGE_FILE_MACHINE_I386"))] pub const COFF_MACHINE_X86: u16 = 0x14c; -/// Intel Itanium processor family + +/// Intel Itanium processor family. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_IA64"))] pub const COFF_MACHINE_IA64: u16 = 0x200; -/// Mitsubishi M32R little endian + +/// LoongArch 32-bit processor family. +/// +/// One of the possible values for [`CoffHeader::machine`]. 
+#[doc(alias("IMAGE_FILE_MACHINE_LOONGARCH32"))] +pub const COFF_MACHINE_LOONGARCH32: u16 = 0x6232; + +/// LoongArch 64-bit processor family. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_LOONGARCH64"))] +pub const COFF_MACHINE_LOONGARCH64: u16 = 0x6264; + +/// Mitsubishi M32R little endian. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_M32R"))] pub const COFF_MACHINE_M32R: u16 = 0x9041; -/// MIPS16 + +/// MIPS16. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_MIPS16"))] pub const COFF_MACHINE_MIPS16: u16 = 0x266; -/// MIPS with FPU + +/// MIPS with FPU. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_MIPSFPU"))] pub const COFF_MACHINE_MIPSFPU: u16 = 0x366; -/// MIPS16 with FPU + +/// MIPS16 with FPU. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_MIPSFPU16"))] pub const COFF_MACHINE_MIPSFPU16: u16 = 0x466; -/// Power PC little endian + +/// Power PC little endian. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_POWERPC"))] pub const COFF_MACHINE_POWERPC: u16 = 0x1f0; -/// Power PC with floating point support + +/// Power PC with floating point support. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_POWERPCFP"))] pub const COFF_MACHINE_POWERPCFP: u16 = 0x1f1; -/// MIPS little endian + +/// MIPS little endian. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_R4000"))] pub const COFF_MACHINE_R4000: u16 = 0x166; -/// RISC-V 32-bit address space + +/// RISC-V 32-bit address space. +/// +/// One of the possible values for [`CoffHeader::machine`]. 
+#[doc(alias("IMAGE_FILE_MACHINE_RISCV32"))] pub const COFF_MACHINE_RISCV32: u16 = 0x5032; -/// RISC-V 64-bit address space + +/// RISC-V 64-bit address space. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_RISCV64"))] pub const COFF_MACHINE_RISCV64: u16 = 0x5064; + /// RISC-V 128-bit address space +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_RISCV128"))] pub const COFF_MACHINE_RISCV128: u16 = 0x5128; -/// Hitachi SH3 + +/// Hitachi SH3. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_SH3"))] pub const COFF_MACHINE_SH3: u16 = 0x1a2; -/// Hitachi SH3 DSP + +/// Hitachi SH3 DSP. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_SH3DSP"))] pub const COFF_MACHINE_SH3DSP: u16 = 0x1a3; -/// Hitachi SH4 + +/// Hitachi SH4. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_SH4"))] pub const COFF_MACHINE_SH4: u16 = 0x1a6; -/// Hitachi SH5 + +/// Hitachi SH5. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_SH5"))] pub const COFF_MACHINE_SH5: u16 = 0x1a8; -/// Thumb + +/// Thumb. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_THUMB"))] pub const COFF_MACHINE_THUMB: u16 = 0x1c2; -/// MIPS little-endian WCE v2 + +/// MIPS little-endian WCE v2. +/// +/// One of the possible values for [`CoffHeader::machine`]. +#[doc(alias("IMAGE_FILE_MACHINE_WCEMIPSV2"))] pub const COFF_MACHINE_WCEMIPSV2: u16 = 0x169; impl CoffHeader { @@ -531,11 +769,23 @@ impl CoffHeader { } } +/// The PE header. +/// +/// ## Position in a modern PE file +/// +/// The PE header is located at the very beginning of the file and +/// is followed by the section table and sections. 
#[derive(Debug, PartialEq, Copy, Clone, Default)] pub struct Header { pub dos_header: DosHeader, /// DOS program for legacy loaders pub dos_stub: DosStub, + + // Q (JohnScience): should we care about the "rich header"? + // https://0xrick.github.io/win-internals/pe3/#rich-header + // Introducing it would be a breaking change because it would require a new field in the struct + // but it would be a good addition to the library. + // /// PE Magic: PE\0\0, little endian pub signature: u32, pub coff_header: CoffHeader, @@ -587,18 +837,26 @@ impl ctx::TryIntoCtx for Header { } } -/// Convert machine to str representation +/// Convert machine to str representation. Any case of "COFF_UNKNOWN" +/// should be expected to change to a more specific value. pub fn machine_to_str(machine: u16) -> &'static str { + // TODO: generate the branches with a macro match machine { COFF_MACHINE_UNKNOWN => "UNKNOWN", + COFF_MACHINE_ALPHA => "ALPHA", + COFF_MACHINE_ALPHA64 => "ALPHA64", COFF_MACHINE_AM33 => "AM33", + // This is an outlier. In the C header, it's IMAGE_FILE_MACHINE_AMD64 COFF_MACHINE_X86_64 => "X86_64", COFF_MACHINE_ARM => "ARM", COFF_MACHINE_ARM64 => "ARM64", COFF_MACHINE_ARMNT => "ARM_NT", COFF_MACHINE_EBC => "EBC", + // This is an outlier. 
In the C header, it's IMAGE_FILE_MACHINE_I386 COFF_MACHINE_X86 => "X86", COFF_MACHINE_IA64 => "IA64", + COFF_MACHINE_LOONGARCH32 => "LOONGARCH32", + COFF_MACHINE_LOONGARCH64 => "LOONGARCH64", COFF_MACHINE_M32R => "M32R", COFF_MACHINE_MIPS16 => "MIPS_16", COFF_MACHINE_MIPSFPU => "MIPS_FPU", diff --git a/src/pe/mod.rs b/src/pe/mod.rs index 2336fddc5..dd9c2c5df 100644 --- a/src/pe/mod.rs +++ b/src/pe/mod.rs @@ -15,6 +15,7 @@ pub mod certificate_table; pub mod characteristic; pub mod data_directories; pub mod debug; +pub mod dll_characteristic; pub mod exception; pub mod export; pub mod header; @@ -23,6 +24,7 @@ pub mod optional_header; pub mod options; pub mod relocation; pub mod section_table; +pub mod subsystem; pub mod symbol; pub mod utils; diff --git a/src/pe/optional_header.rs b/src/pe/optional_header.rs index 852f4dced..e3fba43f7 100644 --- a/src/pe/optional_header.rs +++ b/src/pe/optional_header.rs @@ -1,3 +1,5 @@ +//! The module for the PE optional header ([`OptionalHeader`]) and related items. + use crate::container; use crate::error; @@ -6,52 +8,125 @@ use crate::pe::data_directories; use scroll::{ctx, Endian, LE}; use scroll::{Pread, Pwrite, SizeWith}; -/// standard COFF fields +/// Standard 32-bit COFF fields (for `PE32`). +/// +/// In `winnt.h`, this is a subset of [`IMAGE_OPTIONAL_HEADER32`](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header32). +/// +/// * For 64-bit version, see [`StandardFields64`]. +/// * For unified version, see [`StandardFields`]. #[repr(C)] #[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)] pub struct StandardFields32 { + /// See docs for [`StandardFields::magic`](crate::pe::optional_header::StandardFields::magic). pub magic: u16, + /// See docs for [`StandardFields::major_linker_version`]. pub major_linker_version: u8, + /// See docs for [`StandardFields::minor_linker_version`]. pub minor_linker_version: u8, + /// See docs for [`StandardFields::size_of_code`]. 
pub size_of_code: u32, + /// See docs for [`StandardFields::size_of_initialized_data`]. pub size_of_initialized_data: u32, + /// See docs for [`StandardFields::size_of_uninitialized_data`]. pub size_of_uninitialized_data: u32, + /// See docs for [`StandardFields::address_of_entry_point`]. pub address_of_entry_point: u32, + /// See docs for [`StandardFields::base_of_code`]. pub base_of_code: u32, - /// absent in 64-bit PE32+ + /// See docs for [`StandardFields::base_of_data`]. pub base_of_data: u32, } +/// Convenience constant for `core::mem::size_of::<StandardFields32>()`. pub const SIZEOF_STANDARD_FIELDS_32: usize = 28; -/// standard 64-bit COFF fields +/// Standard 64-bit COFF fields (for `PE32+`). +/// +/// In `winnt.h`, this is a subset of [`IMAGE_OPTIONAL_HEADER64`](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header64). +/// +/// * For 32-bit version, see [`StandardFields32`]. +/// * For unified version, see [`StandardFields`]. #[repr(C)] #[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)] pub struct StandardFields64 { + /// See docs for [`StandardFields::magic`](crate::pe::optional_header::StandardFields::magic). pub magic: u16, + /// See docs for [`StandardFields::major_linker_version`]. pub major_linker_version: u8, + /// See docs for [`StandardFields::minor_linker_version`]. pub minor_linker_version: u8, + /// See docs for [`StandardFields::size_of_code`]. pub size_of_code: u32, + /// See docs for [`StandardFields::size_of_initialized_data`]. pub size_of_initialized_data: u32, + /// See docs for [`StandardFields::size_of_uninitialized_data`]. pub size_of_uninitialized_data: u32, + /// See docs for [`StandardFields::address_of_entry_point`]. pub address_of_entry_point: u32, + /// See docs for [`StandardFields::base_of_code`]. pub base_of_code: u32, } +/// Convenience constant for `core::mem::size_of::<StandardFields64>()`. 
pub const SIZEOF_STANDARD_FIELDS_64: usize = 24; -/// Unified 32/64-bit COFF fields +/// Unified 32/64-bit standard COFF fields (for `PE32` and `PE32+`). +/// +/// Notably, a value of this type is a member of +/// [`goblin::pe::optional_header::OptionalHeader`](crate::pe::optional_header::OptionalHeader), +/// which in turn represents either +/// * [`IMAGE_OPTIONAL_HEADER32`](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header32); or +/// * [`IMAGE_OPTIONAL_HEADER64`](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header64) +/// +/// from `winnt.h`, depending on the value of [`StandardFields::magic`]. +/// +/// ## Position in PE binary +/// +/// Standard COFF fields are located at the beginning of the [`OptionalHeader`] and before the +/// [`WindowsFields`]. +/// +/// ## Related structures +/// +/// * For 32-bit version, see [`StandardFields32`]. +/// * For 64-bit version, see [`StandardFields64`]. #[derive(Debug, PartialEq, Copy, Clone, Default)] pub struct StandardFields { + /// The state of the image file. This member can be one of the following values: + /// + /// * [`IMAGE_NT_OPTIONAL_HDR32_MAGIC`]. + /// * [`IMAGE_NT_OPTIONAL_HDR64_MAGIC`]. + /// * [`IMAGE_ROM_OPTIONAL_HDR_MAGIC`]. + #[doc(alias = "Magic")] pub magic: u16, + /// The major version number of the linker. + #[doc(alias = "MajorLinkerVersion")] pub major_linker_version: u8, + /// The minor version number of the linker. + #[doc(alias = "MinorLinkerVersion")] pub minor_linker_version: u8, + /// The size of the code section (.text), in bytes, or the sum of all such sections if there are multiple code sections. + #[doc(alias = "SizeOfCode")] pub size_of_code: u64, + /// The size of the initialized data section (.data), in bytes, or the sum of all such sections if there are multiple initialized data sections. 
+ #[doc(alias = "SizeOfInitializedData")] pub size_of_initialized_data: u64, + /// The size of the uninitialized data section (.bss), in bytes, or the sum of all such sections if there are multiple uninitialized data sections. + #[doc(alias = "SizeOfUninitializedData")] pub size_of_uninitialized_data: u64, + /// A pointer to the entry point function, relative to the image base address. + /// + /// * For executable files, this is the starting address. + /// * For device drivers, this is the address of the initialization function. + /// + /// The entry point function is optional for DLLs. When no entry point is present, this member is zero. pub address_of_entry_point: u64, + /// A pointer to the beginning of the code section (.text), relative to the image base. pub base_of_code: u64, - /// absent in 64-bit PE32+ + /// A pointer to the beginning of the data section (.data), relative to the image base. Absent in 64-bit PE32+. + /// + /// In other words, it is a Relative virtual address (RVA) of the start of the data (.data) section when the PE + /// is loaded into memory. + // Q (JohnScience): Why is this a u32 and not an Option<u32>? pub base_of_data: u32, } @@ -118,71 +193,217 @@ impl From<StandardFields> for StandardFields64 { } } -/// Standard fields magic number for 32-bit binary +/// Standard fields magic number for 32-bit binary (`PE32`). pub const MAGIC_32: u16 = 0x10b; -/// Standard fields magic number for 64-bit binary +/// Standard fields magic number for 64-bit binary (`PE32+`). pub const MAGIC_64: u16 = 0x20b; -/// Windows specific fields +/// Windows specific fields for 32-bit binary (`PE32`). They're also known as "NT additional fields". +/// +/// In `winnt.h`, this is a subset of [`IMAGE_OPTIONAL_HEADER32`](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header32). +/// +/// * For 64-bit version, see [`WindowsFields64`]. +/// * For unified version, see [`WindowsFields`]. 
#[repr(C)] #[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)] pub struct WindowsFields32 { + /// See docs for [`WindowsFields::image_base`]. pub image_base: u32, + /// See docs for [`WindowsFields::section_alignment`]. pub section_alignment: u32, + /// See docs for [`WindowsFields::file_alignment`]. pub file_alignment: u32, + /// See docs for [`WindowsFields::major_operating_system_version`]. pub major_operating_system_version: u16, + /// See docs for [`WindowsFields::minor_operating_system_version`]. pub minor_operating_system_version: u16, + /// See docs for [`WindowsFields::major_image_version`]. pub major_image_version: u16, + /// See docs for [`WindowsFields::minor_image_version`]. pub minor_image_version: u16, + /// See docs for [`WindowsFields::major_subsystem_version`]. pub major_subsystem_version: u16, + /// See docs for [`WindowsFields::minor_subsystem_version`]. pub minor_subsystem_version: u16, + /// See docs for [`WindowsFields::win32_version_value`]. pub win32_version_value: u32, + /// See docs for [`WindowsFields::size_of_image`]. pub size_of_image: u32, + /// See docs for [`WindowsFields::size_of_headers`]. pub size_of_headers: u32, + /// See docs for [`WindowsFields::check_sum`]. pub check_sum: u32, + /// See docs for [`WindowsFields::subsystem`]. pub subsystem: u16, + /// See docs for [`WindowsFields::dll_characteristics`]. pub dll_characteristics: u16, + /// See docs for [`WindowsFields::size_of_stack_reserve`]. pub size_of_stack_reserve: u32, + /// See docs for [`WindowsFields::size_of_stack_commit`]. pub size_of_stack_commit: u32, + /// See docs for [`WindowsFields::size_of_heap_reserve`]. pub size_of_heap_reserve: u32, + /// See docs for [`WindowsFields::size_of_heap_commit`]. pub size_of_heap_commit: u32, + /// See docs for [`WindowsFields::loader_flags`]. pub loader_flags: u32, + /// See docs for [`WindowsFields::number_of_rva_and_sizes`]. 
pub number_of_rva_and_sizes: u32, } +/// Convenience constant for `core::mem::size_of::<WindowsFields32>()`. pub const SIZEOF_WINDOWS_FIELDS_32: usize = 68; -/// Offset of the `check_sum` field in [`WindowsFields32`] +/// Offset of the `check_sum` field in [`WindowsFields32`]. pub const OFFSET_WINDOWS_FIELDS_32_CHECKSUM: usize = 36; -/// 64-bit Windows specific fields +/// Windows specific fields for 64-bit binary (`PE32+`). They're also known as "NT additional fields". +/// +/// In `winnt.h`, this is a subset of [`IMAGE_OPTIONAL_HEADER64`](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header64). +/// +/// *Note: at the moment of writing, [`WindowsFields`] is an alias for `WindowsFields64`. Though [nominally equivalent](https://en.wikipedia.org/wiki/Nominal_type_system), +/// they're semantically distinct.* +/// +/// * For 32-bit version, see [`WindowsFields32`]. +/// * For unified version, see [`WindowsFields`]. #[repr(C)] #[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)] pub struct WindowsFields64 { + /// The *preferred* yet rarely provided address of the first byte of image when loaded into memory; must be a + /// multiple of 64 K. + /// + /// This address is rarely used because Windows uses memory protection mechanisms like Address Space Layout + /// Randomization (ASLR). As a result, it’s rare to see an image mapped to the preferred address. Instead, + /// the Windows PE Loader maps the file to a different address with an unused memory range. This process + /// would create issues because some addresses that would have been constant are now changed. The Loader + /// addresses this via a process called PE relocation which fixes these constant addresses to work with the + /// new image base. The relocation section (.reloc) holds data essential to this relocation process. + /// [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/nt-headers/optional-header/). 
+ /// + /// * The default address for DLLs is 0x10000000. + /// * The default for Windows CE EXEs is 0x00010000. + /// * The default for Windows NT, Windows 2000, Windows XP, Windows 95, Windows 98, and Windows Me is 0x00400000. + /// + /// ## Position in PE binary + /// + /// Windows fields are located inside [`OptionalHeader`] after [`StandardFields`] and before the + /// [`DataDirectories`](data_directories::DataDirectories). + /// + /// ## Related structures + /// + /// * For 32-bit version, see [`WindowsFields32`]. + /// * For unified version, see [`WindowsFields`], especially the note on nominal equivalence. + #[doc(alias = "ImageBase")] pub image_base: u64, + /// Holds a byte value used for section alignment in memory. + /// + /// This value must be greater than or equal to + /// [`file_alignment`](WindowsFields64::file_alignment), which is the next field. + /// + /// When loaded into memory, sections are aligned in memory boundaries that are multiples of this value. + /// + /// If the value is less than the architecture’s page size, then the value should match + /// [`file_alignment`](WindowsFields64::file_alignment). + /// [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/nt-headers/optional-header/). + /// + /// The default value is the page size for the architecture. + #[doc(alias = "SectionAlignment")] pub section_alignment: u32, + /// The alignment factor (in bytes) that is used to align the raw data of sections in the image file. + /// + /// The value should be a power of 2 between 512 and 64 K, inclusive. + /// + /// If the [`section_alignment`](WindowsFields64::section_alignment) is less than the architecture's page size, + /// then [`file_alignment`](WindowsFields64::file_alignment) must match [`section_alignment`](WindowsFields64::section_alignment). 
+ /// + /// If [`file_alignment`](WindowsFields64::file_alignment) is less than [`section_alignment`](WindowsFields64::section_alignment), + /// then the remainder will be padded with zeroes in order to maintain the alignment boundaries. + /// [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/nt-headers/optional-header/). + /// + /// The default value is 512. + #[doc(alias = "FileAlignment")] pub file_alignment: u32, + /// The major version number of the required operating system. + #[doc(alias = "MajorOperatingSystemVersion")] pub major_operating_system_version: u16, + /// The minor version number of the required operating system. + #[doc(alias = "MinorOperatingSystemVersion")] pub minor_operating_system_version: u16, + /// The major version number of the image. + #[doc(alias = "MajorImageVersion")] pub major_image_version: u16, + /// The minor version number of the image. + #[doc(alias = "MinorImageVersion")] pub minor_image_version: u16, + /// The major version number of the subsystem. + #[doc(alias = "MajorSubsystemVersion")] pub major_subsystem_version: u16, + /// The minor version number of the subsystem. + #[doc(alias = "MinorSubsystemVersion")] pub minor_subsystem_version: u16, + /// Reserved, must be zero. + #[doc(alias = "Win32VersionValue")] pub win32_version_value: u32, + /// The size (in bytes) of the image, including all headers, as the image is loaded in memory. + /// + /// It must be a multiple of the [`section_alignment`](WindowsFields64::section_alignment). + #[doc(alias = "SizeOfImage")] pub size_of_image: u32, + /// The combined size of an MS-DOS stub, PE header, and section headers rounded up to a multiple of + /// [`file_alignment`](WindowsFields64::file_alignment). + #[doc(alias = "SizeOfHeaders")] pub size_of_headers: u32, + /// The image file checksum. The algorithm for computing the checksum is incorporated into IMAGHELP.DLL. 
+ /// + /// The following are checked for validation at load time: + /// * all drivers, + /// * any DLL loaded at boot time, and + /// * any DLL that is loaded into a critical Windows process. + #[doc(alias = "CheckSum")] pub check_sum: u32, + /// The subsystem that is required to run this image. + /// + /// The subsystem can be one of the values in the [`goblin::pe::subsystem`](crate::pe::subsystem) module. + #[doc(alias = "Subsystem")] pub subsystem: u16, + /// DLL characteristics of the image. + /// + /// DLL characteristics are a bitwise OR of zero or more of the flags in the + /// [`goblin::pe::dll_characteristic`](crate::pe::dll_characteristic) module. + #[doc(alias = "DllCharacteristics")] pub dll_characteristics: u16, + /// The size of the stack to reserve. Only [`WindowsFields::size_of_stack_commit`] is committed; + /// the rest is made available one page at a time until the reserve size is reached. + /// + /// In the context of memory management in operating systems, "commit" refers to the act of allocating physical memory + /// to back a portion of the virtual memory space. + /// + /// When a program requests memory, the operating system typically allocates virtual memory space for it. However, + /// this virtual memory space doesn't immediately consume physical memory (RAM) resources. Instead, physical memory + /// is only allocated when the program actually uses (or accesses) that portion of the virtual memory space. + /// This allocation of physical memory to back virtual memory is called "committing" memory. + #[doc(alias = "SizeOfStackReserve")] pub size_of_stack_reserve: u64, + /// The size of the stack to commit. + #[doc(alias = "SizeOfStackCommit")] pub size_of_stack_commit: u64, + /// The size of the local heap space to reserve. Only [`WindowsFields::size_of_heap_commit`] is committed; the rest + /// is made available one page at a time until the reserve size is reached. 
+ #[doc(alias = "SizeOfHeapReserve")] pub size_of_heap_reserve: u64, + /// The size of the local heap space to commit. + #[doc(alias = "SizeOfHeapCommit")] pub size_of_heap_commit: u64, + /// Reserved, must be zero. + #[doc(alias = "LoaderFlags")] pub loader_flags: u32, + /// The number of data-directory entries in the remainder of the optional header. Each describes a location and size. + #[doc(alias = "NumberOfRvaAndSizes")] pub number_of_rva_and_sizes: u32, } +/// Convenience constant for `core::mem::size_of::<WindowsFields64>()`. pub const SIZEOF_WINDOWS_FIELDS_64: usize = 88; -/// Offset of the `check_sum` field in [`WindowsFields64`] +/// Offset of the `check_sum` field in [`WindowsFields64`]. pub const OFFSET_WINDOWS_FIELDS_64_CHECKSUM: usize = 40; // /// Generic 32/64-bit Windows specific fields @@ -297,16 +518,57 @@ impl TryFrom for WindowsFields32 { // } // } +/// Unified 32/64-bit Windows fields (for `PE32` and `PE32+`). Since 64-bit fields are a superset of 32-bit fields, +/// `WindowsFields` is an alias for `WindowsFields64`. +// +// Opinion (JohnScience): even though they're structurally equivalent, it was a questionable idea to make +// them nominally equivalent as well because they're not actually the same thing semantically. WindowsFields is meant to be +// a unified type that can represent either 32-bit or 64-bit Windows fields. +// +// How do you document this effectively and forward-compatibly? `WindowsFields64` and `WindowsFields` need +// different documentation. pub type WindowsFields = WindowsFields64; +/// Unified 32/64-bit optional header (for `PE32` and `PE32+`). +/// +/// Optional header is the most important of the [NT headers](https://offwhitesecurity.dev/malware-development/portable-executable-pe/nt-headers/). +/// Although it's called "optional", it's actually required for PE image files. 
+/// +/// It is meant to represent either +/// +/// * [`IMAGE_OPTIONAL_HEADER32`](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header32); or +/// * [`IMAGE_OPTIONAL_HEADER64`](https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header64). +/// +/// Whether it's 32 or 64-bit is determined by the [`StandardFields::magic`] and by the value +/// [`CoffHeader::size_of_optional_header`](crate::pe::header::CoffHeader::size_of_optional_header). +/// +/// ## Position in PE binary +/// +/// The optional header is located after [`CoffHeader`](crate::pe::header::CoffHeader) and before +/// section table. #[derive(Debug, PartialEq, Copy, Clone)] +#[doc(alias = "IMAGE_OPTIONAL_HEADER32")] +#[doc(alias = "IMAGE_OPTIONAL_HEADER64")] pub struct OptionalHeader { + /// Unified standard (COFF) fields. See [`StandardFields`] to learn more. pub standard_fields: StandardFields, + /// Unified Windows fields. See [`WindowsFields`] to learn more. pub windows_fields: WindowsFields, + /// Data directories. See [`DataDirectories`](data_directories::DataDirectories) to learn more. pub data_directories: data_directories::DataDirectories, } +/// Magic number for 32-bit binary (`PE32`). +pub const IMAGE_NT_OPTIONAL_HDR32_MAGIC: u16 = 0x10b; +/// Magic number for 64-bit binary (`PE32+`). +pub const IMAGE_NT_OPTIONAL_HDR64_MAGIC: u16 = 0x20b; +/// Magic number for a ROM image. +/// +/// More info: . +pub const IMAGE_ROM_OPTIONAL_HDR_MAGIC: u16 = 0x107; + impl OptionalHeader { + /// Returns the container type of the PE binary. pub fn container(&self) -> error::Result { match self.standard_fields.magic { MAGIC_32 => Ok(container::Container::Little), diff --git a/src/pe/subsystem.rs b/src/pe/subsystem.rs new file mode 100644 index 000000000..a611591ae --- /dev/null +++ b/src/pe/subsystem.rs @@ -0,0 +1,45 @@ +//! Constants for subsystems required to run image files. These constants are used in the +//! 
[`goblin::pe::optional_header::WindowsFields::subsystem`](crate::pe::optional_header::WindowsFields::subsystem) +//! field. + +/// An unknown subsystem. +pub const IMAGE_SUBSYSTEM_UNKNOWN: u16 = 0; + +/// Device drivers and native Windows processes. +pub const IMAGE_SUBSYSTEM_NATIVE: u16 = 1; + +/// The Windows graphical user interface (GUI) subsystem. +pub const IMAGE_SUBSYSTEM_WINDOWS_GUI: u16 = 2; + +/// The Windows character subsystem. +pub const IMAGE_SUBSYSTEM_WINDOWS_CUI: u16 = 3; + +/// The OS/2 character subsystem. +pub const IMAGE_SUBSYSTEM_OS2_CUI: u16 = 5; + +/// The Posix character subsystem. +pub const IMAGE_SUBSYSTEM_POSIX_CUI: u16 = 7; + +/// Native Win9x driver. +pub const IMAGE_SUBSYSTEM_NATIVE_WINDOWS: u16 = 8; + +/// Windows CE. +pub const IMAGE_SUBSYSTEM_WINDOWS_CE_GUI: u16 = 9; + +/// An Extensible Firmware Interface (EFI) application. +pub const IMAGE_SUBSYSTEM_EFI_APPLICATION: u16 = 10; + +/// An EFI driver with boot services. +pub const IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER: u16 = 11; + +/// An EFI driver with run-time services. +pub const IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER: u16 = 12; + +/// An EFI ROM image. +pub const IMAGE_SUBSYSTEM_EFI_ROM: u16 = 13; + +/// XBOX. +pub const IMAGE_SUBSYSTEM_XBOX: u16 = 14; + +/// Windows boot application. +pub const IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION: u16 = 16;