diff --git a/src/pe/header.rs b/src/pe/header.rs index 06e23c0b0..b7f52b3e8 100644 --- a/src/pe/header.rs +++ b/src/pe/header.rs @@ -5,50 +5,288 @@ use alloc::vec::Vec; use log::debug; use scroll::{ctx, IOread, IOwrite, Pread, Pwrite, SizeWith}; -/// DOS header present in all PE binaries +/// In `winnt.h` and `pe.h`, it's `IMAGE_DOS_HEADER`. It's a DOS header present in all PE binaries. +/// +/// The DOS header is a relic from the MS-DOS era. It used to be useful to display an +/// error message if the binary is run in MS-DOS. +/// +/// Nowadays, only two fields from +/// the DOS header are used on Windows: [`signature` (aka `e_magic`)](DosHeader::signature) +/// and [`pe_pointer` (aka `e_lfanew`)](DosHeader::pe_pointer). +/// +/// ## Note on the archaic "formatted header" +/// +/// The subset of the structure spanning from its start to the [`overlay_number` (aka `e_ovno`)](DosHeader::overlay_number) field +/// included (i.e. till the offset 0x1C) used to be commonly known as "formatted header", since their position and contents were +/// fixed. Optional information used by overlay managers could have followed the formatted header. In the absence of optional +/// information, the formatted header was followed by the ["relocation pointer table"](https://www.tavi.co.uk/phobos/exeformat.html#reloctable). +/// +/// Overlays were sections of a program that remained on disk until the program actually required them. Different overlays +/// could thus share the same memory area. The overlays were loaded and unloaded by special code provided by the program +/// or its run-time library. +/// +/// [Source](https://www.tavi.co.uk/phobos/exeformat.html#:~:text=Format%20of%20the%20.EXE%20file%20header). #[repr(C)] #[derive(Debug, PartialEq, Copy, Clone, Default, Pwrite)] +#[doc(alias("IMAGE_DOS_HEADER"))] pub struct DosHeader { - /// Magic number: 5a4d + /// Magic number: `[0x5A, 0x4D]`. 
In [little endian](https://en.wikipedia.org/wiki/Endianness) + /// [ASCII](https://en.wikipedia.org/wiki/ASCII), it reads "MZ" for [Mark Zbikowski](https://en.wikipedia.org/wiki/Mark_Zbikowski). + /// + /// ## Non-MZ DOS executables + /// + /// * For [IBM OS/2](https://www.britannica.com/technology/IBM-OS-2), the value was "NE". + /// * For IBM OS/2 LE, the value was "LE". + /// * For [NT](https://en.wikipedia.org/wiki/Windows_NT), the value was "PE\0\0". + /// + /// Sources: + /// + /// * + /// * + #[doc(alias("e_magic"))] pub signature: u16, - /// e_cblp + /// In `winnt.h` and `pe.h`, it's `e_cblp`. + /// + /// It used to specify the number of bytes actually used in the last "page". + /// Page used to refer to a segment of memory, usually of 512 bytes size. + /// + /// The case of full page was represented by 0x0000 (since the last page is never empty). + /// + /// For example, assuming a page size of 512 bytes, this value would + /// be 0x0000 for a 1024 byte file, and 0x0001 for a 1025 byte file + /// (since it only contains one valid byte). + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// Typically, this field is set to 0. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_cblp"))] pub bytes_on_last_page: u16, - /// e_cp + /// In `winnt.h` and `pe.h`, it's `e_cp`. + /// + /// It used to specify the number of pages required to hold a file. For example, + /// if the file contained 1024 bytes, and the file had pages of a size of 512 bytes, + /// this [word](https://en.wikipedia.org/wiki/Word_(computer_architecture)) would contain + /// 0x0002 (2 pages); if the file contained 1025 bytes, this word would contain 0x0003 (3 pages). + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// Typically, this field is set to 0. 
[Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_cp"))] pub pages_in_file: u16, - /// e_crlc + /// In `winnt.h` and `pe.h`, it's `e_crlc`. + /// + /// It used to specify the number of "relocation items", i.e. the number of entries that + /// existed in the ["relocation pointer table"](https://www.tavi.co.uk/phobos/exeformat.html#reloctable). + /// If there were no relocations, this field would contain 0x0000. + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// ## On relocation items and relocation pointer table + /// + /// When a program is compiled, memory addresses are often hard-coded into the binary code. + /// These addresses are usually relative to the base address where the program expects to be loaded into memory. + /// However, when the program is loaded into memory, it might not be loaded at its preferred base address due to + /// various reasons such as memory fragmentation or other programs already occupying that space. + /// + /// Relocation items, also known as fixups or relocations, are pieces of data embedded within the executable file + /// that indicate which memory addresses need to be adjusted when the program is loaded at a different base address. + /// These relocations specify the location and type of adjustment needed. + /// + /// The relocation pointer table is a data structure that contains pointers to the locations within the executable file + /// where relocations need to be applied. It allows the operating system's loader to efficiently locate and process the + /// relocation data during the loading process. + /// + /// --- + /// + /// Typically, this field is set to 0. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_crlc"))] pub relocations: u16, - /// e_cparhdr + /// In `winnt.h` and `pe.h`, it's `e_cparhdr`. 
+ /// + /// It used to specify the size of the "executable header" in terms of "paragraphs" (16 byte chunks). It used to indicate + /// the offset of the program's compiled/assembled and linked image (the [load module](https://www.tavi.co.uk/phobos/exeformat.html#loadmodule)) within the executable file. The size + /// of the load module could have been deduced by subtracting this value (converted to bytes) from the overall size that could + /// have been derived from combining the value of [`pages_in_file` (aka `e_cp`)](DosHeader::pages_in_file) and the value of + /// [`bytes_on_last_page` (aka `e_cblp`)](DosHeader::bytes_on_last_page). The header used to always span an even number of + /// paragraphs. + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// The "executable header" in this context refers to the DOS header itself. + /// + /// Typically, this field is set to 4. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + /// This is because the modern DOS header is 64 bytes long, and 64 / 16 = 4. + #[doc(alias("e_cparhdr"))] pub size_of_header_in_paragraphs: u16, - /// e_minalloc + /// In `winnt.h` and `pe.h`, it's `e_minalloc`. + /// + /// It used to specify the minimum number of extra paragraphs needed to be allocated to begin execution. This is + /// **in addition** to the memory required to hold the [load module](https://www.tavi.co.uk/phobos/exeformat.html#loadmodule). This value normally represented the total size + /// of any uninitialized data and/or stack segments that were linked at the end of the program. This space was not + /// directly included in the load module, since there were no particular initializing values and it would simply waste + /// disk space. + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). 
+ /// + /// If both the [`minimum_extra_paragraphs_needed` (aka `e_minalloc`)](DosHeader::minimum_extra_paragraphs_needed) and + /// [`maximum_extra_paragraphs_needed` (aka `e_maxalloc`)](DosHeader::maximum_extra_paragraphs_needed) fields were set to 0x0000, + /// the program would be allocated as much memory as available. [Source](https://www.tavi.co.uk/phobos/exeformat.html) + /// + /// Typically, this field is set to 0x10. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_minalloc"))] pub minimum_extra_paragraphs_needed: u16, - /// e_maxalloc + /// In `winnt.h` and `pe.h`, it's `e_maxalloc`. + /// + /// It used to specify the maximum number of extra paragraphs needed to be allocated to begin execution. This indicated + /// **additional** memory over and above that required by the [load module](https://www.tavi.co.uk/phobos/exeformat.html#loadmodule) and the value specified in + /// [`minimum_extra_paragraphs_needed` (aka `e_minalloc`)](DosHeader::minimum_extra_paragraphs_needed). + /// If the request could not be satisfied, the program would be allocated as much memory as available. + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// If both the [`minimum_extra_paragraphs_needed` (aka `e_minalloc`)](DosHeader::minimum_extra_paragraphs_needed) and + /// [`maximum_extra_paragraphs_needed` (aka `e_maxalloc`)](DosHeader::maximum_extra_paragraphs_needed) fields were set to 0x0000, + /// the program would be allocated as much memory as available. [Source](https://www.tavi.co.uk/phobos/exeformat.html) + /// + /// Typically, this field is set to 0xFFFF. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_maxalloc"))] pub maximum_extra_paragraphs_needed: u16, - /// e_ss + /// In `winnt.h` and `pe.h`, it's `e_ss`. + /// + /// It used to specify the initial SS ("stack segment") value. 
SS value was a paragraph address of the stack segment + /// relative to the start of the [load module](https://www.tavi.co.uk/phobos/exeformat.html#loadmodule). At load time, the value was relocated by adding the address of the + /// start segment of the program to it, and the resulting value was placed in the SS register before the program is + /// started. To read more about x86 memory segmentation and SS register, see the + /// [wikipedia article](https://en.wikipedia.org/wiki/X86_memory_segmentation) on this topic. In DOS, the start segment + /// boundary of the program was the first segment boundary in memory after + /// [Program Segment Prefix (PSP)](https://en.wikipedia.org/wiki/Program_Segment_Prefix). + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// The Program Segment Prefix (PSP) was a data structure used in DOS (Disk Operating System) environments. + /// It was located at the beginning of the memory allocated for a running program and it contained various + /// pieces of information about the program, including command-line arguments, environment variables, + /// and pointers to various system resources. + /// + /// [According to Wikipedia](https://en.wikipedia.org/wiki/Data_segment#Stack), the stack segment contains the call stack, + /// a LIFO structure, typically located in the higher parts of memory. A "stack pointer" register tracks the top of the + /// stack; it is adjusted each time a value is "pushed" onto the stack. The set of values pushed for one function call + /// is termed a "stack frame". + /// + /// Typically, this field is set to 0. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_ss"))] pub initial_relative_ss: u16, - /// e_sp + /// In `winnt.h` and `pe.h`, it's `e_sp`. + /// + /// It used to specify the initial SP ("stack pointer") value. 
SP value was the absolute value that must have been loaded + /// into the SP register before the program is given control. Since the actual stack segment was determined by the loader, + /// and this was merely a value within that segment, it didn't need to be relocated. + /// + /// [According to Wikipedia](https://en.wikipedia.org/wiki/Data_segment#Stack), the stack segment contains the call stack, + /// a LIFO structure, typically located in the higher parts of memory. A "stack pointer" register tracks the top of the + /// stack; it is adjusted each time a value is "pushed" onto the stack. The set of values pushed for one function call + /// is termed a "stack frame". + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// Typically, this field is set to 0xB8. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + // TODO: Clarify what exactly is meant by "this was merely a value within that segment". + #[doc(alias("e_sp"))] pub initial_sp: u16, - /// e_csum + /// In `winnt.h` and `pe.h`, it's `e_csum`. + /// + /// It used to specify the checksum of the contents of the executable file. It was used to ensure the integrity of the data + /// within the file. For full details on how this checksum was calculated, see <https://www.tavi.co.uk/phobos/exeformat.html#checksum>. + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// Typically, this field is set to 0. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_csum"))] pub checksum: u16, - /// e_ip + /// In `winnt.h` and `pe.h`, it's `e_ip`. + /// + /// It used to specify the initial IP ("instruction pointer") value. IP value was the absolute value that must have been + /// loaded into the IP register in order to transfer control to the program. 
Since the actual code segment was determined + /// by the loader, and this was merely a value within that segment, it didn't need to be relocated. + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// Typically, this field is set to 0. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + // TODO: Clarify what exactly is meant by "this was merely a value within that segment". + #[doc(alias("e_ip"))] pub initial_ip: u16, - /// e_cs + /// In `winnt.h` and `pe.h`, it's `e_cs`. + /// + /// It used to specify the pre-relocated initial CS ("code segment") value relative to the start of the [load module](https://www.tavi.co.uk/phobos/exeformat.html#loadmodule), + /// that should have been placed in the CS register in order to transfer control to the program. At load time, this value + /// was relocated by adding the address of the start segment of the program to it, and the resulting value was placed in + /// the CS register when control is transferred. + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// Typically, this field is set to 0. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_cs"))] pub initial_relative_cs: u16, - /// e_lfarlc + /// In `winnt.h` and `pe.h`, it's `e_lfarlc`. + /// + /// It used to specify the logical file address of the relocation table, or more specifically, the offset from the start + /// of the file to the [relocation pointer table](https://www.tavi.co.uk/phobos/exeformat.html#reloctable). This value + /// must have been used to locate the relocation table (rather than assuming a fixed location) because variable-length + /// information pertaining to program overlays could have occurred before this table, causing its position to vary. 
+ /// A value of 0x40 in this field generally indicated a different kind of executable, not a DOS 'MZ' type. + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// Typically, this field is set to 0x40. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_lfarlc"))] pub file_address_of_relocation_table: u16, - /// e_ovno + /// In `winnt.h` and `pe.h`, it's `e_ovno`. + /// + /// It used to specify the overlay number, which was normally set to 0x0000, because few programs actually had overlays. + /// It changed only in files containing programs that used overlays. + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// Overlays were sections of a program that remained on disk until the program actually required them. Different overlays + /// could thus share the same memory area. The overlays were loaded and unloaded by special code provided by the program + /// or its run-time library. + /// + /// Typically, this field is set to 0. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_ovno"))] pub overlay_number: u16, - /// e_res[4] + /// In `winnt.h` and `pe.h`, it's `e_res[4]`. + /// + /// It used to specify the reserved words for the program, i.e. an array reserved for future use. + /// Usually, the array was zeroed by the linker. + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// Typically, this field is set to 0. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_res"))] pub reserved: [u16; 4], - /// e_oemid + /// In `winnt.h` and `pe.h`, it's `e_oemid`. + /// + /// It used to specify the identifier for the OEM ("Original Equipment Manufacturer") for [`oem_info` aka `e_oeminfo`](DosHeader::oem_info). 
+ /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// More specifically, it used to specify the OEM of the system or hardware platform for which the executable file was created. + /// This field was used to specify certain characteristics or requirements related to the hardware environment in which the + /// executable was intended to run. + /// + /// Typically, this field is set to 0. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_oemid"))] pub oem_id: u16, - /// e_oeminfo + /// In `winnt.h` and `pe.h`, it's `e_oeminfo`. + /// + /// It used to specify the extra information, the kind of which was specific to the OEM identified by [`oem_id` aka `e_oemid`](DosHeader::oem_id). + #[doc(alias("e_oeminfo"))] pub oem_info: u16, - /// e_res2[10] + /// In `winnt.h` and `pe.h`, it's `e_res2[10]`. + /// + /// It used to specify the reserved words for the program, i.e. an array reserved for future use. + /// Usually, the array was zeroed by the linker. + /// [Source](https://stixproject.github.io/data-model/1.2/WinExecutableFileObj/DOSHeaderType/). + /// + /// Typically, this field is set to 0. [Source](https://offwhitesecurity.dev/malware-development/portable-executable-pe/dos-header/). + #[doc(alias("e_res2"))] pub reserved2: [u16; 10], - /// e_lfanew: pointer to PE header, always at offset 0x3c + /// In `winnt.h` and `pe.h`, it's `e_lfanew`. + /// + /// Today, it specifies the logical file address of the new exe header. In particular, it is a 4-byte offset into + /// the file where the PE file header is located. It is necessary to use this offset to locate the PE header in the file. + /// + /// This field itself is always located at offset 0x3c ([`PE_POINTER_OFFSET`]) from the start of the file. 
+ #[doc(alias("e_lfanew"))] pub pe_pointer: u32, } +#[doc(alias("IMAGE_DOS_SIGNATURE"))] pub const DOS_MAGIC: u16 = 0x5a4d; pub const PE_POINTER_OFFSET: u32 = 0x3c; pub const DOS_STUB_OFFSET: u32 = PE_POINTER_OFFSET + (core::mem::size_of::<u32>() as u32);