From 0256b2a98ba6a034715646173c28c049c98d2963 Mon Sep 17 00:00:00 2001 From: dd86k Date: Mon, 22 Jan 2024 10:01:25 -0500 Subject: [PATCH] server: Kind of introduce archives --- app/dump/ar.d | 86 +++++++++++++++++++++++++++++++++++++ app/dump/package.d | 11 ++++- app/dumper.d | 57 +++++++++++++++++++++++- app/utils.d | 86 +++++++++++++++++++++++++++++++++++++ src/adbg/object/format/ar.d | 84 +++++++++++++++++++++++++++++++++--- src/adbg/object/format/mz.d | 1 + src/adbg/object/formats.d | 3 +- src/adbg/object/server.d | 54 +++++++++++++---------- 8 files changed, 351 insertions(+), 31 deletions(-) create mode 100644 app/dump/ar.d diff --git a/app/dump/ar.d b/app/dump/ar.d new file mode 100644 index 00000000..df82c8cb --- /dev/null +++ b/app/dump/ar.d @@ -0,0 +1,86 @@ +/// Library archive dumper +/// +/// Authors: dd86k +/// Copyright: © dd86k +/// License: BSD-3-Clause +module dump.ar; + +import adbg.disassembler.core; +import adbg.object.server; +import adbg.object.machines; +import adbg.object.format.ar; +import adbg.utils.bit : adbg_bswap32; +import core.stdc.ctype : isdigit; +import dumper; +import utils : realstring; + +extern (C): + +int dump_archive(ref Dumper dump, adbg_object_t *o) { + if (dump.selected_headers()) + dump_archive_headers(dump, o); + + return 0; +} + +private: + +void dump_archive_headers(ref Dumper dump, adbg_object_t *o) { + print_header("Header"); + + ar_member_header *rhdr = void; // Root headers + for (size_t i; (rhdr = adbg_object_ar_header(o, i)) != null; ++i) { + print_section(cast(uint)i); + print_stringl("Name", rhdr.Name.ptr, rhdr.Name.sizeof); + print_stringl("Date", rhdr.Date.ptr, rhdr.Date.sizeof); + print_stringl("UserID", rhdr.UserID.ptr, rhdr.UserID.sizeof); + print_stringl("GroupID", rhdr.GroupID.ptr, rhdr.GroupID.sizeof); + print_stringl("Mode", rhdr.Mode.ptr, rhdr.Mode.sizeof); + print_stringl("Size", rhdr.Size.ptr, rhdr.Size.sizeof); + + char[10] b = void; + int l = realstring(b.ptr, 10, rhdr.End.ptr, 2, '"', '"'); + 
print_x16l("End", rhdr.EndMarker, b.ptr, l); + + /+void *data = adbg_object_ar_data(o, rhdr); + if (data == null) { + print_string("warning", "Could not get data pointer"); + continue; + } + + int size = adbg_object_ar_header_size(o, rhdr); + if (size <= 0) { + print_string("warning", "Could not get size of data"); + continue; + } + + import core.stdc.stdio : printf; + + int symcnt = *cast(int*)data; + int *symoffs = cast(int*)data + 1; + for (int isym; isym < symcnt; ++isym) { + int off = adbg_bswap32(symoffs[isym]); + + ar_member_header *table = void; + if (adbg_object_offset(o, cast(void**)&table, off)) { + print_string("warning", "aaaaaaaaaaa cringe"); + printf("there was %d headers\n", isym); + return; + } + + print_stringl("Name", table.Name.ptr, table.Name.sizeof); + print_stringl("Date", table.Date.ptr, table.Date.sizeof); + print_stringl("UserID", table.UserID.ptr, table.UserID.sizeof); + print_stringl("GroupID", table.GroupID.ptr, table.GroupID.sizeof); + print_stringl("Mode", table.Mode.ptr, table.Mode.sizeof); + print_stringl("Size", table.Size.ptr, table.Size.sizeof); + l = realstring(b.ptr, 10, table.End.ptr, 2, '"', '"'); + print_x16l("End", table.EndMarker, b.ptr, l); + + if (table.Name[0] != '/' || isdigit(table.Name[1]) == 0) + continue; + + + }+/ + } +} \ No newline at end of file diff --git a/app/dump/package.d b/app/dump/package.d index 49d9d89e..6ed03a92 100644 --- a/app/dump/package.d +++ b/app/dump/package.d @@ -7,4 +7,13 @@ */ module dump; -public import dump.mz, dump.ne, dump.lx, dump.pe, dump.elf, dump.macho, dump.pdb70, dump.pdb20; \ No newline at end of file +public import + dump.mz, + dump.ne, + dump.lx, + dump.pe, + dump.elf, + dump.macho, + dump.pdb70, + dump.pdb20, + dump.ar; \ No newline at end of file diff --git a/app/dumper.d b/app/dumper.d index 08976273..3ccb4bd3 100644 --- a/app/dumper.d +++ b/app/dumper.d @@ -148,7 +148,8 @@ int app_dump() { case macho: return dump_macho(dump, o); case pdb20: return dump_pdb20(dump, o); case 
pdb70: return dump_pdb70(dump, o); - default: return EXIT_FAILURE; + case archive: return dump_archive(dump, o); + default: assert(0, "Invalid object type"); // Raw/unknown } } @@ -156,6 +157,8 @@ private immutable { /// Padding spacing to use in characters // PE32 has fields like MinorOperatingSystemVersion (27 chars) int __field_padding = -28; + /// + int __columns = 16; } void print_header(const(char)* name) { @@ -316,6 +319,58 @@ L_START: goto L_START; } +void print_raw(const(char)* name, void *data, size_t dsize, adbg_object_t *o) { + // Is size fitting within file? + if (adbg_object_outboundpl(o, data, dsize)) { + print_string("warning", "Data goes beyond file bounds."); + return; + } + + import core.stdc.ctype : isprint; + + print_header(name); + + size_t offset = data - o.buffer; + + // Print header + static immutable string _soff = "Offset "; + printf(_soff.ptr); + //TODO: Print extra spaces after offset string + for (int ib; ib < __columns; ++ib) + printf("%02x ", ib); + putchar('\n'); + + // Print data + ubyte *d = cast(ubyte*)data; + size_t afo; // Absolute file offset + for (size_t id; id < dsize; id += __columns, offset += __columns) { + printf("%8zx ", offset); + + // Adjust column for row + size_t col = __columns;//id + __columns >= dsize ? dsize - __columns : __columns; + size_t off = afo; + + // Print data bytes + for (size_t ib; ib < col; ++ib, ++off) + printf("%02x ", d[off]); + + // Adjust spacing between the two + if (col < __columns) { + + } else + putchar(' '); + + // Print printable characters + off = afo; + for (size_t ib; ib < col; ++ib, ++off) + putchar(isprint(d[off]) ? 
d[off] : '.'); + + // New row + afo += col; + putchar('\n'); + } +} + void print_directory_entry(const(char)* name, uint rva, uint size) { printf("%*s: 0x%08x %u\n", __field_padding, name, rva, size); } diff --git a/app/utils.d b/app/utils.d index 62efbaae..ee91a2bc 100644 --- a/app/utils.d +++ b/app/utils.d @@ -6,6 +6,92 @@ module utils; import core.stdc.stdio : sscanf; +import core.stdc.ctype : isprint; + +char hexc0(ubyte upper) { + ubyte h = upper >> 4; + return cast(char)(h >= 0xa ? h + ('a' - 0xa) : h + '0'); +} +unittest { + assert(hexc0(0x00) == '0'); + assert(hexc0(0x10) == '1'); + assert(hexc0(0x20) == '2'); + assert(hexc0(0x30) == '3'); + assert(hexc0(0x40) == '4'); + assert(hexc0(0x50) == '5'); + assert(hexc0(0x60) == '6'); + assert(hexc0(0x70) == '7'); + assert(hexc0(0x80) == '8'); + assert(hexc0(0x90) == '9'); + assert(hexc0(0xa0) == 'a'); + assert(hexc0(0xb0) == 'b'); + assert(hexc0(0xc0) == 'c'); + assert(hexc0(0xd0) == 'd'); + assert(hexc0(0xe0) == 'e'); + assert(hexc0(0xf0) == 'f'); +} +char hexc1(ubyte lower) { + ubyte l = lower & 15; + return cast(char)(l >= 0xa ? 
l + ('a' - 0xa) : l + '0'); +} +unittest { + assert(hexc1(0) == '0'); + assert(hexc1(1) == '1'); + assert(hexc1(2) == '2'); + assert(hexc1(3) == '3'); + assert(hexc1(4) == '4'); + assert(hexc1(5) == '5'); + assert(hexc1(6) == '6'); + assert(hexc1(7) == '7'); + assert(hexc1(8) == '8'); + assert(hexc1(9) == '9'); + assert(hexc1(0xa) == 'a'); + assert(hexc1(0xb) == 'b'); + assert(hexc1(0xc) == 'c'); + assert(hexc1(0xd) == 'd'); + assert(hexc1(0xe) == 'e'); + assert(hexc1(0xf) == 'f'); +} + +int realstring(char* buffer, size_t bsize, const(char)* str, size_t ssize, + char pre = 0, char post = 0) { + int len; // total length + + if (bsize == 0) + return 0; + + if (pre && bsize) + buffer[len++] = pre; + + for (size_t i; i < ssize && len < bsize; ++i) { + char c = str[i]; + if (isprint(c)) { + if (len >= bsize) break; + buffer[len++] = c; + } else { + if (len + 4 >= bsize) break; + buffer[len++] = '\\'; + buffer[len++] = 'x'; + buffer[len++] = hexc0(c); + buffer[len++] = hexc1(c); + } + } + + if (post && len < bsize) + buffer[len++] = post; + + return len; +} +unittest { + char[2] bi = "`\n"; + char[10] bo = void; // '`' '\\x0a' + assert(realstring(bo.ptr, 10, bi.ptr, 2) == 5); + assert(bo[0] == '`'); + assert(bo[1] == '\\'); + assert(bo[2] == 'x'); + assert(bo[3] == '0'); + assert(bo[4] == 'a'); +} /// Unformat text number. /// Params: diff --git a/src/adbg/object/format/ar.d b/src/adbg/object/format/ar.d index befd2dfc..a49cbb60 100644 --- a/src/adbg/object/format/ar.d +++ b/src/adbg/object/format/ar.d @@ -5,7 +5,11 @@ /// License: BSD-3-Clause module adbg.object.format.ar; +import adbg.error; +import adbg.object.server; import adbg.utils.bit; +import core.stdc.stdlib : atoi; +import core.stdc.string : memcpy; // Sources: // - gdb/include/aout/ar.h @@ -20,10 +24,14 @@ import adbg.utils.bit; // Header + Longnames Member + Data // Header + obj n + Data +// NOTE: MSVC linker can only process libraries under 4 GiB in size. + /// COFF archive magic. 
enum AR_MAGIC = CHAR64!"!<arch>\n"; /// Thin COFF archive magic. enum AR_THIN_MAGIC = CHAR64!"!<thin>\n"; +/// +private enum AR_EOL = CHAR16!"`\n"; /// struct ar_file_header { @@ -68,8 +76,11 @@ struct ar_member_header { /// ASCII decimal representation of the total size of the /// archive member, not including the size of the header. char[10] Size; - /// The two bytes in the C string: "`\n" (0x60 0x0a). - char[2] End; + union { + /// The two bytes in the C string: "`\n" (0x60 0x0a). + char[2] End; + ushort EndMarker; + } } /// When first name is "/" @@ -90,16 +101,77 @@ struct mscoff_second_linker_header { // String Table after Indices } +int adbg_object_ar_load(adbg_object_t *o) { + o.format = AdbgObject.archive; + return 0; +} + +ar_member_header* adbg_object_ar_header(adbg_object_t *o, size_t index) { + if (o == null) + return null; + + version (Trace) trace("index=%zu", index); + + ar_member_header *p = cast(ar_member_header*)(o.buffer + ar_file_header.sizeof); + void *max = o.buffer + 0x8000_0000; // 2 GiB limit + for (size_t i; p < max; ++i) { + if (i == index) + return p.EndMarker == AR_EOL ? 
p : null; + + // Adjust pointer + size_t offset = atoi(p.Size.ptr) + ar_member_header.sizeof; + version (Trace) trace("offset=%zu", offset); + p = cast(ar_member_header*)(cast(void*)p + offset); + + // Outside bounds + if (adbg_object_outboundpl(o, p, ar_member_header.sizeof)) + return null; + } + + return null; +} + +int adbg_object_ar_header_size(adbg_object_t *o, ar_member_header *mhdr) { + if (o == null || mhdr == null) + return -1; + + char[12] str = void; + memcpy(str.ptr, mhdr.Size.ptr, mhdr.Size.sizeof); + str[10] = 0; + return atoi(str.ptr); +} + +void* adbg_object_ar_data(adbg_object_t *o, ar_member_header *mhdr) { + if (o == null || mhdr == null) + return null; + + void *p = cast(void*)mhdr + ar_member_header.sizeof; + int size = adbg_object_ar_header_size(o, mhdr); + if (size < 0) + return null; + if (adbg_object_outboundpl(o, p, size)) + return null; + return p; +} + // // MSCOFF import // /// struct mscoff_import_header { - /// Must be IMAGE_FILE_MACHINE_UNKNOWN. - ushort Sig1; - /// Must be 0xFFFF. - ushort Sig2; + union { + /// Must be a combination of IMAGE_FILE_MACHINE_UNKNOWN and 0xFFFF. + /// + /// Effectively only 0xFFFF, since the enum value is 0. + uint Signature; + struct { + /// Must be IMAGE_FILE_MACHINE_UNKNOWN. + ushort Signature1; + /// Must be 0xFFFF. 
+ ushort Signature2; + } + } ushort Version; ushort Machine; uint TimeStamp; diff --git a/src/adbg/object/format/mz.d b/src/adbg/object/format/mz.d index 123a9552..cdc5b123 100644 --- a/src/adbg/object/format/mz.d +++ b/src/adbg/object/format/mz.d @@ -69,6 +69,7 @@ struct mz_hdr_ext { ushort[ERESWDS] e_res; /// Reserved words uint e_lfanew; /// } +static assert(mz_hdr_ext.e_lfanew.offsetof == LFANEW_OFFSET); /// MZ relocation entry struct mz_reloc { diff --git a/src/adbg/object/formats.d b/src/adbg/object/formats.d index 4fab52c4..6d17d308 100644 --- a/src/adbg/object/formats.d +++ b/src/adbg/object/formats.d @@ -12,4 +12,5 @@ public import adbg.object.format.ne, adbg.object.format.lx, adbg.object.format.pe, - adbg.object.format.pdb; \ No newline at end of file + adbg.object.format.pdb, + adbg.object.format.ar; \ No newline at end of file diff --git a/src/adbg/object/server.d b/src/adbg/object/server.d index f2fbed5d..4a76e3d1 100644 --- a/src/adbg/object/server.d +++ b/src/adbg/object/server.d @@ -63,10 +63,12 @@ enum AdbgObject { elf, /// Mach Object format. macho, - // Microsoft Program Database format 2.0. + /// Microsoft Program Database format 2.0. pdb20, - // Microsoft Program Database format 7.0. + /// Microsoft Program Database format 7.0. pdb70, + /// Library archive. + archive, // Microsoft Debug format. 
//dbg, // @@ -170,7 +172,10 @@ struct adbg_object_t { void *header; struct mz_t { - mz_hdr *header; + union { + mz_hdr *header; + mz_hdr_ext *header_ext; + } mz_reloc *relocs; void *newbase; bool *reversed_relocs; @@ -549,10 +554,14 @@ L_ARG: return adbg_object_pdb70_load(o); } - //TODO: 64-bit detection - // starting with MSCOFF + // 64-bit signature detection + switch (*o.buffer64) { + case AR_MAGIC: + return adbg_object_ar_load(o); + default: + } - // 32-bit detection + // 32-bit signature detection switch (*o.buffer32) { case ELF_MAGIC: // ELF return adbg_object_elf_load(o); @@ -566,32 +575,31 @@ L_ARG: default: } - // 16-bit detection + // 16-bit signature detection switch (*o.buffer16) { case MAGIC_MZ: if (o.file_size < mz_hdr.sizeof) return adbg_oops(AdbgError.unknownObjFormat); - import adbg.object.format.mz : LFANEW_OFFSET; - // If e_lfarlc (relocation table) starts lower than e_lfanew, // then assume old MZ. - if (o.i.mz.header.e_lfarlc < 0x40) + if (o.i.mz.header.e_lfarlc <= 0x40) return adbg_object_mz_load(o); - // Attempt to check new file format offset and signature. - // If e_lfanew seem to be garbage, load file as an MZ exec instead. 
- uint e_lfanew = *cast(uint*)(o.buffer + LFANEW_OFFSET); - // If within MZ extended header - if (e_lfanew <= mz_hdr_ext.sizeof) + // If e_lfanew points within MZ extended header + if (o.i.mz.header_ext.e_lfanew <= mz_hdr_ext.sizeof) return adbg_object_mz_load(o); - // If outside file - if (e_lfanew >= o.file_size) + + // If e_lfanew points outside file + if (o.i.mz.header_ext.e_lfanew >= o.file_size) return adbg_object_mz_load(o); + // NOTE: ReactOS checks if NtHeaderOffset is not higher than 256 MiB - if (e_lfanew >= 256 * 1024 * 1024) + if (o.i.mz.header_ext.e_lfanew >= 256 * 1024 * 1024) return adbg_object_mz_load(o); - o.i.mz.newbase = o.buffer + e_lfanew; // Used by sub-loaders + + // Set where new header is located, used by sub-loaders + o.i.mz.newbase = o.buffer + o.i.mz.header_ext.e_lfanew; // 32-bit signature check uint sig = *cast(uint*)o.i.mz.newbase; @@ -666,7 +674,8 @@ const(char)* adbg_object_short_name(adbg_object_t *o) { case elf: return "elf"; case pdb20: return "pdb20"; case pdb70: return "pdb70"; - default: + case archive: return "archive"; + default: // Because of unknown } L_UNKNOWN: return "unknown"; @@ -687,8 +696,9 @@ const(char)* adbg_object_name(adbg_object_t *o) { case elf: return `Executable and Linkable Format`; case pdb20: return `Program Database 2.0`; case pdb70: return `Program Database 7.0`; - default: + case archive: return `COFF Library Archive`; + default: // Because of unknown } L_UNKNOWN: - return "Unknown"; + return "unknown"; } \ No newline at end of file