From 0256b2a98ba6a034715646173c28c049c98d2963 Mon Sep 17 00:00:00 2001
From: dd86k
Date: Mon, 22 Jan 2024 10:01:25 -0500
Subject: [PATCH] server: Kind of introduce archives
---
app/dump/ar.d | 86 +++++++++++++++++++++++++++++++++++++
app/dump/package.d | 11 ++++-
app/dumper.d | 57 +++++++++++++++++++++++-
app/utils.d | 86 +++++++++++++++++++++++++++++++++++++
src/adbg/object/format/ar.d | 84 +++++++++++++++++++++++++++++++++---
src/adbg/object/format/mz.d | 1 +
src/adbg/object/formats.d | 3 +-
src/adbg/object/server.d | 54 +++++++++++++----------
8 files changed, 351 insertions(+), 31 deletions(-)
create mode 100644 app/dump/ar.d
diff --git a/app/dump/ar.d b/app/dump/ar.d
new file mode 100644
index 00000000..df82c8cb
--- /dev/null
+++ b/app/dump/ar.d
@@ -0,0 +1,86 @@
+/// Library archive dumper
+///
+/// Authors: dd86k
+/// Copyright: © dd86k
+/// License: BSD-3-Clause
+module dump.ar;
+
+import adbg.disassembler.core;
+import adbg.object.server;
+import adbg.object.machines;
+import adbg.object.format.ar;
+import adbg.utils.bit : adbg_bswap32;
+import core.stdc.ctype : isdigit;
+import dumper;
+import utils : realstring;
+
+extern (C):
+
+/// Entry point for dumping a library archive object.
+///
+/// Only the archive member headers are dumped, and only when the dumper
+/// reports that headers are selected.
+/// Params:
+///     dump = Dumper instance; only queried through selected_headers().
+///     o = Object instance.
+/// Returns: Always zero.
+int dump_archive(ref Dumper dump, adbg_object_t *o) {
+    // Nothing else is supported for archives yet
+    if (!dump.selected_headers())
+        return 0;
+
+    dump_archive_headers(dump, o);
+    return 0;
+}
+
+private:
+
+/// Print the raw fields of every archive member header, in file order.
+///
+/// Members are fetched one index at a time with adbg_object_ar_header until
+/// it returns null. Every field is printed as the fixed-width text stored in
+/// the header; the end marker is additionally printed as a 16-bit value next
+/// to its quoted and escaped string form.
+/// Params:
+///     dump = Dumper instance (currently unused).
+///     o = Object instance.
+void dump_archive_headers(ref Dumper dump, adbg_object_t *o) {
+    print_header("Header");
+
+    ar_member_header *rhdr = void; // Root headers
+    for (size_t i; (rhdr = adbg_object_ar_header(o, i)) != null; ++i) {
+        print_section(cast(uint)i);
+        print_stringl("Name", rhdr.Name.ptr, rhdr.Name.sizeof);
+        print_stringl("Date", rhdr.Date.ptr, rhdr.Date.sizeof);
+        print_stringl("UserID", rhdr.UserID.ptr, rhdr.UserID.sizeof);
+        print_stringl("GroupID", rhdr.GroupID.ptr, rhdr.GroupID.sizeof);
+        print_stringl("Mode", rhdr.Mode.ptr, rhdr.Mode.sizeof);
+        print_stringl("Size", rhdr.Size.ptr, rhdr.Size.sizeof);
+
+        // Worst case: two quotes plus two 4-character escapes = 10 characters
+        char[10] b = void;
+        int l = realstring(b.ptr, b.sizeof, rhdr.End.ptr, rhdr.End.sizeof, '"', '"');
+        print_x16l("End", rhdr.EndMarker, b.ptr, l);
+
+        //TODO: Dump member contents (e.g. the symbol offsets stored in the
+        //      first member) using adbg_object_ar_data and
+        //      adbg_object_ar_header_size.
+    }
+}
\ No newline at end of file
diff --git a/app/dump/package.d b/app/dump/package.d
index 49d9d89e..6ed03a92 100644
--- a/app/dump/package.d
+++ b/app/dump/package.d
@@ -7,4 +7,13 @@
*/
module dump;
-public import dump.mz, dump.ne, dump.lx, dump.pe, dump.elf, dump.macho, dump.pdb70, dump.pdb20;
\ No newline at end of file
+public import
+ dump.mz,
+ dump.ne,
+ dump.lx,
+ dump.pe,
+ dump.elf,
+ dump.macho,
+ dump.pdb70,
+ dump.pdb20,
+ dump.ar;
\ No newline at end of file
diff --git a/app/dumper.d b/app/dumper.d
index 08976273..3ccb4bd3 100644
--- a/app/dumper.d
+++ b/app/dumper.d
@@ -148,7 +148,8 @@ int app_dump() {
case macho: return dump_macho(dump, o);
case pdb20: return dump_pdb20(dump, o);
case pdb70: return dump_pdb70(dump, o);
- default: return EXIT_FAILURE;
+ case archive: return dump_archive(dump, o);
+ default: assert(0, "Invalid object type"); // Raw/unknown
}
}
@@ -156,6 +157,8 @@ private immutable {
/// Padding spacing to use in characters
// PE32 has fields like MinorOperatingSystemVersion (27 chars)
int __field_padding = -28;
+ /// Number of bytes printed per row by print_raw.
+ int __columns = 16;
}
void print_header(const(char)* name) {
@@ -316,6 +319,58 @@ L_START:
goto L_START;
}
+/// Print a hexadecimal and printable-character dump of a region of the object.
+///
+/// Each row shows the offset of its first byte, up to `__columns` bytes in
+/// hexadecimal, then the same bytes as characters ('.' when not printable).
+/// If the region fails the bounds check, a warning is printed instead.
+/// Params:
+///     name = Title, printed through print_header.
+///     data = Start of the region; row offsets are relative to o.buffer.
+///     dsize = Size of the region in bytes.
+///     o = Object instance.
+void print_raw(const(char)* name, void *data, size_t dsize, adbg_object_t *o) {
+    // Is size fitting within file?
+    if (adbg_object_outboundpl(o, data, dsize)) {
+        print_string("warning", "Data goes beyond file bounds.");
+        return;
+    }
+
+    import core.stdc.ctype : isprint;
+
+    print_header(name);
+
+    size_t offset = data - o.buffer;
+
+    // Print column header
+    //TODO: Print extra spaces after offset string
+    printf("Offset ");
+    for (int ib; ib < __columns; ++ib)
+        printf("%02x ", ib);
+    putchar('\n');
+
+    // Print data, one row of at most __columns bytes at a time
+    const(ubyte)* d = cast(const(ubyte)*)data;
+    for (size_t id; id < dsize; id += __columns, offset += __columns) {
+        printf("%8zx ", offset);
+
+        // The last row may hold fewer bytes than a full row:
+        // never read past the end of the region
+        size_t left = dsize - id;
+        size_t col = left < __columns ? left : __columns;
+        const(ubyte)* row = d + id;
+
+        // Print data bytes
+        for (size_t ib; ib < col; ++ib)
+            printf("%02x ", row[ib]);
+
+        // Pad a short row so the character column stays aligned
+        for (size_t ib = col; ib < __columns; ++ib)
+            printf("   ");
+        putchar(' ');
+
+        // Print printable characters
+        for (size_t ib; ib < col; ++ib)
+            putchar(isprint(row[ib]) ? row[ib] : '.');
+
+        // New row
+        putchar('\n');
+    }
+}
+
void print_directory_entry(const(char)* name, uint rva, uint size) {
printf("%*s: 0x%08x %u\n", __field_padding, name, rva, size);
}
diff --git a/app/utils.d b/app/utils.d
index 62efbaae..ee91a2bc 100644
--- a/app/utils.d
+++ b/app/utils.d
@@ -6,6 +6,92 @@
module utils;
import core.stdc.stdio : sscanf;
+import core.stdc.ctype : isprint;
+
+/// Lowercase hexadecimal digit for the upper nibble (bits 7..4) of a byte.
+/// Params: upper = Byte value.
+/// Returns: Character in '0'..'9' or 'a'..'f'.
+char hexc0(ubyte upper) {
+    static immutable char[16] digits = "0123456789abcdef";
+    return digits[upper >> 4];
+}
+unittest {
+    // Each of the 16 upper-nibble values maps to its lowercase hex digit
+    static immutable string expected = "0123456789abcdef";
+    foreach (i, c; expected)
+        assert(hexc0(cast(ubyte)(i << 4)) == c);
+}
+/// Lowercase hexadecimal digit for the lower nibble (bits 3..0) of a byte.
+/// Params: lower = Byte value.
+/// Returns: Character in '0'..'9' or 'a'..'f'.
+char hexc1(ubyte lower) {
+    static immutable char[16] digits = "0123456789abcdef";
+    return digits[lower & 15];
+}
+unittest {
+    // Each of the 16 lower-nibble values maps to its lowercase hex digit
+    static immutable string expected = "0123456789abcdef";
+    foreach (i, c; expected)
+        assert(hexc1(cast(ubyte)i) == c);
+}
+
+/// Copy characters into a buffer, writing every non-printable character as
+/// a 4-character "\xNN" escape (lowercase hexadecimal).
+///
+/// The output is NOT null-terminated; use the returned length. Copying stops
+/// when the buffer is full, and an escape sequence is never split.
+/// Params:
+///     buffer = Output buffer.
+///     bsize = Size of the output buffer, in characters.
+///     str = Input characters; does not need to be null-terminated.
+///     ssize = Number of input characters.
+///     pre = If non-zero, character written before the text.
+///     post = If non-zero, character written after the text if room remains.
+/// Returns: Number of characters written to buffer; 0 if bsize is 0.
+int realstring(char* buffer, size_t bsize, const(char)* str, size_t ssize,
+    char pre = 0, char post = 0) {
+    if (bsize == 0)
+        return 0;
+
+    int len; // total length
+
+    // bsize is at least 1 here, so the prefix always fits
+    if (pre)
+        buffer[len++] = pre;
+
+    for (size_t i; i < ssize && len < bsize; ++i) {
+        char c = str[i];
+        if (isprint(c)) {
+            buffer[len++] = c;
+        } else {
+            // An escape takes 4 characters: stop only if they cannot all fit
+            if (len + 4 > bsize) break;
+            buffer[len++] = '\\';
+            buffer[len++] = 'x';
+            buffer[len++] = hexc0(c);
+            buffer[len++] = hexc1(c);
+        }
+    }
+
+    if (post && len < bsize)
+        buffer[len++] = post;
+
+    return len;
+}
+unittest {
+    // A printable character is copied as-is; the newline becomes "\x0a"
+    char[2] input = "`\n";
+    char[10] output = void;
+    const int written = realstring(output.ptr, output.sizeof, input.ptr, input.sizeof);
+    assert(written == 5);
+    assert(output[0 .. written] == "`\\x0a");
+}
/// Unformat text number.
/// Params:
diff --git a/src/adbg/object/format/ar.d b/src/adbg/object/format/ar.d
index befd2dfc..a49cbb60 100644
--- a/src/adbg/object/format/ar.d
+++ b/src/adbg/object/format/ar.d
@@ -5,7 +5,11 @@
/// License: BSD-3-Clause
module adbg.object.format.ar;
+import adbg.error;
+import adbg.object.server;
import adbg.utils.bit;
+import core.stdc.stdlib : atoi;
+import core.stdc.string : memcpy;
// Sources:
// - gdb/include/aout/ar.h
@@ -20,10 +24,14 @@ import adbg.utils.bit;
// Header + Longnames Member + Data
// Header + obj n + Data
+// NOTE: MSVC linker can only process libraries under 4 GiB in size.
+
/// COFF archive magic.
enum AR_MAGIC = CHAR64!"!\n";
/// Thin COFF archive magic.
enum AR_THIN_MAGIC = CHAR64!"!\n";
+/// Member header end marker: the two bytes "`\n" (0x60 0x0a) as a 16-bit value.
+private enum AR_EOL = CHAR16!"`\n";
///
struct ar_file_header {
@@ -68,8 +76,11 @@ struct ar_member_header {
/// ASCII decimal representation of the total size of the
/// archive member, not including the size of the header.
char[10] Size;
- /// The two bytes in the C string: "`\n" (0x60 0x0a).
- char[2] End;
+ union {
+ /// The two bytes in the C string: "`\n" (0x60 0x0a).
+ char[2] End;
+ /// The same two bytes read as one 16-bit value; adbg_object_ar_header
+ /// compares it against AR_EOL.
+ ushort EndMarker;
+ }
}
/// When first name is "/"
@@ -90,16 +101,77 @@ struct mscoff_second_linker_header {
// String Table after Indices
}
+/// Loader for library archives: sets the object format to AdbgObject.archive.
+/// No header is read or validated here.
+/// Params: o = Object instance; dereferenced without a null check.
+/// Returns: Always zero.
+int adbg_object_ar_load(adbg_object_t *o) {
+ o.format = AdbgObject.archive;
+ return 0;
+}
+
+/// Get the archive member header at a given index.
+///
+/// Headers are found by walking the archive from the first member, which
+/// directly follows the file signature. Every header visited must lie
+/// entirely inside the file and carry the end marker, otherwise its Size
+/// field cannot be trusted and the walk stops.
+/// Params:
+///     o = Object instance.
+///     index = Zero-based member index.
+/// Returns: Pointer to the member header; null if o is null, if the index is
+///          past the last member, or if a malformed header is met.
+ar_member_header* adbg_object_ar_header(adbg_object_t *o, size_t index) {
+    if (o == null)
+        return null;
+
+    version (Trace) trace("index=%zu", index);
+
+    ar_member_header *p = cast(ar_member_header*)(o.buffer + ar_file_header.sizeof);
+    // Bounds are checked before every header is read, including the first
+    for (size_t i; !adbg_object_outboundpl(o, p, ar_member_header.sizeof); ++i) {
+        if (p.EndMarker != AR_EOL)
+            return null;
+
+        if (i == index)
+            return p;
+
+        // Size field is text and excludes both the header and the padding
+        int size = adbg_object_ar_header_size(o, p);
+        if (size < 0)
+            return null;
+
+        // Members start on an even offset: odd-sized data is followed
+        // by one padding byte that Size does not account for.
+        size_t offset = ar_member_header.sizeof + size + (size & 1);
+        version (Trace) trace("offset=%zu", offset);
+        p = cast(ar_member_header*)(cast(void*)p + offset);
+    }
+
+    return null;
+}
+
+/// Get the size of a member's data from the Size field of its header.
+///
+/// The field is ASCII decimal text and is not null-terminated, so a
+/// terminated copy is parsed. The value excludes the header itself.
+/// Params:
+///     o = Object instance.
+///     mhdr = Member header.
+/// Returns: Size in bytes; -1 if a parameter is null, if the field holds no
+///          number, or if the value is negative or does not fit an int.
+int adbg_object_ar_header_size(adbg_object_t *o, ar_member_header *mhdr) {
+    import core.stdc.stdlib : strtoll;
+
+    if (o == null || mhdr == null)
+        return -1;
+
+    char[ar_member_header.Size.sizeof + 1] str = void;
+    memcpy(str.ptr, mhdr.Size.ptr, mhdr.Size.sizeof);
+    str[mhdr.Size.sizeof] = 0;
+
+    // strtoll, unlike atoi, reports missing digits and is well-defined for
+    // every value a 10-digit field can hold
+    char *end = void;
+    long size = strtoll(str.ptr, &end, 10);
+    if (end == str.ptr)
+        return -1;
+    if (size < 0 || size > int.max)
+        return -1;
+
+    return cast(int)size;
+}
+
+/// Get a pointer to the data of an archive member.
+/// Params:
+///     o = Object instance.
+///     mhdr = Member header.
+/// Returns: Pointer to the first byte after the header; null if a parameter
+///          is null, if the size cannot be obtained, or if the data fails
+///          the bounds check.
+void* adbg_object_ar_data(adbg_object_t *o, ar_member_header *mhdr) {
+    if (o == null || mhdr == null)
+        return null;
+
+    const int length = adbg_object_ar_header_size(o, mhdr);
+    if (length < 0)
+        return null;
+
+    // Member data immediately follows its header
+    void *data = cast(void*)mhdr + ar_member_header.sizeof;
+    return adbg_object_outboundpl(o, data, length) ? null : data;
+}
+
//
// MSCOFF import
//
///
struct mscoff_import_header {
- /// Must be IMAGE_FILE_MACHINE_UNKNOWN.
- ushort Sig1;
- /// Must be 0xFFFF.
- ushort Sig2;
+ union {
+ /// Must be a combination of IMAGE_FILE_MACHINE_UNKNOWN and 0xFFFF.
+ ///
+ /// Effectively only 0xFFFF, since the enum value is 0.
+ uint Signature;
+ struct {
+ /// Must be IMAGE_FILE_MACHINE_UNKNOWN.
+ ushort Signature1;
+ /// Must be 0xFFFF.
+ ushort Signature2;
+ }
+ }
ushort Version;
ushort Machine;
uint TimeStamp;
diff --git a/src/adbg/object/format/mz.d b/src/adbg/object/format/mz.d
index 123a9552..cdc5b123 100644
--- a/src/adbg/object/format/mz.d
+++ b/src/adbg/object/format/mz.d
@@ -69,6 +69,7 @@ struct mz_hdr_ext {
ushort[ERESWDS] e_res; /// Reserved words
uint e_lfanew; ///
}
+static assert(mz_hdr_ext.e_lfanew.offsetof == LFANEW_OFFSET);
/// MZ relocation entry
struct mz_reloc {
diff --git a/src/adbg/object/formats.d b/src/adbg/object/formats.d
index 4fab52c4..6d17d308 100644
--- a/src/adbg/object/formats.d
+++ b/src/adbg/object/formats.d
@@ -12,4 +12,5 @@ public import
adbg.object.format.ne,
adbg.object.format.lx,
adbg.object.format.pe,
- adbg.object.format.pdb;
\ No newline at end of file
+ adbg.object.format.pdb,
+ adbg.object.format.ar;
\ No newline at end of file
diff --git a/src/adbg/object/server.d b/src/adbg/object/server.d
index f2fbed5d..4a76e3d1 100644
--- a/src/adbg/object/server.d
+++ b/src/adbg/object/server.d
@@ -63,10 +63,12 @@ enum AdbgObject {
elf,
/// Mach Object format.
macho,
- // Microsoft Program Database format 2.0.
+ /// Microsoft Program Database format 2.0.
pdb20,
- // Microsoft Program Database format 7.0.
+ /// Microsoft Program Database format 7.0.
pdb70,
+ /// Library archive.
+ archive,
// Microsoft Debug format.
//dbg,
//
@@ -170,7 +172,10 @@ struct adbg_object_t {
void *header;
struct mz_t {
- mz_hdr *header;
+ union {
+ mz_hdr *header;
+ mz_hdr_ext *header_ext;
+ }
mz_reloc *relocs;
void *newbase;
bool *reversed_relocs;
@@ -549,10 +554,14 @@ L_ARG:
return adbg_object_pdb70_load(o);
}
- //TODO: 64-bit detection
- // starting with MSCOFF
+ // 64-bit signature detection
+ switch (*o.buffer64) {
+ case AR_MAGIC:
+ return adbg_object_ar_load(o);
+ default:
+ }
- // 32-bit detection
+ // 32-bit signature detection
switch (*o.buffer32) {
case ELF_MAGIC: // ELF
return adbg_object_elf_load(o);
@@ -566,32 +575,31 @@ L_ARG:
default:
}
- // 16-bit detection
+ // 16-bit signature detection
switch (*o.buffer16) {
case MAGIC_MZ:
if (o.file_size < mz_hdr.sizeof)
return adbg_oops(AdbgError.unknownObjFormat);
- import adbg.object.format.mz : LFANEW_OFFSET;
-
// If e_lfarlc (relocation table) starts lower than e_lfanew,
// then assume old MZ.
- if (o.i.mz.header.e_lfarlc < 0x40)
+ if (o.i.mz.header.e_lfarlc <= 0x40)
return adbg_object_mz_load(o);
- // Attempt to check new file format offset and signature.
- // If e_lfanew seem to be garbage, load file as an MZ exec instead.
- uint e_lfanew = *cast(uint*)(o.buffer + LFANEW_OFFSET);
- // If within MZ extended header
- if (e_lfanew <= mz_hdr_ext.sizeof)
+ // If e_lfanew points within MZ extended header
+ if (o.i.mz.header_ext.e_lfanew <= mz_hdr_ext.sizeof)
return adbg_object_mz_load(o);
- // If outside file
- if (e_lfanew >= o.file_size)
+
+ // If e_lfanew points outside file
+ if (o.i.mz.header_ext.e_lfanew >= o.file_size)
return adbg_object_mz_load(o);
+
// NOTE: ReactOS checks if NtHeaderOffset is not higher than 256 MiB
- if (e_lfanew >= 256 * 1024 * 1024)
+ if (o.i.mz.header_ext.e_lfanew >= 256 * 1024 * 1024)
return adbg_object_mz_load(o);
- o.i.mz.newbase = o.buffer + e_lfanew; // Used by sub-loaders
+
+ // Set where new header is located, used by sub-loaders
+ o.i.mz.newbase = o.buffer + o.i.mz.header_ext.e_lfanew;
// 32-bit signature check
uint sig = *cast(uint*)o.i.mz.newbase;
@@ -666,7 +674,8 @@ const(char)* adbg_object_short_name(adbg_object_t *o) {
case elf: return "elf";
case pdb20: return "pdb20";
case pdb70: return "pdb70";
- default:
+ case archive: return "archive";
+ default: // Because of unknown
}
L_UNKNOWN:
return "unknown";
@@ -687,8 +696,9 @@ const(char)* adbg_object_name(adbg_object_t *o) {
case elf: return `Executable and Linkable Format`;
case pdb20: return `Program Database 2.0`;
case pdb70: return `Program Database 7.0`;
- default:
+ case archive: return `COFF Library Archive`;
+ default: // Because of unknown
}
L_UNKNOWN:
- return "Unknown";
+ return "unknown";
}
\ No newline at end of file