diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index bb41a763..d83d5996 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -34,17 +34,6 @@ jobs: - run: zig fmt --check src - run: zig build test -Dhas-zig - macos_11: - name: Test macos 11.0 - runs-on: macos-11 - steps: - - uses: actions/checkout@v3 - - uses: goto-bus-stop/setup-zig@v2 - with: - version: master - - run: zig version - - run: zig build test -Dhas-static -Dhas-zig - gcc_musl: name: Test gcc with musl runs-on: ubuntu-latest diff --git a/build.zig b/build.zig index e1184977..02542104 100644 --- a/build.zig +++ b/build.zig @@ -19,6 +19,8 @@ pub fn build(b: *std.Build) void { }; const use_llvm = b.option(bool, "use-llvm", "Whether to use LLVM") orelse true; const use_lld = if (builtin.os.tag == .macos) false else use_llvm; + const sanitize_thread = b.option(bool, "sanitize-thread", "Enable thread-sanitization") orelse false; + const single_threaded = b.option(bool, "single-threaded", "Force single-threaded") orelse false; const yaml = b.dependency("zig-yaml", .{ .target = target, @@ -36,6 +38,8 @@ pub fn build(b: *std.Build) void { .optimize = mode, .use_llvm = use_llvm, .use_lld = use_lld, + .sanitize_thread = sanitize_thread, + .single_threaded = single_threaded, }); exe.root_module.addImport("yaml", yaml.module("yaml")); exe.root_module.addImport("dis_x86_64", dis_x86_64.module("dis_x86_64")); @@ -91,6 +95,8 @@ pub fn build(b: *std.Build) void { .optimize = mode, .use_llvm = use_llvm, .use_lld = use_lld, + .sanitize_thread = sanitize_thread, + .single_threaded = single_threaded, }); const unit_tests_opts = b.addOptions(); unit_tests.root_module.addOptions("build_options", unit_tests_opts); diff --git a/src/MachO.zig b/src/MachO.zig index 02c0b4b1..602fda4f 100644 --- a/src/MachO.zig +++ b/src/MachO.zig @@ -20,15 +20,11 @@ file_handles: std.ArrayListUnmanaged(File.Handle) = .{}, segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, sections: std.MultiArrayList(Section) = .{}, -symbols: std.ArrayListUnmanaged(Symbol) = .{}, -symbols_extra: std.ArrayListUnmanaged(u32) = .{}, -globals: std.AutoHashMapUnmanaged(u32, Symbol.Index) = .{}, +resolver: SymbolResolver = .{}, /// This table will be populated after `scanRelocs` has run. /// Key is symbol index. -undefs: std.AutoHashMapUnmanaged(Symbol.Index, std.ArrayListUnmanaged(Atom.Index)) = .{}, -/// Global symbols we need to resolve for the link to succeed. 
-undefined_symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, -boundary_symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, +undefs: std.AutoHashMapUnmanaged(Ref, std.ArrayListUnmanaged(Ref)) = .{}, +undefs_mutex: std.Thread.Mutex = .{}, pagezero_seg_index: ?u8 = null, text_seg_index: ?u8 = null, @@ -43,17 +39,6 @@ eh_frame_sect_index: ?u8 = null, unwind_info_sect_index: ?u8 = null, objc_stubs_sect_index: ?u8 = null, -mh_execute_header_index: ?Symbol.Index = null, -mh_dylib_header_index: ?Symbol.Index = null, -dyld_private_index: ?Symbol.Index = null, -dyld_stub_binder_index: ?Symbol.Index = null, -dso_handle_index: ?Symbol.Index = null, -objc_msg_send_index: ?Symbol.Index = null, - -entry_index: ?Symbol.Index = null, - -string_intern: StringTable = .{}, - symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, strtab: std.ArrayListUnmanaged(u8) = .{}, indsymtab: Indsymtab = .{}, @@ -63,21 +48,20 @@ stubs_helper: StubsHelperSection = .{}, objc_stubs: ObjcStubsSection = .{}, la_symbol_ptr: LaSymbolPtrSection = .{}, tlv_ptr: TlvPtrSection = .{}, -rebase: RebaseSection = .{}, -bind: BindSection = .{}, -weak_bind: WeakBindSection = .{}, -lazy_bind: LazyBindSection = .{}, -export_trie: ExportTrieSection = .{}, +rebase: Rebase = .{}, +bind: Bind = .{}, +weak_bind: WeakBind = .{}, +lazy_bind: LazyBind = .{}, +export_trie: ExportTrie = .{}, unwind_info: UnwindInfo = .{}, +data_in_code: DataInCode = .{}, -atoms: std.ArrayListUnmanaged(Atom) = .{}, -atoms_extra: std.ArrayListUnmanaged(u32) = .{}, thunks: std.ArrayListUnmanaged(Thunk) = .{}, -unwind_records: std.ArrayListUnmanaged(UnwindInfo.Record) = .{}, -has_tlv: bool = false, -binds_to_weak: bool = false, -weak_defines: bool = false, +has_tlv: AtomicBool = AtomicBool.init(false), +binds_to_weak: AtomicBool = AtomicBool.init(false), +weak_defines: AtomicBool = AtomicBool.init(false), +has_errors: AtomicBool = AtomicBool.init(false), pub fn openPath(allocator: Allocator, options: Options, thread_pool: *ThreadPool) !*MachO { const file = try options.emit.directory.createFile(options.emit.sub_path, .{ @@ -117,12 +101,8 @@ pub fn deinit(self: *MachO) void { } self.file_handles.deinit(gpa); - self.symbols.deinit(gpa); - self.symbols_extra.deinit(gpa); - self.globals.deinit(gpa); + self.resolver.deinit(gpa); self.undefs.deinit(gpa); - self.undefined_symbols.deinit(gpa); - self.string_intern.deinit(gpa); self.objects.deinit(gpa); self.dylibs.deinit(gpa); @@ -136,9 +116,18 @@ pub fn deinit(self: *MachO) void { self.files.deinit(gpa); self.segments.deinit(gpa); + for ( + self.sections.items(.atoms), + self.sections.items(.thunks), + self.sections.items(.out), + self.sections.items(.relocs), + ) |*atoms, *th, *out, *relocs| { + atoms.deinit(gpa); + th.deinit(gpa); + out.deinit(gpa); + relocs.deinit(gpa); + } self.sections.deinit(gpa); - self.atoms.deinit(gpa); - self.atoms_extra.deinit(gpa); self.thunks.deinit(gpa); self.symtab.deinit(gpa); @@ -153,7 +142,7 @@ pub fn deinit(self: *MachO) void { self.lazy_bind.deinit(gpa); self.export_trie.deinit(gpa); self.unwind_info.deinit(gpa); - self.unwind_records.deinit(gpa); + self.data_in_code.deinit(gpa); } pub fn flush(self: *MachO) !void { @@ -162,17 +151,10 @@ pub fn flush(self: *MachO) !void { const gpa = self.base.allocator; - // Atom at index 0 is reserved as null atom - try self.atoms.append(gpa, .{}); - try self.atoms_extra.append(gpa, 0); // Append empty string to string tables - try self.string_intern.buffer.append(gpa, 0); try self.strtab.append(gpa, 0); // Append null file try self.files.append(gpa, 
.null); - // Append null symbols - try self.symbols.append(gpa, .{}); - try self.symbols_extra.append(gpa, 0); var arena_allocator = std.heap.ArenaAllocator.init(gpa); defer arena_allocator.deinit(); @@ -203,84 +185,11 @@ pub fn flush(self: *MachO) !void { } } - // Resolve link objects var resolved_objects = std.ArrayList(LinkObject).init(arena); - try resolved_objects.ensureTotalCapacityPrecise(self.options.positionals.len); - for (self.options.positionals) |obj| { - const full_path = blk: { - switch (obj.tag) { - .obj => { - var buffer: [fs.max_path_bytes]u8 = undefined; - const full_path = std.fs.realpath(obj.path, &buffer) catch |err| switch (err) { - error.FileNotFound => { - self.base.fatal("file not found {}", .{obj}); - continue; - }, - else => |e| return e, - }; - break :blk try arena.dupe(u8, full_path); - }, - .lib => { - const full_path = (try self.resolveLib(arena, lib_dirs.items, obj.path)) orelse { - const err = try self.base.addErrorWithNotes(lib_dirs.items.len); - try err.addMsg("library not found for {}", .{obj}); - for (lib_dirs.items) |dir| try err.addNote("tried {s}", .{dir}); - continue; - }; - break :blk full_path; - }, - .framework => { - const full_path = (try self.resolveFramework(arena, framework_dirs.items, obj.path)) orelse { - const err = try self.base.addErrorWithNotes(framework_dirs.items.len); - try err.addMsg("framework not found for {}", .{obj}); - for (framework_dirs.items) |dir| try err.addNote("tried {s}", .{dir}); - continue; - }; - break :blk full_path; - }, - } - }; - resolved_objects.appendAssumeCapacity(.{ - .path = full_path, - .tag = obj.tag, - .needed = obj.needed, - .weak = obj.weak, - .hidden = obj.hidden, - .reexport = obj.reexport, - .must_link = obj.must_link, - }); - } + try self.resolvePaths(arena, lib_dirs.items, framework_dirs.items, &resolved_objects); if (self.options.cpu_arch == null) { - var has_parse_error = false; - var platforms = std.ArrayList(struct { std.Target.Cpu.Arch, ?Options.Platform }).init(self.base.allocator); - defer platforms.deinit(); - try platforms.ensureUnusedCapacity(resolved_objects.items.len); - - for (resolved_objects.items) |obj| { - self.inferCpuArchAndPlatform(obj, &platforms) catch |err| { - has_parse_error = true; - switch (err) { - error.UnhandledCpuArch => {}, // already reported - else => |e| { - self.base.fatal("{s}: unexpected error occurred while parsing input file: {s}", .{ - obj.path, @errorName(e), - }); - return e; - }, - } - }; - } - if (has_parse_error) return error.ParseFailed; - if (platforms.items.len == 0) { - self.base.fatal("could not infer CPU architecture", .{}); - return error.InferCpuFailed; - } - - self.options.cpu_arch = platforms.items[0][0]; - self.options.platform = for (platforms.items) |platform| { - if (platform[1]) |p| break p; - } else null; + try self.inferCpuArchAndPlatform(resolved_objects.items); } if (self.options.platform == null) { @@ -316,53 +225,35 @@ pub fn flush(self: *MachO) !void { } } - var has_parse_error = false; for (resolved_objects.items) |obj| { - self.parsePositional(obj) catch |err| { - has_parse_error = true; - switch (err) { - error.ParseFailed => {}, // already reported - else => |e| { - self.base.fatal("{s}: unexpected error occurred while parsing input file: {s}", .{ - obj.path, @errorName(e), - }); - return e; - }, - } + self.parsePositional(obj) catch |err| switch (err) { + else => |e| { + self.base.fatal("{s}: unexpected error occurred while parsing input file: {s}", .{ + obj.path, @errorName(e), + }); + return e; + }, }; } - if 
(has_parse_error) return error.ParseFailed; - - for (self.dylibs.items) |index| { - self.getFile(index).?.dylib.umbrella = index; - } + try self.parseObjects(); try self.parseDependentDylibs(arena, lib_dirs.items, framework_dirs.items); - for (self.dylibs.items) |index| { - const dylib = self.getFile(index).?.dylib; - if (!dylib.explicit and !dylib.hoisted) continue; - try dylib.initSymbols(self); - } - - { + if (!self.options.relocatable) { const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .internal = .{ .index = index } }); self.internal_object_index = index; + const object = self.getInternalObject().?; + try object.init(gpa); + try object.initSymbols(self); } - try self.addUndefinedGlobals(); try self.resolveSymbols(); - try self.parseDebugInfo(); if (self.options.relocatable) return relocatable.flush(self); - try self.resolveSyntheticSymbols(); - - try self.convertTentativeDefinitions(); - try self.createObjcSections(); + try self.convertTentativeDefsAndResolveSpecialSymbols(); try self.dedupLiterals(); - try self.claimUnresolved(); if (self.options.dead_strip) { try dead_strip.gcAtoms(self); @@ -376,43 +267,29 @@ pub fn flush(self: *MachO) !void { dylib.ordinal = @intCast(ord); } + self.claimUnresolved(); try self.scanRelocs(); - try self.initOutputSections(); try self.initSyntheticSections(); try self.sortSections(); try self.addAtomsToSections(); try self.calcSectionSizes(); + try self.generateUnwindInfo(); - try self.initSegments(); + try self.initSegments(); try self.allocateSections(); self.allocateSegments(); self.allocateSyntheticSymbols(); state_log.debug("{}", .{self.dumpState()}); - try self.initDyldInfoSections(); - try self.writeAtoms(); - try self.writeUnwindInfo(); - try self.finalizeDyldInfoSections(); - try self.writeSyntheticSections(); - - var off = math.cast(u32, self.getLinkeditSegment().fileoff) orelse return error.Overflow; - off = try self.writeDyldInfoSections(off); - off = mem.alignForward(u32, off, @alignOf(u64)); - off = try self.writeFunctionStarts(off); - off = mem.alignForward(u32, off, @alignOf(u64)); - off = try self.writeDataInCode(self.getTextSegment().vmaddr, off); - try self.calcSymtabSize(); - off = mem.alignForward(u32, off, @alignOf(u64)); - off = try self.writeSymtab(off); - off = mem.alignForward(u32, off, @alignOf(u32)); - off = try self.writeIndsymtab(off); - off = mem.alignForward(u32, off, @alignOf(u64)); - off = try self.writeStrtab(off); + try self.resizeSections(); + try self.writeSectionsAndUpdateLinkeditSizes(); - self.getLinkeditSegment().filesize = off - self.getLinkeditSegment().fileoff; + try self.writeSectionsToFile(); + try self.allocateLinkeditSegment(); + try self.writeLinkeditSectionsToFile(); var codesig: ?CodeSignature = if (self.requiresCodeSig()) blk: { // Preallocate space for the code signature. 
@@ -544,7 +421,105 @@ fn resolveFramework( } } -fn inferCpuArchAndPlatform(self: *MachO, obj: LinkObject, platforms: anytype) !void { +fn resolvePaths( + self: *MachO, + arena: Allocator, + lib_dirs: []const []const u8, + framework_dirs: []const []const u8, + resolved_objects: *std.ArrayList(LinkObject), +) !void { + const tracy = trace(@src()); + defer tracy.end(); + + var has_resolve_error = false; + try resolved_objects.ensureTotalCapacityPrecise(self.options.positionals.len); + for (self.options.positionals) |obj| { + const full_path = blk: { + switch (obj.tag) { + .obj => { + var buffer: [fs.max_path_bytes]u8 = undefined; + const full_path = std.fs.realpath(obj.path, &buffer) catch |err| switch (err) { + error.FileNotFound => { + self.base.fatal("file not found {}", .{obj}); + has_resolve_error = true; + continue; + }, + else => |e| return e, + }; + break :blk try arena.dupe(u8, full_path); + }, + .lib => { + const full_path = (try self.resolveLib(arena, lib_dirs, obj.path)) orelse { + const err = try self.base.addErrorWithNotes(lib_dirs.len); + try err.addMsg("library not found for {}", .{obj}); + for (lib_dirs) |dir| try err.addNote("tried {s}", .{dir}); + has_resolve_error = true; + continue; + }; + break :blk full_path; + }, + .framework => { + const full_path = (try self.resolveFramework(arena, framework_dirs, obj.path)) orelse { + const err = try self.base.addErrorWithNotes(framework_dirs.len); + try err.addMsg("framework not found for {}", .{obj}); + for (framework_dirs) |dir| try err.addNote("tried {s}", .{dir}); + has_resolve_error = true; + continue; + }; + break :blk full_path; + }, + } + }; + resolved_objects.appendAssumeCapacity(.{ + .path = full_path, + .tag = obj.tag, + .needed = obj.needed, + .weak = obj.weak, + .hidden = obj.hidden, + .reexport = obj.reexport, + .must_link = obj.must_link, + }); + } + + if (has_resolve_error) return error.ResolveFailed; +} + +fn inferCpuArchAndPlatform(self: *MachO, resolved_objects: []const LinkObject) !void { + const tracy = trace(@src()); + defer tracy.end(); + + var has_parse_error = false; + var platforms = std.ArrayList(struct { std.Target.Cpu.Arch, ?Options.Platform }).init(self.base.allocator); + defer platforms.deinit(); + try platforms.ensureUnusedCapacity(resolved_objects.len); + + for (resolved_objects) |obj| { + self.inferCpuArchAndPlatformInObject(obj, &platforms) catch |err| { + has_parse_error = true; + switch (err) { + error.UnhandledCpuArch => {}, // already reported + else => |e| { + self.base.fatal("{s}: unexpected error occurred while parsing input file: {s}", .{ + obj.path, @errorName(e), + }); + return e; + }, + } + }; + } + if (has_parse_error) return error.ParseFailed; + if (platforms.items.len == 0) { + self.base.fatal("could not infer CPU architecture", .{}); + return error.InferCpuFailed; + } + + self.options.cpu_arch = platforms.items[0][0]; + self.options.platform = for (platforms.items) |platform| { + if (platform[1]) |p| break p; + } else null; +} + +fn inferCpuArchAndPlatformInObject(self: *MachO, obj: LinkObject, platforms: anytype) !void { const gpa = self.base.allocator; const file = try std.fs.cwd().openFile(obj.path, .{}); @@ -577,6 +552,7 @@ fn inferCpuArchAndPlatform(self: *MachO, obj: LinkObject, platforms: anytype) !v .ncmds = header.ncmds, .buffer = cmds_buffer, }; + // An input object file may have more than one build LC but we take the first one and bail. 
out[1] = while (it.next()) |cmd| switch (cmd.cmd()) { .BUILD_VERSION, .VERSION_MIN_MACOSX, @@ -588,150 +564,154 @@ fn inferCpuArchAndPlatform(self: *MachO, obj: LinkObject, platforms: anytype) !v } else null; } -fn validateCpuArch(self: *MachO, index: File.Index, cputype: macho.cpu_type_t) void { - const cpu_arch: std.Target.Cpu.Arch = switch (cputype) { - macho.CPU_TYPE_ARM64 => .aarch64, - macho.CPU_TYPE_X86_64 => .x86_64, - else => unreachable, - }; - if (self.options.cpu_arch.? != cpu_arch) { - return self.base.fatal("{}: invalid architecture '{s}', expected '{s}'", .{ - self.getFile(index).?.fmtPath(), - @tagName(cpu_arch), - @tagName(self.options.cpu_arch.?), - }); - } -} - -fn validatePlatform(self: *MachO, index: File.Index) void { - const self_platform = self.options.platform orelse return; - const file = self.getFile(index).?; - const other_platform: ?Options.Platform = switch (file) { - .object => |x| x.platform, - .dylib => |x| x.platform, - else => null, - }; - if (other_platform) |platform| { - if (self_platform.platform != platform.platform) { - return self.base.fatal( - "{}: object file was built for different platform: expected {s}, got {s}", - .{ file.fmtPath(), @tagName(self_platform.platform), @tagName(platform.platform) }, - ); - } - if (self_platform.version.value < platform.version.value) { - return self.base.warn( - "{}: object file was built for newer platform version: expected {}, got {}", - .{ - file.fmtPath(), - self_platform.version, - platform.version, - }, - ); - } - } -} +fn parsePositional(self: *MachO, obj: LinkObject) !void { + const tracy = trace(@src()); + defer tracy.end(); -fn addUndefinedGlobals(self: *MachO) !void { - const gpa = self.base.allocator; + log.debug("parsing positional {}", .{obj}); - try self.undefined_symbols.ensureUnusedCapacity(gpa, self.options.force_undefined_symbols.len); - for (self.options.force_undefined_symbols) |name| { - const off = try self.string_intern.insert(gpa, name); - const gop = try self.getOrCreateGlobal(off); - self.undefined_symbols.appendAssumeCapacity(gop.index); + const file = try std.fs.cwd().openFile(obj.path, .{}); + const fh = try self.addFileHandle(file); + var buffer: [Archive.SARMAG]u8 = undefined; + + const fat_arch: ?fat.Arch = try self.parseFatFile(obj, file); + const offset = if (fat_arch) |fa| fa.offset else 0; + + if (readMachHeader(file, offset) catch null) |h| blk: { + if (h.magic != macho.MH_MAGIC_64) break :blk; + switch (h.filetype) { + macho.MH_OBJECT => try self.addObject(obj, fh, offset), + macho.MH_DYLIB => if (self.options.cpu_arch) |_| { + _ = try self.addDylib(obj, fh, offset, true); + } else { + self.base.fatal("{s}: ignoring library as no architecture specified", .{obj.path}); + }, + else => self.base.fatal("{s}: unsupported input file type: {x}", .{ obj.path, h.filetype }), + } + return; } - - if (!self.options.dylib) { - const name = self.options.entry orelse "_main"; - const off = try self.string_intern.insert(gpa, name); - const gop = try self.getOrCreateGlobal(off); - self.entry_index = gop.index; + if (readArMagic(file, offset, &buffer) catch null) |ar_magic| blk: { + if (!mem.eql(u8, ar_magic, Archive.ARMAG)) break :blk; + if (self.options.cpu_arch) |_| { + try self.addArchive(obj, fh, fat_arch); + } else { + self.base.fatal("{s}: ignoring library as no architecture specified", .{obj.path}); + } + return; } - - { - const off = try self.string_intern.insert(gpa, "dyld_stub_binder"); - const gop = try self.getOrCreateGlobal(off); - self.dyld_stub_binder_index = gop.index; + 
blk: { + if (self.options.cpu_arch == null) { + self.base.fatal("{s}: ignoring library as no architecture specified", .{obj.path}); + } else { + const lib_stub = LibStub.loadFromFile(self.base.allocator, file) catch break :blk; + _ = try self.addTbd(obj, lib_stub, true); + } + return; } - { - const off = try self.string_intern.insert(gpa, "_objc_msgSend"); - const gop = try self.getOrCreateGlobal(off); - self.objc_msg_send_index = gop.index; - } + self.base.fatal("unknown filetype for positional argument: '{s}'", .{obj.path}); } -fn parsePositional(self: *MachO, obj: LinkObject) !void { - log.debug("parsing positional {}", .{obj}); +fn parseFatFile(self: *MachO, obj: LinkObject, file: std.fs.File) !?fat.Arch { + const fat_h = fat.readFatHeader(file) catch return null; + if (fat_h.magic != macho.FAT_MAGIC and fat_h.magic != macho.FAT_MAGIC_64) return null; + var fat_archs_buffer: [2]fat.Arch = undefined; + const fat_archs = try fat.parseArchs(file, fat_h, &fat_archs_buffer); + const fat_arch = if (self.options.cpu_arch) |cpu_arch| arch: { + for (fat_archs) |arch| { + if (arch.tag == cpu_arch) break :arch arch; + } + self.base.fatal("{s}: missing arch in universal file: expected {s}", .{ obj.path, @tagName(cpu_arch) }); + return error.MissingArch; + } else { + const err = try self.base.addErrorWithNotes(1 + fat_archs.len); + try err.addMsg("{s}: ignoring universal file as no architecture specified", .{obj.path}); + for (fat_archs) |arch| { + try err.addNote("universal file built for {s}", .{@tagName(arch.tag)}); + } + return error.NoArchSpecified; + }; + return fat_arch; +} - if (try self.parseObject(obj)) return; - if (try self.parseArchive(obj)) return; - if (try self.parseDylib(obj, true)) |_| return; - if (try self.parseTbd(obj, true)) |_| return; +pub fn readMachHeader(file: std.fs.File, offset: usize) !macho.mach_header_64 { + var buffer: [@sizeOf(macho.mach_header_64)]u8 = undefined; + const nread = try file.preadAll(&buffer, offset); + if (nread != buffer.len) return error.InputOutput; + const hdr = @as(*align(1) const macho.mach_header_64, @ptrCast(&buffer)).*; + return hdr; +} - self.base.fatal("unknown filetype for positional argument: '{s}'", .{obj.path}); +pub fn readArMagic(file: std.fs.File, offset: usize, buffer: *[Archive.SARMAG]u8) ![]const u8 { + const nread = try file.preadAll(buffer, offset); + if (nread != buffer.len) return error.InputOutput; + return buffer[0..Archive.SARMAG]; } -fn parseObject(self: *MachO, obj: LinkObject) !bool { +fn addObject(self: *MachO, obj: LinkObject, handle: File.HandleIndex, offset: u64) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.allocator; - const file = try std.fs.cwd().openFile(obj.path, .{}); - const fh = try self.addFileHandle(file); - - const header = file.reader().readStruct(macho.mach_header_64) catch return false; - try file.seekTo(0); - - if (header.filetype != macho.MH_OBJECT) return false; - const mtime: u64 = mtime: { + const file = self.getFileHandle(handle); const stat = file.stat() catch break :mtime 0; break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000))); }; const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .object = .{ + .offset = offset, .path = try gpa.dupe(u8, obj.path), - .file_handle = fh, + .file_handle = handle, .index = index, .mtime = mtime, } }); - const object = &self.files.items(.data)[index].object; - try object.parse(self); try self.objects.append(gpa, index); - self.validateCpuArch(index, header.cputype); - 
self.validatePlatform(index); +} - return true; +fn parseObjects(self: *MachO) !void { + var wg: WaitGroup = .{}; + + { + wg.reset(); + defer wg.wait(); + + for (self.objects.items) |index| { + self.base.thread_pool.spawnWg(&wg, parseObjectWorker, .{ self, index }); + } + for (self.dylibs.items) |index| { + self.base.thread_pool.spawnWg(&wg, parseDylibWorker, .{ self, index }); + } + } + + if (self.has_errors.swap(false, .seq_cst)) return error.FlushFailed; } -fn parseArchive(self: *MachO, obj: LinkObject) !bool { +fn parseObjectWorker(self: *MachO, index: File.Index) void { + const object = self.getFile(index).?.object; + object.parse(self) catch |err| { + switch (err) { + error.ParseFailed => {}, // reported + else => |e| self.base.fatal("{}: unexpected error occurred while parsing input file: {s}", .{ + object.fmtPath(), + @errorName(e), + }), + } + _ = self.has_errors.swap(true, .seq_cst); + }; +} + +fn addArchive(self: *MachO, obj: LinkObject, handle: File.HandleIndex, fat_arch: ?fat.Arch) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.allocator; - const file = try std.fs.cwd().openFile(obj.path, .{}); - const fh = try self.addFileHandle(file); - - const fat_arch: ?fat.Arch = if (fat.isFatLibrary(file)) blk: { - break :blk self.parseFatLibrary(obj.path, file) catch |err| switch (err) { - error.NoArchSpecified, error.MissingArch => return false, - else => |e| return e, - }; - } else null; - const offset = if (fat_arch) |ar| ar.offset else 0; - try file.seekTo(offset); - - const magic = file.reader().readBytesNoEof(Archive.SARMAG) catch return false; - if (!mem.eql(u8, &magic, Archive.ARMAG)) return false; - try file.seekTo(0); var archive = Archive{}; defer archive.deinit(gpa); - try archive.parse(self, obj.path, fh, fat_arch); + try archive.parse(self, obj.path, handle, fat_arch); - var has_parse_error = false; for (archive.objects.items) |extracted| { const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .object = extracted }); @@ -739,44 +719,8 @@ fn parseArchive(self: *MachO, obj: LinkObject) !bool { object.index = index; object.alive = obj.must_link or obj.needed or self.options.all_load; object.hidden = obj.hidden; - object.parse(self) catch |err| switch (err) { - error.ParseFailed => { - has_parse_error = true; - // TODO see below - // continue; - }, - else => |e| return e, - }; try self.objects.append(gpa, index); - // TODO this should come before reporting any parse errors - self.validateCpuArch(index, object.header.?.cputype); - self.validatePlatform(index); - - // Finally, we do a post-parse check for -ObjC to see if we need to force load this member - // anyhow. 
- object.alive = object.alive or (self.options.force_load_objc and object.hasObjc()); - } - if (has_parse_error) return error.ParseFailed; - - return true; -} - -fn parseFatLibrary(self: *MachO, path: []const u8, file: fs.File) !fat.Arch { - var buffer: [2]fat.Arch = undefined; - const fat_archs = try fat.parseArchs(file, &buffer); - const cpu_arch = self.options.cpu_arch orelse { - const err = try self.base.addErrorWithNotes(1 + fat_archs.len); - try err.addMsg("{s}: ignoring universal file as no architecture specified", .{path}); - for (fat_archs) |arch| { - try err.addNote("universal file built for {s}", .{@tagName(arch.tag)}); - } - return error.NoArchSpecified; - }; - for (fat_archs) |arch| { - if (arch.tag == cpu_arch) return arch; } - self.base.fatal("{s}: missing arch in universal file: expected {s}", .{ path, @tagName(cpu_arch) }); - return error.MissingArch; } const DylibOpts = struct { @@ -787,88 +731,65 @@ const DylibOpts = struct { reexport: bool = false, }; -fn parseDylib(self: *MachO, obj: LinkObject, explicit: bool) anyerror!?File.Index { +fn addDylib(self: *MachO, obj: LinkObject, handle: File.HandleIndex, offset: u64, explicit: bool) anyerror!File.Index { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.allocator; - - if (self.options.cpu_arch == null) { - self.base.fatal("{s}: ignoring library as no architecture specified", .{obj.path}); - return null; - } - - const file = try std.fs.cwd().openFile(obj.path, .{}); - defer file.close(); - - const fat_arch = if (fat.isFatLibrary(file)) blk: { - break :blk self.parseFatLibrary(obj.path, file) catch |err| switch (err) { - error.NoArchSpecified, error.MissingArch => return null, - else => |e| return e, - }; - } else null; - const offset = if (fat_arch) |ar| ar.offset else 0; - try file.seekTo(offset); - - const header = file.reader().readStruct(macho.mach_header_64) catch return null; - try file.seekTo(offset); - - if (header.filetype != macho.MH_DYLIB) return null; - const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .dylib = .{ + .offset = offset, + .file_handle = handle, .path = obj.path, .index = index, .needed = obj.needed, .weak = obj.weak, .reexport = obj.reexport, .explicit = explicit, + .umbrella = index, } }); - const dylib = &self.files.items(.data)[index].dylib; - try dylib.parse(self, file, fat_arch); - try self.dylibs.append(gpa, index); - self.validateCpuArch(index, header.cputype); - self.validatePlatform(index); return index; } -fn parseTbd(self: *MachO, obj: LinkObject, explicit: bool) anyerror!?File.Index { +fn addTbd(self: *MachO, obj: LinkObject, lib_stub: LibStub, explicit: bool) anyerror!File.Index { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.allocator; - const file = try std.fs.cwd().openFile(obj.path, .{}); - defer file.close(); - - var lib_stub = LibStub.loadFromFile(gpa, file) catch return null; // TODO actually handle different errors - defer lib_stub.deinit(); - - if (lib_stub.inner.len == 0) return null; - - const cpu_arch = self.options.cpu_arch orelse { - self.base.fatal("{s}: ignoring library as no architecture specified", .{obj.path}); - return null; - }; const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .dylib = .{ + .offset = 0, + .lib_stub = lib_stub, .path = obj.path, .index = index, .needed = obj.needed, .weak = obj.weak, .reexport = obj.reexport, .explicit = explicit, + .umbrella = index, } }); - const dylib = &self.files.items(.data)[index].dylib; - try 
dylib.parseTbd(cpu_arch, self.options.platform, lib_stub, self); try self.dylibs.append(gpa, index); - self.validatePlatform(index); return index; } +fn parseDylibWorker(self: *MachO, index: File.Index) void { + const dylib = self.getFile(index).?.dylib; + dylib.parse(self) catch |err| { + switch (err) { + error.ParseFailed => {}, // reported already + else => |e| self.base.fatal("{s}: unexpected error occurred while parsing input file: {s}", .{ + dylib.path, + @errorName(e), + }), + } + _ = self.has_errors.swap(true, .seq_cst); + }; +} + /// According to ld64's manual, public (i.e., system) dylibs/frameworks are hoisted into the final /// image unless overriden by -no_implicit_dylibs. fn isHoisted(self: *MachO, install_name: []const u8) bool { @@ -971,10 +892,22 @@ fn parseDependentDylibs( .tag = .obj, .weak = is_weak, }; + const file = try std.fs.cwd().openFile(link_obj.path, .{}); + const fh = try self.addFileHandle(file); + + const fat_arch = try self.parseFatFile(link_obj, file); + const offset = if (fat_arch) |fa| fa.offset else 0; + const file_index = file_index: { - if (try self.parseDylib(link_obj, false)) |file| break :file_index file; - if (try self.parseTbd(link_obj, false)) |file| break :file_index file; - break :file_index @as(File.Index, 0); + if (readMachHeader(file, offset) catch null) |h| blk: { + if (h.magic != macho.MH_MAGIC_64) break :blk; + switch (h.filetype) { + macho.MH_DYLIB => break :file_index try self.addDylib(link_obj, fh, offset, false), + else => break :file_index @as(File.Index, 0), + } + } + const lib_stub = LibStub.loadFromFile(gpa, file) catch break :file_index @as(File.Index, 0); + break :file_index try self.addTbd(link_obj, lib_stub, false); }; dependents.appendAssumeCapacity(file_index); } @@ -983,13 +916,13 @@ fn parseDependentDylibs( for (dylib.dependents.items, dependents.items) |id, file_index| { if (self.getFile(file_index)) |file| { const dep_dylib = file.dylib; + try dep_dylib.parse(self); // TODO in parallel dep_dylib.hoisted = self.isHoisted(id.name); - if (self.getFile(dep_dylib.umbrella) == null) { - dep_dylib.umbrella = dylib.umbrella; - } + dep_dylib.umbrella = dylib.umbrella; if (!dep_dylib.hoisted) { const umbrella = dep_dylib.getUmbrella(self); for (dep_dylib.exports.items(.name), dep_dylib.exports.items(.flags)) |off, flags| { + // TODO rethink this entire algorithm try umbrella.addExport(gpa, dep_dylib.getString(off), flags); } try umbrella.rpaths.ensureUnusedCapacity(gpa, dep_dylib.rpaths.keys().len); @@ -1010,19 +943,21 @@ fn parseDependentDylibs( /// 5. Remove references to dead objects/shared objects /// 6. Re-run symbol resolution on pruned objects and shared objects sets. pub fn resolveSymbols(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); + { + const tracy = trace(@src()); + defer tracy.end(); - // Resolve symbols on the set of all objects and shared objects (even if some are unneeded). - for (self.objects.items) |index| self.getFile(index).?.resolveSymbols(self); - for (self.dylibs.items) |index| self.getFile(index).?.resolveSymbols(self); + // Resolve symbols on the set of all objects and shared objects (even if some are unneeded). + for (self.objects.items) |index| try self.getFile(index).?.resolveSymbols(self); + for (self.dylibs.items) |index| try self.getFile(index).?.resolveSymbols(self); + if (self.getInternalObject()) |obj| try obj.resolveSymbols(self); + } // Mark live objects. self.markLive(); // Reset state of all globals after marking live objects. 
- for (self.objects.items) |index| self.getFile(index).?.resetGlobals(self); - for (self.dylibs.items) |index| self.getFile(index).?.resetGlobals(self); + self.resolver.reset(); // Prune dead objects. var i: usize = 0; @@ -1035,51 +970,39 @@ pub fn resolveSymbols(self: *MachO) !void { } else i += 1; } - // Re-resolve the symbols. - for (self.objects.items) |index| self.getFile(index).?.resolveSymbols(self); - for (self.dylibs.items) |index| self.getFile(index).?.resolveSymbols(self); + { + const tracy = trace(@src()); + defer tracy.end(); + + // Re-resolve the symbols. + for (self.objects.items) |index| try self.getFile(index).?.resolveSymbols(self); + for (self.dylibs.items) |index| try self.getFile(index).?.resolveSymbols(self); + if (self.getInternalObject()) |obj| try obj.resolveSymbols(self); + } + + { + const tracy = trace(@src()); + defer tracy.end(); + + // Merge symbol visibility + for (self.objects.items) |index| self.getFile(index).?.object.mergeSymbolVisibility(self); + } } fn markLive(self: *MachO) void { const tracy = trace(@src()); defer tracy.end(); - for (self.undefined_symbols.items) |index| { - if (self.getSymbol(index).getFile(self)) |file| { - if (file == .object) file.object.alive = true; - } - } - if (self.entry_index) |index| { - const sym = self.getSymbol(index); - if (sym.getFile(self)) |file| { - if (file == .object) file.object.alive = true; - } - } for (self.objects.items) |index| { const object = self.getFile(index).?.object; if (object.alive) object.markLive(self); } -} - -fn parseDebugInfo(self: *MachO) !void { - for (self.objects.items) |index| { - try self.getFile(index).?.object.parseDebugInfo(self); - } + if (self.getInternalObject()) |obj| obj.markLive(self); } fn deadStripDylibs(self: *MachO) void { - for (&[_]?Symbol.Index{ - self.entry_index, - self.dyld_stub_binder_index, - self.objc_msg_send_index, - }) |index| { - if (index) |idx| { - const sym = self.getSymbol(idx); - if (sym.getFile(self)) |file| { - if (file == .dylib) file.dylib.referenced = true; - } - } - } + const tracy = trace(@src()); + defer tracy.end(); for (self.dylibs.items) |index| { self.getFile(index).?.dylib.markReferenced(self); @@ -1096,152 +1019,84 @@ fn deadStripDylibs(self: *MachO) void { } } -fn convertTentativeDefinitions(self: *MachO) !void { - for (self.objects.items) |index| { - try self.getFile(index).?.object.convertTentativeDefinitions(self); - } -} - -fn markImportsAndExports(self: *MachO) void { - for (self.objects.items) |index| { - for (self.getFile(index).?.getSymbols()) |sym_index| { - const sym = self.getSymbol(sym_index); - const file = sym.getFile(self) orelse continue; - if (sym.visibility != .global) continue; - if (file == .dylib and !sym.flags.abs) { - sym.flags.import = true; - continue; - } - if (file.getIndex() == index) { - sym.flags.@"export" = true; - } - } - } +fn convertTentativeDefsAndResolveSpecialSymbols(self: *MachO) !void { + var wg: WaitGroup = .{}; - for (self.undefined_symbols.items) |index| { - const sym = self.getSymbol(index); - if (sym.getFile(self)) |file| { - if (sym.visibility != .global) continue; - if (file == .dylib and !sym.flags.abs) sym.flags.import = true; + { + wg.reset(); + defer wg.wait(); + for (self.objects.items) |index| { + self.base.thread_pool.spawnWg(&wg, convertTentativeDefinitionsWorker, .{ self, index }); } - } - - for (&[_]?Symbol.Index{ - self.entry_index, - self.dyld_stub_binder_index, - self.objc_msg_send_index, - }) |index| { - if (index) |idx| { - const sym = self.getSymbol(idx); - if (sym.getFile(self)) 
|file| { - if (file == .dylib) sym.flags.import = true; - } + if (self.getInternalObject()) |obj| { + self.base.thread_pool.spawnWg(&wg, resolveSpecialSymbolsWorker, .{ self, obj }); } } -} -fn initOutputSections(self: *MachO) !void { - for (self.objects.items) |index| { - const object = self.getFile(index).?.object; - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index) orelse continue; - if (!atom.flags.alive) continue; - atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(self), self); - } - } - if (self.getInternalObject()) |object| { - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index) orelse continue; - if (!atom.flags.alive) continue; - atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(self), self); - } - } - self.data_sect_index = self.getSectionByName("__DATA", "__data") orelse - try self.addSection("__DATA", "__data", .{}); + if (self.has_errors.swap(false, .seq_cst)) return error.FlushFailed; } -fn resolveSyntheticSymbols(self: *MachO) !void { - const internal = self.getInternalObject() orelse return; - - if (!self.options.dylib) { - self.mh_execute_header_index = try internal.addSymbol("__mh_execute_header", self); - const sym = self.getSymbol(self.mh_execute_header_index.?); - sym.flags.@"export" = true; - sym.flags.dyn_ref = true; - sym.visibility = .global; - } else if (self.options.dylib) { - self.mh_dylib_header_index = try internal.addSymbol("__mh_dylib_header", self); - } - - self.dso_handle_index = try internal.addSymbol("___dso_handle", self); - self.dyld_private_index = try internal.addSymbol("dyld_private", self); +fn convertTentativeDefinitionsWorker(self: *MachO, index: File.Index) void { + const tracy = trace(@src()); + defer tracy.end(); - { - const gpa = self.base.allocator; - var boundary_symbols = std.AutoHashMap(Symbol.Index, void).init(gpa); - defer boundary_symbols.deinit(); + const object = self.getFile(index).?.object; + object.convertTentativeDefinitions(self) catch |err| { + self.base.fatal("{s}: unexpected error occurred while converting tentative symbols into defined symbols: {s}", .{ + object.fmtPath(), + @errorName(err), + }); + _ = self.has_errors.swap(true, .seq_cst); + }; +} - for (self.objects.items) |index| { - const object = self.getFile(index).?.object; - for (object.symbols.items, 0..) 
|sym_index, i| { - const nlist = object.symtab.items(.nlist)[i]; - const name = self.getSymbol(sym_index).getName(self); - if (!nlist.undf() or !nlist.ext()) continue; - if (mem.startsWith(u8, name, "segment$start$") or - mem.startsWith(u8, name, "segment$stop$") or - mem.startsWith(u8, name, "section$start$") or - mem.startsWith(u8, name, "section$stop$")) - { - _ = try boundary_symbols.put(sym_index, {}); - } - } - } +fn resolveSpecialSymbolsWorker(self: *MachO, obj: *InternalObject) void { + const tracy = trace(@src()); + defer tracy.end(); - try self.boundary_symbols.ensureTotalCapacityPrecise(gpa, boundary_symbols.count()); + obj.resolveBoundarySymbols(self) catch |err| { + self.base.fatal("unexpected error occurred while resolving boundary symbols: {s}", .{ + @errorName(err), + }); + _ = self.has_errors.swap(true, .seq_cst); + return; + }; + obj.resolveObjcMsgSendSymbols(self) catch |err| { + self.base.fatal("unexpected error occurred while resolving ObjC msgsend stubs: {s}", .{ + @errorName(err), + }); + _ = self.has_errors.swap(true, .seq_cst); + }; +} - var it = boundary_symbols.iterator(); - while (it.next()) |entry| { - _ = try internal.addSymbol(self.getSymbol(entry.key_ptr.*).getName(self), self); - self.boundary_symbols.appendAssumeCapacity(entry.key_ptr.*); - } +fn markImportsAndExports(self: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + for (self.objects.items) |index| { + self.getFile(index).?.markImportsAndExports(self); + } + if (self.getInternalObject()) |obj| { + obj.asFile().markImportsAndExports(self); } } -fn createObjcSections(self: *MachO) !void { - const gpa = self.base.allocator; - var objc_msgsend_syms = std.AutoArrayHashMap(Symbol.Index, void).init(gpa); - defer objc_msgsend_syms.deinit(); - +fn initOutputSections(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); for (self.objects.items) |index| { - const object = self.getFile(index).?.object; - - for (object.symbols.items, 0..) 
|sym_index, i| { - const nlist_idx = @as(Symbol.Index, @intCast(i)); - const nlist = object.symtab.items(.nlist)[nlist_idx]; - if (!nlist.ext()) continue; - if (!nlist.undf()) continue; - - const sym = self.getSymbol(sym_index); - if (sym.getFile(self) != null) continue; - if (mem.startsWith(u8, sym.getName(self), "_objc_msgSend$")) { - _ = try objc_msgsend_syms.put(sym_index, {}); - } - } + try self.getFile(index).?.initOutputSections(self); } - - for (objc_msgsend_syms.keys()) |sym_index| { - const internal = self.getInternalObject().?; - const sym = self.getSymbol(sym_index); - _ = try internal.addSymbol(sym.getName(self), self); - sym.visibility = .hidden; - const name = eatPrefix(sym.getName(self), "_objc_msgSend$").?; - const selrefs_index = try internal.addObjcMsgsendSections(name, self); - try sym.addExtra(.{ .objc_selrefs = selrefs_index }, self); - sym.flags.objc_stubs = true; + if (self.getInternalObject()) |object| { + try object.asFile().initOutputSections(self); } + self.data_sect_index = self.getSectionByName("__DATA", "__data") orelse + try self.addSection("__DATA", "__data", .{}); } pub fn dedupLiterals(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = self.base.allocator; var lp: LiteralPool = .{}; defer lp.deinit(gpa); @@ -1253,44 +1108,24 @@ pub fn dedupLiterals(self: *MachO) !void { try object.resolveLiterals(&lp, self); } - for (self.objects.items) |index| { - self.getFile(index).?.object.dedupLiterals(lp, self); - } - if (self.getInternalObject()) |object| { - object.dedupLiterals(lp, self); + var wg: WaitGroup = .{}; + { + wg.reset(); + defer wg.wait(); + for (self.objects.items) |index| { + self.base.thread_pool.spawnWg(&wg, File.dedupLiterals, .{ self.getFile(index).?, lp, self }); + } + if (self.getInternalObject()) |object| { + self.base.thread_pool.spawnWg(&wg, File.dedupLiterals, .{ object.asFile(), lp, self }); + } } + + if (self.has_errors.swap(false, .seq_cst)) return error.FlushFailed; } -fn claimUnresolved(self: *MachO) error{OutOfMemory}!void { +fn claimUnresolved(self: *MachO) void { for (self.objects.items) |index| { - const object = self.getFile(index).?.object; - - for (object.symbols.items, 0..) |sym_index, i| { - const nlist_idx = @as(Symbol.Index, @intCast(i)); - const nlist = object.symtab.items(.nlist)[nlist_idx]; - if (!nlist.ext()) continue; - if (!nlist.undf()) continue; - - const sym = self.getSymbol(sym_index); - if (sym.getFile(self) != null) continue; - - const is_import = switch (self.options.undefined_treatment) { - .@"error" => false, - .warn, .suppress => nlist.weakRef(), - .dynamic_lookup => true, - }; - if (is_import) { - sym.value = 0; - sym.atom = 0; - sym.nlist_idx = 0; - sym.file = self.internal_object_index.?; - sym.flags.weak = false; - sym.flags.weak_ref = nlist.weakRef(); - sym.flags.import = is_import; - sym.visibility = .global; - try self.getInternalObject().?.symbols.append(self.base.allocator, sym_index); - } - } + self.getFile(index).?.object.claimUnresolved(self); } } @@ -1298,49 +1133,42 @@ fn scanRelocs(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - for (self.objects.items) |index| { - try self.getFile(index).?.object.scanRelocs(self); + var wg: WaitGroup = .{}; + + { + wg.reset(); + defer wg.wait(); + for (self.objects.items) |index| { + self.base.thread_pool.spawnWg(&wg, scanRelocsWorker, .{ self, self.getFile(index).? 
}); + } + if (self.getInternalObject()) |obj| { + self.base.thread_pool.spawnWg(&wg, scanRelocsWorker, .{ self, obj.asFile() }); + } } + if (self.has_errors.swap(false, .seq_cst)) return error.FlushFailed; + try self.reportUndefs(); - if (self.entry_index) |index| { - const sym = self.getSymbol(index); - if (sym.getFile(self) != null) { - if (sym.flags.import) sym.flags.stubs = true; - } + for (self.objects.items) |index| { + try self.getFile(index).?.createSymbolIndirection(self); } - - if (self.dyld_stub_binder_index) |index| { - const sym = self.getSymbol(index); - if (sym.getFile(self) != null) sym.flags.got = true; + for (self.dylibs.items) |index| { + try self.getFile(index).?.createSymbolIndirection(self); } - - if (self.objc_msg_send_index) |index| { - const sym = self.getSymbol(index); - if (sym.getFile(self) != null) - sym.flags.got = true; // TODO is it always needed, or only if we are synthesising fast stubs? + if (self.getInternalObject()) |obj| { + try obj.asFile().createSymbolIndirection(self); } +} - for (self.symbols.items, 0..) |*symbol, i| { - const index = @as(Symbol.Index, @intCast(i)); - if (symbol.flags.got) { - log.debug("'{s}' needs GOT", .{symbol.getName(self)}); - try self.got.addSymbol(index, self); - } - if (symbol.flags.stubs) { - log.debug("'{s}' needs STUBS", .{symbol.getName(self)}); - try self.stubs.addSymbol(index, self); - } - if (symbol.flags.tlv_ptr) { - log.debug("'{s}' needs TLV pointer", .{symbol.getName(self)}); - try self.tlv_ptr.addSymbol(index, self); - } - if (symbol.flags.objc_stubs) { - log.debug("'{s}' needs OBJC STUBS", .{symbol.getName(self)}); - try self.objc_stubs.addSymbol(index, self); - } - } +fn scanRelocsWorker(self: *MachO, file: File) void { + file.scanRelocs(self) catch |err| { + self.base.fatal("{}: failed to scan relocations: {s}", .{ + file.fmtPath(), + @errorName(err), + }); + _ = self.has_errors.swap(true, .seq_cst); + }; } fn reportUndefs(self: *MachO) !void { @@ -1362,7 +1190,7 @@ fn reportUndefs(self: *MachO) !void { var has_undefs = false; var it = self.undefs.iterator(); while (it.next()) |entry| { - const undef_sym = self.getSymbol(entry.key_ptr.*); + const undef_sym = entry.key_ptr.getSymbol(self).?; const notes = entry.value_ptr.*; const nnotes = @min(notes.items.len, max_notes) + @intFromBool(notes.items.len > max_notes); @@ -1372,8 +1200,9 @@ fn reportUndefs(self: *MachO) !void { var inote: usize = 0; while (inote < @min(notes.items.len, max_notes)) : (inote += 1) { - const atom = self.getAtom(notes.items[inote]).?; - const file = atom.getFile(self); + const note = notes.items[inote]; + const file = self.getFile(note.file).?; + const atom = note.getAtom(self).?; try err.addNote("referenced by {}:{s}", .{ file.fmtPath(), atom.getName(self) }); } @@ -1382,46 +1211,6 @@ fn reportUndefs(self: *MachO) !void { try err.addNote("referenced {d} more times", .{remaining}); } } - - for (self.undefined_symbols.items) |index| { - const sym = self.getSymbol(index); - if (sym.getFile(self) != null) continue; // If undefined in an object file, will be reported above - has_undefs = true; - const err = try addFn(&self.base, 1); - try err.addMsg("undefined symbol: {s}", .{sym.getName(self)}); - try err.addNote("-u command line option", .{}); - } - - if (self.entry_index) |index| { - const sym = self.getSymbol(index); - if (sym.getFile(self) == null) { - has_undefs = true; - const err = try addFn(&self.base, 1); - try err.addMsg("undefined symbol: {s}", .{sym.getName(self)}); - try err.addNote("implicit entry/start for main 
executable", .{}); - } - } - - if (self.dyld_stub_binder_index) |index| { - const sym = self.getSymbol(index); - if (sym.getFile(self) == null and self.stubs_sect_index != null) { - has_undefs = true; - const err = try addFn(&self.base, 1); - try err.addMsg("undefined symbol: {s}", .{sym.getName(self)}); - try err.addNote("implicit -u command line option", .{}); - } - } - - if (self.objc_msg_send_index) |index| { - const sym = self.getSymbol(index); - if (sym.getFile(self) == null and self.objc_stubs_sect_index != null) { - has_undefs = true; - const err = try addFn(&self.base, 1); - try err.addMsg("undefined symbol: {s}", .{sym.getName(self)}); - try err.addNote("implicit -u command line option", .{}); - } - } - if (has_undefs) return error.UndefinedSymbols; } @@ -1482,46 +1271,50 @@ fn initSyntheticSections(self: *MachO) !void { self.eh_frame_sect_index = try self.addSection("__TEXT", "__eh_frame", .{}); } - for (self.boundary_symbols.items) |sym_index| { + if (self.getInternalObject()) |obj| { const gpa = self.base.allocator; - const sym = self.getSymbol(sym_index); - const name = sym.getName(self); - - if (eatPrefix(name, "segment$start$")) |segname| { - if (self.getSegmentByName(segname) == null) { // TODO check segname is valid - const prot = getSegmentProt(segname); - _ = try self.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = makeStaticString(segname), - .initprot = prot, - .maxprot = prot, - }); - } - } else if (eatPrefix(name, "segment$stop$")) |segname| { - if (self.getSegmentByName(segname) == null) { // TODO check segname is valid - const prot = getSegmentProt(segname); - _ = try self.segments.append(gpa, .{ - .cmdsize = @sizeOf(macho.segment_command_64), - .segname = makeStaticString(segname), - .initprot = prot, - .maxprot = prot, - }); - } - } else if (eatPrefix(name, "section$start$")) |actual_name| { - const sep = mem.indexOfScalar(u8, actual_name, '$').?; // TODO error rather than a panic - const segname = actual_name[0..sep]; // TODO check segname is valid - const sectname = actual_name[sep + 1 ..]; // TODO check sectname is valid - if (self.getSectionByName(segname, sectname) == null) { - _ = try self.addSection(segname, sectname, .{}); - } - } else if (eatPrefix(name, "section$stop$")) |actual_name| { - const sep = mem.indexOfScalar(u8, actual_name, '$').?; // TODO error rather than a panic - const segname = actual_name[0..sep]; // TODO check segname is valid - const sectname = actual_name[sep + 1 ..]; // TODO check sectname is valid - if (self.getSectionByName(segname, sectname) == null) { - _ = try self.addSection(segname, sectname, .{}); - } - } else unreachable; + + for (obj.boundary_symbols.items) |sym_index| { + const ref = obj.getSymbolRef(sym_index, self); + const sym = ref.getSymbol(self).?; + const name = sym.getName(self); + + if (eatPrefix(name, "segment$start$")) |segname| { + if (self.getSegmentByName(segname) == null) { // TODO check segname is valid + const prot = getSegmentProt(segname); + _ = try self.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString(segname), + .initprot = prot, + .maxprot = prot, + }); + } + } else if (eatPrefix(name, "segment$stop$")) |segname| { + if (self.getSegmentByName(segname) == null) { // TODO check segname is valid + const prot = getSegmentProt(segname); + _ = try self.segments.append(gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString(segname), + .initprot = prot, + .maxprot = prot, + }); + } + } 
else if (eatPrefix(name, "section$start$")) |actual_name| { + const sep = mem.indexOfScalar(u8, actual_name, '$').?; // TODO error rather than a panic + const segname = actual_name[0..sep]; // TODO check segname is valid + const sectname = actual_name[sep + 1 ..]; // TODO check sectname is valid + if (self.getSectionByName(segname, sectname) == null) { + _ = try self.addSection(segname, sectname, .{}); + } + } else if (eatPrefix(name, "section$stop$")) |actual_name| { + const sep = mem.indexOfScalar(u8, actual_name, '$').?; // TODO error rather than a panic + const segname = actual_name[0..sep]; // TODO check segname is valid + const sectname = actual_name[sep + 1 ..]; // TODO check sectname is valid + if (self.getSectionByName(segname, sectname) == null) { + _ = try self.addSection(segname, sectname, .{}); + } + } else unreachable; + } } } @@ -1627,16 +1420,17 @@ pub fn sortSections(self: *MachO) !void { } for (self.objects.items) |index| { - for (self.getFile(index).?.object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index) orelse continue; - if (!atom.flags.alive) continue; + const file = self.getFile(index).?; + for (file.getAtoms()) |atom_index| { + const atom = file.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; atom.out_n_sect = backlinks[atom.out_n_sect]; } } if (self.getInternalObject()) |object| { - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index) orelse continue; - if (!atom.flags.alive) continue; + for (object.getAtoms()) |atom_index| { + const atom = object.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; atom.out_n_sect = backlinks[atom.out_n_sect]; } } @@ -1663,34 +1457,20 @@ pub fn addAtomsToSections(self: *MachO) !void { defer tracy.end(); for (self.objects.items) |index| { - const object = self.getFile(index).?.object; - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index) orelse continue; - if (!atom.flags.alive) continue; + const file = self.getFile(index).?; + for (file.getAtoms()) |atom_index| { + const atom = file.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; const atoms = &self.sections.items(.atoms)[atom.out_n_sect]; - try atoms.append(self.base.allocator, atom_index); - } - for (object.symbols.items) |sym_index| { - const sym = self.getSymbol(sym_index); - const atom = sym.getAtom(self) orelse continue; - if (!atom.flags.alive) continue; - if (sym.getFile(self).?.getIndex() != index) continue; - sym.out_n_sect = atom.out_n_sect; + try atoms.append(self.base.allocator, .{ .index = atom.atom_index, .file = index }); } } if (self.getInternalObject()) |object| { - for (object.atoms.items) |atom_index| { - const atom = self.getAtom(atom_index) orelse continue; - if (!atom.flags.alive) continue; + for (object.getAtoms()) |atom_index| { + const atom = object.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; const atoms = &self.sections.items(.atoms)[atom.out_n_sect]; - try atoms.append(self.base.allocator, atom_index); - } - for (object.symbols.items) |sym_index| { - const sym = self.getSymbol(sym_index); - const atom = sym.getAtom(self) orelse continue; - if (!atom.flags.alive) continue; - if (sym.getFile(self).?.getIndex() != object.index) continue; - sym.out_n_sect = atom.out_n_sect; + try atoms.append(self.base.allocator, .{ .index = atom.atom_index, .file = object.index }); } } } @@ -1724,32 +1504,41 @@ fn calcSectionSizes(self: *MachO) !void { header.@"align" = 
3; } - const slice = self.sections.slice(); - for (slice.items(.header), slice.items(.atoms)) |*header, atoms| { - if (atoms.items.len == 0) continue; - if (self.requiresThunks() and header.isCode()) continue; - - for (atoms.items) |atom_index| { - const atom = self.getAtom(atom_index).?; - const atom_alignment = try math.powi(u32, 2, atom.alignment); - const offset = mem.alignForward(u64, header.size, atom_alignment); - const padding = offset - header.size; - atom.value = offset; - header.size += padding + atom.size; - header.@"align" = @max(header.@"align", atom.alignment); - } - } - - if (self.requiresThunks()) { + var wg: WaitGroup = .{}; + { + wg.reset(); + defer wg.wait(); + const slice = self.sections.slice(); for (slice.items(.header), slice.items(.atoms), 0..) |header, atoms, i| { - if (!header.isCode()) continue; if (atoms.items.len == 0) continue; + if (self.requiresThunks() and header.isCode()) continue; + self.base.thread_pool.spawnWg(&wg, calcSectionSizeWorker, .{ self, @as(u8, @intCast(i)) }); + } + + if (self.requiresThunks()) { + for (slice.items(.header), slice.items(.atoms), 0..) |header, atoms, i| { + if (!header.isCode()) continue; + if (atoms.items.len == 0) continue; + self.base.thread_pool.spawnWg(&wg, createThunksWorker, .{ self, @as(u8, @intCast(i)) }); + } + } - // Create jump/branch range extenders if needed. - try thunks.createThunks(@intCast(i), self); + // At this point, we can also calculate symtab and data-in-code linkedit section sizes + for (self.objects.items) |index| { + self.base.thread_pool.spawnWg(&wg, File.calcSymtabSize, .{ self.getFile(index).?, self }); + } + for (self.dylibs.items) |index| { + self.base.thread_pool.spawnWg(&wg, File.calcSymtabSize, .{ self.getFile(index).?, self }); + } + if (self.getInternalObject()) |obj| { + self.base.thread_pool.spawnWg(&wg, File.calcSymtabSize, .{ obj.asFile(), self }); } } + if (self.has_errors.swap(false, .seq_cst)) return error.FlushFailed; + + try self.calcSymtabSize(); + if (self.got_sect_index) |idx| { const header = &self.sections.items(.header)[idx]; header.size = self.got.size(); @@ -1795,6 +1584,54 @@ fn calcSectionSizes(self: *MachO) !void { } } +fn calcSectionSizeWorker(self: *MachO, sect_id: u8) void { + const tracy = trace(@src()); + defer tracy.end(); + const doWork = struct { + fn doWork( + macho_file: *MachO, + header: *macho.section_64, + atoms: []const Ref, + ) !void { + for (atoms) |ref| { + const atom = ref.getAtom(macho_file).?; + const p2align = atom.alignment; + const atom_alignment = try math.powi(u32, 2, p2align); + const offset = mem.alignForward(u64, header.size, atom_alignment); + const padding = offset - header.size; + atom.value = offset; + header.size += padding + atom.size; + header.@"align" = @max(header.@"align", p2align); + } + } + }.doWork; + const slice = self.sections.slice(); + const header = &slice.items(.header)[sect_id]; + const atoms = slice.items(.atoms)[sect_id].items; + doWork(self, header, atoms) catch |err| { + self.base.fatal("failed to calculate size of section '{s},{s}': {s}", .{ + header.segName(), + header.sectName(), + @errorName(err), + }); + _ = self.has_errors.swap(true, .seq_cst); + }; +} + +fn createThunksWorker(self: *MachO, sect_id: u8) void { + const tracy = trace(@src()); + defer tracy.end(); + thunks.createThunks(sect_id, self) catch |err| { + const header = self.sections.items(.header)[sect_id]; + self.base.fatal("failed to create thunks and calculate size of section '{s},{s}': {s}", .{ + header.segName(), + header.sectName(), + 
@errorName(err), + }); + _ = self.has_errors.swap(true, .seq_cst); + }; +} + fn initSegments(self: *MachO) !void { const gpa = self.base.allocator; const slice = self.sections.slice(); @@ -1964,319 +1801,355 @@ fn allocateSegments(self: *MachO) void { } fn allocateSyntheticSymbols(self: *MachO) void { - const text_seg = self.getTextSegment(); - - if (self.mh_execute_header_index) |index| { - const global = self.getSymbol(index); - global.value = text_seg.vmaddr; - } + if (self.getInternalObject()) |obj| { + obj.allocateSyntheticSymbols(self); - if (self.data_sect_index) |idx| { - const sect = self.sections.items(.header)[idx]; - for (&[_]?Symbol.Index{ - self.dso_handle_index, - self.mh_dylib_header_index, - self.dyld_private_index, - }) |maybe_index| { - if (maybe_index) |index| { - const global = self.getSymbol(index); - global.value = sect.addr; - global.out_n_sect = idx; - } - } - } + const text_seg = self.getTextSegment(); - for (self.boundary_symbols.items) |sym_index| { - const sym = self.getSymbol(sym_index); - const name = sym.getName(self); + for (obj.boundary_symbols.items) |sym_index| { + const ref = obj.getSymbolRef(sym_index, self); + const sym = ref.getSymbol(self).?; + const name = sym.getName(self); - sym.flags.@"export" = false; - sym.value = text_seg.vmaddr; + sym.value = text_seg.vmaddr; - if (mem.startsWith(u8, name, "segment$start$")) { - const segname = name["segment$start$".len..]; - if (self.getSegmentByName(segname)) |seg_id| { - const seg = self.segments.items[seg_id]; - sym.value = seg.vmaddr; - } - } else if (mem.startsWith(u8, name, "segment$stop$")) { - const segname = name["segment$stop$".len..]; - if (self.getSegmentByName(segname)) |seg_id| { - const seg = self.segments.items[seg_id]; - sym.value = seg.vmaddr + seg.vmsize; - } - } else if (mem.startsWith(u8, name, "section$start$")) { - const actual_name = name["section$start$".len..]; - const sep = mem.indexOfScalar(u8, actual_name, '$').?; // TODO error rather than a panic - const segname = actual_name[0..sep]; - const sectname = actual_name[sep + 1 ..]; - if (self.getSectionByName(segname, sectname)) |sect_id| { - const sect = self.sections.items(.header)[sect_id]; - sym.value = sect.addr; - sym.out_n_sect = sect_id; - } - } else if (mem.startsWith(u8, name, "section$stop$")) { - const actual_name = name["section$stop$".len..]; - const sep = mem.indexOfScalar(u8, actual_name, '$').?; // TODO error rather than a panic - const segname = actual_name[0..sep]; - const sectname = actual_name[sep + 1 ..]; - if (self.getSectionByName(segname, sectname)) |sect_id| { - const sect = self.sections.items(.header)[sect_id]; - sym.value = sect.addr + sect.size; - sym.out_n_sect = sect_id; - } - } else unreachable; + if (mem.startsWith(u8, name, "segment$start$")) { + const segname = name["segment$start$".len..]; + if (self.getSegmentByName(segname)) |seg_id| { + const seg = self.segments.items[seg_id]; + sym.value = seg.vmaddr; + } + } else if (mem.startsWith(u8, name, "segment$stop$")) { + const segname = name["segment$stop$".len..]; + if (self.getSegmentByName(segname)) |seg_id| { + const seg = self.segments.items[seg_id]; + sym.value = seg.vmaddr + seg.vmsize; + } + } else if (mem.startsWith(u8, name, "section$start$")) { + const actual_name = name["section$start$".len..]; + const sep = mem.indexOfScalar(u8, actual_name, '$').?; // TODO error rather than a panic + const segname = actual_name[0..sep]; + const sectname = actual_name[sep + 1 ..]; + if (self.getSectionByName(segname, sectname)) |sect_id| { + const sect 
= self.sections.items(.header)[sect_id]; + sym.value = sect.addr; + sym.out_n_sect = sect_id; + } + } else if (mem.startsWith(u8, name, "section$stop$")) { + const actual_name = name["section$stop$".len..]; + const sep = mem.indexOfScalar(u8, actual_name, '$').?; // TODO error rather than a panic + const segname = actual_name[0..sep]; + const sectname = actual_name[sep + 1 ..]; + if (self.getSectionByName(segname, sectname)) |sect_id| { + const sect = self.sections.items(.header)[sect_id]; + sym.value = sect.addr + sect.size; + sym.out_n_sect = sect_id; + } + } else unreachable; + } } if (self.objc_stubs.symbols.items.len > 0) { const addr = self.sections.items(.header)[self.objc_stubs_sect_index.?].addr; - for (self.objc_stubs.symbols.items, 0..) |sym_index, idx| { - const sym = self.getSymbol(sym_index); + for (self.objc_stubs.symbols.items, 0..) |ref, idx| { + const sym = ref.getSymbol(self).?; sym.value = addr + idx * ObjcStubsSection.entrySize(self.options.cpu_arch.?); sym.out_n_sect = self.objc_stubs_sect_index.?; } } } -fn initDyldInfoSections(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const gpa = self.base.allocator; +fn allocateLinkeditSegment(self: *MachO) error{Overflow}!void { + const seg = self.getLinkeditSegment(); + var off = math.cast(u32, seg.fileoff) orelse return error.Overflow; + // DYLD_INFO_ONLY + { + const cmd = &self.dyld_info_cmd; + cmd.rebase_off = off; + off += cmd.rebase_size; + cmd.bind_off = off; + off += cmd.bind_size; + cmd.weak_bind_off = off; + off += cmd.weak_bind_size; + cmd.lazy_bind_off = off; + off += cmd.lazy_bind_size; + cmd.export_off = off; + off += cmd.export_size; + off = mem.alignForward(u32, off, @alignOf(u64)); + } + + // FUNCTION_STARTS + { + const cmd = &self.function_starts_cmd; + cmd.dataoff = off; + off += cmd.datasize; + off = mem.alignForward(u32, off, @alignOf(u64)); + } - if (self.got_sect_index != null) try self.got.addDyldRelocs(self); - if (self.tlv_ptr_sect_index != null) try self.tlv_ptr.addDyldRelocs(self); - if (self.la_symbol_ptr_sect_index != null) try self.la_symbol_ptr.addDyldRelocs(self); - try self.initExportTrie(); + // DATA_IN_CODE + { + const cmd = &self.data_in_code_cmd; + cmd.dataoff = off; + off += cmd.datasize; + off = mem.alignForward(u32, off, @alignOf(u64)); + } - var nrebases: usize = 0; - var nbinds: usize = 0; - var nweak_binds: usize = 0; - for (self.objects.items) |index| { - const object = self.getFile(index).?.object; - nrebases += object.num_rebase_relocs; - nbinds += object.num_bind_relocs; - nweak_binds += object.num_weak_bind_relocs; + // SYMTAB (symtab) + { + const cmd = &self.symtab_cmd; + cmd.symoff = off; + off += cmd.nsyms * @sizeOf(macho.nlist_64); + off = mem.alignForward(u32, off, @alignOf(u32)); } - if (self.getInternalObject()) |int| { - nrebases += int.num_rebase_relocs; + + // DYSYMTAB + { + const cmd = &self.dysymtab_cmd; + cmd.indirectsymoff = off; + off += cmd.nindirectsyms * @sizeOf(u32); + off = mem.alignForward(u32, off, @alignOf(u64)); } - try self.rebase.entries.ensureUnusedCapacity(gpa, nrebases); - try self.bind.entries.ensureUnusedCapacity(gpa, nbinds); - try self.weak_bind.entries.ensureUnusedCapacity(gpa, nweak_binds); -} -fn initExportTrie(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); + // SYMTAB (strtab) + { + const cmd = &self.symtab_cmd; + cmd.stroff = off; + off += cmd.strsize; + } - const gpa = self.base.allocator; - try self.export_trie.init(gpa); + seg.filesize = off - seg.fileoff; +} - const seg = 
self.getTextSegment(); - for (self.objects.items) |index| { - for (self.getFile(index).?.getSymbols()) |sym_index| { - const sym = self.getSymbol(sym_index); - if (!sym.flags.@"export") continue; - if (sym.getAtom(self)) |atom| if (!atom.flags.alive) continue; - if (sym.getFile(self).?.getIndex() != index) continue; - var flags: u64 = if (sym.flags.abs) - macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE - else if (sym.flags.tlv) - macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL - else - macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR; - if (sym.flags.weak) { - flags |= macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; - self.weak_defines = true; - self.binds_to_weak = true; - } - try self.export_trie.put(gpa, .{ - .name = sym.getName(self), - .vmaddr_offset = sym.getAddress(.{ .stubs = false }, self) - seg.vmaddr, - .export_flags = flags, - }); +fn updateLazyBindSizeWorker(self: *MachO) void { + const doWork = struct { + fn doWork(macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + try macho_file.lazy_bind.updateSize(macho_file); + const sect_id = macho_file.stubs_helper_sect_index.?; + const out = &macho_file.sections.items(.out)[sect_id]; + var stream = std.io.fixedBufferStream(out.items); + try macho_file.stubs_helper.write(macho_file, stream.writer()); } - } + }.doWork; + doWork(self) catch |err| { + self.base.fatal("could not calculate lazy_bind opcodes size: {s}", .{@errorName(err)}); + _ = self.has_errors.swap(true, .seq_cst); + }; +} - if (self.mh_execute_header_index) |index| { - const sym = self.getSymbol(index); - try self.export_trie.put(gpa, .{ - .name = sym.getName(self), - .vmaddr_offset = sym.getAddress(.{}, self) - seg.vmaddr, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, +pub fn updateLinkeditSizeWorker(self: *MachO, tag: enum { + rebase, + bind, + weak_bind, + export_trie, + data_in_code, +}) void { + const res = switch (tag) { + .rebase => self.rebase.updateSize(self), + .bind => self.bind.updateSize(self), + .weak_bind => self.weak_bind.updateSize(self), + .export_trie => self.export_trie.updateSize(self), + .data_in_code => self.data_in_code.updateSize(self), + }; + res catch |err| { + self.base.fatal("could not calculate {s} opcodes size: {s}", .{ + @tagName(tag), + @errorName(err), }); - } + _ = self.has_errors.swap(true, .seq_cst); + }; } -fn writeAtoms(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); +fn resizeSections(self: *MachO) !void { + const slice = self.sections.slice(); + for (slice.items(.header), slice.items(.out)) |header, *out| { + if (header.isZerofill()) continue; + const cpu_arch = self.options.cpu_arch.?; + try out.resize(self.base.allocator, header.size); + const padding_byte: u8 = if (header.isCode() and cpu_arch == .x86_64) 0xcc else 0; + @memset(out.items, padding_byte); + } +} +fn writeSectionsAndUpdateLinkeditSizes(self: *MachO) !void { const gpa = self.base.allocator; - const cpu_arch = self.options.cpu_arch.?; - const slice = self.sections.slice(); - var has_resolve_error = false; - for (slice.items(.header), slice.items(.atoms)) |header, atoms| { - if (atoms.items.len == 0) continue; - if (header.isZerofill()) continue; + const cmd = self.symtab_cmd; + try self.symtab.resize(gpa, cmd.nsyms); + try self.strtab.resize(gpa, cmd.strsize); + self.strtab.items[0] = 0; + + var wg: WaitGroup = .{}; + { + wg.reset(); + defer wg.wait(); + for (self.objects.items) |index| { + self.base.thread_pool.spawnWg(&wg, writeAtomsWorker, .{ self, index }); + } + if (self.getInternalObject()) |obj| { + 
self.base.thread_pool.spawnWg(&wg, writeAtomsWorker, .{ self, obj.index }); + } + for (self.thunks.items) |thunk| { + self.base.thread_pool.spawnWg(&wg, writeThunkWorker, .{ self, thunk }); + } + + const slice = self.sections.slice(); + for (&[_]?u8{ + self.eh_frame_sect_index, + self.unwind_info_sect_index, + self.got_sect_index, + self.stubs_sect_index, + self.la_symbol_ptr_sect_index, + self.tlv_ptr_sect_index, + self.objc_stubs_sect_index, + }) |maybe_sect_id| { + if (maybe_sect_id) |sect_id| { + const out = &slice.items(.out)[sect_id]; + self.base.thread_pool.spawnWg(&wg, writeSyntheticSectionWorker, .{ self, sect_id, out.items }); + } + } - const buffer = try gpa.alloc(u8, header.size); - defer gpa.free(buffer); - const padding_byte: u8 = if (header.isCode() and cpu_arch == .x86_64) 0xcc else 0; - @memset(buffer, padding_byte); - - for (atoms.items) |atom_index| { - const atom = self.getAtom(atom_index).?; - assert(atom.flags.alive); - const off = atom.value; - try atom.getCode(self, buffer[off..][0..atom.size]); - atom.resolveRelocs(self, buffer[off..][0..atom.size]) catch |err| switch (err) { - error.ResolveFailed => has_resolve_error = true, - else => |e| return e, - }; + if (self.la_symbol_ptr_sect_index) |_| { + self.base.thread_pool.spawnWg(&wg, updateLazyBindSizeWorker, .{self}); } - try self.base.file.pwriteAll(buffer, header.offset); - } + self.base.thread_pool.spawnWg(&wg, updateLinkeditSizeWorker, .{ self, .rebase }); + self.base.thread_pool.spawnWg(&wg, updateLinkeditSizeWorker, .{ self, .bind }); + self.base.thread_pool.spawnWg(&wg, updateLinkeditSizeWorker, .{ self, .weak_bind }); + self.base.thread_pool.spawnWg(&wg, updateLinkeditSizeWorker, .{ self, .export_trie }); + self.base.thread_pool.spawnWg(&wg, updateLinkeditSizeWorker, .{ self, .data_in_code }); - for (self.thunks.items) |thunk| { - const header = slice.items(.header)[thunk.out_n_sect]; - const offset = thunk.value + header.offset; - const buffer = try gpa.alloc(u8, thunk.size()); - defer gpa.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - try thunk.write(self, stream.writer()); - try self.base.file.pwriteAll(buffer, offset); + for (self.objects.items) |index| { + self.base.thread_pool.spawnWg(&wg, File.writeSymtab, .{ self.getFile(index).?, self }); + } + for (self.dylibs.items) |index| { + self.base.thread_pool.spawnWg(&wg, File.writeSymtab, .{ self.getFile(index).?, self }); + } + if (self.getInternalObject()) |obj| { + self.base.thread_pool.spawnWg(&wg, File.writeSymtab, .{ obj.asFile(), self }); + } } - if (has_resolve_error) return error.ResolveFailed; + if (self.has_errors.swap(false, .seq_cst)) return error.FlushFailed; } -fn writeUnwindInfo(self: *MachO) !void { +fn writeSectionsToFile(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = self.base.allocator; - - if (self.eh_frame_sect_index) |index| { - const header = self.sections.items(.header)[index]; - const buffer = try gpa.alloc(u8, header.size); - defer gpa.free(buffer); - eh_frame.write(self, buffer); - try self.base.file.pwriteAll(buffer, header.offset); - } - - if (self.unwind_info_sect_index) |index| { - const header = self.sections.items(.header)[index]; - const buffer = try gpa.alloc(u8, header.size); - defer gpa.free(buffer); - try self.unwind_info.write(self, buffer); - try self.base.file.pwriteAll(buffer, header.offset); + const slice = self.sections.slice(); + for (slice.items(.header), slice.items(.out)) |header, out| { + try self.base.file.pwriteAll(out.items, header.offset); } } -fn 
finalizeDyldInfoSections(self: *MachO) !void { +fn writeAtomsWorker(self: *MachO, index: File.Index) void { const tracy = trace(@src()); defer tracy.end(); - const gpa = self.base.allocator; - - try self.rebase.finalize(gpa); - try self.bind.finalize(gpa, self); - try self.weak_bind.finalize(gpa, self); - try self.lazy_bind.finalize(gpa, self); - try self.export_trie.finalize(gpa); + self.getFile(index).?.writeAtoms(self) catch |err| { + self.base.fatal("{}: failed to write atoms: {s}", .{ + self.getFile(index).?.fmtPath(), + @errorName(err), + }); + _ = self.has_errors.swap(true, .seq_cst); + }; } -fn writeSyntheticSections(self: *MachO) !void { +fn writeThunkWorker(self: *MachO, thunk: Thunk) void { const tracy = trace(@src()); defer tracy.end(); + const doWork = struct { + fn doWork(th: Thunk, buffer: []u8, macho_file: *MachO) !void { + const off = th.value; + const size = th.size(); + var stream = std.io.fixedBufferStream(buffer[off..][0..size]); + try th.write(macho_file, stream.writer()); + } + }.doWork; + const out = self.sections.items(.out)[thunk.out_n_sect].items; + doWork(thunk, out, self) catch |err| { + self.base.fatal("failed to write contents of thunk: {s}", .{@errorName(err)}); + _ = self.has_errors.swap(true, .seq_cst); + }; +} - const gpa = self.base.allocator; - - if (self.got_sect_index) |sect_id| { - const header = self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); - defer buffer.deinit(); - try self.got.write(self, buffer.writer()); - assert(buffer.items.len == header.size); - try self.base.file.pwriteAll(buffer.items, header.offset); - } - - if (self.stubs_sect_index) |sect_id| { - const header = self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); - defer buffer.deinit(); - try self.stubs.write(self, buffer.writer()); - assert(buffer.items.len == header.size); - try self.base.file.pwriteAll(buffer.items, header.offset); - } - - if (self.stubs_helper_sect_index) |sect_id| { - const header = self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); - defer buffer.deinit(); - try self.stubs_helper.write(self, buffer.writer()); - assert(buffer.items.len == header.size); - try self.base.file.pwriteAll(buffer.items, header.offset); - } - - if (self.la_symbol_ptr_sect_index) |sect_id| { - const header = self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); - defer buffer.deinit(); - try self.la_symbol_ptr.write(self, buffer.writer()); - assert(buffer.items.len == header.size); - try self.base.file.pwriteAll(buffer.items, header.offset); - } - - if (self.tlv_ptr_sect_index) |sect_id| { - const header = self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); - defer buffer.deinit(); - try self.tlv_ptr.write(self, buffer.writer()); - assert(buffer.items.len == header.size); - try self.base.file.pwriteAll(buffer.items, header.offset); - } +fn writeSyntheticSectionWorker(self: *MachO, sect_id: u8, out: []u8) void { + const tracy = trace(@src()); + defer tracy.end(); + const Tag = enum { + eh_frame, + unwind_info, + got, + stubs, + la_symbol_ptr, + tlv_ptr, + objc_stubs, + }; + const doWork = struct { + fn doWork(macho_file: *MachO, tag: Tag, buffer: []u8) !void { + var stream = std.io.fixedBufferStream(buffer); + switch (tag) { + .eh_frame => eh_frame.write(macho_file, buffer), + .unwind_info => try 
macho_file.unwind_info.write(macho_file, buffer), + .got => try macho_file.got.write(macho_file, stream.writer()), + .stubs => try macho_file.stubs.write(macho_file, stream.writer()), + .la_symbol_ptr => try macho_file.la_symbol_ptr.write(macho_file, stream.writer()), + .tlv_ptr => try macho_file.tlv_ptr.write(macho_file, stream.writer()), + .objc_stubs => try macho_file.objc_stubs.write(macho_file, stream.writer()), + } + } + }.doWork; + const header = self.sections.items(.header)[sect_id]; + const tag: Tag = tag: { + if (self.eh_frame_sect_index != null and + self.eh_frame_sect_index.? == sect_id) break :tag .eh_frame; + if (self.unwind_info_sect_index != null and + self.unwind_info_sect_index.? == sect_id) break :tag .unwind_info; + if (self.got_sect_index != null and + self.got_sect_index.? == sect_id) break :tag .got; + if (self.stubs_sect_index != null and + self.stubs_sect_index.? == sect_id) break :tag .stubs; + if (self.la_symbol_ptr_sect_index != null and + self.la_symbol_ptr_sect_index.? == sect_id) break :tag .la_symbol_ptr; + if (self.tlv_ptr_sect_index != null and + self.tlv_ptr_sect_index.? == sect_id) break :tag .tlv_ptr; + if (self.objc_stubs_sect_index != null and + self.objc_stubs_sect_index.? == sect_id) break :tag .objc_stubs; + unreachable; + }; + doWork(self, tag, out) catch |err| { + self.base.fatal("could not write section '{s},{s}' to file: {s}", .{ + header.segName(), + header.sectName(), + @errorName(err), + }); + _ = self.has_errors.swap(true, .seq_cst); + }; +} - if (self.objc_stubs_sect_index) |sect_id| { - const header = self.sections.items(.header)[sect_id]; - var buffer = try std.ArrayList(u8).initCapacity(gpa, header.size); - defer buffer.deinit(); - try self.objc_stubs.write(self, buffer.writer()); - assert(buffer.items.len == header.size); - try self.base.file.pwriteAll(buffer.items, header.offset); - } +fn writeLinkeditSectionsToFile(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + try self.writeDyldInfo(); + try self.writeDataInCode(); + try self.writeSymtabToFile(); + try self.writeIndsymtab(); } -fn writeDyldInfoSections(self: *MachO, off: u32) !u32 { +fn writeDyldInfo(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.allocator; - const cmd = &self.dyld_info_cmd; + const base_off = self.getLinkeditSegment().fileoff; + const cmd = self.dyld_info_cmd; var needed_size: u32 = 0; - - cmd.rebase_off = needed_size; - cmd.rebase_size = mem.alignForward(u32, @intCast(self.rebase.size()), @alignOf(u64)); needed_size += cmd.rebase_size; - - cmd.bind_off = needed_size; - cmd.bind_size = mem.alignForward(u32, @intCast(self.bind.size()), @alignOf(u64)); needed_size += cmd.bind_size; - - cmd.weak_bind_off = needed_size; - cmd.weak_bind_size = mem.alignForward(u32, @intCast(self.weak_bind.size()), @alignOf(u64)); needed_size += cmd.weak_bind_size; - - cmd.lazy_bind_off = needed_size; - cmd.lazy_bind_size = mem.alignForward(u32, @intCast(self.lazy_bind.size()), @alignOf(u64)); needed_size += cmd.lazy_bind_size; - - cmd.export_off = needed_size; - cmd.export_size = mem.alignForward(u32, @intCast(self.export_trie.size), @alignOf(u64)); needed_size += cmd.export_size; const buffer = try gpa.alloc(u8, needed_size); @@ -2287,89 +2160,29 @@ fn writeDyldInfoSections(self: *MachO, off: u32) !u32 { const writer = stream.writer(); try self.rebase.write(writer); - try stream.seekTo(cmd.bind_off); + try stream.seekTo(cmd.bind_off - base_off); try self.bind.write(writer); - try stream.seekTo(cmd.weak_bind_off); 
+ try stream.seekTo(cmd.weak_bind_off - base_off); try self.weak_bind.write(writer); - try stream.seekTo(cmd.lazy_bind_off); + try stream.seekTo(cmd.lazy_bind_off - base_off); try self.lazy_bind.write(writer); - try stream.seekTo(cmd.export_off); + try stream.seekTo(cmd.export_off - base_off); try self.export_trie.write(writer); - - cmd.rebase_off += off; - cmd.bind_off += off; - cmd.weak_bind_off += off; - cmd.lazy_bind_off += off; - cmd.export_off += off; - - try self.base.file.pwriteAll(buffer, off); - - return off + needed_size; -} - -fn writeFunctionStarts(self: *MachO, off: u32) !u32 { - // TODO actually write it out - const cmd = &self.function_starts_cmd; - cmd.dataoff = off; - return off; + try self.base.file.pwriteAll(buffer, cmd.rebase_off); } -pub fn writeDataInCode(self: *MachO, base_address: u64, off: u32) !u32 { - const cmd = &self.data_in_code_cmd; - cmd.dataoff = off; - - const gpa = self.base.allocator; - var dices = std.ArrayList(macho.data_in_code_entry).init(gpa); - defer dices.deinit(); - - for (self.objects.items) |index| { - const object = self.getFile(index).?.object; - const in_dices = object.getDataInCode(); - - try dices.ensureUnusedCapacity(in_dices.len); - - var next_dice: usize = 0; - for (object.atoms.items) |atom_index| { - if (next_dice >= in_dices.len) break; - const atom = self.getAtom(atom_index) orelse continue; - const start_off = atom.getInputAddress(self); - const end_off = start_off + atom.size; - const start_dice = next_dice; - - if (end_off < in_dices[next_dice].offset) continue; - - while (next_dice < in_dices.len and - in_dices[next_dice].offset < end_off) : (next_dice += 1) - {} - - if (atom.flags.alive) for (in_dices[start_dice..next_dice]) |dice| { - dices.appendAssumeCapacity(.{ - .offset = @intCast(atom.getAddress(self) + dice.offset - start_off - base_address), - .length = dice.length, - .kind = dice.kind, - }); - }; - } - } - - const needed_size = math.cast(u32, dices.items.len * @sizeOf(macho.data_in_code_entry)) orelse return error.Overflow; - cmd.datasize = needed_size; - - try self.base.file.pwriteAll(mem.sliceAsBytes(dices.items), cmd.dataoff); - - return off + needed_size; +pub fn writeDataInCode(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const cmd = self.data_in_code_cmd; + try self.base.file.pwriteAll(mem.sliceAsBytes(self.data_in_code.entries.items), cmd.dataoff); } -pub fn calcSymtabSize(self: *MachO) !void { +fn calcSymtabSize(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = self.base.allocator; - var nlocals: u32 = 0; - var nstabs: u32 = 0; - var nexports: u32 = 0; - var nimports: u32 = 0; - var strsize: u32 = 0; + const gpa = self.base.allocator; var files = std.ArrayList(File.Index).init(gpa); defer files.deinit(); @@ -2378,6 +2191,12 @@ pub fn calcSymtabSize(self: *MachO) !void { for (self.dylibs.items) |index| files.appendAssumeCapacity(index); if (self.internal_object_index) |index| files.appendAssumeCapacity(index); + var nlocals: u32 = 0; + var nstabs: u32 = 0; + var nexports: u32 = 0; + var nimports: u32 = 0; + var strsize: u32 = 1; + for (files.items) |index| { const file = self.getFile(index).?; const ctx = switch (file) { @@ -2387,7 +2206,7 @@ pub fn calcSymtabSize(self: *MachO) !void { ctx.istab = nstabs; ctx.iexport = nexports; ctx.iimport = nimports; - try file.calcSymtabSize(self); + ctx.stroff = strsize; nlocals += ctx.nlocals; nstabs += ctx.nstabs; nexports += ctx.nexports; @@ -2405,10 +2224,12 @@ pub fn calcSymtabSize(self: *MachO) !void { 
ctx.iimport += nlocals + nstabs + nexports; } + try self.indsymtab.updateSize(self); + { const cmd = &self.symtab_cmd; cmd.nsyms = nlocals + nstabs + nexports + nimports; - cmd.strsize = strsize + 1; + cmd.strsize = strsize; } { @@ -2422,55 +2243,24 @@ pub fn calcSymtabSize(self: *MachO) !void { } } -pub fn writeSymtab(self: *MachO, off: u32) !u32 { +fn writeIndsymtab(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.allocator; - const cmd = &self.symtab_cmd; - cmd.symoff = off; - - try self.symtab.resize(gpa, cmd.nsyms); - try self.strtab.ensureUnusedCapacity(gpa, cmd.strsize - 1); - - for (self.objects.items) |index| { - self.getFile(index).?.writeSymtab(self); - } - for (self.dylibs.items) |index| { - self.getFile(index).?.writeSymtab(self); - } - if (self.getInternalObject()) |internal| { - internal.writeSymtab(self); - } - - assert(self.strtab.items.len == cmd.strsize); - - try self.base.file.pwriteAll(mem.sliceAsBytes(self.symtab.items), cmd.symoff); - - return off + cmd.nsyms * @sizeOf(macho.nlist_64); -} - -fn writeIndsymtab(self: *MachO, off: u32) !u32 { - const gpa = self.base.allocator; - const cmd = &self.dysymtab_cmd; - cmd.indirectsymoff = off; - cmd.nindirectsyms = self.indsymtab.nsyms(self); - + const cmd = self.dysymtab_cmd; const needed_size = cmd.nindirectsyms * @sizeOf(u32); var buffer = try std.ArrayList(u8).initCapacity(gpa, needed_size); defer buffer.deinit(); try self.indsymtab.write(self, buffer.writer()); - try self.base.file.pwriteAll(buffer.items, cmd.indirectsymoff); - assert(buffer.items.len == needed_size); - - return off + needed_size; } -pub fn writeStrtab(self: *MachO, off: u32) !u32 { - const cmd = &self.symtab_cmd; - cmd.stroff = off; +pub fn writeSymtabToFile(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const cmd = self.symtab_cmd; + try self.base.file.pwriteAll(mem.sliceAsBytes(self.symtab.items), cmd.symoff); try self.base.file.pwriteAll(self.strtab.items, cmd.stroff); - return off + cmd.strsize; } fn writeLoadCommands(self: *MachO) !struct { usize, usize, usize } { @@ -2512,18 +2302,20 @@ fn writeLoadCommands(self: *MachO) !struct { usize, usize, usize } { try load_commands.writeDylinkerLC(writer); ncmds += 1; - if (self.entry_index) |global_index| { - const sym = self.getSymbol(global_index); - const seg = self.getTextSegment(); - const entryoff: u32 = if (sym.getFile(self) == null) - 0 - else - @as(u32, @intCast(sym.getAddress(.{ .stubs = true }, self) - seg.vmaddr)); - try writer.writeStruct(macho.entry_point_command{ - .entryoff = entryoff, - .stacksize = self.options.stack_size orelse 0, - }); - ncmds += 1; + if (self.getInternalObject()) |obj| { + if (obj.getEntryRef(self)) |ref| { + const sym = ref.getSymbol(self).?; + const seg = self.getTextSegment(); + const entryoff: u32 = if (sym.getFile(self) == null) + 0 + else + @as(u32, @intCast(sym.getAddress(.{ .stubs = true }, self) - seg.vmaddr)); + try writer.writeStruct(macho.entry_point_command{ + .entryoff = entryoff, + .stacksize = self.options.stack_size orelse 0, + }); + ncmds += 1; + } } if (self.options.dylib) { @@ -2616,13 +2408,13 @@ fn writeHeader(self: *MachO, ncmds: usize, sizeofcmds: usize) !void { header.flags |= macho.MH_NO_REEXPORTED_DYLIBS; } - if (self.has_tlv) { + if (self.has_tlv.load(.seq_cst)) { header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; } - if (self.binds_to_weak) { + if (self.binds_to_weak.load(.seq_cst)) { header.flags |= macho.MH_BINDS_TO_WEAK; } - if (self.weak_defines) { + if 
(self.weak_defines.load(.seq_cst)) { header.flags |= macho.MH_WEAK_DEFINES; } @@ -2635,6 +2427,9 @@ fn writeHeader(self: *MachO, ncmds: usize, sizeofcmds: usize) !void { } fn writeUuid(self: *MachO, uuid_cmd_offset: usize, has_codesig: bool) !void { + const tracy = trace(@src()); + defer tracy.end(); + const file_size = if (!has_codesig) blk: { const seg = self.getLinkeditSegment(); break :blk seg.fileoff + seg.filesize; @@ -2645,6 +2440,9 @@ fn writeUuid(self: *MachO, uuid_cmd_offset: usize, has_codesig: bool) !void { } pub fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { + const tracy = trace(@src()); + defer tracy.end(); + const seg = self.getLinkeditSegment(); // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 @@ -2662,6 +2460,9 @@ pub fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { } pub fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature) !void { + const tracy = trace(@src()); + defer tracy.end(); + const seg = self.getTextSegment(); const offset = self.codesig_cmd.dataoff; @@ -2694,6 +2495,8 @@ pub fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature) !void { /// the original file. This is super messy, but there doesn't seem any other /// way to please the XNU. pub fn invalidateKernelCache(dir: std.fs.Dir, sub_path: []const u8) !void { + const tracy = trace(@src()); + defer tracy.end(); if (comptime builtin.target.isDarwin() and builtin.target.cpu.arch == .aarch64) { try dir.copyFile(sub_path, dir, sub_path, .{}); } @@ -2822,157 +2625,6 @@ pub fn getFileHandle(self: MachO, index: File.HandleIndex) File.Handle { return self.file_handles.items[index]; } -pub fn addAtom(self: *MachO) !Atom.Index { - const index = @as(Atom.Index, @intCast(self.atoms.items.len)); - const atom = try self.atoms.addOne(self.base.allocator); - atom.* = .{}; - return index; -} - -pub fn getAtom(self: *MachO, atom_index: Atom.Index) ?*Atom { - if (atom_index == 0) return null; - assert(atom_index < self.atoms.items.len); - return &self.atoms.items[atom_index]; -} - -pub fn addAtomExtra(self: *MachO, extra: Atom.Extra) !u32 { - const fields = @typeInfo(Atom.Extra).Struct.fields; - try self.atoms_extra.ensureUnusedCapacity(self.base.allocator, fields.len); - return self.addAtomExtraAssumeCapacity(extra); -} - -pub fn addAtomExtraAssumeCapacity(self: *MachO, extra: Atom.Extra) u32 { - const index = @as(u32, @intCast(self.atoms_extra.items.len)); - const fields = @typeInfo(Atom.Extra).Struct.fields; - inline for (fields) |field| { - self.atoms_extra.appendAssumeCapacity(switch (field.type) { - u32 => @field(extra, field.name), - else => @compileError("bad field type"), - }); - } - return index; -} - -pub fn getAtomExtra(self: *MachO, index: u32) ?Atom.Extra { - if (index == 0) return null; - const fields = @typeInfo(Atom.Extra).Struct.fields; - var i: usize = index; - var result: Atom.Extra = undefined; - inline for (fields) |field| { - @field(result, field.name) = switch (field.type) { - u32 => self.atoms_extra.items[i], - else => @compileError("bad field type"), - }; - i += 1; - } - return result; -} - -pub fn setAtomExtra(self: *MachO, index: u32, extra: Atom.Extra) void { - assert(index > 0); - const fields = @typeInfo(Atom.Extra).Struct.fields; - inline for (fields, 0..) 
|field, i| { - self.atoms_extra.items[index + i] = switch (field.type) { - u32 => @field(extra, field.name), - else => @compileError("bad field type"), - }; - } -} - -pub fn addSymbol(self: *MachO) !Symbol.Index { - const index = @as(Symbol.Index, @intCast(self.symbols.items.len)); - const symbol = try self.symbols.addOne(self.base.allocator); - symbol.* = .{}; - return index; -} - -pub fn getSymbol(self: *MachO, index: Symbol.Index) *Symbol { - assert(index < self.symbols.items.len); - return &self.symbols.items[index]; -} - -pub fn addSymbolExtra(self: *MachO, extra: Symbol.Extra) !u32 { - const fields = @typeInfo(Symbol.Extra).Struct.fields; - try self.symbols_extra.ensureUnusedCapacity(self.base.allocator, fields.len); - return self.addSymbolExtraAssumeCapacity(extra); -} - -pub fn addSymbolExtraAssumeCapacity(self: *MachO, extra: Symbol.Extra) u32 { - const index = @as(u32, @intCast(self.symbols_extra.items.len)); - const fields = @typeInfo(Symbol.Extra).Struct.fields; - inline for (fields) |field| { - self.symbols_extra.appendAssumeCapacity(switch (field.type) { - u32 => @field(extra, field.name), - else => @compileError("bad field type"), - }); - } - return index; -} - -pub fn getSymbolExtra(self: MachO, index: u32) ?Symbol.Extra { - if (index == 0) return null; - const fields = @typeInfo(Symbol.Extra).Struct.fields; - var i: usize = index; - var result: Symbol.Extra = undefined; - inline for (fields) |field| { - @field(result, field.name) = switch (field.type) { - u32 => self.symbols_extra.items[i], - else => @compileError("bad field type"), - }; - i += 1; - } - return result; -} - -pub fn setSymbolExtra(self: *MachO, index: u32, extra: Symbol.Extra) void { - assert(index > 0); - const fields = @typeInfo(Symbol.Extra).Struct.fields; - inline for (fields, 0..) 
|field, i| { - self.symbols_extra.items[index + i] = switch (field.type) { - u32 => @field(extra, field.name), - else => @compileError("bad field type"), - }; - } -} - -const GetOrCreateGlobalResult = struct { - found_existing: bool, - index: Symbol.Index, -}; - -pub fn getOrCreateGlobal(self: *MachO, off: u32) !GetOrCreateGlobalResult { - const gpa = self.base.allocator; - const gop = try self.globals.getOrPut(gpa, off); - if (!gop.found_existing) { - const index = try self.addSymbol(); - const global = self.getSymbol(index); - global.flags.global = true; - global.name = off; - gop.value_ptr.* = index; - } - return .{ - .found_existing = gop.found_existing, - .index = gop.value_ptr.*, - }; -} - -pub fn getGlobalByName(self: *MachO, name: []const u8) ?Symbol.Index { - const off = self.string_intern.getOffset(name) orelse return null; - return self.globals.get(off); -} - -pub fn addUnwindRecord(self: *MachO) !UnwindInfo.Record.Index { - const index = @as(UnwindInfo.Record.Index, @intCast(self.unwind_records.items.len)); - const rec = try self.unwind_records.addOne(self.base.allocator); - rec.* = .{}; - return index; -} - -pub fn getUnwindRecord(self: *MachO, index: UnwindInfo.Record.Index) *UnwindInfo.Record { - assert(index < self.unwind_records.items.len); - return &self.unwind_records.items[index]; -} - pub fn addThunk(self: *MachO) !Thunk.Index { const index = @as(Thunk.Index, @intCast(self.thunks.items.len)); const thunk = try self.thunks.addOne(self.base.allocator); @@ -3180,7 +2832,7 @@ const default_pagezero_vmsize: u64 = 0x100000000; pub const LiteralPool = struct { table: std.AutoArrayHashMapUnmanaged(void, void) = .{}, keys: std.ArrayListUnmanaged(Key) = .{}, - values: std.ArrayListUnmanaged(Atom.Index) = .{}, + values: std.ArrayListUnmanaged(MachO.Ref) = .{}, data: std.ArrayListUnmanaged(u8) = .{}, pub fn deinit(lp: *LiteralPool, allocator: Allocator) void { @@ -3193,12 +2845,16 @@ pub const LiteralPool = struct { const InsertResult = struct { found_existing: bool, index: Index, - atom: *Atom.Index, + ref: *MachO.Ref, }; - pub fn getAtom(lp: LiteralPool, index: Index, macho_file: *MachO) *Atom { + pub fn getSymbolRef(lp: LiteralPool, index: Index) MachO.Ref { assert(index < lp.values.items.len); - return macho_file.getAtom(lp.values.items[index]).?; + return lp.values.items[index]; + } + + pub fn getSymbol(lp: LiteralPool, index: Index, macho_file: *MachO) *Symbol { + return lp.getSymbolRef(index).getSymbol(macho_file).?; } pub fn insert(lp: *LiteralPool, allocator: Allocator, @"type": u8, string: []const u8) !InsertResult { @@ -3216,7 +2872,7 @@ pub const LiteralPool = struct { return .{ .found_existing = gop.found_existing, .index = @intCast(gop.index), - .atom = &lp.values.items[gop.index], + .ref = &lp.values.items[gop.index], }; } @@ -3261,7 +2917,10 @@ pub const LiteralPool = struct { const Section = struct { header: macho.section_64, segment_id: u8, - atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, + atoms: std.ArrayListUnmanaged(Ref) = .{}, + thunks: std.ArrayListUnmanaged(Thunk.Index) = .{}, + out: std.ArrayListUnmanaged(u8) = .{}, + relocs: std.ArrayListUnmanaged(macho.relocation_info) = .{}, }; pub const SymtabCtx = struct { @@ -3273,6 +2932,7 @@ pub const SymtabCtx = struct { nstabs: u32 = 0, nexports: u32 = 0, nimports: u32 = 0, + stroff: u32 = 0, strsize: u32 = 0, }; @@ -3284,6 +2944,136 @@ pub const null_sym = macho.nlist_64{ .n_value = 0, }; +/// A reference to atom or symbol in an input file. +/// If file == 0, symbol is an undefined global. 
+pub const Ref = struct { + index: u32, + file: File.Index, + + pub fn eql(ref: Ref, other: Ref) bool { + return ref.index == other.index and ref.file == other.file; + } + + pub fn getFile(ref: Ref, macho_file: *MachO) ?File { + return macho_file.getFile(ref.file); + } + + pub fn getAtom(ref: Ref, macho_file: *MachO) ?*Atom { + const file = ref.getFile(macho_file) orelse return null; + return file.getAtom(ref.index); + } + + pub fn getSymbol(ref: Ref, macho_file: *MachO) ?*Symbol { + const file = ref.getFile(macho_file) orelse return null; + return switch (file) { + inline else => |x| &x.symbols.items[ref.index], + }; + } + + pub fn format( + ref: Ref, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + try writer.print("%{d} in file({d})", .{ ref.index, ref.file }); + } +}; + +pub const SymbolResolver = struct { + keys: std.ArrayListUnmanaged(Key) = .{}, + values: std.ArrayListUnmanaged(Ref) = .{}, + table: std.AutoArrayHashMapUnmanaged(void, void) = .{}, + + const Result = struct { + found_existing: bool, + index: Index, + ref: *Ref, + }; + + pub fn deinit(resolver: *SymbolResolver, allocator: Allocator) void { + resolver.keys.deinit(allocator); + resolver.values.deinit(allocator); + resolver.table.deinit(allocator); + } + + pub fn getOrPut( + resolver: *SymbolResolver, + allocator: Allocator, + ref: Ref, + macho_file: *MachO, + ) !Result { + const adapter = Adapter{ .keys = resolver.keys.items, .macho_file = macho_file }; + const key = Key{ .index = ref.index, .file = ref.file }; + const gop = try resolver.table.getOrPutAdapted(allocator, key, adapter); + if (!gop.found_existing) { + try resolver.keys.append(allocator, key); + _ = try resolver.values.addOne(allocator); + } + return .{ + .found_existing = gop.found_existing, + .index = @intCast(gop.index + 1), + .ref = &resolver.values.items[gop.index], + }; + } + + pub fn get(resolver: SymbolResolver, index: Index) ?Ref { + if (index == 0) return null; + return resolver.values.items[index - 1]; + } + + pub fn reset(resolver: *SymbolResolver) void { + resolver.keys.clearRetainingCapacity(); + resolver.values.clearRetainingCapacity(); + resolver.table.clearRetainingCapacity(); + } + + const Key = struct { + index: Symbol.Index, + file: File.Index, + + fn getName(key: Key, macho_file: *MachO) [:0]const u8 { + const ref = Ref{ .index = key.index, .file = key.file }; + return ref.getSymbol(macho_file).?.getName(macho_file); + } + + fn eql(key: Key, other: Key, macho_file: *MachO) bool { + const key_name = key.getName(macho_file); + const other_name = other.getName(macho_file); + return mem.eql(u8, key_name, other_name); + } + + fn hash(key: Key, macho_file: *MachO) u32 { + const name = key.getName(macho_file); + return @truncate(Hash.hash(0, name)); + } + }; + + const Adapter = struct { + keys: []const Key, + macho_file: *MachO, + + pub fn eql(ctx: @This(), key: Key, b_void: void, b_map_index: usize) bool { + _ = b_void; + const other = ctx.keys[b_map_index]; + return key.eql(other, ctx.macho_file); + } + + pub fn hash(ctx: @This(), key: Key) u32 { + return key.hash(ctx.macho_file); + } + }; + + pub const Index = u32; +}; + +pub const String = struct { + pos: u32 = 0, + len: u32 = 0, +}; + pub const base_tag = Zld.Tag.macho; const aarch64 = @import("aarch64.zig"); @@ -3314,10 +3104,12 @@ const Allocator = mem.Allocator; const ArenaAllocator = std.heap.ArenaAllocator; const Archive = @import("MachO/Archive.zig"); const Atom = 
@import("MachO/Atom.zig"); -const BindSection = synthetic.BindSection; +const AtomicBool = std.atomic.Value(bool); +const Bind = synthetic.Bind; const CodeSignature = @import("MachO/CodeSignature.zig"); +const DataInCode = synthetic.DataInCode; const Dylib = @import("MachO/Dylib.zig"); -const ExportTrieSection = synthetic.ExportTrieSection; +const ExportTrie = synthetic.ExportTrie; const File = @import("MachO/file.zig").File; const GotSection = synthetic.GotSection; const Hash = std.hash.Wyhash; @@ -3328,10 +3120,10 @@ const Md5 = std.crypto.hash.Md5; const Object = @import("MachO/Object.zig"); const ObjcStubsSection = synthetic.ObjcStubsSection; pub const Options = @import("MachO/Options.zig"); -const LazyBindSection = synthetic.LazyBindSection; +const LazyBind = synthetic.LazyBind; const LaSymbolPtrSection = synthetic.LaSymbolPtrSection; const LibStub = @import("tapi.zig").LibStub; -const RebaseSection = synthetic.RebaseSection; +const Rebase = @import("MachO/dyld_info/Rebase.zig"); const Symbol = @import("MachO/Symbol.zig"); const StringTable = @import("StringTable.zig"); const StubsSection = synthetic.StubsSection; @@ -3340,5 +3132,6 @@ const Thunk = thunks.Thunk; const ThreadPool = std.Thread.Pool; const TlvPtrSection = synthetic.TlvPtrSection; const UnwindInfo = @import("MachO/UnwindInfo.zig"); -const WeakBindSection = synthetic.WeakBindSection; +const WaitGroup = std.Thread.WaitGroup; +const WeakBind = synthetic.WeakBind; const Zld = @import("Zld.zig"); diff --git a/src/MachO/Archive.zig b/src/MachO/Archive.zig index 84250510..614230b1 100644 --- a/src/MachO/Archive.zig +++ b/src/MachO/Archive.zig @@ -70,21 +70,17 @@ pub fn parse(self: *Archive, macho_file: *MachO, path: []const u8, file_handle: const file = macho_file.getFileHandle(file_handle); const offset = if (fat_arch) |ar| ar.offset else 0; - const size = if (fat_arch) |ar| ar.size else (try file.stat()).size; - try file.seekTo(offset); + const end_pos = if (fat_arch) |ar| ar.offset + ar.size else (try file.stat()).size; - const reader = file.reader(); - _ = try reader.readBytesNoEof(Archive.SARMAG); - - var pos: usize = Archive.SARMAG; + var pos: usize = Archive.SARMAG + offset; while (true) { - if (pos >= size) break; - if (!mem.isAligned(pos, 2)) { - try file.seekBy(1); - pos += 1; - } + if (pos >= end_pos) break; + if (!mem.isAligned(pos, 2)) pos += 1; - const hdr = try reader.readStruct(ar_hdr); + var buffer: [@sizeOf(ar_hdr)]u8 = undefined; + var nread = try file.preadAll(&buffer, pos); + if (nread != buffer.len) return error.InputOutput; + const hdr = @as(*align(1) const ar_hdr, @ptrCast(&buffer)).*; pos += @sizeOf(ar_hdr); if (!mem.eql(u8, &hdr.ar_fmag, ARFMAG)) { @@ -100,33 +96,35 @@ pub fn parse(self: *Archive, macho_file: *MachO, path: []const u8, file_handle: if (try hdr.nameLength()) |len| { hdr_size -= len; const buf = try arena.allocator().alloc(u8, len); - try reader.readNoEof(buf); + nread = try file.preadAll(buf, pos); + if (nread != len) return error.InputOutput; pos += len; const actual_len = mem.indexOfScalar(u8, buf, @as(u8, 0)) orelse len; break :name buf[0..actual_len]; } unreachable; }; - defer { - _ = file.seekBy(hdr_size) catch {}; - pos += hdr_size; - } + defer pos += hdr_size; if (mem.eql(u8, name, "__.SYMDEF") or mem.eql(u8, name, "__.SYMDEF SORTED")) continue; + // TODO validate we are dealing with object files. 
+ const object = Object{ - .archive = .{ - .path = try gpa.dupe(u8, path), - .offset = offset + pos, - }, .path = try gpa.dupe(u8, name), .file_handle = file_handle, .index = undefined, .alive = false, .mtime = hdr.date() catch 0, + .offset = pos, + .ar_name = try gpa.dupe(u8, path), }; - log.debug("extracting object '{s}' from archive '{s}'", .{ object.path, path }); + log.debug("extracting object '{s}' from archive '{s}' at offset 0x{x}", .{ + object.path, + path, + pos, + }); try self.objects.append(gpa, object); } diff --git a/src/MachO/Atom.zig b/src/MachO/Atom.zig index e02b9424..0b996035 100644 --- a/src/MachO/Atom.zig +++ b/src/MachO/Atom.zig @@ -2,7 +2,7 @@ value: u64 = 0, /// Name of this Atom. -name: u32 = 0, +name: MachO.String = .{}, /// Index into linker's input file table. file: File.Index = 0, @@ -26,7 +26,11 @@ off: u64 = 0, /// Index of this atom in the linker's atoms table. atom_index: Index = 0, -flags: Flags = .{}, +/// Specifies whether this atom is alive or has been garbage collected. +alive: std.atomic.Value(bool) = std.atomic.Value(bool).init(true), + +/// Specifies if the atom has been visited during garbage collection. +visited: std.atomic.Value(bool) = std.atomic.Value(bool).init(false), extra: u32 = 0, @@ -69,9 +73,8 @@ pub fn getCode(self: Atom, macho_file: *MachO, buffer: []u8) !void { .object => |x| { const slice = x.sections.slice(); const file = macho_file.getFileHandle(x.file_handle); - const offset = if (x.archive) |ar| ar.offset else 0; const sect = slice.items(.header)[self.n_sect]; - const amt = try file.preadAll(buffer, sect.offset + offset + self.off); + const amt = try file.preadAll(buffer, sect.offset + x.offset + self.off); if (amt != buffer.len) return error.InputOutput; }, .internal => |x| { @@ -82,27 +85,27 @@ pub fn getCode(self: Atom, macho_file: *MachO, buffer: []u8) !void { } pub fn getRelocs(self: Atom, macho_file: *MachO) []const Relocation { - if (!self.flags.relocs) return &[0]Relocation{}; const relocs = switch (self.getFile(macho_file)) { .dylib => unreachable, inline else => |x| x.sections.items(.relocs)[self.n_sect], }; - const extra = self.getExtra(macho_file).?; + const extra = self.getExtra(macho_file); return relocs.items[extra.rel_index..][0..extra.rel_count]; } pub fn getUnwindRecords(self: Atom, macho_file: *MachO) []const UnwindInfo.Record.Index { - if (!self.flags.unwind) return &[0]UnwindInfo.Record.Index{}; - const extra = self.getExtra(macho_file).?; + const extra = self.getExtra(macho_file); return switch (self.getFile(macho_file)) { - .dylib, .internal => unreachable, - .object => |x| x.unwind_records.items[extra.unwind_index..][0..extra.unwind_count], + .dylib => unreachable, + .internal => &[0]UnwindInfo.Record.Index{}, + .object => |x| x.unwind_records_indexes.items[extra.unwind_index..][0..extra.unwind_count], }; } pub fn markUnwindRecordsDead(self: Atom, macho_file: *MachO) void { + const object = self.getFile(macho_file).object; for (self.getUnwindRecords(macho_file)) |cu_index| { - const cu = macho_file.getUnwindRecord(cu_index); + const cu = object.getUnwindRecord(cu_index); cu.alive = false; if (cu.getFdePtr(macho_file)) |fde| { @@ -112,44 +115,39 @@ pub fn markUnwindRecordsDead(self: Atom, macho_file: *MachO) void { } pub fn getThunk(self: Atom, macho_file: *MachO) *Thunk { - assert(self.flags.thunk); - const extra = self.getExtra(macho_file).?; + const extra = self.getExtra(macho_file); return macho_file.getThunk(extra.thunk); } -pub fn getLiteralPoolIndex(self: Atom, macho_file: *MachO) 
?MachO.LiteralPool.Index { - if (!self.flags.literal_pool) return null; - return self.getExtra(macho_file).?.literal_index; -} - const AddExtraOpts = struct { thunk: ?u32 = null, rel_index: ?u32 = null, rel_count: ?u32 = null, + rel_out_index: ?u32 = null, + rel_out_count: ?u32 = null, unwind_index: ?u32 = null, unwind_count: ?u32 = null, - literal_index: ?u32 = null, + literal_pool_index: ?u32 = null, + literal_symbol_index: ?u32 = null, }; -pub fn addExtra(atom: *Atom, opts: AddExtraOpts, macho_file: *MachO) !void { - if (atom.getExtra(macho_file) == null) { - atom.extra = try macho_file.addAtomExtra(.{}); - } - var extra = atom.getExtra(macho_file).?; +pub fn addExtra(atom: *Atom, opts: AddExtraOpts, macho_file: *MachO) void { + const file = atom.getFile(macho_file); + var extra = file.getAtomExtra(atom.extra); inline for (@typeInfo(@TypeOf(opts)).Struct.fields) |field| { if (@field(opts, field.name)) |x| { @field(extra, field.name) = x; } } - atom.setExtra(extra, macho_file); + file.setAtomExtra(atom.extra, extra); } -pub inline fn getExtra(atom: Atom, macho_file: *MachO) ?Extra { - return macho_file.getAtomExtra(atom.extra); +pub inline fn getExtra(atom: Atom, macho_file: *MachO) Extra { + return atom.getFile(macho_file).getAtomExtra(atom.extra); } pub inline fn setExtra(atom: Atom, extra: Extra, macho_file: *MachO) void { - macho_file.setAtomExtra(atom.extra, extra); + atom.getFile(macho_file).setAtomExtra(atom.extra, extra); } pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 { @@ -240,14 +238,14 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { switch (rel.type) { .branch => { - const symbol = rel.getTargetSymbol(macho_file); + const symbol = rel.getTargetSymbol(self, macho_file); if (symbol.flags.import or (symbol.flags.@"export" and symbol.flags.weak) or symbol.flags.interposable) { - symbol.flags.stubs = true; + symbol.setSectionFlags(.{ .stubs = true }); if (symbol.flags.weak) { - macho_file.binds_to_weak = true; + macho_file.binds_to_weak.store(true, .seq_cst); } } else if (mem.startsWith(u8, symbol.getName(macho_file), "_objc_msgSend$")) { - symbol.flags.objc_stubs = true; + symbol.setSectionFlags(.{ .objc_stubs = true }); } }, @@ -255,28 +253,28 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { .got_load_page, .got_load_pageoff, => { - const symbol = rel.getTargetSymbol(macho_file); + const symbol = rel.getTargetSymbol(self, macho_file); if (symbol.flags.import or (symbol.flags.@"export" and symbol.flags.weak) or symbol.flags.interposable or macho_file.options.cpu_arch.? 
== .aarch64) // TODO relax on arm64 { - symbol.flags.got = true; + symbol.setSectionFlags(.{ .got = true }); if (symbol.flags.weak) { - macho_file.binds_to_weak = true; + macho_file.binds_to_weak.store(true, .seq_cst); } } }, .got => { - rel.getTargetSymbol(macho_file).flags.got = true; + rel.getTargetSymbol(self, macho_file).setSectionFlags(.{ .got = true }); }, .tlv, .tlvp_page, .tlvp_pageoff, => { - const symbol = rel.getTargetSymbol(macho_file); + const symbol = rel.getTargetSymbol(self, macho_file); if (!symbol.flags.tlv) { macho_file.base.fatal( "{}: {s}: illegal thread-local variable reference to regular symbol {s}", @@ -284,9 +282,9 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { ); } if (symbol.flags.import or (symbol.flags.@"export" and symbol.flags.weak) or symbol.flags.interposable) { - symbol.flags.tlv_ptr = true; + symbol.setSectionFlags(.{ .tlv_ptr = true }); if (symbol.flags.weak) { - macho_file.binds_to_weak = true; + macho_file.binds_to_weak.store(true, .seq_cst); } } }, @@ -294,27 +292,21 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { .unsigned => { if (rel.meta.length == 3) { // TODO this really should check if this is pointer width if (rel.tag == .@"extern") { - const symbol = rel.getTargetSymbol(macho_file); + const symbol = rel.getTargetSymbol(self, macho_file); if (symbol.isTlvInit(macho_file)) { - macho_file.has_tlv = true; + macho_file.has_tlv.store(true, .seq_cst); continue; } if (symbol.flags.import) { - object.num_bind_relocs += 1; if (symbol.flags.weak) { - object.num_weak_bind_relocs += 1; - macho_file.binds_to_weak = true; + macho_file.binds_to_weak.store(true, .seq_cst); } continue; } if (symbol.flags.@"export" and symbol.flags.weak) { - object.num_weak_bind_relocs += 1; - macho_file.binds_to_weak = true; - } else if (symbol.flags.interposable) { - object.num_bind_relocs += 1; + macho_file.binds_to_weak.store(true, .seq_cst); } } - object.num_rebase_relocs += 1; } }, @@ -326,14 +318,17 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { fn reportUndefSymbol(self: Atom, rel: Relocation, macho_file: *MachO) !bool { if (rel.tag == .local) return false; - const sym = rel.getTargetSymbol(macho_file); - if (sym.getFile(macho_file) == null) { + const file = self.getFile(macho_file); + const ref = file.getSymbolRef(rel.target, macho_file); + if (ref.getFile(macho_file) == null) { + macho_file.undefs_mutex.lock(); + defer macho_file.undefs_mutex.unlock(); const gpa = macho_file.base.allocator; - const gop = try macho_file.undefs.getOrPut(gpa, rel.target); + const gop = try macho_file.undefs.getOrPut(gpa, .{ .index = rel.target, .file = self.file }); if (!gop.found_existing) { gop.value_ptr.* = .{}; } - try gop.value_ptr.append(gpa, self.atom_index); + try gop.value_ptr.append(gpa, .{ .index = self.atom_index, .file = self.file }); return true; } @@ -360,7 +355,7 @@ pub fn resolveRelocs(self: Atom, macho_file: *MachO, buffer: []u8) !void { const subtractor = if (rel.meta.has_subtractor) relocs[i - 1] else null; if (rel.tag == .@"extern") { - if (rel.getTargetSymbol(macho_file).getFile(macho_file) == null) continue; + if (rel.getTargetSymbolRef(self, macho_file).getFile(macho_file) == null) continue; } try stream.seekTo(rel_offset); @@ -395,14 +390,15 @@ fn resolveRelocInner( ) ResolveError!void { const cpu_arch = macho_file.options.cpu_arch.?; const rel_offset = rel.offset - self.off; - const seg_id = macho_file.sections.items(.segment_id)[self.out_n_sect]; - const seg = macho_file.segments.items[seg_id]; const P = @as(i64, 
@intCast(self.getAddress(macho_file))) + @as(i64, @intCast(rel_offset)); const A = rel.addend + rel.getRelocAddend(cpu_arch); - const S: i64 = @intCast(rel.getTargetAddress(macho_file)); - const G: i64 = @intCast(rel.getGotTargetAddress(macho_file)); + const S: i64 = @intCast(rel.getTargetAddress(self, macho_file)); + const G: i64 = @intCast(rel.getGotTargetAddress(self, macho_file)); const TLS = @as(i64, @intCast(macho_file.getTlsAddress())); - const SUB = if (subtractor) |sub| @as(i64, @intCast(sub.getTargetAddress(macho_file))) else 0; + const SUB = if (subtractor) |sub| + @as(i64, @intCast(sub.getTargetAddress(self, macho_file))) + else + 0; const divExact = struct { fn divExact(atom: Atom, r: Relocation, num: u12, den: u12, ctx: *MachO) !u12 { @@ -425,7 +421,7 @@ fn resolveRelocInner( rel_offset, @tagName(rel.type), S + A - SUB, - rel.getTargetAtom(macho_file).atom_index, + rel.getTargetAtom(self, macho_file).atom_index, }), .@"extern" => relocs_log.debug(" {x}<+{d}>: {s}: [=> {x}] G({x}) ({s})", .{ P, @@ -433,7 +429,7 @@ fn resolveRelocInner( @tagName(rel.type), S + A - SUB, G + A, - rel.getTargetSymbol(macho_file).getName(macho_file), + rel.getTargetSymbol(self, macho_file).getName(macho_file), }), } @@ -444,34 +440,13 @@ fn resolveRelocInner( assert(!rel.meta.pcrel); if (rel.meta.length == 3) { if (rel.tag == .@"extern") { - const sym = rel.getTargetSymbol(macho_file); + const sym = rel.getTargetSymbol(self, macho_file); if (sym.isTlvInit(macho_file)) { try writer.writeInt(u64, @intCast(S - TLS), .little); return; } - const entry = bind.Entry{ - .target = rel.target, - .offset = @as(u64, @intCast(P)) - seg.vmaddr, - .segment_id = seg_id, - .addend = A, - }; - if (sym.flags.import) { - macho_file.bind.entries.appendAssumeCapacity(entry); - if (sym.flags.weak) { - macho_file.weak_bind.entries.appendAssumeCapacity(entry); - } - return; - } - if (sym.flags.@"export" and sym.flags.weak) { - macho_file.weak_bind.entries.appendAssumeCapacity(entry); - } else if (sym.flags.interposable) { - macho_file.bind.entries.appendAssumeCapacity(entry); - } + if (sym.flags.import) return; } - macho_file.rebase.entries.appendAssumeCapacity(.{ - .offset = @as(u64, @intCast(P)) - seg.vmaddr, - .segment_id = seg_id, - }); try writer.writeInt(u64, @bitCast(S + A - SUB), .little); } else if (rel.meta.length == 2) { try writer.writeInt(u32, @bitCast(@as(i32, @truncate(S + A - SUB))), .little); @@ -495,7 +470,7 @@ fn resolveRelocInner( .aarch64 => { const disp: i28 = math.cast(i28, S + A - P) orelse blk: { const thunk = self.getThunk(macho_file); - const S_: i64 = @intCast(thunk.getTargetAddress(rel.target, macho_file)); + const S_: i64 = @intCast(thunk.getTargetAddress(rel.getTargetSymbolRef(self, macho_file), macho_file)); break :blk math.cast(i28, S_ + A - P) orelse return error.Overflow; }; aarch64.writeBranchImm(disp, code[rel_offset..][0..4]); @@ -508,7 +483,7 @@ fn resolveRelocInner( assert(rel.tag == .@"extern"); assert(rel.meta.length == 2); assert(rel.meta.pcrel); - if (rel.getTargetSymbol(macho_file).flags.got) { + if (rel.getTargetSymbol(self, macho_file).getSectionFlags().got) { try writer.writeInt(i32, @intCast(G + A - P), .little); } else { try relaxGotLoad(code[rel_offset - 3 ..]); @@ -520,8 +495,8 @@ fn resolveRelocInner( assert(rel.tag == .@"extern"); assert(rel.meta.length == 2); assert(rel.meta.pcrel); - const sym = rel.getTargetSymbol(macho_file); - if (sym.flags.tlv_ptr) { + const sym = rel.getTargetSymbol(self, macho_file); + if (sym.getSectionFlags().tlv_ptr) { const S_: i64 = 
@intCast(sym.getTlvPtrAddress(macho_file)); try writer.writeInt(i32, @intCast(S_ + A - P), .little); } else { @@ -543,13 +518,13 @@ fn resolveRelocInner( assert(rel.tag == .@"extern"); assert(rel.meta.length == 2); assert(rel.meta.pcrel); - const sym = rel.getTargetSymbol(macho_file); + const sym = rel.getTargetSymbol(self, macho_file); const source = math.cast(u64, P) orelse return error.Overflow; const target = target: { const target = switch (rel.type) { .page => S + A, .got_load_page => G + A, - .tlvp_page => if (sym.flags.tlv_ptr) blk: { + .tlvp_page => if (sym.getSectionFlags().tlv_ptr) blk: { const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file)); break :blk S_ + A; } else S + A, @@ -602,9 +577,9 @@ fn resolveRelocInner( assert(rel.meta.length == 2); assert(!rel.meta.pcrel); - const sym = rel.getTargetSymbol(macho_file); + const sym = rel.getTargetSymbol(self, macho_file); const target = target: { - const target = if (sym.flags.tlv_ptr) blk: { + const target = if (sym.getSectionFlags().tlv_ptr) blk: { const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file)); break :blk S_ + A; } else S + A; @@ -642,7 +617,7 @@ fn resolveRelocInner( } }; - var inst = if (sym.flags.tlv_ptr) aarch64.Instruction{ + var inst = if (sym.getSectionFlags().tlv_ptr) aarch64.Instruction{ .load_store_register = .{ .rt = reg_info.rd, .rn = reg_info.rn, @@ -726,48 +701,48 @@ pub fn calcNumRelocs(self: Atom, macho_file: *MachO) u32 { } } -pub fn writeRelocs(self: Atom, macho_file: *MachO, code: []u8, buffer: *std.ArrayList(macho.relocation_info)) !void { +pub fn writeRelocs(self: Atom, macho_file: *MachO, code: []u8, buffer: []macho.relocation_info) !void { const tracy = trace(@src()); defer tracy.end(); const cpu_arch = macho_file.options.cpu_arch.?; const relocs = self.getRelocs(macho_file); - var stream = std.io.fixedBufferStream(code); + var i: usize = 0; for (relocs) |rel| { + defer i += 1; const rel_offset = rel.offset - self.off; const r_address: i32 = math.cast(i32, self.value + rel_offset) orelse return error.Overflow; const r_symbolnum = r_symbolnum: { const r_symbolnum: u32 = switch (rel.tag) { - .local => rel.getTargetAtom(macho_file).out_n_sect + 1, - .@"extern" => rel.getTargetSymbol(macho_file).getOutputSymtabIndex(macho_file).?, + .local => rel.getTargetAtom(self, macho_file).out_n_sect + 1, + .@"extern" => rel.getTargetSymbol(self, macho_file).getOutputSymtabIndex(macho_file).?, }; break :r_symbolnum math.cast(u24, r_symbolnum) orelse return error.Overflow; }; const r_extern = rel.tag == .@"extern"; var addend = rel.addend + rel.getRelocAddend(cpu_arch); if (rel.tag == .local) { - const target: i64 = @intCast(rel.getTargetAddress(macho_file)); + const target: i64 = @intCast(rel.getTargetAddress(self, macho_file)); addend += target; } - try stream.seekTo(rel_offset); - switch (cpu_arch) { .aarch64 => { if (rel.type == .unsigned) switch (rel.meta.length) { 0, 1 => unreachable, - 2 => try stream.writer().writeInt(i32, @truncate(addend), .little), - 3 => try stream.writer().writeInt(i64, addend, .little), + 2 => mem.writeInt(i32, code[rel_offset..][0..4], @truncate(addend), .little), + 3 => mem.writeInt(i64, code[rel_offset..][0..8], addend, .little), } else if (addend > 0) { - buffer.appendAssumeCapacity(.{ + buffer[i] = .{ .r_address = r_address, .r_symbolnum = @bitCast(math.cast(i24, addend) orelse return error.Overflow), .r_pcrel = 0, .r_length = 2, .r_extern = 0, .r_type = @intFromEnum(macho.reloc_type_arm64.ARM64_RELOC_ADDEND), - }); + }; + i += 1; } const r_type: macho.reloc_type_arm64 = 
switch (rel.type) { @@ -790,14 +765,14 @@ pub fn writeRelocs(self: Atom, macho_file: *MachO, code: []u8, buffer: *std.Arra .tlv, => unreachable, }; - buffer.appendAssumeCapacity(.{ + buffer[i] = .{ .r_address = r_address, .r_symbolnum = r_symbolnum, .r_pcrel = @intFromBool(rel.meta.pcrel), .r_extern = @intFromBool(r_extern), .r_length = rel.meta.length, .r_type = @intFromEnum(r_type), - }); + }; }, .x86_64 => { if (rel.meta.pcrel) { @@ -809,8 +784,8 @@ pub fn writeRelocs(self: Atom, macho_file: *MachO, code: []u8, buffer: *std.Arra } switch (rel.meta.length) { 0, 1 => unreachable, - 2 => try stream.writer().writeInt(i32, @truncate(addend), .little), - 3 => try stream.writer().writeInt(i64, addend, .little), + 2 => mem.writeInt(i32, code[rel_offset..][0..4], @truncate(addend), .little), + 3 => mem.writeInt(i64, code[rel_offset..][0..8], addend, .little), } const r_type: macho.reloc_type_x86_64 = switch (rel.type) { @@ -833,18 +808,20 @@ pub fn writeRelocs(self: Atom, macho_file: *MachO, code: []u8, buffer: *std.Arra .tlvp_pageoff, => unreachable, }; - buffer.appendAssumeCapacity(.{ + buffer[i] = .{ .r_address = r_address, .r_symbolnum = r_symbolnum, .r_pcrel = @intFromBool(rel.meta.pcrel), .r_extern = @intFromBool(r_extern), .r_length = rel.meta.length, .r_type = @intFromEnum(r_type), - }); + }; }, else => unreachable, } } + + assert(i == buffer.len); } pub fn format( @@ -882,17 +859,18 @@ fn format2( _ = unused_fmt_string; const atom = ctx.atom; const macho_file = ctx.macho_file; - try writer.print("atom({d}) : {s} : @{x} : sect({d}) : align({x}) : size({x})", .{ - atom.atom_index, atom.getName(macho_file), atom.getAddress(macho_file), - atom.out_n_sect, atom.alignment, atom.size, + const file = atom.getFile(macho_file); + try writer.print("atom({d}) : {s} : @{x} : sect({d}) : align({x}) : size({x}) : thunk({d})", .{ + atom.atom_index, atom.getName(macho_file), atom.getAddress(macho_file), + atom.out_n_sect, atom.alignment, atom.size, + atom.getExtra(macho_file).thunk, }); - if (atom.flags.thunk) try writer.print(" : thunk({d})", .{atom.getExtra(macho_file).?.thunk}); - if (!atom.flags.alive) try writer.writeAll(" : [*]"); - if (atom.flags.unwind) { + if (!atom.alive.load(.seq_cst)) try writer.writeAll(" : [*]"); + if (atom.getUnwindRecords(macho_file).len > 0) { try writer.writeAll(" : unwind{ "); - const extra = atom.getExtra(macho_file).?; + const extra = atom.getExtra(macho_file); for (atom.getUnwindRecords(macho_file), extra.unwind_index..) |index, i| { - const rec = macho_file.getUnwindRecord(index); + const rec = file.object.getUnwindRecord(index); try writer.print("{d}", .{index}); if (!rec.alive) try writer.writeAll("([*])"); if (i < extra.unwind_index + extra.unwind_count - 1) try writer.writeAll(", "); @@ -903,26 +881,6 @@ fn format2( pub const Index = u32; -pub const Flags = packed struct { - /// Specifies whether this atom is alive or has been garbage collected. - alive: bool = true, - - /// Specifies if the atom has been visited during garbage collection. - visited: bool = false, - - /// Whether this atom has a range extension thunk. - thunk: bool = false, - - /// Whether this atom has any relocations. - relocs: bool = false, - - /// Whether this atom has any unwind records. - unwind: bool = false, - - /// Whether this atom has LiteralPool entry. - literal_pool: bool = false, -}; - pub const Extra = struct { /// Index of the range extension thunk of this atom. thunk: u32 = 0, @@ -933,6 +891,12 @@ pub const Extra = struct { /// Count of relocations belonging to this atom. 
rel_count: u32 = 0, + /// Start index of relocations being written out to file for this atom. + rel_out_index: u32 = 0, + + /// Count of relocations written out to file for this atom. + rel_out_count: u32 = 0, + /// Start index of relocations belonging to this atom. unwind_index: u32 = 0, @@ -940,12 +904,14 @@ pub const Extra = struct { unwind_count: u32 = 0, /// Index into LiteralPool entry for this atom. - literal_index: u32 = 0, + literal_pool_index: u32 = 0, + + /// Index into the File's symbol table for local symbol representing this literal atom. + literal_symbol_index: u32 = 0, }; const aarch64 = @import("../aarch64.zig"); const assert = std.debug.assert; -const bind = @import("dyld_info/bind.zig"); const dis_x86_64 = @import("dis_x86_64"); const macho = std.macho; const math = std.math; diff --git a/src/MachO/Dylib.zig b/src/MachO/Dylib.zig index 270565a3..0dcb82b7 100644 --- a/src/MachO/Dylib.zig +++ b/src/MachO/Dylib.zig @@ -1,16 +1,21 @@ +/// Non-zero for fat dylibs +offset: u64, path: []const u8, index: File.Index, +file_handle: ?File.HandleIndex = null, +lib_stub: ?LibStub = null, exports: std.MultiArrayList(Export) = .{}, strtab: std.ArrayListUnmanaged(u8) = .{}, id: ?Id = null, ordinal: u16 = 0, -symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, +symbols: std.ArrayListUnmanaged(Symbol) = .{}, +symbols_extra: std.ArrayListUnmanaged(u32) = .{}, +globals: std.ArrayListUnmanaged(MachO.SymbolResolver.Index) = .{}, dependents: std.ArrayListUnmanaged(Id) = .{}, rpaths: std.StringArrayHashMapUnmanaged(void) = .{}, -umbrella: File.Index = 0, -platform: ?MachO.Options.Platform = null, +umbrella: File.Index, needed: bool, weak: bool, @@ -22,10 +27,13 @@ referenced: bool = false, output_symtab_ctx: MachO.SymtabCtx = .{}, pub fn deinit(self: *Dylib, allocator: Allocator) void { + if (self.lib_stub) |*ls| ls.deinit(); self.exports.deinit(allocator); self.strtab.deinit(allocator); if (self.id) |*id| id.deinit(allocator); self.symbols.deinit(allocator); + self.symbols_extra.deinit(allocator); + self.globals.deinit(allocator); for (self.dependents.items) |*id| { id.deinit(allocator); } @@ -36,12 +44,23 @@ pub fn deinit(self: *Dylib, allocator: Allocator) void { self.rpaths.deinit(allocator); } -pub fn parse(self: *Dylib, macho_file: *MachO, file: std.fs.File, fat_arch: ?fat.Arch) !void { +pub fn parse(self: *Dylib, macho_file: *MachO) !void { + if (self.lib_stub) |_| { + try self.parseTbd(macho_file); + } else { + assert(self.file_handle != null); + try self.parseBinary(macho_file); + } + try self.initSymbols(macho_file); +} + +fn parseBinary(self: *Dylib, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = macho_file.base.allocator; - const offset = if (fat_arch) |ar| ar.offset else 0; + const file = macho_file.getFileHandle(self.file_handle.?); + const offset = self.offset; log.debug("parsing dylib from binary", .{}); @@ -52,6 +71,20 @@ pub fn parse(self: *Dylib, macho_file: *MachO, file: std.fs.File, fat_arch: ?fat } const header = @as(*align(1) const macho.mach_header_64, @ptrCast(&header_buffer)).*; + const cpu_arch: std.Target.Cpu.Arch = switch (header.cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => unreachable, + }; + if (macho_file.options.cpu_arch.? 
!= cpu_arch) { + macho_file.base.fatal("{s}: invalid architecture '{s}', expected '{s}'", .{ + self.path, + @tagName(cpu_arch), + @tagName(macho_file.options.cpu_arch.?), + }); + return error.ParseFailed; + } + const lc_buffer = try gpa.alloc(u8, header.sizeofcmds); defer gpa.free(lc_buffer); { @@ -59,6 +92,9 @@ pub fn parse(self: *Dylib, macho_file: *MachO, file: std.fs.File, fat_arch: ?fat if (amt != lc_buffer.len) return error.InputOutput; } + var platforms = std.ArrayList(MachO.Options.Platform).init(gpa); + defer platforms.deinit(); + var it = LoadCommandIterator{ .ncmds = header.ncmds, .buffer = lc_buffer, @@ -96,9 +132,7 @@ pub fn parse(self: *Dylib, macho_file: *MachO, file: std.fs.File, fat_arch: ?fat .VERSION_MIN_IPHONEOS, .VERSION_MIN_TVOS, .VERSION_MIN_WATCHOS, - => { - self.platform = MachO.Options.Platform.fromLoadCommand(lc); - }, + => try platforms.append(MachO.Options.Platform.fromLoadCommand(lc)), else => {}, }; @@ -106,6 +140,34 @@ pub fn parse(self: *Dylib, macho_file: *MachO, file: std.fs.File, fat_arch: ?fat macho_file.base.fatal("{s}: missing LC_ID_DYLIB load command", .{self.path}); return error.ParseFailed; } + + if (macho_file.options.platform) |plat| { + const match = for (platforms.items) |this_plat| { + if (this_plat.platform == plat.platform) break this_plat; + } else null; + if (match) |this_plat| { + if (this_plat.version.value > plat.version.value) { + macho_file.base.warn( + "{s}: object file was built for newer platform version: expected {}, got {}", + .{ + self.path, + plat.version, + this_plat.version, + }, + ); + } + } else { + const err = try macho_file.base.addErrorWithNotes(1 + platforms.items.len); + try err.addMsg("{s}: object file was built for different platforms than required {s}", .{ + self.path, + @tagName(plat.platform), + }); + for (platforms.items) |this_plat| { + try err.addNote("object file built for {s}", .{@tagName(this_plat.platform)}); + } + return error.ParseFailed; + } + } } const TrieIterator = struct { @@ -149,10 +211,16 @@ const TrieIterator = struct { }; pub fn addExport(self: *Dylib, allocator: Allocator, name: []const u8, flags: Export.Flags) !void { - try self.exports.append(allocator, .{ - .name = try self.addString(allocator, name), - .flags = flags, - }); + const str = try self.addString(allocator, name); + const index = try self.addSymbol(allocator); + const symbol = &self.symbols.items[index]; + symbol.name = str; + symbol.extra = try self.addSymbolExtra(allocator, .{}); + symbol.flags.weak = flags.weak; + symbol.flags.tlv = flags.tlv; + symbol.visibility = .global; + try self.exports.append(allocator, .{ .name = str, .flags = flags }); + try self.globals.append(allocator, 0); } fn parseTrieNode( @@ -176,14 +244,23 @@ fn parseTrieNode( if (flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT != 0) { _ = try it.readULEB128(); // dylib ordinal const name = try it.readString(); - try self.addExport(allocator, if (name.len > 0) name else prefix, out_flags); + try self.exports.append(allocator, .{ + .name = try self.addString(allocator, if (name.len > 0) name else prefix), + .flags = out_flags, + }); } else if (flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER != 0) { _ = try it.readULEB128(); // stub offset _ = try it.readULEB128(); // resolver offset - try self.addExport(allocator, prefix, out_flags); + try self.exports.append(allocator, .{ + .name = try self.addString(allocator, prefix), + .flags = out_flags, + }); } else { _ = try it.readULEB128(); // VM offset - try self.addExport(allocator, prefix, out_flags); + try 
self.exports.append(allocator, .{ + .name = try self.addString(allocator, prefix), + .flags = out_flags, + }); } } @@ -211,19 +288,14 @@ fn parseTrie(self: *Dylib, data: []const u8, macho_file: *MachO) !void { try self.parseTrieNode(&it, gpa, arena.allocator(), ""); } -pub fn parseTbd( - self: *Dylib, - cpu_arch: std.Target.Cpu.Arch, - platform: ?MachO.Options.Platform, - lib_stub: LibStub, - macho_file: *MachO, -) !void { +fn parseTbd(self: *Dylib, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = macho_file.base.allocator; log.debug("parsing dylib from stub", .{}); + const lib_stub = self.lib_stub.?; const umbrella_lib = lib_stub.inner[0]; { @@ -242,12 +314,13 @@ pub fn parseTbd( log.debug(" (install_name '{s}')", .{umbrella_lib.installName()}); - self.platform = platform orelse .{ + const cpu_arch = macho_file.options.cpu_arch.?; + const platform: MachO.Options.Platform = macho_file.options.platform orelse .{ .platform = .MACOS, .version = .{ .value = 0 }, }; - var matcher = try TargetMatcher.init(gpa, cpu_arch, self.platform.?.platform); + var matcher = try TargetMatcher.init(gpa, cpu_arch, platform.platform); defer matcher.deinit(); for (lib_stub.inner, 0..) |elem, stub_index| { @@ -268,13 +341,19 @@ pub fn parseTbd( if (exp.symbols) |symbols| { for (symbols) |sym_name| { - try self.addExport(gpa, sym_name, .{}); + try self.exports.append(gpa, .{ + .name = try self.addString(gpa, sym_name), + .flags = .{}, + }); } } if (exp.weak_symbols) |symbols| { for (symbols) |sym_name| { - try self.addExport(gpa, sym_name, .{ .weak = true }); + try self.exports.append(gpa, .{ + .name = try self.addString(gpa, sym_name), + .flags = .{ .weak = true }, + }); } } @@ -316,13 +395,19 @@ pub fn parseTbd( if (exp.symbols) |symbols| { for (symbols) |sym_name| { - try self.addExport(gpa, sym_name, .{}); + try self.exports.append(gpa, .{ + .name = try self.addString(gpa, sym_name), + .flags = .{}, + }); } } if (exp.weak_symbols) |symbols| { for (symbols) |sym_name| { - try self.addExport(gpa, sym_name, .{ .weak = true }); + try self.exports.append(gpa, .{ + .name = try self.addString(gpa, sym_name), + .flags = .{ .weak = true }, + }); } } @@ -352,13 +437,19 @@ pub fn parseTbd( if (reexp.symbols) |symbols| { for (symbols) |sym_name| { - try self.addExport(gpa, sym_name, .{}); + try self.exports.append(gpa, .{ + .name = try self.addString(gpa, sym_name), + .flags = .{}, + }); } } if (reexp.weak_symbols) |symbols| { for (symbols) |sym_name| { - try self.addExport(gpa, sym_name, .{ .weak = true }); + try self.exports.append(gpa, .{ + .name = try self.addString(gpa, sym_name), + .flags = .{ .weak = true }, + }); } } @@ -447,59 +538,64 @@ fn addObjCExport( ) !void { const full_name = try std.fmt.allocPrint(allocator, prefix ++ "$_{s}", .{name}); defer allocator.free(full_name); - try self.addExport(allocator, full_name, .{}); + try self.exports.append(allocator, .{ + .name = try self.addString(allocator, full_name), + .flags = .{}, + }); } -pub fn initSymbols(self: *Dylib, macho_file: *MachO) !void { +fn initSymbols(self: *Dylib, macho_file: *MachO) !void { const gpa = macho_file.base.allocator; - try self.symbols.ensureTotalCapacityPrecise(gpa, self.exports.items(.name).len); - - for (self.exports.items(.name)) |noff| { - const name = self.getString(noff); - const off = try macho_file.string_intern.insert(gpa, name); - const gop = try macho_file.getOrCreateGlobal(off); - self.symbols.addOneAssumeCapacity().* = gop.index; + const nsyms = self.exports.items(.name).len; + try 
self.symbols.ensureTotalCapacityPrecise(gpa, nsyms); + try self.symbols_extra.ensureTotalCapacityPrecise(gpa, nsyms * @sizeOf(Symbol.Extra)); + try self.globals.ensureTotalCapacityPrecise(gpa, nsyms); + self.globals.resize(gpa, nsyms) catch unreachable; + @memset(self.globals.items, 0); + + for (self.exports.items(.name), self.exports.items(.flags)) |noff, flags| { + const index = self.addSymbolAssumeCapacity(); + const symbol = &self.symbols.items[index]; + symbol.name = noff; + symbol.extra = self.addSymbolExtraAssumeCapacity(.{}); + symbol.flags.weak = flags.weak; + symbol.flags.tlv = flags.tlv; + symbol.visibility = .global; } } -pub fn resolveSymbols(self: *Dylib, macho_file: *MachO) void { +pub fn resolveSymbols(self: *Dylib, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); if (!self.explicit and !self.hoisted) return; - for (self.symbols.items, self.exports.items(.flags)) |index, flags| { - const global = macho_file.getSymbol(index); + const gpa = macho_file.base.allocator; + + for (self.exports.items(.flags), self.globals.items, 0..) |flags, *global, i| { + const gop = try macho_file.resolver.getOrPut(gpa, .{ + .index = @intCast(i), + .file = self.index, + }, macho_file); + if (!gop.found_existing) { + gop.ref.* = .{ .index = 0, .file = 0 }; + } + global.* = gop.index; + + if (gop.ref.getFile(macho_file) == null) { + gop.ref.* = .{ .index = @intCast(i), .file = self.index }; + continue; + } + if (self.asFile().getSymbolRank(.{ .weak = flags.weak, - }) < global.getSymbolRank(macho_file)) { - global.value = 0; - global.atom = 0; - global.nlist_idx = 0; - global.file = self.index; - global.flags.weak = flags.weak; - global.flags.tlv = flags.tlv; - global.flags.dyn_ref = false; - global.flags.tentative = false; - global.visibility = .global; + }) < gop.ref.getSymbol(macho_file).?.getSymbolRank(macho_file)) { + gop.ref.* = .{ .index = @intCast(i), .file = self.index }; } } } -pub fn resetGlobals(self: *Dylib, macho_file: *MachO) void { - for (self.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); - const name = sym.name; - const global = sym.flags.global; - const weak_ref = sym.flags.weak_ref; - sym.* = .{}; - sym.name = name; - sym.flags.global = global; - sym.flags.weak_ref = weak_ref; - } -} - pub fn isAlive(self: Dylib, macho_file: *MachO) bool { if (!macho_file.options.dead_strip_dylibs) return self.explicit or self.referenced or self.needed; return self.referenced or self.needed; @@ -509,30 +605,31 @@ pub fn markReferenced(self: *Dylib, macho_file: *MachO) void { const tracy = trace(@src()); defer tracy.end(); - for (self.symbols.items) |global_index| { - const global = macho_file.getSymbol(global_index); - const file_ptr = global.getFile(macho_file) orelse continue; - if (file_ptr.getIndex() != self.index) continue; + for (0..self.symbols.items.len) |i| { + const ref = self.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + const global = ref.getSymbol(macho_file).?; if (global.isLocal()) continue; self.referenced = true; break; } } -pub fn calcSymtabSize(self: *Dylib, macho_file: *MachO) !void { +pub fn calcSymtabSize(self: *Dylib, macho_file: *MachO) void { const tracy = trace(@src()); defer tracy.end(); - for (self.symbols.items) |global_index| { - const global = macho_file.getSymbol(global_index); - const file_ptr = global.getFile(macho_file) orelse continue; - if (file_ptr.getIndex() != self.index) continue; - if (global.isLocal()) 
continue; - assert(global.flags.import); - global.flags.output_symtab = true; - try global.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); + for (self.symbols.items, 0..) |*sym, i| { + const ref = self.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + if (sym.isLocal()) continue; + assert(sym.flags.import); + sym.flags.output_symtab = true; + sym.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); self.output_symtab_ctx.nimports += 1; - self.output_symtab_ctx.strsize += @as(u32, @intCast(global.getName(macho_file).len + 1)); + self.output_symtab_ctx.strsize += @as(u32, @intCast(sym.getName(macho_file).len + 1)); } } @@ -540,17 +637,20 @@ pub fn writeSymtab(self: Dylib, macho_file: *MachO) void { const tracy = trace(@src()); defer tracy.end(); - for (self.symbols.items) |global_index| { - const global = macho_file.getSymbol(global_index); - const file = global.getFile(macho_file) orelse continue; + var n_strx = self.output_symtab_ctx.stroff; + for (self.symbols.items, 0..) |sym, i| { + const ref = self.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; if (file.getIndex() != self.index) continue; - const idx = global.getOutputSymtabIndex(macho_file) orelse continue; - const n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); - macho_file.strtab.appendSliceAssumeCapacity(global.getName(macho_file)); - macho_file.strtab.appendAssumeCapacity(0); + const idx = sym.getOutputSymtabIndex(macho_file) orelse continue; const out_sym = &macho_file.symtab.items[idx]; out_sym.n_strx = n_strx; - global.setOutputSym(macho_file, out_sym); + sym.setOutputSym(macho_file, out_sym); + const name = sym.getName(macho_file); + @memcpy(macho_file.strtab.items[n_strx..][0..name.len], name); + n_strx += @intCast(name.len); + macho_file.strtab.items[n_strx] = 0; + n_strx += 1; } } @@ -558,21 +658,84 @@ pub inline fn getUmbrella(self: Dylib, macho_file: *MachO) *Dylib { return macho_file.getFile(self.umbrella).?.dylib; } -fn addString(self: *Dylib, allocator: Allocator, name: []const u8) !u32 { +fn addString(self: *Dylib, allocator: Allocator, name: []const u8) !MachO.String { const off = @as(u32, @intCast(self.strtab.items.len)); - try self.strtab.writer(allocator).print("{s}\x00", .{name}); - return off; + try self.strtab.ensureUnusedCapacity(allocator, name.len + 1); + self.strtab.appendSliceAssumeCapacity(name); + self.strtab.appendAssumeCapacity(0); + return .{ .pos = off, .len = @intCast(name.len + 1) }; } -pub inline fn getString(self: Dylib, off: u32) [:0]const u8 { - assert(off < self.strtab.items.len); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0); +pub fn getString(self: Dylib, name: MachO.String) [:0]const u8 { + assert(name.pos < self.strtab.items.len and name.pos + name.len <= self.strtab.items.len); + if (name.len == 0) return ""; + return self.strtab.items[name.pos..][0 .. 
name.len - 1 :0]; } pub fn asFile(self: *Dylib) File { return .{ .dylib = self }; } +fn addSymbol(self: *Dylib, allocator: Allocator) !Symbol.Index { + try self.symbols.ensureUnusedCapacity(allocator, 1); + return self.addSymbolAssumeCapacity(); +} + +fn addSymbolAssumeCapacity(self: *Dylib) Symbol.Index { + const index: Symbol.Index = @intCast(self.symbols.items.len); + const symbol = self.symbols.addOneAssumeCapacity(); + symbol.* = .{ .file = self.index }; + return index; +} + +pub fn getSymbolRef(self: Dylib, index: Symbol.Index, macho_file: *MachO) MachO.Ref { + const global_index = self.globals.items[index]; + if (macho_file.resolver.get(global_index)) |ref| return ref; + return .{ .index = index, .file = self.index }; +} + +pub fn addSymbolExtra(self: *Dylib, allocator: Allocator, extra: Symbol.Extra) !u32 { + const fields = @typeInfo(Symbol.Extra).Struct.fields; + try self.symbols_extra.ensureUnusedCapacity(allocator, fields.len); + return self.addSymbolExtraAssumeCapacity(extra); +} + +fn addSymbolExtraAssumeCapacity(self: *Dylib, extra: Symbol.Extra) u32 { + const index = @as(u32, @intCast(self.symbols_extra.items.len)); + const fields = @typeInfo(Symbol.Extra).Struct.fields; + inline for (fields) |field| { + self.symbols_extra.appendAssumeCapacity(switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }); + } + return index; +} + +pub fn getSymbolExtra(self: Dylib, index: u32) Symbol.Extra { + const fields = @typeInfo(Symbol.Extra).Struct.fields; + var i: usize = index; + var result: Symbol.Extra = undefined; + inline for (fields) |field| { + @field(result, field.name) = switch (field.type) { + u32 => self.symbols_extra.items[i], + else => @compileError("bad field type"), + }; + i += 1; + } + return result; +} + +pub fn setSymbolExtra(self: *Dylib, index: u32, extra: Symbol.Extra) void { + const fields = @typeInfo(Symbol.Extra).Struct.fields; + inline for (fields, 0..) |field, i| { + self.symbols_extra.items[index + i] = switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }; + } +} + pub fn format( self: *Dylib, comptime unused_fmt_string: []const u8, @@ -607,10 +770,16 @@ fn formatSymtab( _ = unused_fmt_string; _ = options; const dylib = ctx.dylib; + const macho_file = ctx.macho_file; try writer.writeAll(" globals\n"); - for (dylib.symbols.items) |index| { - const global = ctx.macho_file.getSymbol(index); - try writer.print(" {}\n", .{global.fmt(ctx.macho_file)}); + for (dylib.symbols.items, 0..) |sym, i| { + const ref = dylib.getSymbolRef(@intCast(i), macho_file); + if (ref.getFile(macho_file) == null) { + // TODO any better way of handling this? 
+ try writer.print(" {s} : unclaimed\n", .{sym.getName(macho_file)}); + } else { + try writer.print(" {}\n", .{ref.getSymbol(macho_file).?.fmt(macho_file)}); + } } } @@ -799,7 +968,7 @@ pub const Id = struct { }; const Export = struct { - name: u32, + name: MachO.String, flags: Flags, const Flags = packed struct { @@ -810,7 +979,6 @@ const Export = struct { }; const assert = std.debug.assert; -const fat = @import("fat.zig"); const fs = std.fs; const fmt = std.fmt; const log = std.log.scoped(.link); diff --git a/src/MachO/InternalObject.zig b/src/MachO/InternalObject.zig index f206fe99..3de9fe37 100644 --- a/src/MachO/InternalObject.zig +++ b/src/MachO/InternalObject.zig @@ -1,13 +1,28 @@ index: File.Index, sections: std.MultiArrayList(Section) = .{}, -atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, -symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, +atoms: std.ArrayListUnmanaged(Atom) = .{}, +atoms_indexes: std.ArrayListUnmanaged(Atom.Index) = .{}, +atoms_extra: std.ArrayListUnmanaged(u32) = .{}, +symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, +symbols: std.ArrayListUnmanaged(Symbol) = .{}, +symbols_extra: std.ArrayListUnmanaged(u32) = .{}, +globals: std.ArrayListUnmanaged(MachO.SymbolResolver.Index) = .{}, objc_methnames: std.ArrayListUnmanaged(u8) = .{}, objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64), -num_rebase_relocs: u32 = 0, +force_undefined: std.ArrayListUnmanaged(Symbol.Index) = .{}, +entry_index: ?Symbol.Index = null, +dyld_stub_binder_index: ?Symbol.Index = null, +dyld_private_index: ?Symbol.Index = null, +objc_msg_send_index: ?Symbol.Index = null, +mh_execute_header_index: ?Symbol.Index = null, +mh_dylib_header_index: ?Symbol.Index = null, +dso_handle_index: ?Symbol.Index = null, +boundary_symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + output_symtab_ctx: MachO.SymtabCtx = .{}, pub fn deinit(self: *InternalObject, allocator: Allocator) void { @@ -16,39 +31,224 @@ pub fn deinit(self: *InternalObject, allocator: Allocator) void { } self.sections.deinit(allocator); self.atoms.deinit(allocator); + self.atoms_indexes.deinit(allocator); + self.atoms_extra.deinit(allocator); + self.symtab.deinit(allocator); + self.strtab.deinit(allocator); self.symbols.deinit(allocator); + self.symbols_extra.deinit(allocator); + self.globals.deinit(allocator); self.objc_methnames.deinit(allocator); + self.force_undefined.deinit(allocator); + self.boundary_symbols.deinit(allocator); +} + +pub fn init(self: *InternalObject, allocator: Allocator) !void { + // Atom at index 0 is reserved as null atom + try self.atoms.append(allocator, .{}); + try self.atoms_extra.append(allocator, 0); + // Null byte in strtab + try self.strtab.append(allocator, 0); } -pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO) !Symbol.Index { +pub fn initSymbols(self: *InternalObject, macho_file: *MachO) !void { + const createSymbol = struct { + fn createSymbol(obj: *InternalObject, name: MachO.String, args: struct { + type: u8 = macho.N_UNDF | macho.N_EXT, + desc: u16 = 0, + }) Symbol.Index { + const index = obj.addSymbolAssumeCapacity(); + const symbol = &obj.symbols.items[index]; + symbol.name = name; + symbol.extra = obj.addSymbolExtraAssumeCapacity(.{}); + symbol.flags.dyn_ref = args.desc & macho.REFERENCED_DYNAMICALLY != 0; + symbol.visibility = if (args.type & macho.N_EXT != 0) blk: { + break :blk if (args.type & macho.N_PEXT != 0) .hidden else .global; + } else .local; + + const nlist_idx: u32 = 
@intCast(obj.symtab.items.len); + const nlist = obj.symtab.addOneAssumeCapacity(); + nlist.* = .{ + .n_strx = name.pos, + .n_type = args.type, + .n_sect = 0, + .n_desc = args.desc, + .n_value = 0, + }; + symbol.nlist_idx = nlist_idx; + return index; + } + }.createSymbol; + const gpa = macho_file.base.allocator; - try self.symbols.ensureUnusedCapacity(gpa, 1); - const off = try macho_file.string_intern.insert(gpa, name); - const gop = try macho_file.getOrCreateGlobal(off); - self.symbols.addOneAssumeCapacity().* = gop.index; - const sym = macho_file.getSymbol(gop.index); - sym.file = self.index; - sym.value = 0; - sym.atom = 0; - sym.nlist_idx = 0; - sym.flags = .{ .global = true }; - return gop.index; + var nsyms = macho_file.options.force_undefined_symbols.len; + nsyms += 1; // dyld_stub_binder + nsyms += 1; // _objc_msgSend + if (!macho_file.options.dylib) { + nsyms += 1; // entry + nsyms += 1; // __mh_execute_header + } else { + nsyms += 1; // __mh_dylib_header + } + nsyms += 1; // ___dso_handle + nsyms += 1; // dyld_private + + try self.symbols.ensureTotalCapacityPrecise(gpa, nsyms); + try self.symbols_extra.ensureTotalCapacityPrecise(gpa, nsyms * @sizeOf(Symbol.Extra)); + try self.symtab.ensureTotalCapacityPrecise(gpa, nsyms); + try self.globals.ensureTotalCapacityPrecise(gpa, nsyms); + self.globals.resize(gpa, nsyms) catch unreachable; + @memset(self.globals.items, 0); + + try self.force_undefined.ensureTotalCapacityPrecise(gpa, macho_file.options.force_undefined_symbols.len); + for (macho_file.options.force_undefined_symbols) |name| { + self.force_undefined.addOneAssumeCapacity().* = createSymbol(self, try self.addString(gpa, name), .{}); + } + + self.dyld_stub_binder_index = createSymbol(self, try self.addString(gpa, "dyld_stub_binder"), .{}); + self.objc_msg_send_index = createSymbol(self, try self.addString(gpa, "_objc_msgSend"), .{}); + + if (!macho_file.options.dylib) { + self.entry_index = createSymbol(self, try self.addString(gpa, macho_file.options.entry orelse "_main"), .{}); + self.mh_execute_header_index = createSymbol(self, try self.addString(gpa, "__mh_execute_header"), .{ + .type = macho.N_SECT | macho.N_EXT, + .desc = macho.REFERENCED_DYNAMICALLY, + }); + } else { + self.mh_dylib_header_index = createSymbol(self, try self.addString(gpa, "__mh_dylib_header"), .{ + .type = macho.N_SECT | macho.N_EXT, + }); + } + + self.dso_handle_index = createSymbol(self, try self.addString(gpa, "___dso_handle"), .{ + .type = macho.N_SECT | macho.N_EXT, + }); + self.dyld_private_index = createSymbol(self, try self.addString(gpa, "dyld_private"), .{ + .type = macho.N_SECT, + }); } -/// Creates a fake input sections __TEXT,__objc_methname and __DATA,__objc_selrefs. -pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !Atom.Index { - const methname_atom_index = try self.addObjcMethnameSection(sym_name, macho_file); - return try self.addObjcSelrefsSection(methname_atom_index, macho_file); +pub fn resolveSymbols(self: *InternalObject, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + + for (self.symtab.items, self.globals.items, 0..) 
|nlist, *global, i| { + const gop = try macho_file.resolver.getOrPut(gpa, .{ + .index = @intCast(i), + .file = self.index, + }, macho_file); + if (!gop.found_existing) { + gop.ref.* = .{ .index = 0, .file = 0 }; + } + global.* = gop.index; + + if (nlist.undf()) continue; + if (gop.ref.getFile(macho_file) == null) { + gop.ref.* = .{ .index = @intCast(i), .file = self.index }; + continue; + } + + if (self.asFile().getSymbolRank(.{ + .archive = false, + .weak = false, + .tentative = false, + }) < gop.ref.getSymbol(macho_file).?.getSymbolRank(macho_file)) { + gop.ref.* = .{ .index = @intCast(i), .file = self.index }; + } + } } -fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_file: *MachO) !Atom.Index { +pub fn resolveBoundarySymbols(self: *InternalObject, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; - const atom_index = try macho_file.addAtom(); - try self.atoms.append(gpa, atom_index); + var boundary_symbols = std.StringArrayHashMap(MachO.Ref).init(gpa); + defer boundary_symbols.deinit(); + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + for (object.symbols.items, 0..) |sym, i| { + const nlist = object.symtab.items(.nlist)[i]; + if (!nlist.undf() or !nlist.ext()) continue; + const ref = object.getSymbolRef(@intCast(i), macho_file); + if (ref.getFile(macho_file) != null) continue; + const name = sym.getName(macho_file); + if (mem.startsWith(u8, name, "segment$start$") or + mem.startsWith(u8, name, "segment$stop$") or + mem.startsWith(u8, name, "section$start$") or + mem.startsWith(u8, name, "section$stop$")) + { + const gop = try boundary_symbols.getOrPut(name); + if (!gop.found_existing) { + gop.value_ptr.* = .{ .index = @intCast(i), .file = index }; + } + } + } + } - const atom = macho_file.getAtom(atom_index).?; - atom.atom_index = atom_index; - atom.file = self.index; + const nsyms = boundary_symbols.values().len; + try self.boundary_symbols.ensureTotalCapacityPrecise(gpa, nsyms); + try self.symbols.ensureUnusedCapacity(gpa, nsyms); + try self.symtab.ensureUnusedCapacity(gpa, nsyms); + try self.symbols_extra.ensureUnusedCapacity(gpa, nsyms * @sizeOf(Symbol.Extra)); + try self.globals.ensureUnusedCapacity(gpa, nsyms); + + for (boundary_symbols.keys(), boundary_symbols.values()) |name, ref| { + const name_off = try self.addString(gpa, name); + const sym_index = self.addSymbolAssumeCapacity(); + self.boundary_symbols.appendAssumeCapacity(sym_index); + const sym = &self.symbols.items[sym_index]; + sym.name = name_off; + sym.visibility = .local; + const nlist_idx: u32 = @intCast(self.symtab.items.len); + const nlist = self.symtab.addOneAssumeCapacity(); + nlist.* = .{ + .n_strx = name_off.pos, + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + sym.nlist_idx = nlist_idx; + sym.extra = self.addSymbolExtraAssumeCapacity(.{}); + + const idx = ref.getFile(macho_file).?.object.globals.items[ref.index]; + self.globals.addOneAssumeCapacity().* = idx; + macho_file.resolver.values.items[idx - 1] = .{ .index = sym_index, .file = self.index }; + } +} + +pub fn markLive(self: *InternalObject, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (0..self.symbols.items.len) |i| { + const nlist = self.symtab.items[i]; + if (!nlist.ext()) continue; + + const ref = self.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; + if (file == .object and 
!file.object.alive) { + file.object.alive = true; + file.object.markLive(macho_file); + } + } +} + +/// Creates a fake input sections __TEXT,__objc_methname and __DATA,__objc_selrefs. +pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !Symbol.Index { + const methname_sym_index = try self.addObjcMethnameSection(sym_name, macho_file); + return try self.addObjcSelrefsSection(methname_sym_index, macho_file); +} + +fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_file: *MachO) !Symbol.Index { + const gpa = macho_file.base.allocator; + const atom_index = try self.addAtom(gpa); + try self.atoms_indexes.append(gpa, atom_index); + const atom = self.getAtom(atom_index).?; atom.size = methname.len + 1; atom.alignment = 0; @@ -64,17 +264,32 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil try self.objc_methnames.ensureUnusedCapacity(gpa, methname.len + 1); self.objc_methnames.writer(gpa).print("{s}\x00", .{methname}) catch unreachable; - return atom_index; + const name_str = try self.addString(gpa, "ltmp"); + const sym_index = try self.addSymbol(gpa); + const sym = &self.symbols.items[sym_index]; + sym.name = name_str; + sym.atom_ref = .{ .index = atom_index, .file = self.index }; + sym.extra = try self.addSymbolExtra(gpa, .{}); + const nlist_idx: u32 = @intCast(self.symtab.items.len); + const nlist = try self.symtab.addOne(gpa); + nlist.* = .{ + .n_strx = name_str.pos, + .n_type = macho.N_SECT, + .n_sect = @intCast(n_sect + 1), + .n_desc = 0, + .n_value = 0, + }; + sym.nlist_idx = nlist_idx; + try self.globals.append(gpa, 0); + + return sym_index; } -fn addObjcSelrefsSection(self: *InternalObject, methname_atom_index: Atom.Index, macho_file: *MachO) !Atom.Index { +fn addObjcSelrefsSection(self: *InternalObject, methname_sym_index: Symbol.Index, macho_file: *MachO) !Symbol.Index { const gpa = macho_file.base.allocator; - const atom_index = try macho_file.addAtom(); - try self.atoms.append(gpa, atom_index); - - const atom = macho_file.getAtom(atom_index).?; - atom.atom_index = atom_index; - atom.file = self.index; + const atom_index = try self.addAtom(gpa); + try self.atoms_indexes.append(gpa, atom_index); + const atom = self.getAtom(atom_index).?; atom.size = @sizeOf(u64); atom.alignment = 3; @@ -90,9 +305,9 @@ fn addObjcSelrefsSection(self: *InternalObject, methname_atom_index: Atom.Index, const relocs = &self.sections.items(.relocs)[n_sect]; try relocs.ensureUnusedCapacity(gpa, 1); relocs.appendAssumeCapacity(.{ - .tag = .local, + .tag = .@"extern", .offset = 0, - .target = methname_atom_index, + .target = methname_sym_index, .addend = 0, .type = .unsigned, .meta = .{ @@ -102,139 +317,283 @@ fn addObjcSelrefsSection(self: *InternalObject, methname_atom_index: Atom.Index, .has_subtractor = false, }, }); - try atom.addExtra(.{ .rel_index = 0, .rel_count = 1 }, macho_file); - atom.flags.relocs = true; - self.num_rebase_relocs += 1; + atom.addExtra(.{ .rel_index = 0, .rel_count = 1 }, macho_file); + + const sym_index = try self.addSymbol(gpa); + const sym = &self.symbols.items[sym_index]; + sym.atom_ref = .{ .index = atom_index, .file = self.index }; + sym.extra = try self.addSymbolExtra(gpa, .{}); + const nlist_idx: u32 = @intCast(self.symtab.items.len); + const nlist = try self.symtab.addOne(gpa); + nlist.* = .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = @intCast(n_sect + 1), + .n_desc = 0, + .n_value = 0, + }; + sym.nlist_idx = nlist_idx; + try self.globals.append(gpa, 0); + 
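// A standalone sketch (not part of the patch) of the string-table handle scheme used by
// addString/getString above: the name is appended together with a trailing NUL and a
// {pos, len} pair is handed back, so lookups can slice directly instead of scanning for
// the terminator. The type and function names below are hypothetical stand-ins.
const std = @import("std");

const String = struct { pos: u32 = 0, len: u32 = 0 };

const StringTable = struct {
    buffer: std.ArrayListUnmanaged(u8) = .{},

    // Appends `name` plus a terminating NUL and returns a handle to it.
    fn add(self: *StringTable, gpa: std.mem.Allocator, name: []const u8) !String {
        const pos: u32 = @intCast(self.buffer.items.len);
        try self.buffer.ensureUnusedCapacity(gpa, name.len + 1);
        self.buffer.appendSliceAssumeCapacity(name);
        self.buffer.appendAssumeCapacity(0);
        return .{ .pos = pos, .len = @intCast(name.len + 1) };
    }

    // Returns the NUL-terminated slice for a handle; an empty handle yields "".
    fn get(self: StringTable, name: String) [:0]const u8 {
        if (name.len == 0) return "";
        return self.buffer.items[name.pos..][0 .. name.len - 1 :0];
    }
};

test "string table handles" {
    const gpa = std.testing.allocator;
    var table = StringTable{};
    defer table.buffer.deinit(gpa);
    const h = try table.add(gpa, "_objc_msgSend");
    try std.testing.expectEqualStrings("_objc_msgSend", table.get(h));
}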
atom.addExtra(.{ .literal_symbol_index = sym_index }, macho_file); - return atom_index; + return sym_index; +} + +pub fn resolveObjcMsgSendSymbols(self: *InternalObject, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + + var objc_msgsend_syms = std.StringArrayHashMap(MachO.Ref).init(gpa); + defer objc_msgsend_syms.deinit(); + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + + for (object.symbols.items, 0..) |sym, i| { + const nlist = object.symtab.items(.nlist)[i]; + if (!nlist.ext()) continue; + if (!nlist.undf()) continue; + + const ref = object.getSymbolRef(@intCast(i), macho_file); + if (ref.getFile(macho_file) != null) continue; + + const name = sym.getName(macho_file); + if (mem.startsWith(u8, name, "_objc_msgSend$")) { + const gop = try objc_msgsend_syms.getOrPut(name); + if (!gop.found_existing) { + gop.value_ptr.* = .{ .index = @intCast(i), .file = index }; + } + } + } + } + + for (objc_msgsend_syms.keys(), objc_msgsend_syms.values()) |sym_name, ref| { + const name = MachO.eatPrefix(sym_name, "_objc_msgSend$").?; + const selrefs_index = try self.addObjcMsgsendSections(name, macho_file); + + const name_off = try self.addString(gpa, sym_name); + const sym_index = try self.addSymbol(gpa); + const sym = &self.symbols.items[sym_index]; + sym.name = name_off; + sym.visibility = .hidden; + const nlist_idx: u32 = @intCast(self.symtab.items.len); + const nlist = try self.symtab.addOne(gpa); + nlist.* = .{ + .n_strx = name_off.pos, + .n_type = macho.N_SECT | macho.N_EXT | macho.N_PEXT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + sym.nlist_idx = nlist_idx; + sym.extra = try self.addSymbolExtra(gpa, .{ .objc_selrefs = selrefs_index }); + sym.setSectionFlags(.{ .objc_stubs = true }); + + const idx = ref.getFile(macho_file).?.object.globals.items[ref.index]; + try self.globals.append(gpa, idx); + macho_file.resolver.values.items[idx - 1] = .{ .index = sym_index, .file = self.index }; + } } -pub fn resolveLiterals(self: InternalObject, lp: *MachO.LiteralPool, macho_file: *MachO) !void { +pub fn resolveLiterals(self: *InternalObject, lp: *MachO.LiteralPool, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); const slice = self.sections.slice(); - for (slice.items(.header), self.atoms.items, 0..) 
|header, atom_index, n_sect| { - if (Object.isCstringLiteral(header) or Object.isFixedSizeLiteral(header)) { - const data = self.getSectionData(@intCast(n_sect)); - const atom = macho_file.getAtom(atom_index).?; - const res = try lp.insert(gpa, header.type(), data); - if (!res.found_existing) { - res.atom.* = atom_index; - } - atom.flags.literal_pool = true; - try atom.addExtra(.{ .literal_index = res.index }, macho_file); - } else if (Object.isPtrLiteral(header)) { - const atom = macho_file.getAtom(atom_index).?; - const relocs = atom.getRelocs(macho_file); - assert(relocs.len == 1); - const rel = relocs[0]; - assert(rel.tag == .local); - const target = macho_file.getAtom(rel.target).?; - const addend = std.math.cast(u32, rel.addend) orelse return error.Overflow; - try buffer.ensureUnusedCapacity(target.size); - buffer.resize(target.size) catch unreachable; - try target.getCode(macho_file, buffer.items); - const res = try lp.insert(gpa, header.type(), buffer.items[addend..]); - buffer.clearRetainingCapacity(); - if (!res.found_existing) { - res.atom.* = atom_index; - } - atom.flags.literal_pool = true; - try atom.addExtra(.{ .literal_index = res.index }, macho_file); + for (slice.items(.header), self.getAtoms()) |header, atom_index| { + if (!Object.isPtrLiteral(header)) continue; + const atom = self.getAtom(atom_index).?; + const relocs = atom.getRelocs(macho_file); + assert(relocs.len == 1); + const rel = relocs[0]; + assert(rel.tag == .@"extern"); + const target = rel.getTargetSymbol(atom.*, macho_file).getAtom(macho_file).?; + try buffer.ensureUnusedCapacity(target.size); + buffer.resize(target.size) catch unreachable; + @memcpy(buffer.items, self.getSectionData(target.n_sect)); + const res = try lp.insert(gpa, header.type(), buffer.items); + buffer.clearRetainingCapacity(); + if (!res.found_existing) { + res.ref.* = .{ .index = atom.getExtra(macho_file).literal_symbol_index, .file = self.index }; + } else { + const lp_sym = lp.getSymbol(res.index, macho_file); + const lp_atom = lp_sym.getAtom(macho_file).?; + lp_atom.alignment = @max(lp_atom.alignment, atom.alignment); + _ = atom.alive.swap(false, .seq_cst); } + atom.addExtra(.{ .literal_pool_index = res.index }, macho_file); } } -pub fn dedupLiterals(self: InternalObject, lp: MachO.LiteralPool, macho_file: *MachO) void { - for (self.atoms.items) |atom_index| { - const atom = macho_file.getAtom(atom_index) orelse continue; - if (!atom.flags.alive) continue; - if (!atom.flags.relocs) continue; +pub fn dedupLiterals(self: *InternalObject, lp: MachO.LiteralPool, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.getAtoms()) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; const relocs = blk: { - const extra = atom.getExtra(macho_file).?; + const extra = atom.getExtra(macho_file); const relocs = self.sections.items(.relocs)[atom.n_sect].items; break :blk relocs[extra.rel_index..][0..extra.rel_count]; }; - for (relocs) |*rel| switch (rel.tag) { - .local => { - const target = macho_file.getAtom(rel.target).?; - if (target.getLiteralPoolIndex(macho_file)) |lp_index| { - const lp_atom = lp.getAtom(lp_index, macho_file); - if (target.atom_index != lp_atom.atom_index) { - lp_atom.alignment = @max(lp_atom.alignment, target.alignment); - target.flags.alive = false; - rel.target = lp_atom.atom_index; - } - } - }, - .@"extern" => { - const target_sym = rel.getTargetSymbol(macho_file); - if (target_sym.getAtom(macho_file)) |target_atom| { - 
if (target_atom.getLiteralPoolIndex(macho_file)) |lp_index| { - const lp_atom = lp.getAtom(lp_index, macho_file); - if (target_atom.atom_index != lp_atom.atom_index) { - lp_atom.alignment = @max(lp_atom.alignment, target_atom.alignment); - target_atom.flags.alive = false; - target_sym.atom = lp_atom.atom_index; - } - } - } - }, + for (relocs) |*rel| { + if (rel.tag != .@"extern") continue; + const target_sym_ref = rel.getTargetSymbolRef(atom.*, macho_file); + const file = target_sym_ref.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + const target_sym = target_sym_ref.getSymbol(macho_file).?; + const target_atom = target_sym.getAtom(macho_file) orelse continue; + if (!Object.isPtrLiteral(target_atom.getInputSection(macho_file))) continue; + const lp_index = target_atom.getExtra(macho_file).literal_pool_index; + const lp_sym = lp.getSymbol(lp_index, macho_file); + const lp_atom_ref = lp_sym.atom_ref; + if (target_atom.atom_index != lp_atom_ref.index or target_atom.file != lp_atom_ref.file) { + target_sym.atom_ref = lp_atom_ref; + } + } + } + + for (self.symbols.items) |*sym| { + if (!sym.getSectionFlags().objc_stubs) continue; + const extra = sym.getExtra(macho_file); + const file = sym.getFile(macho_file).?; + if (file.getIndex() != self.index) continue; + const tsym = switch (file) { + .dylib => unreachable, + inline else => |x| &x.symbols.items[extra.objc_selrefs], }; + const atom = tsym.getAtom(macho_file) orelse continue; + if (!Object.isPtrLiteral(atom.getInputSection(macho_file))) continue; + const lp_index = atom.getExtra(macho_file).literal_pool_index; + const lp_sym = lp.getSymbol(lp_index, macho_file); + const lp_atom_ref = lp_sym.atom_ref; + if (atom.atom_index != lp_atom_ref.index or atom.file != lp_atom_ref.file) { + tsym.atom_ref = lp_atom_ref; + } } +} - for (self.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); - if (!sym.flags.objc_stubs) continue; - var extra = sym.getExtra(macho_file).?; - const atom = macho_file.getAtom(extra.objc_selrefs).?; - if (atom.getLiteralPoolIndex(macho_file)) |lp_index| { - const lp_atom = lp.getAtom(lp_index, macho_file); - if (atom.atom_index != lp_atom.atom_index) { - lp_atom.alignment = @max(lp_atom.alignment, atom.alignment); - atom.flags.alive = false; - extra.objc_selrefs = lp_atom.atom_index; - sym.setExtra(extra, macho_file); +pub fn scanRelocs(self: *InternalObject, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + if (self.getEntryRef(macho_file)) |ref| { + if (ref.getFile(macho_file) != null) { + const sym = ref.getSymbol(macho_file).?; + if (sym.flags.import) sym.setSectionFlags(.{ .stubs = true }); + } + } + if (self.getDyldStubBinderRef(macho_file)) |ref| { + if (ref.getFile(macho_file) != null) { + const sym = ref.getSymbol(macho_file).?; + sym.setSectionFlags(.{ .got = true }); + } + } + if (self.getObjcMsgSendRef(macho_file)) |ref| { + if (ref.getFile(macho_file) != null) { + const sym = ref.getSymbol(macho_file).?; + // TODO is it always needed, or only if we are synthesising fast stubs + sym.setSectionFlags(.{ .got = true }); + } + } +} + +pub fn allocateSyntheticSymbols(self: *InternalObject, macho_file: *MachO) void { + const text_seg = macho_file.getTextSegment(); + + if (self.mh_execute_header_index) |index| { + const ref = self.getSymbolRef(index, macho_file); + if (ref.getFile(macho_file)) |file| { + if (file.getIndex() == self.index) { + const sym = &self.symbols.items[index]; + sym.value = text_seg.vmaddr; + } + } + } + 
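// A standalone sketch (not part of the patch) of the symbol-reference indirection that the
// getSymbolRef calls above rely on: each file keeps one resolver slot per local symbol
// (0 meaning "unresolved"), and the globally resolved {index, file} pair wins whenever the
// slot is populated. All names below are hypothetical stand-ins for the real types.
const std = @import("std");

const Ref = struct { index: u32, file: u32 };

const Resolver = struct {
    // Slot numbers are 1-based; values[slot - 1] holds the winning reference.
    values: std.ArrayListUnmanaged(Ref) = .{},

    fn get(self: Resolver, slot: u32) ?Ref {
        if (slot == 0) return null;
        return self.values.items[slot - 1];
    }
};

const FileSyms = struct {
    file_index: u32,
    // One resolver slot per local symbol; 0 when the symbol never became global.
    globals: []const u32,

    fn getSymbolRef(self: FileSyms, index: u32, resolver: Resolver) Ref {
        if (resolver.get(self.globals[index])) |ref| return ref;
        return .{ .index = index, .file = self.file_index };
    }
};

test "ref indirection" {
    const gpa = std.testing.allocator;
    var resolver = Resolver{};
    defer resolver.values.deinit(gpa);
    // Slot 1 says: this global resolved to symbol 5 in file 2.
    try resolver.values.append(gpa, .{ .index = 5, .file = 2 });

    const file = FileSyms{ .file_index = 3, .globals = &.{ 1, 0 } };
    try std.testing.expectEqual(Ref{ .index = 5, .file = 2 }, file.getSymbolRef(0, resolver));
    try std.testing.expectEqual(Ref{ .index = 1, .file = 3 }, file.getSymbolRef(1, resolver));
}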
+ if (macho_file.data_sect_index) |idx| { + const sect = macho_file.sections.items(.header)[idx]; + for (&[_]?Symbol.Index{ + self.dso_handle_index, + self.mh_dylib_header_index, + self.dyld_private_index, + }) |maybe_index| { + if (maybe_index) |index| { + const ref = self.getSymbolRef(index, macho_file); + if (ref.getFile(macho_file)) |file| { + if (file.getIndex() == self.index) { + const sym = &self.symbols.items[index]; + sym.value = sect.addr; + sym.out_n_sect = idx; + } + } } } } } -pub fn calcSymtabSize(self: *InternalObject, macho_file: *MachO) !void { - for (self.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); - if (sym.getFile(macho_file)) |file| if (file.getIndex() != self.index) continue; +pub fn calcSymtabSize(self: *InternalObject, macho_file: *MachO) void { + for (self.symbols.items, 0..) |*sym, i| { + const ref = self.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + if (sym.getName(macho_file).len == 0) continue; sym.flags.output_symtab = true; if (sym.isLocal()) { - try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file); + sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file); self.output_symtab_ctx.nlocals += 1; } else if (sym.flags.@"export") { - try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nexports }, macho_file); + sym.addExtra(.{ .symtab = self.output_symtab_ctx.nexports }, macho_file); self.output_symtab_ctx.nexports += 1; } else { assert(sym.flags.import); - try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); + sym.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); self.output_symtab_ctx.nimports += 1; } self.output_symtab_ctx.strsize += @as(u32, @intCast(sym.getName(macho_file).len + 1)); } } +pub fn writeAtoms(self: *InternalObject, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.getAtoms()) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; + const sect = atom.getInputSection(macho_file); + if (sect.isZerofill()) continue; + const off = atom.value; + const buffer = macho_file.sections.items(.out)[atom.out_n_sect].items[off..][0..atom.size]; + @memcpy(buffer, self.getSectionData(atom.n_sect)); + try atom.resolveRelocs(macho_file, buffer); + } +} + pub fn writeSymtab(self: InternalObject, macho_file: *MachO) void { - for (self.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); - if (sym.getFile(macho_file)) |file| if (file.getIndex() != self.index) continue; + var n_strx = self.output_symtab_ctx.stroff; + for (self.symbols.items, 0..) 
|sym, i| { + const ref = self.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; const idx = sym.getOutputSymtabIndex(macho_file) orelse continue; - const n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); - macho_file.strtab.appendSliceAssumeCapacity(sym.getName(macho_file)); - macho_file.strtab.appendAssumeCapacity(0); const out_sym = &macho_file.symtab.items[idx]; out_sym.n_strx = n_strx; sym.setOutputSym(macho_file, out_sym); + const name = sym.getName(macho_file); + @memcpy(macho_file.strtab.items[n_strx..][0..name.len], name); + n_strx += @intCast(name.len); + macho_file.strtab.items[n_strx] = 0; + n_strx += 1; } } @@ -261,16 +620,173 @@ pub fn getSectionData(self: *const InternalObject, index: u32) []const u8 { } else @panic("ref to non-existent section"); } -pub fn getString(self: InternalObject, off: u32) [:0]const u8 { - _ = self; - _ = off; - return ""; +pub fn addString(self: *InternalObject, allocator: Allocator, name: []const u8) !MachO.String { + const off: u32 = @intCast(self.strtab.items.len); + try self.strtab.ensureUnusedCapacity(allocator, name.len + 1); + self.strtab.appendSliceAssumeCapacity(name); + self.strtab.appendAssumeCapacity(0); + return .{ .pos = off, .len = @intCast(name.len + 1) }; +} + +pub fn getString(self: InternalObject, name: MachO.String) [:0]const u8 { + assert(name.pos < self.strtab.items.len and name.pos + name.len <= self.strtab.items.len); + if (name.len == 0) return ""; + return self.strtab.items[name.pos..][0 .. name.len - 1 :0]; +} + +pub fn getNStrx(self: InternalObject, off: u32) [:0]const u8 { + assert(off < self.strtab.items.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0); } pub fn asFile(self: *InternalObject) File { return .{ .internal = self }; } +fn addAtom(self: *InternalObject, allocator: Allocator) !Atom.Index { + const atom_index: Atom.Index = @intCast(self.atoms.items.len); + const atom = try self.atoms.addOne(allocator); + atom.* = .{ + .file = self.index, + .atom_index = atom_index, + .extra = try self.addAtomExtra(allocator, .{}), + }; + return atom_index; +} + +pub fn getAtom(self: *InternalObject, atom_index: Atom.Index) ?*Atom { + if (atom_index == 0) return null; + assert(atom_index < self.atoms.items.len); + return &self.atoms.items[atom_index]; +} + +pub fn getAtoms(self: InternalObject) []const Atom.Index { + return self.atoms_indexes.items; +} + +fn addAtomExtra(self: *InternalObject, allocator: Allocator, extra: Atom.Extra) !u32 { + const fields = @typeInfo(Atom.Extra).Struct.fields; + try self.atoms_extra.ensureUnusedCapacity(allocator, fields.len); + return self.addAtomExtraAssumeCapacity(extra); +} + +fn addAtomExtraAssumeCapacity(self: *InternalObject, extra: Atom.Extra) u32 { + const index = @as(u32, @intCast(self.atoms_extra.items.len)); + const fields = @typeInfo(Atom.Extra).Struct.fields; + inline for (fields) |field| { + self.atoms_extra.appendAssumeCapacity(switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }); + } + return index; +} + +pub fn getAtomExtra(self: InternalObject, index: u32) Atom.Extra { + const fields = @typeInfo(Atom.Extra).Struct.fields; + var i: usize = index; + var result: Atom.Extra = undefined; + inline for (fields) |field| { + @field(result, field.name) = switch (field.type) { + u32 => self.atoms_extra.items[i], + else => @compileError("bad field type"), + }; + i += 1; + } + return result; +} + 
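// A standalone sketch (not part of the patch) of the flat u32 "extra" encoding used by
// addAtomExtra/getAtomExtra just above (and by the symbol variants): every field of the
// extra struct is packed into, and read back out of, a shared u32 array via comptime field
// reflection. The reduced Extra below reuses two field names from the patch; the Pool type
// is a hypothetical stand-in.
const std = @import("std");

const Extra = struct {
    rel_index: u32 = 0,
    rel_count: u32 = 0,
};

const Pool = struct {
    words: std.ArrayListUnmanaged(u32) = .{},

    // Appends one Extra as consecutive u32 words and returns its starting index.
    fn add(self: *Pool, gpa: std.mem.Allocator, extra: Extra) !u32 {
        const fields = @typeInfo(Extra).Struct.fields;
        const index: u32 = @intCast(self.words.items.len);
        try self.words.ensureUnusedCapacity(gpa, fields.len);
        inline for (fields) |field| {
            self.words.appendAssumeCapacity(@field(extra, field.name));
        }
        return index;
    }

    // Reconstructs an Extra from the words starting at `index`.
    fn get(self: Pool, index: u32) Extra {
        const fields = @typeInfo(Extra).Struct.fields;
        var i: usize = index;
        var result: Extra = undefined;
        inline for (fields) |field| {
            @field(result, field.name) = self.words.items[i];
            i += 1;
        }
        return result;
    }
};

test "flat u32 extras" {
    const gpa = std.testing.allocator;
    var pool = Pool{};
    defer pool.words.deinit(gpa);
    const idx = try pool.add(gpa, .{ .rel_index = 7, .rel_count = 2 });
    try std.testing.expectEqual(@as(u32, 7), pool.get(idx).rel_index);
    try std.testing.expectEqual(@as(u32, 2), pool.get(idx).rel_count);
}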
+pub fn setAtomExtra(self: *InternalObject, index: u32, extra: Atom.Extra) void { + assert(index > 0); + const fields = @typeInfo(Atom.Extra).Struct.fields; + inline for (fields, 0..) |field, i| { + self.atoms_extra.items[index + i] = switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }; + } +} + +pub fn getEntryRef(self: InternalObject, macho_file: *MachO) ?MachO.Ref { + const index = self.entry_index orelse return null; + return self.getSymbolRef(index, macho_file); +} + +pub fn getDyldStubBinderRef(self: InternalObject, macho_file: *MachO) ?MachO.Ref { + const index = self.dyld_stub_binder_index orelse return null; + return self.getSymbolRef(index, macho_file); +} + +pub fn getDyldPrivateRef(self: InternalObject, macho_file: *MachO) ?MachO.Ref { + const index = self.dyld_private_index orelse return null; + return self.getSymbolRef(index, macho_file); +} + +pub fn getObjcMsgSendRef(self: InternalObject, macho_file: *MachO) ?MachO.Ref { + const index = self.objc_msg_send_index orelse return null; + return self.getSymbolRef(index, macho_file); +} + +pub fn addSymbol(self: *InternalObject, allocator: Allocator) !Symbol.Index { + try self.symbols.ensureUnusedCapacity(allocator, 1); + return self.addSymbolAssumeCapacity(); +} + +pub fn addSymbolAssumeCapacity(self: *InternalObject) Symbol.Index { + const index: Symbol.Index = @intCast(self.symbols.items.len); + const symbol = self.symbols.addOneAssumeCapacity(); + symbol.* = .{ .file = self.index }; + return index; +} + +pub fn getSymbolRef(self: InternalObject, index: Symbol.Index, macho_file: *MachO) MachO.Ref { + const global_index = self.globals.items[index]; + if (macho_file.resolver.get(global_index)) |ref| return ref; + return .{ .index = index, .file = self.index }; +} + +pub fn addSymbolExtra(self: *InternalObject, allocator: Allocator, extra: Symbol.Extra) !u32 { + const fields = @typeInfo(Symbol.Extra).Struct.fields; + try self.symbols_extra.ensureUnusedCapacity(allocator, fields.len); + return self.addSymbolExtraAssumeCapacity(extra); +} + +fn addSymbolExtraAssumeCapacity(self: *InternalObject, extra: Symbol.Extra) u32 { + const index = @as(u32, @intCast(self.symbols_extra.items.len)); + const fields = @typeInfo(Symbol.Extra).Struct.fields; + inline for (fields) |field| { + self.symbols_extra.appendAssumeCapacity(switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }); + } + return index; +} + +pub fn getSymbolExtra(self: InternalObject, index: u32) Symbol.Extra { + const fields = @typeInfo(Symbol.Extra).Struct.fields; + var i: usize = index; + var result: Symbol.Extra = undefined; + inline for (fields) |field| { + @field(result, field.name) = switch (field.type) { + u32 => self.symbols_extra.items[i], + else => @compileError("bad field type"), + }; + i += 1; + } + return result; +} + +pub fn setSymbolExtra(self: *InternalObject, index: u32, extra: Symbol.Extra) void { + const fields = @typeInfo(Symbol.Extra).Struct.fields; + inline for (fields, 0..) 
|field, i| { + self.symbols_extra.items[index + i] = switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }; + } +} + const FormatContext = struct { self: *InternalObject, macho_file: *MachO, @@ -292,8 +808,8 @@ fn formatAtoms( _ = unused_fmt_string; _ = options; try writer.writeAll(" atoms\n"); - for (ctx.self.atoms.items) |atom_index| { - const atom = ctx.macho_file.getAtom(atom_index).?; + for (ctx.self.getAtoms()) |atom_index| { + const atom = ctx.self.getAtom(atom_index) orelse continue; try writer.print(" {}\n", .{atom.fmt(ctx.macho_file)}); } } @@ -313,10 +829,17 @@ fn formatSymtab( ) !void { _ = unused_fmt_string; _ = options; + const macho_file = ctx.macho_file; + const self = ctx.self; try writer.writeAll(" symbols\n"); - for (ctx.self.symbols.items) |index| { - const global = ctx.macho_file.getSymbol(index); - try writer.print(" {}\n", .{global.fmt(ctx.macho_file)}); + for (self.symbols.items, 0..) |sym, i| { + const ref = self.getSymbolRef(@intCast(i), macho_file); + if (ref.getFile(macho_file) == null) { + // TODO any better way of handling this? + try writer.print(" {s} : unclaimed\n", .{sym.getName(macho_file)}); + } else { + try writer.print(" {}\n", .{ref.getSymbol(macho_file).?.fmt(macho_file)}); + } } } @@ -335,6 +858,7 @@ const assert = std.debug.assert; const macho = std.macho; const mem = std.mem; const std = @import("std"); +const trace = @import("../tracy.zig").trace; const Allocator = std.mem.Allocator; const Atom = @import("Atom.zig"); diff --git a/src/MachO/Object.zig b/src/MachO/Object.zig index 27f72a12..e8126fd5 100644 --- a/src/MachO/Object.zig +++ b/src/MachO/Object.zig @@ -1,18 +1,23 @@ -archive: ?Archive = null, +/// Non-zero for fat object files or archives +offset: u64, path: []const u8, file_handle: File.HandleIndex, mtime: u64, index: File.Index, +ar_name: ?[]const u8 = null, header: ?macho.mach_header_64 = null, sections: std.MultiArrayList(Section) = .{}, symtab: std.MultiArrayList(Nlist) = .{}, strtab: std.ArrayListUnmanaged(u8) = .{}, -symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, -atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, +symbols: std.ArrayListUnmanaged(Symbol) = .{}, +symbols_extra: std.ArrayListUnmanaged(u32) = .{}, +globals: std.ArrayListUnmanaged(MachO.SymbolResolver.Index) = .{}, +atoms: std.ArrayListUnmanaged(Atom) = .{}, +atoms_indexes: std.ArrayListUnmanaged(Atom.Index) = .{}, +atoms_extra: std.ArrayListUnmanaged(u32) = .{}, -platform: ?MachO.Options.Platform = null, compile_unit: ?CompileUnit = null, stab_files: std.ArrayListUnmanaged(StabFile) = .{}, @@ -21,20 +26,19 @@ compact_unwind_sect_index: ?u8 = null, cies: std.ArrayListUnmanaged(Cie) = .{}, fdes: std.ArrayListUnmanaged(Fde) = .{}, eh_frame_data: std.ArrayListUnmanaged(u8) = .{}, -unwind_records: std.ArrayListUnmanaged(UnwindInfo.Record.Index) = .{}, +unwind_records: std.ArrayListUnmanaged(UnwindInfo.Record) = .{}, +unwind_records_indexes: std.ArrayListUnmanaged(UnwindInfo.Record.Index) = .{}, data_in_code: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, alive: bool = true, hidden: bool = false, -num_rebase_relocs: u32 = 0, -num_bind_relocs: u32 = 0, -num_weak_bind_relocs: u32 = 0, +compact_unwind_ctx: CompactUnwindCtx = .{}, output_symtab_ctx: MachO.SymtabCtx = .{}, pub fn deinit(self: *Object, allocator: Allocator) void { allocator.free(self.path); - if (self.archive) |*ar| allocator.free(ar.path); + if (self.ar_name) |path| allocator.free(path); for (self.sections.items(.relocs), 
self.sections.items(.subsections)) |*relocs, *sub| { relocs.deinit(allocator); sub.deinit(allocator); @@ -43,11 +47,16 @@ pub fn deinit(self: *Object, allocator: Allocator) void { self.symtab.deinit(allocator); self.strtab.deinit(allocator); self.symbols.deinit(allocator); + self.symbols_extra.deinit(allocator); + self.globals.deinit(allocator); self.atoms.deinit(allocator); + self.atoms_indexes.deinit(allocator); + self.atoms_extra.deinit(allocator); self.cies.deinit(allocator); self.fdes.deinit(allocator); self.eh_frame_data.deinit(allocator); self.unwind_records.deinit(allocator); + self.unwind_records_indexes.deinit(allocator); for (self.stab_files.items) |*sf| { sf.stabs.deinit(allocator); } @@ -62,23 +71,43 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { log.debug("parsing input object file {}", .{self.fmtPath()}); const gpa = macho_file.base.allocator; - const offset = if (self.archive) |ar| ar.offset else 0; const file = macho_file.getFileHandle(self.file_handle); + // Atom at index 0 is reserved as null atom + try self.atoms.append(gpa, .{}); + try self.atoms_extra.append(gpa, 0); + var header_buffer: [@sizeOf(macho.mach_header_64)]u8 = undefined; { - const amt = try file.preadAll(&header_buffer, offset); + const amt = try file.preadAll(&header_buffer, self.offset); if (amt != @sizeOf(macho.mach_header_64)) return error.InputOutput; } self.header = @as(*align(1) const macho.mach_header_64, @ptrCast(&header_buffer)).*; + const cpu_arch: std.Target.Cpu.Arch = switch (self.header.?.cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => unreachable, + }; + if (macho_file.options.cpu_arch.? != cpu_arch) { + macho_file.base.fatal("{}: invalid architecture '{s}', expected '{s}'", .{ + self.fmtPath(), + @tagName(cpu_arch), + @tagName(macho_file.options.cpu_arch.?), + }); + return error.ParseFailed; + } + const lc_buffer = try gpa.alloc(u8, self.header.?.sizeofcmds); defer gpa.free(lc_buffer); { - const amt = try file.preadAll(lc_buffer, offset + @sizeOf(macho.mach_header_64)); + const amt = try file.preadAll(lc_buffer, self.offset + @sizeOf(macho.mach_header_64)); if (amt != self.header.?.sizeofcmds) return error.InputOutput; } + var platforms = std.ArrayList(MachO.Options.Platform).init(gpa); + defer platforms.deinit(); + var it = LoadCommandIterator{ .ncmds = self.header.?.ncmds, .buffer = lc_buffer, @@ -102,14 +131,14 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { const cmd = lc.cast(macho.symtab_command).?; try self.strtab.resize(gpa, cmd.strsize); { - const amt = try file.preadAll(self.strtab.items, cmd.stroff + offset); + const amt = try file.preadAll(self.strtab.items, cmd.stroff + self.offset); if (amt != self.strtab.items.len) return error.InputOutput; } const symtab_buffer = try gpa.alloc(u8, cmd.nsyms * @sizeOf(macho.nlist_64)); defer gpa.free(symtab_buffer); { - const amt = try file.preadAll(symtab_buffer, cmd.symoff + offset); + const amt = try file.preadAll(symtab_buffer, cmd.symoff + self.offset); if (amt != symtab_buffer.len) return error.InputOutput; } const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(symtab_buffer.ptr))[0..cmd.nsyms]; @@ -127,7 +156,7 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { const buffer = try gpa.alloc(u8, cmd.datasize); defer gpa.free(buffer); { - const amt = try file.preadAll(buffer, offset + cmd.dataoff); + const amt = try file.preadAll(buffer, self.offset + cmd.dataoff); if (amt != buffer.len) return error.InputOutput; } const ndice = @divExact(cmd.datasize, 
@sizeOf(macho.data_in_code_entry)); @@ -139,19 +168,45 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { .VERSION_MIN_IPHONEOS, .VERSION_MIN_TVOS, .VERSION_MIN_WATCHOS, - => if (self.platform == null) { - self.platform = MachO.Options.Platform.fromLoadCommand(lc); - }, + => try platforms.append(MachO.Options.Platform.fromLoadCommand(lc)), else => {}, }; + if (macho_file.options.platform) |plat| { + const match = for (platforms.items) |this_plat| { + if (this_plat.platform == plat.platform) break this_plat; + } else null; + if (match) |this_plat| { + if (this_plat.version.value > plat.version.value) { + macho_file.base.warn( + "{}: object file was built for newer platform version: expected {}, got {}", + .{ + self.fmtPath(), + plat.version, + this_plat.version, + }, + ); + } + } else { + const err = try macho_file.base.addErrorWithNotes(1 + platforms.items.len); + try err.addMsg("{}: object file was built for different platforms than required {s}", .{ + self.fmtPath(), + @tagName(plat.platform), + }); + for (platforms.items) |this_plat| { + try err.addNote("object file built for {s}", .{@tagName(this_plat.platform)}); + } + return error.ParseFailed; + } + } + const NlistIdx = struct { nlist: macho.nlist_64, idx: usize, fn rank(ctx: *const Object, nl: macho.nlist_64) u8 { if (!nl.ext()) { - const name = ctx.getString(nl.n_strx); + const name = ctx.getNStrx(nl.n_strx); if (name.len == 0) return 5; if (name[0] == 'l' or name[0] == 'L') return 4; return 3; @@ -179,43 +234,49 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { mem.sort(NlistIdx, nlists.items, self, NlistIdx.lessThan); if (self.hasSubsections()) { - try self.initSubsections(nlists.items, macho_file); + try self.initSubsections(gpa, nlists.items); } else { - try self.initSections(nlists.items, macho_file); + try self.initSections(gpa, nlists.items); } - try self.initCstringLiterals(macho_file); - try self.initFixedSizeLiterals(macho_file); - try self.initPointerLiterals(macho_file); + try self.initCstringLiterals(gpa, file, macho_file); + try self.initFixedSizeLiterals(gpa, macho_file); + try self.initPointerLiterals(gpa, macho_file); try self.linkNlistToAtom(macho_file); try self.sortAtoms(macho_file); - try self.initSymbols(macho_file); - try self.initSymbolStabs(nlists.items, macho_file); - try self.initRelocs(macho_file); + try self.initSymbols(gpa, macho_file); + try self.initSymbolStabs(gpa, nlists.items, macho_file); + try self.initRelocs(file, cpu_arch, macho_file); if (self.eh_frame_sect_index) |index| { - try self.initEhFrameRecords(index, macho_file); + try self.initEhFrameRecords(gpa, index, file, macho_file); } if (self.compact_unwind_sect_index) |index| { - try self.initUnwindRecords(index, macho_file); + try self.initUnwindRecords(gpa, index, file, macho_file); } if (self.hasUnwindRecords() or self.hasEhFrameRecords()) { - try self.parseUnwindRecords(macho_file); + try self.parseUnwindRecords(gpa, cpu_arch, macho_file); } - for (self.atoms.items) |atom_index| { - const atom = macho_file.getAtom(atom_index).?; + try self.parseDebugInfo(macho_file); + + for (self.getAtoms()) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; const isec = atom.getInputSection(macho_file); if (mem.eql(u8, isec.sectName(), "__eh_frame") or mem.eql(u8, isec.sectName(), "__compact_unwind") or isec.attrs() & macho.S_ATTR_DEBUG != 0) { - atom.flags.alive = false; + _ = atom.alive.swap(false, .seq_cst); } } + + // Finally, we do a post-parse check for -ObjC to see if we need to force load this member + // 
anyhow. + self.alive = self.alive or (macho_file.options.force_load_objc and self.hasObjc()); } pub fn isCstringLiteral(sect: macho.section_64) bool { @@ -236,10 +297,9 @@ pub fn isPtrLiteral(sect: macho.section_64) bool { return sect.type() == macho.S_LITERAL_POINTERS; } -fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { +fn initSubsections(self: *Object, allocator: Allocator, nlists: anytype) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; const slice = self.sections.slice(); for (slice.items(.header), slice.items(.subsections), 0..) |sect, *subsections, n_sect| { if (isCstringLiteral(sect)) continue; @@ -254,17 +314,18 @@ fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { } else nlists.len; if (nlist_start == nlist_end or nlists[nlist_start].nlist.n_value > sect.addr) { - const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() }); - defer gpa.free(name); + const name = try std.fmt.allocPrintZ(allocator, "{s}${s}", .{ sect.segName(), sect.sectName() }); + defer allocator.free(name); const size = if (nlist_start == nlist_end) sect.size else nlists[nlist_start].nlist.n_value - sect.addr; - const atom_index = try self.addAtom(.{ - .name = try self.addString(gpa, name), + const atom_index = try self.addAtom(allocator, .{ + .name = try self.addString(allocator, name), .n_sect = @intCast(n_sect), .off = 0, .size = size, .alignment = sect.@"align", - }, macho_file); - try subsections.append(gpa, .{ + }); + try self.atoms_indexes.append(allocator, atom_index); + try subsections.append(allocator, .{ .atom = atom_index, .off = 0, }); @@ -287,14 +348,15 @@ fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { @min(@ctz(nlist.nlist.n_value), sect.@"align") else sect.@"align"; - const atom_index = try self.addAtom(.{ - .name = nlist.nlist.n_strx, + const atom_index = try self.addAtom(allocator, .{ + .name = .{ .pos = nlist.nlist.n_strx, .len = @intCast(self.getNStrx(nlist.nlist.n_strx).len + 1) }, .n_sect = @intCast(n_sect), .off = nlist.nlist.n_value - sect.addr, .size = size, .alignment = alignment, - }, macho_file); - try subsections.append(gpa, .{ + }); + try self.atoms_indexes.append(allocator, atom_index); + try subsections.append(allocator, .{ .atom = atom_index, .off = nlist.nlist.n_value - sect.addr, }); @@ -306,30 +368,31 @@ fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { } } -fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void { +fn initSections(self: *Object, allocator: Allocator, nlists: anytype) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; const slice = self.sections.slice(); - try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len); + try self.atoms.ensureUnusedCapacity(allocator, self.sections.items(.header).len); + try self.atoms_indexes.ensureUnusedCapacity(allocator, self.sections.items(.header).len); for (slice.items(.header), 0..) 
|sect, n_sect| { if (isCstringLiteral(sect)) continue; if (isFixedSizeLiteral(sect)) continue; if (isPtrLiteral(sect)) continue; - const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() }); - defer gpa.free(name); + const name = try std.fmt.allocPrintZ(allocator, "{s}${s}", .{ sect.segName(), sect.sectName() }); + defer allocator.free(name); - const atom_index = try self.addAtom(.{ - .name = try self.addString(gpa, name), + const atom_index = try self.addAtom(allocator, .{ + .name = try self.addString(allocator, name), .n_sect = @intCast(n_sect), .off = 0, .size = sect.size, .alignment = sect.@"align", - }, macho_file); - try slice.items(.subsections)[n_sect].append(gpa, .{ .atom = atom_index, .off = 0 }); + }); + try self.atoms_indexes.append(allocator, atom_index); + try slice.items(.subsections)[n_sect].append(allocator, .{ .atom = atom_index, .off = 0 }); const nlist_start = for (nlists, 0..) |nlist, i| { if (nlist.nlist.n_sect - 1 == n_sect) break i; @@ -358,44 +421,22 @@ fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void { } } -const AddAtomArgs = struct { - name: u32, - n_sect: u8, - off: u64, - size: u64, - alignment: u32, -}; - -fn addAtom(self: *Object, args: AddAtomArgs, macho_file: *MachO) !Atom.Index { - const gpa = macho_file.base.allocator; - const atom_index = try macho_file.addAtom(); - const atom = macho_file.getAtom(atom_index).?; - atom.file = self.index; - atom.atom_index = atom_index; - atom.name = args.name; - atom.n_sect = args.n_sect; - atom.size = args.size; - atom.alignment = args.alignment; - atom.off = args.off; - try self.atoms.append(gpa, atom_index); - return atom_index; -} - -fn initCstringLiterals(self: *Object, macho_file: *MachO) !void { +fn initCstringLiterals(self: *Object, allocator: Allocator, file: File.Handle, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; const slice = self.sections.slice(); for (slice.items(.header), 0..) 
|sect, n_sect| { if (!isCstringLiteral(sect)) continue; - const data = try self.getSectionData(@intCast(n_sect), macho_file); - defer gpa.free(data); + const data = try self.getSectionData(allocator, @intCast(n_sect), file); + defer allocator.free(data); + var count: u32 = 0; var start: u32 = 0; while (start < data.len) { + defer count += 1; var end = start; while (end < data.len - 1 and data[end] != 0) : (end += 1) {} if (data[end] != 0) { @@ -408,28 +449,47 @@ fn initCstringLiterals(self: *Object, macho_file: *MachO) !void { } end += 1; - const atom_index = try self.addAtom(.{ - .name = 0, + const name = try std.fmt.allocPrintZ(allocator, "l._str{d}", .{count}); + defer allocator.free(name); + const name_str = try self.addString(allocator, name); + + const atom_index = try self.addAtom(allocator, .{ + .name = name_str, .n_sect = @intCast(n_sect), .off = start, .size = end - start, .alignment = sect.@"align", - }, macho_file); - try slice.items(.subsections)[n_sect].append(gpa, .{ + }); + try self.atoms_indexes.append(allocator, atom_index); + try slice.items(.subsections)[n_sect].append(allocator, .{ .atom = atom_index, .off = start, }); + const atom = self.getAtom(atom_index).?; + const nlist_index: u32 = @intCast(try self.symtab.addOne(allocator)); + self.symtab.set(nlist_index, .{ + .nlist = .{ + .n_strx = name_str.pos, + .n_type = macho.N_SECT, + .n_sect = @intCast(atom.n_sect + 1), + .n_desc = 0, + .n_value = atom.getInputAddress(macho_file), + }, + .size = atom.size, + .atom = atom_index, + }); + atom.addExtra(.{ .literal_symbol_index = nlist_index }, macho_file); + start = end; } } } -fn initFixedSizeLiterals(self: *Object, macho_file: *MachO) !void { +fn initFixedSizeLiterals(self: *Object, allocator: Allocator, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; const slice = self.sections.slice(); for (slice.items(.header), 0..) 
|sect, n_sect| { @@ -451,27 +511,50 @@ fn initFixedSizeLiterals(self: *Object, macho_file: *MachO) !void { } var pos: u32 = 0; - while (pos < sect.size) : (pos += rec_size) { - const atom_index = try self.addAtom(.{ - .name = 0, + var count: u32 = 0; + while (pos < sect.size) : ({ + pos += rec_size; + count += 1; + }) { + const name = try std.fmt.allocPrintZ(allocator, "l._literal{d}", .{count}); + defer allocator.free(name); + const name_str = try self.addString(allocator, name); + + const atom_index = try self.addAtom(allocator, .{ + .name = name_str, .n_sect = @intCast(n_sect), .off = pos, .size = rec_size, .alignment = sect.@"align", - }, macho_file); - try slice.items(.subsections)[n_sect].append(gpa, .{ + }); + try self.atoms_indexes.append(allocator, atom_index); + try slice.items(.subsections)[n_sect].append(allocator, .{ .atom = atom_index, .off = pos, }); + + const atom = self.getAtom(atom_index).?; + const nlist_index: u32 = @intCast(try self.symtab.addOne(allocator)); + self.symtab.set(nlist_index, .{ + .nlist = .{ + .n_strx = name_str.pos, + .n_type = macho.N_SECT, + .n_sect = @intCast(atom.n_sect + 1), + .n_desc = 0, + .n_value = atom.getInputAddress(macho_file), + }, + .size = atom.size, + .atom = atom_index, + }); + atom.addExtra(.{ .literal_symbol_index = nlist_index }, macho_file); } } } -fn initPointerLiterals(self: *Object, macho_file: *MachO) !void { +fn initPointerLiterals(self: *Object, allocator: Allocator, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; const slice = self.sections.slice(); for (slice.items(.header), 0..) |sect, n_sect| { @@ -490,23 +573,48 @@ fn initPointerLiterals(self: *Object, macho_file: *MachO) !void { for (0..num_ptrs) |i| { const pos: u32 = @as(u32, @intCast(i)) * rec_size; - const atom_index = try self.addAtom(.{ - .name = 0, + + const name = try std.fmt.allocPrintZ(allocator, "l._ptr{d}", .{i}); + defer allocator.free(name); + const name_str = try self.addString(allocator, name); + + const atom_index = try self.addAtom(allocator, .{ + .name = name_str, .n_sect = @intCast(n_sect), .off = pos, .size = rec_size, .alignment = sect.@"align", - }, macho_file); - try slice.items(.subsections)[n_sect].append(gpa, .{ + }); + try self.atoms_indexes.append(allocator, atom_index); + try slice.items(.subsections)[n_sect].append(allocator, .{ .atom = atom_index, .off = pos, }); + + const atom = self.getAtom(atom_index).?; + const nlist_index: u32 = @intCast(try self.symtab.addOne(allocator)); + self.symtab.set(nlist_index, .{ + .nlist = .{ + .n_strx = name_str.pos, + .n_type = macho.N_SECT, + .n_sect = @intCast(atom.n_sect + 1), + .n_desc = 0, + .n_value = atom.getInputAddress(macho_file), + }, + .size = atom.size, + .atom = atom_index, + }); + atom.addExtra(.{ .literal_symbol_index = nlist_index }, macho_file); } } } -pub fn resolveLiterals(self: Object, lp: *MachO.LiteralPool, macho_file: *MachO) !void { +pub fn resolveLiterals(self: *Object, lp: *MachO.LiteralPool, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; + const file = macho_file.getFileHandle(self.file_handle); var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); @@ -514,83 +622,108 @@ pub fn resolveLiterals(self: Object, lp: *MachO.LiteralPool, macho_file: *MachO) const slice = self.sections.slice(); for (slice.items(.header), slice.items(.subsections), 0..) 
|header, subs, n_sect| { if (isCstringLiteral(header) or isFixedSizeLiteral(header)) { - const data = try self.getSectionData(@intCast(n_sect), macho_file); + const data = try self.getSectionData(gpa, @intCast(n_sect), file); defer gpa.free(data); for (subs.items) |sub| { - const atom = macho_file.getAtom(sub.atom).?; + const atom = self.getAtom(sub.atom).?; const atom_data = data[atom.off..][0..atom.size]; const res = try lp.insert(gpa, header.type(), atom_data); if (!res.found_existing) { - res.atom.* = sub.atom; + res.ref.* = .{ .index = atom.getExtra(macho_file).literal_symbol_index, .file = self.index }; + } else { + const lp_sym = lp.getSymbol(res.index, macho_file); + const lp_atom = lp_sym.getAtom(macho_file).?; + lp_atom.alignment = @max(lp_atom.alignment, atom.alignment); + _ = atom.alive.swap(false, .seq_cst); } - atom.flags.literal_pool = true; - try atom.addExtra(.{ .literal_index = res.index }, macho_file); + atom.addExtra(.{ .literal_pool_index = res.index }, macho_file); } } else if (isPtrLiteral(header)) { + var sections_data = std.AutoHashMap(u32, []const u8).init(gpa); + try sections_data.ensureUnusedCapacity(@intCast(self.sections.items(.header).len)); + defer { + var it = sections_data.iterator(); + while (it.next()) |entry| { + gpa.free(entry.value_ptr.*); + } + sections_data.deinit(); + } for (subs.items) |sub| { - const atom = macho_file.getAtom(sub.atom).?; + const atom = self.getAtom(sub.atom).?; const relocs = atom.getRelocs(macho_file); assert(relocs.len == 1); const rel = relocs[0]; const target = switch (rel.tag) { - .local => rel.target, - .@"extern" => rel.getTargetSymbol(macho_file).atom, + .local => rel.getTargetAtom(atom.*, macho_file), + .@"extern" => rel.getTargetSymbol(atom.*, macho_file).getAtom(macho_file).?, }; const addend = math.cast(u32, rel.addend) orelse return error.Overflow; - const target_atom = macho_file.getAtom(target).?; - try buffer.ensureUnusedCapacity(target_atom.size); - buffer.resize(target_atom.size) catch unreachable; - try target_atom.getCode(macho_file, buffer.items); + try buffer.ensureUnusedCapacity(target.size); + buffer.resize(target.size) catch unreachable; + const gop = try sections_data.getOrPut(target.n_sect); + if (!gop.found_existing) { + gop.value_ptr.* = try self.getSectionData(gpa, target.n_sect, file); + } + const data = gop.value_ptr.*; + @memcpy(buffer.items, data[target.off..][0..target.size]); const res = try lp.insert(gpa, header.type(), buffer.items[addend..]); buffer.clearRetainingCapacity(); if (!res.found_existing) { - res.atom.* = sub.atom; + res.ref.* = .{ .index = atom.getExtra(macho_file).literal_symbol_index, .file = self.index }; + } else { + const lp_sym = lp.getSymbol(res.index, macho_file); + const lp_atom = lp_sym.getAtom(macho_file).?; + lp_atom.alignment = @max(lp_atom.alignment, atom.alignment); + _ = atom.alive.swap(false, .seq_cst); } - atom.flags.literal_pool = true; - try atom.addExtra(.{ .literal_index = res.index }, macho_file); + atom.addExtra(.{ .literal_pool_index = res.index }, macho_file); } } } } -pub fn dedupLiterals(self: Object, lp: MachO.LiteralPool, macho_file: *MachO) void { - for (self.atoms.items) |atom_index| { - const atom = macho_file.getAtom(atom_index) orelse continue; - if (!atom.flags.alive) continue; - if (!atom.flags.relocs) continue; +pub fn dedupLiterals(self: *Object, lp: MachO.LiteralPool, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.getAtoms()) |atom_index| { + const atom = self.getAtom(atom_index) orelse 
continue; + if (!atom.alive.load(.seq_cst)) continue; const relocs = blk: { - const extra = atom.getExtra(macho_file).?; + const extra = atom.getExtra(macho_file); const relocs = self.sections.items(.relocs)[atom.n_sect].items; break :blk relocs[extra.rel_index..][0..extra.rel_count]; }; - for (relocs) |*rel| switch (rel.tag) { - .local => { - const target = macho_file.getAtom(rel.target).?; - if (target.getLiteralPoolIndex(macho_file)) |lp_index| { - const lp_atom = lp.getAtom(lp_index, macho_file); - if (target.atom_index != lp_atom.atom_index) { - lp_atom.alignment = @max(lp_atom.alignment, target.alignment); - target.flags.alive = false; - rel.target = lp_atom.atom_index; - } - } - }, - .@"extern" => { - const target_sym = rel.getTargetSymbol(macho_file); - if (target_sym.getAtom(macho_file)) |target_atom| { - if (target_atom.getLiteralPoolIndex(macho_file)) |lp_index| { - const lp_atom = lp.getAtom(lp_index, macho_file); - if (target_atom.atom_index != lp_atom.atom_index) { - lp_atom.alignment = @max(lp_atom.alignment, target_atom.alignment); - target_atom.flags.alive = false; - target_sym.atom = lp_atom.atom_index; - } - } - } - }, - }; + for (relocs) |*rel| { + if (rel.tag != .@"extern") continue; + const target_sym_ref = rel.getTargetSymbolRef(atom.*, macho_file); + const file = target_sym_ref.getFile(macho_file) orelse continue; + if (file.getIndex() != self.index) continue; + const target_sym = target_sym_ref.getSymbol(macho_file).?; + const target_atom = target_sym.getAtom(macho_file) orelse continue; + const isec = target_atom.getInputSection(macho_file); + if (!Object.isCstringLiteral(isec) and !Object.isFixedSizeLiteral(isec) and !Object.isPtrLiteral(isec)) continue; + const lp_index = target_atom.getExtra(macho_file).literal_pool_index; + const lp_sym = lp.getSymbol(lp_index, macho_file); + const lp_atom_ref = lp_sym.atom_ref; + if (target_atom.atom_index != lp_atom_ref.index or target_atom.file != lp_atom_ref.file) { + target_sym.atom_ref = lp_atom_ref; + } + } + } + + for (self.symbols.items) |*sym| { + const atom = sym.getAtom(macho_file) orelse continue; + const isec = atom.getInputSection(macho_file); + if (!Object.isCstringLiteral(isec) and !Object.isFixedSizeLiteral(isec) and !Object.isPtrLiteral(isec)) continue; + const lp_index = atom.getExtra(macho_file).literal_pool_index; + const lp_sym = lp.getSymbol(lp_index, macho_file); + const lp_atom_ref = lp_sym.atom_ref; + if (atom.atom_index != lp_atom_ref.index or self.index != lp_atom_ref.file) { + sym.atom_ref = lp_atom_ref; + } } } @@ -655,7 +788,7 @@ fn linkNlistToAtom(self: *Object, macho_file: *MachO) !void { atom.* = atom_index; } else { macho_file.base.fatal("{}: symbol {s} not attached to any (sub)section", .{ - self.fmtPath(), self.getString(nlist.n_strx), + self.fmtPath(), self.getNStrx(nlist.n_strx), }); return error.ParseFailed; } @@ -663,55 +796,59 @@ fn linkNlistToAtom(self: *Object, macho_file: *MachO) !void { } } -fn initSymbols(self: *Object, macho_file: *MachO) !void { +fn initSymbols(self: *Object, allocator: Allocator, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; + const slice = self.symtab.slice(); + const nsyms = slice.items(.nlist).len; - try self.symbols.ensureUnusedCapacity(gpa, slice.items(.nlist).len); + try self.symbols.ensureTotalCapacityPrecise(allocator, nsyms); + try self.symbols_extra.ensureTotalCapacityPrecise(allocator, nsyms * @sizeOf(Symbol.Extra)); + try self.globals.ensureTotalCapacityPrecise(allocator, 
nsyms); + self.globals.resize(allocator, nsyms) catch unreachable; + @memset(self.globals.items, 0); for (slice.items(.nlist), slice.items(.atom), 0..) |nlist, atom_index, i| { - if (nlist.ext()) { - const name = self.getString(nlist.n_strx); - const off = try macho_file.string_intern.insert(gpa, name); - const gop = try macho_file.getOrCreateGlobal(off); - self.symbols.addOneAssumeCapacity().* = gop.index; - if (nlist.undf() and nlist.weakRef()) { - macho_file.getSymbol(gop.index).flags.weak_ref = true; - } - continue; - } - - const index = try macho_file.addSymbol(); - self.symbols.appendAssumeCapacity(index); - const symbol = macho_file.getSymbol(index); - symbol.* = .{ - .value = nlist.n_value, - .name = nlist.n_strx, - .nlist_idx = @intCast(i), - .atom = 0, - .file = self.index, - }; - - if (macho_file.getAtom(atom_index)) |atom| { + const index = self.addSymbolAssumeCapacity(); + const symbol = &self.symbols.items[index]; + symbol.value = nlist.n_value; + symbol.name = .{ .pos = nlist.n_strx, .len = @intCast(self.getNStrx(nlist.n_strx).len + 1) }; + symbol.nlist_idx = @intCast(i); + symbol.extra = self.addSymbolExtraAssumeCapacity(.{}); + + if (self.getAtom(atom_index)) |atom| { assert(!nlist.abs()); symbol.value -= atom.getInputAddress(macho_file); - symbol.atom = atom_index; + symbol.atom_ref = .{ .index = atom_index, .file = self.index }; } + symbol.flags.weak = nlist.weakDef(); symbol.flags.abs = nlist.abs(); + symbol.flags.tentative = nlist.tentative(); symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.noDeadStrip(); + symbol.flags.dyn_ref = nlist.n_desc & macho.REFERENCED_DYNAMICALLY != 0; + symbol.flags.interposable = nlist.ext() and (nlist.sect() or nlist.abs()) and macho_file.options.dylib and macho_file.options.namespace == .flat and !nlist.pext(); if (nlist.sect() and self.sections.items(.header)[nlist.n_sect - 1].type() == macho.S_THREAD_LOCAL_VARIABLES) { symbol.flags.tlv = true; } + + if (nlist.ext()) { + if (nlist.undf()) { + symbol.flags.weak_ref = nlist.weakRef(); + } else if (nlist.pext() or (nlist.weakDef() and nlist.weakRef()) or self.hidden) { + symbol.visibility = .hidden; + } else { + symbol.visibility = .global; + } + } } } -fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void { +fn initSymbolStabs(self: *Object, allocator: Allocator, nlists: anytype, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); @@ -722,7 +859,7 @@ fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void { fn find(fs: @This(), addr: u64) ?Symbol.Index { // TODO binary search since we have the list sorted for (fs.entries) |nlist| { - if (nlist.nlist.n_value == addr) return fs.ctx.symbols.items[nlist.idx]; + if (nlist.nlist.n_value == addr) return @intCast(nlist.idx); } return null; } @@ -737,18 +874,17 @@ fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void { if (start == end) return; - const gpa = macho_file.base.allocator; const syms = self.symtab.items(.nlist); const sym_lookup = SymbolLookup{ .ctx = self, .entries = nlists }; // We need to cache nlists by name so that we can properly resolve local N_GSYM stabs. // What happens is `ld -r` will emit an N_GSYM stab for a symbol that may be either an // external or private external. 
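As a concrete illustration of the comment above, here is a rough, self-contained sketch of that name-to-address cache: defined, externally visible nlists are keyed by name so that a local N_GSYM stab, which only carries a name, can be resolved back to an address. The Nlist type here is a simplified stand-in, not the real macho.nlist_64:

const std = @import("std");

// Simplified stand-in for macho.nlist_64 plus its string-table lookup.
const Nlist = struct {
    name: []const u8, // what getNStrx(n_strx) would return
    n_value: u64,
    is_sect: bool, // nlist.sect(): defined in some section
    is_ext: bool, // nlist.ext() or nlist.pext()
};

fn buildAddrLookup(allocator: std.mem.Allocator, syms: []const Nlist) !std.StringHashMap(u64) {
    var addr_lookup = std.StringHashMap(u64).init(allocator);
    errdefer addr_lookup.deinit();
    for (syms) |sym| {
        // Mirrors the sym.sect() and (sym.ext() or sym.pext()) filter below.
        if (sym.is_sect and sym.is_ext) {
            try addr_lookup.putNoClobber(sym.name, sym.n_value);
        }
    }
    return addr_lookup;
}

test "N_GSYM stab resolves through the name cache" {
    const syms = [_]Nlist{
        .{ .name = "_exported", .n_value = 0x1000, .is_sect = true, .is_ext = true },
        .{ .name = "_purely_local", .n_value = 0x2000, .is_sect = true, .is_ext = false },
    };
    var addr_lookup = try buildAddrLookup(std.testing.allocator, &syms);
    defer addr_lookup.deinit();
    // An N_GSYM stab only names the symbol; its address comes from the cache.
    try std.testing.expectEqual(@as(u64, 0x1000), addr_lookup.get("_exported").?);
    try std.testing.expect(addr_lookup.get("_purely_local") == null);
}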
- var addr_lookup = std.StringHashMap(u64).init(gpa); + var addr_lookup = std.StringHashMap(u64).init(allocator); defer addr_lookup.deinit(); for (syms) |sym| { if (sym.sect() and (sym.ext() or sym.pext())) { - try addr_lookup.putNoClobber(self.getString(sym.n_strx), sym.n_value); + try addr_lookup.putNoClobber(self.getNStrx(sym.n_strx), sym.n_value); } } @@ -775,17 +911,17 @@ fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void { switch (nlist.n_type) { macho.N_BNSYM => { stab.is_func = true; - stab.symbol = sym_lookup.find(nlist.n_value); + stab.index = sym_lookup.find(nlist.n_value); // TODO validate i += 3; }, macho.N_GSYM => { stab.is_func = false; - stab.symbol = sym_lookup.find(addr_lookup.get(self.getString(nlist.n_strx)).?); + stab.index = sym_lookup.find(addr_lookup.get(self.getNStrx(nlist.n_strx)).?); }, macho.N_STSYM => { stab.is_func = false; - stab.symbol = sym_lookup.find(nlist.n_value); + stab.index = sym_lookup.find(nlist.n_value); }, else => { macho_file.base.fatal("{}: unhandled symbol stab type 0x{x}", .{ @@ -795,26 +931,32 @@ fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void { return error.ParseFailed; }, } - try sf.stabs.append(gpa, stab); + try sf.stabs.append(allocator, stab); } - try self.stab_files.append(gpa, sf); + try self.stab_files.append(allocator, sf); } } fn sortAtoms(self: *Object, macho_file: *MachO) !void { - const lessThanAtom = struct { - fn lessThanAtom(ctx: *MachO, lhs: Atom.Index, rhs: Atom.Index) bool { - return ctx.getAtom(lhs).?.getInputAddress(ctx) < ctx.getAtom(rhs).?.getInputAddress(ctx); + const Ctx = struct { + object: *Object, + m_file: *MachO, + + fn lessThanAtom(ctx: @This(), lhs: Atom.Index, rhs: Atom.Index) bool { + return ctx.object.getAtom(lhs).?.getInputAddress(ctx.m_file) < + ctx.object.getAtom(rhs).?.getInputAddress(ctx.m_file); } - }.lessThanAtom; - mem.sort(Atom.Index, self.atoms.items, macho_file, lessThanAtom); + }; + mem.sort(Atom.Index, self.atoms_indexes.items, Ctx{ + .object = self, + .m_file = macho_file, + }, Ctx.lessThanAtom); } -fn initRelocs(self: *Object, macho_file: *MachO) !void { +fn initRelocs(self: *Object, file: File.Handle, cpu_arch: std.Target.Cpu.Arch, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const cpu_arch = macho_file.options.cpu_arch.?; const slice = self.sections.slice(); for (slice.items(.header), slice.items(.relocs), 0..) 
|sect, *out, n_sect| { @@ -826,8 +968,8 @@ fn initRelocs(self: *Object, macho_file: *MachO) !void { !mem.eql(u8, sect.sectName(), "__compact_unwind")) continue; switch (cpu_arch) { - .x86_64 => try x86_64.parseRelocs(self, @intCast(n_sect), sect, out, macho_file), - .aarch64 => try aarch64.parseRelocs(self, @intCast(n_sect), sect, out, macho_file), + .x86_64 => try x86_64.parseRelocs(self, @intCast(n_sect), sect, out, file, macho_file), + .aarch64 => try aarch64.parseRelocs(self, @intCast(n_sect), sect, out, file, macho_file), else => unreachable, } @@ -839,8 +981,8 @@ fn initRelocs(self: *Object, macho_file: *MachO) !void { var next_reloc: usize = 0; for (subsections.items) |subsection| { - const atom = macho_file.getAtom(subsection.atom).?; - if (!atom.flags.alive) continue; + const atom = self.getAtom(subsection.atom).?; + if (!atom.alive.load(.seq_cst)) continue; if (next_reloc >= relocs.items.len) break; const end_addr = atom.off + atom.size; const rel_index = next_reloc; @@ -848,26 +990,24 @@ fn initRelocs(self: *Object, macho_file: *MachO) !void { while (next_reloc < relocs.items.len and relocs.items[next_reloc].offset < end_addr) : (next_reloc += 1) {} const rel_count = next_reloc - rel_index; - try atom.addExtra(.{ .rel_index = @intCast(rel_index), .rel_count = @intCast(rel_count) }, macho_file); - atom.flags.relocs = true; + atom.addExtra(.{ .rel_index = @intCast(rel_index), .rel_count = @intCast(rel_count) }, macho_file); } } } -fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { +fn initEhFrameRecords(self: *Object, allocator: Allocator, sect_id: u8, file: File.Handle, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = macho_file.base.allocator; const nlists = self.symtab.items(.nlist); const slice = self.sections.slice(); const sect = slice.items(.header)[sect_id]; const relocs = slice.items(.relocs)[sect_id]; // TODO: read into buffer directly - const data = try self.getSectionData(sect_id, macho_file); - defer gpa.free(data); + const data = try self.getSectionData(allocator, sect_id, file); + defer allocator.free(data); - try self.eh_frame_data.ensureTotalCapacityPrecise(gpa, data.len); + try self.eh_frame_data.ensureTotalCapacityPrecise(allocator, data.len); self.eh_frame_data.appendSliceAssumeCapacity(data); // Check for non-personality relocs in FDEs and apply them @@ -900,12 +1040,12 @@ fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { var it = eh_frame.Iterator{ .data = self.eh_frame_data.items }; while (try it.next()) |rec| { switch (rec.tag) { - .cie => try self.cies.append(gpa, .{ + .cie => try self.cies.append(allocator, .{ .offset = rec.offset, .size = rec.size, .file = self.index, }), - .fde => try self.fdes.append(gpa, .{ + .fde => try self.fdes.append(allocator, .{ .offset = rec.offset, .size = rec.size, .cie = undefined, @@ -943,14 +1083,14 @@ fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { }); return error.ParseFailed; }; - cie.personality = .{ .index = @intCast(rel.target), .offset = rel.offset - cie.offset }; + cie.personality = .{ .index = rel.target, .offset = rel.offset - cie.offset }; }, else => {}, } } } -fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { +fn initUnwindRecords(self: *Object, allocator: Allocator, sect_id: u8, file: File.Handle, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); @@ -958,27 +1098,27 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: 
*MachO) !void { ctx: *const Object, fn find(fs: @This(), addr: u64) ?Symbol.Index { - for (fs.ctx.symbols.items, 0..) |sym_index, i| { + for (0..fs.ctx.symbols.items.len) |i| { const nlist = fs.ctx.symtab.items(.nlist)[i]; - if (nlist.ext() and nlist.n_value == addr) return sym_index; + if (nlist.ext() and nlist.n_value == addr) return @intCast(i); } return null; } }; - const gpa = macho_file.base.allocator; - const data = try self.getSectionData(sect_id, macho_file); - defer gpa.free(data); + const data = try self.getSectionData(allocator, sect_id, file); + defer allocator.free(data); const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs]; const sym_lookup = SymbolLookup{ .ctx = self }; - try self.unwind_records.resize(gpa, nrecs); + try self.unwind_records.ensureTotalCapacityPrecise(allocator, nrecs); + try self.unwind_records_indexes.ensureTotalCapacityPrecise(allocator, nrecs); const header = self.sections.items(.header)[sect_id]; const relocs = self.sections.items(.relocs)[sect_id].items; var reloc_idx: usize = 0; - for (recs, self.unwind_records.items, 0..) |rec, *out_index, rec_idx| { + for (recs, 0..) |rec, rec_idx| { const rec_start = rec_idx * @sizeOf(macho.compact_unwind_entry); const rec_end = rec_start + @sizeOf(macho.compact_unwind_entry); const reloc_start = reloc_idx; @@ -986,11 +1126,11 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { relocs[reloc_idx].offset < rec_end) : (reloc_idx += 1) {} - out_index.* = try macho_file.addUnwindRecord(); - const out = macho_file.getUnwindRecord(out_index.*); + const out_index = self.addUnwindRecordAssumeCapacity(); + self.unwind_records_indexes.appendAssumeCapacity(out_index); + const out = self.getUnwindRecord(out_index); out.length = rec.rangeLength; out.enc = .{ .enc = rec.compactUnwindEncoding }; - out.file = self.index; for (relocs[reloc_start..reloc_idx]) |rel| { if (rel.type != .unsigned or rel.meta.length != 3) { @@ -1053,7 +1193,7 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { } } -fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void { +fn parseUnwindRecords(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, macho_file: *MachO) !void { // Synthesise missing unwind records. // The logic here is as follows: // 1. 
if an atom has unwind info record that is not DWARF, FDE is marked dead @@ -1063,8 +1203,7 @@ fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void { const Superposition = struct { atom: Atom.Index, size: u64, cu: ?UnwindInfo.Record.Index = null, fde: ?Fde.Index = null }; - const gpa = macho_file.base.allocator; - var superposition = std.AutoArrayHashMap(u64, Superposition).init(gpa); + var superposition = std.AutoArrayHashMap(u64, Superposition).init(allocator); defer superposition.deinit(); const slice = self.symtab.slice(); @@ -1082,8 +1221,8 @@ fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void { } } - for (self.unwind_records.items) |rec_index| { - const rec = macho_file.getUnwindRecord(rec_index); + for (self.unwind_records_indexes.items) |rec_index| { + const rec = self.getUnwindRecord(rec_index); const atom = rec.getAtom(macho_file); const addr = atom.getInputAddress(macho_file) + rec.atom_offset; superposition.getPtr(addr).?.cu = rec_index; @@ -1100,7 +1239,7 @@ fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void { const fde = &self.fdes.items[fde_index]; if (meta.cu) |rec_index| { - const rec = macho_file.getUnwindRecord(rec_index); + const rec = self.getUnwindRecord(rec_index); if (!rec.enc.isDwarf(macho_file)) { // Mark FDE dead fde.alive = false; @@ -1110,15 +1249,14 @@ fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void { } } else { // Synthesise new unwind info record - const rec_index = try macho_file.addUnwindRecord(); - const rec = macho_file.getUnwindRecord(rec_index); - try self.unwind_records.append(gpa, rec_index); + const rec_index = try self.addUnwindRecord(allocator); + const rec = self.getUnwindRecord(rec_index); + try self.unwind_records_indexes.append(allocator, rec_index); rec.length = @intCast(meta.size); rec.atom = fde.atom; rec.atom_offset = fde.atom_offset; rec.fde = fde_index; - rec.file = fde.file; - switch (macho_file.options.cpu_arch.?) 
{ + switch (cpu_arch) { .x86_64 => rec.enc.setMode(macho.UNWIND_X86_64_MODE.DWARF), .aarch64 => rec.enc.setMode(macho.UNWIND_ARM64_MODE.DWARF), else => unreachable, @@ -1126,41 +1264,45 @@ fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void { } } else if (meta.cu == null and meta.fde == null) { // Create a null record - const rec_index = try macho_file.addUnwindRecord(); - const rec = macho_file.getUnwindRecord(rec_index); - const atom = macho_file.getAtom(meta.atom).?; - try self.unwind_records.append(gpa, rec_index); + const rec_index = try self.addUnwindRecord(allocator); + const rec = self.getUnwindRecord(rec_index); + const atom = self.getAtom(meta.atom).?; + try self.unwind_records_indexes.append(allocator, rec_index); rec.length = @intCast(meta.size); rec.atom = meta.atom; rec.atom_offset = @intCast(addr - atom.getInputAddress(macho_file)); - rec.file = self.index; } } - const sortFn = struct { - fn sortFn(ctx: *MachO, lhs_index: UnwindInfo.Record.Index, rhs_index: UnwindInfo.Record.Index) bool { - const lhs = ctx.getUnwindRecord(lhs_index); - const rhs = ctx.getUnwindRecord(rhs_index); - const lhsa = lhs.getAtom(ctx); - const rhsa = rhs.getAtom(ctx); - return lhsa.getInputAddress(ctx) + lhs.atom_offset < rhsa.getInputAddress(ctx) + rhs.atom_offset; + const SortCtx = struct { + object: *Object, + mfile: *MachO, + + fn sort(ctx: @This(), lhs_index: UnwindInfo.Record.Index, rhs_index: UnwindInfo.Record.Index) bool { + const lhs = ctx.object.getUnwindRecord(lhs_index); + const rhs = ctx.object.getUnwindRecord(rhs_index); + const lhsa = lhs.getAtom(ctx.mfile); + const rhsa = rhs.getAtom(ctx.mfile); + return lhsa.getInputAddress(ctx.mfile) + lhs.atom_offset < rhsa.getInputAddress(ctx.mfile) + rhs.atom_offset; } - }.sortFn; - mem.sort(UnwindInfo.Record.Index, self.unwind_records.items, macho_file, sortFn); + }; + mem.sort(UnwindInfo.Record.Index, self.unwind_records_indexes.items, SortCtx{ + .object = self, + .mfile = macho_file, + }, SortCtx.sort); // Associate unwind records to atoms var next_cu: u32 = 0; - while (next_cu < self.unwind_records.items.len) { + while (next_cu < self.unwind_records_indexes.items.len) { const start = next_cu; - const rec_index = self.unwind_records.items[start]; - const rec = macho_file.getUnwindRecord(rec_index); - while (next_cu < self.unwind_records.items.len and - macho_file.getUnwindRecord(self.unwind_records.items[next_cu]).atom == rec.atom) : (next_cu += 1) + const rec_index = self.unwind_records_indexes.items[start]; + const rec = self.getUnwindRecord(rec_index); + while (next_cu < self.unwind_records_indexes.items.len and + self.getUnwindRecord(self.unwind_records_indexes.items[next_cu]).atom == rec.atom) : (next_cu += 1) {} const atom = rec.getAtom(macho_file); - try atom.addExtra(.{ .unwind_index = start, .unwind_count = next_cu - start }, macho_file); - atom.flags.unwind = true; + atom.addExtra(.{ .unwind_index = start, .unwind_count = next_cu - start }, macho_file); } } @@ -1168,7 +1310,7 @@ fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void { /// and record that so that we can emit symbol stabs. /// TODO in the future, we want parse debug info and debug line sections so that /// we can provide nice error locations to the user. 
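The hunk below, like most of this patch, reads section bytes through getSectionData(allocator, n_sect, file) with an explicit file handle instead of reaching back through macho_file. A rough sketch of that positional-read pattern, assuming a std.fs.File with preadAll; the helper name readSectionAlloc is illustrative, not the linker's API:

const std = @import("std");

// Read one section's bytes with a positional read; base_offset is the object's
// start within the containing file (non-zero for archive members or fat files).
// Callers are expected to skip zerofill sections, which have no file contents.
fn readSectionAlloc(
    allocator: std.mem.Allocator,
    file: std.fs.File,
    base_offset: u64,
    sect: std.macho.section_64,
) ![]u8 {
    const size: usize = @intCast(sect.size);
    const buf = try allocator.alloc(u8, size);
    errdefer allocator.free(buf);
    const amt = try file.preadAll(buf, base_offset + sect.offset);
    if (amt != buf.len) return error.InputOutput;
    return buf;
}

Every pread in parse adds self.offset for the same reason, which is what replaced the old archive-specific offset field.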
-pub fn parseDebugInfo(self: *Object, macho_file: *MachO) !void { +fn parseDebugInfo(self: *Object, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); @@ -1187,11 +1329,12 @@ pub fn parseDebugInfo(self: *Object, macho_file: *MachO) !void { if (debug_info_index == null or debug_abbrev_index == null) return; - const debug_info = try self.getSectionData(@intCast(debug_info_index.?), macho_file); + const file = macho_file.getFileHandle(self.file_handle); + const debug_info = try self.getSectionData(gpa, @intCast(debug_info_index.?), file); defer gpa.free(debug_info); - const debug_abbrev = try self.getSectionData(@intCast(debug_abbrev_index.?), macho_file); + const debug_abbrev = try self.getSectionData(gpa, @intCast(debug_abbrev_index.?), file); defer gpa.free(debug_abbrev); - const debug_str = if (debug_str_index) |index| try self.getSectionData(@intCast(index), macho_file) else &[0]u8{}; + const debug_str = if (debug_str_index) |index| try self.getSectionData(gpa, @intCast(index), file) else &[0]u8{}; defer gpa.free(debug_str); self.compile_unit = self.findCompileUnit(.{ @@ -1297,86 +1440,55 @@ fn findCompileUnit(self: *Object, args: struct { }; } -pub fn resolveSymbols(self: *Object, macho_file: *MachO) void { +pub fn resolveSymbols(self: *Object, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - for (self.symbols.items, 0..) |index, i| { - const nlist_idx = @as(Symbol.Index, @intCast(i)); - const nlist = self.symtab.items(.nlist)[nlist_idx]; - const atom_index = self.symtab.items(.atom)[nlist_idx]; + const gpa = macho_file.base.allocator; + for (self.symtab.items(.nlist), self.symtab.items(.atom), self.globals.items, 0..) |nlist, atom_index, *global, i| { if (!nlist.ext()) continue; - if (nlist.undf() and !nlist.tentative()) continue; if (nlist.sect()) { - const atom = macho_file.getAtom(atom_index).?; - if (!atom.flags.alive) continue; + const atom = self.getAtom(atom_index).?; + if (!atom.alive.load(.seq_cst)) continue; + } + + const gop = try macho_file.resolver.getOrPut(gpa, .{ + .index = @intCast(i), + .file = self.index, + }, macho_file); + if (!gop.found_existing) { + gop.ref.* = .{ .index = 0, .file = 0 }; + } + global.* = gop.index; + + if (nlist.undf() and !nlist.tentative()) continue; + if (gop.ref.getFile(macho_file) == null) { + gop.ref.* = .{ .index = @intCast(i), .file = self.index }; + continue; } - const symbol = macho_file.getSymbol(index); if (self.asFile().getSymbolRank(.{ .archive = !self.alive, .weak = nlist.weakDef(), .tentative = nlist.tentative(), - }) < symbol.getSymbolRank(macho_file)) { - const value = if (nlist.sect()) blk: { - const atom = macho_file.getAtom(atom_index).?; - break :blk nlist.n_value - atom.getInputAddress(macho_file); - } else nlist.n_value; - symbol.value = value; - symbol.atom = atom_index; - symbol.nlist_idx = nlist_idx; - symbol.file = self.index; - symbol.flags.weak = nlist.weakDef(); - symbol.flags.abs = nlist.abs(); - symbol.flags.tentative = nlist.tentative(); - symbol.flags.weak_ref = false; - symbol.flags.dyn_ref = nlist.n_desc & macho.REFERENCED_DYNAMICALLY != 0; - symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.noDeadStrip(); - symbol.flags.interposable = macho_file.options.dylib and macho_file.options.namespace == .flat and !nlist.pext(); - - if (nlist.sect() and - self.sections.items(.header)[nlist.n_sect - 1].type() == macho.S_THREAD_LOCAL_VARIABLES) - { - symbol.flags.tlv = true; - } - } - - // Regardless of who the winner is, we still merge symbol 
visibility here. - if (nlist.pext() or (nlist.weakDef() and nlist.weakRef()) or self.hidden) { - if (symbol.visibility != .global) { - symbol.visibility = .hidden; - } - } else { - symbol.visibility = .global; + }) < gop.ref.getSymbol(macho_file).?.getSymbolRank(macho_file)) { + gop.ref.* = .{ .index = @intCast(i), .file = self.index }; } } } -pub fn resetGlobals(self: *Object, macho_file: *MachO) void { - for (self.symbols.items, 0..) |sym_index, nlist_idx| { - if (!self.symtab.items(.nlist)[nlist_idx].ext()) continue; - const sym = macho_file.getSymbol(sym_index); - const name = sym.name; - const global = sym.flags.global; - const weak_ref = sym.flags.weak_ref; - sym.* = .{}; - sym.name = name; - sym.flags.global = global; - sym.flags.weak_ref = weak_ref; - } -} - pub fn markLive(self: *Object, macho_file: *MachO) void { const tracy = trace(@src()); defer tracy.end(); - for (self.symbols.items, 0..) |index, nlist_idx| { - const nlist = self.symtab.items(.nlist)[nlist_idx]; + for (0..self.symbols.items.len) |i| { + const nlist = self.symtab.items(.nlist)[i]; if (!nlist.ext()) continue; - const sym = macho_file.getSymbol(index); - const file = sym.getFile(macho_file) orelse continue; + const ref = self.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; + const sym = ref.getSymbol(macho_file).?; const should_keep = nlist.undf() or (nlist.tentative() and !sym.flags.tentative); if (should_keep and file == .object and !file.object.alive) { file.object.alive = true; @@ -1385,27 +1497,43 @@ pub fn markLive(self: *Object, macho_file: *MachO) void { } } -pub fn scanRelocs(self: Object, macho_file: *MachO) !void { +pub fn mergeSymbolVisibility(self: *Object, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items, 0..) |sym, i| { + const ref = self.getSymbolRef(@intCast(i), macho_file); + const global = ref.getSymbol(macho_file) orelse continue; + if (global.visibility != .global) { + global.visibility = sym.visibility; + } + if (sym.flags.weak_ref) { + global.flags.weak_ref = true; + } + } +} + +pub fn scanRelocs(self: *Object, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - for (self.atoms.items) |atom_index| { - const atom = macho_file.getAtom(atom_index).?; - if (!atom.flags.alive) continue; + for (self.getAtoms()) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; const sect = atom.getInputSection(macho_file); if (sect.isZerofill()) continue; try atom.scanRelocs(macho_file); } - for (self.unwind_records.items) |rec_index| { - const rec = macho_file.getUnwindRecord(rec_index); + for (self.unwind_records_indexes.items) |rec_index| { + const rec = self.getUnwindRecord(rec_index); if (!rec.alive) continue; if (rec.getFde(macho_file)) |fde| { if (fde.getCie(macho_file).getPersonality(macho_file)) |sym| { - sym.flags.got = true; + sym.setSectionFlags(.{ .got = true }); } } else if (rec.getPersonality(macho_file)) |sym| { - sym.flags.got = true; + sym.setSectionFlags(.{ .got = true }); } } } @@ -1415,38 +1543,35 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { defer tracy.end(); const gpa = macho_file.base.allocator; - for (self.symbols.items, 0..) |index, i| { - const sym = macho_file.getSymbol(index); + for (self.symbols.items, self.globals.items, 0..) 
|*sym, off, i| { if (!sym.flags.tentative) continue; - const sym_file = sym.getFile(macho_file).?; - if (sym_file.getIndex() != self.index) continue; + if (macho_file.resolver.get(off).?.file != self.index) continue; const nlist_idx = @as(Symbol.Index, @intCast(i)); const nlist = &self.symtab.items(.nlist)[nlist_idx]; const nlist_atom = &self.symtab.items(.atom)[nlist_idx]; - const atom_index = try macho_file.addAtom(); - try self.atoms.append(gpa, atom_index); - const name = try std.fmt.allocPrintZ(gpa, "__DATA$__common${s}", .{sym.getName(macho_file)}); defer gpa.free(name); - const atom = macho_file.getAtom(atom_index).?; - atom.atom_index = atom_index; - atom.name = try self.addString(gpa, name); - atom.file = self.index; - atom.size = nlist.n_value; - atom.alignment = (nlist.n_desc >> 8) & 0x0f; + const alignment = (nlist.n_desc >> 8) & 0x0f; const n_sect = try self.addSection(gpa, "__DATA", "__common"); + const atom_index = try self.addAtom(gpa, .{ + .name = try self.addString(gpa, name), + .n_sect = n_sect, + .off = 0, + .size = nlist.n_value, + .alignment = alignment, + }); + try self.atoms_indexes.append(gpa, atom_index); + const sect = &self.sections.items(.header)[n_sect]; sect.flags = macho.S_ZEROFILL; - sect.size = atom.size; - sect.@"align" = atom.alignment; - atom.n_sect = n_sect; + sect.size = nlist.n_value; + sect.@"align" = alignment; sym.value = 0; - sym.atom = atom_index; - sym.flags.global = true; + sym.atom_ref = .{ .index = atom_index, .file = self.index }; sym.flags.weak = false; sym.flags.weak_ref = false; sym.flags.tentative = false; @@ -1460,8 +1585,58 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { } } -fn addSection(self: *Object, allocator: Allocator, segname: []const u8, sectname: []const u8) !u32 { - const n_sect = @as(u32, @intCast(try self.sections.addOne(allocator))); +pub fn claimUnresolved(self: *Object, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items, 0..) |*sym, i| { + const nlist = self.symtab.items(.nlist)[i]; + if (!nlist.ext()) continue; + if (!nlist.undf()) continue; + + if (self.getSymbolRef(@intCast(i), macho_file).getFile(macho_file) != null) continue; + + const is_import = switch (macho_file.options.undefined_treatment) { + .@"error" => false, + .warn, .suppress => nlist.weakRef(), + .dynamic_lookup => true, + }; + if (is_import) { + sym.value = 0; + sym.atom_ref = .{ .index = 0, .file = 0 }; + sym.flags.weak = false; + sym.flags.weak_ref = nlist.weakRef(); + sym.flags.import = is_import; + sym.visibility = .global; + + const idx = self.globals.items[i]; + macho_file.resolver.values.items[idx - 1] = .{ .index = @intCast(i), .file = self.index }; + } + } +} + +pub fn claimUnresolvedRelocatable(self: *Object, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + for (self.symbols.items, self.symtab.items(.nlist), 0..) 
|*sym, nlist, i| { + if (!nlist.ext()) continue; + if (!nlist.undf()) continue; + if (self.getSymbolRef(@intCast(i), macho_file).getFile(macho_file) != null) continue; + + sym.value = 0; + sym.atom_ref = .{ .index = 0, .file = 0 }; + sym.flags.weak_ref = nlist.weakRef(); + sym.flags.import = true; + sym.visibility = .global; + + const idx = self.globals.items[i]; + macho_file.resolver.values.items[idx - 1] = .{ .index = @intCast(i), .file = self.index }; + } +} + +fn addSection(self: *Object, allocator: Allocator, segname: []const u8, sectname: []const u8) !u8 { + const n_sect = @as(u8, @intCast(try self.sections.addOne(allocator))); self.sections.set(n_sect, .{ .header = .{ .sectname = MachO.makeStaticString(sectname), @@ -1471,36 +1646,36 @@ fn addSection(self: *Object, allocator: Allocator, segname: []const u8, sectname return n_sect; } -pub fn calcSymtabSize(self: *Object, macho_file: *MachO) !void { +pub fn calcSymtabSize(self: *Object, macho_file: *MachO) void { const tracy = trace(@src()); defer tracy.end(); const is_relocatable = macho_file.options.relocatable; - for (self.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); - const file = sym.getFile(macho_file) orelse continue; + for (self.symbols.items, 0..) |*sym, i| { + const ref = self.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; if (file.getIndex() != self.index) continue; - if (sym.getAtom(macho_file)) |atom| if (!atom.flags.alive) continue; + if (sym.getAtom(macho_file)) |atom| if (!atom.alive.load(.seq_cst)) continue; if (sym.isSymbolStab(macho_file)) continue; const name = sym.getName(macho_file); + if (name.len == 0) continue; // TODO in -r mode, we actually want to merge symbol names and emit only one // work it out when emitting relocs - if (name.len > 0 and - (name[0] == 'L' or name[0] == 'l' or + if ((name[0] == 'L' or name[0] == 'l' or mem.startsWith(u8, name, "_OBJC_SELECTOR_REFERENCES_")) and - !is_relocatable) continue; + !is_relocatable) + continue; sym.flags.output_symtab = true; if (sym.isLocal()) { - try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file); - + sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file); self.output_symtab_ctx.nlocals += 1; } else if (sym.flags.@"export") { - try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nexports }, macho_file); + sym.addExtra(.{ .symtab = self.output_symtab_ctx.nexports }, macho_file); self.output_symtab_ctx.nexports += 1; } else { assert(sym.flags.import); - try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); + sym.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file); self.output_symtab_ctx.nimports += 1; } self.output_symtab_ctx.strsize += @as(u32, @intCast(sym.getName(macho_file).len + 1)); @@ -1519,22 +1694,22 @@ pub fn calcStabsSize(self: *Object, macho_file: *MachO) void { self.output_symtab_ctx.strsize += @as(u32, @intCast(comp_dir.len + 1)); // comp_dir self.output_symtab_ctx.strsize += @as(u32, @intCast(tu_name.len + 1)); // tu_name - if (self.archive) |ar| { - self.output_symtab_ctx.strsize += @as(u32, @intCast(ar.path.len + 1 + self.path.len + 1 + 1)); + if (self.ar_name) |path| { + self.output_symtab_ctx.strsize += @as(u32, @intCast(path.len + 1 + self.path.len + 1 + 1)); } else { self.output_symtab_ctx.strsize += @as(u32, @intCast(self.path.len + 1)); } - for (self.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); - const file = sym.getFile(macho_file) orelse continue; 
+ for (self.symbols.items, 0..) |sym, i| { + const ref = self.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; if (file.getIndex() != self.index) continue; if (!sym.flags.output_symtab) continue; if (macho_file.options.relocatable) { const name = sym.getName(macho_file); if (name.len > 0 and (name[0] == 'L' or name[0] == 'l')) continue; } - const sect = macho_file.sections.items(.header)[sym.out_n_sect]; + const sect = macho_file.sections.items(.header)[sym.getOutputSectionIndex(macho_file)]; if (sect.isCode()) { self.output_symtab_ctx.nstabs += 4; // N_BNSYM, N_FUN, N_FUN, N_ENSYM } else if (sym.visibility == .global) { @@ -1553,7 +1728,7 @@ pub fn calcStabsSize(self: *Object, macho_file: *MachO) void { self.output_symtab_ctx.strsize += @as(u32, @intCast(sf.getOsoPath(self).len + 1)); // path for (sf.stabs.items) |stab| { - const sym = stab.getSymbol(macho_file) orelse continue; + const sym = stab.getSymbol(self) orelse continue; const file = sym.getFile(macho_file).?; if (file.getIndex() != self.index) continue; if (!sym.flags.output_symtab) continue; @@ -1564,27 +1739,197 @@ pub fn calcStabsSize(self: *Object, macho_file: *MachO) void { } } +pub fn writeAtoms(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + const headers = self.sections.items(.header); + const sections_data = try gpa.alloc([]const u8, headers.len); + defer { + for (sections_data) |data| { + gpa.free(data); + } + gpa.free(sections_data); + } + @memset(sections_data, &[0]u8{}); + const file = macho_file.getFileHandle(self.file_handle); + + for (headers, 0..) |header, n_sect| { + if (header.isZerofill()) continue; + sections_data[n_sect] = try self.getSectionData(gpa, @intCast(n_sect), file); + } + for (self.getAtoms()) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; + const sect = atom.getInputSection(macho_file); + if (sect.isZerofill()) continue; + const off = atom.value; + const buffer = macho_file.sections.items(.out)[atom.out_n_sect].items; + const data = sections_data[atom.n_sect]; + @memcpy(buffer[off..][0..atom.size], data[atom.off..][0..atom.size]); + try atom.resolveRelocs(macho_file, buffer[off..][0..atom.size]); + } +} + +pub fn writeAtomsRelocatable(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + const headers = self.sections.items(.header); + const sections_data = try gpa.alloc([]const u8, headers.len); + defer { + for (sections_data) |data| { + gpa.free(data); + } + gpa.free(sections_data); + } + @memset(sections_data, &[0]u8{}); + const file = macho_file.getFileHandle(self.file_handle); + + for (headers, 0..) 
|header, n_sect| { + if (header.isZerofill()) continue; + sections_data[n_sect] = try self.getSectionData(gpa, @intCast(n_sect), file); + } + for (self.getAtoms()) |atom_index| { + const atom = self.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; + const sect = atom.getInputSection(macho_file); + if (sect.isZerofill()) continue; + const off = atom.value; + const buffer = macho_file.sections.items(.out)[atom.out_n_sect].items; + const data = sections_data[atom.n_sect]; + @memcpy(buffer[off..][0..atom.size], data[atom.off..][0..atom.size]); + const relocs = macho_file.sections.items(.relocs)[atom.out_n_sect].items; + const extra = atom.getExtra(macho_file); + try atom.writeRelocs(macho_file, buffer[off..][0..atom.size], relocs[extra.rel_out_index..][0..extra.rel_out_count]); + } +} + +pub fn calcCompactUnwindSizeRelocatable(self: *Object, macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + const ctx = &self.compact_unwind_ctx; + + for (self.unwind_records_indexes.items) |irec| { + const rec = self.getUnwindRecord(irec); + if (!rec.alive) continue; + + ctx.rec_count += 1; + ctx.reloc_count += 1; + if (rec.getPersonality(macho_file)) |_| { + ctx.reloc_count += 1; + } + if (rec.getLsdaAtom(macho_file)) |_| { + ctx.reloc_count += 1; + } + } +} + +pub fn writeCompactUnwindRelocatable(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const addReloc = struct { + fn addReloc(offset: u32, cpu_arch: std.Target.Cpu.Arch) !macho.relocation_info { + return .{ + .r_address = math.cast(i32, offset) orelse return error.Overflow, + .r_symbolnum = 0, + .r_pcrel = 0, + .r_length = 3, + .r_extern = 0, + .r_type = switch (cpu_arch) { + .aarch64 => @intFromEnum(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), + .x86_64 => @intFromEnum(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), + else => unreachable, + }, + }; + } + }.addReloc; + + const nsect = macho_file.unwind_info_sect_index.?; + const buffer = macho_file.sections.items(.out)[nsect].items; + const relocs = macho_file.sections.items(.relocs)[nsect].items; + + var rec_index: u32 = self.compact_unwind_ctx.rec_index; + var reloc_index: u32 = self.compact_unwind_ctx.reloc_index; + + for (self.unwind_records_indexes.items) |irec| { + const rec = self.getUnwindRecord(irec); + if (!rec.alive) continue; + + var out: macho.compact_unwind_entry = .{ + .rangeStart = 0, + .rangeLength = rec.length, + .compactUnwindEncoding = rec.enc.enc, + .personalityFunction = 0, + .lsda = 0, + }; + defer rec_index += 1; + + const offset = rec_index * @sizeOf(macho.compact_unwind_entry); + + { + // Function address + const atom = rec.getAtom(macho_file); + const addr = rec.getAtomAddress(macho_file); + out.rangeStart = addr; + var reloc = try addReloc(offset, macho_file.options.cpu_arch.?); + reloc.r_symbolnum = atom.out_n_sect + 1; + relocs[reloc_index] = reloc; + reloc_index += 1; + } + + // Personality function + if (rec.getPersonality(macho_file)) |sym| { + const r_symbolnum = math.cast(u24, sym.getOutputSymtabIndex(macho_file).?) 
orelse return error.Overflow; + var reloc = try addReloc(offset + 16, macho_file.options.cpu_arch.?); + reloc.r_symbolnum = r_symbolnum; + reloc.r_extern = 1; + relocs[reloc_index] = reloc; + reloc_index += 1; + } + + // LSDA address + if (rec.getLsdaAtom(macho_file)) |atom| { + const addr = rec.getLsdaAddress(macho_file); + out.lsda = addr; + var reloc = try addReloc(offset + 24, macho_file.options.cpu_arch.?); + reloc.r_symbolnum = atom.out_n_sect + 1; + relocs[reloc_index] = reloc; + reloc_index += 1; + } + + @memcpy(buffer[offset..][0..@sizeOf(macho.compact_unwind_entry)], mem.asBytes(&out)); + } +} + pub fn writeSymtab(self: Object, macho_file: *MachO) void { const tracy = trace(@src()); defer tracy.end(); - for (self.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); - const file = sym.getFile(macho_file) orelse continue; + var n_strx = self.output_symtab_ctx.stroff; + for (self.symbols.items, 0..) |sym, i| { + const ref = self.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; if (file.getIndex() != self.index) continue; const idx = sym.getOutputSymtabIndex(macho_file) orelse continue; - const n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); - macho_file.strtab.appendSliceAssumeCapacity(sym.getName(macho_file)); - macho_file.strtab.appendAssumeCapacity(0); const out_sym = &macho_file.symtab.items[idx]; out_sym.n_strx = n_strx; sym.setOutputSym(macho_file, out_sym); + const name = sym.getName(macho_file); + @memcpy(macho_file.strtab.items[n_strx..][0..name.len], name); + n_strx += @intCast(name.len); + macho_file.strtab.items[n_strx] = 0; + n_strx += 1; } - if (!macho_file.options.strip and self.hasDebugInfo()) self.writeStabs(macho_file); + if (!macho_file.options.strip and self.hasDebugInfo()) self.writeStabs(n_strx, macho_file); } -pub fn writeStabs(self: *const Object, macho_file: *MachO) void { +pub fn writeStabs(self: *const Object, stroff: u32, macho_file: *MachO) void { const writeFuncStab = struct { inline fn writeFuncStab( n_strx: u32, @@ -1626,6 +1971,7 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { }.writeFuncStab; var index = self.output_symtab_ctx.istab; + var n_strx = stroff; if (self.compile_unit) |cu| { // TODO handle multiple CUs @@ -1634,9 +1980,6 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { // Open scope // N_SO comp_dir - var n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); - macho_file.strtab.appendSliceAssumeCapacity(comp_dir); - macho_file.strtab.appendAssumeCapacity(0); macho_file.symtab.items[index] = .{ .n_strx = n_strx, .n_type = macho.N_SO, @@ -1645,10 +1988,11 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { .n_value = 0, }; index += 1; + @memcpy(macho_file.strtab.items[n_strx..][0..comp_dir.len], comp_dir); + n_strx += @intCast(comp_dir.len); + macho_file.strtab.items[n_strx] = 0; + n_strx += 1; // N_SO tu_name - n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); - macho_file.strtab.appendSliceAssumeCapacity(tu_name); - macho_file.strtab.appendAssumeCapacity(0); macho_file.symtab.items[index] = .{ .n_strx = n_strx, .n_type = macho.N_SO, @@ -1657,18 +2001,11 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { .n_value = 0, }; index += 1; + @memcpy(macho_file.strtab.items[n_strx..][0..tu_name.len], tu_name); + n_strx += @intCast(tu_name.len); + macho_file.strtab.items[n_strx] = 0; + n_strx += 1; // N_OSO path - n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); - if 
(self.archive) |ar| { - macho_file.strtab.appendSliceAssumeCapacity(ar.path); - macho_file.strtab.appendAssumeCapacity('('); - macho_file.strtab.appendSliceAssumeCapacity(self.path); - macho_file.strtab.appendAssumeCapacity(')'); - macho_file.strtab.appendAssumeCapacity(0); - } else { - macho_file.strtab.appendSliceAssumeCapacity(self.path); - macho_file.strtab.appendAssumeCapacity(0); - } macho_file.symtab.items[index] = .{ .n_strx = n_strx, .n_type = macho.N_OSO, @@ -1677,23 +2014,40 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { .n_value = self.mtime, }; index += 1; + if (self.ar_name) |path| { + @memcpy(macho_file.strtab.items[n_strx..][0..path.len], path); + n_strx += @intCast(path.len); + macho_file.strtab.items[n_strx] = '('; + n_strx += 1; + @memcpy(macho_file.strtab.items[n_strx..][0..self.path.len], self.path); + n_strx += @intCast(self.path.len); + macho_file.strtab.items[n_strx] = ')'; + n_strx += 1; + macho_file.strtab.items[n_strx] = 0; + n_strx += 1; + } else { + @memcpy(macho_file.strtab.items[n_strx..][0..self.path.len], self.path); + n_strx += @intCast(self.path.len); + macho_file.strtab.items[n_strx] = 0; + n_strx += 1; + } - for (self.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); - const file = sym.getFile(macho_file) orelse continue; + for (self.symbols.items, 0..) |sym, i| { + const ref = self.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; if (file.getIndex() != self.index) continue; if (!sym.flags.output_symtab) continue; if (macho_file.options.relocatable) { const name = sym.getName(macho_file); if (name.len > 0 and (name[0] == 'L' or name[0] == 'l')) continue; } - const sect = macho_file.sections.items(.header)[sym.out_n_sect]; + const sect = macho_file.sections.items(.header)[sym.getOutputSectionIndex(macho_file)]; const sym_n_strx = n_strx: { const symtab_index = sym.getOutputSymtabIndex(macho_file).?; const osym = macho_file.symtab.items[symtab_index]; break :n_strx osym.n_strx; }; - const sym_n_sect: u8 = if (!sym.flags.abs) @intCast(sym.out_n_sect + 1) else 0; + const sym_n_sect: u8 = if (!sym.flags.abs) @intCast(sym.getOutputSectionIndex(macho_file) + 1) else 0; const sym_n_value = sym.getAddress(.{}, macho_file); const sym_size = sym.getSize(macho_file); if (sect.isCode()) { @@ -1733,11 +2087,12 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { assert(self.hasSymbolStabs()); for (self.stab_files.items) |sf| { + const comp_dir = sf.getCompDir(self); + const tu_name = sf.getTuName(self); + const oso_path = sf.getOsoPath(self); + // Open scope // N_SO comp_dir - var n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); - macho_file.strtab.appendSliceAssumeCapacity(sf.getCompDir(self)); - macho_file.strtab.appendAssumeCapacity(0); macho_file.symtab.items[index] = .{ .n_strx = n_strx, .n_type = macho.N_SO, @@ -1746,10 +2101,11 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { .n_value = 0, }; index += 1; + @memcpy(macho_file.strtab.items[n_strx..][0..comp_dir.len], comp_dir); + n_strx += @intCast(comp_dir.len); + macho_file.strtab.items[n_strx] = 0; + n_strx += 1; // N_SO tu_name - n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); - macho_file.strtab.appendSliceAssumeCapacity(sf.getTuName(self)); - macho_file.strtab.appendAssumeCapacity(0); macho_file.symtab.items[index] = .{ .n_strx = n_strx, .n_type = macho.N_SO, @@ -1758,10 +2114,11 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { .n_value = 
0, }; index += 1; + @memcpy(macho_file.strtab.items[n_strx..][0..tu_name.len], tu_name); + n_strx += @intCast(tu_name.len); + macho_file.strtab.items[n_strx] = 0; + n_strx += 1; // N_OSO path - n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); - macho_file.strtab.appendSliceAssumeCapacity(sf.getOsoPath(self)); - macho_file.strtab.appendAssumeCapacity(0); macho_file.symtab.items[index] = .{ .n_strx = n_strx, .n_type = macho.N_OSO, @@ -1770,9 +2127,13 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { .n_value = sf.getOsoModTime(self), }; index += 1; + @memcpy(macho_file.strtab.items[n_strx..][0..oso_path.len], oso_path); + n_strx += @intCast(oso_path.len); + macho_file.strtab.items[n_strx] = 0; + n_strx += 1; for (sf.stabs.items) |stab| { - const sym = stab.getSymbol(macho_file) orelse continue; + const sym = stab.getSymbol(self) orelse continue; const file = sym.getFile(macho_file).?; if (file.getIndex() != self.index) continue; if (!sym.flags.output_symtab) continue; @@ -1781,7 +2142,7 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { const osym = macho_file.symtab.items[symtab_index]; break :n_strx osym.n_strx; }; - const sym_n_sect: u8 = if (!sym.flags.abs) @intCast(sym.out_n_sect + 1) else 0; + const sym_n_sect: u8 = if (!sym.flags.abs) @intCast(sym.getOutputSectionIndex(macho_file) + 1) else 0; const sym_n_value = sym.getAddress(.{}, macho_file); const sym_size = sym.getSize(macho_file); if (stab.is_func) { @@ -1822,31 +2183,34 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { } } -pub fn getSectionData(self: *const Object, index: u32, macho_file: *MachO) ![]u8 { - const gpa = macho_file.base.allocator; +pub fn getSectionData(self: *const Object, allocator: Allocator, index: u32, file: File.Handle) ![]u8 { const slice = self.sections.slice(); assert(index < slice.items(.header).len); const sect = slice.items(.header)[index]; - const file = macho_file.getFileHandle(self.file_handle); - const offset = if (self.archive) |ar| ar.offset else 0; - const buffer = try gpa.alloc(u8, sect.size); - errdefer gpa.free(buffer); - const amt = try file.preadAll(buffer, sect.offset + offset); + const buffer = try allocator.alloc(u8, sect.size); + errdefer allocator.free(buffer); + const amt = try file.preadAll(buffer, sect.offset + self.offset); if (amt != buffer.len) return error.InputOutput; return buffer; } -fn addString(self: *Object, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 { +fn addString(self: *Object, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!MachO.String { const off: u32 = @intCast(self.strtab.items.len); try self.strtab.ensureUnusedCapacity(allocator, name.len + 1); self.strtab.appendSliceAssumeCapacity(name); self.strtab.appendAssumeCapacity(0); - return off; + return .{ .pos = off, .len = @intCast(name.len + 1) }; } -pub fn getString(self: Object, off: u32) [:0]const u8 { - assert(off < self.strtab.items.len); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0); +pub fn getString(self: Object, name: MachO.String) [:0]const u8 { + assert(name.pos < self.strtab.items.len and name.pos + name.len <= self.strtab.items.len); + if (name.len == 0) return ""; + return self.strtab.items[name.pos..][0 .. 
name.len - 1 :0]; +} + +fn getNStrx(self: Object, n_strx: u32) [:0]const u8 { + assert(n_strx < self.strtab.items.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + n_strx)), 0); } /// TODO handle multiple CUs @@ -1858,9 +2222,9 @@ fn hasSymbolStabs(self: Object) bool { return self.stab_files.items.len > 0; } -pub fn hasObjc(self: Object) bool { +fn hasObjc(self: Object) bool { for (self.symtab.items(.nlist)) |nlist| { - const name = self.getString(nlist.n_strx); + const name = self.getNStrx(nlist.n_strx); if (mem.startsWith(u8, name, "_OBJC_CLASS_$_")) return true; } for (self.sections.items(.header)) |sect| { @@ -1890,6 +2254,160 @@ pub fn asFile(self: *Object) File { return .{ .object = self }; } +const AddAtomArgs = struct { + name: MachO.String, + n_sect: u8, + off: u64, + size: u64, + alignment: u32, +}; + +fn addAtom(self: *Object, allocator: Allocator, args: AddAtomArgs) !Atom.Index { + const atom_index: Atom.Index = @intCast(self.atoms.items.len); + const atom = try self.atoms.addOne(allocator); + atom.* = .{ + .file = self.index, + .atom_index = atom_index, + .name = args.name, + .n_sect = args.n_sect, + .size = args.size, + .off = args.off, + .extra = try self.addAtomExtra(allocator, .{}), + .alignment = args.alignment, + }; + return atom_index; +} + +pub fn getAtom(self: *Object, atom_index: Atom.Index) ?*Atom { + if (atom_index == 0) return null; + assert(atom_index < self.atoms.items.len); + return &self.atoms.items[atom_index]; +} + +pub fn getAtoms(self: *Object) []const Atom.Index { + return self.atoms_indexes.items; +} + +fn addAtomExtra(self: *Object, allocator: Allocator, extra: Atom.Extra) !u32 { + const fields = @typeInfo(Atom.Extra).Struct.fields; + try self.atoms_extra.ensureUnusedCapacity(allocator, fields.len); + return self.addAtomExtraAssumeCapacity(extra); +} + +fn addAtomExtraAssumeCapacity(self: *Object, extra: Atom.Extra) u32 { + const index = @as(u32, @intCast(self.atoms_extra.items.len)); + const fields = @typeInfo(Atom.Extra).Struct.fields; + inline for (fields) |field| { + self.atoms_extra.appendAssumeCapacity(switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }); + } + return index; +} + +pub fn getAtomExtra(self: Object, index: u32) Atom.Extra { + const fields = @typeInfo(Atom.Extra).Struct.fields; + var i: usize = index; + var result: Atom.Extra = undefined; + inline for (fields) |field| { + @field(result, field.name) = switch (field.type) { + u32 => self.atoms_extra.items[i], + else => @compileError("bad field type"), + }; + i += 1; + } + return result; +} + +pub fn setAtomExtra(self: *Object, index: u32, extra: Atom.Extra) void { + assert(index > 0); + const fields = @typeInfo(Atom.Extra).Struct.fields; + inline for (fields, 0..) 
|field, i| { + self.atoms_extra.items[index + i] = switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }; + } +} + +fn addSymbol(self: *Object, allocator: Allocator) !Symbol.Index { + try self.symbols.ensureUnusedCapacity(allocator, 1); + return self.addSymbolAssumeCapacity(); +} + +fn addSymbolAssumeCapacity(self: *Object) Symbol.Index { + const index: Symbol.Index = @intCast(self.symbols.items.len); + const symbol = self.symbols.addOneAssumeCapacity(); + symbol.* = .{ .file = self.index }; + return index; +} + +pub fn getSymbolRef(self: Object, index: Symbol.Index, macho_file: *MachO) MachO.Ref { + const global_index = self.globals.items[index]; + if (macho_file.resolver.get(global_index)) |ref| return ref; + return .{ .index = index, .file = self.index }; +} + +pub fn addSymbolExtra(self: *Object, allocator: Allocator, extra: Symbol.Extra) !u32 { + const fields = @typeInfo(Symbol.Extra).Struct.fields; + try self.symbols_extra.ensureUnusedCapacity(allocator, fields.len); + return self.addSymbolExtraAssumeCapacity(extra); +} + +fn addSymbolExtraAssumeCapacity(self: *Object, extra: Symbol.Extra) u32 { + const index = @as(u32, @intCast(self.symbols_extra.items.len)); + const fields = @typeInfo(Symbol.Extra).Struct.fields; + inline for (fields) |field| { + self.symbols_extra.appendAssumeCapacity(switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }); + } + return index; +} + +pub fn getSymbolExtra(self: Object, index: u32) Symbol.Extra { + const fields = @typeInfo(Symbol.Extra).Struct.fields; + var i: usize = index; + var result: Symbol.Extra = undefined; + inline for (fields) |field| { + @field(result, field.name) = switch (field.type) { + u32 => self.symbols_extra.items[i], + else => @compileError("bad field type"), + }; + i += 1; + } + return result; +} + +pub fn setSymbolExtra(self: *Object, index: u32, extra: Symbol.Extra) void { + const fields = @typeInfo(Symbol.Extra).Struct.fields; + inline for (fields, 0..) 
|field, i| { + self.symbols_extra.items[index + i] = switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }; + } +} + +fn addUnwindRecord(self: *Object, allocator: Allocator) !UnwindInfo.Record.Index { + try self.unwind_records.ensureUnusedCapacity(allocator, 1); + return self.addUnwindRecordAssumeCapacity(); +} + +fn addUnwindRecordAssumeCapacity(self: *Object) UnwindInfo.Record.Index { + const index = @as(UnwindInfo.Record.Index, @intCast(self.unwind_records.items.len)); + const rec = self.unwind_records.addOneAssumeCapacity(); + rec.* = .{ .file = self.index }; + return index; +} + +pub fn getUnwindRecord(self: *Object, index: UnwindInfo.Record.Index) *UnwindInfo.Record { + assert(index < self.unwind_records.items.len); + return &self.unwind_records.items[index]; +} + pub fn format( self: *Object, comptime unused_fmt_string: []const u8, @@ -1924,10 +2442,11 @@ fn formatAtoms( _ = unused_fmt_string; _ = options; const object = ctx.object; + const macho_file = ctx.macho_file; try writer.writeAll(" atoms\n"); - for (object.atoms.items) |atom_index| { - const atom = ctx.macho_file.getAtom(atom_index).?; - try writer.print(" {}\n", .{atom.fmt(ctx.macho_file)}); + for (object.getAtoms()) |atom_index| { + const atom = object.getAtom(atom_index) orelse continue; + try writer.print(" {}\n", .{atom.fmt(macho_file)}); } } @@ -1993,8 +2512,8 @@ fn formatUnwindRecords( const object = ctx.object; const macho_file = ctx.macho_file; try writer.writeAll(" unwind records\n"); - for (object.unwind_records.items) |rec| { - try writer.print(" rec({d}) : {}\n", .{ rec, macho_file.getUnwindRecord(rec).fmt(macho_file) }); + for (object.unwind_records_indexes.items) |rec| { + try writer.print(" rec({d}) : {}\n", .{ rec, object.getUnwindRecord(rec).fmt(macho_file) }); } } @@ -2014,10 +2533,26 @@ fn formatSymtab( _ = unused_fmt_string; _ = options; const object = ctx.object; + const macho_file = ctx.macho_file; try writer.writeAll(" symbols\n"); - for (object.symbols.items) |index| { - const sym = ctx.macho_file.getSymbol(index); - try writer.print(" {}\n", .{sym.fmt(ctx.macho_file)}); + for (object.symbols.items, 0..) |sym, i| { + const ref = object.getSymbolRef(@intCast(i), macho_file); + if (ref.getFile(macho_file) == null) { + // TODO any better way of handling this? 
+ try writer.print(" {s} : unclaimed\n", .{sym.getName(macho_file)}); + } else { + try writer.print(" {}\n", .{ref.getSymbol(macho_file).?.fmt(macho_file)}); + } + } + for (object.stab_files.items) |sf| { + try writer.print(" stabs({s},{s},{s})\n", .{ + sf.getCompDir(object), + sf.getTuName(object), + sf.getOsoPath(object), + }); + for (sf.stabs.items) |stab| { + try writer.print(" {}", .{stab.fmt(object)}); + } } } @@ -2033,8 +2568,8 @@ fn formatPath( ) !void { _ = unused_fmt_string; _ = options; - if (object.archive) |ar| { - try writer.writeAll(ar.path); + if (object.ar_name) |path| { + try writer.writeAll(path); try writer.writeByte('('); try writer.writeAll(object.path); try writer.writeByte(')'); @@ -2064,17 +2599,17 @@ const StabFile = struct { fn getCompDir(sf: StabFile, object: *const Object) [:0]const u8 { const nlist = object.symtab.items(.nlist)[sf.comp_dir]; - return object.getString(nlist.n_strx); + return object.getNStrx(nlist.n_strx); } fn getTuName(sf: StabFile, object: *const Object) [:0]const u8 { const nlist = object.symtab.items(.nlist)[sf.comp_dir + 1]; - return object.getString(nlist.n_strx); + return object.getNStrx(nlist.n_strx); } fn getOsoPath(sf: StabFile, object: *const Object) [:0]const u8 { const nlist = object.symtab.items(.nlist)[sf.comp_dir + 2]; - return object.getString(nlist.n_strx); + return object.getNStrx(nlist.n_strx); } fn getOsoModTime(sf: StabFile, object: *const Object) u64 { @@ -2084,17 +2619,56 @@ const StabFile = struct { const Stab = struct { is_func: bool = true, - symbol: ?Symbol.Index = null, + index: ?Symbol.Index = null, + + fn getSymbol(stab: Stab, object: *const Object) ?*Symbol { + const index = stab.index orelse return null; + return &object.symbols.items[index]; + } - fn getSymbol(stab: Stab, macho_file: *MachO) ?*Symbol { - return if (stab.symbol) |s| macho_file.getSymbol(s) else null; + pub fn format( + stab: Stab, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = stab; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format stabs directly"); + } + + const StabFormatContext = struct { Stab, *const Object }; + + pub fn fmt(stab: Stab, object: *const Object) std.fmt.Formatter(format2) { + return .{ .data = .{ stab, object } }; + } + + fn format2( + ctx: StabFormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + const stab, const object = ctx; + const sym = stab.getSymbol(object).?; + if (stab.is_func) { + try writer.print("func({d})", .{stab.index.?}); + } else if (sym.visibility == .global) { + try writer.print("gsym({d})", .{stab.index.?}); + } else { + try writer.print("stsym({d})", .{stab.index.?}); + } } }; }; const CompileUnit = struct { - comp_dir: u32, - tu_name: u32, + comp_dir: MachO.String, + tu_name: MachO.String, fn getCompDir(cu: CompileUnit, object: *const Object) [:0]const u8 { return object.getString(cu.comp_dir); @@ -2105,32 +2679,33 @@ const CompileUnit = struct { } }; -const Archive = struct { - path: []const u8, - offset: u64, +const CompactUnwindCtx = struct { + rec_index: u32 = 0, + rec_count: u32 = 0, + reloc_index: u32 = 0, + reloc_count: u32 = 0, }; const x86_64 = struct { fn parseRelocs( - self: *const Object, + self: *Object, n_sect: u8, sect: macho.section_64, out: *std.ArrayListUnmanaged(Relocation), + file: File.Handle, macho_file: *MachO, ) !void { const gpa = macho_file.base.allocator; - const file = 
macho_file.getFileHandle(self.file_handle); - const offset = if (self.archive) |ar| ar.offset else 0; const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); defer gpa.free(relocs_buffer); { - const amt = try file.preadAll(relocs_buffer, sect.reloff + offset); + const amt = try file.preadAll(relocs_buffer, sect.reloff + self.offset); if (amt != relocs_buffer.len) return error.InputOutput; } const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; - const code = try self.getSectionData(@intCast(n_sect), macho_file); + const code = try self.getSectionData(gpa, @intCast(n_sect), file); defer gpa.free(code); try out.ensureTotalCapacityPrecise(gpa, relocs.len); @@ -2153,8 +2728,9 @@ const x86_64 = struct { .X86_64_RELOC_SIGNED_4 => 4, else => 0, }; + var is_extern = rel.r_extern == 1; - const target = if (rel.r_extern == 0) blk: { + const target: u32 = if (!is_extern) blk: { const nsect = rel.r_symbolnum - 1; const taddr: i64 = if (rel.r_pcrel == 1) @as(i64, @intCast(sect.addr)) + rel.r_address + addend + 4 @@ -2166,9 +2742,15 @@ const x86_64 = struct { }); return error.ParseFailed; }; - addend = taddr - @as(i64, @intCast(macho_file.getAtom(target).?.getInputAddress(macho_file))); + const target_atom = self.getAtom(target).?; + addend = taddr - @as(i64, @intCast(target_atom.getInputAddress(macho_file))); + const isec = target_atom.getInputSection(macho_file); + if (isCstringLiteral(isec) or isFixedSizeLiteral(isec) or isPtrLiteral(isec)) { + is_extern = true; + break :blk target_atom.getExtra(macho_file).literal_symbol_index; + } break :blk target; - } else self.symbols.items[rel.r_symbolnum]; + } else rel.r_symbolnum; const has_subtractor = if (i > 0 and @as(macho.reloc_type_x86_64, @enumFromInt(relocs[i - 1].r_type)) == .X86_64_RELOC_SUBTRACTOR) @@ -2182,7 +2764,7 @@ const x86_64 = struct { break :blk true; } else false; - const @"type": Relocation.Type = validateRelocType(rel, rel_type) catch |err| { + const @"type": Relocation.Type = validateRelocType(rel, rel_type, is_extern) catch |err| { switch (err) { error.Pcrel => macho_file.base.fatal( "{}: {s},{s}: 0x{x}: PC-relative {s} relocation", @@ -2205,7 +2787,7 @@ const x86_64 = struct { }; out.appendAssumeCapacity(.{ - .tag = if (rel.r_extern == 1) .@"extern" else .local, + .tag = if (is_extern) .@"extern" else .local, .offset = @as(u32, @intCast(rel.r_address)), .target = target, .addend = addend, @@ -2220,7 +2802,7 @@ const x86_64 = struct { } } - fn validateRelocType(rel: macho.relocation_info, rel_type: macho.reloc_type_x86_64) !Relocation.Type { + fn validateRelocType(rel: macho.relocation_info, rel_type: macho.reloc_type_x86_64, is_extern: bool) !Relocation.Type { switch (rel_type) { .X86_64_RELOC_UNSIGNED => { if (rel.r_pcrel == 1) return error.Pcrel; @@ -2240,7 +2822,7 @@ const x86_64 = struct { => { if (rel.r_pcrel == 0) return error.NonPcrel; if (rel.r_length != 2) return error.InvalidLength; - if (rel.r_extern == 0) return error.NonExtern; + if (!is_extern) return error.NonExtern; return switch (rel_type) { .X86_64_RELOC_BRANCH => .branch, .X86_64_RELOC_GOT_LOAD => .got_load, @@ -2271,25 +2853,24 @@ const x86_64 = struct { const aarch64 = struct { fn parseRelocs( - self: *const Object, + self: *Object, n_sect: u8, sect: macho.section_64, out: *std.ArrayListUnmanaged(Relocation), + file: File.Handle, macho_file: *MachO, ) !void { const gpa = macho_file.base.allocator; - const file = macho_file.getFileHandle(self.file_handle); - const offset = if 
(self.archive) |ar| ar.offset else 0; const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); defer gpa.free(relocs_buffer); { - const amt = try file.preadAll(relocs_buffer, sect.reloff + offset); + const amt = try file.preadAll(relocs_buffer, sect.reloff + self.offset); if (amt != relocs_buffer.len) return error.InputOutput; } const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; - const code = try self.getSectionData(@intCast(n_sect), macho_file); + const code = try self.getSectionData(gpa, @intCast(n_sect), file); defer gpa.free(code); try out.ensureTotalCapacityPrecise(gpa, relocs.len); @@ -2335,8 +2916,9 @@ const aarch64 = struct { } const rel_type: macho.reloc_type_arm64 = @enumFromInt(rel.r_type); + var is_extern = rel.r_extern == 1; - const target = if (rel.r_extern == 0) blk: { + const target: u32 = if (!is_extern) blk: { const nsect = rel.r_symbolnum - 1; const taddr: i64 = if (rel.r_pcrel == 1) @as(i64, @intCast(sect.addr)) + rel.r_address + addend @@ -2348,9 +2930,15 @@ const aarch64 = struct { }); return error.ParseFailed; }; - addend = taddr - @as(i64, @intCast(macho_file.getAtom(target).?.getInputAddress(macho_file))); + const target_atom = self.getAtom(target).?; + addend = taddr - @as(i64, @intCast(target_atom.getInputAddress(macho_file))); + const isec = target_atom.getInputSection(macho_file); + if (isCstringLiteral(isec) or isFixedSizeLiteral(isec) or isPtrLiteral(isec)) { + is_extern = true; + break :blk target_atom.getExtra(macho_file).literal_symbol_index; + } break :blk target; - } else self.symbols.items[rel.r_symbolnum]; + } else rel.r_symbolnum; const has_subtractor = if (i > 0 and @as(macho.reloc_type_arm64, @enumFromInt(relocs[i - 1].r_type)) == .ARM64_RELOC_SUBTRACTOR) @@ -2364,7 +2952,7 @@ const aarch64 = struct { break :blk true; } else false; - const @"type": Relocation.Type = validateRelocType(rel, rel_type) catch |err| { + const @"type": Relocation.Type = validateRelocType(rel, rel_type, is_extern) catch |err| { switch (err) { error.Pcrel => macho_file.base.fatal( "{}: {s},{s}: 0x{x}: PC-relative {s} relocation", @@ -2387,7 +2975,7 @@ const aarch64 = struct { }; out.appendAssumeCapacity(.{ - .tag = if (rel.r_extern == 1) .@"extern" else .local, + .tag = if (is_extern) .@"extern" else .local, .offset = @as(u32, @intCast(rel.r_address)), .target = target, .addend = addend, @@ -2402,7 +2990,7 @@ const aarch64 = struct { } } - fn validateRelocType(rel: macho.relocation_info, rel_type: macho.reloc_type_arm64) !Relocation.Type { + fn validateRelocType(rel: macho.relocation_info, rel_type: macho.reloc_type_arm64, is_extern: bool) !Relocation.Type { switch (rel_type) { .ARM64_RELOC_UNSIGNED => { if (rel.r_pcrel == 1) return error.Pcrel; @@ -2423,7 +3011,7 @@ const aarch64 = struct { => { if (rel.r_pcrel == 0) return error.NonPcrel; if (rel.r_length != 2) return error.InvalidLength; - if (rel.r_extern == 0) return error.NonExtern; + if (!is_extern) return error.NonExtern; return switch (rel_type) { .ARM64_RELOC_BRANCH26 => .branch, .ARM64_RELOC_PAGE21 => .page, @@ -2440,7 +3028,7 @@ const aarch64 = struct { => { if (rel.r_pcrel == 1) return error.Pcrel; if (rel.r_length != 2) return error.InvalidLength; - if (rel.r_extern == 0) return error.NonExtern; + if (!is_extern) return error.NonExtern; return switch (rel_type) { .ARM64_RELOC_PAGEOFF12 => .pageoff, .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got_load_pageoff, diff --git a/src/MachO/Options.zig b/src/MachO/Options.zig index 
3205289b..6f558b15 100644 --- a/src/MachO/Options.zig +++ b/src/MachO/Options.zig @@ -325,6 +325,8 @@ pub fn parse(arena: Allocator, args: []const []const u8, ctx: anytype) !Options } if (verbose) { + std.debug.lockStdErr(); + defer std.debug.unlockStdErr(); ctx.print("{s} ", .{cmd}); for (args[0 .. args.len - 1]) |arg| { ctx.print("{s} ", .{arg}); @@ -332,7 +334,11 @@ pub fn parse(arena: Allocator, args: []const []const u8, ctx: anytype) !Options ctx.print("{s}\n", .{args[args.len - 1]}); } - if (print_version) ctx.print("{s}\n", .{version}); + if (print_version) { + std.debug.lockStdErr(); + defer std.debug.unlockStdErr(); + ctx.print("{s}\n", .{version}); + } if (positionals.items.len == 0) ctx.fatal("Expected at least one positional argument\n", .{}); diff --git a/src/MachO/Relocation.zig b/src/MachO/Relocation.zig index 6ad9d9bd..3d000ed3 100644 --- a/src/MachO/Relocation.zig +++ b/src/MachO/Relocation.zig @@ -1,4 +1,4 @@ -tag: enum { @"extern", local }, +tag: Tag, offset: u32, target: u32, addend: i64, @@ -10,27 +10,33 @@ meta: packed struct { symbolnum: u24, }, -pub fn getTargetSymbol(rel: Relocation, macho_file: *MachO) *Symbol { +pub fn getTargetSymbolRef(rel: Relocation, atom: Atom, macho_file: *MachO) MachO.Ref { assert(rel.tag == .@"extern"); - return macho_file.getSymbol(rel.target); + return atom.getFile(macho_file).getSymbolRef(rel.target, macho_file); } -pub fn getTargetAtom(rel: Relocation, macho_file: *MachO) *Atom { +pub fn getTargetSymbol(rel: Relocation, atom: Atom, macho_file: *MachO) *Symbol { + assert(rel.tag == .@"extern"); + const ref = atom.getFile(macho_file).getSymbolRef(rel.target, macho_file); + return ref.getSymbol(macho_file).?; +} + +pub fn getTargetAtom(rel: Relocation, atom: Atom, macho_file: *MachO) *Atom { assert(rel.tag == .local); - return macho_file.getAtom(rel.target).?; + return atom.getFile(macho_file).getAtom(rel.target).?; } -pub fn getTargetAddress(rel: Relocation, macho_file: *MachO) u64 { +pub fn getTargetAddress(rel: Relocation, atom: Atom, macho_file: *MachO) u64 { return switch (rel.tag) { - .local => rel.getTargetAtom(macho_file).getAddress(macho_file), - .@"extern" => rel.getTargetSymbol(macho_file).getAddress(.{}, macho_file), + .local => rel.getTargetAtom(atom, macho_file).getAddress(macho_file), + .@"extern" => rel.getTargetSymbol(atom, macho_file).getAddress(.{}, macho_file), }; } -pub fn getGotTargetAddress(rel: Relocation, macho_file: *MachO) u64 { +pub fn getGotTargetAddress(rel: Relocation, atom: Atom, macho_file: *MachO) u64 { return switch (rel.tag) { .local => 0, - .@"extern" => rel.getTargetSymbol(macho_file).getGotAddress(macho_file), + .@"extern" => rel.getTargetSymbol(atom, macho_file).getGotAddress(macho_file), }; } @@ -145,6 +151,8 @@ pub const Type = enum { unsigned, }; +const Tag = enum { local, @"extern" }; + const assert = std.debug.assert; const macho = std.macho; const math = std.math; diff --git a/src/MachO/Symbol.zig b/src/MachO/Symbol.zig index 4d8a5b19..e33d0868 100644 --- a/src/MachO/Symbol.zig +++ b/src/MachO/Symbol.zig @@ -4,15 +4,14 @@ value: u64 = 0, /// Offset into the linker's intern table. -name: u32 = 0, +name: MachO.String = .{}, /// File where this symbol is defined. file: File.Index = 0, -/// Atom containing this symbol if any. -/// Index of 0 means there is no associated atom with this symbol. +/// Reference to Atom containing this symbol if any. /// Use `getAtom` to get the pointer to the atom. 
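The change above in Symbol.zig replaces the flat `atom: Atom.Index` with `atom_ref: MachO.Ref = .{ .index = 0, .file = 0 }`, so a symbol now names its atom by a (file, index) pair that is resolved through the owning file rather than by a global index. A minimal, self-contained sketch of that resolution pattern follows; the names here (Ref, FileState, resolve) are illustrative stand-ins, not the linker's actual API, and the "empty slot means unresolved" convention is an assumption of the sketch only.

const std = @import("std");

// Illustrative stand-ins; not the linker's real types.
const FileState = struct {
    values: []const u32,
};

const Ref = struct {
    index: u32 = 0,
    file: u32 = 0,

    // Resolve through the owning file; a missing file slot yields null,
    // modelling an unclaimed/unresolved reference (assumption of this sketch).
    fn resolve(ref: Ref, files: []const ?FileState) ?u32 {
        const state = files[ref.file] orelse return null;
        if (ref.index >= state.values.len) return null;
        return state.values[ref.index];
    }
};

test "(file, index) references resolve through their owning file" {
    const files = [_]?FileState{
        null, // slot 0 left empty here to model "no file"
        .{ .values = &.{ 10, 20, 30 } },
    };
    try std.testing.expectEqual(@as(?u32, 20), (Ref{ .file = 1, .index = 1 }).resolve(&files));
    try std.testing.expectEqual(@as(?u32, null), (Ref{}).resolve(&files));
}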
-atom: Atom.Index = 0, +atom_ref: MachO.Ref = .{ .index = 0, .file = 0 }, /// Assigned output section index for this symbol. out_n_sect: u8 = 0, @@ -24,6 +23,8 @@ nlist_idx: u32 = 0, /// Misc flags for the symbol packaged as packed struct for compression. flags: Flags = .{}, +sect_flags: std.atomic.Value(u8) = std.atomic.Value(u8).init(0), + visibility: Visibility = .local, extra: u32 = 0, @@ -55,15 +56,18 @@ pub fn weakRef(symbol: Symbol, macho_file: *MachO) bool { } pub fn getName(symbol: Symbol, macho_file: *MachO) [:0]const u8 { - if (symbol.flags.global) return macho_file.string_intern.getAssumeExists(symbol.name); return switch (symbol.getFile(macho_file).?) { - .dylib => unreachable, // There are no local symbols for dylibs inline else => |x| x.getString(symbol.name), }; } pub fn getAtom(symbol: Symbol, macho_file: *MachO) ?*Atom { - return macho_file.getAtom(symbol.atom); + return symbol.atom_ref.getAtom(macho_file); +} + +pub fn getOutputSectionIndex(symbol: Symbol, macho_file: *MachO) u8 { + if (symbol.getAtom(macho_file)) |atom| return atom.out_n_sect; + return symbol.out_n_sect; } pub fn getFile(symbol: Symbol, macho_file: *MachO) ?File { @@ -74,8 +78,9 @@ pub fn getFile(symbol: Symbol, macho_file: *MachO) ?File { pub fn getNlist(symbol: Symbol, macho_file: *MachO) macho.nlist_64 { const file = symbol.getFile(macho_file).?; return switch (file) { + .dylib => unreachable, .object => |x| x.symtab.items(.nlist)[symbol.nlist_idx], - else => unreachable, + .internal => |x| x.symtab.items[symbol.nlist_idx], }; } @@ -95,7 +100,7 @@ pub fn getDylibOrdinal(symbol: Symbol, macho_file: *MachO) ?u16 { } pub fn getSymbolRank(symbol: Symbol, macho_file: *MachO) u32 { - const file = symbol.getFile(macho_file) orelse return std.math.maxInt(u32); + const file = symbol.getFile(macho_file).?; const in_archive = switch (file) { .object => |x| !x.alive, else => false, @@ -111,9 +116,9 @@ pub fn getAddress(symbol: Symbol, opts: struct { stubs: bool = true, }, macho_file: *MachO) u64 { if (opts.stubs) { - if (symbol.flags.stubs) { + if (symbol.getSectionFlags().stubs) { return symbol.getStubsAddress(macho_file); - } else if (symbol.flags.objc_stubs) { + } else if (symbol.getSectionFlags().objc_stubs) { return symbol.getObjcStubsAddress(macho_file); } } @@ -122,34 +127,36 @@ pub fn getAddress(symbol: Symbol, opts: struct { } pub fn getGotAddress(symbol: Symbol, macho_file: *MachO) u64 { - if (!symbol.flags.got) return 0; - const extra = symbol.getExtra(macho_file).?; + if (!symbol.getSectionFlags().got) return 0; + const extra = symbol.getExtra(macho_file); return macho_file.got.getAddress(extra.got, macho_file); } pub fn getStubsAddress(symbol: Symbol, macho_file: *MachO) u64 { - if (!symbol.flags.stubs) return 0; - const extra = symbol.getExtra(macho_file).?; + if (!symbol.getSectionFlags().stubs) return 0; + const extra = symbol.getExtra(macho_file); return macho_file.stubs.getAddress(extra.stubs, macho_file); } pub fn getObjcStubsAddress(symbol: Symbol, macho_file: *MachO) u64 { - if (!symbol.flags.objc_stubs) return 0; - const extra = symbol.getExtra(macho_file).?; + if (!symbol.getSectionFlags().objc_stubs) return 0; + const extra = symbol.getExtra(macho_file); return macho_file.objc_stubs.getAddress(extra.objc_stubs, macho_file); } pub fn getObjcSelrefsAddress(symbol: Symbol, macho_file: *MachO) u64 { - if (!symbol.flags.objc_stubs) return 0; - const extra = symbol.getExtra(macho_file).?; - const atom = macho_file.getAtom(extra.objc_selrefs).?; - assert(atom.flags.alive); - return 
atom.getAddress(macho_file); + if (!symbol.getSectionFlags().objc_stubs) return 0; + const extra = symbol.getExtra(macho_file); + const file = symbol.getFile(macho_file).?; + return switch (file) { + .dylib => unreachable, + inline else => |x| x.symbols.items[extra.objc_selrefs].getAddress(.{}, macho_file), + }; } pub fn getTlvPtrAddress(symbol: Symbol, macho_file: *MachO) u64 { - if (!symbol.flags.tlv_ptr) return 0; - const extra = symbol.getExtra(macho_file).?; + if (!symbol.getSectionFlags().tlv_ptr) return 0; + const extra = symbol.getExtra(macho_file); return macho_file.tlv_ptr.getAddress(extra.tlv_ptr, macho_file); } @@ -160,7 +167,7 @@ pub fn getOutputSymtabIndex(symbol: Symbol, macho_file: *MachO) ?u32 { const symtab_ctx = switch (file) { inline else => |x| x.output_symtab_ctx, }; - var idx = symbol.getExtra(macho_file).?.symtab; + var idx = symbol.getExtra(macho_file).symtab; if (symbol.isLocal()) { idx += symtab_ctx.ilocal; } else if (symbol.flags.@"export") { @@ -172,20 +179,25 @@ pub fn getOutputSymtabIndex(symbol: Symbol, macho_file: *MachO) ?u32 { return idx; } +pub fn getSectionFlags(symbol: Symbol) SectionFlags { + return @bitCast(symbol.sect_flags.load(.seq_cst)); +} + +pub fn setSectionFlags(symbol: *Symbol, flags: SectionFlags) void { + _ = symbol.sect_flags.fetchOr(@bitCast(flags), .seq_cst); +} + const AddExtraOpts = struct { got: ?u32 = null, stubs: ?u32 = null, objc_stubs: ?u32 = null, - objc_selrefs: ?u32 = null, tlv_ptr: ?u32 = null, symtab: ?u32 = null, + objc_selrefs: ?u32 = null, }; -pub fn addExtra(symbol: *Symbol, opts: AddExtraOpts, macho_file: *MachO) !void { - if (symbol.getExtra(macho_file) == null) { - symbol.extra = try macho_file.addSymbolExtra(.{}); - } - var extra = symbol.getExtra(macho_file).?; +pub fn addExtra(symbol: *Symbol, opts: AddExtraOpts, macho_file: *MachO) void { + var extra = symbol.getExtra(macho_file); inline for (@typeInfo(@TypeOf(opts)).Struct.fields) |field| { if (@field(opts, field.name)) |x| { @field(extra, field.name) = x; @@ -194,18 +206,22 @@ pub fn addExtra(symbol: *Symbol, opts: AddExtraOpts, macho_file: *MachO) !void { symbol.setExtra(extra, macho_file); } -pub inline fn getExtra(symbol: Symbol, macho_file: *MachO) ?Extra { - return macho_file.getSymbolExtra(symbol.extra); +pub inline fn getExtra(symbol: Symbol, macho_file: *MachO) Extra { + return switch (symbol.getFile(macho_file).?) { + inline else => |x| x.getSymbolExtra(symbol.extra), + }; } pub inline fn setExtra(symbol: Symbol, extra: Extra, macho_file: *MachO) void { - macho_file.setSymbolExtra(symbol.extra, extra); + return switch (symbol.getFile(macho_file).?) 
{ + inline else => |x| x.setSymbolExtra(symbol.extra, extra), + }; } pub fn setOutputSym(symbol: Symbol, macho_file: *MachO, out: *macho.nlist_64) void { if (symbol.isLocal()) { out.n_type = if (symbol.flags.abs) macho.N_ABS else macho.N_SECT; - out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.out_n_sect + 1); + out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.getOutputSectionIndex(macho_file) + 1); out.n_desc = 0; out.n_value = symbol.getAddress(.{ .stubs = false }, macho_file); @@ -217,7 +233,7 @@ pub fn setOutputSym(symbol: Symbol, macho_file: *MachO, out: *macho.nlist_64) vo assert(symbol.visibility == .global); out.n_type = macho.N_EXT; out.n_type |= if (symbol.flags.abs) macho.N_ABS else macho.N_SECT; - out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.out_n_sect + 1); + out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.getOutputSectionIndex(macho_file) + 1); out.n_value = symbol.getAddress(.{ .stubs = false }, macho_file); out.n_desc = 0; @@ -292,8 +308,8 @@ fn format2( symbol.getAddress(.{}, ctx.macho_file), }); if (symbol.getFile(ctx.macho_file)) |file| { - if (symbol.out_n_sect != 0) { - try writer.print(" : sect({d})", .{symbol.out_n_sect}); + if (symbol.getOutputSectionIndex(ctx.macho_file) != 0) { + try writer.print(" : sect({d})", .{symbol.getOutputSectionIndex(ctx.macho_file)}); } if (symbol.getAtom(ctx.macho_file)) |atom| { try writer.print(" : atom({d})", .{atom.atom_index}); @@ -319,11 +335,6 @@ pub const Flags = packed struct { /// Whether the symbol is exported at runtime. @"export": bool = false, - /// Whether the symbol is effectively an extern and takes part in global - /// symbol resolution. Then, its name will be saved in global string interning - /// table. - global: bool = false, - /// Whether this symbol is weak. weak: bool = false, @@ -350,7 +361,9 @@ pub const Flags = packed struct { /// Whether the symbol makes into the output symtab or not. output_symtab: bool = false, +}; +pub const SectionFlags = packed struct(u8) { /// Whether the symbol contains __got indirection. got: bool = false, @@ -362,6 +375,8 @@ pub const Flags = packed struct { /// Whether the symbol contains __objc_stubs indirection. objc_stubs: bool = false, + + _: u4 = 0, }; pub const Visibility = enum { @@ -374,13 +389,23 @@ pub const Extra = struct { got: u32 = 0, stubs: u32 = 0, objc_stubs: u32 = 0, - objc_selrefs: u32 = 0, tlv_ptr: u32 = 0, symtab: u32 = 0, + objc_selrefs: u32 = 0, }; pub const Index = u32; +pub const Ref = struct { + index: Symbol.Index, + file: File.Index, + + pub fn getSymbol(ref: Ref, macho_file: *MachO) ?*Symbol { + const file = ref.getFile(macho_file) orelse return null; + return file.getSymbol(ref.index); + } +}; + const assert = std.debug.assert; const macho = std.macho; const std = @import("std"); diff --git a/src/MachO/UnwindInfo.zig b/src/MachO/UnwindInfo.zig index 0d1dd875..3ab3854a 100644 --- a/src/MachO/UnwindInfo.zig +++ b/src/MachO/UnwindInfo.zig @@ -1,10 +1,10 @@ /// List of all unwind records gathered from all objects and sorted /// by allocated relative function address within the section. -records: std.ArrayListUnmanaged(Record.Index) = .{}, +records: std.ArrayListUnmanaged(Record.Ref) = .{}, /// List of all personalities referenced by either unwind info entries /// or __eh_frame entries. -personalities: [max_personalities]Symbol.Index = undefined, +personalities: [max_personalities]MachO.Ref = undefined, personalities_count: u2 = 0, /// List of common encodings sorted in descending order with the most common first. 
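The UnwindInfo.zig hunks that follow switch `records` and `personalities` over to reference types (`Record.Ref`, `MachO.Ref`), and `getOrPutPersonalityFunction` dedups personalities by `Ref` equality into a fixed array capped at `max_personalities = 3`, failing with `error.TooManyPersonalities` once the cap is hit. Below is a minimal sketch of that fixed-capacity dedup under the same cap; `Ref`, `eql`, and `Personalities` are stand-ins for the real `MachO.Ref` machinery, and the runtime linear scan here replaces the comptime-unrolled loop used in the patch.

const std = @import("std");

// Stand-in for MachO.Ref; the real field types/layout may differ.
const Ref = struct {
    index: u32,
    file: u32,

    fn eql(a: Ref, b: Ref) bool {
        return a.index == b.index and a.file == b.file;
    }
};

const Personalities = struct {
    entries: [3]Ref = undefined,
    count: u2 = 0,

    // Linear scan with insert-on-miss; at most three distinct entries.
    fn getOrPut(self: *Personalities, ref: Ref) error{TooManyPersonalities}!u2 {
        var i: u2 = 0;
        while (i < self.count) : (i += 1) {
            if (self.entries[i].eql(ref)) return i;
        }
        if (self.count == self.entries.len) return error.TooManyPersonalities;
        self.entries[self.count] = ref;
        self.count += 1;
        return self.count - 1;
    }
};

test "personality refs are deduped into a fixed table" {
    var p = Personalities{};
    try std.testing.expectEqual(@as(u2, 0), try p.getOrPut(.{ .index = 1, .file = 1 }));
    try std.testing.expectEqual(@as(u2, 0), try p.getOrPut(.{ .index = 1, .file = 1 }));
    try std.testing.expectEqual(@as(u2, 1), try p.getOrPut(.{ .index = 2, .file = 1 }));
}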
@@ -25,10 +25,10 @@ pub fn deinit(info: *UnwindInfo, allocator: Allocator) void { info.lsdas_lookup.deinit(allocator); } -fn canFold(macho_file: *MachO, lhs_index: Record.Index, rhs_index: Record.Index) bool { +fn canFold(macho_file: *MachO, lhs_ref: Record.Ref, rhs_ref: Record.Ref) bool { const cpu_arch = macho_file.options.cpu_arch.?; - const lhs = macho_file.getUnwindRecord(lhs_index); - const rhs = macho_file.getUnwindRecord(rhs_index); + const lhs = lhs_ref.getUnwindRecord(macho_file); + const rhs = rhs_ref.getUnwindRecord(macho_file); if (cpu_arch == .x86_64) { if (lhs.enc.getMode() == @intFromEnum(macho.UNWIND_X86_64_MODE.STACK_IND) or rhs.enc.getMode() == @intFromEnum(macho.UNWIND_X86_64_MODE.STACK_IND)) return false; @@ -42,27 +42,31 @@ fn canFold(macho_file: *MachO, lhs_index: Record.Index, rhs_index: Record.Index) } pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const gpa = macho_file.base.allocator; log.debug("generating unwind info", .{}); // Collect all unwind records for (macho_file.sections.items(.atoms)) |atoms| { - for (atoms.items) |atom_index| { - const atom = macho_file.getAtom(atom_index) orelse continue; - if (!atom.flags.alive) continue; + for (atoms.items) |ref| { + const atom = ref.getAtom(macho_file) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; const recs = atom.getUnwindRecords(macho_file); + const file = atom.getFile(macho_file); try info.records.ensureUnusedCapacity(gpa, recs.len); for (recs) |rec| { - if (!macho_file.getUnwindRecord(rec).alive) continue; - info.records.appendAssumeCapacity(rec); + if (!file.object.getUnwindRecord(rec).alive) continue; + info.records.appendAssumeCapacity(.{ .record = rec, .file = file.getIndex() }); } } } // Encode records - for (info.records.items) |index| { - const rec = macho_file.getUnwindRecord(index); + for (info.records.items) |ref| { + const rec = ref.getUnwindRecord(macho_file); if (rec.getFde(macho_file)) |fde| { rec.enc.setDwarfSectionOffset(@intCast(fde.out_offset)); if (fde.getLsdaAtom(macho_file)) |lsda| { @@ -72,27 +76,31 @@ pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { } const cie = fde.getCie(macho_file); if (cie.getPersonality(macho_file)) |_| { - const personality_index = try info.getOrPutPersonalityFunction(cie.personality.?.index); // TODO handle error + const object = cie.getObject(macho_file); + const sym_ref = object.getSymbolRef(cie.personality.?.index, macho_file); + const personality_index = try info.getOrPutPersonalityFunction(sym_ref); // TODO handle error rec.enc.setPersonalityIndex(personality_index + 1); } } else if (rec.getPersonality(macho_file)) |_| { - const personality_index = try info.getOrPutPersonalityFunction(rec.personality.?); // TODO handle error + const object = rec.getObject(macho_file); + const sym_ref = object.getSymbolRef(rec.personality.?, macho_file); + const personality_index = try info.getOrPutPersonalityFunction(sym_ref); // TODO handle error rec.enc.setPersonalityIndex(personality_index + 1); } } // Sort by assigned relative address within each output section const sortFn = struct { - fn sortFn(ctx: *MachO, lhs_index: Record.Index, rhs_index: Record.Index) bool { - const lhs = ctx.getUnwindRecord(lhs_index); - const rhs = ctx.getUnwindRecord(rhs_index); + fn sortFn(ctx: *MachO, lhs_ref: Record.Ref, rhs_ref: Record.Ref) bool { + const lhs = lhs_ref.getUnwindRecord(ctx); + const rhs = rhs_ref.getUnwindRecord(ctx); const lhsa = lhs.getAtom(ctx); const rhsa = 
rhs.getAtom(ctx); if (lhsa.out_n_sect == rhsa.out_n_sect) return lhs.getAtomAddress(ctx) < rhs.getAtomAddress(ctx); return lhsa.out_n_sect < rhsa.out_n_sect; } }.sortFn; - mem.sort(Record.Index, info.records.items, macho_file, sortFn); + mem.sort(Record.Ref, info.records.items, macho_file, sortFn); // Fold the records // Any adjacent two records that share encoding can be folded into one. @@ -101,8 +109,8 @@ pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { var j: usize = 1; while (j < info.records.items.len) : (j += 1) { if (canFold(macho_file, info.records.items[i], info.records.items[j])) { - const rec = macho_file.getUnwindRecord(info.records.items[i]); - rec.length += macho_file.getUnwindRecord(info.records.items[j]).length + 1; + const rec = info.records.items[i].getUnwindRecord(macho_file); + rec.length += info.records.items[j].getUnwindRecord(macho_file).length + 1; } else { i += 1; info.records.items[i] = info.records.items[j]; @@ -111,14 +119,15 @@ pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { info.records.shrinkAndFree(gpa, i + 1); } - for (info.records.items) |rec_index| { - const rec = macho_file.getUnwindRecord(rec_index); + for (info.records.items) |ref| { + const rec = ref.getUnwindRecord(macho_file); const atom = rec.getAtom(macho_file); - log.debug("@{x}-{x} : {s} : rec({d}) : {}", .{ + log.debug("@{x}-{x} : {s} : rec({d}) : object({d}) : {}", .{ rec.getAtomAddress(macho_file), rec.getAtomAddress(macho_file) + rec.length, atom.getName(macho_file), - rec_index, + ref.record, + ref.file, rec.enc, }); } @@ -161,8 +170,8 @@ pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { ).init(gpa); defer common_encodings_counts.deinit(); - for (info.records.items) |rec_index| { - const rec = macho_file.getUnwindRecord(rec_index); + for (info.records.items) |ref| { + const rec = ref.getUnwindRecord(macho_file); if (rec.enc.isDwarf(macho_file)) continue; const gop = try common_encodings_counts.getOrPut(rec.enc); if (!gop.found_existing) { @@ -190,7 +199,7 @@ pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { { var i: u32 = 0; while (i < info.records.items.len) { - const rec = macho_file.getUnwindRecord(info.records.items[i]); + const rec = info.records.items[i].getUnwindRecord(macho_file); const range_start_max: u64 = rec.getAtomAddress(macho_file) + compressed_entry_func_offset_mask; var encoding_count: u9 = info.common_encodings_count; var space_left: u32 = second_level_page_words - @@ -202,7 +211,7 @@ pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { }; while (space_left >= 1 and i < info.records.items.len) { - const next = macho_file.getUnwindRecord(info.records.items[i]); + const next = info.records.items[i].getUnwindRecord(macho_file); const is_dwarf = next.enc.isDwarf(macho_file); if (next.getAtomAddress(macho_file) >= range_start_max) { @@ -244,8 +253,8 @@ pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { // Save records having an LSDA pointer log.debug("LSDA pointers:", .{}); try info.lsdas_lookup.ensureTotalCapacityPrecise(gpa, info.records.items.len); - for (info.records.items, 0..) |index, i| { - const rec = macho_file.getUnwindRecord(index); + for (info.records.items, 0..) 
|ref, i| { + const rec = ref.getUnwindRecord(macho_file); info.lsdas_lookup.appendAssumeCapacity(@intCast(info.lsdas.items.len)); if (rec.getLsdaAtom(macho_file)) |lsda| { log.debug(" @{x} => lsda({d})", .{ rec.getAtomAddress(macho_file), lsda.atom_index }); @@ -255,6 +264,9 @@ pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void { } pub fn calcSize(info: UnwindInfo) usize { + const tracy = trace(@src()); + defer tracy.end(); + var total_size: usize = 0; total_size += @sizeOf(macho.unwind_info_section_header); total_size += @@ -292,8 +304,8 @@ pub fn write(info: UnwindInfo, macho_file: *MachO, buffer: []u8) !void { try writer.writeAll(mem.sliceAsBytes(info.common_encodings[0..info.common_encodings_count])); - for (info.personalities[0..info.personalities_count]) |sym_index| { - const sym = macho_file.getSymbol(sym_index); + for (info.personalities[0..info.personalities_count]) |ref| { + const sym = ref.getSymbol(macho_file).?; try writer.writeInt(u32, @intCast(sym.getGotAddress(macho_file) - seg.vmaddr), .little); } @@ -302,7 +314,7 @@ pub fn write(info: UnwindInfo, macho_file: *MachO, buffer: []u8) !void { (info.lsdas.items.len * @sizeOf(macho.unwind_info_section_header_lsda_index_entry)))); for (info.pages.items, 0..) |page, i| { assert(page.count > 0); - const rec = macho_file.getUnwindRecord(info.records.items[page.start]); + const rec = info.records.items[page.start].getUnwindRecord(macho_file); try writer.writeStruct(macho.unwind_info_section_header_index_entry{ .functionOffset = @as(u32, @intCast(rec.getAtomAddress(macho_file) - seg.vmaddr)), .secondLevelPagesSectionOffset = @as(u32, @intCast(pages_base_offset + i * second_level_page_bytes)), @@ -311,7 +323,7 @@ pub fn write(info: UnwindInfo, macho_file: *MachO, buffer: []u8) !void { }); } - const last_rec = macho_file.getUnwindRecord(info.records.items[info.records.items.len - 1]); + const last_rec = info.records.items[info.records.items.len - 1].getUnwindRecord(macho_file); const sentinel_address = @as(u32, @intCast(last_rec.getAtomAddress(macho_file) + last_rec.length - seg.vmaddr)); try writer.writeStruct(macho.unwind_info_section_header_index_entry{ .functionOffset = sentinel_address, @@ -321,7 +333,7 @@ pub fn write(info: UnwindInfo, macho_file: *MachO, buffer: []u8) !void { }); for (info.lsdas.items) |index| { - const rec = macho_file.getUnwindRecord(info.records.items[index]); + const rec = info.records.items[index].getUnwindRecord(macho_file); try writer.writeStruct(macho.unwind_info_section_header_lsda_index_entry{ .functionOffset = @as(u32, @intCast(rec.getAtomAddress(macho_file) - seg.vmaddr)), .lsdaOffset = @as(u32, @intCast(rec.getLsdaAddress(macho_file) - seg.vmaddr)), @@ -343,13 +355,13 @@ pub fn write(info: UnwindInfo, macho_file: *MachO, buffer: []u8) !void { } } -fn getOrPutPersonalityFunction(info: *UnwindInfo, sym_index: Symbol.Index) error{TooManyPersonalities}!u2 { +fn getOrPutPersonalityFunction(info: *UnwindInfo, ref: MachO.Ref) error{TooManyPersonalities}!u2 { comptime var index: u2 = 0; inline while (index < max_personalities) : (index += 1) { - if (info.personalities[index] == sym_index) { + if (info.personalities[index].eql(ref)) { return index; } else if (index == info.personalities_count) { - info.personalities[index] = sym_index; + info.personalities[index] = ref; info.personalities_count += 1; return index; } @@ -464,16 +476,17 @@ pub const Record = struct { } pub fn getAtom(rec: Record, macho_file: *MachO) *Atom { - return macho_file.getAtom(rec.atom).?; + return 
rec.getObject(macho_file).getAtom(rec.atom).?; } pub fn getLsdaAtom(rec: Record, macho_file: *MachO) ?*Atom { - return macho_file.getAtom(rec.lsda); + return rec.getObject(macho_file).getAtom(rec.lsda); } pub fn getPersonality(rec: Record, macho_file: *MachO) ?*Symbol { const personality = rec.personality orelse return null; - return macho_file.getSymbol(personality); + const object = rec.getObject(macho_file); + return object.getSymbolRef(personality, macho_file).getSymbol(macho_file); } pub fn getFde(rec: Record, macho_file: *MachO) ?Fde { @@ -540,6 +553,16 @@ pub const Record = struct { } pub const Index = u32; + + // TODO convert into MachO.Ref + pub const Ref = struct { + record: Index, + file: File.Index, + + pub fn getUnwindRecord(ref: Ref, macho_file: *MachO) *Record { + return macho_file.getFile(ref.file).?.object.getUnwindRecord(ref.record); + } + }; }; const max_personalities = 3; @@ -638,8 +661,8 @@ const Page = struct { .entryCount = page.count, }); - for (info.records.items[page.start..][0..page.count]) |index| { - const rec = macho_file.getUnwindRecord(index); + for (info.records.items[page.start..][0..page.count]) |ref| { + const rec = ref.getUnwindRecord(macho_file); try writer.writeStruct(macho.unwind_info_regular_second_level_entry{ .functionOffset = @as(u32, @intCast(rec.getAtomAddress(macho_file) - seg.vmaddr)), .encoding = rec.enc.enc, @@ -661,9 +684,9 @@ const Page = struct { } assert(page.count > 0); - const first_rec = macho_file.getUnwindRecord(info.records.items[page.start]); - for (info.records.items[page.start..][0..page.count]) |index| { - const rec = macho_file.getUnwindRecord(index); + const first_rec = info.records.items[page.start].getUnwindRecord(macho_file); + for (info.records.items[page.start..][0..page.count]) |ref| { + const rec = ref.getUnwindRecord(macho_file); const enc_index = blk: { if (info.getCommonEncoding(rec.enc)) |id| break :blk id; const ncommon = info.common_encodings_count; diff --git a/src/MachO/dead_strip.zig b/src/MachO/dead_strip.zig index b49c0126..59b36c7b 100644 --- a/src/MachO/dead_strip.zig +++ b/src/MachO/dead_strip.zig @@ -17,16 +17,16 @@ pub fn gcAtoms(macho_file: *MachO) !void { fn collectRoots(roots: *std.ArrayList(*Atom), objects: []const File.Index, macho_file: *MachO) !void { for (objects) |index| { const object = macho_file.getFile(index).?; - for (object.getSymbols()) |sym_index| { - const sym = macho_file.getSymbol(sym_index); - const file = sym.getFile(macho_file) orelse continue; + for (object.getSymbols(), 0..) 
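// Editor's note: a toy sketch of the (file, index) reference pattern that
// Record.Ref above introduces, shown outside the linker for clarity. `Object`,
// `Ref`, and `resolve` are hypothetical stand-ins: the point is only that a
// reference names the owning file plus a per-file index, and resolution always
// goes through that file's own table instead of a linker-global array.
const std = @import("std");

const Record = struct { length: u32 };

const Object = struct {
    records: []const Record,

    fn getRecord(self: Object, index: u32) Record {
        return self.records[index];
    }
};

const Ref = struct {
    record: u32,
    file: u32,

    fn resolve(ref: Ref, files: []const Object) Record {
        return files[ref.file].getRecord(ref.record);
    }
};

test "a ref resolves through the file that owns the record" {
    const files = [_]Object{
        .{ .records = &.{.{ .length = 8 }} },
        .{ .records = &.{ .{ .length = 16 }, .{ .length = 32 } } },
    };
    const ref: Ref = .{ .file = 1, .record = 0 };
    try std.testing.expectEqual(@as(u32, 16), ref.resolve(&files).length);
}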
|*sym, i| { + const ref = object.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; if (file.getIndex() != index) continue; if (sym.flags.no_dead_strip or (macho_file.options.dylib and sym.visibility == .global)) try markSymbol(sym, roots, macho_file); } for (object.getAtoms()) |atom_index| { - const atom = macho_file.getAtom(atom_index).?; + const atom = object.getAtom(atom_index) orelse continue; const isec = atom.getInputSection(macho_file); switch (isec.type()) { macho.S_MOD_INIT_FUNC_POINTERS, @@ -41,8 +41,9 @@ fn collectRoots(roots: *std.ArrayList(*Atom), objects: []const File.Index, macho } for (macho_file.objects.items) |index| { - for (macho_file.getFile(index).?.object.unwind_records.items) |cu_index| { - const cu = macho_file.getUnwindRecord(cu_index); + const object = macho_file.getFile(index).?.object; + for (object.unwind_records_indexes.items) |cu_index| { + const cu = object.getUnwindRecord(cu_index); if (!cu.alive) continue; if (cu.getFde(macho_file)) |fde| { if (fde.getCie(macho_file).getPersonality(macho_file)) |sym| try markSymbol(sym, roots, macho_file); @@ -50,19 +51,27 @@ fn collectRoots(roots: *std.ArrayList(*Atom), objects: []const File.Index, macho } } - for (macho_file.undefined_symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); - try markSymbol(sym, roots, macho_file); - } + if (macho_file.getInternalObject()) |obj| { + for (obj.force_undefined.items) |sym_index| { + const ref = obj.getSymbolRef(sym_index, macho_file); + if (ref.getFile(macho_file) != null) { + const sym = ref.getSymbol(macho_file).?; + try markSymbol(sym, roots, macho_file); + } + } - for (&[_]?Symbol.Index{ - macho_file.entry_index, - macho_file.dyld_stub_binder_index, - macho_file.objc_msg_send_index, - }) |index| { - if (index) |idx| { - const sym = macho_file.getSymbol(idx); - try markSymbol(sym, roots, macho_file); + for (&[_]?Symbol.Index{ + obj.entry_index, + obj.dyld_stub_binder_index, + obj.objc_msg_send_index, + }) |index| { + if (index) |idx| { + const ref = obj.getSymbolRef(idx, macho_file); + if (ref.getFile(macho_file) != null) { + const sym = ref.getSymbol(macho_file).?; + try markSymbol(sym, roots, macho_file); + } + } } } } @@ -73,9 +82,8 @@ fn markSymbol(sym: *Symbol, roots: *std.ArrayList(*Atom), macho_file: *MachO) !v } fn markAtom(atom: *Atom) bool { - const already_visited = atom.flags.visited; - atom.flags.visited = true; - return atom.flags.alive and !already_visited; + const already_visited = atom.visited.swap(true, .seq_cst); + return atom.alive.load(.seq_cst) and !already_visited; } fn mark(roots: []*Atom, objects: []const File.Index, macho_file: *MachO) void { @@ -88,14 +96,15 @@ fn mark(roots: []*Atom, objects: []const File.Index, macho_file: *MachO) void { loop = false; for (objects) |index| { - for (macho_file.getFile(index).?.getAtoms()) |atom_index| { - const atom = macho_file.getAtom(atom_index).?; + const file = macho_file.getFile(index).?; + for (file.getAtoms()) |atom_index| { + const atom = file.getAtom(atom_index) orelse continue; const isec = atom.getInputSection(macho_file); if (isec.isDontDeadStripIfReferencesLive() and !(mem.eql(u8, isec.sectName(), "__eh_frame") or mem.eql(u8, isec.sectName(), "__compact_unwind") or isec.attrs() & macho.S_ATTR_DEBUG != 0) and - !atom.flags.alive and refersLive(atom, macho_file)) + !atom.alive.load(.seq_cst) and refersLive(atom, macho_file)) { markLive(atom, macho_file); loop = true; @@ -106,8 +115,8 @@ fn mark(roots: []*Atom, objects: []const 
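// Editor's note: markAtom above now swaps an atomic `visited` flag instead of
// reading and writing a plain bool. A minimal sketch of that claim-once
// pattern, assuming AtomicBool is std.atomic.Value(bool) as elsewhere in this
// change; `Node` and `tryClaim` are illustrative names only.
const std = @import("std");

const Node = struct {
    visited: std.atomic.Value(bool) = std.atomic.Value(bool).init(false),
    alive: std.atomic.Value(bool) = std.atomic.Value(bool).init(true),
};

// swap returns the previous value, so exactly one caller observes `false` and
// wins the right to process this node, even with several threads marking.
fn tryClaim(node: *Node) bool {
    const already_visited = node.visited.swap(true, .seq_cst);
    return node.alive.load(.seq_cst) and !already_visited;
}

test "only the first claim succeeds" {
    var node: Node = .{};
    try std.testing.expect(tryClaim(&node));
    try std.testing.expect(!tryClaim(&node));
}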
File.Index, macho_file: *MachO) void { } fn markLive(atom: *Atom, macho_file: *MachO) void { - assert(atom.flags.visited); - atom.flags.alive = true; + assert(atom.visited.load(.seq_cst)); + _ = atom.alive.swap(true, .seq_cst); track_live_log.debug("{}marking live atom({d},{s})", .{ track_live_level, atom.atom_index, @@ -119,16 +128,20 @@ fn markLive(atom: *Atom, macho_file: *MachO) void { for (atom.getRelocs(macho_file)) |rel| { const target_atom = switch (rel.tag) { - .local => rel.getTargetAtom(macho_file), - .@"extern" => rel.getTargetSymbol(macho_file).getAtom(macho_file), + .local => rel.getTargetAtom(atom.*, macho_file), + .@"extern" => blk: { + const ref = rel.getTargetSymbolRef(atom.*, macho_file); + break :blk if (ref.getSymbol(macho_file)) |sym| sym.getAtom(macho_file) else null; + }, }; if (target_atom) |ta| { if (markAtom(ta)) markLive(ta, macho_file); } } + const file = atom.getFile(macho_file); for (atom.getUnwindRecords(macho_file)) |cu_index| { - const cu = macho_file.getUnwindRecord(cu_index); + const cu = file.object.getUnwindRecord(cu_index); const cu_atom = cu.getAtom(macho_file); if (markAtom(cu_atom)) markLive(cu_atom, macho_file); @@ -149,11 +162,14 @@ fn markLive(atom: *Atom, macho_file: *MachO) void { fn refersLive(atom: *Atom, macho_file: *MachO) bool { for (atom.getRelocs(macho_file)) |rel| { const target_atom = switch (rel.tag) { - .local => rel.getTargetAtom(macho_file), - .@"extern" => rel.getTargetSymbol(macho_file).getAtom(macho_file), + .local => rel.getTargetAtom(atom.*, macho_file), + .@"extern" => blk: { + const ref = rel.getTargetSymbolRef(atom.*, macho_file); + break :blk if (ref.getSymbol(macho_file)) |sym| sym.getAtom(macho_file) else null; + }, }; if (target_atom) |ta| { - if (ta.flags.alive) return true; + if (ta.alive.load(.seq_cst)) return true; } } return false; @@ -161,11 +177,13 @@ fn refersLive(atom: *Atom, macho_file: *MachO) bool { fn prune(objects: []const File.Index, macho_file: *MachO) void { for (objects) |index| { - for (macho_file.getFile(index).?.getAtoms()) |atom_index| { - const atom = macho_file.getAtom(atom_index).?; - if (atom.flags.alive and !atom.flags.visited) { - atom.flags.alive = false; - atom.markUnwindRecordsDead(macho_file); + const file = macho_file.getFile(index).?; + for (file.getAtoms()) |atom_index| { + const atom = file.getAtom(atom_index) orelse continue; + if (!atom.visited.load(.seq_cst)) { + if (atom.alive.cmpxchgStrong(true, false, .seq_cst, .seq_cst) == null) { + atom.markUnwindRecordsDead(macho_file); + } } } } diff --git a/src/MachO/dyld_info/Rebase.zig b/src/MachO/dyld_info/Rebase.zig index 4b6f5688..ee883d20 100644 --- a/src/MachO/dyld_info/Rebase.zig +++ b/src/MachO/dyld_info/Rebase.zig @@ -1,14 +1,3 @@ -const Rebase = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const log = std.log.scoped(.dyld_info); -const macho = std.macho; -const testing = std.testing; - -const Allocator = std.mem.Allocator; - entries: std.ArrayListUnmanaged(Entry) = .{}, buffer: std.ArrayListUnmanaged(u8) = .{}, @@ -34,12 +23,99 @@ pub fn size(rebase: Rebase) u64 { return @as(u64, @intCast(rebase.buffer.items.len)); } -pub fn finalize(rebase: *Rebase, gpa: Allocator) !void { +pub fn updateSize(rebase: *Rebase, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + + var objects = try std.ArrayList(File.Index).initCapacity(gpa, macho_file.objects.items.len + 1); + defer objects.deinit(); + 
objects.appendSliceAssumeCapacity(macho_file.objects.items); + if (macho_file.getInternalObject()) |obj| objects.appendAssumeCapacity(obj.index); + + for (objects.items) |index| { + const file = macho_file.getFile(index).?; + for (file.getAtoms()) |atom_index| { + const atom = file.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; + if (atom.getInputSection(macho_file).isZerofill()) continue; + const atom_addr = atom.getAddress(macho_file); + const seg_id = macho_file.sections.items(.segment_id)[atom.out_n_sect]; + const seg = macho_file.segments.items[seg_id]; + for (atom.getRelocs(macho_file)) |rel| { + if (rel.type != .unsigned or rel.meta.length != 3) continue; + if (rel.tag == .@"extern") { + const sym = rel.getTargetSymbol(atom.*, macho_file); + if (sym.isTlvInit(macho_file)) continue; + if (sym.flags.import) continue; + } + const rel_offset = rel.offset - atom.off; + try rebase.entries.append(gpa, .{ + .offset = atom_addr + rel_offset - seg.vmaddr, + .segment_id = seg_id, + }); + } + } + } + + if (macho_file.got_sect_index) |sid| { + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + for (macho_file.got.symbols.items, 0..) |ref, idx| { + const sym = ref.getSymbol(macho_file).?; + const addr = macho_file.got.getAddress(@intCast(idx), macho_file); + if (!sym.flags.import) { + try rebase.entries.append(gpa, .{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }); + } + } + } + + if (macho_file.la_symbol_ptr_sect_index) |sid| { + const sect = macho_file.sections.items(.header)[sid]; + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + for (macho_file.stubs.symbols.items, 0..) |ref, idx| { + const sym = ref.getSymbol(macho_file).?; + const addr = sect.addr + idx * @sizeOf(u64); + const rebase_entry = Rebase.Entry{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }; + if ((sym.flags.import and !sym.flags.weak) or !sym.flags.import) { + try rebase.entries.append(gpa, rebase_entry); + } + } + } + + if (macho_file.tlv_ptr_sect_index) |sid| { + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + for (macho_file.tlv_ptr.symbols.items, 0..) 
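// Editor's note: a hedged sketch of what the entries collected above turn into
// when encoded as dyld rebase opcodes. `emitSingleRebase` is a hypothetical
// helper, not part of zld; the constants are assumed to be the std.macho
// REBASE_* definitions, and the real encoder batches runs of entries far more
// compactly than this one-entry program.
const std = @import("std");
const macho = std.macho;
const leb = std.leb;

fn emitSingleRebase(writer: anytype, segment_id: u4, offset: u64) !void {
    // Pointer rebase, at (segment, offset), performed exactly once.
    try writer.writeByte(macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER);
    try writer.writeByte(macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @as(u8, segment_id));
    try leb.writeULEB128(writer, offset);
    try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1);
    try writer.writeByte(macho.REBASE_OPCODE_DONE);
}

test "a one-entry rebase opcode program" {
    var buf = std.ArrayList(u8).init(std.testing.allocator);
    defer buf.deinit();
    try emitSingleRebase(buf.writer(), 2, 0x10);
    try std.testing.expectEqual(@as(usize, 5), buf.items.len);
}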
|ref, idx| { + const sym = ref.getSymbol(macho_file).?; + const addr = macho_file.tlv_ptr.getAddress(@intCast(idx), macho_file); + if (!sym.flags.import) { + try rebase.entries.append(gpa, .{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }); + } + } + } + + try rebase.finalize(gpa); + macho_file.dyld_info_cmd.rebase_size = mem.alignForward(u32, @intCast(rebase.size()), @alignOf(u64)); +} + +fn finalize(rebase: *Rebase, gpa: Allocator) !void { if (rebase.entries.items.len == 0) return; const writer = rebase.buffer.writer(gpa); - std.mem.sort(Entry, rebase.entries.items, {}, Entry.lessThan); + mem.sort(Entry, rebase.entries.items, {}, Entry.lessThan); try setTypePointer(writer); @@ -179,7 +255,7 @@ fn rebaseTimesSkip(count: usize, skip: u64, writer: anytype) !void { fn addAddr(addr: u64, writer: anytype) !void { log.debug(">>> add: {x}", .{addr}); - if (std.mem.isAligned(addr, @sizeOf(u64))) { + if (mem.isAligned(addr, @sizeOf(u64))) { const imm = @divExact(addr, @sizeOf(u64)); if (imm <= 0xf) { try writer.writeByte(macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | @as(u4, @truncate(imm))); @@ -572,3 +648,17 @@ test "rebase - composite" { macho.REBASE_OPCODE_DONE, }, rebase.buffer.items); } + +const std = @import("std"); +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.dyld_info); +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; +const trace = @import("../../tracy.zig").trace; + +const Allocator = mem.Allocator; +const File = @import("../file.zig").File; +const MachO = @import("../../MachO.zig"); +const Rebase = @This(); diff --git a/src/MachO/dyld_info/Trie.zig b/src/MachO/dyld_info/Trie.zig index edef5756..58931f38 100644 --- a/src/MachO/dyld_info/Trie.zig +++ b/src/MachO/dyld_info/Trie.zig @@ -28,463 +28,310 @@ //! After the optional exported symbol information is a byte of how many edges (0-255) that //! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of //! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to. -const Trie = @This(); - -const std = @import("std"); -const mem = std.mem; -const leb = std.leb; -const log = std.log.scoped(.macho); -const macho = std.macho; -const testing = std.testing; -const assert = std.debug.assert; -const Allocator = mem.Allocator; - -pub const Node = struct { - base: *Trie, - - /// Terminal info associated with this node. - /// If this node is not a terminal node, info is null. - terminal_info: ?struct { - /// Export flags associated with this exported symbol. - export_flags: u64, - /// VM address offset wrt to the section this symbol is defined against. - vmaddr_offset: u64, - } = null, - - /// Offset of this node in the trie output byte stream. - trie_offset: ?u64 = null, - - /// List of all edges originating from this node. - edges: std.ArrayListUnmanaged(Edge) = .{}, - - node_dirty: bool = true, - - /// Edge connecting to nodes in the trie. - pub const Edge = struct { - from: *Node, - to: *Node, - label: []u8, - - fn deinit(self: *Edge, allocator: Allocator) void { - self.to.deinit(allocator); - allocator.destroy(self.to); - allocator.free(self.label); - self.from = undefined; - self.to = undefined; - self.label = undefined; - } - }; - - fn deinit(self: *Node, allocator: Allocator) void { - for (self.edges.items) |*edge| { - edge.deinit(allocator); - } - self.edges.deinit(allocator); - } - - /// Inserts a new node starting from `self`. 
- fn put(self: *Node, allocator: Allocator, label: []const u8) !*Node { - // Check for match with edges from this node. - for (self.edges.items) |*edge| { - const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to; - if (match == 0) continue; - if (match == edge.label.len) return edge.to.put(allocator, label[match..]); - - // Found a match, need to splice up nodes. - // From: A -> B - // To: A -> C -> B - const mid = try allocator.create(Node); - mid.* = .{ .base = self.base }; - const to_label = try allocator.dupe(u8, edge.label[match..]); - allocator.free(edge.label); - const to_node = edge.to; - edge.to = mid; - edge.label = try allocator.dupe(u8, label[0..match]); - self.base.node_count += 1; - - try mid.edges.append(allocator, .{ - .from = mid, - .to = to_node, - .label = to_label, - }); - - return if (match == label.len) mid else mid.put(allocator, label[match..]); - } - - // Add a new node. - const node = try allocator.create(Node); - node.* = .{ .base = self.base }; - self.base.node_count += 1; - try self.edges.append(allocator, .{ - .from = self, - .to = node, - .label = try allocator.dupe(u8, label), - }); - - return node; - } - - /// Recursively parses the node from the input byte stream. - fn read(self: *Node, allocator: Allocator, reader: anytype) Trie.ReadError!usize { - self.node_dirty = true; - const trie_offset = try reader.context.getPos(); - self.trie_offset = trie_offset; - - var nread: usize = 0; - - const node_size = try leb.readULEB128(u64, reader); - if (node_size > 0) { - const export_flags = try leb.readULEB128(u64, reader); - // TODO Parse special flags. - assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and - export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); - - const vmaddr_offset = try leb.readULEB128(u64, reader); - - self.terminal_info = .{ - .export_flags = export_flags, - .vmaddr_offset = vmaddr_offset, - }; - } - - const nedges = try reader.readByte(); - self.base.node_count += nedges; - - nread += (try reader.context.getPos()) - trie_offset; - - var i: usize = 0; - while (i < nedges) : (i += 1) { - const edge_start_pos = try reader.context.getPos(); - - const label = blk: { - var label_buf = std.ArrayList(u8).init(allocator); - while (true) { - const next = try reader.readByte(); - if (next == @as(u8, 0)) - break; - try label_buf.append(next); - } - break :blk try label_buf.toOwnedSlice(); - }; - - const seek_to = try leb.readULEB128(u64, reader); - const return_pos = try reader.context.getPos(); - - nread += return_pos - edge_start_pos; - try reader.context.seekTo(seek_to); - - const node = try allocator.create(Node); - node.* = .{ .base = self.base }; - - nread += try node.read(allocator, reader); - try self.edges.append(allocator, .{ - .from = self, - .to = node, - .label = label, - }); - try reader.context.seekTo(return_pos); - } +/// The root node of the trie. +root: ?Node.Index = null, +buffer: std.ArrayListUnmanaged(u8) = .{}, +nodes: std.MultiArrayList(Node) = .{}, +edges: std.ArrayListUnmanaged(Edge) = .{}, - return nread; - } +/// Insert a symbol into the trie, updating the prefixes in the process. +/// This operation may change the layout of the trie by splicing edges in +/// certain circumstances. 
+fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void { + // const tracy = trace(@src()); + // defer tracy.end(); + + const node_index = try self.putNode(self.root.?, allocator, symbol.name); + const slice = self.nodes.slice(); + slice.items(.is_terminal)[node_index] = true; + slice.items(.vmaddr_offset)[node_index] = symbol.vmaddr_offset; + slice.items(.export_flags)[node_index] = symbol.export_flags; +} - /// Writes this node to a byte stream. - /// The children of this node *are* not written to the byte stream - /// recursively. To write all nodes to a byte stream in sequence, - /// iterate over `Trie.ordered_nodes` and call this method on each node. - /// This is one of the requirements of the MachO. - /// Panics if `finalize` was not called before calling this method. - fn write(self: Node, writer: anytype) !void { - assert(!self.node_dirty); - if (self.terminal_info) |info| { - // Terminal node info: encode export flags and vmaddr offset of this symbol. - var info_buf: [@sizeOf(u64) * 2]u8 = undefined; - var info_stream = std.io.fixedBufferStream(&info_buf); - // TODO Implement for special flags. - assert(info.export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and - info.export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); - try leb.writeULEB128(info_stream.writer(), info.export_flags); - try leb.writeULEB128(info_stream.writer(), info.vmaddr_offset); - - // Encode the size of the terminal node info. - var size_buf: [@sizeOf(u64)]u8 = undefined; - var size_stream = std.io.fixedBufferStream(&size_buf); - try leb.writeULEB128(size_stream.writer(), info_stream.pos); - - // Now, write them to the output stream. - try writer.writeAll(size_buf[0..size_stream.pos]); - try writer.writeAll(info_buf[0..info_stream.pos]); - } else { - // Non-terminal node is delimited by 0 byte. - try writer.writeByte(0); - } - // Write number of edges (max legal number of edges is 256). - try writer.writeByte(@as(u8, @intCast(self.edges.items.len))); - - for (self.edges.items) |edge| { - // Write edge label and offset to next node in trie. - try writer.writeAll(edge.label); - try writer.writeByte(0); - try leb.writeULEB128(writer, edge.to.trie_offset.?); - } +/// Inserts a new node starting at `node_index`. +fn putNode(self: *Trie, node_index: Node.Index, allocator: Allocator, label: []const u8) !Node.Index { + // Check for match with edges from this node. + for (self.nodes.items(.edges)[node_index].items) |edge_index| { + const edge = &self.edges.items[edge_index]; + const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.node; + if (match == 0) continue; + if (match == edge.label.len) return self.putNode(edge.node, allocator, label[match..]); + + // Found a match, need to splice up nodes. + // From: A -> B + // To: A -> C -> B + const mid_index = try self.addNode(allocator); + const to_label = edge.label[match..]; + const to_node = edge.node; + edge.node = mid_index; + edge.label = label[0..match]; + + const new_edge_index = try self.addEdge(allocator); + const new_edge = &self.edges.items[new_edge_index]; + new_edge.node = to_node; + new_edge.label = to_label; + try self.nodes.items(.edges)[mid_index].append(allocator, new_edge_index); + + return if (match == label.len) mid_index else self.putNode(mid_index, allocator, label[match..]); } - const FinalizeResult = struct { - /// Current size of this node in bytes. - node_size: u64, - - /// True if the trie offset of this node in the output byte stream - /// would need updating; false otherwise. 
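// Editor's note: the splice decision in putNode above hinges on
// std.mem.indexOfDiff, so a tiny standalone check of its three outcomes may
// help when reading that function. The symbol names are arbitrary examples.
const std = @import("std");

test "indexOfDiff outcomes that drive putNode" {
    // No shared prefix with this edge: 0, so the search moves on and a fresh
    // edge is added if no existing edge matches at all.
    try std.testing.expectEqual(@as(?usize, 0), std.mem.indexOfDiff(u8, "_main", "abc"));
    // Partial match: the existing edge is split after the shared "_" prefix.
    try std.testing.expectEqual(@as(?usize, 1), std.mem.indexOfDiff(u8, "_start", "_main"));
    // Identical labels: null, so the existing node is reused as-is.
    try std.testing.expectEqual(@as(?usize, null), std.mem.indexOfDiff(u8, "_main", "_main"));
}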
- updated: bool, - }; + // Add a new node. + const new_node_index = try self.addNode(allocator); + const new_edge_index = try self.addEdge(allocator); + const new_edge = &self.edges.items[new_edge_index]; + new_edge.node = new_node_index; + new_edge.label = label; + try self.nodes.items(.edges)[node_index].append(allocator, new_edge_index); - /// Updates offset of this node in the output byte stream. - fn finalize(self: *Node, offset_in_trie: u64) !FinalizeResult { - var stream = std.io.countingWriter(std.io.null_writer); - const writer = stream.writer(); - - var node_size: u64 = 0; - if (self.terminal_info) |info| { - try leb.writeULEB128(writer, info.export_flags); - try leb.writeULEB128(writer, info.vmaddr_offset); - try leb.writeULEB128(writer, stream.bytes_written); - } else { - node_size += 1; // 0x0 for non-terminal nodes - } - node_size += 1; // 1 byte for edge count + return new_node_index; +} - for (self.edges.items) |edge| { - const next_node_offset = edge.to.trie_offset orelse 0; - node_size += edge.label.len + 1; - try leb.writeULEB128(writer, next_node_offset); +pub fn updateSize(self: *Trie, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + + try self.init(gpa); + try self.nodes.ensureUnusedCapacity(gpa, macho_file.resolver.values.items.len * 2); + try self.edges.ensureUnusedCapacity(gpa, macho_file.resolver.values.items.len * 2); + + const seg = macho_file.getTextSegment(); + for (macho_file.resolver.values.items) |ref| { + if (ref.getFile(macho_file) == null) continue; + const sym = ref.getSymbol(macho_file).?; + if (!sym.flags.@"export") continue; + if (sym.getAtom(macho_file)) |atom| if (!atom.alive.load(.seq_cst)) continue; + var flags: u64 = if (sym.flags.abs) + macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE + else if (sym.flags.tlv) + macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL + else + macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR; + if (sym.flags.weak) { + flags |= macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; + macho_file.weak_defines.store(true, .seq_cst); + macho_file.binds_to_weak.store(true, .seq_cst); } - - const trie_offset = self.trie_offset orelse 0; - const updated = offset_in_trie != trie_offset; - self.trie_offset = offset_in_trie; - self.node_dirty = false; - node_size += stream.bytes_written; - - return FinalizeResult{ .node_size = node_size, .updated = updated }; + try self.put(gpa, .{ + .name = sym.getName(macho_file), + .vmaddr_offset = sym.getAddress(.{ .stubs = false }, macho_file) - seg.vmaddr, + .export_flags = flags, + }); } -}; - -/// The root node of the trie. -root: ?*Node = null, -/// If you want to access nodes ordered in DFS fashion, -/// you should call `finalize` first since the nodes -/// in this container are not guaranteed to not be stale -/// if more insertions took place after the last `finalize` -/// call. -ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}, + try self.finalize(gpa); -/// The size of the trie in bytes. -/// This value may be outdated if there were additional -/// insertions performed after `finalize` was called. -/// Call `finalize` before accessing this value to ensure -/// it is up-to-date. -size: u64 = 0, - -/// Number of nodes currently in the trie. -node_count: usize = 0, - -trie_dirty: bool = true, - -/// Export symbol that is to be placed in the trie. -pub const ExportSymbol = struct { - /// Name of the symbol. - name: []const u8, - - /// Offset of this symbol's virtual memory address from the beginning - /// of the __TEXT segment. 
- vmaddr_offset: u64, - - /// Export flags of this exported symbol. - export_flags: u64, -}; - -/// Insert a symbol into the trie, updating the prefixes in the process. -/// This operation may change the layout of the trie by splicing edges in -/// certain circumstances. -pub fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void { - const node = try self.root.?.put(allocator, symbol.name); - node.terminal_info = .{ - .vmaddr_offset = symbol.vmaddr_offset, - .export_flags = symbol.export_flags, - }; - self.trie_dirty = true; + macho_file.dyld_info_cmd.export_size = mem.alignForward(u32, @intCast(self.buffer.items.len), @alignOf(u64)); } /// Finalizes this trie for writing to a byte stream. /// This step performs multiple passes through the trie ensuring /// there are no gaps after every `Node` is ULEB128 encoded. /// Call this method before trying to `write` the trie to a byte stream. -pub fn finalize(self: *Trie, allocator: Allocator) !void { - if (!self.trie_dirty) return; +fn finalize(self: *Trie, allocator: Allocator) !void { + const tracy = trace(@src()); + defer tracy.end(); - self.ordered_nodes.shrinkRetainingCapacity(0); - try self.ordered_nodes.ensureTotalCapacity(allocator, self.node_count); + var ordered_nodes = std.ArrayList(Node.Index).init(allocator); + defer ordered_nodes.deinit(); + try ordered_nodes.ensureTotalCapacityPrecise(self.nodes.items(.is_terminal).len); - var fifo = std.fifo.LinearFifo(*Node, .Dynamic).init(allocator); + var fifo = std.fifo.LinearFifo(Node.Index, .Dynamic).init(allocator); defer fifo.deinit(); try fifo.writeItem(self.root.?); - while (fifo.readItem()) |next| { - for (next.edges.items) |*edge| { - try fifo.writeItem(edge.to); + while (fifo.readItem()) |next_index| { + const edges = &self.nodes.items(.edges)[next_index]; + for (edges.items) |edge_index| { + const edge = self.edges.items[edge_index]; + try fifo.writeItem(edge.node); } - self.ordered_nodes.appendAssumeCapacity(next); + ordered_nodes.appendAssumeCapacity(next_index); } var more: bool = true; + var size: u32 = 0; while (more) { - self.size = 0; + size = 0; more = false; - for (self.ordered_nodes.items) |node| { - const res = try node.finalize(self.size); - self.size += res.node_size; + for (ordered_nodes.items) |node_index| { + const res = try self.finalizeNode(node_index, size); + size += res.node_size; if (res.updated) more = true; } } - self.trie_dirty = false; + try self.buffer.ensureTotalCapacityPrecise(allocator, size); + for (ordered_nodes.items) |node_index| { + try self.writeNode(node_index, self.buffer.writer(allocator)); + } } -const ReadError = error{ - OutOfMemory, - EndOfStream, - Overflow, +const FinalizeNodeResult = struct { + /// Current size of this node in bytes. + node_size: u32, + + /// True if the trie offset of this node in the output byte stream + /// would need updating; false otherwise. + updated: bool, }; -/// Parse the trie from a byte stream. -pub fn read(self: *Trie, allocator: Allocator, reader: anytype) ReadError!usize { - return self.root.?.read(allocator, reader); -} +/// Updates offset of this node in the output byte stream. 
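// Editor's note: a small illustration of why the finalize() loop above must
// iterate until nothing moves. Edge targets are written as ULEB128 offsets,
// and a ULEB128 value gains a byte every 7 bits, so pushing a node past offset
// 127, 16383, ... changes the size of every node pointing at it, which can
// shift later offsets again. `ulebLen` is an illustrative helper.
const std = @import("std");

fn ulebLen(value: u64) usize {
    var counting = std.io.countingWriter(std.io.null_writer);
    std.leb.writeULEB128(counting.writer(), value) catch unreachable;
    return @intCast(counting.bytes_written);
}

test "ULEB128 offset size depends on its value" {
    try std.testing.expectEqual(@as(usize, 1), ulebLen(127));
    try std.testing.expectEqual(@as(usize, 2), ulebLen(128));
    try std.testing.expectEqual(@as(usize, 3), ulebLen(16384));
}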
+fn finalizeNode(self: *Trie, node_index: Node.Index, offset_in_trie: u32) !FinalizeNodeResult { + var stream = std.io.countingWriter(std.io.null_writer); + const writer = stream.writer(); + const slice = self.nodes.slice(); + + var node_size: u32 = 0; + if (slice.items(.is_terminal)[node_index]) { + const export_flags = slice.items(.export_flags)[node_index]; + const vmaddr_offset = slice.items(.vmaddr_offset)[node_index]; + try leb.writeULEB128(writer, export_flags); + try leb.writeULEB128(writer, vmaddr_offset); + try leb.writeULEB128(writer, stream.bytes_written); + } else { + node_size += 1; // 0x0 for non-terminal nodes + } + node_size += 1; // 1 byte for edge count -/// Write the trie to a byte stream. -/// Panics if the trie was not finalized using `finalize` before calling this method. -pub fn write(self: Trie, writer: anytype) !void { - assert(!self.trie_dirty); - for (self.ordered_nodes.items) |node| { - try node.write(writer); + for (slice.items(.edges)[node_index].items) |edge_index| { + const edge = &self.edges.items[edge_index]; + const next_node_offset = slice.items(.trie_offset)[edge.node]; + node_size += @intCast(edge.label.len + 1); + try leb.writeULEB128(writer, next_node_offset); } + + const trie_offset = slice.items(.trie_offset)[node_index]; + const updated = offset_in_trie != trie_offset; + slice.items(.trie_offset)[node_index] = offset_in_trie; + node_size += @intCast(stream.bytes_written); + + return .{ .node_size = node_size, .updated = updated }; } -pub fn init(self: *Trie, allocator: Allocator) !void { +fn init(self: *Trie, allocator: Allocator) !void { assert(self.root == null); - const root = try allocator.create(Node); - root.* = .{ .base = self }; - self.root = root; - self.node_count += 1; + self.root = try self.addNode(allocator); } pub fn deinit(self: *Trie, allocator: Allocator) void { - if (self.root) |root| { - root.deinit(allocator); - allocator.destroy(root); + for (self.nodes.items(.edges)) |*edges| { + edges.deinit(allocator); } - self.ordered_nodes.deinit(allocator); + self.nodes.deinit(allocator); + self.edges.deinit(allocator); + self.buffer.deinit(allocator); } -test "Trie node count" { - const gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); +pub fn write(self: Trie, writer: anytype) !void { + if (self.buffer.items.len == 0) return; + try writer.writeAll(self.buffer.items); +} - try testing.expectEqual(@as(usize, 1), trie.node_count); - try testing.expect(trie.root != null); +/// Writes this node to a byte stream. +/// The children of this node *are* not written to the byte stream +/// recursively. To write all nodes to a byte stream in sequence, +/// iterate over `Trie.ordered_nodes` and call this method on each node. +/// This is one of the requirements of the MachO. +/// Panics if `finalize` was not called before calling this method. +fn writeNode(self: *Trie, node_index: Node.Index, writer: anytype) !void { + const slice = self.nodes.slice(); + const edges = slice.items(.edges)[node_index]; + const is_terminal = slice.items(.is_terminal)[node_index]; + const export_flags = slice.items(.export_flags)[node_index]; + const vmaddr_offset = slice.items(.vmaddr_offset)[node_index]; + + if (is_terminal) { + // Terminal node info: encode export flags and vmaddr offset of this symbol. + var info_buf: [@sizeOf(u64) * 2]u8 = undefined; + var info_stream = std.io.fixedBufferStream(&info_buf); + // TODO Implement for special flags. 
+ assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and + export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); + try leb.writeULEB128(info_stream.writer(), export_flags); + try leb.writeULEB128(info_stream.writer(), vmaddr_offset); + + // Encode the size of the terminal node info. + var size_buf: [@sizeOf(u64)]u8 = undefined; + var size_stream = std.io.fixedBufferStream(&size_buf); + try leb.writeULEB128(size_stream.writer(), info_stream.pos); + + // Now, write them to the output stream. + try writer.writeAll(size_buf[0..size_stream.pos]); + try writer.writeAll(info_buf[0..info_stream.pos]); + } else { + // Non-terminal node is delimited by 0 byte. + try writer.writeByte(0); + } + // Write number of edges (max legal number of edges is 256). + try writer.writeByte(@as(u8, @intCast(edges.items.len))); + + for (edges.items) |edge_index| { + const edge = self.edges.items[edge_index]; + // Write edge label and offset to next node in trie. + try writer.writeAll(edge.label); + try writer.writeByte(0); + try leb.writeULEB128(writer, slice.items(.trie_offset)[edge.node]); + } +} - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expectEqual(@as(usize, 2), trie.node_count); +fn addNode(self: *Trie, allocator: Allocator) !Node.Index { + const index: Node.Index = @intCast(try self.nodes.addOne(allocator)); + self.nodes.set(index, .{}); + return index; +} - // Inserting the same node shouldn't update the trie. - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expectEqual(@as(usize, 2), trie.node_count); +fn addEdge(self: *Trie, allocator: Allocator) !Edge.Index { + const index: Edge.Index = @intCast(self.edges.items.len); + const edge = try self.edges.addOne(allocator); + edge.* = .{}; + return index; +} - try trie.put(gpa, .{ - .name = "__mh_execute_header", - .vmaddr_offset = 0x1000, - .export_flags = 0, - }); - try testing.expectEqual(@as(usize, 4), trie.node_count); +/// Export symbol that is to be placed in the trie. +pub const ExportSymbol = struct { + /// Name of the symbol. + name: []const u8, - // Inserting the same node shouldn't update the trie. - try trie.put(gpa, .{ - .name = "__mh_execute_header", - .vmaddr_offset = 0x1000, - .export_flags = 0, - }); - try testing.expectEqual(@as(usize, 4), trie.node_count); - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expectEqual(@as(usize, 4), trie.node_count); -} + /// Offset of this symbol's virtual memory address from the beginning + /// of the __TEXT segment. + vmaddr_offset: u64, -test "Trie basic" { - const gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); + /// Export flags of this exported symbol. 
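// Editor's note: a standalone rendering of the terminal-node byte layout that
// writeNode above emits, using the same values as the "write Trie to a byte
// stream" test kept further down (vmaddr offset 0x1000, no flags, no edges);
// the expected bytes match that test's "terminal node" comment.
const std = @import("std");
const leb = std.leb;

test "terminal trie node with no outgoing edges" {
    var buf = std.ArrayList(u8).init(std.testing.allocator);
    defer buf.deinit();
    const writer = buf.writer();

    var info_buf: [16]u8 = undefined;
    var info_stream = std.io.fixedBufferStream(&info_buf);
    try leb.writeULEB128(info_stream.writer(), @as(u64, 0)); // export flags
    try leb.writeULEB128(info_stream.writer(), @as(u64, 0x1000)); // vmaddr offset

    try leb.writeULEB128(writer, info_stream.pos); // size of the terminal info
    try writer.writeAll(info_buf[0..info_stream.pos]);
    try writer.writeByte(0); // edge count

    try std.testing.expectEqualSlices(u8, &.{ 0x3, 0x0, 0x80, 0x20, 0x0 }, buf.items);
}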
+ export_flags: u64, +}; - // root --- _st ---> node - try trie.put(gpa, .{ - .name = "_st", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expect(trie.root.?.edges.items.len == 1); - try testing.expect(mem.eql(u8, trie.root.?.edges.items[0].label, "_st")); - - { - // root --- _st ---> node --- art ---> node - try trie.put(gpa, .{ - .name = "_start", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expect(trie.root.?.edges.items.len == 1); +const Node = struct { + is_terminal: bool = false, - const nextEdge = &trie.root.?.edges.items[0]; - try testing.expect(mem.eql(u8, nextEdge.label, "_st")); - try testing.expect(nextEdge.to.edges.items.len == 1); - try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art")); - } - { - // root --- _ ---> node --- st ---> node --- art ---> node - // | - // | --- main ---> node - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expect(trie.root.?.edges.items.len == 1); + /// Export flags associated with this exported symbol. + export_flags: u64 = 0, - const nextEdge = &trie.root.?.edges.items[0]; - try testing.expect(mem.eql(u8, nextEdge.label, "_")); - try testing.expect(nextEdge.to.edges.items.len == 2); - try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st")); - try testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "main")); + /// VM address offset wrt to the section this symbol is defined against. + vmaddr_offset: u64 = 0, - const nextNextEdge = &nextEdge.to.edges.items[0]; - try testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "art")); - } -} + /// Offset of this node in the trie output byte stream. + trie_offset: u32 = 0, + + /// List of all edges originating from this node. + edges: std.ArrayListUnmanaged(Edge.Index) = .{}, + + const Index = u32; +}; + +/// Edge connecting nodes in the trie. +const Edge = struct { + /// Target node in the trie. + node: Node.Index = 0, + + /// Matching prefix. + label: []const u8 = "", + + const Index = u32; +}; fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void { assert(expected.len > 0); @@ -502,7 +349,7 @@ fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void { } test "write Trie to a byte stream" { - var gpa = testing.allocator; + const gpa = testing.allocator; var trie: Trie = .{}; defer trie.deinit(gpa); try trie.init(gpa); @@ -519,7 +366,6 @@ test "write Trie to a byte stream" { }); try trie.finalize(gpa); - try trie.finalize(gpa); // Finalizing mulitple times is a nop subsequently unless we add new nodes. const exp_buffer = [_]u8{ 0x0, 0x1, // node root @@ -531,51 +377,7 @@ test "write Trie to a byte stream" { 0x2, 0x0, 0x0, 0x0, // terminal node 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node }; - - const buffer = try gpa.alloc(u8, trie.size); - defer gpa.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - { - _ = try trie.write(stream.writer()); - try expectEqualHexStrings(&exp_buffer, buffer); - } - { - // Writing finalized trie again should yield the same result. 
- try stream.seekTo(0); - _ = try trie.write(stream.writer()); - try expectEqualHexStrings(&exp_buffer, buffer); - } -} - -test "parse Trie from byte stream" { - const gpa = testing.allocator; - - const in_buffer = [_]u8{ - 0x0, 0x1, // node root - 0x5f, 0x0, 0x5, // edge '_' - 0x0, 0x2, // non-terminal node - 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' - 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' - 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' - 0x2, 0x0, 0x0, 0x0, // terminal node - 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node - }; - - var in_stream = std.io.fixedBufferStream(&in_buffer); - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - const nread = try trie.read(gpa, in_stream.reader()); - - try testing.expect(nread == in_buffer.len); - - try trie.finalize(gpa); - - const out_buffer = try gpa.alloc(u8, trie.size); - defer gpa.free(out_buffer); - var out_stream = std.io.fixedBufferStream(out_buffer); - _ = try trie.write(out_stream.writer()); - try expectEqualHexStrings(&in_buffer, out_buffer); + try expectEqualHexStrings(&exp_buffer, trie.buffer.items); } test "ordering bug" { @@ -602,11 +404,18 @@ test "ordering bug" { 0x88, 0x80, 0x02, 0x01, 0x73, 0x53, 0x74, 0x72, 0x00, 0x12, 0x03, 0x00, 0xD8, 0x0A, 0x00, }; - - const buffer = try gpa.alloc(u8, trie.size); - defer gpa.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - // Writing finalized trie again should yield the same result. - _ = try trie.write(stream.writer()); - try expectEqualHexStrings(&exp_buffer, buffer); + try expectEqualHexStrings(&exp_buffer, trie.buffer.items); } + +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.macho); +const macho = std.macho; +const mem = std.mem; +const std = @import("std"); +const testing = std.testing; +const trace = @import("../../tracy.zig").trace; + +const Allocator = mem.Allocator; +const MachO = @import("../../MachO.zig"); +const Trie = @This(); diff --git a/src/MachO/dyld_info/bind.zig b/src/MachO/dyld_info/bind.zig index 5bc872e2..c911d374 100644 --- a/src/MachO/dyld_info/bind.zig +++ b/src/MachO/dyld_info/bind.zig @@ -1,28 +1,19 @@ -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const log = std.log.scoped(.dyld_info); -const macho = std.macho; -const testing = std.testing; - -const Allocator = std.mem.Allocator; -const MachO = @import("../../MachO.zig"); -const Symbol = @import("../Symbol.zig"); - pub const Entry = struct { - target: Symbol.Index, + target: MachO.Ref, offset: u64, segment_id: u8, addend: i64, pub fn lessThan(ctx: *MachO, entry: Entry, other: Entry) bool { + _ = ctx; if (entry.segment_id == other.segment_id) { - if (entry.target == other.target) { + if (entry.target.eql(other.target)) { return entry.offset < other.offset; } - const entry_name = ctx.getSymbol(entry.target).getName(ctx); - const other_name = ctx.getSymbol(other.target).getName(ctx); - return std.mem.lessThan(u8, entry_name, other_name); + if (entry.target.file == other.target.file) { + return entry.target.index < other.target.index; + } + return entry.target.file < other.target.file; } return entry.segment_id < other.segment_id; } @@ -43,7 +34,108 @@ pub const Bind = struct { return @as(u64, @intCast(self.buffer.items.len)); } - pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { + pub fn updateSize(self: *Self, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); 
+ + const gpa = macho_file.base.allocator; + const cpu_arch = macho_file.options.cpu_arch.?; + + var objects = try std.ArrayList(File.Index).initCapacity(gpa, macho_file.objects.items.len + 1); + defer objects.deinit(); + objects.appendSliceAssumeCapacity(macho_file.objects.items); + if (macho_file.getInternalObject()) |obj| objects.appendAssumeCapacity(obj.index); + + for (objects.items) |index| { + const file = macho_file.getFile(index).?; + for (file.getAtoms()) |atom_index| { + const atom = file.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; + if (atom.getInputSection(macho_file).isZerofill()) continue; + const atom_addr = atom.getAddress(macho_file); + const relocs = atom.getRelocs(macho_file); + const seg_id = macho_file.sections.items(.segment_id)[atom.out_n_sect]; + const seg = macho_file.segments.items[seg_id]; + for (relocs) |rel| { + if (rel.type != .unsigned or rel.meta.length != 3 or rel.tag != .@"extern") continue; + const rel_offset = rel.offset - atom.off; + const addend = rel.addend + rel.getRelocAddend(cpu_arch); + const sym = rel.getTargetSymbol(atom.*, macho_file); + if (sym.isTlvInit(macho_file)) continue; + const entry = Entry{ + .target = rel.getTargetSymbolRef(atom.*, macho_file), + .offset = atom_addr + rel_offset - seg.vmaddr, + .segment_id = seg_id, + .addend = addend, + }; + if (sym.flags.import or (!(sym.flags.@"export" and sym.flags.weak) and sym.flags.interposable)) { + try self.entries.append(gpa, entry); + } + } + } + } + + if (macho_file.got_sect_index) |sid| { + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + for (macho_file.got.symbols.items, 0..) |ref, idx| { + const sym = ref.getSymbol(macho_file).?; + const addr = macho_file.got.getAddress(@intCast(idx), macho_file); + const entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.import or (sym.flags.@"export" and sym.flags.interposable and !sym.flags.weak)) { + try self.entries.append(gpa, entry); + } + } + } + + if (macho_file.la_symbol_ptr_sect_index) |sid| { + const sect = macho_file.sections.items(.header)[sid]; + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + for (macho_file.stubs.symbols.items, 0..) |ref, idx| { + const sym = ref.getSymbol(macho_file).?; + const addr = sect.addr + idx * @sizeOf(u64); + const bind_entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.import and sym.flags.weak) { + try self.entries.append(gpa, bind_entry); + } + } + } + + if (macho_file.tlv_ptr_sect_index) |sid| { + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + + for (macho_file.tlv_ptr.symbols.items, 0..) 
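// Editor's note: a hedged sketch of the dyld bind opcode stream that the
// finalize() state machine above ultimately emits per target: choose the dylib
// ordinal, name the symbol, set type and location, then BIND. `emitSingleBind`
// is a hypothetical helper; the constants are assumed to be the std.macho
// BIND_* definitions, and the real encoder coalesces runs of entries instead
// of emitting one binding at a time.
const std = @import("std");
const macho = std.macho;
const leb = std.leb;

fn emitSingleBind(writer: anytype, ordinal: u4, name: []const u8, segment_id: u4, offset: u64) !void {
    try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @as(u8, ordinal));
    try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM);
    try writer.writeAll(name);
    try writer.writeByte(0); // the symbol name is NUL-terminated
    try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | macho.BIND_TYPE_POINTER);
    try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @as(u8, segment_id));
    try leb.writeULEB128(writer, offset);
    try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
    try writer.writeByte(macho.BIND_OPCODE_DONE);
}

test "bind one imported pointer" {
    var buf = std.ArrayList(u8).init(std.testing.allocator);
    defer buf.deinit();
    try emitSingleBind(buf.writer(), 1, "_malloc", 2, 0x8);
    try std.testing.expectEqual(@as(usize, 15), buf.items.len);
}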
|ref, idx| { + const sym = ref.getSymbol(macho_file).?; + const addr = macho_file.tlv_ptr.getAddress(@intCast(idx), macho_file); + const entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.import or (sym.flags.@"export" and sym.flags.interposable and !sym.flags.weak)) { + try self.entries.append(gpa, entry); + } + } + } + + try self.finalize(gpa, macho_file); + macho_file.dyld_info_cmd.bind_size = mem.alignForward(u32, @intCast(self.size()), @alignOf(u64)); + } + + fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { if (self.entries.items.len == 0) return; const writer = self.buffer.writer(gpa); @@ -73,7 +165,7 @@ pub const Bind = struct { var addend: i64 = 0; var count: usize = 0; var skip: u64 = 0; - var target: ?Symbol.Index = null; + var target: ?MachO.Ref = null; var state: enum { start, @@ -84,7 +176,7 @@ pub const Bind = struct { var i: usize = 0; while (i < entries.len) : (i += 1) { const current = entries[i]; - if (target == null or target.? != current.target) { + if (target == null or !target.?.eql(current.target)) { switch (state) { .start => {}, .bind_single => try doBind(writer), @@ -93,7 +185,7 @@ pub const Bind = struct { state = .start; target = current.target; - const sym = ctx.getSymbol(current.target); + const sym = current.target.getSymbol(ctx).?; const name = sym.getName(ctx); const flags: u8 = if (sym.weakRef(ctx)) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0; const ordinal: i16 = ord: { @@ -196,7 +288,109 @@ pub const WeakBind = struct { return @as(u64, @intCast(self.buffer.items.len)); } - pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { + pub fn updateSize(self: *Self, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + const cpu_arch = macho_file.options.cpu_arch.?; + + var objects = try std.ArrayList(File.Index).initCapacity(gpa, macho_file.objects.items.len + 1); + defer objects.deinit(); + objects.appendSliceAssumeCapacity(macho_file.objects.items); + if (macho_file.getInternalObject()) |obj| objects.appendAssumeCapacity(obj.index); + + for (objects.items) |index| { + const file = macho_file.getFile(index).?; + for (file.getAtoms()) |atom_index| { + const atom = file.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; + if (atom.getInputSection(macho_file).isZerofill()) continue; + const atom_addr = atom.getAddress(macho_file); + const relocs = atom.getRelocs(macho_file); + const seg_id = macho_file.sections.items(.segment_id)[atom.out_n_sect]; + const seg = macho_file.segments.items[seg_id]; + for (relocs) |rel| { + if (rel.type != .unsigned or rel.meta.length != 3 or rel.tag != .@"extern") continue; + const rel_offset = rel.offset - atom.off; + const addend = rel.addend + rel.getRelocAddend(cpu_arch); + const sym = rel.getTargetSymbol(atom.*, macho_file); + if (sym.isTlvInit(macho_file)) continue; + const entry = Entry{ + .target = rel.getTargetSymbolRef(atom.*, macho_file), + .offset = atom_addr + rel_offset - seg.vmaddr, + .segment_id = seg_id, + .addend = addend, + }; + if (!sym.isLocal() and sym.flags.weak) { + try self.entries.append(gpa, entry); + } + } + } + } + + if (macho_file.got_sect_index) |sid| { + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + for (macho_file.got.symbols.items, 0..) 
|ref, idx| { + const sym = ref.getSymbol(macho_file).?; + const addr = macho_file.got.getAddress(@intCast(idx), macho_file); + const entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.weak) { + try self.entries.append(gpa, entry); + } + } + } + + if (macho_file.la_symbol_ptr_sect_index) |sid| { + const sect = macho_file.sections.items(.header)[sid]; + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + + for (macho_file.stubs.symbols.items, 0..) |ref, idx| { + const sym = ref.getSymbol(macho_file).?; + const addr = sect.addr + idx * @sizeOf(u64); + const bind_entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.weak) { + try self.entries.append(gpa, bind_entry); + } + } + } + + if (macho_file.tlv_ptr_sect_index) |sid| { + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + + for (macho_file.tlv_ptr.symbols.items, 0..) |ref, idx| { + const sym = ref.getSymbol(macho_file).?; + const addr = macho_file.tlv_ptr.getAddress(@intCast(idx), macho_file); + const entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.weak) { + try self.entries.append(gpa, entry); + } + } + } + + try self.finalize(gpa, macho_file); + macho_file.dyld_info_cmd.weak_bind_size = mem.alignForward(u32, @intCast(self.size()), @alignOf(u64)); + } + + fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { if (self.entries.items.len == 0) return; const writer = self.buffer.writer(gpa); @@ -226,7 +420,7 @@ pub const WeakBind = struct { var addend: i64 = 0; var count: usize = 0; var skip: u64 = 0; - var target: ?Symbol.Index = null; + var target: ?MachO.Ref = null; var state: enum { start, @@ -237,7 +431,7 @@ pub const WeakBind = struct { var i: usize = 0; while (i < entries.len) : (i += 1) { const current = entries[i]; - if (target == null or target.? != current.target) { + if (target == null or !target.?.eql(current.target)) { switch (state) { .start => {}, .bind_single => try doBind(writer), @@ -246,7 +440,7 @@ pub const WeakBind = struct { state = .start; target = current.target; - const sym = ctx.getSymbol(current.target); + const sym = current.target.getSymbol(ctx).?; const name = sym.getName(ctx); const flags: u8 = 0; // TODO NON_WEAK_DEFINITION @@ -340,7 +534,36 @@ pub const LazyBind = struct { return @as(u64, @intCast(self.buffer.items.len)); } - pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { + pub fn updateSize(self: *Self, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.allocator; + + const sid = macho_file.la_symbol_ptr_sect_index.?; + const sect = macho_file.sections.items(.header)[sid]; + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + + for (macho_file.stubs.symbols.items, 0..) 
|ref, idx| { + const sym = ref.getSymbol(macho_file).?; + const addr = sect.addr + idx * @sizeOf(u64); + const bind_entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if ((sym.flags.import and !sym.flags.weak) or (sym.flags.interposable and !sym.flags.weak)) { + try self.entries.append(gpa, bind_entry); + } + } + + try self.finalize(gpa, macho_file); + macho_file.dyld_info_cmd.lazy_bind_size = mem.alignForward(u32, @intCast(self.size()), @alignOf(u64)); + } + + fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { if (self.entries.items.len == 0) return; try self.offsets.ensureTotalCapacityPrecise(gpa, self.entries.items.len); @@ -353,7 +576,7 @@ pub const LazyBind = struct { for (self.entries.items) |entry| { self.offsets.appendAssumeCapacity(@as(u32, @intCast(cwriter.bytes_written))); - const sym = ctx.getSymbol(entry.target); + const sym = entry.target.getSymbol(ctx).?; const name = sym.getName(ctx); const flags: u8 = if (sym.weakRef(ctx)) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0; const ordinal: i16 = ord: { @@ -472,3 +695,17 @@ fn done(writer: anytype) !void { log.debug(">>> done", .{}); try writer.writeByte(macho.BIND_OPCODE_DONE); } + +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.dyld_info); +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; +const std = @import("std"); +const trace = @import("../../tracy.zig").trace; + +const Allocator = mem.Allocator; +const File = @import("../file.zig").File; +const MachO = @import("../../MachO.zig"); +const Symbol = @import("../Symbol.zig"); diff --git a/src/MachO/eh_frame.zig b/src/MachO/eh_frame.zig index e87d80a0..69e53095 100644 --- a/src/MachO/eh_frame.zig +++ b/src/MachO/eh_frame.zig @@ -68,7 +68,8 @@ pub const Cie = struct { pub fn getPersonality(cie: Cie, macho_file: *MachO) ?*Symbol { const personality = cie.personality orelse return null; - return macho_file.getSymbol(personality.index); + const object = cie.getObject(macho_file); + return object.getSymbolRef(personality.index, macho_file).getSymbol(macho_file); } pub fn eql(cie: Cie, other: Cie, macho_file: *MachO) bool { @@ -224,11 +225,11 @@ pub const Fde = struct { } pub fn getAtom(fde: Fde, macho_file: *MachO) *Atom { - return macho_file.getAtom(fde.atom).?; + return fde.getObject(macho_file).getAtom(fde.atom).?; } pub fn getLsdaAtom(fde: Fde, macho_file: *MachO) ?*Atom { - return macho_file.getAtom(fde.lsda); + return fde.getObject(macho_file).getAtom(fde.lsda); } pub fn format( @@ -449,7 +450,7 @@ pub fn write(macho_file: *MachO, buffer: []u8) void { } } -pub fn writeRelocs(macho_file: *MachO, code: []u8, relocs: *std.ArrayList(macho.relocation_info)) error{Overflow}!void { +pub fn writeRelocs(macho_file: *MachO, code: []u8, relocs: []macho.relocation_info) error{Overflow}!void { const tracy = trace(@src()); defer tracy.end(); @@ -460,6 +461,7 @@ pub fn writeRelocs(macho_file: *MachO, code: []u8, relocs: *std.ArrayList(macho. else => 0, }; + var i: usize = 0; for (macho_file.objects.items) |index| { const object = macho_file.getFile(index).?.object; for (object.cies.items) |cie| { @@ -470,7 +472,7 @@ pub fn writeRelocs(macho_file: *MachO, code: []u8, relocs: *std.ArrayList(macho. if (cie.getPersonality(macho_file)) |sym| { const r_address = math.cast(i32, cie.out_offset + cie.personality.?.offset) orelse return error.Overflow; const r_symbolnum = math.cast(u24, sym.getOutputSymtabIndex(macho_file).?) 
orelse return error.Overflow; - relocs.appendAssumeCapacity(.{ + relocs[i] = .{ .r_address = r_address, .r_symbolnum = r_symbolnum, .r_length = 2, @@ -481,7 +483,8 @@ pub fn writeRelocs(macho_file: *MachO, code: []u8, relocs: *std.ArrayList(macho. .x86_64 => @intFromEnum(macho.reloc_type_x86_64.X86_64_RELOC_GOT), else => unreachable, }, - }); + }; + i += 1; } } } @@ -532,6 +535,8 @@ pub fn writeRelocs(macho_file: *MachO, code: []u8, relocs: *std.ArrayList(macho. } } } + + assert(relocs.len == i); } pub const EH_PE = struct { diff --git a/src/MachO/fat.zig b/src/MachO/fat.zig index 46cf0139..7772f7a4 100644 --- a/src/MachO/fat.zig +++ b/src/MachO/fat.zig @@ -8,11 +8,17 @@ const native_endian = builtin.target.cpu.arch.endian(); const MachO = @import("../MachO.zig"); -pub fn isFatLibrary(file: std.fs.File) bool { - const reader = file.reader(); - const hdr = reader.readStructEndian(macho.fat_header, .big) catch return false; - defer file.seekTo(0) catch {}; - return hdr.magic == macho.FAT_MAGIC; +pub fn readFatHeader(file: std.fs.File) !macho.fat_header { + return readFatHeaderGeneric(macho.fat_header, file, 0); +} + +fn readFatHeaderGeneric(comptime Hdr: type, file: std.fs.File, offset: usize) !Hdr { + var buffer: [@sizeOf(Hdr)]u8 = undefined; + const nread = try file.preadAll(&buffer, offset); + if (nread != buffer.len) return error.InputOutput; + var hdr = @as(*align(1) const Hdr, @ptrCast(&buffer)).*; + mem.byteSwapAllFields(Hdr, &hdr); + return hdr; } pub const Arch = struct { @@ -21,15 +27,12 @@ pub const Arch = struct { size: u32, }; -pub fn parseArchs(file: std.fs.File, buffer: *[2]Arch) ![]const Arch { - const reader = file.reader(); - const fat_header = try reader.readStructEndian(macho.fat_header, .big); - assert(fat_header.magic == macho.FAT_MAGIC); - +pub fn parseArchs(file: std.fs.File, fat_header: macho.fat_header, out: *[2]Arch) ![]const Arch { var count: usize = 0; var fat_arch_index: u32 = 0; - while (fat_arch_index < fat_header.nfat_arch) : (fat_arch_index += 1) { - const fat_arch = try reader.readStructEndian(macho.fat_arch, .big); + while (fat_arch_index < fat_header.nfat_arch and count < out.len) : (fat_arch_index += 1) { + const offset = @sizeOf(macho.fat_header) + @sizeOf(macho.fat_arch) * fat_arch_index; + const fat_arch = try readFatHeaderGeneric(macho.fat_arch, file, offset); // If we come across an architecture that we do not know how to handle, that's // fine because we can keep looking for one that might match. 
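// Editor's note: readFatHeaderGeneric above byte-swaps every field because fat
// headers are stored big-endian on disk. A small self-contained check of that
// round trip; the fat_header constructed here is synthetic test data, not
// something read from a real universal binary.
const std = @import("std");
const macho = std.macho;

test "fat header fields are big-endian on disk" {
    var hdr = macho.fat_header{
        .magic = @byteSwap(@as(u32, macho.FAT_MAGIC)), // as laid out in the file
        .nfat_arch = @byteSwap(@as(u32, 2)),
    };
    std.mem.byteSwapAllFields(macho.fat_header, &hdr);
    try std.testing.expectEqual(@as(u32, macho.FAT_MAGIC), hdr.magic);
    try std.testing.expectEqual(@as(u32, 2), hdr.nfat_arch);
}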
const arch: std.Target.Cpu.Arch = switch (fat_arch.cputype) { @@ -37,9 +40,9 @@ pub fn parseArchs(file: std.fs.File, buffer: *[2]Arch) ![]const Arch { macho.CPU_TYPE_X86_64 => if (fat_arch.cpusubtype == macho.CPU_SUBTYPE_X86_64_ALL) .x86_64 else continue, else => continue, }; - buffer[count] = .{ .tag = arch, .offset = fat_arch.offset, .size = fat_arch.size }; + out[count] = .{ .tag = arch, .offset = fat_arch.offset, .size = fat_arch.size }; count += 1; } - return buffer[0..count]; + return out[0..count]; } diff --git a/src/MachO/file.zig b/src/MachO/file.zig index f35db136..d21342e0 100644 --- a/src/MachO/file.zig +++ b/src/MachO/file.zig @@ -28,17 +28,17 @@ pub const File = union(enum) { } } - pub fn resolveSymbols(file: File, macho_file: *MachO) void { + pub fn resolveSymbols(file: File, macho_file: *MachO) !void { switch (file) { - .internal => unreachable, - inline else => |x| x.resolveSymbols(macho_file), + inline else => |x| try x.resolveSymbols(macho_file), } } - pub fn resetGlobals(file: File, macho_file: *MachO) void { + pub fn scanRelocs(file: File, macho_file: *MachO) !void { switch (file) { - .internal => unreachable, - inline else => |x| x.resetGlobals(macho_file), + .dylib => unreachable, + .object => |x| try x.scanRelocs(macho_file), + .internal => |x| x.scanRelocs(macho_file), } } @@ -69,20 +69,126 @@ pub const File = union(enum) { return base + (file.getIndex() << 24); } - pub fn getSymbols(file: File) []const Symbol.Index { + pub fn getAtom(file: File, atom_index: Atom.Index) ?*Atom { return switch (file) { - inline else => |x| x.symbols.items, + .dylib => unreachable, + inline else => |x| x.getAtom(atom_index), }; } pub fn getAtoms(file: File) []const Atom.Index { return switch (file) { .dylib => unreachable, - inline else => |x| x.atoms.items, + inline else => |x| x.getAtoms(), + }; + } + + pub fn addAtomExtra(file: File, allocator: Allocator, extra: Atom.Extra) !u32 { + return switch (file) { + .dylib => unreachable, + inline else => |x| x.addAtomExtra(allocator, extra), + }; + } + + pub fn getAtomExtra(file: File, index: u32) Atom.Extra { + return switch (file) { + .dylib => unreachable, + inline else => |x| x.getAtomExtra(index), + }; + } + + pub fn setAtomExtra(file: File, index: u32, extra: Atom.Extra) void { + return switch (file) { + .dylib => unreachable, + inline else => |x| x.setAtomExtra(index, extra), + }; + } + + pub fn getSymbols(file: File) []Symbol { + return switch (file) { + inline else => |x| x.symbols.items, + }; + } + + pub fn getSymbolRef(file: File, sym_index: Symbol.Index, macho_file: *MachO) MachO.Ref { + return switch (file) { + inline else => |x| x.getSymbolRef(sym_index, macho_file), + }; + } + + pub fn markImportsAndExports(file: File, macho_file: *MachO) void { + const nsyms = switch (file) { + .dylib => unreachable, + inline else => |x| x.symbols.items.len, + }; + for (0..nsyms) |i| { + const ref = file.getSymbolRef(@intCast(i), macho_file); + if (ref.getFile(macho_file) == null) continue; + const sym = ref.getSymbol(macho_file).?; + if (sym.visibility != .global) continue; + if (sym.getFile(macho_file).? 
== .dylib and !sym.flags.abs) { + sym.flags.import = true; + continue; + } + if (file.getIndex() == ref.file) { + sym.flags.@"export" = true; + } + } + } + + pub fn createSymbolIndirection(file: File, macho_file: *MachO) !void { + const nsyms = switch (file) { + inline else => |x| x.symbols.items.len, + }; + for (0..nsyms) |i| { + const ref = file.getSymbolRef(@intCast(i), macho_file); + if (ref.getFile(macho_file) == null) continue; + if (ref.file != file.getIndex()) continue; + const sym = ref.getSymbol(macho_file).?; + if (sym.getSectionFlags().got) { + log.debug("'{s}' needs GOT", .{sym.getName(macho_file)}); + try macho_file.got.addSymbol(ref, macho_file); + } + if (sym.getSectionFlags().stubs) { + log.debug("'{s}' needs STUBS", .{sym.getName(macho_file)}); + try macho_file.stubs.addSymbol(ref, macho_file); + } + if (sym.getSectionFlags().tlv_ptr) { + log.debug("'{s}' needs TLV pointer", .{sym.getName(macho_file)}); + try macho_file.tlv_ptr.addSymbol(ref, macho_file); + } + if (sym.getSectionFlags().objc_stubs) { + log.debug("'{s}' needs OBJC STUBS", .{sym.getName(macho_file)}); + try macho_file.objc_stubs.addSymbol(ref, macho_file); + } + } + } + + pub fn initOutputSections(file: File, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + for (file.getAtoms()) |atom_index| { + const atom = file.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; + atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(macho_file), macho_file); + } + } + + pub fn dedupLiterals(file: File, lp: MachO.LiteralPool, macho_file: *MachO) void { + return switch (file) { + .dylib => unreachable, + inline else => |x| x.dedupLiterals(lp, macho_file), + }; + } + + pub fn writeAtoms(file: File, macho_file: *MachO) !void { + return switch (file) { + .dylib => unreachable, + inline else => |x| x.writeAtoms(macho_file), }; } - pub fn calcSymtabSize(file: File, macho_file: *MachO) !void { + pub fn calcSymtabSize(file: File, macho_file: *MachO) void { return switch (file) { inline else => |x| x.calcSymtabSize(macho_file), }; @@ -107,8 +213,12 @@ pub const File = union(enum) { pub const HandleIndex = Index; }; +const assert = std.debug.assert; +const bind = @import("dyld_info/bind.zig"); +const log = std.log.scoped(.link); const macho = std.macho; const std = @import("std"); +const trace = @import("../tracy.zig").trace; const Allocator = std.mem.Allocator; const Atom = @import("Atom.zig"); diff --git a/src/MachO/hasher.zig b/src/MachO/hasher.zig index 95faaf3a..c7b2b02c 100644 --- a/src/MachO/hasher.zig +++ b/src/MachO/hasher.zig @@ -54,6 +54,9 @@ pub fn ParallelHasher(comptime Hasher: type) type { err: *fs.File.PReadError!usize, wg: *WaitGroup, ) void { + const tracy = trace(@src()); + defer tracy.end(); + defer wg.finish(); err.* = file.preadAll(buffer, fstart); Hasher.hash(buffer, out, .{}); diff --git a/src/MachO/relocatable.zig b/src/MachO/relocatable.zig index f291aa76..d4cc5546 100644 --- a/src/MachO/relocatable.zig +++ b/src/MachO/relocatable.zig @@ -21,7 +21,7 @@ pub fn flush(macho_file: *MachO) !void { seg.cmdsize += seg.nsects * @sizeOf(macho.section_64); } - var off = try allocateSections(macho_file); + try allocateSections(macho_file); { // Allocate the single segment. 
@@ -45,17 +45,9 @@ pub fn flush(macho_file: *MachO) !void { state_log.debug("{}", .{macho_file.dumpState()}); - try macho_file.calcSymtabSize(); - try writeAtoms(macho_file); - try writeCompactUnwind(macho_file); - try writeEhFrame(macho_file); - - off = mem.alignForward(u32, off, @alignOf(u64)); - off = try macho_file.writeDataInCode(0, off); - off = mem.alignForward(u32, off, @alignOf(u64)); - off = try macho_file.writeSymtab(off); - off = mem.alignForward(u32, off, @alignOf(u64)); - off = try macho_file.writeStrtab(off); + try writeSections(macho_file); + sortRelocs(macho_file); + try writeSectionsToFile(macho_file); const ncmds, const sizeofcmds = try writeLoadCommands(macho_file); try writeHeader(macho_file, ncmds, sizeofcmds); @@ -63,13 +55,13 @@ pub fn flush(macho_file: *MachO) !void { fn markExports(macho_file: *MachO) void { for (macho_file.objects.items) |index| { - for (macho_file.getFile(index).?.getSymbols()) |sym_index| { - const sym = macho_file.getSymbol(sym_index); - const file = sym.getFile(macho_file) orelse continue; + const object = macho_file.getFile(index).?.object; + for (object.symbols.items, 0..) |*sym, i| { + const ref = object.getSymbolRef(@intCast(i), macho_file); + const file = ref.getFile(macho_file) orelse continue; + if (file.getIndex() != index) continue; if (sym.visibility != .global) continue; - if (file.getIndex() == index) { - sym.flags.@"export" = true; - } + sym.flags.@"export" = true; } } } @@ -77,33 +69,16 @@ fn markExports(macho_file: *MachO) void { fn claimUnresolved(macho_file: *MachO) void { for (macho_file.objects.items) |index| { const object = macho_file.getFile(index).?.object; - - for (object.symbols.items, 0..) |sym_index, i| { - const nlist_idx = @as(Symbol.Index, @intCast(i)); - const nlist = object.symtab.items(.nlist)[nlist_idx]; - if (!nlist.ext()) continue; - if (!nlist.undf()) continue; - - const sym = macho_file.getSymbol(sym_index); - if (sym.getFile(macho_file) != null) continue; - - sym.value = 0; - sym.atom = 0; - sym.nlist_idx = nlist_idx; - sym.file = index; - sym.flags.weak_ref = nlist.weakRef(); - sym.flags.import = true; - sym.visibility = .global; - } + object.claimUnresolvedRelocatable(macho_file); } } fn initOutputSections(macho_file: *MachO) !void { for (macho_file.objects.items) |index| { - const object = macho_file.getFile(index).?.object; - for (object.atoms.items) |atom_index| { - const atom = macho_file.getAtom(atom_index) orelse continue; - if (!atom.flags.alive) continue; + const file = macho_file.getFile(index).?; + for (file.getAtoms()) |atom_index| { + const atom = file.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(macho_file), macho_file); } } @@ -127,60 +102,173 @@ fn initOutputSections(macho_file: *MachO) !void { } fn calcSectionSizes(macho_file: *MachO) !void { - const slice = macho_file.sections.slice(); - for (slice.items(.header), slice.items(.atoms)) |*header, atoms| { - if (atoms.items.len == 0) continue; - for (atoms.items) |atom_index| { - const atom = macho_file.getAtom(atom_index).?; - const atom_alignment = try math.powi(u32, 2, atom.alignment); - const offset = mem.alignForward(u64, header.size, atom_alignment); - const padding = offset - header.size; - atom.value = offset; - header.size += padding + atom.size; - header.@"align" = @max(header.@"align", atom.alignment); - header.nreloc += atom.calcNumRelocs(macho_file); + const tracy = trace(@src()); + defer tracy.end(); + + var wg: WaitGroup = 
.{}; + + { + wg.reset(); + defer wg.wait(); + + const slice = macho_file.sections.slice(); + for (slice.items(.atoms), 0..) |atoms, i| { + if (atoms.items.len == 0) continue; + macho_file.base.thread_pool.spawnWg(&wg, calcSectionSizeWorker, .{ macho_file, @as(u8, @intCast(i)) }); } - } - if (macho_file.unwind_info_sect_index) |index| { - calcCompactUnwindSize(macho_file, index); - } + if (macho_file.eh_frame_sect_index) |_| { + macho_file.base.thread_pool.spawnWg(&wg, calcEhFrameSizeWorker, .{macho_file}); + } + + for (macho_file.objects.items) |index| { + if (macho_file.unwind_info_sect_index) |_| { + macho_file.base.thread_pool.spawnWg(&wg, Object.calcCompactUnwindSizeRelocatable, .{ + macho_file.getFile(index).?.object, + macho_file, + }); + } - if (macho_file.eh_frame_sect_index) |index| { - const sect = &macho_file.sections.items(.header)[index]; - sect.size = try eh_frame.calcSize(macho_file); - sect.@"align" = 3; - sect.nreloc = eh_frame.calcNumRelocs(macho_file); + macho_file.base.thread_pool.spawnWg(&wg, File.calcSymtabSize, .{ macho_file.getFile(index).?, macho_file }); + } + + macho_file.base.thread_pool.spawnWg(&wg, MachO.updateLinkeditSizeWorker, .{ macho_file, .data_in_code }); } + + calcCompactUnwindSize(macho_file); + calcSymtabSize(macho_file); + + if (macho_file.has_errors.swap(false, .seq_cst)) return error.FlushFailed; } -fn calcCompactUnwindSize(macho_file: *MachO, sect_index: u8) void { - var size: u32 = 0; - var nreloc: u32 = 0; +fn calcSectionSizeWorker(macho_file: *MachO, sect_id: u8) void { + const tracy = trace(@src()); + defer tracy.end(); - for (macho_file.objects.items) |index| { - const object = macho_file.getFile(index).?.object; - for (object.unwind_records.items) |irec| { - const rec = macho_file.getUnwindRecord(irec); - if (!rec.alive) continue; - size += @sizeOf(macho.compact_unwind_entry); - nreloc += 1; - if (rec.getPersonality(macho_file)) |_| { - nreloc += 1; - } - if (rec.getLsdaAtom(macho_file)) |_| { - nreloc += 1; + const doWork = struct { + fn doWork(mfile: *MachO, header: *macho.section_64, atoms: []const MachO.Ref) !void { + for (atoms) |ref| { + const atom = ref.getAtom(mfile).?; + const p2align = atom.alignment; + const atom_alignment = try math.powi(u32, 2, p2align); + const offset = mem.alignForward(u64, header.size, atom_alignment); + const padding = offset - header.size; + atom.value = offset; + header.size += padding + atom.size; + header.@"align" = @max(header.@"align", p2align); + const nreloc = atom.calcNumRelocs(mfile); + atom.addExtra(.{ .rel_out_index = header.nreloc, .rel_out_count = nreloc }, mfile); + header.nreloc += nreloc; } } + }.doWork; + + const slice = macho_file.sections.slice(); + const header = &slice.items(.header)[sect_id]; + const atoms = slice.items(.atoms)[sect_id].items; + doWork(macho_file, header, atoms) catch |err| { + macho_file.base.fatal("failed to calculate size of section '{s},{s}': {s}", .{ + header.segName(), + header.sectName(), + @errorName(err), + }); + _ = macho_file.has_errors.swap(true, .seq_cst); + }; +} + +fn calcCompactUnwindSize(macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + var nrec: u32 = 0; + var nreloc: u32 = 0; + + for (macho_file.objects.items) |index| { + const ctx = &macho_file.getFile(index).?.object.compact_unwind_ctx; + ctx.rec_index = nrec; + ctx.reloc_index = nreloc; + nrec += ctx.rec_count; + nreloc += ctx.reloc_count; } - const sect = &macho_file.sections.items(.header)[sect_index]; - sect.size = size; + const sect = 
&macho_file.sections.items(.header)[macho_file.unwind_info_sect_index.?]; + sect.size = nrec * @sizeOf(macho.compact_unwind_entry); sect.nreloc = nreloc; sect.@"align" = 3; } -fn allocateSections(macho_file: *MachO) !u32 { +fn calcEhFrameSizeWorker(macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + const doWork = struct { + fn doWork(mfile: *MachO, header: *macho.section_64) !void { + header.size = try eh_frame.calcSize(mfile); + header.@"align" = 3; + header.nreloc = eh_frame.calcNumRelocs(mfile); + } + }.doWork; + + const header = &macho_file.sections.items(.header)[macho_file.eh_frame_sect_index.?]; + doWork(macho_file, header) catch |err| { + macho_file.base.fatal("failed to calculate size of section '__TEXT,__eh_frame': {s}", .{ + @errorName(err), + }); + _ = macho_file.has_errors.swap(true, .seq_cst); + }; +} + +fn calcSymtabSize(macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); + + var nlocals: u32 = 0; + var nstabs: u32 = 0; + var nexports: u32 = 0; + var nimports: u32 = 0; + var strsize: u32 = 1; + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + const ctx = &object.output_symtab_ctx; + ctx.ilocal = nlocals; + ctx.istab = nstabs; + ctx.iexport = nexports; + ctx.iimport = nimports; + ctx.stroff = strsize; + nlocals += ctx.nlocals; + nstabs += ctx.nstabs; + nexports += ctx.nexports; + nimports += ctx.nimports; + strsize += ctx.strsize; + } + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + const ctx = &object.output_symtab_ctx; + ctx.istab += nlocals; + ctx.iexport += nlocals + nstabs; + ctx.iimport += nlocals + nstabs + nexports; + } + + { + const cmd = &macho_file.symtab_cmd; + cmd.nsyms = nlocals + nstabs + nexports + nimports; + cmd.strsize = strsize; + } + + { + const cmd = &macho_file.dysymtab_cmd; + cmd.ilocalsym = 0; + cmd.nlocalsym = nlocals + nstabs; + cmd.iextdefsym = nlocals + nstabs; + cmd.nextdefsym = nexports; + cmd.iundefsym = nlocals + nstabs + nexports; + cmd.nundefsym = nimports; + } +} + +fn allocateSections(macho_file: *MachO) !void { var fileoff = load_commands.calcLoadCommandsSizeObject(macho_file) + @sizeOf(macho.mach_header_64); var vmaddr: u64 = 0; const slice = macho_file.sections.slice(); @@ -204,7 +292,25 @@ fn allocateSections(macho_file: *MachO) !u32 { fileoff = header.reloff + header.nreloc * @sizeOf(macho.relocation_info); } - return fileoff; + // In -r mode, there is no LINKEDIT segment and so we allocate required LINKEDIT commands + // as if they were detached or part of the single segment. + + // DATA_IN_CODE + { + const cmd = &macho_file.data_in_code_cmd; + cmd.dataoff = fileoff; + fileoff += cmd.datasize; + fileoff = mem.alignForward(u32, fileoff, @alignOf(u64)); + } + + // SYMTAB + { + const cmd = &macho_file.symtab_cmd; + cmd.symoff = fileoff; + fileoff += cmd.nsyms * @sizeOf(macho.nlist_64); + fileoff = mem.alignForward(u32, fileoff, @alignOf(u32)); + cmd.stroff = fileoff; + } } // We need to sort relocations in descending order to be compatible with Apple's linker. 
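The calcSectionSizes hunks above replace the serial per-section loop with a fan-out over the linker's thread pool: each section (plus eh_frame and the per-object unwind/symtab work) gets its own worker, a worker that fails only flips the shared has_errors atomic, and once the WaitGroup drains the caller turns that flag into a single error.FlushFailed. The following is a minimal, self-contained Zig sketch of that pattern under the same std.Thread.Pool/WaitGroup API the patch uses; the Section type, processSection, and doWork names are illustrative stand-ins, not names from this patch.

const std = @import("std");

const Section = struct { size: u64 = 0 };

// Illustrative worker: does its slice of work and records failure in the
// shared flag instead of returning an error across the thread-pool boundary.
fn processSection(sect: *Section, has_errors: *std.atomic.Value(bool)) void {
    doWork(sect) catch {
        _ = has_errors.swap(true, .seq_cst);
    };
}

fn doWork(sect: *Section) !void {
    // Stand-in for the real per-atom computation (alignment + size bump).
    const alignment = try std.math.powi(u64, 2, 4);
    sect.size = std.mem.alignForward(u64, sect.size, alignment) + 64;
}

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();

    var pool: std.Thread.Pool = undefined;
    try pool.init(.{ .allocator = gpa.allocator() });
    defer pool.deinit();

    var sections = [_]Section{.{}} ** 8;
    var has_errors = std.atomic.Value(bool).init(false);

    var wg: std.Thread.WaitGroup = .{};
    {
        wg.reset();
        defer wg.wait();
        for (&sections) |*sect| {
            pool.spawnWg(&wg, processSection, .{ sect, &has_errors });
        }
    }

    // Mirror of the caller-side check: one error for the whole phase.
    if (has_errors.swap(false, .seq_cst)) return error.FlushFailed;
}

The point of keeping the workers' return type void and funneling failures through the atomic flag is that spawnWg does not propagate errors; reporting happens inside the worker (via fatal in the patch) and the phase fails exactly once afterwards.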
@@ -213,154 +319,117 @@ fn sortReloc(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) return lhs.r_address > rhs.r_address; } -fn writeAtoms(macho_file: *MachO) !void { +fn writeSections(macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = macho_file.base.allocator; - const cpu_arch = macho_file.options.cpu_arch.?; const slice = macho_file.sections.slice(); - - var relocs = std.ArrayList(macho.relocation_info).init(gpa); - defer relocs.deinit(); - - for (slice.items(.header), slice.items(.atoms)) |header, atoms| { - if (atoms.items.len == 0) continue; + for (slice.items(.header), slice.items(.out), slice.items(.relocs)) |header, *out, *relocs| { if (header.isZerofill()) continue; - - const code = try gpa.alloc(u8, header.size); - defer gpa.free(code); + const cpu_arch = macho_file.options.cpu_arch.?; + try out.resize(macho_file.base.allocator, header.size); const padding_byte: u8 = if (header.isCode() and cpu_arch == .x86_64) 0xcc else 0; - @memset(code, padding_byte); - - try relocs.ensureTotalCapacity(header.nreloc); + @memset(out.items, padding_byte); + try relocs.resize(macho_file.base.allocator, header.nreloc); + } - for (atoms.items) |atom_index| { - const atom = macho_file.getAtom(atom_index).?; - assert(atom.flags.alive); - const off = atom.value; - try atom.getCode(macho_file, code[off..][0..atom.size]); - try atom.writeRelocs(macho_file, code[off..][0..atom.size], &relocs); - } + const cmd = macho_file.symtab_cmd; + try macho_file.symtab.resize(gpa, cmd.nsyms); + try macho_file.strtab.resize(gpa, cmd.strsize); + macho_file.strtab.items[0] = 0; - assert(relocs.items.len == header.nreloc); + var wg: WaitGroup = .{}; + { + wg.reset(); + defer wg.wait(); - mem.sort(macho.relocation_info, relocs.items, {}, sortReloc); + for (macho_file.objects.items) |index| { + macho_file.base.thread_pool.spawnWg(&wg, writeAtomsWorker, .{ macho_file, macho_file.getFile(index).?.object }); + macho_file.base.thread_pool.spawnWg(&wg, Object.writeSymtab, .{ macho_file.getFile(index).?.object.*, macho_file }); + } - // TODO scattered writes? 
- try macho_file.base.file.pwriteAll(code, header.offset); - try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); + if (macho_file.eh_frame_sect_index) |_| { + macho_file.base.thread_pool.spawnWg(&wg, writeEhFrameWorker, .{macho_file}); + } - relocs.clearRetainingCapacity(); + if (macho_file.unwind_info_sect_index) |_| { + for (macho_file.objects.items) |index| { + macho_file.base.thread_pool.spawnWg(&wg, writeCompactUnwindWorker, .{ + macho_file, + macho_file.getFile(index).?.object, + }); + } + } } + + if (macho_file.has_errors.swap(false, .seq_cst)) return error.FlushFailed; } -fn writeCompactUnwind(macho_file: *MachO) !void { - const sect_index = macho_file.unwind_info_sect_index orelse return; - const gpa = macho_file.base.allocator; - const header = macho_file.sections.items(.header)[sect_index]; - - const nrecs = @divExact(header.size, @sizeOf(macho.compact_unwind_entry)); - var entries = try std.ArrayList(macho.compact_unwind_entry).initCapacity(gpa, nrecs); - defer entries.deinit(); - - var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc); - defer relocs.deinit(); - - const addReloc = struct { - fn addReloc(offset: i32, cpu_arch: std.Target.Cpu.Arch) macho.relocation_info { - return .{ - .r_address = offset, - .r_symbolnum = 0, - .r_pcrel = 0, - .r_length = 3, - .r_extern = 0, - .r_type = switch (cpu_arch) { - .aarch64 => @intFromEnum(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), - .x86_64 => @intFromEnum(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), - else => unreachable, - }, - }; - } - }.addReloc; +fn writeAtomsWorker(macho_file: *MachO, object: *Object) void { + const tracy = trace(@src()); + defer tracy.end(); + object.writeAtomsRelocatable(macho_file) catch |err| { + macho_file.base.fatal("{}: failed to write atoms: {s}", .{ object.fmtPath(), @errorName(err) }); + _ = macho_file.has_errors.swap(true, .seq_cst); + }; +} - var offset: i32 = 0; - for (macho_file.objects.items) |index| { - const object = macho_file.getFile(index).?.object; - for (object.unwind_records.items) |irec| { - const rec = macho_file.getUnwindRecord(irec); - if (!rec.alive) continue; - - var out: macho.compact_unwind_entry = .{ - .rangeStart = 0, - .rangeLength = rec.length, - .compactUnwindEncoding = rec.enc.enc, - .personalityFunction = 0, - .lsda = 0, - }; - - { - // Function address - const atom = rec.getAtom(macho_file); - const addr = rec.getAtomAddress(macho_file); - out.rangeStart = addr; - var reloc = addReloc(offset, macho_file.options.cpu_arch.?); - reloc.r_symbolnum = atom.out_n_sect + 1; - relocs.appendAssumeCapacity(reloc); - } +fn sortRelocs(macho_file: *MachO) void { + const tracy = trace(@src()); + defer tracy.end(); - // Personality function - if (rec.getPersonality(macho_file)) |sym| { - const r_symbolnum = math.cast(u24, sym.getOutputSymtabIndex(macho_file).?) 
orelse return error.Overflow; - var reloc = addReloc(offset + 16, macho_file.options.cpu_arch.?); - reloc.r_symbolnum = r_symbolnum; - reloc.r_extern = 1; - relocs.appendAssumeCapacity(reloc); - } + const worker = struct { + fn worker(relocs: []macho.relocation_info) void { + const tr = trace(@src()); + defer tr.end(); + mem.sort(macho.relocation_info, relocs, {}, sortReloc); + } + }.worker; - // LSDA address - if (rec.getLsdaAtom(macho_file)) |atom| { - const addr = rec.getLsdaAddress(macho_file); - out.lsda = addr; - var reloc = addReloc(offset + 24, macho_file.options.cpu_arch.?); - reloc.r_symbolnum = atom.out_n_sect + 1; - relocs.appendAssumeCapacity(reloc); - } + var wg: WaitGroup = .{}; + { + wg.reset(); + defer wg.wait(); - entries.appendAssumeCapacity(out); - offset += @sizeOf(macho.compact_unwind_entry); + for (macho_file.sections.items(.relocs)) |*relocs| { + macho_file.base.thread_pool.spawnWg(&wg, worker, .{relocs.items}); + } } - } - - assert(entries.items.len == nrecs); - assert(relocs.items.len == header.nreloc); - - mem.sort(macho.relocation_info, relocs.items, {}, sortReloc); - - // TODO scattered writes? - try macho_file.base.file.pwriteAll(mem.sliceAsBytes(entries.items), header.offset); - try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); } -fn writeEhFrame(macho_file: *MachO) !void { - const sect_index = macho_file.eh_frame_sect_index orelse return; - const gpa = macho_file.base.allocator; - const header = macho_file.sections.items(.header)[sect_index]; - - const code = try gpa.alloc(u8, header.size); - defer gpa.free(code); +fn writeSectionsToFile(macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); - var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc); - defer relocs.deinit(); + const slice = macho_file.sections.slice(); + for (slice.items(.header), slice.items(.out), slice.items(.relocs)) |header, out, relocs| { + try macho_file.base.file.pwriteAll(out.items, header.offset); + try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); + } - try eh_frame.writeRelocs(macho_file, code, &relocs); - assert(relocs.items.len == header.nreloc); + try macho_file.writeDataInCode(); + try macho_file.base.file.pwriteAll(mem.sliceAsBytes(macho_file.symtab.items), macho_file.symtab_cmd.symoff); + try macho_file.base.file.pwriteAll(macho_file.strtab.items, macho_file.symtab_cmd.stroff); +} - mem.sort(macho.relocation_info, relocs.items, {}, sortReloc); +fn writeCompactUnwindWorker(macho_file: *MachO, object: *Object) void { + const tracy = trace(@src()); + defer tracy.end(); + object.writeCompactUnwindRelocatable(macho_file) catch |err| { + macho_file.base.fatal("failed to write '__LD,__compact_unwind' section: {s}", .{@errorName(err)}); + _ = macho_file.has_errors.swap(true, .seq_cst); + }; +} - // TODO scattered writes?
- try macho_file.base.file.pwriteAll(code, header.offset); - try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); +fn writeEhFrameWorker(macho_file: *MachO) void { + const sect_index = macho_file.eh_frame_sect_index.?; + const buffer = macho_file.sections.items(.out)[sect_index]; + const relocs = macho_file.sections.items(.relocs)[sect_index]; + eh_frame.writeRelocs(macho_file, buffer.items, relocs.items) catch |err| { + macho_file.base.fatal("failed to write '__TEXT,__eh_frame' section: {s}", .{@errorName(err)}); + _ = macho_file.has_errors.swap(true, .seq_cst); + }; } fn writeLoadCommands(macho_file: *MachO) !struct { usize, usize } { @@ -451,5 +520,8 @@ const std = @import("std"); const trace = @import("../tracy.zig").trace; const Atom = @import("Atom.zig"); +const File = @import("file.zig").File; const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); const Symbol = @import("Symbol.zig"); +const WaitGroup = std.Thread.WaitGroup; diff --git a/src/MachO/synthetic.zig b/src/MachO/synthetic.zig index 428d0de3..7981b468 100644 --- a/src/MachO/synthetic.zig +++ b/src/MachO/synthetic.zig @@ -1,5 +1,5 @@ pub const GotSection = struct { - symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + symbols: std.ArrayListUnmanaged(MachO.Ref) = .{}, pub const Index = u32; @@ -7,13 +7,13 @@ pub const GotSection = struct { got.symbols.deinit(allocator); } - pub fn addSymbol(got: *GotSection, sym_index: Symbol.Index, macho_file: *MachO) !void { + pub fn addSymbol(got: *GotSection, ref: MachO.Ref, macho_file: *MachO) !void { const gpa = macho_file.base.allocator; const index = @as(Index, @intCast(got.symbols.items.len)); const entry = try got.symbols.addOne(gpa); - entry.* = sym_index; - const symbol = macho_file.getSymbol(sym_index); - try symbol.addExtra(.{ .got = index }, macho_file); + entry.* = ref; + const symbol = ref.getSymbol(macho_file).?; + symbol.addExtra(.{ .got = index }, macho_file); } pub fn getAddress(got: GotSection, index: Index, macho_file: *MachO) u64 { @@ -26,46 +26,11 @@ pub const GotSection = struct { return got.symbols.items.len * @sizeOf(u64); } - pub fn addDyldRelocs(got: GotSection, macho_file: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - const gpa = macho_file.base.allocator; - const seg_id = macho_file.sections.items(.segment_id)[macho_file.got_sect_index.?]; - const seg = macho_file.segments.items[seg_id]; - - for (got.symbols.items, 0..) 
|sym_index, idx| { - const sym = macho_file.getSymbol(sym_index); - const addr = got.getAddress(@intCast(idx), macho_file); - const entry = bind.Entry{ - .target = sym_index, - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - .addend = 0, - }; - if (sym.flags.import) { - try macho_file.bind.entries.append(gpa, entry); - if (sym.flags.weak) { - try macho_file.weak_bind.entries.append(gpa, entry); - } - } else { - try macho_file.rebase.entries.append(gpa, .{ - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - }); - if (sym.flags.weak) { - try macho_file.weak_bind.entries.append(gpa, entry); - } else if (sym.flags.interposable) { - try macho_file.bind.entries.append(gpa, entry); - } - } - } - } - pub fn write(got: GotSection, macho_file: *MachO, writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); - for (got.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); + for (got.symbols.items) |ref| { + const sym = ref.getSymbol(macho_file).?; const value = if (sym.flags.import) @as(u64, 0) else sym.getAddress(.{}, macho_file); try writer.writeInt(u64, value, .little); } @@ -88,12 +53,12 @@ pub const GotSection = struct { ) !void { _ = options; _ = unused_fmt_string; - for (ctx.got.symbols.items, 0..) |entry, i| { - const symbol = ctx.macho_file.getSymbol(entry); - try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + for (ctx.got.symbols.items, 0..) |ref, i| { + const symbol = ref.getSymbol(ctx.macho_file).?; + try writer.print(" {d}@0x{x} => {}@0x{x} ({s})\n", .{ i, symbol.getGotAddress(ctx.macho_file), - entry, + ref, symbol.getAddress(.{}, ctx.macho_file), symbol.getName(ctx.macho_file), }); @@ -102,7 +67,7 @@ pub const GotSection = struct { }; pub const StubsSection = struct { - symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + symbols: std.ArrayListUnmanaged(MachO.Ref) = .{}, pub const Index = u32; @@ -110,13 +75,13 @@ pub const StubsSection = struct { stubs.symbols.deinit(allocator); } - pub fn addSymbol(stubs: *StubsSection, sym_index: Symbol.Index, macho_file: *MachO) !void { + pub fn addSymbol(stubs: *StubsSection, ref: MachO.Ref, macho_file: *MachO) !void { const gpa = macho_file.base.allocator; const index = @as(Index, @intCast(stubs.symbols.items.len)); const entry = try stubs.symbols.addOne(gpa); - entry.* = sym_index; - const symbol = macho_file.getSymbol(sym_index); - try symbol.addExtra(.{ .stubs = index }, macho_file); + entry.* = ref; + const symbol = ref.getSymbol(macho_file).?; + symbol.addExtra(.{ .stubs = index }, macho_file); } pub fn getAddress(stubs: StubsSection, index: Index, macho_file: *MachO) u64 { @@ -136,8 +101,8 @@ pub const StubsSection = struct { const cpu_arch = macho_file.options.cpu_arch.?; const laptr_sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?]; - for (stubs.symbols.items, 0..) |sym_index, idx| { - const sym = macho_file.getSymbol(sym_index); + for (stubs.symbols.items, 0..) |ref, idx| { + const sym = ref.getSymbol(macho_file).?; const source = sym.getAddress(.{ .stubs = true }, macho_file); const target = laptr_sect.addr + idx * @sizeOf(u64); switch (cpu_arch) { @@ -179,12 +144,12 @@ pub const StubsSection = struct { ) !void { _ = options; _ = unused_fmt_string; - for (ctx.stubs.symbols.items, 0..) |entry, i| { - const symbol = ctx.macho_file.getSymbol(entry); - try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + for (ctx.stubs.symbols.items, 0..) 
|ref, i| { + const symbol = ref.getSymbol(ctx.macho_file).?; + try writer.print(" {d}@0x{x} => {}@0x{x} ({s})\n", .{ i, symbol.getStubsAddress(ctx.macho_file), - entry, + ref, symbol.getAddress(.{}, ctx.macho_file), symbol.getName(ctx.macho_file), }); @@ -215,8 +180,8 @@ pub const StubsHelperSection = struct { _ = stubs_helper; const cpu_arch = macho_file.options.cpu_arch.?; var s: usize = preambleSize(cpu_arch); - for (macho_file.stubs.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); + for (macho_file.stubs.symbols.items) |ref| { + const sym = ref.getSymbol(macho_file).?; if (sym.flags.weak) continue; s += entrySize(cpu_arch); } @@ -235,8 +200,8 @@ pub const StubsHelperSection = struct { const entry_size = entrySize(cpu_arch); var idx: usize = 0; - for (macho_file.stubs.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); + for (macho_file.stubs.symbols.items) |ref| { + const sym = ref.getSymbol(macho_file).?; if (sym.flags.weak) continue; const offset = macho_file.lazy_bind.offsets.items[idx]; const source: i64 = @intCast(sect.addr + preamble_size + entry_size * idx); @@ -270,14 +235,15 @@ pub const StubsHelperSection = struct { fn writePreamble(stubs_helper: StubsHelperSection, macho_file: *MachO, writer: anytype) !void { _ = stubs_helper; + const obj = macho_file.getInternalObject().?; const cpu_arch = macho_file.options.cpu_arch.?; const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?]; const dyld_private_addr = target: { - const sym = macho_file.getSymbol(macho_file.dyld_private_index.?); + const sym = obj.getDyldPrivateRef(macho_file).?.getSymbol(macho_file).?; break :target sym.getAddress(.{}, macho_file); }; const dyld_stub_binder_addr = target: { - const sym = macho_file.getSymbol(macho_file.dyld_stub_binder_index.?); + const sym = obj.getDyldStubBinderRef(macho_file).?.getSymbol(macho_file).?; break :target sym.getGotAddress(macho_file); }; switch (cpu_arch) { @@ -326,49 +292,6 @@ pub const LaSymbolPtrSection = struct { return macho_file.stubs.symbols.items.len * @sizeOf(u64); } - pub fn addDyldRelocs(laptr: LaSymbolPtrSection, macho_file: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - _ = laptr; - const gpa = macho_file.base.allocator; - - const sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?]; - const seg_id = macho_file.sections.items(.segment_id)[macho_file.la_symbol_ptr_sect_index.?]; - const seg = macho_file.segments.items[seg_id]; - - for (macho_file.stubs.symbols.items, 0..) 
|sym_index, idx| { - const sym = macho_file.getSymbol(sym_index); - const addr = sect.addr + idx * @sizeOf(u64); - const rebase_entry = Rebase.Entry{ - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - }; - const bind_entry = bind.Entry{ - .target = sym_index, - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - .addend = 0, - }; - if (sym.flags.import) { - if (sym.flags.weak) { - try macho_file.bind.entries.append(gpa, bind_entry); - try macho_file.weak_bind.entries.append(gpa, bind_entry); - } else { - try macho_file.lazy_bind.entries.append(gpa, bind_entry); - try macho_file.rebase.entries.append(gpa, rebase_entry); - } - } else { - if (sym.flags.weak) { - try macho_file.weak_bind.entries.append(gpa, bind_entry); - try macho_file.rebase.entries.append(gpa, rebase_entry); - } else if (sym.flags.interposable) { - try macho_file.lazy_bind.entries.append(gpa, bind_entry); - try macho_file.rebase.entries.append(gpa, rebase_entry); - } - } - } - } - pub fn write(laptr: LaSymbolPtrSection, macho_file: *MachO, writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); @@ -376,8 +299,8 @@ pub const LaSymbolPtrSection = struct { const cpu_arch = macho_file.options.cpu_arch.?; const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?]; var stub_helper_idx: u32 = 0; - for (macho_file.stubs.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); + for (macho_file.stubs.symbols.items) |ref| { + const sym = ref.getSymbol(macho_file).?; if (sym.flags.weak) { const value = sym.getAddress(.{ .stubs = false }, macho_file); try writer.writeInt(u64, @intCast(value), .little); @@ -392,7 +315,7 @@ pub const LaSymbolPtrSection = struct { }; pub const TlvPtrSection = struct { - symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + symbols: std.ArrayListUnmanaged(MachO.Ref) = .{}, pub const Index = u32; @@ -400,13 +323,13 @@ pub const TlvPtrSection = struct { tlv.symbols.deinit(allocator); } - pub fn addSymbol(tlv: *TlvPtrSection, sym_index: Symbol.Index, macho_file: *MachO) !void { + pub fn addSymbol(tlv: *TlvPtrSection, ref: MachO.Ref, macho_file: *MachO) !void { const gpa = macho_file.base.allocator; const index = @as(Index, @intCast(tlv.symbols.items.len)); const entry = try tlv.symbols.addOne(gpa); - entry.* = sym_index; - const symbol = macho_file.getSymbol(sym_index); - try symbol.addExtra(.{ .tlv_ptr = index }, macho_file); + entry.* = ref; + const symbol = ref.getSymbol(macho_file).?; + symbol.addExtra(.{ .tlv_ptr = index }, macho_file); } pub fn getAddress(tlv: TlvPtrSection, index: Index, macho_file: *MachO) u64 { @@ -419,47 +342,12 @@ pub const TlvPtrSection = struct { return tlv.symbols.items.len * @sizeOf(u64); } - pub fn addDyldRelocs(tlv: TlvPtrSection, macho_file: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - const gpa = macho_file.base.allocator; - const seg_id = macho_file.sections.items(.segment_id)[macho_file.tlv_ptr_sect_index.?]; - const seg = macho_file.segments.items[seg_id]; - - for (tlv.symbols.items, 0..) 
|sym_index, idx| { - const sym = macho_file.getSymbol(sym_index); - const addr = tlv.getAddress(@intCast(idx), macho_file); - const entry = bind.Entry{ - .target = sym_index, - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - .addend = 0, - }; - if (sym.flags.import) { - try macho_file.bind.entries.append(gpa, entry); - if (sym.flags.weak) { - try macho_file.weak_bind.entries.append(gpa, entry); - } - } else { - try macho_file.rebase.entries.append(gpa, .{ - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - }); - if (sym.flags.weak) { - try macho_file.weak_bind.entries.append(gpa, entry); - } else if (sym.flags.interposable) { - try macho_file.bind.entries.append(gpa, entry); - } - } - } - } - pub fn write(tlv: TlvPtrSection, macho_file: *MachO, writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); - for (tlv.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); + for (tlv.symbols.items) |ref| { + const sym = ref.getSymbol(macho_file).?; if (sym.flags.import) { try writer.writeInt(u64, 0, .little); } else { @@ -485,12 +373,12 @@ pub const TlvPtrSection = struct { ) !void { _ = options; _ = unused_fmt_string; - for (ctx.tlv.symbols.items, 0..) |entry, i| { - const symbol = ctx.macho_file.getSymbol(entry); - try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + for (ctx.tlv.symbols.items, 0..) |ref, i| { + const symbol = ref.getSymbol(ctx.macho_file).?; + try writer.print(" {d}@0x{x} => {}@0x{x} ({s})\n", .{ i, symbol.getTlvPtrAddress(ctx.macho_file), - entry, + ref, symbol.getAddress(.{}, ctx.macho_file), symbol.getName(ctx.macho_file), }); @@ -499,7 +387,7 @@ pub const TlvPtrSection = struct { }; pub const ObjcStubsSection = struct { - symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + symbols: std.ArrayListUnmanaged(MachO.Ref) = .{}, pub fn deinit(objc: *ObjcStubsSection, allocator: Allocator) void { objc.symbols.deinit(allocator); @@ -513,13 +401,13 @@ pub const ObjcStubsSection = struct { }; } - pub fn addSymbol(objc: *ObjcStubsSection, sym_index: Symbol.Index, macho_file: *MachO) !void { + pub fn addSymbol(objc: *ObjcStubsSection, ref: MachO.Ref, macho_file: *MachO) !void { const gpa = macho_file.base.allocator; const index = @as(Index, @intCast(objc.symbols.items.len)); const entry = try objc.symbols.addOne(gpa); - entry.* = sym_index; - const symbol = macho_file.getSymbol(sym_index); - try symbol.addExtra(.{ .objc_stubs = index }, macho_file); + entry.* = ref; + const symbol = ref.getSymbol(macho_file).?; + symbol.addExtra(.{ .objc_stubs = index }, macho_file); } pub fn getAddress(objc: ObjcStubsSection, index: Index, macho_file: *MachO) u64 { @@ -536,8 +424,10 @@ pub const ObjcStubsSection = struct { const tracy = trace(@src()); defer tracy.end(); - for (objc.symbols.items, 0..) |sym_index, idx| { - const sym = macho_file.getSymbol(sym_index); + const obj = macho_file.getInternalObject().?; + + for (objc.symbols.items, 0..) |ref, idx| { + const sym = ref.getSymbol(macho_file).?; const addr = objc.getAddress(@intCast(idx), macho_file); switch (macho_file.options.cpu_arch.?) 
{ .x86_64 => { @@ -549,7 +439,7 @@ pub const ObjcStubsSection = struct { } try writer.writeAll(&.{ 0xff, 0x25 }); { - const target_sym = macho_file.getSymbol(macho_file.objc_msg_send_index.?); + const target_sym = obj.getObjcMsgSendRef(macho_file).?.getSymbol(macho_file).?; const target = target_sym.getGotAddress(macho_file); const source = addr + 7; try writer.writeInt(i32, @intCast(target - source - 2 - 4), .little); @@ -569,7 +459,7 @@ pub const ObjcStubsSection = struct { ); } { - const target_sym = macho_file.getSymbol(macho_file.objc_msg_send_index.?); + const target_sym = obj.getObjcMsgSendRef(macho_file).?.getSymbol(macho_file).?; const target = target_sym.getGotAddress(macho_file); const source = addr + 2 * @sizeOf(u32); const pages = try aarch64.calcNumberOfPages(@intCast(source), @intCast(target)); @@ -608,12 +498,12 @@ pub const ObjcStubsSection = struct { ) !void { _ = options; _ = unused_fmt_string; - for (ctx.objc.symbols.items, 0..) |entry, i| { - const symbol = ctx.macho_file.getSymbol(entry); - try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + for (ctx.objc.symbols.items, 0..) |ref, i| { + const symbol = ref.getSymbol(ctx.macho_file).?; + try writer.print(" {d}@0x{x} => {}@0x{x} ({s})\n", .{ i, symbol.getObjcStubsAddress(ctx.macho_file), - entry, + ref, symbol.getAddress(.{}, ctx.macho_file), symbol.getName(ctx.macho_file), }); @@ -629,45 +519,102 @@ pub const Indsymtab = struct { return @intCast(macho_file.stubs.symbols.items.len * 2 + macho_file.got.symbols.items.len); } + pub fn updateSize(ind: *Indsymtab, macho_file: *MachO) !void { + macho_file.dysymtab_cmd.nindirectsyms = ind.nsyms(macho_file); + } + pub fn write(ind: Indsymtab, macho_file: *MachO, writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); _ = ind; - for (macho_file.stubs.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); + for (macho_file.stubs.symbols.items) |ref| { + const sym = ref.getSymbol(macho_file).?; try writer.writeInt(u32, sym.getOutputSymtabIndex(macho_file).?, .little); } - for (macho_file.got.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); + for (macho_file.got.symbols.items) |ref| { + const sym = ref.getSymbol(macho_file).?; try writer.writeInt(u32, sym.getOutputSymtabIndex(macho_file).?, .little); } - for (macho_file.stubs.symbols.items) |sym_index| { - const sym = macho_file.getSymbol(sym_index); + for (macho_file.stubs.symbols.items) |ref| { + const sym = ref.getSymbol(macho_file).?; try writer.writeInt(u32, sym.getOutputSymtabIndex(macho_file).?, .little); } } }; -pub const RebaseSection = Rebase; -pub const BindSection = bind.Bind; -pub const WeakBindSection = bind.WeakBind; -pub const LazyBindSection = bind.LazyBind; -pub const ExportTrieSection = Trie; +pub const DataInCode = struct { + entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, + + pub fn deinit(dice: *DataInCode, allocator: Allocator) void { + dice.entries.deinit(allocator); + } + + pub fn size(dice: DataInCode) usize { + return dice.entries.items.len * @sizeOf(macho.data_in_code_entry); + } + + pub fn updateSize(dice: *DataInCode, macho_file: *MachO) !void { + const gpa = macho_file.base.allocator; + const base_address = if (!macho_file.options.relocatable) + macho_file.getTextSegment().vmaddr + else + 0; + + for (macho_file.objects.items) |index| { + const object = macho_file.getFile(index).?.object; + const dices = object.getDataInCode(); + + try dice.entries.ensureUnusedCapacity(gpa, dices.len); + + var next_dice: usize = 0; + 
for (object.getAtoms()) |atom_index| { + if (next_dice >= dices.len) break; + const atom = object.getAtom(atom_index) orelse continue; + if (!atom.alive.load(.seq_cst)) continue; + const start_off = atom.getInputAddress(macho_file); + const end_off = start_off + atom.size; + const start_dice = next_dice; + + if (end_off < dices[next_dice].offset) continue; + + while (next_dice < dices.len and + dices[next_dice].offset < end_off) : (next_dice += 1) + {} + + if (atom.alive.load(.seq_cst)) for (dices[start_dice..next_dice]) |d| { + dice.entries.appendAssumeCapacity(.{ + .offset = @intCast(atom.getAddress(macho_file) + d.offset - start_off - base_address), + .length = d.length, + .kind = d.kind, + }); + }; + } + } + + macho_file.data_in_code_cmd.datasize = math.cast(u32, dice.size()) orelse return error.Overflow; + } +}; + +pub const Rebase = @import("dyld_info/Rebase.zig"); +pub const Bind = bind.Bind; +pub const WeakBind = bind.WeakBind; +pub const LazyBind = bind.LazyBind; +pub const ExportTrie = Trie; const aarch64 = @import("../aarch64.zig"); const assert = std.debug.assert; const bind = @import("dyld_info/bind.zig"); +const macho = std.macho; const math = std.math; const std = @import("std"); const trace = @import("../tracy.zig").trace; const Allocator = std.mem.Allocator; const MachO = @import("../MachO.zig"); -const Rebase = @import("dyld_info/Rebase.zig"); const Relocation = @import("Relocation.zig"); const Symbol = @import("Symbol.zig"); const Trie = @import("dyld_info/Trie.zig"); diff --git a/src/MachO/thunks.zig b/src/MachO/thunks.zig index aae580ac..390f0fff 100644 --- a/src/MachO/thunks.zig +++ b/src/MachO/thunks.zig @@ -5,27 +5,27 @@ pub fn createThunks(sect_id: u8, macho_file: *MachO) !void { const gpa = macho_file.base.allocator; const slice = macho_file.sections.slice(); const header = &slice.items(.header)[sect_id]; + const thnks = &slice.items(.thunks)[sect_id]; const atoms = slice.items(.atoms)[sect_id].items; assert(atoms.len > 0); - for (atoms) |atom_index| { - macho_file.getAtom(atom_index).?.value = @bitCast(@as(i64, -1)); + for (atoms) |ref| { + ref.getAtom(macho_file).?.value = @bitCast(@as(i64, -1)); } var i: usize = 0; while (i < atoms.len) { const start = i; - const start_atom = macho_file.getAtom(atoms[start]).?; - assert(start_atom.flags.alive); + const start_atom = atoms[start].getAtom(macho_file).?; + assert(start_atom.alive.load(.seq_cst)); start_atom.value = try advance(header, start_atom.size, start_atom.alignment); i += 1; while (i < atoms.len and header.size - start_atom.value < max_allowed_distance) : (i += 1) { - const atom_index = atoms[i]; - const atom = macho_file.getAtom(atom_index).?; - assert(atom.flags.alive); + const atom = atoms[i].getAtom(macho_file).?; + assert(atom.alive.load(.seq_cst)); atom.value = try advance(header, atom.size, atom.alignment); } @@ -33,20 +33,10 @@ pub fn createThunks(sect_id: u8, macho_file: *MachO) !void { const thunk_index = try macho_file.addThunk(); const thunk = macho_file.getThunk(thunk_index); thunk.out_n_sect = sect_id; + try thnks.append(gpa, thunk_index); // Scan relocs in the group and create trampolines for any unreachable callsite - for (atoms[start..i]) |atom_index| { - const atom = macho_file.getAtom(atom_index).?; - log.debug("atom({d}) {s}", .{ atom_index, atom.getName(macho_file) }); - for (atom.getRelocs(macho_file)) |rel| { - if (rel.type != .branch) continue; - if (isReachable(atom, rel, macho_file)) continue; - try thunk.symbols.put(gpa, rel.target, {}); - } - try atom.addExtra(.{ .thunk = 
thunk_index }, macho_file); - atom.flags.thunk = true; - } - + try scanRelocs(thunk_index, gpa, atoms[start..i], macho_file); thunk.value = try advance(header, thunk.size(), 2); log.debug("thunk({d}) : {}", .{ thunk_index, thunk.fmt(macho_file) }); @@ -62,14 +52,32 @@ fn advance(sect: *macho.section_64, size: u64, pow2_align: u32) !u64 { return offset; } +fn scanRelocs(thunk_index: Thunk.Index, gpa: Allocator, atoms: []const MachO.Ref, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const thunk = macho_file.getThunk(thunk_index); + + for (atoms) |ref| { + const atom = ref.getAtom(macho_file).?; + log.debug("atom({d}) {s}", .{ atom.atom_index, atom.getName(macho_file) }); + for (atom.getRelocs(macho_file)) |rel| { + if (rel.type != .branch) continue; + if (isReachable(atom, rel, macho_file)) continue; + try thunk.symbols.put(gpa, rel.getTargetSymbolRef(atom.*, macho_file), {}); + } + atom.addExtra(.{ .thunk = thunk_index }, macho_file); + } +} + fn isReachable(atom: *const Atom, rel: Relocation, macho_file: *MachO) bool { - const target = rel.getTargetSymbol(macho_file); - if (target.flags.stubs or target.flags.objc_stubs) return false; - if (atom.out_n_sect != target.out_n_sect) return false; + const target = rel.getTargetSymbol(atom.*, macho_file); + if (target.getSectionFlags().stubs or target.getSectionFlags().objc_stubs) return false; + if (atom.out_n_sect != target.getOutputSectionIndex(macho_file)) return false; const target_atom = target.getAtom(macho_file).?; if (target_atom.value == @as(u64, @bitCast(@as(i64, -1)))) return false; const saddr = @as(i64, @intCast(atom.getAddress(macho_file))) + @as(i64, @intCast(rel.offset - atom.off)); - const taddr: i64 = @intCast(rel.getTargetAddress(macho_file)); + const taddr: i64 = @intCast(rel.getTargetAddress(atom.*, macho_file)); _ = math.cast(i28, taddr + rel.addend - saddr) orelse return false; return true; } @@ -77,7 +85,7 @@ fn isReachable(atom: *const Atom, rel: Relocation, macho_file: *MachO) bool { pub const Thunk = struct { value: u64 = 0, out_n_sect: u8 = 0, - symbols: std.AutoArrayHashMapUnmanaged(Symbol.Index, void) = .{}, + symbols: std.AutoArrayHashMapUnmanaged(MachO.Ref, void) = .{}, pub fn deinit(thunk: *Thunk, allocator: Allocator) void { thunk.symbols.deinit(allocator); @@ -92,13 +100,13 @@ pub const Thunk = struct { return header.addr + thunk.value; } - pub fn getTargetAddress(thunk: Thunk, sym_index: Symbol.Index, macho_file: *MachO) u64 { - return thunk.getAddress(macho_file) + thunk.symbols.getIndex(sym_index).? * trampoline_size; + pub fn getTargetAddress(thunk: Thunk, ref: MachO.Ref, macho_file: *MachO) u64 { + return thunk.getAddress(macho_file) + thunk.symbols.getIndex(ref).? * trampoline_size; } pub fn write(thunk: Thunk, macho_file: *MachO, writer: anytype) !void { - for (thunk.symbols.keys(), 0..) |sym_index, i| { - const sym = macho_file.getSymbol(sym_index); + for (thunk.symbols.keys(), 0..) 
|ref, i| { + const sym = ref.getSymbol(macho_file).?; const saddr = thunk.getAddress(macho_file) + i * trampoline_size; const taddr = sym.getAddress(.{}, macho_file); const pages = try aarch64.calcNumberOfPages(@intCast(saddr), @intCast(taddr)); @@ -145,9 +153,9 @@ pub const Thunk = struct { const thunk = ctx.thunk; const macho_file = ctx.macho_file; try writer.print("@{x} : size({x})\n", .{ thunk.value, thunk.size() }); - for (thunk.symbols.keys()) |index| { - const sym = macho_file.getSymbol(index); - try writer.print(" %{d} : {s} : @{x}\n", .{ index, sym.getName(macho_file), sym.value }); + for (thunk.symbols.keys()) |ref| { + const sym = ref.getSymbol(macho_file).?; + try writer.print(" {} : {s} : @{x}\n", .{ ref, sym.getName(macho_file), sym.value }); } } diff --git a/src/Zld.zig b/src/Zld.zig index 8bb41de6..6b895e5a 100644 --- a/src/Zld.zig +++ b/src/Zld.zig @@ -3,7 +3,9 @@ allocator: Allocator, file: fs.File, thread_pool: *ThreadPool, warnings: std.ArrayListUnmanaged(ErrorMsg) = .{}, +warnings_mutex: std.Thread.Mutex = .{}, errors: std.ArrayListUnmanaged(ErrorMsg) = .{}, +errors_mutex: std.Thread.Mutex = .{}, pub const Tag = enum { coff, @@ -206,7 +208,9 @@ pub fn openPath(allocator: Allocator, tag: Tag, options: Options, thread_pool: * pub fn deinit(base: *Zld) void { base.file.close(); + assert(base.warnings.items.len == 0); base.warnings.deinit(base.allocator); + assert(base.errors.items.len == 0); base.errors.deinit(base.allocator); switch (base.tag) { .elf => { @@ -242,15 +246,13 @@ pub fn flush(base: *Zld) !void { } pub fn warn(base: *Zld, comptime format: []const u8, args: anytype) void { - base.warnings.ensureUnusedCapacity(base.allocator, 1) catch return; - const msg = std.fmt.allocPrint(base.allocator, format, args) catch return; - base.warnings.appendAssumeCapacity(.{ .msg = msg }); + const warning = base.addWarningWithNotes(0) catch return; + warning.addMsg(format, args) catch return; } pub fn fatal(base: *Zld, comptime format: []const u8, args: anytype) void { - base.errors.ensureUnusedCapacity(base.allocator, 1) catch return; - const msg = std.fmt.allocPrint(base.allocator, format, args) catch return; - base.errors.appendAssumeCapacity(.{ .msg = msg }); + const err = base.addErrorWithNotes(0) catch return; + err.addMsg(format, args) catch return; } pub const ErrorWithNotes = struct { @@ -277,6 +279,8 @@ pub const ErrorWithNotes = struct { }; pub fn addErrorWithNotes(base: *Zld, note_count: usize) !ErrorWithNotes { + base.errors_mutex.lock(); + defer base.errors_mutex.unlock(); const err_index = base.errors.items.len; const err_msg = try base.errors.addOne(base.allocator); err_msg.* = .{ .msg = undefined }; @@ -285,6 +289,8 @@ pub fn addErrorWithNotes(base: *Zld, note_count: usize) !ErrorWithNotes { } pub fn addWarningWithNotes(base: *Zld, note_count: usize) !ErrorWithNotes { + base.warnings_mutex.lock(); + defer base.warnings_mutex.unlock(); const err_index = base.warnings.items.len; const err_msg = try base.warnings.addOne(base.allocator); err_msg.* = .{ .msg = undefined }; diff --git a/src/main.zig b/src/main.zig index 92514e4a..bbdc59d0 100644 --- a/src/main.zig +++ b/src/main.zig @@ -68,6 +68,8 @@ fn print(comptime format: []const u8, args: anytype) void { } fn fatal(comptime format: []const u8, args: anytype) noreturn { + std.debug.lockStdErr(); + defer std.debug.unlockStdErr(); print(format, args); std.process.exit(1); } @@ -113,6 +115,7 @@ pub fn main() !void { const zld = try Zld.openPath(gpa, tag, opts, &thread_pool); defer zld.deinit(); zld.flush() catch 
|err| switch (err) { + error.FlushFailed, error.InferCpuFailed, error.ParseFailed, error.MultipleSymbolDefinition, diff --git a/src/tracy.zig b/src/tracy.zig index 3e223a88..15ad6970 100644 --- a/src/tracy.zig +++ b/src/tracy.zig @@ -3,7 +3,8 @@ const builtin = @import("builtin"); const build_options = @import("build_options"); pub const enable = if (builtin.is_test) false else build_options.enable_tracy; -pub const enable_allocation = enable; +// pub const enable_allocation = enable; +pub const enable_allocation = false; pub const enable_callstack = enable; // TODO: make this configurable diff --git a/test/macho.zig b/test/macho.zig index 7878c787..f04b96c3 100644 --- a/test/macho.zig +++ b/test/macho.zig @@ -162,7 +162,7 @@ fn testBuildVersionMacOS(b: *Build, opts: Options) *Step { const exe = ld(b, "a.out", opts); exe.addFileSource(obj.getFile()); - exe.addArgs(&.{ "-syslibroot", opts.macos_sdk }); + exe.addArgs(&.{ "-dynamic", "-syslibroot", opts.macos_sdk, "-lSystem", "-lc" }); const check = exe.check(); check.checkInHeaders(); @@ -182,12 +182,15 @@ fn testBuildVersionMacOS(b: *Build, opts: Options) *Step { const exe = ld(b, "a.out", opts); exe.addFileSource(obj.getFile()); exe.addArgs(&.{ + "-dynamic", "-syslibroot", opts.macos_sdk, "-platform_version", "macos", "10.13", "10.13", + "-lSystem", + "-lc", }); const check = exe.check(); @@ -214,7 +217,7 @@ fn testBuildVersionIOS(b: *Build, opts: Options) *Step { const exe = ld(b, "a.out", opts); exe.addFileSource(obj.getFile()); - exe.addArgs(&.{ "-syslibroot", ios_sdk }); + exe.addArgs(&.{ "-dynamic", "-syslibroot", ios_sdk, "-lSystem", "-lc" }); const check = exe.check(); check.checkInHeaders(); @@ -233,7 +236,7 @@ fn testBuildVersionIOS(b: *Build, opts: Options) *Step { const exe = ld(b, "a.out", opts); exe.addFileSource(obj.getFile()); - exe.addArgs(&.{ "-syslibroot", ios_sdk }); + exe.addArgs(&.{ "-dynamic", "-syslibroot", ios_sdk, "-lSystem", "-lc" }); const check = exe.check(); check.checkInHeaders(); @@ -2698,7 +2701,16 @@ fn testSearchStrategy(b: *Build, opts: Options) *Step { const dylib = ld(b, "liba.dylib", opts); dylib.addFileSource(obj.getFile()); - dylib.addArgs(&.{ "-syslibroot", opts.macos_sdk, "-dylib", "-install_name", "@rpath/liba.dylib" }); + dylib.addArgs(&.{ + "-dynamic", + "-syslibroot", + opts.macos_sdk, + "-dylib", + "-install_name", + "@rpath/liba.dylib", + "-lSystem", + "-lc", + }); const main_c = \\#include @@ -3664,7 +3676,7 @@ fn testUnwindInfo(b: *Build, opts: Options) *Step { obj2.addArgs(flags); const exe = ld(b, "main", opts); - exe.addArgs(&.{ "-syslibroot", opts.macos_sdk, "-lc++" }); + exe.addArgs(&.{ "-dynamic", "-syslibroot", opts.macos_sdk, "-lc++", "-lSystem", "-lc" }); exe.addFileSource(obj.getFile()); exe.addFileSource(obj1.getFile()); exe.addFileSource(obj2.getFile()); @@ -4094,10 +4106,8 @@ fn lipo(b: *Build, name: []const u8) SysCmd { fn ld(b: *Build, name: []const u8, opts: Options) SysCmd { const cmd = Run.create(b, "ld"); cmd.addFileArg(opts.zld); - cmd.addArg("-dynamic"); cmd.addArg("-o"); const out = cmd.addOutputFileArg(name); - cmd.addArgs(&.{ "-lSystem", "-lc" }); return .{ .cmd = cmd, .out = out }; }
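For the Zld.zig changes above, which add warnings_mutex and errors_mutex and route warn/fatal through addWarningWithNotes/addErrorWithNotes, the essential idea is that workers running on the thread pool may report diagnostics concurrently, so every append to the shared list happens under the corresponding mutex. A minimal sketch of that pattern follows; the Diagnostics type and worker function are illustrative stand-ins, not the linker's actual types.

const std = @import("std");

// Simplified stand-in for the mutex-guarded error list in Zld.zig.
const Diagnostics = struct {
    allocator: std.mem.Allocator,
    errors: std.ArrayListUnmanaged([]const u8) = .{},
    errors_mutex: std.Thread.Mutex = .{},

    // Analogue of the fatal() helper: format first, then append under the mutex.
    fn fatal(self: *Diagnostics, comptime format: []const u8, args: anytype) void {
        const msg = std.fmt.allocPrint(self.allocator, format, args) catch return;
        self.errors_mutex.lock();
        defer self.errors_mutex.unlock();
        self.errors.append(self.allocator, msg) catch self.allocator.free(msg);
    }

    fn deinit(self: *Diagnostics) void {
        for (self.errors.items) |msg| self.allocator.free(msg);
        self.errors.deinit(self.allocator);
    }
};

fn worker(diags: *Diagnostics, id: usize) void {
    diags.fatal("worker {d}: something went wrong", .{id});
}

test "concurrent diagnostics" {
    var diags = Diagnostics{ .allocator = std.testing.allocator };
    defer diags.deinit();

    var threads: [4]std.Thread = undefined;
    for (&threads, 0..) |*t, i| {
        t.* = try std.Thread.spawn(.{}, worker, .{ &diags, i });
    }
    for (threads) |t| t.join();

    try std.testing.expectEqual(@as(usize, 4), diags.errors.items.len);
}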