From 29b8816f1f612988fbc6d90462c8239379322240 Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Mon, 26 Jun 2023 16:32:01 +0000 Subject: [PATCH 1/6] wip --- third_party/libcxx/atomic | 12 +- third_party/libcxx/libcxx.mk | 1 + third_party/libcxx/span | 14 + third_party/mold/README.cosmo | 18 + third_party/mold/archive-file.h | 178 + third_party/mold/cmdline.h | 91 + third_party/mold/common.h | 1001 ++++++ third_party/mold/compress.cc | 186 + third_party/mold/config.h.in | 4 + third_party/mold/demangle.cc | 52 + third_party/mold/elf/arch-alpha.cc | 331 ++ third_party/mold/elf/arch-arm32.cc | 737 ++++ third_party/mold/elf/arch-arm64.cc | 595 ++++ third_party/mold/elf/arch-i386.cc | 565 +++ third_party/mold/elf/arch-m68k.cc | 326 ++ third_party/mold/elf/arch-ppc32.cc | 452 +++ third_party/mold/elf/arch-ppc64v1.cc | 687 ++++ third_party/mold/elf/arch-ppc64v2.cc | 555 +++ third_party/mold/elf/arch-riscv.cc | 938 +++++ third_party/mold/elf/arch-s390x.cc | 491 +++ third_party/mold/elf/arch-sh4.cc | 355 ++ third_party/mold/elf/arch-sparc64.cc | 622 ++++ third_party/mold/elf/arch-x86-64.cc | 773 ++++ third_party/mold/elf/cmdline.cc | 1278 +++++++ third_party/mold/elf/dwarf.cc | 555 +++ third_party/mold/elf/elf.cc | 922 +++++ third_party/mold/elf/elf.h | 2053 +++++++++++ third_party/mold/elf/gc-sections.cc | 180 + third_party/mold/elf/icf.cc | 615 ++++ third_party/mold/elf/input-files.cc | 1497 ++++++++ third_party/mold/elf/input-sections.cc | 498 +++ third_party/mold/elf/jobs.cc | 85 + third_party/mold/elf/linker-script.cc | 425 +++ third_party/mold/elf/lto-unix.cc | 739 ++++ third_party/mold/elf/lto-win32.cc | 26 + third_party/mold/elf/lto.cc | 6 + third_party/mold/elf/lto.h | 167 + third_party/mold/elf/main.cc | 812 +++++ third_party/mold/elf/mapfile.cc | 117 + third_party/mold/elf/mold-wrapper.c | 171 + third_party/mold/elf/mold.h | 2852 +++++++++++++++ third_party/mold/elf/output-chunks.cc | 3153 +++++++++++++++++ third_party/mold/elf/passes.cc | 2653 ++++++++++++++ third_party/mold/elf/relocatable.cc | 198 ++ third_party/mold/elf/subprocess.cc | 166 + third_party/mold/elf/thunks.cc | 318 ++ third_party/mold/elf/tls.cc | 215 ++ third_party/mold/fake_tbb.h | 15 + third_party/mold/filepath.cc | 37 + third_party/mold/filetype.h | 194 + third_party/mold/glob.cc | 150 + third_party/mold/hyperloglog.cc | 21 + third_party/mold/integers.h | 222 ++ third_party/mold/main.cc | 188 + third_party/mold/mold.mk | 61 + third_party/mold/multi-glob.cc | 167 + third_party/mold/output-file-unix.h | 203 ++ third_party/mold/output-file-win32.h | 85 + third_party/mold/output-file.h | 6 + third_party/mold/perf.cc | 140 + third_party/mold/sha.h | 82 + third_party/mold/tar.cc | 113 + third_party/mold/test/elf/CMakeLists.txt | 74 + ...rch64_range-extension-thunk-disassembly.sh | 30 + third_party/mold/test/elf/abs-error.sh | 29 + third_party/mold/test/elf/absolute-symbols.sh | 67 + .../test/elf/allow-multiple-definition.sh | 10 + third_party/mold/test/elf/ar-alignment.sh | 35 + .../arm_range-extension-thunk-disassembly.sh | 34 + .../test/elf/arm_range-extension-thunk.sh | 60 + .../mold/test/elf/arm_thumb-interwork.sh | 45 + third_party/mold/test/elf/arm_tlsdesc.sh | 72 + third_party/mold/test/elf/as-needed-dso.sh | 25 + third_party/mold/test/elf/as-needed-weak.sh | 32 + third_party/mold/test/elf/as-needed.sh | 30 + third_party/mold/test/elf/as-needed2.sh | 39 + third_party/mold/test/elf/auxiliary.sh | 16 + third_party/mold/test/elf/bno-symbolic.sh | 43 + .../mold/test/elf/bsymbolic-functions.sh | 34 + 
third_party/mold/test/elf/bsymbolic.sh | 30 + third_party/mold/test/elf/bug178.sh | 17 + third_party/mold/test/elf/build-id.sh | 24 + third_party/mold/test/elf/canonical-plt.sh | 46 + third_party/mold/test/elf/cmdline.sh | 8 + .../mold/test/elf/color-diagnostics.sh | 20 + third_party/mold/test/elf/comment.sh | 11 + third_party/mold/test/elf/common-archive.sh | 53 + third_party/mold/test/elf/common-ref.sh | 38 + third_party/mold/test/elf/common.inc | 92 + third_party/mold/test/elf/common.sh | 33 + .../test/elf/compress-debug-sections-zstd.sh | 29 + .../mold/test/elf/compress-debug-sections.sh | 25 + .../mold/test/elf/compressed-debug-info.sh | 21 + .../mold/test/elf/copyrel-alignment.sh | 43 + .../mold/test/elf/copyrel-protected.sh | 22 + third_party/mold/test/elf/copyrel-relro.sh | 53 + third_party/mold/test/elf/copyrel.sh | 36 + .../mold/test/elf/dead-debug-sections.sh | 32 + .../mold/test/elf/debug-macro-section.sh | 23 + third_party/mold/test/elf/default-symver.sh | 14 + third_party/mold/test/elf/defsym-lto.sh | 29 + third_party/mold/test/elf/defsym.sh | 29 + third_party/mold/test/elf/defsym2.sh | 10 + third_party/mold/test/elf/demangle-rust.sh | 15 + third_party/mold/test/elf/demangle.sh | 29 + third_party/mold/test/elf/dependency-file.sh | 21 + .../mold/test/elf/disable-new-dtags.sh | 16 + third_party/mold/test/elf/discard.sh | 40 + third_party/mold/test/elf/dso-undef.sh | 28 + third_party/mold/test/elf/dt-init.sh | 52 + third_party/mold/test/elf/dt-needed.sh | 27 + third_party/mold/test/elf/duplicate-error.sh | 13 + third_party/mold/test/elf/dynamic-dt-debug.sh | 19 + third_party/mold/test/elf/dynamic-linker.sh | 20 + third_party/mold/test/elf/dynamic-list.sh | 37 + third_party/mold/test/elf/dynamic-list2.sh | 37 + third_party/mold/test/elf/dynamic-list3.sh | 40 + third_party/mold/test/elf/dynamic.sh | 27 + third_party/mold/test/elf/emit-relocs-cpp.sh | 16 + .../test/elf/emit-relocs-dead-sections.sh | 27 + third_party/mold/test/elf/emit-relocs.sh | 21 + third_party/mold/test/elf/empty-file.sh | 22 + third_party/mold/test/elf/empty-input.sh | 8 + third_party/mold/test/elf/empty-version.sh | 16 + third_party/mold/test/elf/entry.sh | 25 + third_party/mold/test/elf/exception.sh | 74 + third_party/mold/test/elf/exclude-libs.sh | 56 + third_party/mold/test/elf/exclude-libs2.sh | 21 + third_party/mold/test/elf/exclude-libs3.sh | 20 + third_party/mold/test/elf/execstack.sh | 16 + third_party/mold/test/elf/execute-only.sh | 26 + third_party/mold/test/elf/export-dynamic.sh | 24 + third_party/mold/test/elf/export-from-exe.sh | 28 + third_party/mold/test/elf/fatal-warnings.sh | 21 + third_party/mold/test/elf/filler.sh | 34 + third_party/mold/test/elf/filter.sh | 16 + third_party/mold/test/elf/func-addr.sh | 29 + third_party/mold/test/elf/gc-sections.sh | 56 + .../test/elf/gdb-index-compress-output.sh | 52 + third_party/mold/test/elf/gdb-index-dwarf2.sh | 63 + third_party/mold/test/elf/gdb-index-dwarf3.sh | 63 + third_party/mold/test/elf/gdb-index-dwarf4.sh | 63 + third_party/mold/test/elf/gdb-index-dwarf5.sh | 99 + third_party/mold/test/elf/gdb-index-empty.sh | 8 + .../mold/test/elf/gdb-index-split-dwarf.sh | 87 + third_party/mold/test/elf/glibc-2.22-bug.sh | 26 + .../mold/test/elf/global-offset-table.sh | 34 + third_party/mold/test/elf/gnu-hash.sh | 11 + third_party/mold/test/elf/gnu-unique.sh | 29 + third_party/mold/test/elf/gnu-warning.sh | 27 + third_party/mold/test/elf/hash-style.sh | 18 + third_party/mold/test/elf/hello-dynamic.sh | 19 + third_party/mold/test/elf/hello-static.sh | 21 + 
third_party/mold/test/elf/help.sh | 5 + third_party/mold/test/elf/hidden-undef.sh | 15 + .../mold/test/elf/hidden-weak-undef.sh | 14 + .../mold/test/elf/i386_tls-module-base.sh | 56 + third_party/mold/test/elf/icf-small.sh | 9 + third_party/mold/test/elf/icf.sh | 45 + third_party/mold/test/elf/ifunc-alias.sh | 27 + third_party/mold/test/elf/ifunc-dlopen.sh | 55 + third_party/mold/test/elf/ifunc-dso.sh | 42 + third_party/mold/test/elf/ifunc-dynamic.sh | 38 + third_party/mold/test/elf/ifunc-export.sh | 31 + third_party/mold/test/elf/ifunc-funcptr.sh | 44 + third_party/mold/test/elf/ifunc-noplt.sh | 35 + third_party/mold/test/elf/ifunc-static-pie.sh | 34 + third_party/mold/test/elf/ifunc-static.sh | 34 + third_party/mold/test/elf/image-base.sh | 31 + .../mold/test/elf/init-array-priorities.sh | 101 + third_party/mold/test/elf/init-in-dso.sh | 16 + third_party/mold/test/elf/init.sh | 15 + third_party/mold/test/elf/initfirst.sh | 20 + third_party/mold/test/elf/interpose.sh | 20 + .../mold/test/elf/invalid-version-script.sh | 11 + third_party/mold/test/elf/issue646.sh | 30 + .../mold/test/elf/large-alignment-dso.sh | 44 + third_party/mold/test/elf/large-alignment.sh | 45 + .../test/elf/large-max-page-size-strip.sh | 26 + .../mold/test/elf/large-max-page-size.sh | 19 + third_party/mold/test/elf/large-text.sh | 23 + third_party/mold/test/elf/link-order.sh | 23 + .../mold/test/elf/linker-script-defsym.sh | 32 + .../test/elf/linker-script-relocatable.sh | 29 + third_party/mold/test/elf/linker-script.sh | 29 + third_party/mold/test/elf/linker-script2.sh | 17 + third_party/mold/test/elf/linker-script3.sh | 15 + third_party/mold/test/elf/linker-script4.sh | 20 + third_party/mold/test/elf/lto-archive.sh | 51 + third_party/mold/test/elf/lto-dso.sh | 19 + third_party/mold/test/elf/lto-gcc.sh | 56 + third_party/mold/test/elf/lto-llvm.sh | 24 + .../mold/test/elf/lto-version-script.sh | 26 + third_party/mold/test/elf/many-sections.sh | 24 + third_party/mold/test/elf/many-sections2.sh | 13 + .../mold/test/elf/mergeable-strings.sh | 57 + third_party/mold/test/elf/missing-but-ok.sh | 12 + third_party/mold/test/elf/missing-error.sh | 15 + third_party/mold/test/elf/mold-wrapper.sh | 96 + third_party/mold/test/elf/mold-wrapper2.sh | 14 + .../mold/test/elf/no-eh-frame-header.sh | 16 + third_party/mold/test/elf/no-quick-exit.sh | 21 + .../mold/test/elf/no-undefined-version.sh | 12 + third_party/mold/test/elf/nocopyreloc.sh | 41 + third_party/mold/test/elf/noinhibit-exec.sh | 15 + .../mold/test/elf/non-canonical-plt.sh | 45 + third_party/mold/test/elf/nostdlib.sh | 13 + third_party/mold/test/elf/now.sh | 24 + third_party/mold/test/elf/oformat-binary.sh | 15 + third_party/mold/test/elf/omagic.sh | 23 + .../mold/test/elf/pack-dyn-relocs-relr.sh | 34 + third_party/mold/test/elf/package-metadata.sh | 19 + .../mold/test/elf/physical-image-base.sh | 37 + third_party/mold/test/elf/pie.sh | 22 + third_party/mold/test/elf/plt-dso.sh | 51 + third_party/mold/test/elf/pltgot.sh | 33 + third_party/mold/test/elf/preinit-array.sh | 24 + .../mold/test/elf/print-dependencies.sh | 15 + third_party/mold/test/elf/protected-dynsym.sh | 21 + third_party/mold/test/elf/protected.sh | 48 + third_party/mold/test/elf/push-pop-state.sh | 22 + .../mold/test/elf/range-extension-thunk.sh | 62 + third_party/mold/test/elf/relax-got-load.sh | 24 + third_party/mold/test/elf/reloc-rodata.sh | 26 + .../mold/test/elf/relocatable-archive.sh | 36 + .../mold/test/elf/relocatable-debug-info.sh | 27 + .../mold/test/elf/relocatable-exception.sh | 30 + 
.../test/elf/relocatable-merge-sections.sh | 21 + .../mold/test/elf/relocatable-no-ehframe.sh | 19 + third_party/mold/test/elf/relocatable.sh | 27 + third_party/mold/test/elf/relro.sh | 31 + third_party/mold/test/elf/repro.sh | 36 + third_party/mold/test/elf/require-defined.sh | 23 + third_party/mold/test/elf/response-file.sh | 21 + .../mold/test/elf/retain-symbols-file.sh | 24 + third_party/mold/test/elf/reverse-sections.sh | 44 + third_party/mold/test/elf/riscv64_norvc.sh | 33 + .../mold/test/elf/riscv64_obj-compatible.sh | 23 + .../mold/test/elf/riscv64_weak-undef.sh | 26 + third_party/mold/test/elf/rodata-name.sh | 57 + third_party/mold/test/elf/rosegment.sh | 26 + third_party/mold/test/elf/rpath.sh | 16 + third_party/mold/test/elf/run-clang.sh | 30 + third_party/mold/test/elf/run.sh | 59 + third_party/mold/test/elf/s390x_got.sh | 27 + third_party/mold/test/elf/section-align.sh | 17 + third_party/mold/test/elf/section-order.sh | 50 + third_party/mold/test/elf/section-start.sh | 44 + third_party/mold/test/elf/shared-abs-sym.sh | 36 + third_party/mold/test/elf/shared.sh | 41 + .../mold/test/elf/shuffle-sections-seed.sh | 37 + third_party/mold/test/elf/shuffle-sections.sh | 29 + third_party/mold/test/elf/soname.sh | 14 + third_party/mold/test/elf/start-lib.sh | 20 + .../mold/test/elf/start-stop-symbol.sh | 34 + third_party/mold/test/elf/start-stop.sh | 11 + third_party/mold/test/elf/static-archive.sh | 39 + third_party/mold/test/elf/static-pie.sh | 24 + third_party/mold/test/elf/stdout.sh | 22 + third_party/mold/test/elf/strip.sh | 31 + third_party/mold/test/elf/symbol-rank.sh | 49 + third_party/mold/test/elf/symbol-version.sh | 27 + third_party/mold/test/elf/symbol-version2.sh | 23 + third_party/mold/test/elf/symbol-version3.sh | 26 + third_party/mold/test/elf/symtab-dso.sh | 19 + .../mold/test/elf/symtab-section-symbols.sh | 19 + third_party/mold/test/elf/symtab.sh | 33 + .../mold/test/elf/synthetic-symbols.sh | 111 + .../mold/test/elf/sysroot-linker-script.sh | 22 + third_party/mold/test/elf/sysroot.sh | 39 + third_party/mold/test/elf/sysroot2.sh | 55 + third_party/mold/test/elf/tail-call.sh | 35 + third_party/mold/test/elf/thin-archive.sh | 44 + third_party/mold/test/elf/thread-count.sh | 22 + .../mold/test/elf/tls-alignment-multi.sh | 69 + third_party/mold/test/elf/tls-common.sh | 27 + .../mold/test/elf/tls-df-static-tls.sh | 21 + third_party/mold/test/elf/tls-dso.sh | 43 + third_party/mold/test/elf/tls-gd-noplt.sh | 49 + third_party/mold/test/elf/tls-gd-to-ie.sh | 45 + third_party/mold/test/elf/tls-gd.sh | 58 + third_party/mold/test/elf/tls-ie.sh | 56 + .../mold/test/elf/tls-irregular-start-addr.sh | 31 + .../mold/test/elf/tls-large-alignment.sh | 38 + .../mold/test/elf/tls-large-static-image.sh | 25 + third_party/mold/test/elf/tls-ld-noplt.sh | 36 + third_party/mold/test/elf/tls-ld.sh | 36 + third_party/mold/test/elf/tls-le-error.sh | 11 + third_party/mold/test/elf/tls-le.sh | 36 + third_party/mold/test/elf/tls-nopic.sh | 34 + third_party/mold/test/elf/tls-pic.sh | 33 + .../mold/test/elf/tls-small-alignment.sh | 36 + third_party/mold/test/elf/tlsdesc-import.sh | 37 + third_party/mold/test/elf/tlsdesc-static.sh | 40 + third_party/mold/test/elf/tlsdesc.sh | 60 + third_party/mold/test/elf/trace-symbol.sh | 43 + third_party/mold/test/elf/trace.sh | 21 + third_party/mold/test/elf/undefined.sh | 38 + .../mold/test/elf/unresolved-symbols.sh | 26 + third_party/mold/test/elf/verbose.sh | 19 + .../test/elf/version-script-search-paths.sh | 21 + third_party/mold/test/elf/version-script.sh | 20 
+ third_party/mold/test/elf/version-script10.sh | 21 + third_party/mold/test/elf/version-script11.sh | 20 + third_party/mold/test/elf/version-script12.sh | 26 + third_party/mold/test/elf/version-script13.sh | 21 + third_party/mold/test/elf/version-script14.sh | 32 + third_party/mold/test/elf/version-script15.sh | 27 + third_party/mold/test/elf/version-script16.sh | 14 + third_party/mold/test/elf/version-script17.sh | 33 + third_party/mold/test/elf/version-script18.sh | 24 + third_party/mold/test/elf/version-script2.sh | 45 + third_party/mold/test/elf/version-script3.sh | 41 + third_party/mold/test/elf/version-script4.sh | 31 + third_party/mold/test/elf/version-script5.sh | 21 + third_party/mold/test/elf/version-script6.sh | 35 + third_party/mold/test/elf/version-script7.sh | 18 + third_party/mold/test/elf/version-script8.sh | 39 + third_party/mold/test/elf/version-script9.sh | 19 + third_party/mold/test/elf/version.sh | 37 + third_party/mold/test/elf/versioned-undef.sh | 46 + third_party/mold/test/elf/visibility.sh | 24 + third_party/mold/test/elf/warn-common.sh | 21 + third_party/mold/test/elf/warn-once.sh | 18 + third_party/mold/test/elf/warn-symbol-type.sh | 25 + .../mold/test/elf/warn-unresolved-symbols.sh | 20 + third_party/mold/test/elf/weak-export-dso.sh | 25 + third_party/mold/test/elf/weak-export-exe.sh | 23 + third_party/mold/test/elf/weak-undef-dso.sh | 41 + third_party/mold/test/elf/weak-undef.sh | 45 + third_party/mold/test/elf/weak-undef2.sh | 27 + third_party/mold/test/elf/weak-undef4.sh | 50 + third_party/mold/test/elf/whole-archive.sh | 33 + third_party/mold/test/elf/wrap-lto.sh | 62 + third_party/mold/test/elf/wrap.sh | 62 + .../elf/x86_64_empty-mergeable-section.sh | 23 + .../test/elf/x86_64_emulation-deduction.sh | 13 + .../elf/x86_64_exception-mcmodel-large.sh | 24 + .../test/elf/x86_64_execstack-if-needed.sh | 18 + .../mold/test/elf/x86_64_gnu-linkonce.sh | 27 + .../mold/test/elf/x86_64_gnu-retain.sh | 32 + third_party/mold/test/elf/x86_64_gotpcrelx.sh | 29 + .../mold/test/elf/x86_64_ifunc-alias.sh | 37 + .../mold/test/elf/x86_64_incompatible-libs.sh | 51 + .../test/elf/x86_64_incompatible-libs2.sh | 47 + .../mold/test/elf/x86_64_incompatible-obj.sh | 18 + .../test/elf/x86_64_init-array-readonly.sh | 43 + .../mold/test/elf/x86_64_init-array.sh | 47 + third_party/mold/test/elf/x86_64_large-bss.sh | 15 + .../mold/test/elf/x86_64_mergeable-records.sh | 57 + .../mold/test/elf/x86_64_mergeable-strings.sh | 36 + .../mold/test/elf/x86_64_note-property.sh | 22 + .../mold/test/elf/x86_64_note-property2.sh | 84 + third_party/mold/test/elf/x86_64_note.sh | 44 + third_party/mold/test/elf/x86_64_note2.sh | 35 + third_party/mold/test/elf/x86_64_plt.sh | 29 + third_party/mold/test/elf/x86_64_pltgot.sh | 28 + .../mold/test/elf/x86_64_preinit-array.sh | 54 + third_party/mold/test/elf/x86_64_relax.sh | 58 + .../mold/test/elf/x86_64_reloc-overflow.sh | 15 + .../mold/test/elf/x86_64_reloc-zero.sh | 15 + third_party/mold/test/elf/x86_64_reloc.sh | 224 ++ .../mold/test/elf/x86_64_section-alignment.sh | 47 + .../mold/test/elf/x86_64_section-name.sh | 75 + .../test/elf/x86_64_tls-gd-mcmodel-large.sh | 51 + .../mold/test/elf/x86_64_tls-gd-to-ie.sh | 45 + .../mold/test/elf/x86_64_tls-large-tbss.sh | 37 + .../test/elf/x86_64_tls-ld-mcmodel-large.sh | 38 + .../mold/test/elf/x86_64_tls-module-base.sh | 50 + third_party/mold/test/elf/x86_64_unique.sh | 28 + .../mold/test/elf/x86_64_warn-execstack.sh | 15 + .../test/elf/x86_64_warn-shared-textrel.sh | 25 + .../mold/test/elf/x86_64_warn-textrel.sh | 
25 + third_party/mold/test/elf/x86_64_z-ibt.sh | 18 + third_party/mold/test/elf/x86_64_z-ibtplt.sh | 42 + third_party/mold/test/elf/x86_64_z-shstk.sh | 17 + third_party/mold/test/elf/x86_64_z-text.sh | 47 + third_party/mold/test/elf/z-cet-report.sh | 18 + third_party/mold/test/elf/z-defs.sh | 20 + .../mold/test/elf/z-dynamic-undefined-weak.sh | 34 + third_party/mold/test/elf/z-max-page-size.sh | 33 + third_party/mold/test/elf/z-nodefaultlib.sh | 19 + third_party/mold/test/elf/z-nodump.sh | 13 + third_party/mold/test/elf/z-now.sh | 20 + third_party/mold/test/elf/z-origin.sh | 22 + third_party/mold/test/elf/z-separate-code.sh | 28 + .../mold/test/elf/z-start-stop-visibility.sh | 7 + third_party/mold/test/elf/z-unknown.sh | 6 + third_party/mold/test/gentoo-test.sh | 69 + third_party/mold/update-git-hash.cmake | 34 + third_party/mold/uuid.cc | 21 + 391 files changed, 41166 insertions(+), 6 deletions(-) create mode 100644 third_party/mold/README.cosmo create mode 100644 third_party/mold/archive-file.h create mode 100644 third_party/mold/cmdline.h create mode 100644 third_party/mold/common.h create mode 100644 third_party/mold/compress.cc create mode 100644 third_party/mold/config.h.in create mode 100644 third_party/mold/demangle.cc create mode 100644 third_party/mold/elf/arch-alpha.cc create mode 100644 third_party/mold/elf/arch-arm32.cc create mode 100644 third_party/mold/elf/arch-arm64.cc create mode 100644 third_party/mold/elf/arch-i386.cc create mode 100644 third_party/mold/elf/arch-m68k.cc create mode 100644 third_party/mold/elf/arch-ppc32.cc create mode 100644 third_party/mold/elf/arch-ppc64v1.cc create mode 100644 third_party/mold/elf/arch-ppc64v2.cc create mode 100644 third_party/mold/elf/arch-riscv.cc create mode 100644 third_party/mold/elf/arch-s390x.cc create mode 100644 third_party/mold/elf/arch-sh4.cc create mode 100644 third_party/mold/elf/arch-sparc64.cc create mode 100644 third_party/mold/elf/arch-x86-64.cc create mode 100644 third_party/mold/elf/cmdline.cc create mode 100644 third_party/mold/elf/dwarf.cc create mode 100644 third_party/mold/elf/elf.cc create mode 100644 third_party/mold/elf/elf.h create mode 100644 third_party/mold/elf/gc-sections.cc create mode 100644 third_party/mold/elf/icf.cc create mode 100644 third_party/mold/elf/input-files.cc create mode 100644 third_party/mold/elf/input-sections.cc create mode 100644 third_party/mold/elf/jobs.cc create mode 100644 third_party/mold/elf/linker-script.cc create mode 100644 third_party/mold/elf/lto-unix.cc create mode 100644 third_party/mold/elf/lto-win32.cc create mode 100644 third_party/mold/elf/lto.cc create mode 100644 third_party/mold/elf/lto.h create mode 100644 third_party/mold/elf/main.cc create mode 100644 third_party/mold/elf/mapfile.cc create mode 100644 third_party/mold/elf/mold-wrapper.c create mode 100644 third_party/mold/elf/mold.h create mode 100644 third_party/mold/elf/output-chunks.cc create mode 100644 third_party/mold/elf/passes.cc create mode 100644 third_party/mold/elf/relocatable.cc create mode 100644 third_party/mold/elf/subprocess.cc create mode 100644 third_party/mold/elf/thunks.cc create mode 100644 third_party/mold/elf/tls.cc create mode 100644 third_party/mold/fake_tbb.h create mode 100644 third_party/mold/filepath.cc create mode 100644 third_party/mold/filetype.h create mode 100644 third_party/mold/glob.cc create mode 100644 third_party/mold/hyperloglog.cc create mode 100644 third_party/mold/integers.h create mode 100644 third_party/mold/main.cc create mode 100644 third_party/mold/mold.mk 
create mode 100644 third_party/mold/multi-glob.cc create mode 100644 third_party/mold/output-file-unix.h create mode 100644 third_party/mold/output-file-win32.h create mode 100644 third_party/mold/output-file.h create mode 100644 third_party/mold/perf.cc create mode 100644 third_party/mold/sha.h create mode 100644 third_party/mold/tar.cc create mode 100644 third_party/mold/test/elf/CMakeLists.txt create mode 100755 third_party/mold/test/elf/aarch64_range-extension-thunk-disassembly.sh create mode 100755 third_party/mold/test/elf/abs-error.sh create mode 100755 third_party/mold/test/elf/absolute-symbols.sh create mode 100755 third_party/mold/test/elf/allow-multiple-definition.sh create mode 100755 third_party/mold/test/elf/ar-alignment.sh create mode 100755 third_party/mold/test/elf/arm_range-extension-thunk-disassembly.sh create mode 100755 third_party/mold/test/elf/arm_range-extension-thunk.sh create mode 100755 third_party/mold/test/elf/arm_thumb-interwork.sh create mode 100755 third_party/mold/test/elf/arm_tlsdesc.sh create mode 100755 third_party/mold/test/elf/as-needed-dso.sh create mode 100755 third_party/mold/test/elf/as-needed-weak.sh create mode 100755 third_party/mold/test/elf/as-needed.sh create mode 100755 third_party/mold/test/elf/as-needed2.sh create mode 100755 third_party/mold/test/elf/auxiliary.sh create mode 100755 third_party/mold/test/elf/bno-symbolic.sh create mode 100755 third_party/mold/test/elf/bsymbolic-functions.sh create mode 100755 third_party/mold/test/elf/bsymbolic.sh create mode 100755 third_party/mold/test/elf/bug178.sh create mode 100755 third_party/mold/test/elf/build-id.sh create mode 100755 third_party/mold/test/elf/canonical-plt.sh create mode 100755 third_party/mold/test/elf/cmdline.sh create mode 100755 third_party/mold/test/elf/color-diagnostics.sh create mode 100755 third_party/mold/test/elf/comment.sh create mode 100755 third_party/mold/test/elf/common-archive.sh create mode 100755 third_party/mold/test/elf/common-ref.sh create mode 100644 third_party/mold/test/elf/common.inc create mode 100755 third_party/mold/test/elf/common.sh create mode 100755 third_party/mold/test/elf/compress-debug-sections-zstd.sh create mode 100755 third_party/mold/test/elf/compress-debug-sections.sh create mode 100755 third_party/mold/test/elf/compressed-debug-info.sh create mode 100755 third_party/mold/test/elf/copyrel-alignment.sh create mode 100755 third_party/mold/test/elf/copyrel-protected.sh create mode 100755 third_party/mold/test/elf/copyrel-relro.sh create mode 100755 third_party/mold/test/elf/copyrel.sh create mode 100755 third_party/mold/test/elf/dead-debug-sections.sh create mode 100755 third_party/mold/test/elf/debug-macro-section.sh create mode 100755 third_party/mold/test/elf/default-symver.sh create mode 100755 third_party/mold/test/elf/defsym-lto.sh create mode 100755 third_party/mold/test/elf/defsym.sh create mode 100755 third_party/mold/test/elf/defsym2.sh create mode 100755 third_party/mold/test/elf/demangle-rust.sh create mode 100755 third_party/mold/test/elf/demangle.sh create mode 100755 third_party/mold/test/elf/dependency-file.sh create mode 100755 third_party/mold/test/elf/disable-new-dtags.sh create mode 100755 third_party/mold/test/elf/discard.sh create mode 100755 third_party/mold/test/elf/dso-undef.sh create mode 100755 third_party/mold/test/elf/dt-init.sh create mode 100755 third_party/mold/test/elf/dt-needed.sh create mode 100755 third_party/mold/test/elf/duplicate-error.sh create mode 100755 third_party/mold/test/elf/dynamic-dt-debug.sh 
create mode 100755 third_party/mold/test/elf/dynamic-linker.sh create mode 100755 third_party/mold/test/elf/dynamic-list.sh create mode 100755 third_party/mold/test/elf/dynamic-list2.sh create mode 100755 third_party/mold/test/elf/dynamic-list3.sh create mode 100755 third_party/mold/test/elf/dynamic.sh create mode 100755 third_party/mold/test/elf/emit-relocs-cpp.sh create mode 100755 third_party/mold/test/elf/emit-relocs-dead-sections.sh create mode 100755 third_party/mold/test/elf/emit-relocs.sh create mode 100755 third_party/mold/test/elf/empty-file.sh create mode 100755 third_party/mold/test/elf/empty-input.sh create mode 100755 third_party/mold/test/elf/empty-version.sh create mode 100755 third_party/mold/test/elf/entry.sh create mode 100755 third_party/mold/test/elf/exception.sh create mode 100755 third_party/mold/test/elf/exclude-libs.sh create mode 100755 third_party/mold/test/elf/exclude-libs2.sh create mode 100755 third_party/mold/test/elf/exclude-libs3.sh create mode 100755 third_party/mold/test/elf/execstack.sh create mode 100755 third_party/mold/test/elf/execute-only.sh create mode 100755 third_party/mold/test/elf/export-dynamic.sh create mode 100755 third_party/mold/test/elf/export-from-exe.sh create mode 100755 third_party/mold/test/elf/fatal-warnings.sh create mode 100755 third_party/mold/test/elf/filler.sh create mode 100755 third_party/mold/test/elf/filter.sh create mode 100755 third_party/mold/test/elf/func-addr.sh create mode 100755 third_party/mold/test/elf/gc-sections.sh create mode 100755 third_party/mold/test/elf/gdb-index-compress-output.sh create mode 100755 third_party/mold/test/elf/gdb-index-dwarf2.sh create mode 100755 third_party/mold/test/elf/gdb-index-dwarf3.sh create mode 100755 third_party/mold/test/elf/gdb-index-dwarf4.sh create mode 100755 third_party/mold/test/elf/gdb-index-dwarf5.sh create mode 100755 third_party/mold/test/elf/gdb-index-empty.sh create mode 100755 third_party/mold/test/elf/gdb-index-split-dwarf.sh create mode 100755 third_party/mold/test/elf/glibc-2.22-bug.sh create mode 100755 third_party/mold/test/elf/global-offset-table.sh create mode 100755 third_party/mold/test/elf/gnu-hash.sh create mode 100755 third_party/mold/test/elf/gnu-unique.sh create mode 100755 third_party/mold/test/elf/gnu-warning.sh create mode 100755 third_party/mold/test/elf/hash-style.sh create mode 100755 third_party/mold/test/elf/hello-dynamic.sh create mode 100755 third_party/mold/test/elf/hello-static.sh create mode 100755 third_party/mold/test/elf/help.sh create mode 100755 third_party/mold/test/elf/hidden-undef.sh create mode 100755 third_party/mold/test/elf/hidden-weak-undef.sh create mode 100755 third_party/mold/test/elf/i386_tls-module-base.sh create mode 100755 third_party/mold/test/elf/icf-small.sh create mode 100755 third_party/mold/test/elf/icf.sh create mode 100755 third_party/mold/test/elf/ifunc-alias.sh create mode 100755 third_party/mold/test/elf/ifunc-dlopen.sh create mode 100755 third_party/mold/test/elf/ifunc-dso.sh create mode 100755 third_party/mold/test/elf/ifunc-dynamic.sh create mode 100755 third_party/mold/test/elf/ifunc-export.sh create mode 100755 third_party/mold/test/elf/ifunc-funcptr.sh create mode 100755 third_party/mold/test/elf/ifunc-noplt.sh create mode 100755 third_party/mold/test/elf/ifunc-static-pie.sh create mode 100755 third_party/mold/test/elf/ifunc-static.sh create mode 100755 third_party/mold/test/elf/image-base.sh create mode 100755 third_party/mold/test/elf/init-array-priorities.sh create mode 100755 
third_party/mold/test/elf/init-in-dso.sh create mode 100755 third_party/mold/test/elf/init.sh create mode 100755 third_party/mold/test/elf/initfirst.sh create mode 100755 third_party/mold/test/elf/interpose.sh create mode 100755 third_party/mold/test/elf/invalid-version-script.sh create mode 100755 third_party/mold/test/elf/issue646.sh create mode 100755 third_party/mold/test/elf/large-alignment-dso.sh create mode 100755 third_party/mold/test/elf/large-alignment.sh create mode 100755 third_party/mold/test/elf/large-max-page-size-strip.sh create mode 100755 third_party/mold/test/elf/large-max-page-size.sh create mode 100755 third_party/mold/test/elf/large-text.sh create mode 100755 third_party/mold/test/elf/link-order.sh create mode 100755 third_party/mold/test/elf/linker-script-defsym.sh create mode 100755 third_party/mold/test/elf/linker-script-relocatable.sh create mode 100755 third_party/mold/test/elf/linker-script.sh create mode 100755 third_party/mold/test/elf/linker-script2.sh create mode 100755 third_party/mold/test/elf/linker-script3.sh create mode 100755 third_party/mold/test/elf/linker-script4.sh create mode 100755 third_party/mold/test/elf/lto-archive.sh create mode 100755 third_party/mold/test/elf/lto-dso.sh create mode 100755 third_party/mold/test/elf/lto-gcc.sh create mode 100755 third_party/mold/test/elf/lto-llvm.sh create mode 100755 third_party/mold/test/elf/lto-version-script.sh create mode 100755 third_party/mold/test/elf/many-sections.sh create mode 100755 third_party/mold/test/elf/many-sections2.sh create mode 100755 third_party/mold/test/elf/mergeable-strings.sh create mode 100755 third_party/mold/test/elf/missing-but-ok.sh create mode 100755 third_party/mold/test/elf/missing-error.sh create mode 100755 third_party/mold/test/elf/mold-wrapper.sh create mode 100755 third_party/mold/test/elf/mold-wrapper2.sh create mode 100755 third_party/mold/test/elf/no-eh-frame-header.sh create mode 100755 third_party/mold/test/elf/no-quick-exit.sh create mode 100755 third_party/mold/test/elf/no-undefined-version.sh create mode 100755 third_party/mold/test/elf/nocopyreloc.sh create mode 100755 third_party/mold/test/elf/noinhibit-exec.sh create mode 100755 third_party/mold/test/elf/non-canonical-plt.sh create mode 100755 third_party/mold/test/elf/nostdlib.sh create mode 100755 third_party/mold/test/elf/now.sh create mode 100755 third_party/mold/test/elf/oformat-binary.sh create mode 100755 third_party/mold/test/elf/omagic.sh create mode 100755 third_party/mold/test/elf/pack-dyn-relocs-relr.sh create mode 100755 third_party/mold/test/elf/package-metadata.sh create mode 100755 third_party/mold/test/elf/physical-image-base.sh create mode 100755 third_party/mold/test/elf/pie.sh create mode 100755 third_party/mold/test/elf/plt-dso.sh create mode 100755 third_party/mold/test/elf/pltgot.sh create mode 100755 third_party/mold/test/elf/preinit-array.sh create mode 100755 third_party/mold/test/elf/print-dependencies.sh create mode 100755 third_party/mold/test/elf/protected-dynsym.sh create mode 100755 third_party/mold/test/elf/protected.sh create mode 100755 third_party/mold/test/elf/push-pop-state.sh create mode 100755 third_party/mold/test/elf/range-extension-thunk.sh create mode 100755 third_party/mold/test/elf/relax-got-load.sh create mode 100755 third_party/mold/test/elf/reloc-rodata.sh create mode 100755 third_party/mold/test/elf/relocatable-archive.sh create mode 100755 third_party/mold/test/elf/relocatable-debug-info.sh create mode 100755 
third_party/mold/test/elf/relocatable-exception.sh create mode 100755 third_party/mold/test/elf/relocatable-merge-sections.sh create mode 100755 third_party/mold/test/elf/relocatable-no-ehframe.sh create mode 100755 third_party/mold/test/elf/relocatable.sh create mode 100755 third_party/mold/test/elf/relro.sh create mode 100755 third_party/mold/test/elf/repro.sh create mode 100755 third_party/mold/test/elf/require-defined.sh create mode 100755 third_party/mold/test/elf/response-file.sh create mode 100755 third_party/mold/test/elf/retain-symbols-file.sh create mode 100755 third_party/mold/test/elf/reverse-sections.sh create mode 100755 third_party/mold/test/elf/riscv64_norvc.sh create mode 100755 third_party/mold/test/elf/riscv64_obj-compatible.sh create mode 100755 third_party/mold/test/elf/riscv64_weak-undef.sh create mode 100755 third_party/mold/test/elf/rodata-name.sh create mode 100755 third_party/mold/test/elf/rosegment.sh create mode 100755 third_party/mold/test/elf/rpath.sh create mode 100755 third_party/mold/test/elf/run-clang.sh create mode 100755 third_party/mold/test/elf/run.sh create mode 100755 third_party/mold/test/elf/s390x_got.sh create mode 100755 third_party/mold/test/elf/section-align.sh create mode 100755 third_party/mold/test/elf/section-order.sh create mode 100755 third_party/mold/test/elf/section-start.sh create mode 100755 third_party/mold/test/elf/shared-abs-sym.sh create mode 100755 third_party/mold/test/elf/shared.sh create mode 100755 third_party/mold/test/elf/shuffle-sections-seed.sh create mode 100755 third_party/mold/test/elf/shuffle-sections.sh create mode 100755 third_party/mold/test/elf/soname.sh create mode 100755 third_party/mold/test/elf/start-lib.sh create mode 100755 third_party/mold/test/elf/start-stop-symbol.sh create mode 100755 third_party/mold/test/elf/start-stop.sh create mode 100755 third_party/mold/test/elf/static-archive.sh create mode 100755 third_party/mold/test/elf/static-pie.sh create mode 100755 third_party/mold/test/elf/stdout.sh create mode 100755 third_party/mold/test/elf/strip.sh create mode 100755 third_party/mold/test/elf/symbol-rank.sh create mode 100755 third_party/mold/test/elf/symbol-version.sh create mode 100755 third_party/mold/test/elf/symbol-version2.sh create mode 100755 third_party/mold/test/elf/symbol-version3.sh create mode 100755 third_party/mold/test/elf/symtab-dso.sh create mode 100755 third_party/mold/test/elf/symtab-section-symbols.sh create mode 100755 third_party/mold/test/elf/symtab.sh create mode 100755 third_party/mold/test/elf/synthetic-symbols.sh create mode 100755 third_party/mold/test/elf/sysroot-linker-script.sh create mode 100755 third_party/mold/test/elf/sysroot.sh create mode 100755 third_party/mold/test/elf/sysroot2.sh create mode 100755 third_party/mold/test/elf/tail-call.sh create mode 100755 third_party/mold/test/elf/thin-archive.sh create mode 100755 third_party/mold/test/elf/thread-count.sh create mode 100755 third_party/mold/test/elf/tls-alignment-multi.sh create mode 100755 third_party/mold/test/elf/tls-common.sh create mode 100755 third_party/mold/test/elf/tls-df-static-tls.sh create mode 100755 third_party/mold/test/elf/tls-dso.sh create mode 100755 third_party/mold/test/elf/tls-gd-noplt.sh create mode 100755 third_party/mold/test/elf/tls-gd-to-ie.sh create mode 100755 third_party/mold/test/elf/tls-gd.sh create mode 100755 third_party/mold/test/elf/tls-ie.sh create mode 100755 third_party/mold/test/elf/tls-irregular-start-addr.sh create mode 100755 
third_party/mold/test/elf/tls-large-alignment.sh create mode 100755 third_party/mold/test/elf/tls-large-static-image.sh create mode 100755 third_party/mold/test/elf/tls-ld-noplt.sh create mode 100755 third_party/mold/test/elf/tls-ld.sh create mode 100755 third_party/mold/test/elf/tls-le-error.sh create mode 100755 third_party/mold/test/elf/tls-le.sh create mode 100755 third_party/mold/test/elf/tls-nopic.sh create mode 100755 third_party/mold/test/elf/tls-pic.sh create mode 100755 third_party/mold/test/elf/tls-small-alignment.sh create mode 100755 third_party/mold/test/elf/tlsdesc-import.sh create mode 100755 third_party/mold/test/elf/tlsdesc-static.sh create mode 100755 third_party/mold/test/elf/tlsdesc.sh create mode 100755 third_party/mold/test/elf/trace-symbol.sh create mode 100755 third_party/mold/test/elf/trace.sh create mode 100755 third_party/mold/test/elf/undefined.sh create mode 100755 third_party/mold/test/elf/unresolved-symbols.sh create mode 100755 third_party/mold/test/elf/verbose.sh create mode 100755 third_party/mold/test/elf/version-script-search-paths.sh create mode 100755 third_party/mold/test/elf/version-script.sh create mode 100755 third_party/mold/test/elf/version-script10.sh create mode 100755 third_party/mold/test/elf/version-script11.sh create mode 100755 third_party/mold/test/elf/version-script12.sh create mode 100755 third_party/mold/test/elf/version-script13.sh create mode 100755 third_party/mold/test/elf/version-script14.sh create mode 100755 third_party/mold/test/elf/version-script15.sh create mode 100755 third_party/mold/test/elf/version-script16.sh create mode 100755 third_party/mold/test/elf/version-script17.sh create mode 100755 third_party/mold/test/elf/version-script18.sh create mode 100755 third_party/mold/test/elf/version-script2.sh create mode 100755 third_party/mold/test/elf/version-script3.sh create mode 100755 third_party/mold/test/elf/version-script4.sh create mode 100755 third_party/mold/test/elf/version-script5.sh create mode 100755 third_party/mold/test/elf/version-script6.sh create mode 100755 third_party/mold/test/elf/version-script7.sh create mode 100755 third_party/mold/test/elf/version-script8.sh create mode 100755 third_party/mold/test/elf/version-script9.sh create mode 100755 third_party/mold/test/elf/version.sh create mode 100755 third_party/mold/test/elf/versioned-undef.sh create mode 100755 third_party/mold/test/elf/visibility.sh create mode 100755 third_party/mold/test/elf/warn-common.sh create mode 100755 third_party/mold/test/elf/warn-once.sh create mode 100755 third_party/mold/test/elf/warn-symbol-type.sh create mode 100755 third_party/mold/test/elf/warn-unresolved-symbols.sh create mode 100755 third_party/mold/test/elf/weak-export-dso.sh create mode 100755 third_party/mold/test/elf/weak-export-exe.sh create mode 100755 third_party/mold/test/elf/weak-undef-dso.sh create mode 100755 third_party/mold/test/elf/weak-undef.sh create mode 100755 third_party/mold/test/elf/weak-undef2.sh create mode 100755 third_party/mold/test/elf/weak-undef4.sh create mode 100755 third_party/mold/test/elf/whole-archive.sh create mode 100755 third_party/mold/test/elf/wrap-lto.sh create mode 100755 third_party/mold/test/elf/wrap.sh create mode 100755 third_party/mold/test/elf/x86_64_empty-mergeable-section.sh create mode 100755 third_party/mold/test/elf/x86_64_emulation-deduction.sh create mode 100755 third_party/mold/test/elf/x86_64_exception-mcmodel-large.sh create mode 100755 third_party/mold/test/elf/x86_64_execstack-if-needed.sh create mode 100755 
third_party/mold/test/elf/x86_64_gnu-linkonce.sh create mode 100755 third_party/mold/test/elf/x86_64_gnu-retain.sh create mode 100755 third_party/mold/test/elf/x86_64_gotpcrelx.sh create mode 100755 third_party/mold/test/elf/x86_64_ifunc-alias.sh create mode 100755 third_party/mold/test/elf/x86_64_incompatible-libs.sh create mode 100755 third_party/mold/test/elf/x86_64_incompatible-libs2.sh create mode 100755 third_party/mold/test/elf/x86_64_incompatible-obj.sh create mode 100755 third_party/mold/test/elf/x86_64_init-array-readonly.sh create mode 100755 third_party/mold/test/elf/x86_64_init-array.sh create mode 100755 third_party/mold/test/elf/x86_64_large-bss.sh create mode 100755 third_party/mold/test/elf/x86_64_mergeable-records.sh create mode 100755 third_party/mold/test/elf/x86_64_mergeable-strings.sh create mode 100755 third_party/mold/test/elf/x86_64_note-property.sh create mode 100755 third_party/mold/test/elf/x86_64_note-property2.sh create mode 100755 third_party/mold/test/elf/x86_64_note.sh create mode 100755 third_party/mold/test/elf/x86_64_note2.sh create mode 100755 third_party/mold/test/elf/x86_64_plt.sh create mode 100755 third_party/mold/test/elf/x86_64_pltgot.sh create mode 100755 third_party/mold/test/elf/x86_64_preinit-array.sh create mode 100755 third_party/mold/test/elf/x86_64_relax.sh create mode 100755 third_party/mold/test/elf/x86_64_reloc-overflow.sh create mode 100755 third_party/mold/test/elf/x86_64_reloc-zero.sh create mode 100755 third_party/mold/test/elf/x86_64_reloc.sh create mode 100755 third_party/mold/test/elf/x86_64_section-alignment.sh create mode 100755 third_party/mold/test/elf/x86_64_section-name.sh create mode 100755 third_party/mold/test/elf/x86_64_tls-gd-mcmodel-large.sh create mode 100755 third_party/mold/test/elf/x86_64_tls-gd-to-ie.sh create mode 100755 third_party/mold/test/elf/x86_64_tls-large-tbss.sh create mode 100755 third_party/mold/test/elf/x86_64_tls-ld-mcmodel-large.sh create mode 100755 third_party/mold/test/elf/x86_64_tls-module-base.sh create mode 100755 third_party/mold/test/elf/x86_64_unique.sh create mode 100755 third_party/mold/test/elf/x86_64_warn-execstack.sh create mode 100755 third_party/mold/test/elf/x86_64_warn-shared-textrel.sh create mode 100755 third_party/mold/test/elf/x86_64_warn-textrel.sh create mode 100755 third_party/mold/test/elf/x86_64_z-ibt.sh create mode 100755 third_party/mold/test/elf/x86_64_z-ibtplt.sh create mode 100755 third_party/mold/test/elf/x86_64_z-shstk.sh create mode 100755 third_party/mold/test/elf/x86_64_z-text.sh create mode 100755 third_party/mold/test/elf/z-cet-report.sh create mode 100755 third_party/mold/test/elf/z-defs.sh create mode 100755 third_party/mold/test/elf/z-dynamic-undefined-weak.sh create mode 100755 third_party/mold/test/elf/z-max-page-size.sh create mode 100755 third_party/mold/test/elf/z-nodefaultlib.sh create mode 100755 third_party/mold/test/elf/z-nodump.sh create mode 100755 third_party/mold/test/elf/z-now.sh create mode 100755 third_party/mold/test/elf/z-origin.sh create mode 100755 third_party/mold/test/elf/z-separate-code.sh create mode 100755 third_party/mold/test/elf/z-start-stop-visibility.sh create mode 100755 third_party/mold/test/elf/z-unknown.sh create mode 100755 third_party/mold/test/gentoo-test.sh create mode 100644 third_party/mold/update-git-hash.cmake create mode 100644 third_party/mold/uuid.cc diff --git a/third_party/libcxx/atomic b/third_party/libcxx/atomic index 98744bed7c7..07ee818ef23 100644 --- a/third_party/libcxx/atomic +++ 
b/third_party/libcxx/atomic @@ -781,7 +781,7 @@ bool __cxx_atomic_compare_exchange_strong( } template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY inline bool __cxx_atomic_compare_exchange_strong( __cxx_atomic_base_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) { @@ -835,7 +835,7 @@ _Tp __cxx_atomic_fetch_add(volatile __cxx_atomic_base_impl<_Tp>* __a, } template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY inline _Tp __cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp>* __a, _Td __delta, memory_order __order) { return __atomic_fetch_add(&__a->__a_value, __delta * __skip_amt<_Tp>::value, @@ -851,7 +851,7 @@ _Tp __cxx_atomic_fetch_sub(volatile __cxx_atomic_base_impl<_Tp>* __a, } template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY inline _Tp __cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp>* __a, _Td __delta, memory_order __order) { return __atomic_fetch_sub(&__a->__a_value, __delta * __skip_amt<_Tp>::value, @@ -867,7 +867,7 @@ _Tp __cxx_atomic_fetch_and(volatile __cxx_atomic_base_impl<_Tp>* __a, } template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY inline _Tp __cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { return __atomic_fetch_and(&__a->__a_value, __pattern, @@ -875,7 +875,7 @@ _Tp __cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp>* __a, } template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY inline _Tp __cxx_atomic_fetch_or(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { return __atomic_fetch_or(&__a->__a_value, __pattern, @@ -883,7 +883,7 @@ _Tp __cxx_atomic_fetch_or(volatile __cxx_atomic_base_impl<_Tp>* __a, } template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY inline _Tp __cxx_atomic_fetch_or(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { return __atomic_fetch_or(&__a->__a_value, __pattern, diff --git a/third_party/libcxx/libcxx.mk b/third_party/libcxx/libcxx.mk index 617bc7d33a7..ddcce656de2 100644 --- a/third_party/libcxx/libcxx.mk +++ b/third_party/libcxx/libcxx.mk @@ -110,6 +110,7 @@ THIRD_PARTY_LIBCXX_A_HDRS = \ third_party/libcxx/refstring.hh \ third_party/libcxx/regex \ third_party/libcxx/scoped_allocator \ + third_party/libcxx/span \ third_party/libcxx/set \ third_party/libcxx/span \ third_party/libcxx/sstream \ diff --git a/third_party/libcxx/span b/third_party/libcxx/span index c56bed57ee1..66cef1de3be 100644 --- a/third_party/libcxx/span +++ b/third_party/libcxx/span @@ -1,5 +1,8 @@ // -*- C++ -*- +<<<<<<< HEAD // clang-format off +======= +>>>>>>> 80151924e (wip) //===------------------------------ span ---------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
@@ -130,10 +133,17 @@ template */ #include "third_party/libcxx/__config" +<<<<<<< HEAD #include "third_party/libcxx/iterator" // for iterators #include "third_party/libcxx/array" // for array #include "third_party/libcxx/type_traits" // for remove_cv, etc #include "third_party/libcxx/cstddef" // for byte +======= +#include "third_party/libcxx/cstddef" // for ptrdiff_t +#include "third_party/libcxx/iterator" // for iterators +#include "third_party/libcxx/array" // for array +#include "third_party/libcxx/type_traits" // for remove_cv, etc +>>>>>>> 80151924e (wip) #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -588,4 +598,8 @@ template _LIBCPP_END_NAMESPACE_STD +<<<<<<< HEAD +#endif // _LIBCPP_SPAN +======= #endif // _LIBCPP_SPAN +>>>>>>> 80151924e (wip) diff --git a/third_party/mold/README.cosmo b/third_party/mold/README.cosmo new file mode 100644 index 00000000000..7694651d6be --- /dev/null +++ b/third_party/mold/README.cosmo @@ -0,0 +1,18 @@ +DESCRIPTION + + Mold: A Modern Linker 🦠 + + mold is a faster drop-in replacement for existing Unix linkers. + It is several times quicker than the LLVM lld linker, the second-fastest open-source linker, + which I initially developed a few years ago. mold aims to enhance developer productivity by minimizing build time, + particularly in rapid debug-edit-rebuild cycles. + +SOURCE + + https://github.com/rui314/mold + + commit d4d93d7fb72dd19c44aafa4dd5397e35787d33ad + Author: Rui Ueyama + Date: Mon Jun 19 12:35:20 2023 +0900 + + Format diff --git a/third_party/mold/archive-file.h b/third_party/mold/archive-file.h new file mode 100644 index 00000000000..b630904e5cd --- /dev/null +++ b/third_party/mold/archive-file.h @@ -0,0 +1,178 @@ +// clang-format off +// This file contains functions to read an archive file (.a file). +// An archive file is just a bundle of object files. It's similar to +// tar or zip, but the contents are not compressed. +// +// An archive file is either "regular" or "thin". A regular archive +// contains object files directly, while a thin archive contains only +// pathnames. In the latter case, actual file contents have to be read +// from given pathnames. A regular archive is sometimes called "fat" +// archive as opposed to "thin". +// +// If an archive file is given to the linker, the linker pulls out +// object files that are needed to resolve undefined symbols. So, +// bunding object files as an archive and giving that archive to the +// linker has a different meaning than directly giving the same set of +// object files to the linker. The former links only needed object +// files, while the latter links all the given object files. +// +// Therefore, if you link libc.a for example, not all the libc +// functions are linked to your binary. Instead, only object files +// that provides functions and variables used in your program get +// linked. To make this efficient, static library functions are +// usually separated to each object file in an archive file. You can +// see the contents of libc.a by running `ar t +// /usr/lib/x86_64-linux-gnu/libc.a`. 
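For context on the on-disk format the readers below assume: both archive flavors begin with an 8-byte magic, "!<arch>\n" for a regular archive and "!<thin>\n" for a thin one, which is why the parsers in this file start at offset 8, and each member is then described by the fixed 60-byte ASCII header declared next as ArHdr. A minimal sketch of that magic check follows; the helper name looks_like_archive is illustrative only, since in mold itself this classification happens in get_file_type(), which read_archive_members() at the bottom of this file dispatches on.

    // Sketch only: the magic that read_fat_archive_members() and
    // read_thin_archive_members() skip over with `begin + 8`.
    inline bool looks_like_archive(const u8 *data, size_t size) {
      return size >= 8 &&
             (memcmp(data, "!<arch>\n", 8) == 0 ||   // regular ("fat") archive
              memcmp(data, "!<thin>\n", 8) == 0);    // thin archive
    }
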
+ +#pragma once + +#include "third_party/mold/common.h" +#include "third_party/mold/filetype.h" + +namespace mold { + +struct ArHdr { + char ar_name[16]; + char ar_date[12]; + char ar_uid[6]; + char ar_gid[6]; + char ar_mode[8]; + char ar_size[10]; + char ar_fmag[2]; + + bool starts_with(std::string_view s) const { + return std::string_view(ar_name, s.size()) == s; + } + + bool is_strtab() const { + return starts_with("// "); + } + + bool is_symtab() const { + return starts_with("/ ") || starts_with("/SYM64/ "); + } + + std::string read_name(std::string_view strtab, u8 *&ptr) const { + // BSD-style long filename + if (starts_with("#1/")) { + int namelen = atoi(ar_name + 3); + std::string name{(char *)ptr, (size_t)namelen}; + ptr += namelen; + + if (size_t pos = name.find('\0')) + name = name.substr(0, pos); + return name; + } + + // SysV-style long filename + if (starts_with("/")) { + const char *start = strtab.data() + atoi(ar_name + 1); + return {start, (const char *)strstr(start, "/\n")}; + } + + // Short fileanme + if (const char *end = (char *)memchr(ar_name, '/', sizeof(ar_name))) + return {ar_name, end}; + return {ar_name, sizeof(ar_name)}; + } +}; + +template +std::vector +read_thin_archive_members(Context &ctx, MappedFile *mf) { + u8 *begin = mf->data; + u8 *data = begin + 8; + std::vector vec; + std::string_view strtab; + + while (data < begin + mf->size) { + // Each header is aligned to a 2 byte boundary. + if ((begin - data) % 2) + data++; + + ArHdr &hdr = *(ArHdr *)data; + u8 *body = data + sizeof(hdr); + u64 size = atol(hdr.ar_size); + + // Read a string table. + if (hdr.is_strtab()) { + strtab = {(char *)body, (size_t)size}; + data = body + size; + continue; + } + + // Skip a symbol table. + if (hdr.is_symtab()) { + data = body + size; + continue; + } + + if (!hdr.starts_with("#1/") && !hdr.starts_with("/")) + Fatal(ctx) << mf->name << ": filename is not stored as a long filename"; + + std::string name = hdr.read_name(strtab, body); + + // Skip if symbol table + if (name == "__.SYMDEF" || name == "__.SYMDEF SORTED") + continue; + + std::string path = name.starts_with('/') ? 
+ name : (filepath(mf->name).parent_path() / name).string(); + vec.push_back(MappedFile::must_open(ctx, path)); + vec.back()->thin_parent = mf; + data = body; + } + return vec; +} + +template +std::vector read_fat_archive_members(Context &ctx, MappedFile *mf) { + u8 *begin = mf->data; + u8 *data = begin + 8; + std::vector vec; + std::string_view strtab; + + while (begin + mf->size - data >= 2) { + if ((begin - data) % 2) + data++; + + ArHdr &hdr = *(ArHdr *)data; + u8 *body = data + sizeof(hdr); + u64 size = atol(hdr.ar_size); + data = body + size; + + // Read if string table + if (hdr.is_strtab()) { + strtab = {(char *)body, (size_t)size}; + continue; + } + + // Skip if symbol table + if (hdr.is_symtab()) + continue; + + // Read the name field + std::string name = hdr.read_name(strtab, body); + + // Skip if symbol table + if (name == "__.SYMDEF" || name == "__.SYMDEF SORTED") + continue; + + vec.push_back(mf->slice(ctx, name, body - begin, data - body)); + } + return vec; +} + +template +std::vector read_archive_members(Context &ctx, MappedFile *mf) { + switch (get_file_type(ctx, mf)) { + case FileType::AR: + return read_fat_archive_members(ctx, mf); + case FileType::THIN_AR: + return read_thin_archive_members(ctx, mf); + default: + unreachable(); + } +} + +} // namespace mold diff --git a/third_party/mold/cmdline.h b/third_party/mold/cmdline.h new file mode 100644 index 00000000000..c95bbf2e2e0 --- /dev/null +++ b/third_party/mold/cmdline.h @@ -0,0 +1,91 @@ +// clang-format off +#pragma once + +#include "third_party/mold/common.h" + +namespace mold { + +template +std::vector +read_response_file(Context &ctx, std::string_view path) { + std::vector vec; + MappedFile *mf = MappedFile::must_open(ctx, std::string(path)); + u8 *data = mf->data; + + auto read_quoted = [&](i64 i, char quote) { + std::string buf; + while (i < mf->size && data[i] != quote) { + if (data[i] == '\\') { + buf.append(1, data[i + 1]); + i += 2; + } else { + buf.append(1, data[i++]); + } + } + if (i >= mf->size) + Fatal(ctx) << path << ": premature end of input"; + vec.push_back(save_string(ctx, buf)); + return i + 1; + }; + + auto read_unquoted = [&](i64 i) { + std::string buf; + + while (i < mf->size) { + if (data[i] == '\\' && i + 1 < mf->size) { + buf.append(1, data[i + 1]); + i += 2; + continue; + } + + if (!isspace(data[i])) { + buf.append(1, data[i++]); + continue; + } + + break; + } + + vec.push_back(save_string(ctx, buf)); + return i; + }; + + for (i64 i = 0; i < mf->size;) { + if (isspace(data[i])) + i++; + else if (data[i] == '\'') + i = read_quoted(i + 1, '\''); + else if (data[i] == '\"') + i = read_quoted(i + 1, '\"'); + else + i = read_unquoted(i); + } + return vec; +} + +// Replace "@path/to/some/text/file" with its file contents. 
+template +std::vector expand_response_files(Context &ctx, char **argv) { + std::vector vec; + for (i64 i = 0; argv[i]; i++) { + if (argv[i][0] == '@') + append(vec, read_response_file(ctx, argv[i] + 1)); + else + vec.push_back(argv[i]); + } + return vec; +} + +static inline std::string_view string_trim(std::string_view str) { + size_t pos = str.find_first_not_of(" \t"); + if (pos == str.npos) + return ""; + str = str.substr(pos); + + pos = str.find_last_not_of(" \t"); + if (pos == str.npos) + return str; + return str.substr(0, pos + 1); +} + +} // namespace mold diff --git a/third_party/mold/common.h b/third_party/mold/common.h new file mode 100644 index 00000000000..f8ef1300e37 --- /dev/null +++ b/third_party/mold/common.h @@ -0,0 +1,1001 @@ +// clang-format off +#pragma once + +#include "third_party/mold/integers.h" + +#include "third_party/libcxx/array" +#include "third_party/libcxx/atomic" +#include "third_party/libcxx/bit" +#include "third_party/libcxx/bitset" +#include "third_party/libcxx/cassert" +#include "third_party/libcxx/cstdio" +#include "third_party/libcxx/cstring" +#include "libc/calls/calls.h" +#include "libc/calls/struct/flock.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/at.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fd.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/posix.h" +#include "libc/sysv/consts/s.h" +#include "libc/sysv/consts/splice.h" +#include "third_party/libcxx/filesystem" +#include "third_party/libcxx/iostream" +#include "third_party/libcxx/mutex" +#include "third_party/libcxx/optional" +#include "third_party/libcxx/span" +#include "third_party/libcxx/sstream" +#include "third_party/libcxx/string" +#include "third_party/libcxx/string_view" +#include "libc/calls/calls.h" +#include "libc/calls/struct/stat.h" +#include "libc/calls/struct/stat.macros.h" +#include "libc/calls/struct/timespec.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/s.h" +#include "libc/sysv/consts/utime.h" +#include "libc/time/time.h" +#include "libc/calls/makedev.h" +#include "libc/calls/weirdtypes.h" +#include "libc/thread/thread.h" +#include "libc/calls/typedef/u.h" +#include "libc/calls/weirdtypes.h" +#include "libc/intrin/newbie.h" +#include "libc/sock/select.h" +#include "libc/sysv/consts/endian.h" +// MISSING #include +// MISSING #include +#include "third_party/libcxx/vector" + +#ifdef _WIN32 +// MISSING #include +#else +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/runtime.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/mlock.h" +#include "libc/sysv/consts/msync.h" +#include "libc/sysv/consts/posix.h" +#include "libc/sysv/consts/prot.h" +#include "libc/sysv/consts/madv.h" +#include "libc/sysv/consts/mfd.h" +#include "libc/sysv/consts/mremap.h" +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "libc/time/time.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/lockf.h" +#endif + +#define XXH_INLINE_ALL 1 +#include "third_party/xxhash/xxhash.h" + +#ifdef NDEBUG +# define unreachable() __builtin_unreachable() +#else +# define unreachable() assert(0 && "unreachable") +#endif + +// __builtin_assume() is supported only by clang, 
and [[assume]] is +// available only in C++23, so we use this macro when giving a hint to +// the compiler's optimizer what's true. +#define ASSUME(x) do { if (!(x)) __builtin_unreachable(); } while (0) + +// This is an assert() that is enabled even in the release build. +#define ASSERT(x) \ + do { \ + if (!(x)) { \ + std::cerr << "Assertion failed: (" << #x \ + << "), function " << __FUNCTION__ \ + << ", file " << __FILE__ \ + << ", line " << __LINE__ << ".\n"; \ + std::abort(); \ + } \ + } while (0) + +inline uint64_t hash_string(std::string_view str) { + return XXH3_64bits(str.data(), str.size()); +} + +class HashCmp { +public: + static size_t hash(const std::string_view &k) { + return hash_string(k); + } + + static bool equal(const std::string_view &k1, const std::string_view &k2) { + return k1 == k2; + } +}; + +namespace mold { + +using namespace std::literals::string_literals; +using namespace std::literals::string_view_literals; + +template class OutputFile; + +inline char *output_tmpfile; +inline thread_local bool opt_demangle; + +inline u8 *output_buffer_start = nullptr; +inline u8 *output_buffer_end = nullptr; + +inline std::string mold_version; +extern std::string mold_version_string; +extern std::string mold_git_hash; + +std::string errno_string(); +std::string get_self_path(); +void cleanup(); +void install_signal_handler(); +i64 get_default_thread_count(); + +static u64 combine_hash(u64 a, u64 b) { + return a ^ (b + 0x9e3779b9 + (a << 6) + (a >> 2)); +} + +// +// Error output +// + +template +class SyncOut { +public: + SyncOut(Context &ctx, std::ostream *out = &std::cout) : out(out) { + opt_demangle = ctx.arg.demangle; + } + + ~SyncOut() { + if (out) { + std::scoped_lock lock(mu); + *out << ss.str() << "\n"; + } + } + + template SyncOut &operator<<(T &&val) { + if (out) + ss << std::forward(val); + return *this; + } + + static inline std::mutex mu; + +private: + std::ostream *out; + std::stringstream ss; +}; + +template +static std::string add_color(Context &ctx, std::string msg) { + if (ctx.arg.color_diagnostics) + return "mold: \033[0;1;31m" + msg + ":\033[0m "; + return "mold: " + msg + ": "; +} + +template +class Fatal { +public: + Fatal(Context &ctx) : out(ctx, &std::cerr) { + out << add_color(ctx, "fatal"); + } + + [[noreturn]] ~Fatal() { + out.~SyncOut(); + cleanup(); + _exit(1); + } + + template Fatal &operator<<(T &&val) { + out << std::forward(val); + return *this; + } + +private: + SyncOut out; +}; + +template +class Error { +public: + Error(Context &ctx) : out(ctx, &std::cerr) { + if (ctx.arg.noinhibit_exec) { + out << add_color(ctx, "warning"); + } else { + out << add_color(ctx, "error"); + ctx.has_error = true; + } + } + + template Error &operator<<(T &&val) { + out << std::forward(val); + return *this; + } + +private: + SyncOut out; +}; + +template +class Warn { +public: + Warn(Context &ctx) + : out(ctx, ctx.arg.suppress_warnings ? nullptr : &std::cerr) { + if (ctx.arg.fatal_warnings) { + out << add_color(ctx, "error"); + ctx.has_error = true; + } else { + out << add_color(ctx, "warning"); + } + } + + template Warn &operator<<(T &&val) { + out << std::forward(val); + return *this; + } + +private: + SyncOut out; +}; + +// +// Atomics +// + +// This is the same as std::atomic except that the default memory +// order is relaxed instead of sequential consistency. 
+template +struct Atomic : std::atomic { + static constexpr std::memory_order relaxed = std::memory_order_relaxed; + + using std::atomic::atomic; + + Atomic(const Atomic &other) { store(other.load()); } + + Atomic &operator=(const Atomic &other) { + store(other.load()); + return *this; + } + + void operator=(T val) { store(val); } + operator T() const { return load(); } + + void store(T val) { std::atomic::store(val, relaxed); } + T load() const { return std::atomic::load(relaxed); } + T exchange(T val) { return std::atomic::exchange(val, relaxed); } + T operator|=(T val) { return std::atomic::fetch_or(val, relaxed); } + T operator++() { return std::atomic::fetch_add(1, relaxed) + 1; } + T operator--() { return std::atomic::fetch_sub(1, relaxed) - 1; } + T operator++(int) { return std::atomic::fetch_add(1, relaxed); } + T operator--(int) { return std::atomic::fetch_sub(1, relaxed); } + + bool test_and_set() { + // A relaxed load + branch (assuming miss) takes only around 20 cycles, + // while an atomic RMW can easily take hundreds on x86. We note that it's + // common that another thread beat us in marking, so doing an optimistic + // early test tends to improve performance in the ~20% ballpark. + return load() || exchange(true); + } +}; + +// +// Bit vector +// + +class BitVector { +public: + BitVector() = default; + BitVector(u32 size) : vec((size + 7) / 8) {} + + void resize(u32 size) { vec.resize((size + 7) / 8); } + bool get(u32 idx) const { return vec[idx / 8] & (1 << (idx % 8)); } + void set(u32 idx) { vec[idx / 8] |= 1 << (idx % 8); } + +private: + std::vector vec; +}; + +// +// Utility functions +// + +// Some C++ libraries haven't implemented std::has_single_bit yet. +inline bool has_single_bit(u64 val) { + return std::popcount(val) == 1; +} + +// Some C++ libraries haven't implemented std::bit_ceil yet. +inline u64 bit_ceil(u64 val) { + if (has_single_bit(val)) + return val; + return 1LL << (64 - std::countl_zero(val)); +} + +inline u64 align_to(u64 val, u64 align) { + if (align == 0) + return val; + assert(has_single_bit(align)); + return (val + align - 1) & ~(align - 1); +} + +inline u64 align_down(u64 val, u64 align) { + assert(has_single_bit(align)); + return val & ~(align - 1); +} + +inline u64 bit(u64 val, i64 pos) { + return (val >> pos) & 1; +}; + +// Returns [hi:lo] bits of val. 
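+// For example, bits(0x1234, 11, 4) == 0x23, i.e. the 8-bit field covering
+// bits 4 through 11 of the value.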
+inline u64 bits(u64 val, u64 hi, u64 lo) { + return (val >> lo) & ((1LL << (hi - lo + 1)) - 1); +} + +inline i64 sign_extend(u64 val, i64 size) { + return (i64)(val << (63 - size)) >> (63 - size); +}; + +template > +void update_minimum(std::atomic &atomic, u64 new_val, Compare cmp = {}) { + T old_val = atomic.load(std::memory_order_relaxed); + while (cmp(new_val, old_val) && + !atomic.compare_exchange_weak(old_val, new_val, + std::memory_order_relaxed)); +} + +template > +void update_maximum(std::atomic &atomic, u64 new_val, Compare cmp = {}) { + T old_val = atomic.load(std::memory_order_relaxed); + while (cmp(old_val, new_val) && + !atomic.compare_exchange_weak(old_val, new_val, + std::memory_order_relaxed)); +} + +template +inline void append(std::vector &vec1, std::vector vec2) { + vec1.insert(vec1.end(), vec2.begin(), vec2.end()); +} + +template +inline std::vector flatten(std::vector> &vec) { + i64 size = 0; + for (std::vector &v : vec) + size += v.size(); + + std::vector ret; + ret.reserve(size); + for (std::vector &v : vec) + append(ret, v); + return ret; +} + +template +inline void sort(T &vec) { + std::stable_sort(vec.begin(), vec.end()); +} + +template +inline void sort(T &vec, U less) { + std::stable_sort(vec.begin(), vec.end(), less); +} + +template +inline void remove_duplicates(std::vector &vec) { + vec.erase(std::unique(vec.begin(), vec.end()), vec.end()); +} + +inline i64 write_string(void *buf, std::string_view str) { + memcpy(buf, str.data(), str.size()); + *((u8 *)buf + str.size()) = '\0'; + return str.size() + 1; +} + +template +inline i64 write_vector(void *buf, const std::vector &vec) { + i64 sz = vec.size() * sizeof(T); + memcpy(buf, vec.data(), sz); + return sz; +} + +inline void encode_uleb(std::vector &vec, u64 val) { + do { + u8 byte = val & 0x7f; + val >>= 7; + vec.push_back(val ? (byte | 0x80) : byte); + } while (val); +} + +inline void encode_sleb(std::vector &vec, i64 val) { + for (;;) { + u8 byte = val & 0x7f; + val >>= 7; + + bool neg = (byte & 0x40); + if ((val == 0 && !neg) || (val == -1 && neg)) { + vec.push_back(byte); + break; + } + vec.push_back(byte | 0x80); + } +} + +inline i64 write_uleb(u8 *buf, u64 val) { + i64 i = 0; + do { + u8 byte = val & 0x7f; + val >>= 7; + buf[i++] = val ? (byte | 0x80) : byte; + } while (val); + return i; +} + +inline u64 read_uleb(u8 *&buf) { + u64 val = 0; + u8 shift = 0; + u8 byte; + do { + byte = *buf++; + val |= (byte & 0x7f) << shift; + shift += 7; + } while (byte & 0x80); + return val; +} + +inline u64 read_uleb(u8 const*&buf) { + return read_uleb(const_cast(buf)); +} + +inline u64 read_uleb(std::string_view &str) { + u8 *start = (u8 *)&str[0]; + u8 *ptr = start; + u64 val = read_uleb(ptr); + str = str.substr(ptr - start); + return val; +} + +inline i64 uleb_size(u64 val) { +#if __GNUC__ +#pragma GCC unroll 8 +#endif + for (int i = 1; i < 9; i++) + if (val < (1LL << (7 * i))) + return i; + return 9; +} + +template +std::string_view save_string(Context &ctx, const std::string &str) { + u8 *buf = new u8[str.size() + 1]; + memcpy(buf, str.data(), str.size()); + buf[str.size()] = '\0'; + ctx.string_pool.push_back(std::unique_ptr(buf)); + return {(char *)buf, str.size()}; +} + +inline bool remove_prefix(std::string_view &s, std::string_view prefix) { + if (s.starts_with(prefix)) { + s = s.substr(prefix.size()); + return true; + } + return false; +} + +// +// Concurrent Map +// + +// This is an implementation of a fast concurrent hash map. Unlike +// ordinary hash tables, this impl just aborts if it becomes full. 
+// So you need to give a correct estimation of the final size before +// using it. We use this hash map to uniquify pieces of data in +// mergeable sections. +template +class ConcurrentMap { +public: + ConcurrentMap() {} + + ConcurrentMap(i64 nbuckets) { + resize(nbuckets); + } + + ~ConcurrentMap() { + if (keys) { + free((void *)keys); + free((void *)key_sizes); + free((void *)values); + } + } + + void resize(i64 nbuckets) { + this->~ConcurrentMap(); + + nbuckets = std::max(MIN_NBUCKETS, bit_ceil(nbuckets)); + + this->nbuckets = nbuckets; + keys = (std::atomic *)calloc(nbuckets, sizeof(char *)); + key_sizes = (u32 *)malloc(nbuckets * sizeof(u32)); + values = (T *)malloc(nbuckets * sizeof(T)); + } + + std::pair insert(std::string_view key, u64 hash, const T &val) { + if (!keys) + return {nullptr, false}; + + assert(has_single_bit(nbuckets)); + i64 idx = hash & (nbuckets - 1); + i64 retry = 0; + + while (retry < MAX_RETRY) { + const char *ptr = keys[idx].load(std::memory_order_acquire); + if (ptr == marker) { + pause(); + continue; + } + + if (ptr == nullptr) { + if (!keys[idx].compare_exchange_weak(ptr, marker, + std::memory_order_acquire)) + continue; + new (values + idx) T(val); + key_sizes[idx] = key.size(); + keys[idx].store(key.data(), std::memory_order_release); + return {values + idx, true}; + } + + if (key.size() == key_sizes[idx] && + memcmp(ptr, key.data(), key_sizes[idx]) == 0) + return {values + idx, false}; + + u64 mask = nbuckets / NUM_SHARDS - 1; + idx = (idx & ~mask) | ((idx + 1) & mask); + retry++; + } + + assert(false && "ConcurrentMap is full"); + return {nullptr, false}; + } + + const char *get_key(i64 idx) { + return keys[idx].load(std::memory_order_relaxed); + } + + static constexpr i64 MIN_NBUCKETS = 2048; + static constexpr i64 NUM_SHARDS = 16; + static constexpr i64 MAX_RETRY = 128; + + i64 nbuckets = 0; + u32 *key_sizes = nullptr; + T *values = nullptr; + +private: + static void pause() { +#if defined(__x86_64__) + asm volatile("pause"); +#elif defined(__aarch64__) + asm volatile("yield"); +#endif + } + +private: + std::atomic *keys = nullptr; + static constexpr const char *marker = "marker"; +}; + +// +// output-file.h +// + +template +class OutputFile { +public: + static std::unique_ptr> + open(Context &ctx, std::string path, i64 filesize, i64 perm); + + virtual void close(Context &ctx) = 0; + virtual ~OutputFile() = default; + + u8 *buf = nullptr; + std::string path; + i64 filesize; + bool is_mmapped; + bool is_unmapped = false; + +protected: + OutputFile(std::string path, i64 filesize, bool is_mmapped) + : path(path), filesize(filesize), is_mmapped(is_mmapped) {} +}; + +// +// hyperloglog.cc +// + +class HyperLogLog { +public: + HyperLogLog() : buckets(NBUCKETS) {} + + void insert(u32 hash) { + update_maximum(buckets[hash & (NBUCKETS - 1)], std::countl_zero(hash) + 1); + } + + i64 get_cardinality() const; + + void merge(const HyperLogLog &other) { + for (i64 i = 0; i < NBUCKETS; i++) + update_maximum(buckets[i], other.buckets[i]); + } + +private: + static constexpr i64 NBUCKETS = 2048; + static constexpr double ALPHA = 0.79402; + + std::vector buckets; +}; + +// +// glob.cc +// + +class Glob { + typedef enum { STRING, STAR, QUESTION, BRACKET } Kind; + + struct Element { + Element(Kind k) : kind(k) {} + Kind kind; + std::string str; + std::bitset<256> bitset; + }; + +public: + static std::optional compile(std::string_view pat); + bool match(std::string_view str); + +private: + Glob(std::vector &&vec) : elements(vec) {} + static bool do_match(std::string_view 
str, std::span elements); + + std::vector elements; +}; + +// +// multi-glob.cc +// + +class MultiGlob { +public: + bool add(std::string_view pat, u32 val); + bool empty() const { return strings.empty(); } + std::optional find(std::string_view str); + +private: + struct TrieNode { + u32 value = -1; + TrieNode *suffix_link = nullptr; + std::unique_ptr children[256]; + }; + + void compile(); + void fix_suffix_links(TrieNode &node); + void fix_values(); + + std::vector strings; + std::unique_ptr root; + std::vector> globs; + std::once_flag once; + bool is_compiled = false; +}; + +// +// uuid.cc +// + +std::array get_uuid_v4(); + +// +// filepath.cc +// + +template +std::filesystem::path filepath(const T &path) { + return {path, std::filesystem::path::format::generic_format}; +} + +std::string get_realpath(std::string_view path); +std::string path_clean(std::string_view path); +std::filesystem::path to_abs_path(std::filesystem::path path); + +// +// demangle.cc +// + +std::string_view demangle(std::string_view name); +std::optional cpp_demangle(std::string_view name); + +// +// compress.cc +// + +class Compressor { +public: + virtual void write_to(u8 *buf) = 0; + virtual ~Compressor() {} + i64 compressed_size = 0; +}; + +class ZlibCompressor : public Compressor { +public: + ZlibCompressor(u8 *buf, i64 size); + void write_to(u8 *buf) override; + +private: + std::vector> shards; + u64 checksum = 0; +}; + +class ZstdCompressor : public Compressor { +public: + ZstdCompressor(u8 *buf, i64 size); + void write_to(u8 *buf) override; + +private: + std::vector> shards; +}; + +// +// perf.cc +// + +// Counter is used to collect statistics numbers. +class Counter { +public: + Counter(std::string_view name, i64 value = 0) : name(name), values(value) { + static std::mutex mu; + std::scoped_lock lock(mu); + instances.push_back(this); + } + + Counter &operator++(int) { + if (enabled) [[unlikely]] + values.local()++; + return *this; + } + + Counter &operator+=(int delta) { + if (enabled) [[unlikely]] + values.local() += delta; + return *this; + } + + static void print(); + + static inline bool enabled = false; + +private: + i64 get_value(); + + std::string_view name; + tbb::enumerable_thread_specific values; + + static inline std::vector instances; +}; + +// Timer and TimeRecord records elapsed time (wall clock time) +// used by each pass of the linker. +struct TimerRecord { + TimerRecord(std::string name, TimerRecord *parent = nullptr); + void stop(); + + std::string name; + TimerRecord *parent; + tbb::concurrent_vector children; + i64 start; + i64 end; + i64 user; + i64 sys; + bool stopped = false; +}; + +void +print_timer_records(tbb::concurrent_vector> &); + +template +class Timer { +public: + Timer(Context &ctx, std::string name, Timer *parent = nullptr) { + record = new TimerRecord(name, parent ? parent->record : nullptr); + ctx.timer_records.push_back(std::unique_ptr(record)); + } + + Timer(const Timer &) = delete; + + ~Timer() { + record->stop(); + } + + void stop() { + record->stop(); + } + +private: + TimerRecord *record; +}; + +// +// tar.cc +// + +// TarFile is a class to create a tar file. +// +// If you pass `--repro` to mold, mold collects all input files and +// put them into `.repro.tar`, so that it is easy to +// run the same command with the same command line arguments. 
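+// Illustrative use (command lines assumed, not taken from this patch):
+//
+//   mold -o foo.elf --repro ...   # the link also writes foo.elf.repro.tar
+//   tar xf foo.elf.repro.tar      # unpack the inputs to re-run the same link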
+class TarWriter { +public: + static std::unique_ptr + open(std::string output_path, std::string basedir); + + ~TarWriter(); + void append(std::string path, std::string_view data); + +private: + static constexpr i64 BLOCK_SIZE = 512; + + TarWriter(FILE *out, std::string basedir) : out(out), basedir(basedir) {} + + FILE *out = nullptr; + std::string basedir; +}; + +// +// Memory-mapped file +// + +// MappedFile represents an mmap'ed input file. +// mold uses mmap-IO only. +template +class MappedFile { +public: + static MappedFile *open(Context &ctx, std::string path); + static MappedFile *must_open(Context &ctx, std::string path); + + ~MappedFile() { unmap(); } + void unmap(); + + MappedFile *slice(Context &ctx, std::string name, u64 start, u64 size); + + std::string_view get_contents() { + return std::string_view((char *)data, size); + } + + i64 get_offset() const { + return parent ? (data - parent->data + parent->get_offset()) : 0; + } + + // Returns a string that uniquely identify a file that is possibly + // in an archive. + std::string get_identifier() const { + if (parent) { + // We use the file offset within an archive as an identifier + // because archive members may have the same name. + return parent->name + ":" + std::to_string(get_offset()); + } + + if (thin_parent) { + // If this is a thin archive member, the filename part is + // guaranteed to be unique. + return thin_parent->name + ":" + name; + } + return name; + } + + std::string name; + u8 *data = nullptr; + i64 size = 0; + bool given_fullpath = true; + MappedFile *parent = nullptr; + MappedFile *thin_parent = nullptr; + int fd = -1; +#ifdef _WIN32 + HANDLE file_handle = INVALID_HANDLE_VALUE; +#endif +}; + +template +MappedFile *MappedFile::open(Context &ctx, std::string path) { + if (path.starts_with('/') && !ctx.arg.chroot.empty()) + path = ctx.arg.chroot + "/" + path_clean(path); + + i64 fd; +#ifdef _WIN32 + fd = ::_open(path.c_str(), O_RDONLY); +#else + fd = ::open(path.c_str(), O_RDONLY); +#endif + + if (fd == -1) { + if (errno != ENOENT) + Fatal(ctx) << "opening " << path << " failed: " << errno_string(); + return nullptr; + } + + struct stat st; + if (fstat(fd, &st) == -1) + Fatal(ctx) << path << ": fstat failed: " << errno_string(); + + MappedFile *mf = new MappedFile; + ctx.mf_pool.push_back(std::unique_ptr(mf)); + + mf->name = path; + mf->size = st.st_size; + + if (st.st_size > 0) { +#ifdef _WIN32 + HANDLE handle = CreateFileMapping((HANDLE)_get_osfhandle(fd), + nullptr, PAGE_READWRITE, 0, + st.st_size, nullptr); + if (!handle) + Fatal(ctx) << path << ": CreateFileMapping failed: " << GetLastError(); + mf->file_handle = handle; + mf->data = (u8 *)MapViewOfFile(handle, FILE_MAP_ALL_ACCESS, 0, 0, st.st_size); + if (!mf->data) + Fatal(ctx) << path << ": MapViewOfFile failed: " << GetLastError(); +#else + mf->data = (u8 *)mmap(nullptr, st.st_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE, fd, 0); + if (mf->data == MAP_FAILED) + Fatal(ctx) << path << ": mmap failed: " << errno_string(); +#endif + } + + close(fd); + return mf; +} + +template +MappedFile * +MappedFile::must_open(Context &ctx, std::string path) { + if (MappedFile *mf = MappedFile::open(ctx, path)) + return mf; + Fatal(ctx) << "cannot open " << path << ": " << errno_string(); +} + +template +MappedFile * +MappedFile::slice(Context &ctx, std::string name, u64 start, u64 size) { + MappedFile *mf = new MappedFile; + mf->name = name; + mf->data = data + start; + mf->size = size; + mf->parent = this; + + ctx.mf_pool.push_back(std::unique_ptr(mf)); + return mf; +} 
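+// Illustrative usage (a sketch, not from upstream; file and member names are
+// made up): an archive member can be exposed as a slice that shares its
+// parent's mapping, so no data is copied:
+//
+//   MappedFile<Context<E>> *mf = MappedFile<Context<E>>::must_open(ctx, "libfoo.a");
+//   MappedFile<Context<E>> *obj = mf->slice(ctx, "libfoo.a(foo.o)", 8, 4096);
+//   std::string_view contents = obj->get_contents();  // same mmap as libfoo.a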
+ +template +void MappedFile::unmap() { + if (size == 0 || parent || !data) + return; + +#ifdef _WIN32 + UnmapViewOfFile(data); + if (file_handle != INVALID_HANDLE_VALUE) + CloseHandle(file_handle); +#else + munmap(data, size); +#endif + + data = nullptr; +} + +} // namespace mold diff --git a/third_party/mold/compress.cc b/third_party/mold/compress.cc new file mode 100644 index 00000000000..32cc7ba3986 --- /dev/null +++ b/third_party/mold/compress.cc @@ -0,0 +1,186 @@ +// clang-format off +// This file implements a multi-threaded zlib and zstd compression +// routine. +// +// zlib-compressed data can be merged just by concatenation as long as +// each piece of data is flushed with Z_SYNC_FLUSH. In this file, we +// split input data into multiple shards, compress them individually +// and concatenate them. We then append a header, a trailer and a +// checksum so that the concatenated data is valid zlib-format data. +// +// zstd-compressed data can be merged in the same way. +// +// Using threads to compress data has a downside. Since the dictionary +// is reset on boundaries of shards, compression ratio is sacrificed +// a little bit. However, if a shard size is large enough, that loss +// is negligible in practice. + +#include "third_party/mold/common.h" + +// MISSING #include +// MISSING #include +// MISSING #include + +#define CHECK(fn) \ + do { \ + [[maybe_unused]] int r = (fn); \ + assert(r == Z_OK); \ + } while (0) + +namespace mold { + +static constexpr i64 SHARD_SIZE = 1024 * 1024; + +static std::vector split(std::string_view input) { + std::vector shards; + + while (input.size() >= SHARD_SIZE) { + shards.push_back(input.substr(0, SHARD_SIZE)); + input = input.substr(SHARD_SIZE); + } + if (!input.empty()) + shards.push_back(input); + return shards; +} + +static std::vector zlib_compress(std::string_view input) { + // Initialize zlib stream. Since debug info is generally compressed + // pretty well with lower compression levels, we chose compression + // level 1. + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + + CHECK(deflateInit2(&strm, 1, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY)); + + // Set an input buffer + strm.avail_in = input.size(); + strm.next_in = (u8 *)input.data(); + + // Set an output buffer. deflateBound() returns an upper bound + // on the compression size. +16 for Z_SYNC_FLUSH. + std::vector buf(deflateBound(&strm, strm.avail_in) + 16); + + // Compress data. It writes all compressed bytes except the last + // partial byte, so up to 7 bits can be held to be written to the + // buffer. + strm.avail_out = buf.size(); + strm.next_out = buf.data(); + CHECK(deflate(&strm, Z_BLOCK)); + + // This is a workaround for libbacktrace before 2022-04-06. + // + // Zlib is a bit stream, and what Z_SYNC_FLUSH does is to write a + // three bit value indicating the start of an uncompressed data + // block followed by four byte data 00 00 ff ff which indicates that + // the length of the block is zero. libbacktrace uses its own zlib + // inflate routine, and it had a bug that if that particular three + // bit value happens to end at a byte boundary, it accidentally + // skipped the next byte. + // + // In order to avoid triggering that bug, we should avoid calling + // deflate() with Z_SYNC_FLUSH if the current bit position is 5. + // If it's 5, we insert an empty block consisting of 10 bits so + // that the bit position is 7 in the next byte. 
+ // + // https://github.com/ianlancetaylor/libbacktrace/pull/87 + int nbits; + deflatePending(&strm, Z_NULL, &nbits); + if (nbits == 5) + CHECK(deflatePrime(&strm, 10, 2)); + CHECK(deflate(&strm, Z_SYNC_FLUSH)); + + assert(strm.avail_out > 0); + buf.resize(buf.size() - strm.avail_out); + buf.shrink_to_fit(); + deflateEnd(&strm); + return buf; +} + +ZlibCompressor::ZlibCompressor(u8 *buf, i64 size) { + std::string_view input{(char *)buf, (size_t)size}; + std::vector inputs = split(input); + std::vector adlers(inputs.size()); + shards.resize(inputs.size()); + + // Compress each shard + tbb::parallel_for((i64)0, (i64)inputs.size(), [&](i64 i) { + adlers[i] = adler32(1, (u8 *)inputs[i].data(), inputs[i].size()); + shards[i] = zlib_compress(inputs[i]); + }); + + // Combine checksums + checksum = adlers[0]; + for (i64 i = 1; i < inputs.size(); i++) + checksum = adler32_combine(checksum, adlers[i], inputs[i].size()); + + // Comput the total size + compressed_size = 8; // the header and the trailer + for (std::vector &shard : shards) + compressed_size += shard.size(); +} + +void ZlibCompressor::write_to(u8 *buf) { + // Write a zlib-format header + buf[0] = 0x78; + buf[1] = 0x9c; + + // Copy compressed data + std::vector offsets(shards.size()); + offsets[0] = 2; // +2 for header + for (i64 i = 1; i < shards.size(); i++) + offsets[i] = offsets[i - 1] + shards[i - 1].size(); + + tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) { + memcpy(&buf[offsets[i]], shards[i].data(), shards[i].size()); + }); + + // Write a trailer + u8 *end = buf + compressed_size; + end[-6] = 3; + end[-5] = 0; + + // Write a checksum + *(ub32 *)(end - 4) = checksum; +} + +static std::vector zstd_compress(std::string_view input) { + std::vector buf(ZSTD_COMPRESSBOUND(input.size())); + constexpr int level = 3; // compression level; must be between 1 to 22 + + size_t sz = ZSTD_compress(buf.data(), buf.size(), input.data(), input.size(), + level); + assert(!ZSTD_isError(sz)); + buf.resize(sz); + buf.shrink_to_fit(); + return buf; +} + +ZstdCompressor::ZstdCompressor(u8 *buf, i64 size) { + std::string_view input{(char *)buf, (size_t)size}; + std::vector inputs = split(input); + shards.resize(inputs.size()); + + // Compress each shard + tbb::parallel_for((i64)0, (i64)inputs.size(), [&](i64 i) { + shards[i] = zstd_compress(inputs[i]); + }); + + compressed_size = 0; + for (std::vector &shard : shards) + compressed_size += shard.size(); +} + +void ZstdCompressor::write_to(u8 *buf) { + // Copy compressed data + std::vector offsets(shards.size()); + for (i64 i = 1; i < shards.size(); i++) + offsets[i] = offsets[i - 1] + shards[i - 1].size(); + + tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) { + memcpy(&buf[offsets[i]], shards[i].data(), shards[i].size()); + }); +} + +} // namespace mold diff --git a/third_party/mold/config.h.in b/third_party/mold/config.h.in new file mode 100644 index 00000000000..174cda60f05 --- /dev/null +++ b/third_party/mold/config.h.in @@ -0,0 +1,4 @@ +// clang-format off +#define MOLD_VERSION "@mold_VERSION@" +#define MOLD_LIBDIR "@CMAKE_INSTALL_FULL_LIBDIR@" +#cmakedefine01 MOLD_IS_SOLD diff --git a/third_party/mold/demangle.cc b/third_party/mold/demangle.cc new file mode 100644 index 00000000000..e4e478b3699 --- /dev/null +++ b/third_party/mold/demangle.cc @@ -0,0 +1,52 @@ +// clang-format off +#include "third_party/mold/common.h" + +#include "third_party/libcxx/cstdlib" + +#ifndef _WIN32 +// MISSING #include +#endif + +// MISSING #include "../third-party/rust-demangle/rust-demangle.h" + 
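+// For illustration (symbol name assumed, not from this patch): a C++ name such
+// as "_ZN4mold7VersionE" comes back as "mold::Version", while names that
+// neither demangler recognizes are returned unchanged.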
+namespace mold { + +std::string_view demangle(std::string_view name) { + static thread_local char *p; + if (p) + free(p); + + // Try to demangle as a Rust symbol. Since legacy-style Rust symbols + // are also valid as a C++ mangled name, we need to call this before + // cpp_demangle. + p = rust_demangle(std::string(name).c_str(), 0); + if (p) + return p; + + // Try to demangle as a C++ symbol. + if (std::optional s = cpp_demangle(name)) + return *s; + return name; +} + +std::optional cpp_demangle(std::string_view name) { + static thread_local char *buf; + static thread_local size_t buflen; + + // TODO(cwasser): Actually demangle Symbols on Windows using e.g. + // `UnDecorateSymbolName` from Dbghelp, maybe even Itanium symbols? +#ifndef _WIN32 + if (name.starts_with("_Z")) { + int status; + char *p = abi::__cxa_demangle(std::string(name).c_str(), buf, &buflen, &status); + if (status == 0) { + buf = p; + return p; + } + } +#endif + + return {}; +} + +} // namespace mold diff --git a/third_party/mold/elf/arch-alpha.cc b/third_party/mold/elf/arch-alpha.cc new file mode 100644 index 00000000000..e0b332d7c8f --- /dev/null +++ b/third_party/mold/elf/arch-alpha.cc @@ -0,0 +1,331 @@ +// clang-format off +// Alpha is a 64-bit RISC ISA developed by DEC (Digital Equipment +// Corporation) in the early '90s. It aimed to be an ISA that would last +// 25 years. DEC expected Alpha would become 1000x faster during that time +// span. Since the ISA was developed from scratch for future machines, +// it's 64-bit from the beginning. There's no 32-bit variant. +// +// DEC ported its own Unix (Tru64) to Alpha. Microsoft also ported Windows +// NT to it. But it wasn't a huge commercial success. +// +// DEC was acquired by Compaq in 1997. In the late '90s, Intel and +// Hewlett-Packard were advertising that their upcoming Itanium processor +// would achieve significantly better performance than RISC processors, so +// Compaq decided to discontinue the Alpha processor line to switch to +// Itanium. Itanium resulted in a miserable failure, but it still suceeded +// to wipe out several RISC processors just by promising overly optimistic +// perf numbers. Alpha as an ISA would probably have been fine after 25 +// years since its introduction (which is 1992 + 25 = 2017), but the +// company and its market didn't last that long. +// +// From the linker's point of view, there are a few peculiarities in its +// psABI as shown below: +// +// - Alpha lacks PC-relative memory load/store instructions, so it uses +// register-relative load/store instructions in position-independent +// code. Specifically, GP (which is an alias for $r29) is always +// maintained to refer to .got+0x8000, and global variables' addresses +// are loaded in a GP-relative manner. +// +// - It looks like even function addresses are first loaded to register +// in a GP-relative manner before calling it. We can relax it to +// convert the instruction sequence with a direct branch instruction, +// but by default, object files don't use a direct branch to call a +// function. Therefore, by default, we don't need to create a PLT. +// Any function call is made by first reading its address from GOT and +// jump to the address. + +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +using E = ALPHA; + +// A 32-bit immediate can be materialized in a register with a "load high" +// and a "load low" instruction sequence. The first instruction sets the +// upper 16 bits in a register, and the second one set the lower 16 +// bits. 
When doing so, they sign-extend an immediate. Therefore, if the +// 15th bit of an immediate happens to be 1, setting a "low half" value +// negates the upper 16 bit values that has already been set in a +// register. To compensate that, we need to add 0x8000 when setting the +// upper 16 bits. +static u32 hi(u32 val) { + return bits(val + 0x8000, 31, 16); +} + +template <> +void write_plt_header(Context &ctx, u8 *buf) {} + +template <> +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) {} + +template <> +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) {} + +template <> +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) { + u8 *loc = ctx.buf + this->shdr.sh_offset + offset; + + switch (rel.r_type) { + case R_NONE: + break; + case R_ALPHA_SREL32: + *(ul32 *)loc = val - this->shdr.sh_addr - offset; + break; + default: + Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; + } +} + +template <> +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + u64 S = sym.get_addr(ctx); + u64 A = rel.r_addend; + u64 P = get_addr() + rel.r_offset; + u64 G = sym.get_got_idx(ctx) * sizeof(Word); + u64 GOT = ctx.got->shdr.sh_addr; + u64 GP = ctx.got->shdr.sh_addr + 0x8000; + + switch (rel.r_type) { + case R_ALPHA_REFQUAD: + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_ALPHA_GPREL32: + *(ul32 *)loc = S + A - GP; + break; + case R_ALPHA_LITERAL: + if (A) + *(ul16 *)loc = ctx.extra.got->get_addr(sym, A) - GP; + else + *(ul16 *)loc = GOT + G - GP; + break; + case R_ALPHA_BRSGP: + *(ul32 *)loc |= bits(S + A - P - 4, 22, 0); + break; + case R_ALPHA_GPDISP: + *(ul16 *)loc = hi(GP - P); + *(ul16 *)(loc + A) = GP - P; + break; + case R_ALPHA_SREL32: + *(ul32 *)loc = S + A - P; + break; + case R_ALPHA_GPRELHIGH: + *(ul16 *)loc = hi(S + A - GP); + break; + case R_ALPHA_GPRELLOW: + *(ul16 *)loc = S + A - GP; + break; + case R_ALPHA_TLSGD: + *(ul16 *)loc = sym.get_tlsgd_addr(ctx) - GP; + break; + case R_ALPHA_TLSLDM: + *(ul16 *)loc = ctx.got->get_tlsld_addr(ctx) - GP; + break; + case R_ALPHA_DTPRELHI: + *(ul16 *)loc = hi(S + A - ctx.dtp_addr); + break; + case R_ALPHA_DTPRELLO: + *(ul16 *)loc = S + A - ctx.dtp_addr; + break; + case R_ALPHA_GOTTPREL: + *(ul16 *)loc = sym.get_gottp_addr(ctx) + A - GP; + break; + case R_ALPHA_TPRELHI: + *(ul16 *)loc = hi(S + A - ctx.tp_addr); + break; + case R_ALPHA_TPRELLO: + *(ul16 *)loc = S + A - ctx.tp_addr; + break; + case R_ALPHA_LITUSE: + case R_ALPHA_HINT: + break; + default: + unreachable(); + } + } +} + +template <> +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? 
frag_addend : (i64)rel.r_addend; + + switch (rel.r_type) { + case R_ALPHA_REFLONG: + if (std::optional val = get_tombstone(sym, frag)) + *(ul32 *)loc = *val; + else + *(ul32 *)loc = S + A; + break; + case R_ALPHA_REFQUAD: + if (std::optional val = get_tombstone(sym, frag)) + *(ul64 *)loc = *val; + else + *(ul64 *)loc = S + A; + break; + default: + Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " + << rel; + } + } +} + +template <> +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + + if (sym.is_ifunc()) + Error(ctx) << sym << ": GNU ifunc symbol is not supported on Alpha"; + + switch (rel.r_type) { + case R_ALPHA_REFQUAD: + scan_dyn_absrel(ctx, sym, rel); + break; + case R_ALPHA_LITERAL: + if (rel.r_addend) + ctx.extra.got->add_symbol(sym, rel.r_addend); + else + sym.flags |= NEEDS_GOT; + break; + case R_ALPHA_SREL32: + scan_pcrel(ctx, sym, rel); + break; + case R_ALPHA_BRSGP: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_ALPHA_TLSGD: + sym.flags |= NEEDS_TLSGD; + break; + case R_ALPHA_TLSLDM: + ctx.needs_tlsld = true; + break; + case R_ALPHA_GOTTPREL: + sym.flags |= NEEDS_GOTTP; + break; + case R_ALPHA_TPRELHI: + case R_ALPHA_TPRELLO: + check_tlsle(ctx, sym, rel); + break; + case R_ALPHA_GPREL32: + case R_ALPHA_LITUSE: + case R_ALPHA_GPDISP: + case R_ALPHA_HINT: + case R_ALPHA_GPRELHIGH: + case R_ALPHA_GPRELLOW: + case R_ALPHA_DTPRELHI: + case R_ALPHA_DTPRELLO: + break; + default: + Fatal(ctx) << *this << ": unknown relocation: " << rel; + } + } +} + +// An R_ALPHA_LITERAL relocation may request the linker to create a GOT +// entry for an external symbol with a non-zero addend. This is an unusual +// request which is not found in any other targets. +// +// Referring an external symbol with a non-zero addend is a bad practice +// because we need to create as many dynamic relocations as the number of +// distinctive addends for the same symbol. +// +// We don't want to mess up the implementation of the common GOT section +// for Alpha. So we create another GOT-like section, .alpha_got. Any GOT +// entry for an R_ALPHA_LITERAL reloc with a non-zero addend is created +// not in .got but in .alpha_got. +// +// Since .alpha_got entries are accessed relative to GP, .alpha_got +// needs to be close enough to .got. It's actually placed next to .got. 
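+// (GP is defined as .got+0x8000 and the GP-relative displacement written above
+// is a signed 16-bit value, so only entries within about ±32 KiB of GP are
+// reachable; placing .alpha_got right next to .got keeps it in that window.)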
+void AlphaGotSection::add_symbol(Symbol &sym, i64 addend) { + assert(addend); + std::scoped_lock lock(mu); + entries.push_back({&sym, addend}); +} + +bool operator<(const AlphaGotSection::Entry &a, const AlphaGotSection::Entry &b) { + return std::tuple(a.sym->file->priority, a.sym->sym_idx, a.addend) < + std::tuple(b.sym->file->priority, b.sym->sym_idx, b.addend); +}; + +u64 AlphaGotSection::get_addr(Symbol &sym, i64 addend) { + auto it = std::lower_bound(entries.begin(), entries.end(), Entry{&sym, addend}); + assert(it != entries.end()); + return this->shdr.sh_addr + (it - entries.begin()) * sizeof(Word); +} + +i64 AlphaGotSection::get_reldyn_size(Context &ctx) const { + i64 n = 0; + for (const Entry &e : entries) + if (e.sym->is_imported || (ctx.arg.pic && !e.sym->is_absolute())) + n++; + return n; +} + +void AlphaGotSection::finalize() { + sort(entries); + remove_duplicates(entries); + shdr.sh_size = entries.size() * sizeof(Word); +} + +void AlphaGotSection::copy_buf(Context &ctx) { + ElfRel *dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + reldyn_offset); + + for (i64 i = 0; i < entries.size(); i++) { + Entry &e = entries[i]; + u64 P = this->shdr.sh_addr + sizeof(Word) * i; + ul64 *buf = (ul64 *)(ctx.buf + this->shdr.sh_offset + sizeof(Word) * i); + + if (e.sym->is_imported) { + *buf = ctx.arg.apply_dynamic_relocs ? e.addend : 0; + *dynrel++ = ElfRel(P, E::R_ABS, e.sym->get_dynsym_idx(ctx), e.addend); + } else { + *buf = e.sym->get_addr(ctx) + e.addend; + if (ctx.arg.pic && !e.sym->is_absolute()) + *dynrel++ = ElfRel(P, E::R_RELATIVE, 0, *buf); + } + } +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/arch-arm32.cc b/third_party/mold/elf/arch-arm32.cc new file mode 100644 index 00000000000..5ac207b0aec --- /dev/null +++ b/third_party/mold/elf/arch-arm32.cc @@ -0,0 +1,737 @@ +// clang-format off +// ARM32 is a bit special from the linker's viewpoint because ARM +// processors support two different instruction encodings: Thumb and +// ARM (in a narrower sense). Thumb instructions are either 16 bits or +// 32 bits, while ARM instructions are all 32 bits. Feature-wise, +// thumb is a subset of ARM, so not all ARM instructions are +// representable in Thumb. +// +// ARM processors originally supported only ARM instructions. Thumb +// instructions were later added to increase code density. +// +// ARM processors runs in either ARM mode or Thumb mode. The mode can +// be switched using BX (branch and mode exchange)-family instructions. +// We need to use that instructions to, for example, call a function +// encoded in Thumb from a function encoded in ARM. Sometimes, the +// linker even has to emit an interworking thunk code to switch mode. +// +// ARM instructions are aligned to 4 byte boundaries. Thumb are to 2 +// byte boundaries. +// +// You can distinguish Thumb functions from ARM functions by looking +// at the least significant bit (LSB) of its "address". If LSB is 0, +// it's ARM; otherwise, Thumb. +// +// For example, if a symbol `foo` is of type STT_FUNC and has value +// 0x2001, `foo` is a function using Thumb instructions whose address +// is 0x2000 (not 0x2001, as Thumb instructions are always 2-byte +// aligned). Likewise, if a function pointer has value 0x2001, it +// refers a Thumb function at 0x2000. 
+// +// https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst + +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +using E = ARM32; + +template <> +i64 get_addend(u8 *loc, const ElfRel &rel) { + switch (rel.r_type) { + case R_ARM_ABS32: + case R_ARM_REL32: + case R_ARM_TARGET1: + case R_ARM_BASE_PREL: + case R_ARM_GOTOFF32: + case R_ARM_GOT_PREL: + case R_ARM_GOT_BREL: + case R_ARM_TLS_GD32: + case R_ARM_TLS_LDM32: + case R_ARM_TLS_LDO32: + case R_ARM_TLS_IE32: + case R_ARM_TLS_LE32: + case R_ARM_TLS_GOTDESC: + case R_ARM_TARGET2: + return *(il32 *)loc; + case R_ARM_THM_JUMP11: + return sign_extend(*(ul16 *)loc, 10) << 1; + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: + case R_ARM_THM_TLS_CALL: { + u32 S = bit(*(ul16 *)loc, 10); + u32 J1 = bit(*(ul16 *)(loc + 2), 13); + u32 J2 = bit(*(ul16 *)(loc + 2), 11); + u32 I1 = !(J1 ^ S); + u32 I2 = !(J2 ^ S); + u32 imm10 = bits(*(ul16 *)loc, 9, 0); + u32 imm11 = bits(*(ul16 *)(loc + 2), 10, 0); + u32 val = (S << 24) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1); + return sign_extend(val, 24); + } + case R_ARM_CALL: + case R_ARM_JUMP24: + case R_ARM_PLT32: + case R_ARM_TLS_CALL: + return sign_extend(*(ul32 *)loc, 23) << 2; + case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVW_ABS_NC: + case R_ARM_MOVT_PREL: + case R_ARM_MOVT_ABS: { + u32 imm12 = bits(*(ul32 *)loc, 11, 0); + u32 imm4 = bits(*(ul32 *)loc, 19, 16); + return sign_extend((imm4 << 12) | imm12, 15); + } + case R_ARM_PREL31: + return sign_extend(*(ul32 *)loc, 30); + case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVW_ABS_NC: + case R_ARM_THM_MOVT_PREL: + case R_ARM_THM_MOVT_ABS: { + u32 imm4 = bits(*(ul16 *)loc, 3, 0); + u32 i = bit(*(ul16 *)loc, 10); + u32 imm3 = bits(*(ul16 *)(loc + 2), 14, 12); + u32 imm8 = bits(*(ul16 *)(loc + 2), 7, 0); + u32 val = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8; + return sign_extend(val, 15); + } + default: + return 0; + } +} + +static void write_mov_imm(u8 *loc, u32 val) { + u32 imm12 = bits(val, 11, 0); + u32 imm4 = bits(val, 15, 12); + *(ul32 *)loc = (*(ul32 *)loc & 0xfff0f000) | (imm4 << 16) | imm12; +} + +static void write_thm_b_imm(u8 *loc, u32 val) { + // https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/BL--BLX--immediate- + u32 sign = bit(val, 24); + u32 I1 = bit(val, 23); + u32 I2 = bit(val, 22); + u32 J1 = !I1 ^ sign; + u32 J2 = !I2 ^ sign; + u32 imm10 = bits(val, 21, 12); + u32 imm11 = bits(val, 11, 1); + + ul16 *buf = (ul16 *)loc; + buf[0] = (buf[0] & 0b1111'1000'0000'0000) | (sign << 10) | imm10; + buf[1] = (buf[1] & 0b1101'0000'0000'0000) | (J1 << 13) | (J2 << 11) | imm11; +} + +static void write_thm_mov_imm(u8 *loc, u32 val) { + // https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/MOVT + u32 imm4 = bits(val, 15, 12); + u32 i = bit(val, 11); + u32 imm3 = bits(val, 10, 8); + u32 imm8 = bits(val, 7, 0); + + ul16 *buf = (ul16 *)loc; + buf[0] = (buf[0] & 0b1111'1011'1111'0000) | (i << 10) | imm4; + buf[1] = (buf[1] & 0b1000'1111'0000'0000) | (imm3 << 12) | imm8; +} + +template <> +void write_addend(u8 *loc, i64 val, const ElfRel &rel) { + switch (rel.r_type) { + case R_ARM_NONE: + break; + case R_ARM_ABS32: + case R_ARM_REL32: + case R_ARM_TARGET1: + case R_ARM_BASE_PREL: + case R_ARM_GOTOFF32: + case R_ARM_GOT_PREL: + case R_ARM_GOT_BREL: + case R_ARM_TLS_GD32: + case R_ARM_TLS_LDM32: + case R_ARM_TLS_LDO32: + case R_ARM_TLS_IE32: + case 
R_ARM_TLS_LE32: + case R_ARM_TLS_GOTDESC: + case R_ARM_TARGET2: + *(ul32 *)loc = val; + break; + case R_ARM_THM_JUMP11: + *(ul16 *)loc = (*(ul16 *)loc & 0xf800) | bits(val, 11, 1); + break; + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: + case R_ARM_THM_TLS_CALL: + write_thm_b_imm(loc, val); + break; + case R_ARM_CALL: + case R_ARM_JUMP24: + case R_ARM_PLT32: + *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2); + break; + case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVW_ABS_NC: + case R_ARM_MOVT_PREL: + case R_ARM_MOVT_ABS: + write_mov_imm(loc, val); + break; + case R_ARM_PREL31: + *(ul32 *)loc = (*(ul32 *)loc & 0x8000'0000) | (val & 0x7fff'ffff); + break; + case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVW_ABS_NC: + case R_ARM_THM_MOVT_PREL: + case R_ARM_THM_MOVT_ABS: + write_thm_mov_imm(loc, val); + break; + default: + unreachable(); + } +} + +template <> +void write_plt_header(Context &ctx, u8 *buf) { + static const ul32 insn[] = { + 0xe52d'e004, // push {lr} + 0xe59f'e004, // ldr lr, 2f + 0xe08f'e00e, // 1: add lr, pc, lr + 0xe5be'f008, // ldr pc, [lr, #8]! + 0x0000'0000, // 2: .word .got.plt - 1b - 8 + 0xe320'f000, // nop + 0xe320'f000, // nop + 0xe320'f000, // nop + }; + + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 16) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 16; +} + +static const ul32 plt_entry[] = { + 0xe59f'c004, // 1: ldr ip, 2f + 0xe08c'c00f, // add ip, ip, pc + 0xe59c'f000, // ldr pc, [ip] + 0x0000'0000, // 2: .word sym@GOT - 1b +}; + +template <> +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { + memcpy(buf, plt_entry, sizeof(plt_entry)); + *(ul32 *)(buf + 12) = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 12; +} + +template <> +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { + memcpy(buf, plt_entry, sizeof(plt_entry)); + *(ul32 *)(buf + 12) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 12; +} + +// ARM does not use .eh_frame for exception handling. Instead, it uses +// .ARM.exidx and .ARM.extab. So this function is empty. +template <> +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) {} + +// ARM and Thumb branch instructions can jump within ±16 MiB. 
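+// The check below simply verifies that the displacement survives
+// sign-extension from bit 24, i.e. that it fits in a signed 25-bit byte
+// offset: anything in [-2^24, 2^24), which is exactly ±16 MiB.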
+static bool is_jump_reachable(i64 val) { + return sign_extend(val, 24) == val; +} + +template <> +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + auto get_tls_trampoline_addr = [&, i = 0](u64 addr) mutable { + for (; i < output_section->thunks.size(); i++) { + i64 disp = output_section->shdr.sh_addr + output_section->thunks[i]->offset - + addr; + if (is_jump_reachable(disp)) + return disp; + } + unreachable(); + }; + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || rel.r_type == R_ARM_V4BX) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + u64 S = sym.get_addr(ctx); + u64 A = get_addend(*this, rel); + u64 P = get_addr() + rel.r_offset; + u64 T = S & 1; + u64 G = sym.get_got_idx(ctx) * sizeof(Word); + u64 GOT = ctx.got->shdr.sh_addr; + + auto get_thumb_thunk_addr = [&] { return get_thunk_addr(i); }; + auto get_arm_thunk_addr = [&] { return get_thunk_addr(i) + 4; }; + + switch (rel.r_type) { + case R_ARM_ABS32: + case R_ARM_TARGET1: + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_ARM_REL32: + *(ul32 *)loc = S + A - P; + break; + case R_ARM_THM_CALL: { + if (sym.is_remaining_undef_weak()) { + // On ARM, calling an weak undefined symbol jumps to the + // next instruction. + *(ul32 *)loc = 0x8000'f3af; // NOP.W + break; + } + + // THM_CALL relocation refers either BL or BLX instruction. + // They are different in only one bit. We need to use BL if + // the jump target is Thumb. Otherwise, use BLX. + i64 val = S + A - P; + if (is_jump_reachable(val)) { + if (T) { + write_thm_b_imm(loc, val); + *(ul16 *)(loc + 2) |= 0x1000; // rewrite to BL + } else { + write_thm_b_imm(loc, align_to(val, 4)); + *(ul16 *)(loc + 2) &= ~0x1000; // rewrite to BLX + } + } else { + write_thm_b_imm(loc, align_to(get_arm_thunk_addr() + A - P, 4)); + *(ul16 *)(loc + 2) &= ~0x1000; // rewrite to BLX + } + break; + } + case R_ARM_BASE_PREL: + *(ul32 *)loc = GOT + A - P; + break; + case R_ARM_GOTOFF32: + *(ul32 *)loc = ((S + A) | T) - GOT; + break; + case R_ARM_GOT_PREL: + case R_ARM_TARGET2: + *(ul32 *)loc = GOT + G + A - P; + break; + case R_ARM_GOT_BREL: + *(ul32 *)loc = G + A; + break; + case R_ARM_CALL: { + if (sym.is_remaining_undef_weak()) { + *(ul32 *)loc = 0xe320'f000; // NOP + break; + } + + // Just like THM_CALL, ARM_CALL relocation refers either BL or + // BLX instruction. We may need to rewrite BL → BLX or BLX → BL. 
+ bool is_bl = ((*(ul32 *)loc & 0xff00'0000) == 0xeb00'0000); + bool is_blx = ((*(ul32 *)loc & 0xfe00'0000) == 0xfa00'0000); + if (!is_bl && !is_blx) + Fatal(ctx) << *this << ": R_ARM_CALL refers neither BL nor BLX"; + + u64 val = S + A - P; + if (is_jump_reachable(val)) { + if (T) { + *(ul32 *)loc = 0xfa00'0000; // BLX + *(ul32 *)loc |= (bit(val, 1) << 24) | bits(val, 25, 2); + } else { + *(ul32 *)loc = 0xeb00'0000; // BL + *(ul32 *)loc |= bits(val, 25, 2); + } + } else { + *(ul32 *)loc = 0xeb00'0000; // BL + *(ul32 *)loc |= bits(get_arm_thunk_addr() + A - P, 25, 2); + } + break; + } + case R_ARM_JUMP24: { + if (sym.is_remaining_undef_weak()) { + *(ul32 *)loc = 0xe320'f000; // NOP + break; + } + + // These relocs refers a B (unconditional branch) instruction. + // Unlike BL or BLX, we can't rewrite B to BX in place when the + // processor mode switch is required because BX doesn't takes an + // immediate; it takes only a register. So if mode switch is + // required, we jump to a linker-synthesized thunk which does the + // job with a longer code sequence. + u64 val = S + A - P; + if (!is_jump_reachable(val) || T) + val = get_arm_thunk_addr() + A - P; + *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2); + break; + } + case R_ARM_PLT32: + if (sym.is_remaining_undef_weak()) { + *(ul32 *)loc = 0xe320'f000; // NOP + } else { + u64 val = (T ? get_arm_thunk_addr() : S) + A - P; + *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2); + } + break; + case R_ARM_THM_JUMP11: + assert(T); + check(S + A - P, -(1 << 11), 1 << 11); + *(ul16 *)loc &= 0xf800; + *(ul16 *)loc |= bits(S + A - P, 11, 1); + break; + case R_ARM_THM_JUMP19: { + i64 val = S + A - P; + check(val, -(1 << 19), 1 << 19); + + // sign:J2:J1:imm6:imm11:'0' + u32 sign = bit(val, 20); + u32 J2 = bit(val, 19); + u32 J1 = bit(val, 18); + u32 imm6 = bits(val, 17, 12); + u32 imm11 = bits(val, 11, 1); + + *(ul16 *)loc &= 0b1111'1011'1100'0000; + *(ul16 *)loc |= (sign << 10) | imm6; + + *(ul16 *)(loc + 2) &= 0b1101'0000'0000'0000; + *(ul16 *)(loc + 2) |= (J2 << 13) | (J1 << 11) | imm11; + break; + } + case R_ARM_THM_JUMP24: { + if (sym.is_remaining_undef_weak()) { + *(ul32 *)loc = 0x8000'f3af; // NOP + break; + } + + // Just like R_ARM_JUMP24, we need to jump to a thunk if we need to + // switch processor mode. 
+ u64 val = S + A - P; + if (!is_jump_reachable(val) || !T) + val = get_thumb_thunk_addr() + A - P; + write_thm_b_imm(loc, val); + break; + } + case R_ARM_MOVW_PREL_NC: + write_mov_imm(loc, ((S + A) | T) - P); + break; + case R_ARM_MOVW_ABS_NC: + write_mov_imm(loc, (S + A) | T); + break; + case R_ARM_THM_MOVW_PREL_NC: + write_thm_mov_imm(loc, ((S + A) | T) - P); + break; + case R_ARM_PREL31: + check(S + A - P, -(1LL << 30), 1LL << 30); + *(ul32 *)loc &= 0x8000'0000; + *(ul32 *)loc |= (S + A - P) & 0x7fff'ffff; + break; + case R_ARM_THM_MOVW_ABS_NC: + write_thm_mov_imm(loc, (S + A) | T); + break; + case R_ARM_MOVT_PREL: + write_mov_imm(loc, (S + A - P) >> 16); + break; + case R_ARM_THM_MOVT_PREL: + write_thm_mov_imm(loc, (S + A - P) >> 16); + break; + case R_ARM_MOVT_ABS: + write_mov_imm(loc, (S + A) >> 16); + break; + case R_ARM_THM_MOVT_ABS: + write_thm_mov_imm(loc, (S + A) >> 16); + break; + case R_ARM_TLS_GD32: + *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - P; + break; + case R_ARM_TLS_LDM32: + *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - P; + break; + case R_ARM_TLS_LDO32: + *(ul32 *)loc = S + A - ctx.dtp_addr; + break; + case R_ARM_TLS_IE32: + *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - P; + break; + case R_ARM_TLS_LE32: + *(ul32 *)loc = S + A - ctx.tp_addr; + break; + case R_ARM_TLS_GOTDESC: + if (sym.has_tlsdesc(ctx)) { + // A is odd if the corresponding TLS_CALL is Thumb. + if (A & 1) + *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) - P + A - 6; + else + *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) - P + A - 4; + } else { + *(ul32 *)loc = S - ctx.tp_addr; + } + break; + case R_ARM_TLS_CALL: + if (sym.has_tlsdesc(ctx)) { + // BL + *(ul32 *)loc = 0xeb00'0000 | bits(get_tls_trampoline_addr(P + 8), 25, 2); + } else { + // BL -> NOP + *(ul32 *)loc = 0xe320'f000; + } + break; + case R_ARM_THM_TLS_CALL: + if (sym.has_tlsdesc(ctx)) { + u64 val = align_to(get_tls_trampoline_addr(P + 4), 4); + write_thm_b_imm(loc, val); + *(ul16 *)(loc + 2) &= ~0x1000; // rewrite BL with BLX + } else { + // BL -> NOP.W + *(ul32 *)loc = 0x8000'f3af; + } + break; + default: + Error(ctx) << *this << ": unknown relocation: " << rel; + } + } +} + +template <> +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? 
frag_addend : get_addend(*this, rel); + + switch (rel.r_type) { + case R_ARM_ABS32: + if (std::optional val = get_tombstone(sym, frag)) + *(ul32 *)loc = *val; + else + *(ul32 *)loc = S + A; + break; + case R_ARM_TLS_LDO32: + if (std::optional val = get_tombstone(sym, frag)) + *(ul32 *)loc = *val; + else + *(ul32 *)loc = S + A - ctx.dtp_addr; + break; + default: + Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " + << rel; + break; + } + } +} + +template <> +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + // Scan relocations + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + + if (sym.is_ifunc()) + sym.flags |= NEEDS_GOT | NEEDS_PLT; + + switch (rel.r_type) { + case R_ARM_ABS32: + case R_ARM_MOVT_ABS: + case R_ARM_THM_MOVT_ABS: + case R_ARM_TARGET1: + scan_dyn_absrel(ctx, sym, rel); + break; + case R_ARM_THM_CALL: + case R_ARM_CALL: + case R_ARM_JUMP24: + case R_ARM_PLT32: + case R_ARM_THM_JUMP24: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_ARM_GOT_PREL: + case R_ARM_GOT_BREL: + case R_ARM_TARGET2: + sym.flags |= NEEDS_GOT; + break; + case R_ARM_MOVT_PREL: + case R_ARM_THM_MOVT_PREL: + case R_ARM_PREL31: + scan_pcrel(ctx, sym, rel); + break; + case R_ARM_TLS_GD32: + sym.flags |= NEEDS_TLSGD; + break; + case R_ARM_TLS_LDM32: + ctx.needs_tlsld = true; + break; + case R_ARM_TLS_IE32: + sym.flags |= NEEDS_GOTTP; + break; + case R_ARM_TLS_GOTDESC: + if (!relax_tlsdesc(ctx, sym)) + sym.flags |= NEEDS_TLSDESC; + break; + case R_ARM_TLS_LE32: + check_tlsle(ctx, sym, rel); + break; + case R_ARM_REL32: + case R_ARM_BASE_PREL: + case R_ARM_GOTOFF32: + case R_ARM_THM_JUMP11: + case R_ARM_THM_JUMP19: + case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVW_ABS_NC: + case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVW_ABS_NC: + case R_ARM_TLS_LDO32: + case R_ARM_TLS_CALL: + case R_ARM_THM_TLS_CALL: + case R_ARM_V4BX: + break; + default: + Error(ctx) << *this << ": unknown relocation: " << rel; + } + } +} + +template <> +void RangeExtensionThunk::copy_buf(Context &ctx) { + u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; + + // TLS trampoline code. ARM32's TLSDESC is designed so that this + // common piece of code is factored out from object files to reduce + // output size. Since no one provide, the linker has to synthesize it. + static ul32 hdr[] = { + 0xe08e'0000, // add r0, lr, r0 + 0xe590'1004, // ldr r1, [r0, #4] + 0xe12f'ff11, // bx r1 + }; + + // This is a range extension and mode switch thunk. + // It has two entry points: +0 for Thumb and +4 for ARM. 
+ const u8 entry[] = { + // .thumb + 0xfc, 0x46, // mov ip, pc + 0x60, 0x47, // bx ip # jumps to the following `ldr` insn + // .arm + 0x04, 0xc0, 0x9f, 0xe5, // ldr ip, 2f + 0x0f, 0xc0, 0x8c, 0xe0, // 1: add ip, ip, pc + 0x1c, 0xff, 0x2f, 0xe1, // bx ip + 0x00, 0x00, 0x00, 0x00, // 2: .word sym - 1b + }; + + static_assert(E::thunk_hdr_size == sizeof(hdr)); + static_assert(E::thunk_size == sizeof(entry)); + + memcpy(buf, hdr, sizeof(hdr)); + + for (i64 i = 0; i < symbols.size(); i++) { + u8 *loc = buf + sizeof(hdr) + i * sizeof(entry); + memcpy(loc, entry, sizeof(entry)); + + u64 S = symbols[i]->get_addr(ctx); + u64 P = output_section.shdr.sh_addr + offset + sizeof(hdr) + i * sizeof(entry); + *(ul32 *)(loc + 16) = S - P - 16; + } +} + +// ARM executables use an .ARM.exidx section to look up an exception +// handling record for the current instruction pointer. The table needs +// to be sorted by their addresses. +// +// Other target uses .eh_frame_hdr instead for the same purpose. +// I don't know why only ARM uses the different mechanism, but it's +// likely that it's due to some historical reason. +// +// This function sorts .ARM.exidx records. +void fixup_arm_exidx_section(Context &ctx) { + Timer t(ctx, "fixup_arm_exidx_section"); + + OutputSection *osec = find_section(ctx, SHT_ARM_EXIDX); + if (!osec) + return; + + // .ARM.exidx records consists of a signed 31-bit relative address + // and a 32-bit value. The relative address indicates the start + // address of a function that the record covers. The value is one of + // the followings: + // + // 1. CANTUNWIND indicating that there's no unwinding info for the function, + // 2. a compact unwinding record encoded into a 32-bit value, or + // 3. a 31-bit relative address which points to a larger record in + // the .ARM.extab section. + // + // CANTUNWIND is value 1. The most significant bit is set in (2) but + // not in (3). So we can distinguished them just by looking at a value. + const u32 EXIDX_CANTUNWIND = 1; + + struct Entry { + ul32 addr; + ul32 val; + }; + + if (osec->shdr.sh_size % sizeof(Entry)) + Fatal(ctx) << "invalid .ARM.exidx section size"; + + Entry *ent = (Entry *)(ctx.buf + osec->shdr.sh_offset); + i64 num_entries = osec->shdr.sh_size / sizeof(Entry); + + // Entry's addresses are relative to themselves. In order to sort + // records by addresses, we first translate them so that the addresses + // are relative to the beginning of the section. + auto is_relative = [](u32 val) { + return val != EXIDX_CANTUNWIND && !(val & 0x8000'0000); + }; + + tbb::parallel_for((i64)0, num_entries, [&](i64 i) { + i64 offset = sizeof(Entry) * i; + ent[i].addr = sign_extend(ent[i].addr, 30) + offset; + if (is_relative(ent[i].val)) + ent[i].val = 0x7fff'ffff & (ent[i].val + offset); + }); + + tbb::parallel_sort(ent, ent + num_entries, [](const Entry &a, const Entry &b) { + return a.addr < b.addr; + }); + + // Make addresses relative to themselves. + tbb::parallel_for((i64)0, num_entries, [&](i64 i) { + i64 offset = sizeof(Entry) * i; + ent[i].addr = 0x7fff'ffff & (ent[i].addr - offset); + if (is_relative(ent[i].val)) + ent[i].val = 0x7fff'ffff & (ent[i].val - offset); + }); + + // .ARM.exidx's sh_link should be set to the .text section index. + // Runtime doesn't care about it, but the binutils's strip command does. 
+ if (ctx.shdr) { + if (Chunk *text = find_section(ctx, ".text")) { + osec->shdr.sh_link = text->shndx; + ctx.shdr->copy_buf(ctx); + } + } +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/arch-arm64.cc b/third_party/mold/elf/arch-arm64.cc new file mode 100644 index 00000000000..ee1f5c09832 --- /dev/null +++ b/third_party/mold/elf/arch-arm64.cc @@ -0,0 +1,595 @@ +// clang-format off +// This file contains ARM64-specific code. Being new, the ARM64's ELF +// psABI doesn't have anything peculiar. ARM64 is a clean RISC +// instruction set that supports PC-relative load/store instructions. +// +// Unlike ARM32, instructions length doesn't vary. All ARM64 +// instructions are 4 bytes long. +// +// Branch instructions used for function call can jump within ±128 MiB. +// We need to create range extension thunks to support binaries whose +// .text is larger than that. +// +// Unlike most other targets, the TLSDESC access model is used by default +// for -fPIC to access thread-local variables instead of the less +// efficient GD model. You can still enable GD but it needs the +// -mtls-dialect=trad flag. Since GD is used rarely, we don't need to +// implement GD → LE relaxation. +// +// https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst + +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +using E = ARM64; + +static void write_adrp(u8 *buf, u64 val) { + *(ul32 *)buf |= (bits(val, 13, 12) << 29) | (bits(val, 32, 14) << 5); +} + +static void write_adr(u8 *buf, u64 val) { + *(ul32 *)buf |= (bits(val, 1, 0) << 29) | (bits(val, 20, 2) << 5); +} + +static void write_movn_movz(u8 *buf, i64 val) { + *(ul32 *)buf &= 0b0000'0000'0110'0000'0000'0000'0001'1111; + + if (val >= 0) + *(ul32 *)buf |= 0xd280'0000 | (bits(val, 15, 0) << 5); // rewrite to movz + else + *(ul32 *)buf |= 0x9280'0000 | (bits(~val, 15, 0) << 5); // rewrite to movn +} + +static u64 page(u64 val) { + return val & 0xffff'ffff'ffff'f000; +} + +template <> +void write_plt_header(Context &ctx, u8 *buf) { + static const ul32 insn[] = { + 0xa9bf'7bf0, // stp x16, x30, [sp,#-16]! 
+ 0x9000'0010, // adrp x16, .got.plt[2] + 0xf940'0211, // ldr x17, [x16, .got.plt[2]] + 0x9100'0210, // add x16, x16, .got.plt[2] + 0xd61f'0220, // br x17 + 0xd503'201f, // nop + 0xd503'201f, // nop + 0xd503'201f, // nop + }; + + u64 gotplt = ctx.gotplt->shdr.sh_addr + 16; + u64 plt = ctx.plt->shdr.sh_addr; + + memcpy(buf, insn, sizeof(insn)); + write_adrp(buf + 4, page(gotplt) - page(plt + 4)); + *(ul32 *)(buf + 8) |= bits(gotplt, 11, 3) << 10; + *(ul32 *)(buf + 12) |= (gotplt & 0xfff) << 10; +} + +template <> +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { + static const ul32 insn[] = { + 0x9000'0010, // adrp x16, .got.plt[n] + 0xf940'0211, // ldr x17, [x16, .got.plt[n]] + 0x9100'0210, // add x16, x16, .got.plt[n] + 0xd61f'0220, // br x17 + }; + + u64 gotplt = sym.get_gotplt_addr(ctx); + u64 plt = sym.get_plt_addr(ctx); + + memcpy(buf, insn, sizeof(insn)); + write_adrp(buf, page(gotplt) - page(plt)); + *(ul32 *)(buf + 4) |= bits(gotplt, 11, 3) << 10; + *(ul32 *)(buf + 8) |= (gotplt & 0xfff) << 10; +} + +template <> +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { + static const ul32 insn[] = { + 0x9000'0010, // adrp x16, GOT[n] + 0xf940'0211, // ldr x17, [x16, GOT[n]] + 0xd61f'0220, // br x17 + 0xd503'201f, // nop + }; + + u64 got = sym.get_got_addr(ctx); + u64 plt = sym.get_plt_addr(ctx); + + memcpy(buf, insn, sizeof(insn)); + write_adrp(buf, page(got) - page(plt)); + *(ul32 *)(buf + 4) |= bits(got, 11, 3) << 10; +} + +template <> +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) { + u8 *loc = ctx.buf + this->shdr.sh_offset + offset; + + switch (rel.r_type) { + case R_NONE: + break; + case R_AARCH64_ABS64: + *(ul64 *)loc = val; + break; + case R_AARCH64_PREL32: + *(ul32 *)loc = val - this->shdr.sh_addr - offset; + break; + case R_AARCH64_PREL64: + *(ul64 *)loc = val - this->shdr.sh_addr - offset; + break; + default: + Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; + } +} + +static bool is_adrp(u8 *loc) { + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page- + u32 insn = *(ul32 *)loc; + return (bits(insn, 31, 24) & 0b1001'1111) == 0b1001'0000; +} + +static bool is_ldr(u8 *loc) { + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + u32 insn = *(ul32 *)loc; + return (bits(insn, 31, 20) & 0b1111'1111'1100) == 0b1111'1001'0100; +} + +static bool is_add(u8 *loc) { + // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate-- + u32 insn = *(ul32 *)loc; + return (bits(insn, 31, 20) & 0b1111'1111'1100) == 0b1001'0001'0000; +} + +template <> +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + u64 S = sym.get_addr(ctx); + u64 A = rel.r_addend; + u64 P = get_addr() + rel.r_offset; + u64 G = sym.get_got_idx(ctx) * 
sizeof(Word); + u64 GOT = ctx.got->shdr.sh_addr; + + switch (rel.r_type) { + case R_AARCH64_ABS64: + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_ADD_ABS_LO12_NC: + *(ul32 *)loc |= bits(S + A, 11, 0) << 10; + break; + case R_AARCH64_LDST16_ABS_LO12_NC: + *(ul32 *)loc |= bits(S + A, 11, 1) << 10; + break; + case R_AARCH64_LDST32_ABS_LO12_NC: + *(ul32 *)loc |= bits(S + A, 11, 2) << 10; + break; + case R_AARCH64_LDST64_ABS_LO12_NC: + *(ul32 *)loc |= bits(S + A, 11, 3) << 10; + break; + case R_AARCH64_LDST128_ABS_LO12_NC: + *(ul32 *)loc |= bits(S + A, 11, 4) << 10; + break; + case R_AARCH64_MOVW_UABS_G0: + check(S + A, 0, 1 << 16); + *(ul32 *)loc |= bits(S + A, 15, 0) << 5; + break; + case R_AARCH64_MOVW_UABS_G0_NC: + *(ul32 *)loc |= bits(S + A, 15, 0) << 5; + break; + case R_AARCH64_MOVW_UABS_G1: + check(S + A, 0, 1LL << 32); + *(ul32 *)loc |= bits(S + A, 31, 16) << 5; + break; + case R_AARCH64_MOVW_UABS_G1_NC: + *(ul32 *)loc |= bits(S + A, 31, 16) << 5; + break; + case R_AARCH64_MOVW_UABS_G2: + check(S + A, 0, 1LL << 48); + *(ul32 *)loc |= bits(S + A, 47, 32) << 5; + break; + case R_AARCH64_MOVW_UABS_G2_NC: + *(ul32 *)loc |= bits(S + A, 47, 32) << 5; + break; + case R_AARCH64_MOVW_UABS_G3: + *(ul32 *)loc |= bits(S + A, 63, 48) << 5; + break; + case R_AARCH64_ADR_GOT_PAGE: + if (sym.has_got(ctx)) { + i64 val = page(G + GOT + A) - page(P); + check(val, -(1LL << 32), 1LL << 32); + write_adrp(loc, val); + } else { + // Relax GOT-loading ADRP+LDR to an immediate ADRP+ADD + i64 val = page(S + A) - page(P); + check(val, -(1LL << 32), 1LL << 32); + write_adrp(loc, val); + + u32 reg = bits(*(ul32 *)loc, 4, 0); + *(ul32 *)(loc + 4) = 0x9100'0000 | (reg << 5) | reg; // ADD + *(ul32 *)(loc + 4) |= bits(S + A, 11, 0) << 10; + i++; + } + break; + case R_AARCH64_ADR_PREL_PG_HI21: { + // The ARM64 psABI defines that an `ADRP x0, foo` and `ADD x0, x0, + // :lo12: foo` instruction pair to materialize a PC-relative address + // in a register can be relaxed to `NOP` followed by `ADR x0, foo` + // if foo is in PC ± 1 MiB. + if (ctx.arg.relax && i + 1 < rels.size() && + sign_extend(S + A - P - 4, 20) == S + A - P - 4) { + const ElfRel &rel2 = rels[i + 1]; + if (rel2.r_type == R_AARCH64_ADD_ABS_LO12_NC && + rel2.r_sym == rel.r_sym && + rel2.r_offset == rel.r_offset + 4 && + rel2.r_addend == rel.r_addend && + is_adrp(loc) && + is_add(loc + 4)) { + u32 reg1 = bits(*(ul32 *)loc, 4, 0); + u32 reg2 = bits(*(ul32 *)(loc + 4), 4, 0); + if (reg1 == reg2) { + *(ul32 *)loc = 0xd503'201f; // nop + *(ul32 *)(loc + 4) = 0x1000'0000 | reg1; // adr + write_adr(loc + 4, S + A - P - 4); + i++; + break; + } + } + } + + i64 val = page(S + A) - page(P); + check(val, -(1LL << 32), 1LL << 32); + write_adrp(loc, val); + break; + } + case R_AARCH64_ADR_PREL_LO21: + check(S + A - P, -(1LL << 20), 1LL << 20); + write_adr(loc, S + A - P); + break; + case R_AARCH64_CALL26: + case R_AARCH64_JUMP26: { + if (sym.is_remaining_undef_weak()) { + // On ARM, calling an weak undefined symbol jumps to the + // next instruction. 
+ *(ul32 *)loc = 0xd503'201f; // nop + break; + } + + i64 val = S + A - P; + if (val < -(1 << 27) || (1 << 27) <= val) + val = get_thunk_addr(i) + A - P; + *(ul32 *)loc |= bits(val, 27, 2); + break; + } + case R_AARCH64_PLT32: + check(S + A - P, -(1LL << 31), 1LL << 31); + *(ul32 *)loc = S + A - P; + break; + case R_AARCH64_CONDBR19: + case R_AARCH64_LD_PREL_LO19: + check(S + A - P, -(1LL << 20), 1LL << 20); + *(ul32 *)loc |= bits(S + A - P, 20, 2) << 5; + break; + case R_AARCH64_PREL16: + check(S + A - P, -(1LL << 15), 1LL << 15); + *(ul16 *)loc = S + A - P; + break; + case R_AARCH64_PREL32: + check(S + A - P, -(1LL << 31), 1LL << 32); + *(ul32 *)loc = S + A - P; + break; + case R_AARCH64_PREL64: + *(ul64 *)loc = S + A - P; + break; + case R_AARCH64_LD64_GOT_LO12_NC: + *(ul32 *)loc |= bits(G + GOT + A, 11, 3) << 10; + break; + case R_AARCH64_LD64_GOTPAGE_LO15: { + i64 val = G + GOT + A - page(GOT); + check(val, 0, 1 << 15); + *(ul32 *)loc |= bits(val, 14, 3) << 10; + break; + } + case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: { + i64 val = page(sym.get_gottp_addr(ctx) + A) - page(P); + check(val, -(1LL << 32), 1LL << 32); + write_adrp(loc, val); + break; + } + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + *(ul32 *)loc |= bits(sym.get_gottp_addr(ctx) + A, 11, 3) << 10; + break; + case R_AARCH64_TLSLE_MOVW_TPREL_G0: { + i64 val = S + A - ctx.tp_addr; + check(val, -(1 << 15), 1 << 15); + write_movn_movz(loc, val); + break; + } + case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: + *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 15, 0) << 5; + break; + case R_AARCH64_TLSLE_MOVW_TPREL_G1: { + i64 val = S + A - ctx.tp_addr; + check(val, -(1LL << 31), 1LL << 31); + write_movn_movz(loc, val >> 16); + break; + } + case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: + *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 31, 16) << 5; + break; + case R_AARCH64_TLSLE_MOVW_TPREL_G2: { + i64 val = S + A - ctx.tp_addr; + check(val, -(1LL << 47), 1LL << 47); + write_movn_movz(loc, val >> 32); + break; + } + case R_AARCH64_TLSLE_ADD_TPREL_HI12: { + i64 val = S + A - ctx.tp_addr; + check(val, 0, 1LL << 24); + *(ul32 *)loc |= bits(val, 23, 12) << 10; + break; + } + case R_AARCH64_TLSLE_ADD_TPREL_LO12: + check(S + A - ctx.tp_addr, 0, 1 << 12); + *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 11, 0) << 10; + break; + case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: + *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 11, 0) << 10; + break; + case R_AARCH64_TLSGD_ADR_PAGE21: { + i64 val = page(sym.get_tlsgd_addr(ctx) + A) - page(P); + check(val, -(1LL << 32), 1LL << 32); + write_adrp(loc, val); + break; + } + case R_AARCH64_TLSGD_ADD_LO12_NC: + *(ul32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A, 11, 0) << 10; + break; + case R_AARCH64_TLSDESC_ADR_PAGE21: + if (sym.has_tlsdesc(ctx)) { + i64 val = page(sym.get_tlsdesc_addr(ctx) + A) - page(P); + check(val, -(1LL << 32), 1LL << 32); + write_adrp(loc, val); + } else { + // adrp x0, 0 -> movz x0, #tls_ofset_hi, lsl #16 + i64 val = (S + A - ctx.tp_addr); + check(val, -(1LL << 32), 1LL << 32); + *(ul32 *)loc = 0xd2a0'0000 | (bits(val, 32, 16) << 5); + } + break; + case R_AARCH64_TLSDESC_LD64_LO12: + if (sym.has_tlsdesc(ctx)) { + *(ul32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 3) << 10; + } else { + // ldr x2, [x0] -> movk x0, #tls_ofset_lo + u32 offset_lo = (S + A - ctx.tp_addr) & 0xffff; + *(ul32 *)loc = 0xf280'0000 | (offset_lo << 5); + } + break; + case R_AARCH64_TLSDESC_ADD_LO12: + if (sym.has_tlsdesc(ctx)) { + *(ul32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 0) << 10; + } else { + // add x0, x0, #0 -> nop + *(ul32 
*)loc = 0xd503'201f; + } + break; + case R_AARCH64_TLSDESC_CALL: + if (!sym.has_tlsdesc(ctx)) { + // blr x2 -> nop + *(ul32 *)loc = 0xd503'201f; + } + break; + default: + unreachable(); + } + } +} + +template <> +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? frag_addend : (i64)rel.r_addend; + + switch (rel.r_type) { + case R_AARCH64_ABS64: + if (std::optional val = get_tombstone(sym, frag)) + *(ul64 *)loc = *val; + else + *(ul64 *)loc = S + A; + break; + case R_AARCH64_ABS32: { + i64 val = S + A; + check(val, 0, 1LL << 32); + *(ul32 *)loc = val; + break; + } + default: + Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " + << rel; + break; + } + } +} + +template <> +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + // Scan relocations + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = (u8 *)(contents.data() + rel.r_offset); + + if (sym.is_ifunc()) + sym.flags |= NEEDS_GOT | NEEDS_PLT; + + switch (rel.r_type) { + case R_AARCH64_ABS64: + scan_dyn_absrel(ctx, sym, rel); + break; + case R_AARCH64_ADR_GOT_PAGE: + // An ADR_GOT_PAGE and GOT_LO12_NC relocation pair is used to load a + // symbol's address from GOT. If the GOT value is a link-time + // constant, we may be able to rewrite the ADRP+LDR instruction pair + // with an ADRP+ADD, eliminating a GOT memory load. + if (ctx.arg.relax && sym.is_relative() && !sym.is_imported && + !sym.is_ifunc() && i + 1 < rels.size()) { + // ADRP+LDR must be consecutive and use the same register to relax. 
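+        // For example (illustrative instruction sequence, using x0 as a
+        // sample register), the rewrite applied in apply_reloc_alloc above
+        // turns
+        //
+        //   adrp x0, :got:foo
+        //   ldr  x0, [x0, :got_lo12:foo]
+        //
+        // into
+        //
+        //   adrp x0, foo
+        //   add  x0, x0, :lo12:foo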
+ const ElfRel &rel2 = rels[i + 1]; + if (rel2.r_type == R_AARCH64_LD64_GOT_LO12_NC && + rel2.r_offset == rel.r_offset + 4 && + rel2.r_sym == rel.r_sym && + rel.r_addend == 0 && + rel2.r_addend == 0 && + is_adrp(loc) && + is_ldr(loc + 4)) { + u32 rd = bits(*(ul32 *)loc, 4, 0); + u32 rn = bits(*(ul32 *)(loc + 4), 9, 5); + u32 rt = bits(*(ul32 *)(loc + 4), 4, 0); + if (rd == rn && rn == rt) { + i++; + break; + } + } + } + sym.flags |= NEEDS_GOT; + break; + case R_AARCH64_LD64_GOT_LO12_NC: + case R_AARCH64_LD64_GOTPAGE_LO15: + sym.flags |= NEEDS_GOT; + break; + case R_AARCH64_CALL26: + case R_AARCH64_JUMP26: + case R_AARCH64_PLT32: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + sym.flags |= NEEDS_GOTTP; + break; + case R_AARCH64_ADR_PREL_PG_HI21: + scan_pcrel(ctx, sym, rel); + break; + case R_AARCH64_TLSGD_ADR_PAGE21: + sym.flags |= NEEDS_TLSGD; + break; + case R_AARCH64_TLSDESC_ADR_PAGE21: + case R_AARCH64_TLSDESC_LD64_LO12: + case R_AARCH64_TLSDESC_ADD_LO12: + if (!relax_tlsdesc(ctx, sym)) + sym.flags |= NEEDS_TLSDESC; + break; + case R_AARCH64_TLSLE_MOVW_TPREL_G0: + case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: + case R_AARCH64_TLSLE_MOVW_TPREL_G1: + case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: + case R_AARCH64_TLSLE_MOVW_TPREL_G2: + case R_AARCH64_TLSLE_ADD_TPREL_HI12: + case R_AARCH64_TLSLE_ADD_TPREL_LO12: + case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: + check_tlsle(ctx, sym, rel); + break; + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_ADR_PREL_LO21: + case R_AARCH64_CONDBR19: + case R_AARCH64_LD_PREL_LO19: + case R_AARCH64_LDST16_ABS_LO12_NC: + case R_AARCH64_LDST32_ABS_LO12_NC: + case R_AARCH64_LDST64_ABS_LO12_NC: + case R_AARCH64_LDST128_ABS_LO12_NC: + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_MOVW_UABS_G0: + case R_AARCH64_MOVW_UABS_G0_NC: + case R_AARCH64_MOVW_UABS_G1: + case R_AARCH64_MOVW_UABS_G1_NC: + case R_AARCH64_MOVW_UABS_G2: + case R_AARCH64_MOVW_UABS_G2_NC: + case R_AARCH64_MOVW_UABS_G3: + case R_AARCH64_PREL16: + case R_AARCH64_PREL32: + case R_AARCH64_PREL64: + case R_AARCH64_TLSGD_ADD_LO12_NC: + case R_AARCH64_TLSDESC_CALL: + break; + default: + Error(ctx) << *this << ": unknown relocation: " << rel; + } + } +} + +template <> +void RangeExtensionThunk::copy_buf(Context &ctx) { + u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; + + static const ul32 data[] = { + 0x9000'0010, // adrp x16, 0 # R_AARCH64_ADR_PREL_PG_HI21 + 0x9100'0210, // add x16, x16 # R_AARCH64_ADD_ABS_LO12_NC + 0xd61f'0200, // br x16 + }; + + static_assert(E::thunk_size == sizeof(data)); + + for (i64 i = 0; i < symbols.size(); i++) { + u64 S = symbols[i]->get_addr(ctx); + u64 P = output_section.shdr.sh_addr + offset + i * E::thunk_size; + + u8 *loc = buf + i * E::thunk_size; + memcpy(loc , data, sizeof(data)); + write_adrp(loc, page(S) - page(P)); + *(ul32 *)(loc + 4) |= bits(S, 11, 0) << 10; + } +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/arch-i386.cc b/third_party/mold/elf/arch-i386.cc new file mode 100644 index 00000000000..2692cacf1c2 --- /dev/null +++ b/third_party/mold/elf/arch-i386.cc @@ -0,0 +1,565 @@ +// clang-format off +// i386 is similar to x86-64 but lacks PC-relative memory access +// instructions. So it's not straightforward to support position- +// independent code (PIC) on that target. 
+// +// If an object file is compiled with -fPIC, a function that needs to load +// a value from memory first obtains its own address with the following +// code +// +// call __x86.get_pc_thunk.bx +// +// where __x86.get_pc_thunk.bx is defined as +// +// __x86.get_pc_thunk.bx: +// mov (%esp), %ebx # move the return address to %ebx +// ret +// +// . With the function's own address (or, more precisely, the address +// immediately after the call instruction), the function can compute an +// absolute address of a variable with its address + link-time constant. +// +// Executing call-mov-ret isn't very cheap, and allocating one register to +// store PC isn't cheap too, especially given that i386 has only 8 +// general-purpose registers. But that's the cost of PIC on i386. You need +// to pay it when creating a .so and a position-independent executable. +// +// When a position-independent function calls another function, it sets +// %ebx to the address of .got. Position-independent PLT entries use that +// register to load values from .got.plt/.got. +// +// If we are creating a position-dependent executable (PDE), we can't +// assume that %ebx is set to .got. For PDE, we need to create position- +// dependent PLT entries which don't use %ebx. +// +// https://github.com/rui314/psabi/blob/main/i386.pdf + +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +using E = I386; + +template <> +i64 get_addend(u8 *loc, const ElfRel &rel) { + switch (rel.r_type) { + case R_386_8: + case R_386_PC8: + return *loc; + case R_386_16: + case R_386_PC16: + return *(ul16 *)loc; + case R_386_32: + case R_386_PC32: + case R_386_GOT32: + case R_386_GOT32X: + case R_386_PLT32: + case R_386_GOTOFF: + case R_386_GOTPC: + case R_386_TLS_LDM: + case R_386_TLS_GOTIE: + case R_386_TLS_LE: + case R_386_TLS_IE: + case R_386_TLS_GD: + case R_386_TLS_LDO_32: + case R_386_SIZE32: + case R_386_TLS_GOTDESC: + return *(ul32 *)loc; + default: + return 0; + } +} + +template <> +void write_addend(u8 *loc, i64 val, const ElfRel &rel) { + switch (rel.r_type) { + case R_386_NONE: + break; + case R_386_8: + case R_386_PC8: + *loc = val; + break; + case R_386_16: + case R_386_PC16: + *(ul16 *)loc = val; + break; + case R_386_32: + case R_386_PC32: + case R_386_GOT32: + case R_386_GOT32X: + case R_386_PLT32: + case R_386_GOTOFF: + case R_386_GOTPC: + case R_386_TLS_LDM: + case R_386_TLS_GOTIE: + case R_386_TLS_LE: + case R_386_TLS_IE: + case R_386_TLS_GD: + case R_386_TLS_LDO_32: + case R_386_SIZE32: + case R_386_TLS_GOTDESC: + *(ul32 *)loc = val; + break; + default: + unreachable(); + } +} + +template <> +void write_plt_header(Context &ctx, u8 *buf) { + if (ctx.arg.pic) { + static const u8 insn[] = { + 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 + 0x51, // push %ecx + 0x8d, 0x8b, 0, 0, 0, 0, // lea GOTPLT+4(%ebx), %ecx + 0xff, 0x31, // push (%ecx) + 0xff, 0x61, 0x04, // jmp *0x4(%ecx) + }; + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 7) = ctx.gotplt->shdr.sh_addr - ctx.got->shdr.sh_addr + 4; + } else { + static const u8 insn[] = { + 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 + 0x51, // push %ecx + 0xb9, 0, 0, 0, 0, // mov GOTPLT+4, %ecx + 0xff, 0x31, // push (%ecx) + 0xff, 0x61, 0x04, // jmp *0x4(%ecx) + 0xcc, // (padding) + }; + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 6) = ctx.gotplt->shdr.sh_addr + 4; + } +} + +template <> +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { + if (ctx.arg.pic) { + static const u8 insn[] = { + 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 + 0xb9, 0, 0, 0, 0, // mov $reloc_offset, %ecx + 0xff, 
0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx) + 0xcc, // (padding) + }; + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 5) = sym.get_plt_idx(ctx) * sizeof(ElfRel); + *(ul32 *)(buf + 11) = sym.get_gotplt_addr(ctx) - ctx.got->shdr.sh_addr; + } else { + static const u8 insn[] = { + 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 + 0xb9, 0, 0, 0, 0, // mov $reloc_offset, %ecx + 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT + 0xcc, // (padding) + }; + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 5) = sym.get_plt_idx(ctx) * sizeof(ElfRel); + *(ul32 *)(buf + 11) = sym.get_gotplt_addr(ctx); + } +} + +template <> +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { + if (ctx.arg.pic) { + static const u8 insn[] = { + 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 + 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx) + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding) + }; + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 6) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr; + } else { + static const u8 insn[] = { + 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 + 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding) + }; + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 6) = sym.get_got_addr(ctx); + } +} + +template <> +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) { + u8 *loc = ctx.buf + this->shdr.sh_offset + offset; + + switch (rel.r_type) { + case R_NONE: + break; + case R_386_32: + *(ul32 *)loc = val; + break; + case R_386_PC32: + *(ul32 *)loc = val - this->shdr.sh_addr - offset; + break; + default: + Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; + } +} + +static u32 relax_got32x(u8 *loc) { + // mov imm(%reg1), %reg2 -> lea imm(%reg1), %reg2 + if (loc[0] == 0x8b) + return 0x8d00 | loc[1]; + return 0; +} + +// Relax GD to LE +static void relax_gd_to_le(u8 *loc, ElfRel rel, u64 val) { + static const u8 insn[] = { + 0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax + 0x81, 0xc0, 0, 0, 0, 0, // add $tp_offset, %eax + }; + + switch (rel.r_type) { + case R_386_PLT32: + case R_386_PC32: + memcpy(loc - 3, insn, sizeof(insn)); + *(ul32 *)(loc + 5) = val; + break; + case R_386_GOT32: + case R_386_GOT32X: + memcpy(loc - 2, insn, sizeof(insn)); + *(ul32 *)(loc + 6) = val; + break; + default: + unreachable(); + } +} + +// Relax LD to LE +static void relax_ld_to_le(u8 *loc, ElfRel rel, u64 val) { + switch (rel.r_type) { + case R_386_PLT32: + case R_386_PC32: { + static const u8 insn[] = { + 0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax + 0x2d, 0, 0, 0, 0, // sub $tls_size, %eax + }; + memcpy(loc - 2, insn, sizeof(insn)); + *(ul32 *)(loc + 5) = val; + break; + } + case R_386_GOT32: + case R_386_GOT32X: { + static const u8 insn[] = { + 0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax + 0x2d, 0, 0, 0, 0, // sub $tls_size, %eax + 0x90, // nop + }; + memcpy(loc - 2, insn, sizeof(insn)); + *(ul32 *)(loc + 5) = val; + break; + } + default: + unreachable(); + } +} + +template <> +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " 
out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + u64 S = sym.get_addr(ctx); + u64 A = get_addend(*this, rel); + u64 P = get_addr() + rel.r_offset; + u64 G = sym.get_got_idx(ctx) * sizeof(Word); + u64 GOT = ctx.got->shdr.sh_addr; + + switch (rel.r_type) { + case R_386_8: + check(S + A, 0, 1 << 8); + *loc = S + A; + break; + case R_386_16: + check(S + A, 0, 1 << 16); + *(ul16 *)loc = S + A; + break; + case R_386_32: + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_386_PC8: + check(S + A - P, -(1 << 7), 1 << 7); + *loc = S + A - P; + break; + case R_386_PC16: + check(S + A - P, -(1 << 15), 1 << 15); + *(ul16 *)loc = S + A - P; + break; + case R_386_PC32: + case R_386_PLT32: + *(ul32 *)loc = S + A - P; + break; + case R_386_GOT32: + *(ul32 *)loc = G + A; + break; + case R_386_GOT32X: + if (sym.has_got(ctx)) { + *(ul32 *)loc = G + A; + } else { + u32 insn = relax_got32x(loc - 2); + assert(insn); + loc[-2] = insn >> 8; + loc[-1] = insn; + *(ul32 *)loc = S + A - GOT; + } + break; + case R_386_GOTOFF: + *(ul32 *)loc = S + A - GOT; + break; + case R_386_GOTPC: + *(ul32 *)loc = GOT + A - P; + break; + case R_386_TLS_GOTIE: + *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - GOT; + break; + case R_386_TLS_LE: + *(ul32 *)loc = S + A - ctx.tp_addr; + break; + case R_386_TLS_IE: + *(ul32 *)loc = sym.get_gottp_addr(ctx) + A; + break; + case R_386_TLS_GD: + if (sym.has_tlsgd(ctx)) { + *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; + } else { + relax_gd_to_le(loc, rels[i + 1], S - ctx.tp_addr); + i++; + } + break; + case R_386_TLS_LDM: + if (ctx.got->has_tlsld(ctx)) { + *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; + } else { + relax_ld_to_le(loc, rels[i + 1], ctx.tp_addr - ctx.tls_begin); + i++; + } + break; + case R_386_TLS_LDO_32: + *(ul32 *)loc = S + A - ctx.dtp_addr; + break; + case R_386_SIZE32: + *(ul32 *)loc = sym.esym().st_size + A; + break; + case R_386_TLS_GOTDESC: + if (sym.has_tlsdesc(ctx)) { + *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) + A - GOT; + } else { + static const u8 insn[] = { + 0x8d, 0x05, 0, 0, 0, 0, // lea 0, %eax + }; + memcpy(loc - 2, insn, sizeof(insn)); + *(ul32 *)loc = S + A - ctx.tp_addr; + } + break; + case R_386_TLS_DESC_CALL: + if (!sym.has_tlsdesc(ctx)) { + // call *(%eax) -> nop + loc[0] = 0x66; + loc[1] = 0x90; + } + break; + default: + unreachable(); + } + } +} + +template <> +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? 
frag_addend : get_addend(*this, rel); + u64 GOT = ctx.got->shdr.sh_addr; + + switch (rel.r_type) { + case R_386_8: + check(S + A, 0, 1 << 8); + *loc = S + A; + break; + case R_386_16: + check(S + A, 0, 1 << 16); + *(ul16 *)loc = S + A; + break; + case R_386_32: + if (std::optional val = get_tombstone(sym, frag)) + *(ul32 *)loc = *val; + else + *(ul32 *)loc = S + A; + break; + case R_386_PC8: + check(S + A, -(1 << 7), 1 << 7); + *loc = S + A; + break; + case R_386_PC16: + check(S + A, -(1 << 15), 1 << 15); + *(ul16 *)loc = S + A; + break; + case R_386_PC32: + *(ul32 *)loc = S + A; + break; + case R_386_GOTPC: + *(ul32 *)loc = GOT + A; + break; + case R_386_GOTOFF: + *(ul32 *)loc = S + A - GOT; + break; + case R_386_TLS_LDO_32: + if (std::optional val = get_tombstone(sym, frag)) + *(ul32 *)loc = *val; + else + *(ul32 *)loc = S + A - ctx.dtp_addr; + break; + case R_386_SIZE32: + *(ul32 *)loc = sym.esym().st_size + A; + break; + default: + unreachable(); + } + } +} + +template <> +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + // Scan relocations + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = (u8 *)(contents.data() + rel.r_offset); + + if (sym.is_ifunc()) + sym.flags |= NEEDS_GOT | NEEDS_PLT; + + switch (rel.r_type) { + case R_386_8: + case R_386_16: + scan_absrel(ctx, sym, rel); + break; + case R_386_32: + scan_dyn_absrel(ctx, sym, rel); + break; + case R_386_PC8: + case R_386_PC16: + case R_386_PC32: + scan_pcrel(ctx, sym, rel); + break; + case R_386_GOT32: + case R_386_GOTPC: + sym.flags |= NEEDS_GOT; + break; + case R_386_GOT32X: { + // We always want to relax GOT32X because static PIE doesn't + // work without it. + bool do_relax = !sym.is_imported && sym.is_relative() && + relax_got32x(loc - 2); + if (!do_relax) + sym.flags |= NEEDS_GOT; + break; + } + case R_386_PLT32: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_386_TLS_GOTIE: + case R_386_TLS_IE: + sym.flags |= NEEDS_GOTTP; + break; + case R_386_TLS_GD: + if (i + 1 == rels.size()) + Fatal(ctx) << *this << ": TLS_GD reloc must be followed by PLT or GOT32"; + + if (u32 ty = rels[i + 1].r_type; + ty != R_386_PLT32 && ty != R_386_PC32 && + ty != R_386_GOT32 && ty != R_386_GOT32X) + Fatal(ctx) << *this << ": TLS_GD reloc must be followed by PLT or GOT32"; + + // We always relax if -static because libc.a doesn't contain + // __tls_get_addr(). + if (ctx.arg.is_static || + (ctx.arg.relax && !ctx.arg.shared && !sym.is_imported)) + i++; + else + sym.flags |= NEEDS_TLSGD; + break; + case R_386_TLS_LDM: + if (i + 1 == rels.size()) + Fatal(ctx) << *this << ": TLS_LDM reloc must be followed by PLT or GOT32"; + + if (u32 ty = rels[i + 1].r_type; + ty != R_386_PLT32 && ty != R_386_PC32 && + ty != R_386_GOT32 && ty != R_386_GOT32X) + Fatal(ctx) << *this << ": TLS_LDM reloc must be followed by PLT or GOT32"; + + // We always relax if -static because libc.a doesn't contain + // __tls_get_addr(). 
+ if (ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared)) + i++; + else + ctx.needs_tlsld = true; + break; + case R_386_TLS_GOTDESC: + if (!relax_tlsdesc(ctx, sym)) + sym.flags |= NEEDS_TLSDESC; + break; + case R_386_TLS_LE: + check_tlsle(ctx, sym, rel); + break; + case R_386_GOTOFF: + case R_386_TLS_LDO_32: + case R_386_SIZE32: + case R_386_TLS_DESC_CALL: + break; + default: + Error(ctx) << *this << ": unknown relocation: " << rel; + } + } +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/arch-m68k.cc b/third_party/mold/elf/arch-m68k.cc new file mode 100644 index 00000000000..1b3b8721241 --- /dev/null +++ b/third_party/mold/elf/arch-m68k.cc @@ -0,0 +1,326 @@ +// clang-format off +// This file contains code for the Motorola 68000 series microprocessors, +// which is often abbreviated as m68k. Running a Unix-like system on a +// m68k-based machine today is probably a retro-computing hobby activity, +// but the processor was a popular choice to build Unix computers during +// '80s. Early Sun workstations for example used m68k. Macintosh until +// 1994 were based on m68k as well until they switched to PowerPC (and +// then to x86 and to ARM.) +// +// From the linker's point of view, it is not hard to support m68k. It's +// just a 32-bit big-endian CISC ISA. Compared to comtemporary i386, +// m68k's psABI is actually simpler because m68k has PC-relative memory +// access instructions and therefore can support position-independent +// code without too much hassle. +// +// https://github.com/rui314/psabi/blob/main/m68k.pdf + +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +using E = M68K; + +template <> +void write_plt_header(Context &ctx, u8 *buf) { + static const u8 insn[] = { + 0x2f, 0x00, // move.l %d0, -(%sp) + 0x2f, 0x3b, 0x01, 0x70, 0, 0, 0, 0, // move.l (GOTPLT+4, %pc), -(%sp) + 0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOTPLT+8, %pc]) + }; + + memcpy(buf, insn, sizeof(insn)); + *(ub32 *)(buf + 6) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr; + *(ub32 *)(buf + 14) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 4; +} + +template <> +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { + static const u8 insn[] = { + 0x20, 0x3c, 0, 0, 0, 0, // move.l PLT_OFFSET, %d0 + 0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOTPLT_ENTRY, %pc]) + }; + + memcpy(buf, insn, sizeof(insn)); + *(ub32 *)(buf + 2) = sym.get_plt_idx(ctx) * sizeof(ElfRel); + *(ub32 *)(buf + 10) = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 8; +} + +template <> +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { + static const u8 insn[] = { + 0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOT_ENTRY, %pc]) + }; + + memcpy(buf, insn, sizeof(insn)); + *(ub32 *)(buf + 4) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 2; +} + +template <> +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) { + u8 *loc = ctx.buf + this->shdr.sh_offset + offset; + + switch (rel.r_type) { + case R_NONE: + break; + case R_68K_32: + *(ub32 *)loc = val; + break; + case R_68K_PC32: + *(ub32 *)loc = val - this->shdr.sh_addr - offset; + break; + default: + Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; + } +} + +template <> +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel 
&rel = rels[i]; + if (rel.r_type == R_NONE) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + auto write16 = [&](u64 val) { + check(val, 0, 1 << 16); + *(ub16 *)loc = val; + }; + + auto write16s = [&](u64 val) { + check(val, -(1 << 15), 1 << 15); + *(ub16 *)loc = val; + }; + + auto write8 = [&](u64 val) { + check(val, 0, 1 << 8); + *loc = val; + }; + + auto write8s = [&](u64 val) { + check(val, -(1 << 7), 1 << 7); + *loc = val; + }; + + u64 S = sym.get_addr(ctx); + u64 A = rel.r_addend; + u64 P = get_addr() + rel.r_offset; + u64 G = sym.get_got_idx(ctx) * sizeof(Word); + u64 GOT = ctx.got->shdr.sh_addr; + + switch (rel.r_type) { + case R_68K_32: + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_68K_16: + write16(S + A); + break; + case R_68K_8: + write8(S + A); + break; + case R_68K_PC32: + case R_68K_PLT32: + *(ub32 *)loc = S + A - P; + break; + case R_68K_PC16: + case R_68K_PLT16: + write16s(S + A - P); + break; + case R_68K_PC8: + case R_68K_PLT8: + write8s(S + A - P); + break; + case R_68K_GOTPCREL32: + *(ub32 *)loc = GOT + A - P; + break; + case R_68K_GOTPCREL16: + write16s(GOT + A - P); + break; + case R_68K_GOTPCREL8: + write8s(GOT + A - P); + break; + case R_68K_GOTOFF32: + *(ub32 *)loc = G + A; + break; + case R_68K_GOTOFF16: + write16(G + A); + break; + case R_68K_GOTOFF8: + write8(G + A); + break; + case R_68K_TLS_GD32: + *(ub32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; + break; + case R_68K_TLS_GD16: + write16(sym.get_tlsgd_addr(ctx) + A - GOT); + break; + case R_68K_TLS_GD8: + write8(sym.get_tlsgd_addr(ctx) + A - GOT); + break; + case R_68K_TLS_LDM32: + *(ub32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; + break; + case R_68K_TLS_LDM16: + write16(ctx.got->get_tlsld_addr(ctx) + A - GOT); + break; + case R_68K_TLS_LDM8: + write8(ctx.got->get_tlsld_addr(ctx) + A - GOT); + break; + case R_68K_TLS_LDO32: + *(ub32 *)loc = S + A - ctx.dtp_addr; + break; + case R_68K_TLS_LDO16: + write16s(S + A - ctx.dtp_addr); + break; + case R_68K_TLS_LDO8: + write8s(S + A - ctx.dtp_addr); + break; + case R_68K_TLS_IE32: + *(ub32 *)loc = sym.get_gottp_addr(ctx) + A - GOT; + break; + case R_68K_TLS_IE16: + write16(sym.get_gottp_addr(ctx) + A - GOT); + break; + case R_68K_TLS_IE8: + write8(sym.get_gottp_addr(ctx) + A - GOT); + break; + case R_68K_TLS_LE32: + *(ub32 *)loc = S + A - ctx.tp_addr; + break; + case R_68K_TLS_LE16: + write16(S + A - ctx.tp_addr); + break; + case R_68K_TLS_LE8: + write8(S + A - ctx.tp_addr); + break; + default: + unreachable(); + } + } +} + +template <> +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? 
frag_addend : (i64)rel.r_addend; + + switch (rel.r_type) { + case R_68K_32: + if (std::optional val = get_tombstone(sym, frag)) + *(ub32 *)loc = *val; + else + *(ub32 *)loc = S + A; + break; + default: + Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " + << rel; + } + } +} + +template <> +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + + if (sym.is_ifunc()) + Error(ctx) << sym << ": GNU ifunc symbol is not supported on m68k"; + + switch (rel.r_type) { + case R_68K_32: + scan_dyn_absrel(ctx, sym, rel); + break; + case R_68K_16: + case R_68K_8: + scan_absrel(ctx, sym, rel); + break; + case R_68K_PC32: + case R_68K_PC16: + case R_68K_PC8: + scan_pcrel(ctx, sym, rel); + break; + case R_68K_GOTPCREL32: + case R_68K_GOTPCREL16: + case R_68K_GOTPCREL8: + case R_68K_GOTOFF32: + case R_68K_GOTOFF16: + case R_68K_GOTOFF8: + sym.flags |= NEEDS_GOT; + break; + case R_68K_PLT32: + case R_68K_PLT16: + case R_68K_PLT8: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_68K_TLS_GD32: + case R_68K_TLS_GD16: + case R_68K_TLS_GD8: + sym.flags |= NEEDS_TLSGD; + break; + case R_68K_TLS_LDM32: + case R_68K_TLS_LDM16: + case R_68K_TLS_LDM8: + ctx.needs_tlsld = true; + break; + case R_68K_TLS_IE32: + case R_68K_TLS_IE16: + case R_68K_TLS_IE8: + sym.flags |= NEEDS_GOTTP; + break; + case R_68K_TLS_LE32: + case R_68K_TLS_LE16: + case R_68K_TLS_LE8: + check_tlsle(ctx, sym, rel); + break; + case R_68K_TLS_LDO32: + case R_68K_TLS_LDO16: + case R_68K_TLS_LDO8: + break; + default: + Error(ctx) << *this << ": unknown relocation: " << rel; + } + } +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/arch-ppc32.cc b/third_party/mold/elf/arch-ppc32.cc new file mode 100644 index 00000000000..c3a1db4cec6 --- /dev/null +++ b/third_party/mold/elf/arch-ppc32.cc @@ -0,0 +1,452 @@ +// clang-format off +// This file implements the PowerPC 32-bit ISA. For 64-bit PowerPC, see +// arch-ppc64v1.cpp and arch-ppc64v2.cpp. +// +// PPC32 is a RISC ISA. It has 32 general-purpose registers (GPRs). +// r0, r11 and r12 are reserved for static linkers, so we can use these +// registers in PLTs and range extension thunks. In addition to that, it +// has a few special registers. Notable ones are LR which holds a return +// address and CTR which we can use to store a branch target address. +// +// It feels that the PPC32 psABI is unnecessarily complicated at first +// glance, but that is mainly stemmed from the fact that the ISA lacks +// PC-relative load/store instructions. Since machine instructions cannot +// load data relative to its own address, it is not straightforward to +// support position-independent code (PIC) on PPC32. +// +// A position-independent function typically contains the following code +// in the prologue to obtain its own address: +// +// mflr r0 // save the current return address to %r0 +// bcl 20, 31, 4 // call the next instruction as if it were a function +// mtlr r12 // save the return address to %r12 +// mtlr r0 // restore the original return address +// +// An object file compiled with -fPIC contains a data section named +// `.got2` to store addresses of locally-defined global variables and +// constants. 
A PIC function usually computes its .got2+0x8000 and set it +// to %r30. This scheme allows the function to access global objects +// defined in the same input file with a single %r30-relative load/store +// instruction with a 16-bit offset, given that .got2 is smaller than +// 0x10000 (or 65536) bytes. +// +// Since each object file has its own .got2, %r30 refers to different +// places in a merged .got2 for two functions that came from different +// input files. Therefore, %r30 makes sense only within a single function. +// +// Technically, we can reuse a %r30 value in our PLT if we create a PLT +// _for each input file_ (that's what GNU ld seems to be doing), but that +// doesn't seems to be worth its complexity. Our PLT simply doesn't rely +// on a %r30 value. +// +// https://github.com/rui314/psabi/blob/main/ppc32.pdf + +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +using E = PPC32; + +static u64 lo(u64 x) { return x & 0xffff; } +static u64 hi(u64 x) { return x >> 16; } +static u64 ha(u64 x) { return (x + 0x8000) >> 16; } +static u64 high(u64 x) { return (x >> 16) & 0xffff; } +static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; } + +template <> +void write_plt_header(Context &ctx, u8 *buf) { + static const ub32 insn[] = { + // Get the address of this PLT section + 0x7c08'02a6, // mflr r0 + 0x429f'0005, // bcl 20, 31, 4 + 0x7d88'02a6, // 1: mflr r12 + 0x7c08'03a6, // mtlr r0 + + // Compute the runtime address of GOTPLT+12 + 0x3d8c'0000, // addis r12, r12, (GOTPLT - 1b)@higha + 0x398c'0000, // addi r12, r12, (GOTPLT - 1b)@lo + + // Compute the PLT entry offset + 0x7d6c'5850, // sub r11, r11, r12 + 0x1d6b'0003, // mulli r11, r11, 3 + + // Load GOTPLT[2] and branch to GOTPLT[1] + 0x800c'fff8, // lwz r0, -8(r12) + 0x7c09'03a6, // mtctr r0 + 0x818c'fffc, // lwz r12, -4(r12) + 0x4e80'0420, // bctr + 0x6000'0000, // nop + 0x6000'0000, // nop + 0x6000'0000, // nop + 0x6000'0000, // nop + }; + + static_assert(sizeof(insn) == E::plt_hdr_size); + memcpy(buf, insn, sizeof(insn)); + + ub32 *loc = (ub32 *)buf; + loc[4] |= higha(ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4); + loc[5] |= lo(ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4); +} + +static const ub32 plt_entry[] = { + // Get the address of this PLT entry + 0x7c08'02a6, // mflr r0 + 0x429f'0005, // bcl 20, 31, 4 + 0x7d88'02a6, // mflr r12 + 0x7c08'03a6, // mtlr r0 + + // Load an address from the GOT/GOTPLT entry and jump to that address + 0x3d6c'0000, // addis r11, r12, OFFSET@higha + 0x396b'0000, // addi r11, r11, OFFSET@lo + 0x818b'0000, // lwz r12, 0(r11) + 0x7d89'03a6, // mtctr r12 + 0x4e80'0420, // bctr +}; + +template <> +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { + static_assert(E::plt_size == sizeof(plt_entry)); + memcpy(buf, plt_entry, sizeof(plt_entry)); + + ub32 *loc = (ub32 *)buf; + i64 offset = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 8; + loc[4] |= higha(offset); + loc[5] |= lo(offset); +} + +template <> +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { + static_assert(E::pltgot_size == sizeof(plt_entry)); + memcpy(buf, plt_entry, sizeof(plt_entry)); + + ub32 *loc = (ub32 *)buf; + i64 offset = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 8; + loc[4] |= higha(offset); + loc[5] |= lo(offset); +} + +template <> +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) { + u8 *loc = ctx.buf + this->shdr.sh_offset + offset; + + switch (rel.r_type) { + case R_NONE: + break; + case R_PPC_ADDR32: + *(ub32 *)loc = 
val; + break; + case R_PPC_REL32: + *(ub32 *)loc = val - this->shdr.sh_addr - offset; + break; + default: + Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; + } +} + +template <> +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + u64 GOT2 = file.ppc32_got2 ? file.ppc32_got2->get_addr() : 0; + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + u64 S = sym.get_addr(ctx); + u64 A = rel.r_addend; + u64 P = get_addr() + rel.r_offset; + u64 G = sym.get_got_idx(ctx) * sizeof(Word); + u64 GOT = ctx.got->shdr.sh_addr; + + switch (rel.r_type) { + case R_PPC_ADDR32: + case R_PPC_UADDR32: + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_PPC_ADDR14: + *(ub32 *)loc |= bits(S + A, 15, 2) << 2; + break; + case R_PPC_ADDR16: + case R_PPC_UADDR16: + case R_PPC_ADDR16_LO: + *(ub16 *)loc = lo(S + A); + break; + case R_PPC_ADDR16_HI: + *(ub16 *)loc = hi(S + A); + break; + case R_PPC_ADDR16_HA: + *(ub16 *)loc = ha(S + A); + break; + case R_PPC_ADDR24: + *(ub32 *)loc |= bits(S + A, 25, 2) << 2; + break; + case R_PPC_ADDR30: + *(ub32 *)loc |= bits(S + A, 31, 2) << 2; + break; + case R_PPC_PLT16_LO: + *(ub16 *)loc = lo(G + GOT - A - GOT2); + break; + case R_PPC_PLT16_HI: + *(ub16 *)loc = hi(G + GOT - A - GOT2); + break; + case R_PPC_PLT16_HA: + *(ub16 *)loc = ha(G + GOT - A - GOT2); + break; + case R_PPC_PLT32: + *(ub32 *)loc = G + GOT - A - GOT2; + break; + case R_PPC_REL14: + *(ub32 *)loc |= bits(S + A - P, 15, 2) << 2; + break; + case R_PPC_REL16: + case R_PPC_REL16_LO: + *(ub16 *)loc = lo(S + A - P); + break; + case R_PPC_REL16_HI: + *(ub16 *)loc = hi(S + A - P); + break; + case R_PPC_REL16_HA: + *(ub16 *)loc = ha(S + A - P); + break; + case R_PPC_REL24: + case R_PPC_LOCAL24PC: { + i64 val = S + A - P; + if (sign_extend(val, 25) != val) + val = get_thunk_addr(i) - P; + *(ub32 *)loc |= bits(val, 25, 2) << 2; + break; + } + case R_PPC_PLTREL24: { + i64 val = S - P; + if (sym.has_plt(ctx) || sign_extend(val, 25) != val) + val = get_thunk_addr(i) - P; + *(ub32 *)loc |= bits(val, 25, 2) << 2; + break; + } + case R_PPC_REL32: + case R_PPC_PLTREL32: + *(ub32 *)loc = S + A - P; + break; + case R_PPC_GOT16: + case R_PPC_GOT16_LO: + *(ub16 *)loc = lo(G + A); + break; + case R_PPC_GOT16_HI: + *(ub16 *)loc = hi(G + A); + break; + case R_PPC_GOT16_HA: + *(ub16 *)loc = ha(G + A); + break; + case R_PPC_TPREL16_LO: + *(ub16 *)loc = lo(S + A - ctx.tp_addr); + break; + case R_PPC_TPREL16_HI: + *(ub16 *)loc = hi(S + A - ctx.tp_addr); + break; + case R_PPC_TPREL16_HA: + *(ub16 *)loc = ha(S + A - ctx.tp_addr); + break; + case R_PPC_DTPREL16_LO: + *(ub16 *)loc = lo(S + A - ctx.dtp_addr); + break; + case R_PPC_DTPREL16_HI: + *(ub16 *)loc = hi(S + A - ctx.dtp_addr); + break; + case R_PPC_DTPREL16_HA: + *(ub16 *)loc = ha(S + A - ctx.dtp_addr); + break; + case R_PPC_GOT_TLSGD16: + *(ub16 *)loc = sym.get_tlsgd_addr(ctx) - GOT; + break; + case R_PPC_GOT_TLSLD16: + *(ub16 *)loc = ctx.got->get_tlsld_addr(ctx) - GOT; + break; + case R_PPC_GOT_TPREL16: + *(ub16 *)loc = sym.get_gottp_addr(ctx) - GOT; + break; + case R_PPC_TLS: + case R_PPC_TLSGD: + case R_PPC_TLSLD: + case R_PPC_PLTSEQ: + case R_PPC_PLTCALL: + break; + default: + unreachable(); + } + } +} + 
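+// A compile-time spot check (illustrative; not part of the upstream mold
+// sources) of the identity behind ha() and lo() above: `addi` sign-extends
+// its 16-bit operand, so the high half must be pre-biased by 0x8000 so that
+// (ha(x) << 16) + sign_extend16(lo(x)) == x holds for any 32-bit value x.
+static_assert(((((0x1234'9876 + 0x8000) >> 16) << 16) +
+               (((0x1234'9876 & 0xffff) ^ 0x8000) - 0x8000)) == 0x1234'9876,
+              "ha()/lo() must compose back to the original value");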
+template <> +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? frag_addend : (i64)rel.r_addend; + + switch (rel.r_type) { + case R_PPC_ADDR32: + if (std::optional val = get_tombstone(sym, frag)) + *(ub32 *)loc = *val; + else + *(ub32 *)loc = S + A; + break; + default: + Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " + << rel; + } + } +} + +template <> +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + // Scan relocations + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + + if (sym.is_ifunc()) + sym.flags |= NEEDS_GOT | NEEDS_PLT; + + switch (rel.r_type) { + case R_PPC_ADDR32: + case R_PPC_UADDR32: + scan_dyn_absrel(ctx, sym, rel); + break; + case R_PPC_ADDR14: + case R_PPC_ADDR16: + case R_PPC_UADDR16: + case R_PPC_ADDR16_LO: + case R_PPC_ADDR16_HI: + case R_PPC_ADDR16_HA: + case R_PPC_ADDR24: + case R_PPC_ADDR30: + scan_absrel(ctx, sym, rel); + break; + case R_PPC_REL14: + case R_PPC_REL16: + case R_PPC_REL16_LO: + case R_PPC_REL16_HI: + case R_PPC_REL16_HA: + case R_PPC_REL32: + scan_pcrel(ctx, sym, rel); + break; + case R_PPC_GOT16: + case R_PPC_GOT16_LO: + case R_PPC_GOT16_HI: + case R_PPC_GOT16_HA: + case R_PPC_PLT16_LO: + case R_PPC_PLT16_HI: + case R_PPC_PLT16_HA: + case R_PPC_PLT32: + sym.flags |= NEEDS_GOT; + break; + case R_PPC_REL24: + case R_PPC_PLTREL24: + case R_PPC_PLTREL32: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_PPC_GOT_TLSGD16: + sym.flags |= NEEDS_TLSGD; + break; + case R_PPC_GOT_TLSLD16: + ctx.needs_tlsld = true; + break; + case R_PPC_GOT_TPREL16: + sym.flags |= NEEDS_GOTTP; + break; + case R_PPC_TPREL16_LO: + case R_PPC_TPREL16_HI: + case R_PPC_TPREL16_HA: + check_tlsle(ctx, sym, rel); + break; + case R_PPC_LOCAL24PC: + case R_PPC_TLS: + case R_PPC_TLSGD: + case R_PPC_TLSLD: + case R_PPC_DTPREL16_LO: + case R_PPC_DTPREL16_HI: + case R_PPC_DTPREL16_HA: + case R_PPC_PLTSEQ: + case R_PPC_PLTCALL: + break; + default: + Error(ctx) << *this << ": unknown relocation: " << rel; + } + } +} + +template <> +void RangeExtensionThunk::copy_buf(Context &ctx) { + u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; + + static const ub32 local_thunk[] = { + // Get this thunk's address + 0x7c08'02a6, // mflr r0 + 0x429f'0005, // bcl 20, 31, 4 + 0x7d88'02a6, // mflr r12 + 0x7c08'03a6, // mtlr r0 + + // Materialize the destination's address in %r11 and jump to that address + 0x3d6c'0000, // addis r11, r12, OFFSET@higha + 0x396b'0000, // addi r11, r11, OFFSET@lo + 0x7d69'03a6, // mtctr r11 + 0x4e80'0420, // bctr + 0x6000'0000, // nop + }; + + static_assert(E::thunk_size == sizeof(plt_entry)); + static_assert(E::thunk_size == sizeof(local_thunk)); + + for (i64 i = 0; i < symbols.size(); i++) { + ub32 *loc = (ub32 *)(buf + i * E::thunk_size); + Symbol &sym = *symbols[i]; + + if (sym.has_plt(ctx)) { + 
memcpy(loc, plt_entry, sizeof(plt_entry)); + u64 got = sym.has_got(ctx) ? sym.get_got_addr(ctx) : sym.get_gotplt_addr(ctx); + i64 val = got - get_addr(i) - 8; + loc[4] |= higha(val); + loc[5] |= lo(val); + } else { + memcpy(loc, local_thunk, sizeof(local_thunk)); + i64 val = sym.get_addr(ctx) - get_addr(i) - 8; + loc[4] |= higha(val); + loc[5] |= lo(val); + } + } +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/arch-ppc64v1.cc b/third_party/mold/elf/arch-ppc64v1.cc new file mode 100644 index 00000000000..c45581269dc --- /dev/null +++ b/third_party/mold/elf/arch-ppc64v1.cc @@ -0,0 +1,687 @@ +// clang-format off +// This file contains code for the 64-bit PowerPC ELFv1 ABI that is +// commonly used for big-endian PPC systems. Modern PPC systems that use +// the processor in little-endian mode use the ELFv2 ABI instead. For +// ELFv2, see arch-ppc64v2.cc. +// +// Even though they are similar, ELFv1 isn't only different from ELFv2 in +// endianness. The most notable difference is that, in ELFv1, a function +// pointer doesn't directly refer to the entry point of a function but +// instead refers to a data structure called a "function descriptor". +// +// The function descriptor is essentially a pair of a function entry point +// address and a value that should be set to %r2 before calling that +// function. There is also a third member for "the environment pointer for +// languages such as Pascal and PL/1" according to the psABI, but it looks +// like no one actually uses it. In total, the function descriptor is 24 +// bytes long. Here is why we need it. +// +// PPC generally lacks PC-relative data access instructions. Position- +// independent code sets %r2 to GOT + 0x8000 and accesses global variables +// relative to %r2. +// +// Each ELF file has its own GOT. If a function calls another function in +// the same ELF file, it doesn't have to reset %r2. However, if the callee is in +// another file (e.g. another .so), it has to set a new value to %r2 so that +// the register contains the callee's GOT + 0x8000. +// +// In this way, you can't call a function just by knowing the function's +// entry point address. You also need to know a proper %r2 value for the +// function. This is why a function pointer refers to a tuple of an +// address and a %r2 value. +// +// If a function call is made through PLT, PLT takes care of restoring %r2. +// Therefore, the caller has to restore %r2 only for function calls +// through function pointers. +// +// .opd (short for "official procedure descriptors") contains function +// descriptors. +// +// You can think of OPD this way: even on other targets, a function can have a +// few different addresses for different purposes. It may not only have an +// entry point address but may also have PLT and/or GOT addresses. +// In PPC64V1, it may have an OPD address in addition to these. The OPD address +// is used for relocations that refer to the address of a function as a +// function pointer. +// +// https://github.com/rui314/psabi/blob/main/ppc64v1.pdf + +#include "third_party/mold/elf/mold.h" + +#include "third_party/libcxx/algorithm" +// MISSING #include + +namespace mold::elf { + +using E = PPC64V1; + +static u64 lo(u64 x) { return x & 0xffff; } +static u64 hi(u64 x) { return x >> 16; } +static u64 ha(u64 x) { return (x + 0x8000) >> 16; } +static u64 high(u64 x) { return (x >> 16) & 0xffff; } +static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; } + +// .plt is used only for lazy symbol resolution on PPC64.
All PLT +// calls are made via range extension thunks even if they are within +// reach. Thunks read addresses from .got.plt and jump there. +// Therefore, once PLT symbols are resolved and final addresses are +// written to .got.plt, thunks just skip .plt and directly jump to the +// resolved addresses. +template <> +void write_plt_header(Context &ctx, u8 *buf) { + static const ub32 insn[] = { + 0x7d88'02a6, // mflr r12 + 0x429f'0005, // bcl 20, 31, 4 // obtain PC + 0x7d68'02a6, // mflr r11 + 0xe84b'0024, // ld r2,36(r11) + 0x7d88'03a6, // mtlr r12 + 0x7d62'5a14, // add r11,r2,r11 + 0xe98b'0000, // ld r12,0(r11) + 0xe84b'0008, // ld r2,8(r11) + 0x7d89'03a6, // mtctr r12 + 0xe96b'0010, // ld r11,16(r11) + 0x4e80'0420, // bctr + // .quad .got.plt - .plt - 8 + 0x0000'0000, + 0x0000'0000, + }; + + static_assert(sizeof(insn) == E::plt_hdr_size); + memcpy(buf, insn, sizeof(insn)); + *(ub64 *)(buf + 44) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 8; +} + +template <> +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { + ub32 *loc = (ub32 *)buf; + i64 idx = sym.get_plt_idx(ctx); + + // The PPC64 ELFv1 ABI requires PLT entries to vary in size depending + // on their indices. Unlike other targets, .got.plt is filled not by us + // but by the loader, so we don't have control over where the initial + // call to the PLT entry jumps to. So we need to strictly follow the PLT + // section layout that the loader expects. + if (idx < 0x8000) { + static const ub32 insn[] = { + 0x3800'0000, // li r0, PLT_INDEX + 0x4b00'0000, // b plt0 + }; + + memcpy(loc, insn, sizeof(insn)); + loc[0] |= idx; + loc[1] |= (ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx) - 4) & 0x00ff'ffff; + } else { + static const ub32 insn[] = { + 0x3c00'0000, // lis r0, PLT_INDEX@high + 0x6000'0000, // ori r0, r0, PLT_INDEX@lo + 0x4b00'0000, // b plt0 + }; + + memcpy(loc, insn, sizeof(insn)); + loc[0] |= high(idx); + loc[1] |= lo(idx); + loc[2] |= (ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx) - 8) & 0x00ff'ffff; + } +} + +// .plt.got is not necessary on PPC64 because range extension thunks +// directly read GOT entries and jump there.
+template <> +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) {} + +template <> +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) { + u8 *loc = ctx.buf + this->shdr.sh_offset + offset; + + switch (rel.r_type) { + case R_NONE: + break; + case R_PPC64_ADDR64: + *(ub64 *)loc = val; + break; + case R_PPC64_REL32: + *(ub32 *)loc = val - this->shdr.sh_addr - offset; + break; + case R_PPC64_REL64: + *(ub64 *)loc = val - this->shdr.sh_addr - offset; + break; + default: + Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; + } +} + +template <> +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + u64 S = sym.get_addr(ctx); + u64 A = rel.r_addend; + u64 P = get_addr() + rel.r_offset; + u64 G = sym.get_got_idx(ctx) * sizeof(Word); + u64 GOT = ctx.got->shdr.sh_addr; + u64 TOC = ctx.extra.TOC->value; + + switch (rel.r_type) { + case R_PPC64_ADDR64: + apply_toc_rel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_PPC64_TOC: + apply_toc_rel(ctx, *ctx.extra.TOC, rel, loc, TOC, A, P, dynrel); + break; + case R_PPC64_TOC16_HA: + *(ub16 *)loc = ha(S + A - TOC); + break; + case R_PPC64_TOC16_LO: + *(ub16 *)loc = lo(S + A - TOC); + break; + case R_PPC64_TOC16_DS: + check(S + A - TOC, -(1 << 15), 1 << 15); + *(ub16 *)loc |= (S + A - TOC) & 0xfffc; + break; + case R_PPC64_TOC16_LO_DS: + *(ub16 *)loc |= (S + A - TOC) & 0xfffc; + break; + case R_PPC64_REL24: { + i64 val = sym.get_addr(ctx, NO_OPD) + A - P; + if (sym.has_plt(ctx) || sign_extend(val, 25) != val) + val = get_thunk_addr(i) + A - P; + + check(val, -(1 << 25), 1 << 25); + *(ub32 *)loc |= bits(val, 25, 2) << 2; + + // If a callee is an external function, PLT saves %r2 to the + // caller's r2 save slot. We need to restore it after function + // return. To do so, there's usually a NOP as a placeholder + // after a BL. 0x6000'0000 is a NOP. 
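+      // Concretely, the fixup below rewrites
+      //
+      //   bl   foo          # branch to foo's PLT entry, clobbering %r2
+      //   nop               # 0x6000'0000, placeholder emitted after the call
+      //
+      // into
+      //
+      //   bl   foo
+      //   ld   r2, 40(r1)   # reload the caller's %r2 from its save slot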
+ if (sym.has_plt(ctx) && *(ub32 *)(loc + 4) == 0x6000'0000) + *(ub32 *)(loc + 4) = 0xe841'0028; // ld r2, 40(r1) + break; + } + case R_PPC64_REL32: + *(ub32 *)loc = S + A - P; + break; + case R_PPC64_REL64: + *(ub64 *)loc = S + A - P; + break; + case R_PPC64_REL16_HA: + *(ub16 *)loc = ha(S + A - P); + break; + case R_PPC64_REL16_LO: + *(ub16 *)loc = lo(S + A - P); + break; + case R_PPC64_PLT16_HA: + *(ub16 *)loc = ha(G + GOT - TOC); + break; + case R_PPC64_PLT16_HI: + *(ub16 *)loc = hi(G + GOT - TOC); + break; + case R_PPC64_PLT16_LO: + *(ub16 *)loc = lo(G + GOT - TOC); + break; + case R_PPC64_PLT16_LO_DS: + *(ub16 *)loc |= (G + GOT - TOC) & 0xfffc; + break; + case R_PPC64_GOT_TPREL16_HA: + *(ub16 *)loc = ha(sym.get_gottp_addr(ctx) - TOC); + break; + case R_PPC64_GOT_TLSGD16_HA: + *(ub16 *)loc = ha(sym.get_tlsgd_addr(ctx) - TOC); + break; + case R_PPC64_GOT_TLSGD16_LO: + *(ub16 *)loc = lo(sym.get_tlsgd_addr(ctx) - TOC); + break; + case R_PPC64_GOT_TLSLD16_HA: + *(ub16 *)loc = ha(ctx.got->get_tlsld_addr(ctx) - TOC); + break; + case R_PPC64_GOT_TLSLD16_LO: + *(ub16 *)loc = lo(ctx.got->get_tlsld_addr(ctx) - TOC); + break; + case R_PPC64_DTPREL16_HA: + *(ub16 *)loc = ha(S + A - ctx.dtp_addr); + break; + case R_PPC64_DTPREL16_LO: + *(ub16 *)loc = lo(S + A - ctx.dtp_addr); + break; + case R_PPC64_TPREL16_HA: + *(ub16 *)loc = ha(S + A - ctx.tp_addr); + break; + case R_PPC64_TPREL16_LO: + *(ub16 *)loc = lo(S + A - ctx.tp_addr); + break; + case R_PPC64_GOT_TPREL16_LO_DS: + *(ub16 *)loc |= (sym.get_gottp_addr(ctx) - TOC) & 0xfffc; + break; + case R_PPC64_PLTSEQ: + case R_PPC64_PLTCALL: + case R_PPC64_TLS: + case R_PPC64_TLSGD: + case R_PPC64_TLSLD: + break; + default: + unreachable(); + } + } +} + +template <> +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? frag_addend : (i64)rel.r_addend; + + switch (rel.r_type) { + case R_PPC64_ADDR64: + if (std::optional val = get_tombstone(sym, frag)) + *(ub64 *)loc = *val; + else + *(ub64 *)loc = S + A; + break; + case R_PPC64_ADDR32: { + i64 val = S + A; + check(val, 0, 1LL << 32); + *(ub32 *)loc = val; + break; + } + case R_PPC64_DTPREL64: + *(ub64 *)loc = S + A - ctx.dtp_addr; + break; + default: + Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " + << rel; + } + } +} + +template <> +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + // Scan relocations + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + + if (sym.is_ifunc()) + sym.flags |= NEEDS_GOT | NEEDS_PLT | NEEDS_PPC_OPD; + + // Any relocation except R_PPC64_REL24 is considered as an + // address-taking relocation. 
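+ // For example, a statically initialized function pointer produces + // R_PPC64_ADDR64 against its target, and on ELFv1 that address must be + // the address of the target's .opd descriptor, hence NEEDS_PPC_OPD. A + // plain `bl` emits R_PPC64_REL24 and needs only the real entry point.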
+ if (rel.r_type != R_PPC64_REL24 && sym.get_type() == STT_FUNC) + sym.flags |= NEEDS_PPC_OPD; + + switch (rel.r_type) { + case R_PPC64_ADDR64: + case R_PPC64_TOC: + scan_toc_rel(ctx, sym, rel); + break; + case R_PPC64_GOT_TPREL16_HA: + sym.flags |= NEEDS_GOTTP; + break; + case R_PPC64_REL24: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_PPC64_PLT16_HA: + sym.flags |= NEEDS_GOT; + break; + case R_PPC64_GOT_TLSGD16_HA: + sym.flags |= NEEDS_TLSGD; + break; + case R_PPC64_GOT_TLSLD16_HA: + ctx.needs_tlsld = true; + break; + case R_PPC64_TPREL16_HA: + case R_PPC64_TPREL16_LO: + check_tlsle(ctx, sym, rel); + break; + case R_PPC64_REL32: + case R_PPC64_REL64: + case R_PPC64_TOC16_HA: + case R_PPC64_TOC16_LO: + case R_PPC64_TOC16_LO_DS: + case R_PPC64_TOC16_DS: + case R_PPC64_REL16_HA: + case R_PPC64_REL16_LO: + case R_PPC64_PLT16_HI: + case R_PPC64_PLT16_LO: + case R_PPC64_PLT16_LO_DS: + case R_PPC64_PLTSEQ: + case R_PPC64_PLTCALL: + case R_PPC64_GOT_TPREL16_LO_DS: + case R_PPC64_GOT_TLSGD16_LO: + case R_PPC64_GOT_TLSLD16_LO: + case R_PPC64_TLS: + case R_PPC64_TLSGD: + case R_PPC64_TLSLD: + case R_PPC64_DTPREL16_HA: + case R_PPC64_DTPREL16_LO: + break; + default: + Error(ctx) << *this << ": unknown relocation: " << rel; + } + } +} + +template <> +void RangeExtensionThunk::copy_buf(Context &ctx) { + u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; + + // If the destination is .plt.got, we save the current r2, read an + // address of a function descriptor from .got, restore %r2 and jump + // to the function. + static const ub32 pltgot_thunk[] = { + // Store the caller's %r2 + 0xf841'0028, // std %r2, 40(%r1) + + // Load an address of a function descriptor + 0x3d82'0000, // addis %r12, %r2, foo@got@toc@ha + 0xe98c'0000, // ld %r12, foo@got@toc@lo(%r12) + + // Restore the callee's %r2 + 0xe84c'0008, // ld %r2, 8(%r12) + + // Jump to the function + 0xe98c'0000, // ld %r12, 0(%r12) + 0x7d89'03a6, // mtctr %r12 + 0x4e80'0420, // bctr + }; + + // If the destination is .plt, read a function descriptor from .got.plt. + static const ub32 plt_thunk[] = { + // Store the caller's %r2 + 0xf841'0028, // std %r2, 40(%r1) + + // Materialize an address of a function descriptor + 0x3d82'0000, // addis %r12, %r2, foo@gotplt@toc@ha + 0x398c'0000, // addi %r12, %r12, foo@gotplt@toc@lo + + // Restore the callee's %r2 + 0xe84c'0008, // ld %r2, 8(%r12) + + // Jump to the function + 0xe98c'0000, // ld %r12, 0(%r12) + 0x7d89'03a6, // mtctr %r12 + 0x4e80'0420, // bctr + }; + + // If the destination is a non-imported function, we directly jump + // to the function entry address. 
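+ // Worked example of the @ha/@lo split used below (illustrative numbers): + // for a TOC-relative offset of 0x12348765, lo() yields 0x8765, which the + // addi sign-extends to -0x789b, and higha() yields 0x1235 because the + // +0x8000 rounds up to compensate, so 0x12350000 - 0x789b reconstructs + // 0x12348765.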
+ static const ub32 local_thunk[] = { + 0x3d82'0000, // addis r12, r2, foo@toc@ha + 0x398c'0000, // addi r12, r12, foo@toc@lo + 0x7d89'03a6, // mtctr r12 + 0x4e80'0420, // bctr + 0x6000'0000, // nop + 0x6000'0000, // nop + 0x6000'0000, // nop + }; + + static_assert(E::thunk_size == sizeof(pltgot_thunk)); + static_assert(E::thunk_size == sizeof(plt_thunk)); + static_assert(E::thunk_size == sizeof(local_thunk)); + + for (i64 i = 0; i < symbols.size(); i++) { + Symbol &sym = *symbols[i]; + ub32 *loc = (ub32 *)(buf + i * E::thunk_size); + + if (sym.has_got(ctx)) { + memcpy(loc, pltgot_thunk, sizeof(pltgot_thunk)); + i64 val = sym.get_got_addr(ctx) - ctx.extra.TOC->value; + loc[1] |= higha(val); + loc[2] |= lo(val); + } else if (sym.has_plt(ctx)) { + memcpy(loc, plt_thunk, sizeof(plt_thunk)); + i64 val = sym.get_gotplt_addr(ctx) - ctx.extra.TOC->value; + loc[1] |= higha(val); + loc[2] |= lo(val); + } else { + memcpy(loc, local_thunk, sizeof(local_thunk)); + i64 val = sym.get_addr(ctx, NO_OPD) - ctx.extra.TOC->value; + loc[0] |= higha(val); + loc[1] |= lo(val); + } + } +} + +static InputSection *get_opd_section(ObjectFile &file) { + for (std::unique_ptr> &isec : file.sections) + if (isec && isec->name() == ".opd") + return isec.get(); + return nullptr; +} + +static ElfRel * +get_relocation_at(Context &ctx, InputSection &isec, i64 offset) { + std::span> rels = isec.get_rels(ctx); + + auto it = std::lower_bound(rels.begin(), rels.end(), offset, + [](const ElfRel &r, i64 offset) { + return r.r_offset < offset; + }); + + if (it == rels.end()) + return nullptr; + if (it->r_offset != offset) + return nullptr; + return &*it; +} + +struct OpdSymbol { + bool operator<(const OpdSymbol &x) const { return r_offset < x.r_offset; } + + u64 r_offset = 0; + Symbol *sym = nullptr; +}; + +static Symbol * +get_opd_sym_at(Context &ctx, std::span syms, u64 offset) { + auto it = std::lower_bound(syms.begin(), syms.end(), OpdSymbol{offset}); + if (it == syms.end()) + return nullptr; + if (it->r_offset != offset) + return nullptr; + return it->sym; +} + +// The compiler creates an .opd entry for each function symbol. The intention +// is to make it possible to create an output .opd section just by linking +// input .opd sections in the same manner as we do for other normal input +// sections. +// +// However, in reality, .opd isn't a normal input section. It needs many +// special treatments as follows: +// +// 1. A function symbol refers not to .text but to .opd. Its address +// works fine for address-taking relocations such as R_PPC64_ADDR64. +// However, R_PPC64_REL24 (which is used for branch instructions) needs +// a function's real address instead of the function's .opd address. +// We need to read .opd contents to find out a function's entry point +// address to apply R_PPC64_REL24. +// +// 2. Output .opd entries are needed only for functions whose addresses +// are taken. Just copying input .opd sections to an output would +// produce lots of dead .opd entries. +// +// 3. In this design, all function symbols refer to an .opd section, and +// that doesn't work well with graph traversal optimizations such as +// garbage collection or identical comdat folding. For example, the garbage +// collector would mark .opd as alive, which in turn marks all functions +// that are referenced by .opd as alive, effectively keeping all +// functions alive. +// +// The problem is that the compiler creates a half-baked .opd section, and +// the linker has to figure out what all these .opd entries and +// relocations are trying to achieve.
It's like the compiler would emit a +// half-baked .plt section in an object file and the linker has to deal +// with that. That's not a good design. +// +// So, in this function, we undo what the compiler did to .opd. We remove +// function symbols from .opd and reattach them to their function entry +// points. We also rewrite relocations that directly refer to an input +// .opd section so that they refer to function symbols instead. We then +// mark input .opd sections as dead. +// +// After this function, we mark symbols with the NEEDS_PPC_OPD flag if the +// symbol needs an .opd entry. We then create an output .opd just like we +// do for .plt or .got. +void ppc64v1_rewrite_opd(Context &ctx) { + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + InputSection *opd = get_opd_section(*file); + if (!opd) + return; + opd->is_alive = false; + + // Move symbols from .opd to .text. + std::vector opd_syms; + + for (Symbol *sym : file->symbols) { + if (sym->file != file || sym->get_input_section() != opd) + continue; + + if (u32 ty = sym->get_type(); ty != STT_FUNC && ty != STT_GNU_IFUNC) + continue; + + ElfRel *rel = get_relocation_at(ctx, *opd, sym->value); + if (!rel) + Fatal(ctx) << *file << ": cannot find a relocation in .opd for " + << *sym << " at offset 0x" << std::hex << (u64)sym->value; + + Symbol *sym2 = file->symbols[rel->r_sym]; + if (sym2->get_type() != STT_SECTION) + Fatal(ctx) << *file << ": bad relocation in .opd referring " << *sym2; + + opd_syms.push_back({sym->value, sym}); + + sym->set_input_section(sym2->get_input_section()); + sym->value = rel->r_addend; + } + + // Sort symbols so that get_opd_sym_at() can do binary search. + sort(opd_syms); + + // Rewrite relocations so that they directly refer to .opd. + for (std::unique_ptr> &isec : file->sections) { + if (!isec || !isec->is_alive || isec.get() == opd) + continue; + + for (ElfRel &r : isec->get_rels(ctx)) { + Symbol &sym = *file->symbols[r.r_sym]; + if (sym.get_input_section() != opd) + continue; + + Symbol *real_sym = get_opd_sym_at(ctx, opd_syms, r.r_addend); + if (!real_sym) + Fatal(ctx) << *isec << ": cannot find a symbol in .opd for " << r + << " at offset 0x" << std::hex << (u64)r.r_addend; + + r.r_sym = real_sym->sym_idx; + r.r_addend = 0; + } + } + }); +} + +// When a function is exported, the dynamic symbol for the function should +// refers to the function's .opd entry. This function marks such symbols +// with NEEDS_PPC_OPD. +void ppc64v1_scan_symbols(Context &ctx) { + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (Symbol *sym : file->symbols) + if (sym->file == file && sym->is_exported) + if (u32 ty = sym->get_type(); ty == STT_FUNC || ty == STT_GNU_IFUNC) + sym->flags |= NEEDS_PPC_OPD; + }); + + // Functions referenced by the ELF header also have to have .opd entries. 
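+ // For instance, the ELF header's e_entry and the DT_INIT/DT_FINI dynamic + // tags hold function addresses, and on ELFv1 a function address is the + // address of its .opd descriptor, so the entry, init and fini symbols + // marked below need .opd entries even if nothing else takes their + // addresses.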
+ auto mark = [&](std::string_view name) { + if (!name.empty()) + if (Symbol &sym = *get_symbol(ctx, name); !sym.is_imported) + sym.flags |= NEEDS_PPC_OPD; + }; + + mark(ctx.arg.entry); + mark(ctx.arg.init); + mark(ctx.arg.fini); +} + +void PPC64OpdSection::add_symbol(Context &ctx, Symbol *sym) { + sym->set_opd_idx(ctx, symbols.size()); + symbols.push_back(sym); + this->shdr.sh_size += ENTRY_SIZE; +} + +i64 PPC64OpdSection::get_reldyn_size(Context &ctx) const { + if (ctx.arg.pic) + return symbols.size() * 2; + return 0; +} + +void PPC64OpdSection::copy_buf(Context &ctx) { + ub64 *buf = (ub64 *)(ctx.buf + this->shdr.sh_offset); + + ElfRel *rel = nullptr; + if (ctx.arg.pic) + rel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + reldyn_offset); + + for (Symbol *sym : symbols) { + u64 addr = sym->get_addr(ctx, NO_PLT | NO_OPD); + *buf++ = addr; + *buf++ = ctx.extra.TOC->value; + *buf++ = 0; + + if (ctx.arg.pic) { + u64 loc = sym->get_opd_addr(ctx); + *rel++ = ElfRel(loc, E::R_RELATIVE, 0, addr); + *rel++ = ElfRel(loc + 8, E::R_RELATIVE, 0, ctx.extra.TOC->value); + } + } +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/arch-ppc64v2.cc b/third_party/mold/elf/arch-ppc64v2.cc new file mode 100644 index 00000000000..3582bb57f58 --- /dev/null +++ b/third_party/mold/elf/arch-ppc64v2.cc @@ -0,0 +1,555 @@ +// clang-format off +// This file implements the PowerPC ELFv2 ABI which was standardized in +// 2014. Modern little-endian PowerPC systems are based on this ABI. +// The ABI is often referred to as "ppc64le". This shouldn't be confused +// with "ppc64" which refers to the original, big-endian PowerPC systems. +// +// PPC64 is a bit tricky to support because PC-relative load/store +// instructions hadn't been available until Power10 which debuted in 2021. +// Prior to Power10, it wasn't trivial for position-independent code (PIC) +// to load a value from, for example, .got, as we can't do that with [PC + +// the offset to the .got entry]. +// +// In the following, I'll explain how PIC is supported on pre-Power10 +// systems first and then explain what has changed with Power10. +// +// +// Position-independent code on Power9 or earlier: +// +// We can get the program counter on older PPC64 systems with the +// following four instructions +// +// mflr r1 // save the current link register to r1 +// bl .+4 // branch to the next instruction as if it were a function +// mflr r0 // copy the return address to r0 +// mtlr r1 // restore the original link register value +// +// , but it's too expensive to do if we do this for each load/store. +// +// As a workaround, most functions are compiled in such a way that r2 is +// assumed to always contain the address of .got + 0x8000. With this, we +// can for example load the first entry of .got with a single instruction +// `lw r0, -0x8000(r2)`. r2 is called the TOC pointer. +// +// There's only one .got for each ELF module. Therefore, if a callee is in +// the same ELF module, r2 doesn't have to be recomputed. Most function +// calls are usually within the same ELF module, so this mechanism is +// efficient. +// +// A function compiled for pre-Power10 usually has two entry points, +// global and local. The global entry point usually 8 bytes precedes +// the local entry point. In between is the following instructions: +// +// addis r2, r12, .TOC.@ha +// addi r2, r2, .TOC.@lo + 4; +// +// The global entry point assumes that the address of itself is in r12, +// and it computes its own TOC pointer from r12. 
It's easy to do so for +// the callee because the offset between its .got + 0x8000 and the +// function is known at link-time. The above code sequence then falls +// through to the local entry point that assumes r2 is .got + 0x8000. +// +// So, if a callee's TOC pointer is different from the current one +// (e.g. calling a function in another .so), we first load the callee's +// address to r12 (e.g. from .got.plt with a r2-relative load) and branch +// to that address. Then the callee computes its own TOC pointer using +// r12. +// +// +// Position-independent code on Power10: +// +// Power10 added 8-bytes-long instructions to the ISA. Some of them are +// PC-relative load/store instructions that take 34 bits offsets. +// Functions compiled with `-mcpu=power10` use these instructions for PIC. +// r2 does not have a special meaning in such fucntions. +// +// When a fucntion compiled for Power10 calls a function that uses the TOC +// pointer, we need to compute a correct value for TOC and set it to r2 +// before transferring the control to the callee. Thunks are responsible +// for doing it. +// +// `_NOTOC` relocations such as `R_PPC64_REL24_NOTOC` indicate that the +// callee does not use TOC (i.e. compiled with `-mcpu=power10`). If a +// function using TOC is referenced via a `_NOTOC` relocation, that call +// is made through a range extension thunk. +// +// +// Note on section names: the PPC64 psABI uses a weird naming convention +// which calls .got.plt .plt. We ignored that part because it's just +// confusing. Since the runtime only cares about segments, we should be +// able to name sections whatever we want. +// +// https://github.com/rui314/psabi/blob/main/ppc64v2.pdf + +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +using E = PPC64V2; + +static u64 lo(u64 x) { return x & 0xffff; } +static u64 hi(u64 x) { return x >> 16; } +static u64 ha(u64 x) { return (x + 0x8000) >> 16; } +static u64 high(u64 x) { return (x >> 16) & 0xffff; } +static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; } + +static u64 prefix34(u64 x) { + return bits(x, 33, 16) | (bits(x, 15, 0) << 32); +} + +// .plt is used only for lazy symbol resolution on PPC64. All PLT +// calls are made via range extension thunks even if they are within +// reach. Thunks read addresses from .got.plt and jump there. +// Therefore, once PLT symbols are resolved and final addresses are +// written to .got.plt, thunks just skip .plt and directly jump to the +// resolved addresses. +template <> +void write_plt_header(Context &ctx, u8 *buf) { + static const ul32 insn[] = { + // Get PC + 0x7c08'02a6, // mflr r0 + 0x429f'0005, // bcl 20, 31, 4 // obtain PC + 0x7d68'02a6, // mflr r11 + 0x7c08'03a6, // mtlr r0 + + // Compute the PLT entry index + 0xe80b'002c, // ld r0, 44(r11) + 0x7d8b'6050, // subf r12, r11, r12 + 0x7d60'5a14, // add r11, r0, r11 + 0x380c'ffcc, // addi r0, r12, -52 + 0x7800'f082, // rldicl r0, r0, 62, 2 + + // Load .got.plt[0] and .got.plt[1] and branch to .got.plt[0] + 0xe98b'0000, // ld r12, 0(r11) + 0x7d89'03a6, // mtctr r12 + 0xe96b'0008, // ld r11, 8(r11) + 0x4e80'0420, // bctr + + // .quad .got.plt - .plt - 8 + 0x0000'0000, + 0x0000'0000, + }; + + memcpy(buf, insn, sizeof(insn)); + *(ul64 *)(buf + 52) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 8; +} + +template <> +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { + // When the control is transferred to a PLT entry, the PLT entry's + // address is already set to %r12 by the caller. 
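+ // (For illustration: since %r12 already holds this entry's address, each + // entry is just a single branch back to plt0; the PLT header above + // recovers the entry's index from %r12 with its subf/addi/rldicl sequence + // instead of materializing the index in every entry.)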
+ i64 offset = ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx); + *(ul32 *)buf = 0x4b00'0000 | (offset & 0x00ff'ffff); // b plt0 +} + +// .plt.got is not necessary on PPC64 because range extension thunks +// directly read GOT entries and jump there. +template <> +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) {} + +template <> +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) { + u8 *loc = ctx.buf + this->shdr.sh_offset + offset; + + switch (rel.r_type) { + case R_NONE: + break; + case R_PPC64_ADDR64: + *(ul64 *)loc = val; + break; + case R_PPC64_REL32: + *(ul32 *)loc = val - this->shdr.sh_addr - offset; + break; + case R_PPC64_REL64: + *(ul64 *)loc = val - this->shdr.sh_addr - offset; + break; + default: + Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; + } +} + +static u64 get_local_entry_offset(Context &ctx, Symbol &sym) { + i64 val = sym.esym().ppc_local_entry; + assert(val <= 7); + if (val == 7) + Fatal(ctx) << sym << ": local entry offset 7 is reserved"; + + if (val == 0 || val == 1) + return 0; + return 1 << val; +} + +template <> +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + u64 S = sym.get_addr(ctx); + u64 A = rel.r_addend; + u64 P = get_addr() + rel.r_offset; + u64 G = sym.get_got_idx(ctx) * sizeof(Word); + u64 GOT = ctx.got->shdr.sh_addr; + u64 TOC = ctx.extra.TOC->value; + + auto r2save_thunk_addr = [&] { return get_thunk_addr(i); }; + auto no_r2save_thunk_addr = [&] { return get_thunk_addr(i) + 4; }; + + switch (rel.r_type) { + case R_PPC64_ADDR64: + if (name() == ".toc") + apply_toc_rel(ctx, sym, rel, loc, S, A, P, dynrel); + else + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_PPC64_TOC16_HA: + *(ul16 *)loc = ha(S + A - TOC); + break; + case R_PPC64_TOC16_LO: + *(ul16 *)loc = lo(S + A - TOC); + break; + case R_PPC64_TOC16_DS: + case R_PPC64_TOC16_LO_DS: + *(ul16 *)loc |= (S + A - TOC) & 0xfffc; + break; + case R_PPC64_REL24: + if (sym.has_plt(ctx) || !sym.esym().preserves_r2()) { + i64 val = r2save_thunk_addr() + A - P; + *(ul32 *)loc |= bits(val, 25, 2) << 2; + + // The thunk saves %r2 to the caller's r2 save slot. We need to + // restore it after function return. To do so, there's usually a + // NOP as a placeholder after a BL. 0x6000'0000 is a NOP. 
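+ // For illustration: `bl foo` / `nop` becomes `bl <r2-saving thunk>` / + // `ld r2, 24(r1)` after this rewrite; 24(r1) is the ELFv2 TOC save slot, + // so r2 is restored once the call returns.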
+ if (*(ul32 *)(loc + 4) == 0x6000'0000) + *(ul32 *)(loc + 4) = 0xe841'0018; // ld r2, 24(r1) + } else { + i64 val = S + get_local_entry_offset(ctx, sym) + A - P; + if (sign_extend(val, 25) != val) + val = no_r2save_thunk_addr() + A - P; + *(ul32 *)loc |= bits(val, 25, 2) << 2; + } + break; + case R_PPC64_REL24_NOTOC: + if (sym.has_plt(ctx) || sym.esym().uses_toc()) { + i64 val = no_r2save_thunk_addr() + A - P; + *(ul32 *)loc |= bits(val, 25, 2) << 2; + } else { + i64 val = S + A - P; + if (sign_extend(val, 25) != val) + val = no_r2save_thunk_addr() + A - P; + *(ul32 *)loc |= bits(val, 25, 2) << 2; + } + break; + case R_PPC64_REL32: + *(ul32 *)loc = S + A - P; + break; + case R_PPC64_REL64: + *(ul64 *)loc = S + A - P; + break; + case R_PPC64_REL16_HA: + *(ul16 *)loc = ha(S + A - P); + break; + case R_PPC64_REL16_LO: + *(ul16 *)loc = lo(S + A - P); + break; + case R_PPC64_PLT16_HA: + *(ul16 *)loc = ha(G + GOT - TOC); + break; + case R_PPC64_PLT16_HI: + *(ul16 *)loc = hi(G + GOT - TOC); + break; + case R_PPC64_PLT16_LO: + *(ul16 *)loc = lo(G + GOT - TOC); + break; + case R_PPC64_PLT16_LO_DS: + *(ul16 *)loc |= (G + GOT - TOC) & 0xfffc; + break; + case R_PPC64_PLT_PCREL34: + case R_PPC64_PLT_PCREL34_NOTOC: + case R_PPC64_GOT_PCREL34: + *(ul64 *)loc |= prefix34(G + GOT - P); + break; + case R_PPC64_PCREL34: + *(ul64 *)loc |= prefix34(S + A - P); + break; + case R_PPC64_GOT_TPREL16_HA: + *(ul16 *)loc = ha(sym.get_gottp_addr(ctx) - TOC); + break; + case R_PPC64_GOT_TPREL16_LO_DS: + *(ul16 *)loc |= (sym.get_gottp_addr(ctx) - TOC) & 0xfffc; + break; + case R_PPC64_GOT_TPREL_PCREL34: + *(ul64 *)loc |= prefix34(sym.get_gottp_addr(ctx) - P); + break; + case R_PPC64_GOT_TLSGD16_HA: + *(ul16 *)loc = ha(sym.get_tlsgd_addr(ctx) - TOC); + break; + case R_PPC64_GOT_TLSGD16_LO: + *(ul16 *)loc = lo(sym.get_tlsgd_addr(ctx) - TOC); + break; + case R_PPC64_GOT_TLSGD_PCREL34: + *(ul64 *)loc |= prefix34(sym.get_tlsgd_addr(ctx) - P); + break; + case R_PPC64_GOT_TLSLD16_HA: + *(ul16 *)loc = ha(ctx.got->get_tlsld_addr(ctx) - TOC); + break; + case R_PPC64_GOT_TLSLD16_LO: + *(ul16 *)loc = lo(ctx.got->get_tlsld_addr(ctx) - TOC); + break; + case R_PPC64_GOT_TLSLD_PCREL34: + *(ul64 *)loc |= prefix34(ctx.got->get_tlsld_addr(ctx) - P); + break; + case R_PPC64_DTPREL16_HA: + *(ul16 *)loc = ha(S + A - ctx.dtp_addr); + break; + case R_PPC64_DTPREL16_LO: + *(ul16 *)loc = lo(S + A - ctx.dtp_addr); + break; + case R_PPC64_DTPREL34: + *(ul64 *)loc |= prefix34(S + A - ctx.dtp_addr); + break; + case R_PPC64_TPREL16_HA: + *(ul16 *)loc = ha(S + A - ctx.tp_addr); + break; + case R_PPC64_TPREL16_LO: + *(ul16 *)loc = lo(S + A - ctx.tp_addr); + break; + case R_PPC64_PLTSEQ: + case R_PPC64_PLTSEQ_NOTOC: + case R_PPC64_PLTCALL: + case R_PPC64_PLTCALL_NOTOC: + case R_PPC64_TLS: + case R_PPC64_TLSGD: + case R_PPC64_TLSLD: + break; + default: + unreachable(); + } + } +} + +template <> +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = 
frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? frag_addend : (i64)rel.r_addend; + + switch (rel.r_type) { + case R_PPC64_ADDR64: + if (std::optional val = get_tombstone(sym, frag)) + *(ul64 *)loc = *val; + else + *(ul64 *)loc = S + A; + break; + case R_PPC64_ADDR32: { + i64 val = S + A; + check(val, 0, 1LL << 32); + *(ul32 *)loc = val; + break; + } + case R_PPC64_DTPREL64: + *(ul64 *)loc = S + A - ctx.dtp_addr; + break; + default: + Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " + << rel; + } + } +} + +template <> +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + // Scan relocations + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + + if (sym.is_ifunc()) + sym.flags |= NEEDS_GOT | NEEDS_PLT; + + switch (rel.r_type) { + case R_PPC64_ADDR64: + if (name() == ".toc") + scan_toc_rel(ctx, sym, rel); + else + scan_dyn_absrel(ctx, sym, rel); + break; + case R_PPC64_GOT_TPREL16_HA: + case R_PPC64_GOT_TPREL_PCREL34: + sym.flags |= NEEDS_GOTTP; + break; + case R_PPC64_REL24: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_PPC64_REL24_NOTOC: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + ctx.extra.is_power10 = true; + break; + case R_PPC64_PLT16_HA: + case R_PPC64_PLT_PCREL34: + case R_PPC64_PLT_PCREL34_NOTOC: + case R_PPC64_GOT_PCREL34: + sym.flags |= NEEDS_GOT; + break; + case R_PPC64_GOT_TLSGD16_HA: + case R_PPC64_GOT_TLSGD_PCREL34: + sym.flags |= NEEDS_TLSGD; + break; + case R_PPC64_GOT_TLSLD16_HA: + case R_PPC64_GOT_TLSLD_PCREL34: + ctx.needs_tlsld = true; + break; + case R_PPC64_TPREL16_HA: + case R_PPC64_TPREL16_LO: + check_tlsle(ctx, sym, rel); + break; + case R_PPC64_REL32: + case R_PPC64_REL64: + case R_PPC64_TOC16_HA: + case R_PPC64_TOC16_LO: + case R_PPC64_TOC16_LO_DS: + case R_PPC64_TOC16_DS: + case R_PPC64_REL16_HA: + case R_PPC64_REL16_LO: + case R_PPC64_PLT16_HI: + case R_PPC64_PLT16_LO: + case R_PPC64_PLT16_LO_DS: + case R_PPC64_PCREL34: + case R_PPC64_PLTSEQ: + case R_PPC64_PLTSEQ_NOTOC: + case R_PPC64_PLTCALL: + case R_PPC64_PLTCALL_NOTOC: + case R_PPC64_GOT_TPREL16_LO_DS: + case R_PPC64_GOT_TLSGD16_LO: + case R_PPC64_GOT_TLSLD16_LO: + case R_PPC64_TLS: + case R_PPC64_TLSGD: + case R_PPC64_TLSLD: + case R_PPC64_DTPREL16_HA: + case R_PPC64_DTPREL16_LO: + case R_PPC64_DTPREL34: + break; + default: + Error(ctx) << *this << ": unknown relocation: " << rel; + } + } +} + +template <> +void RangeExtensionThunk::copy_buf(Context &ctx) { + u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; + + // If the destination is PLT, we read an address from .got.plt or .got + // and jump there. + static const ul32 plt_thunk[] = { + 0xf841'0018, // std r2, 24(r1) + 0x3d82'0000, // addis r12, r2, foo@gotplt@toc@ha + 0xe98c'0000, // ld r12, foo@gotplt@toc@lo(r12) + 0x7d89'03a6, // mtctr r12 + 0x4e80'0420, // bctr + }; + + static const ul32 plt_thunk_power10[] = { + 0xf841'0018, // std r2, 24(r1) + 0x0410'0000, // pld r12, foo@gotplt@pcrel + 0xe580'0000, + 0x7d89'03a6, // mtctr r12 + 0x4e80'0420, // bctr + }; + + // If the destination is a non-imported function, we directly jump + // to its local entry point. 
+ static const ul32 local_thunk[] = { + 0xf841'0018, // std r2, 24(r1) + 0x3d82'0000, // addis r12, r2, foo@toc@ha + 0x398c'0000, // addi r12, r12, foo@toc@lo + 0x7d89'03a6, // mtctr r12 + 0x4e80'0420, // bctr + }; + + static const ul32 local_thunk_power10[] = { + 0xf841'0018, // std r2, 24(r1) + 0x0610'0000, // pla r12, foo@pcrel + 0x3980'0000, + 0x7d89'03a6, // mtctr r12 + 0x4e80'0420, // bctr + }; + + static_assert(E::thunk_size == sizeof(plt_thunk)); + static_assert(E::thunk_size == sizeof(plt_thunk_power10)); + static_assert(E::thunk_size == sizeof(local_thunk)); + static_assert(E::thunk_size == sizeof(local_thunk_power10)); + + for (i64 i = 0; i < symbols.size(); i++) { + Symbol &sym = *symbols[i]; + ul32 *loc = (ul32 *)(buf + i * E::thunk_size); + + if (sym.has_plt(ctx)) { + u64 got = sym.has_got(ctx) ? sym.get_got_addr(ctx) : sym.get_gotplt_addr(ctx); + + if (ctx.extra.is_power10) { + memcpy(loc, plt_thunk_power10, E::thunk_size); + *(ul64 *)(loc + 1) |= prefix34(got - get_addr(i) - 4); + } else { + i64 val = got - ctx.extra.TOC->value; + memcpy(loc, plt_thunk, E::thunk_size); + loc[1] |= higha(val); + loc[2] |= lo(val); + } + } else { + if (ctx.extra.is_power10) { + memcpy(loc, local_thunk_power10, E::thunk_size); + *(ul64 *)(loc + 1) |= prefix34(sym.get_addr(ctx) - get_addr(i) - 4); + } else { + i64 val = sym.get_addr(ctx) - ctx.extra.TOC->value; + memcpy(loc, local_thunk, E::thunk_size); + loc[1] |= higha(val); + loc[2] |= lo(val); + } + } + } +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/arch-riscv.cc b/third_party/mold/elf/arch-riscv.cc new file mode 100644 index 00000000000..ddef6419b61 --- /dev/null +++ b/third_party/mold/elf/arch-riscv.cc @@ -0,0 +1,938 @@ +// clang-format off +// RISC-V is a clean RISC ISA. It supports PC-relative load/store for +// position-independent code. Its 32-bit and 64-bit ISAs are almost +// identical. That is, you can think RV32 as a RV64 without 64-bit +// operations. In this file, we support both RV64 and RV32. +// +// RISC-V is essentially little-endian, but the big-endian version is +// available as an extension. GCC supports `-mbig-endian` to generate +// big-endian code. Even in big-endian mode, machine instructions are +// defined to be encoded in little-endian, though. Only the behavior of +// load/store instructions are different between LE RISC-V and BE RISC-V. +// +// From the linker's point of view, the RISC-V's psABI is unique because +// sections in input object files can be shrunk while being copied to the +// output file. That is contrary to other psABIs in which sections are an +// atomic unit of copying. Let me explain it in more details. +// +// Since RISC-V instructions are 16-bit or 32-bit long, there's no way to +// embed a very large immediate into a branch instruction. In fact, JAL +// (jump and link) instruction can jump to only within PC ± 1 MiB because +// its immediate is only 21 bits long. If the destination is out of its +// reach, we need to use two instructions instead; the first instruction +// being AUIPC which sets upper 20 bits to a register and the second being +// JALR with a 12-bit immediate and the register. Combined, they specify a +// 32 bits displacement. +// +// Other RISC ISAs have the same limitation, and they solved the problem by +// letting the linker create so-called "range extension thunks". It works as +// follows: the compiler optimistically emits single jump instructions for +// function calls. 
If the linker finds that a branch target is out of reach, +// it emits a small piece of machine code near the branch instruction and +// redirect the branch to the linker-synthesized code. The code constructs a +// full 32-bit address in a register and jump to the destination. That +// linker-synthesized code is called "range extension thunks" or just +// "thunks". +// +// The RISC-V psABI is unique that it works the other way around. That is, +// for RISC-V, the compiler always emits two instructions (AUIPC + JAL) for +// function calls. If the linker finds the destination is reachable with a +// single instruction, it replaces the two instructions with the one and +// shrink the section size by one instruction length, instead of filling the +// gap with a nop. +// +// With the presence of this relaxation, sections can no longer be +// considered as an atomic unit. If we delete 4 bytes from the middle of a +// section, all contents after that point needs to be shifted by 4. Symbol +// values and relocation offsets have to be adjusted accordingly if they +// refer to past the deleted bytes. +// +// In mold, we use `r_deltas` to memorize how many bytes have be adjusted +// for relocations. For symbols, we directly mutate their `value` member. +// +// RISC-V object files tend to have way more relocations than those for +// other targets. This is because all branches, including ones that jump +// within the same section, are explicitly expressed with relocations. +// Here is why we need them: all control-flow statements such as `if` or +// `for` are implemented using branch instructions. For other targets, the +// compiler doesn't emit relocations for such branches because they know +// at compile-time exactly how many bytes has to be skipped. That's not +// true to RISC-V because the linker may delete bytes between a branch and +// its destination. Therefore, all branches including in-section ones have +// to be explicitly expressed with relocations. +// +// Note that this mechanism only shrink sections and never enlarge, as +// the compiler always emits the longest instruction sequence. This +// makes the linker implementation a bit simpler because we don't need +// to worry about oscillation. +// +// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc + +#include "third_party/mold/elf/mold.h" + +// MISSING #include +// MISSING #include + +namespace mold::elf { + +static void write_itype(u8 *loc, u32 val) { + *(ul32 *)loc &= 0b000000'00000'11111'111'11111'1111111; + *(ul32 *)loc |= bits(val, 11, 0) << 20; +} + +static void write_stype(u8 *loc, u32 val) { + *(ul32 *)loc &= 0b000000'11111'11111'111'00000'1111111; + *(ul32 *)loc |= bits(val, 11, 5) << 25 | bits(val, 4, 0) << 7; +} + +static void write_btype(u8 *loc, u32 val) { + *(ul32 *)loc &= 0b000000'11111'11111'111'00000'1111111; + *(ul32 *)loc |= bit(val, 12) << 31 | bits(val, 10, 5) << 25 | + bits(val, 4, 1) << 8 | bit(val, 11) << 7; +} + +static void write_utype(u8 *loc, u32 val) { + *(ul32 *)loc &= 0b000000'00000'00000'000'11111'1111111; + + // U-type instructions are used in combination with I-type + // instructions. U-type insn sets an immediate to the upper 20-bits + // of a register. I-type insn sign-extends a 12-bits immediate and + // adds it to a register value to construct a complete value. 0x800 + // is added here to compensate for the sign-extension. 
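+ // Worked example: for val = 0x12345fff, the paired I-type immediate is + // 0xfff, which sign-extends to -1, so the U-type part must contribute + // 0x12346000; (0x12345fff + 0x800) & 0xffff'f000 is exactly 0x12346000.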
+ *(ul32 *)loc |= (val + 0x800) & 0xffff'f000; +} + +static void write_jtype(u8 *loc, u32 val) { + *(ul32 *)loc &= 0b000000'00000'00000'000'11111'1111111; + *(ul32 *)loc |= bit(val, 20) << 31 | bits(val, 10, 1) << 21 | + bit(val, 11) << 20 | bits(val, 19, 12) << 12; +} + +static void write_cbtype(u8 *loc, u32 val) { + *(ul16 *)loc &= 0b111'000'111'00000'11; + *(ul16 *)loc |= bit(val, 8) << 12 | bit(val, 4) << 11 | bit(val, 3) << 10 | + bit(val, 7) << 6 | bit(val, 6) << 5 | bit(val, 2) << 4 | + bit(val, 1) << 3 | bit(val, 5) << 2; +} + +static void write_cjtype(u8 *loc, u32 val) { + *(ul16 *)loc &= 0b111'00000000000'11; + *(ul16 *)loc |= bit(val, 11) << 12 | bit(val, 4) << 11 | bit(val, 9) << 10 | + bit(val, 8) << 9 | bit(val, 10) << 8 | bit(val, 6) << 7 | + bit(val, 7) << 6 | bit(val, 3) << 5 | bit(val, 2) << 4 | + bit(val, 1) << 3 | bit(val, 5) << 2; +} + +static void overwrite_uleb(u8 *loc, u64 val) { + while (*loc & 0b1000'0000) { + *loc++ = 0b1000'0000 | (val & 0b0111'1111); + val >>= 7; + } +} + +// Returns the rd register of an R/I/U/J-type instruction. +static u32 get_rd(u32 val) { + return bits(val, 11, 7); +} + +static void set_rs1(u8 *loc, u32 rs1) { + assert(rs1 < 32); + *(ul32 *)loc &= 0b111111'11111'00000'111'11111'1111111; + *(ul32 *)loc |= rs1 << 15; +} + +template +void write_plt_header(Context &ctx, u8 *buf) { + static const ul32 insn_64[] = { + 0x0000'0397, // auipc t2, %pcrel_hi(.got.plt) + 0x41c3'0333, // sub t1, t1, t3 # .plt entry + hdr + 12 + 0x0003'be03, // ld t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve + 0xfd43'0313, // addi t1, t1, -44 # .plt entry + 0x0003'8293, // addi t0, t2, %pcrel_lo(1b) # &.got.plt + 0x0013'5313, // srli t1, t1, 1 # .plt entry offset + 0x0082'b283, // ld t0, 8(t0) # link map + 0x000e'0067, // jr t3 + }; + + static const ul32 insn_32[] = { + 0x0000'0397, // auipc t2, %pcrel_hi(.got.plt) + 0x41c3'0333, // sub t1, t1, t3 # .plt entry + hdr + 12 + 0x0003'ae03, // lw t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve + 0xfd43'0313, // addi t1, t1, -44 # .plt entry + 0x0003'8293, // addi t0, t2, %pcrel_lo(1b) # &.got.plt + 0x0023'5313, // srli t1, t1, 2 # .plt entry offset + 0x0042'a283, // lw t0, 4(t0) # link map + 0x000e'0067, // jr t3 + }; + + if constexpr (E::is_64) + memcpy(buf, insn_64, sizeof(insn_64)); + else + memcpy(buf, insn_32, sizeof(insn_32)); + + u64 gotplt = ctx.gotplt->shdr.sh_addr; + u64 plt = ctx.plt->shdr.sh_addr; + write_utype(buf, gotplt - plt); + write_itype(buf + 8, gotplt - plt); + write_itype(buf + 16, gotplt - plt); +} + +static const ul32 plt_entry_64[] = { + 0x0000'0e17, // auipc t3, %pcrel_hi(function@.got.plt) + 0x000e'3e03, // ld t3, %pcrel_lo(1b)(t3) + 0x000e'0367, // jalr t1, t3 + 0x0000'0013, // nop +}; + +static const ul32 plt_entry_32[] = { + 0x0000'0e17, // auipc t3, %pcrel_hi(function@.got.plt) + 0x000e'2e03, // lw t3, %pcrel_lo(1b)(t3) + 0x000e'0367, // jalr t1, t3 + 0x0000'0013, // nop +}; + +template +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { + if constexpr (E::is_64) + memcpy(buf, plt_entry_64, sizeof(plt_entry_64)); + else + memcpy(buf, plt_entry_32, sizeof(plt_entry_32)); + + u64 gotplt = sym.get_gotplt_addr(ctx); + u64 plt = sym.get_plt_addr(ctx); + write_utype(buf, gotplt - plt); + write_itype(buf + 4, gotplt - plt); +} + +template +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { + if constexpr (E::is_64) + memcpy(buf, plt_entry_64, sizeof(plt_entry_64)); + else + memcpy(buf, plt_entry_32, sizeof(plt_entry_32)); + + u64 got = sym.get_got_addr(ctx); + u64 plt = 
sym.get_plt_addr(ctx); + write_utype(buf, got - plt); + write_itype(buf + 4, got - plt); +} + +template +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) { + u8 *loc = ctx.buf + this->shdr.sh_offset + offset; + + switch (rel.r_type) { + case R_NONE: + break; + case R_RISCV_ADD32: + *(U32 *)loc += val; + break; + case R_RISCV_SUB8: + *loc -= val; + break; + case R_RISCV_SUB16: + *(U16 *)loc -= val; + break; + case R_RISCV_SUB32: + *(U32 *)loc -= val; + break; + case R_RISCV_SUB6: + *loc = (*loc & 0b1100'0000) | ((*loc - val) & 0b0011'1111); + break; + case R_RISCV_SET6: + *loc = (*loc & 0b1100'0000) | (val & 0b0011'1111); + break; + case R_RISCV_SET8: + *loc = val; + break; + case R_RISCV_SET16: + *(U16 *)loc = val; + break; + case R_RISCV_SET32: + *(U32 *)loc = val; + break; + case R_RISCV_32_PCREL: + *(U32 *)loc = val - this->shdr.sh_addr - offset; + break; + default: + Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; + } +} + +template +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + auto get_r_delta = [&](i64 idx) { + return extra.r_deltas.empty() ? 0 : extra.r_deltas[idx]; + }; + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || rel.r_type == R_RISCV_RELAX) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + i64 r_offset = rel.r_offset - get_r_delta(i); + i64 removed_bytes = get_r_delta(i + 1) - get_r_delta(i); + u8 *loc = base + r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + auto find_paired_reloc = [&] { + Symbol &sym = *file.symbols[rels[i].r_sym]; + assert(sym.get_input_section() == this); + + if (sym.value < r_offset) { + for (i64 j = i - 1; j >= 0; j--) + if (u32 ty = rels[j].r_type; + ty == R_RISCV_GOT_HI20 || ty == R_RISCV_TLS_GOT_HI20 || + ty == R_RISCV_TLS_GD_HI20 || ty == R_RISCV_PCREL_HI20) + if (sym.value == rels[j].r_offset - get_r_delta(j)) + return j; + } else { + for (i64 j = i + 1; j < rels.size(); j++) + if (u32 ty = rels[j].r_type; + ty == R_RISCV_GOT_HI20 || ty == R_RISCV_TLS_GOT_HI20 || + ty == R_RISCV_TLS_GD_HI20 || ty == R_RISCV_PCREL_HI20) + if (sym.value == rels[j].r_offset - get_r_delta(j)) + return j; + } + + Fatal(ctx) << *this << ": paired relocation is missing: " << i; + }; + + u64 S = sym.get_addr(ctx); + u64 A = rel.r_addend; + u64 P = get_addr() + r_offset; + u64 G = sym.get_got_idx(ctx) * sizeof(Word); + u64 GOT = ctx.got->shdr.sh_addr; + + switch (rel.r_type) { + case R_RISCV_32: + if constexpr (E::is_64) + *(U32 *)loc = S + A; + else + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_RISCV_64: + assert(E::is_64); + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_RISCV_BRANCH: + check(S + A - P, -(1 << 12), 1 << 12); + write_btype(loc, S + A - P); + break; + case R_RISCV_JAL: + check(S + A - P, -(1 << 20), 1 << 20); + write_jtype(loc, S + A - P); + break; + case R_RISCV_CALL: + case R_RISCV_CALL_PLT: { + u32 rd = get_rd(*(ul32 *)(contents.data() + rel.r_offset + 4)); + + if (removed_bytes == 4) { + // auipc + jalr -> jal + *(ul32 *)loc = (rd << 7) | 0b1101111; + write_jtype(loc, S + A - P); + } 
else if (removed_bytes == 6 && rd == 0) { + // auipc + jalr -> c.j + *(ul16 *)loc = 0b101'00000000000'01; + write_cjtype(loc, S + A - P); + } else if (removed_bytes == 6 && rd == 1) { + // auipc + jalr -> c.jal + assert(!E::is_64); + *(ul16 *)loc = 0b001'00000000000'01; + write_cjtype(loc, S + A - P); + } else { + assert(removed_bytes == 0); + // Calling an undefined weak symbol does not make sense. + // We make such call into an infinite loop. This should + // help debugging of a faulty program. + u64 val = sym.esym().is_undef_weak() ? 0 : S + A - P; + check(val, -(1LL << 31), 1LL << 31); + write_utype(loc, val); + write_itype(loc + 4, val); + } + break; + } + case R_RISCV_GOT_HI20: + write_utype(loc, G + GOT + A - P); + break; + case R_RISCV_TLS_GOT_HI20: + write_utype(loc, sym.get_gottp_addr(ctx) + A - P); + break; + case R_RISCV_TLS_GD_HI20: + write_utype(loc, sym.get_tlsgd_addr(ctx) + A - P); + break; + case R_RISCV_PCREL_HI20: + write_utype(loc, S + A - P); + break; + case R_RISCV_PCREL_LO12_I: + case R_RISCV_PCREL_LO12_S: { + i64 idx2 = find_paired_reloc(); + const ElfRel &rel2 = rels[idx2]; + Symbol &sym2 = *file.symbols[rel2.r_sym]; + + u64 S = sym2.get_addr(ctx); + u64 A = rel2.r_addend; + u64 P = get_addr() + rel2.r_offset - get_r_delta(idx2); + u64 G = sym2.get_got_idx(ctx) * sizeof(Word); + u64 val; + + switch (rel2.r_type) { + case R_RISCV_GOT_HI20: + val = G + GOT + A - P; + break; + case R_RISCV_TLS_GOT_HI20: + val = sym2.get_gottp_addr(ctx) + A - P; + break; + case R_RISCV_TLS_GD_HI20: + val = sym2.get_tlsgd_addr(ctx) + A - P; + break; + case R_RISCV_PCREL_HI20: + val = S + A - P; + break; + default: + unreachable(); + } + + if (rel.r_type == R_RISCV_PCREL_LO12_I) + write_itype(loc, val); + else + write_stype(loc, val); + break; + } + case R_RISCV_HI20: + assert(removed_bytes == 0 || removed_bytes == 4); + if (removed_bytes == 0) { + check(S + A, -(1LL << 31), 1LL << 31); + write_utype(loc, S + A); + } + break; + case R_RISCV_LO12_I: + case R_RISCV_LO12_S: + if (rel.r_type == R_RISCV_LO12_I) + write_itype(loc, S + A); + else + write_stype(loc, S + A); + + // Rewrite `lw t1, 0(t0)` with `lw t1, 0(x0)` if the address is + // accessible relative to the zero register. If the upper 20 bits + // are all zero, the corresponding LUI might have been removed. + if (bits(S + A, 31, 12) == 0) + set_rs1(loc, 0); + break; + case R_RISCV_TPREL_HI20: + assert(removed_bytes == 0 || removed_bytes == 4); + if (removed_bytes == 0) + write_utype(loc, S + A - ctx.tp_addr); + break; + case R_RISCV_TPREL_ADD: + // This relocation just annotates an ADD instruction that can be + // removed when a TPREL is relaxed. No value is needed to be + // written. + assert(removed_bytes == 0 || removed_bytes == 4); + break; + case R_RISCV_TPREL_LO12_I: + case R_RISCV_TPREL_LO12_S: { + i64 val = S + A - ctx.tp_addr; + if (rel.r_type == R_RISCV_TPREL_LO12_I) + write_itype(loc, val); + else + write_stype(loc, val); + + // Rewrite `lw t1, 0(t0)` with `lw t1, 0(tp)` if the address is + // directly accessible using tp. tp is x4. 
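+ // That is, when S + A - TP fits in a signed 12-bit immediate, the + // %tprel_hi part is zero, so the lui/add pair contributes nothing and the + // access can use tp (x4) as its base register directly; with relaxation + // enabled those two instructions are removed in shrink_section below.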
+ if (sign_extend(val, 11) == val) + set_rs1(loc, 4); + break; + } + case R_RISCV_ADD8: + loc += S + A; + break; + case R_RISCV_ADD16: + *(U16 *)loc += S + A; + break; + case R_RISCV_ADD32: + *(U32 *)loc += S + A; + break; + case R_RISCV_ADD64: + *(U64 *)loc += S + A; + break; + case R_RISCV_SUB8: + loc -= S + A; + break; + case R_RISCV_SUB16: + *(U16 *)loc -= S + A; + break; + case R_RISCV_SUB32: + *(U32 *)loc -= S + A; + break; + case R_RISCV_SUB64: + *(U64 *)loc -= S + A; + break; + case R_RISCV_ALIGN: { + // A R_RISCV_ALIGN is followed by a NOP sequence. We need to remove + // zero or more bytes so that the instruction after R_RISCV_ALIGN is + // aligned to a given alignment boundary. + // + // We need to guarantee that the NOP sequence is valid after byte + // removal (e.g. we can't remove the first 2 bytes of a 4-byte NOP). + // For the sake of simplicity, we always rewrite the entire NOP sequence. + i64 padding_bytes = rel.r_addend - removed_bytes; + assert((padding_bytes & 1) == 0); + + i64 i = 0; + for (; i <= padding_bytes - 4; i += 4) + *(ul32 *)(loc + i) = 0x0000'0013; // nop + if (i < padding_bytes) + *(ul16 *)(loc + i) = 0x0001; // c.nop + break; + } + case R_RISCV_RVC_BRANCH: + check(S + A - P, -(1 << 8), 1 << 8); + write_cbtype(loc, S + A - P); + break; + case R_RISCV_RVC_JUMP: + check(S + A - P, -(1 << 11), 1 << 11); + write_cjtype(loc, S + A - P); + break; + case R_RISCV_SUB6: + *loc = (*loc & 0b1100'0000) | ((*loc - (S + A)) & 0b0011'1111); + break; + case R_RISCV_SET6: + *loc = (*loc & 0b1100'0000) | ((S + A) & 0b0011'1111); + break; + case R_RISCV_SET8: + *loc = S + A; + break; + case R_RISCV_SET16: + *(U16 *)loc = S + A; + break; + case R_RISCV_SET32: + *(U32 *)loc = S + A; + break; + case R_RISCV_PLT32: + case R_RISCV_32_PCREL: + *(U32 *)loc = S + A - P; + break; + default: + unreachable(); + } + } +} + +template +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? 
frag_addend : (i64)rel.r_addend; + + switch (rel.r_type) { + case R_RISCV_32: + *(U32 *)loc = S + A; + break; + case R_RISCV_64: + if (std::optional val = get_tombstone(sym, frag)) + *(U64 *)loc = *val; + else + *(U64 *)loc = S + A; + break; + case R_RISCV_ADD8: + *loc += S + A; + break; + case R_RISCV_ADD16: + *(U16 *)loc += S + A; + break; + case R_RISCV_ADD32: + *(U32 *)loc += S + A; + break; + case R_RISCV_ADD64: + *(U64 *)loc += S + A; + break; + case R_RISCV_SUB8: + *loc -= S + A; + break; + case R_RISCV_SUB16: + *(U16 *)loc -= S + A; + break; + case R_RISCV_SUB32: + *(U32 *)loc -= S + A; + break; + case R_RISCV_SUB64: + *(U64 *)loc -= S + A; + break; + case R_RISCV_SUB6: + *loc = (*loc & 0b1100'0000) | ((*loc - (S + A)) & 0b0011'1111); + break; + case R_RISCV_SET6: + *loc = (*loc & 0b1100'0000) | ((S + A) & 0b0011'1111); + break; + case R_RISCV_SET8: + *loc = S + A; + break; + case R_RISCV_SET16: + *(U16 *)loc = S + A; + break; + case R_RISCV_SET32: + *(U32 *)loc = S + A; + break; + case R_RISCV_SET_ULEB128: + overwrite_uleb(loc, S + A); + break; + case R_RISCV_SUB_ULEB128: { + u8 *p = loc; + u64 val = read_uleb(p); + overwrite_uleb(loc, val - S - A); + break; + } + default: + Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " + << rel; + break; + } + } +} + +template +void InputSection::copy_contents_riscv(Context &ctx, u8 *buf) { + // If a section is not relaxed, we can copy it as a one big chunk. + if (extra.r_deltas.empty()) { + uncompress_to(ctx, buf); + return; + } + + // A relaxed section is copied piece-wise. + std::span> rels = get_rels(ctx); + i64 pos = 0; + + for (i64 i = 0; i < rels.size(); i++) { + i64 delta = extra.r_deltas[i + 1] - extra.r_deltas[i]; + if (delta == 0) + continue; + assert(delta > 0); + + const ElfRel &r = rels[i]; + memcpy(buf, contents.data() + pos, r.r_offset - pos); + buf += r.r_offset - pos; + pos = r.r_offset + delta; + } + + memcpy(buf, contents.data() + pos, contents.size() - pos); +} + +template +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + // Scan relocations + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + + if (sym.is_ifunc()) + sym.flags |= NEEDS_GOT | NEEDS_PLT; + + switch (rel.r_type) { + case R_RISCV_32: + if constexpr (E::is_64) + scan_absrel(ctx, sym, rel); + else + scan_dyn_absrel(ctx, sym, rel); + break; + case R_RISCV_HI20: + scan_absrel(ctx, sym, rel); + break; + case R_RISCV_64: + if constexpr (!E::is_64) + Fatal(ctx) << *this << ": R_RISCV_64 cannot be used on RV32"; + scan_dyn_absrel(ctx, sym, rel); + break; + case R_RISCV_CALL: + case R_RISCV_CALL_PLT: + case R_RISCV_PLT32: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_RISCV_GOT_HI20: + sym.flags |= NEEDS_GOT; + break; + case R_RISCV_TLS_GOT_HI20: + sym.flags |= NEEDS_GOTTP; + break; + case R_RISCV_TLS_GD_HI20: + sym.flags |= NEEDS_TLSGD; + break; + case R_RISCV_32_PCREL: + scan_pcrel(ctx, sym, rel); + break; + case R_RISCV_TPREL_HI20: + case R_RISCV_TPREL_LO12_I: + case R_RISCV_TPREL_LO12_S: + case R_RISCV_TPREL_ADD: + check_tlsle(ctx, sym, rel); + break; + case R_RISCV_BRANCH: + case R_RISCV_JAL: + case R_RISCV_PCREL_HI20: + case R_RISCV_PCREL_LO12_I: + case R_RISCV_PCREL_LO12_S: + case R_RISCV_LO12_I: + case R_RISCV_LO12_S: + case R_RISCV_ADD8: + 
case R_RISCV_ADD16: + case R_RISCV_ADD32: + case R_RISCV_ADD64: + case R_RISCV_SUB8: + case R_RISCV_SUB16: + case R_RISCV_SUB32: + case R_RISCV_SUB64: + case R_RISCV_ALIGN: + case R_RISCV_RVC_BRANCH: + case R_RISCV_RVC_JUMP: + case R_RISCV_RELAX: + case R_RISCV_SUB6: + case R_RISCV_SET6: + case R_RISCV_SET8: + case R_RISCV_SET16: + case R_RISCV_SET32: + break; + default: + Error(ctx) << *this << ": unknown relocation: " << rel; + } + } +} + +template +static bool is_resizable(Context &ctx, InputSection *isec) { + return isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC) && + (isec->shdr().sh_flags & SHF_EXECINSTR); +} + +// Returns the distance between a relocated place and a symbol. +template +static i64 compute_distance(Context &ctx, Symbol &sym, + InputSection &isec, const ElfRel &rel) { + // We handle absolute symbols as if they were infinitely far away + // because `shrink_section` may increase a distance between a branch + // instruction and an absolute symbol. Branching to an absolute + // location is extremely rare in real code, though. + if (sym.is_absolute()) + return INT32_MAX; + + // Likewise, relocations against weak undefined symbols won't be relaxed. + if (sym.esym().is_undef_weak()) + return INT32_MAX; + + // Compute a distance between the relocated place and the symbol. + i64 S = sym.get_addr(ctx); + i64 A = rel.r_addend; + i64 P = isec.get_addr() + rel.r_offset; + return S + A - P; +} + +// Scan relocations to shrink sections. +template +static void shrink_section(Context &ctx, InputSection &isec, bool use_rvc) { + std::span> rels = isec.get_rels(ctx); + isec.extra.r_deltas.resize(rels.size() + 1); + + i64 delta = 0; + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &r = rels[i]; + Symbol &sym = *isec.file.symbols[r.r_sym]; + isec.extra.r_deltas[i] = delta; + + // Handling R_RISCV_ALIGN is mandatory. + // + // R_RISCV_ALIGN refers to NOP instructions. We need to eliminate some + // or all of the instructions so that the instruction that immediately + // follows the NOPs is aligned to a specified alignment boundary. + if (r.r_type == R_RISCV_ALIGN) { + // The total bytes of NOPs is stored to r_addend, so the next + // instruction is r_addend away. + u64 loc = isec.get_addr() + r.r_offset - delta; + u64 next_loc = loc + r.r_addend; + u64 alignment = bit_ceil(r.r_addend + 1); + assert(alignment <= (1 << isec.p2align)); + delta += next_loc - align_to(loc, alignment); + continue; + } + + // Handling other relocations is optional. + if (!ctx.arg.relax || i == rels.size() - 1 || + rels[i + 1].r_type != R_RISCV_RELAX) + continue; + + // Linker-synthesized symbols haven't been assigned their final + // values when we are shrinking sections because actual values can + // be computed only after we fix the file layout. Therefore, we + // assume that relocations against such symbols are always + // non-relaxable. + if (sym.file == ctx.internal_obj) + continue; + + switch (r.r_type) { + case R_RISCV_CALL: + case R_RISCV_CALL_PLT: { + // These relocations refer to an AUIPC + JALR instruction pair to + // allow to jump to anywhere in PC ± 2 GiB. If the jump target is + // close enough to PC, we can use C.J, C.JAL or JAL instead. + i64 dist = compute_distance(ctx, sym, isec, r); + if (dist & 1) + break; + + i64 rd = get_rd(*(ul32 *)(isec.contents.data() + r.r_offset + 4)); + + if (rd == 0 && sign_extend(dist, 11) == dist && use_rvc) { + // If rd is x0 and the jump target is within ±2 KiB, we can use + // C.J, saving 6 bytes. 
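+ // (An AUIPC + JALR pair is 8 bytes and C.J is 2 bytes, hence 6 bytes + // saved; the C.JAL and plain JAL cases below save 6 and 4 bytes + // respectively for the same reason.)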
+ delta += 6; + } else if (rd == 1 && sign_extend(dist, 11) == dist && use_rvc && !E::is_64) { + // If rd is x1 and the jump target is within ±2 KiB, we can use + // C.JAL. This is RV32 only because C.JAL is RV32-only instruction. + delta += 6; + } else if (sign_extend(dist, 20) == dist) { + // If the jump target is within ±1 MiB, we can use JAL. + delta += 4; + } + break; + } + case R_RISCV_HI20: + // If the upper 20 bits are all zero, we can remove LUI. + // The corresponding instructions referred to by LO12_I/LO12_S + // relocations will use the zero register instead. + if (bits(sym.get_addr(ctx), 31, 12) == 0) + delta += 4; + break; + case R_RISCV_TPREL_HI20: + case R_RISCV_TPREL_ADD: + // These relocations are used to add a high 20-bit value to the + // thread pointer. The following two instructions materializes + // TP + HI20(foo) in %r5, for example. + // + // lui a5,%tprel_hi(foo) # R_RISCV_TPREL_HI20 (symbol) + // add a5,a5,tp,%tprel_add(foo) # R_RISCV_TPREL_ADD (symbol) + // + // Then thread-local variable `foo` is accessed with a low 12-bit + // offset like this: + // + // sw t0,%tprel_lo(foo)(a5) # R_RISCV_TPREL_LO12_S (symbol) + // + // However, if the variable is at TP ±2 KiB, TP + HI20(foo) is the + // same as TP, so we can instead access the thread-local variable + // directly using TP like this: + // + // sw t0,%tprel_lo(foo)(tp) + // + // Here, we remove `lui` and `add` if the offset is within ±2 KiB. + if (i64 val = sym.get_addr(ctx) + r.r_addend - ctx.tp_addr; + sign_extend(val, 11) == val) + delta += 4; + break; + } + } + + isec.extra.r_deltas[rels.size()] = delta; + isec.sh_size -= delta; +} + +// Shrink sections by interpreting relocations. +// +// This operation seems to be optional, because by default longest +// instructions are being used. However, calling this function is actually +// mandatory because of R_RISCV_ALIGN. R_RISCV_ALIGN is a directive to the +// linker to align the location referred to by the relocation to a +// specified byte boundary. We at least have to interpret them to satisfy +// the alignment constraints. +template +i64 riscv_resize_sections(Context &ctx) { + Timer t(ctx, "riscv_resize_sections"); + + // True if we can use the 2-byte instructions. This is usually true on + // Unix because RV64GC is generally considered the baseline hardware. + bool use_rvc = get_eflags(ctx) & EF_RISCV_RVC; + + // Find all the relocations that can be relaxed. + // This step should only shrink sections. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (std::unique_ptr> &isec : file->sections) + if (is_resizable(ctx, isec.get())) + shrink_section(ctx, *isec, use_rvc); + }); + + // Fix symbol values. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (Symbol *sym : file->symbols) { + if (sym->file != file) + continue; + + InputSection *isec = sym->get_input_section(); + if (!isec || isec->extra.r_deltas.empty()) + continue; + + std::span> rels = isec->get_rels(ctx); + auto it = std::lower_bound(rels.begin(), rels.end(), sym->value, + [&](const ElfRel &r, u64 val) { + return r.r_offset < val; + }); + + sym->value -= isec->extra.r_deltas[it - rels.begin()]; + } + }); + + // Re-compute section offset again to finalize them. 
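+ // Shrinking changes section sizes and therefore the addresses of + // everything laid out after them, so both the section sizes and the + // output section offsets are recomputed here before relocations are + // applied.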
+ compute_section_sizes(ctx); + return set_osec_offsets(ctx); +} + +#define INSTANTIATE(E) \ + template void write_plt_header(Context &, u8 *); \ + template void write_plt_entry(Context &, u8 *, Symbol &); \ + template void write_pltgot_entry(Context &, u8 *, Symbol &); \ + template void \ + EhFrameSection::apply_reloc(Context &, const ElfRel &, u64, u64); \ + template void InputSection::apply_reloc_alloc(Context &, u8 *); \ + template void InputSection::apply_reloc_nonalloc(Context &, u8 *); \ + template void InputSection::copy_contents_riscv(Context &, u8 *); \ + template void InputSection::scan_relocations(Context &); \ + template i64 riscv_resize_sections(Context &); + +INSTANTIATE(RV64LE); +INSTANTIATE(RV64BE); +INSTANTIATE(RV32LE); +INSTANTIATE(RV32BE); + +} // namespace mold::elf diff --git a/third_party/mold/elf/arch-s390x.cc b/third_party/mold/elf/arch-s390x.cc new file mode 100644 index 00000000000..72c2965ed3b --- /dev/null +++ b/third_party/mold/elf/arch-s390x.cc @@ -0,0 +1,491 @@ +// clang-format off +// This file contains code for the IBM z/Architecture 64-bit ISA, which is +// commonly referred to as "s390x" on Linux. +// +// z/Architecture is a 64-bit CISC ISA developed by IBM around 2000 for +// IBM's "big iron" mainframe computers. The computers are direct +// descendents of IBM System/360 all the way back in 1966. I've never +// actually seen a mainframe, and you probaly haven't either, but it looks +// like the mainframe market is still large enough to sustain its ecosystem. +// Ubuntu for example provides the official support for s390x as of 2022. +// Since they are being actively maintained, we need to support them. +// +// As an instruction set, s390x isn't particularly odd. It has 16 general- +// purpose registers. Instructions are 2, 4 or 6 bytes long and always +// aligned to 2 bytes boundaries. Despite unfamiliarty, I found that it +// just feels like an x86-64 in a parallel universe. +// +// Here is the register usage in this ABI: +// +// r0-r1: reserved as scratch registers so we can use them in our PLT +// r2: parameter passing and return values +// r3-r6: parameter passing +// r12: address of GOT if position-independent code +// r14: return address +// r15: stack pointer +// a1: upper 32 bits of TP (thread pointer) +// a2: lower 32 bits of TP (thread pointer) +// +// Thread-local storage (TLS) is supported on s390x in the same way as it +// is on other targets with one exeption. On other targets, __tls_get_addr +// is used to get an address of a thread-local variable. On s390x, +// __tls_get_offset is used instead. The difference is __tls_get_offset +// returns an address of a thread-local variable as an offset from TP. So +// we need to add TP to a return value before use. I don't know why it is +// different, but that is the way it is. 
+// +// https://github.com/rui314/psabi/blob/main/s390x.pdf + +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +using E = S390X; + +static void write_mid20(u8 *loc, u64 val) { + *(ub32 *)loc |= (bits(val, 11, 0) << 16) | (bits(val, 19, 12) << 8); +} + +template <> +void write_plt_header(Context &ctx, u8 *buf) { + static u8 insn[] = { + 0xe3, 0x00, 0xf0, 0x38, 0x00, 0x24, // stg %r0, 56(%r15) + 0xc0, 0x10, 0, 0, 0, 0, // larl %r1, GOTPLT_OFFSET + 0xd2, 0x07, 0xf0, 0x30, 0x10, 0x08, // mvc 48(8, %r15), 8(%r1) + 0xe3, 0x10, 0x10, 0x10, 0x00, 0x04, // lg %r1, 16(%r1) + 0x07, 0xf1, // br %r1 + 0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr + }; + + memcpy(buf, insn, sizeof(insn)); + *(ub32 *)(buf + 8) = (ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 6) >> 1; +} + +template <> +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { + static u8 insn[] = { + 0xc0, 0x10, 0, 0, 0, 0, // larl %r1, GOTPLT_ENTRY_OFFSET + 0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg %r1, (%r1) + 0xc0, 0x01, 0, 0, 0, 0, // lgfi %r0, PLT_INDEX + 0x07, 0xf1, // br %r1 + 0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr + 0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr + }; + + memcpy(buf, insn, sizeof(insn)); + *(ub32 *)(buf + 2) = (sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx)) >> 1; + *(ub32 *)(buf + 14) = sym.get_plt_idx(ctx) * sizeof(ElfRel); +} + +template <> +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { + static u8 insn[] = { + 0xc0, 0x10, 0, 0, 0, 0, // larl %r1, GOT_ENTRY_OFFSET + 0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg %r1, (%r1) + 0x07, 0xf1, // br %r1 + 0x07, 0x00, // nopr + }; + + memcpy(buf, insn, sizeof(insn)); + *(ub32 *)(buf + 2) = (sym.get_got_addr(ctx) - sym.get_plt_addr(ctx)) >> 1; +} + +template <> +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) { + u8 *loc = ctx.buf + this->shdr.sh_offset + offset; + + switch (rel.r_type) { + case R_NONE: + break; + case R_390_PC32: + *(ub32 *)loc = val - this->shdr.sh_addr - offset; + break; + case R_390_64: + *(ub64 *)loc = val; + break; + default: + Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; + } +} + +template <> +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + auto check_dbl = [&](i64 val, i64 lo, i64 hi) { + check(val, lo, hi); + + // R_390_*DBL relocs should never refer a symbol at an odd address + if (val & 1) + Error(ctx) << *this << ": misaligned symbol " << sym + << " for relocation " << rel; + }; + + u64 S = sym.get_addr(ctx); + u64 A = rel.r_addend; + u64 P = get_addr() + rel.r_offset; + u64 G = sym.get_got_idx(ctx) * sizeof(Word); + u64 GOT = ctx.got->shdr.sh_addr; + + switch (rel.r_type) { + case R_390_64: + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_390_8: + check(S + A, 0, 1 << 8); + *loc = S + A; + break; + case R_390_12: + check(S + A, 0, 1 << 12); + *(ul16 *)loc |= bits(S + 
A, 11, 0); + break; + case R_390_16: + check(S + A, 0, 1 << 16); + *(ub16 *)loc = S + A; + break; + case R_390_20: + check(S + A, 0, 1 << 20); + write_mid20(loc, S + A); + break; + case R_390_32: + case R_390_PLT32: + check(S + A, 0, 1LL << 32); + *(ub32 *)loc = S + A; + break; + case R_390_PLT64: + *(ub64 *)loc = S + A; + break; + case R_390_PC12DBL: + case R_390_PLT12DBL: + check_dbl(S + A - P, -(1 << 12), 1 << 12); + *(ul16 *)loc |= bits(S + A - P, 12, 1); + break; + case R_390_PC16: + check(S + A - P, -(1 << 15), 1 << 15); + *(ub16 *)loc = S + A - P; + break; + case R_390_PC32: + check(S + A - P, -(1LL << 31), 1LL << 31); + *(ub32 *)loc = S + A - P; + break; + case R_390_PC64: + *(ub64 *)loc = S + A - P; + break; + case R_390_PC16DBL: + case R_390_PLT16DBL: + check_dbl(S + A - P, -(1 << 16), 1 << 16); + *(ub16 *)loc = (S + A - P) >> 1; + break; + case R_390_PC24DBL: + case R_390_PLT24DBL: + check_dbl(S + A - P, -(1 << 24), 1 << 24); + *(ub32 *)loc |= bits(S + A - P, 24, 1); + break; + case R_390_PC32DBL: + case R_390_PLT32DBL: + check_dbl(S + A - P, -(1LL << 32), 1LL << 32); + *(ub32 *)loc = (S + A - P) >> 1; + break; + case R_390_GOT12: + case R_390_GOTPLT12: + check(G + A, 0, 1 << 12); + *(ul16 *)loc |= bits(G + A, 11, 0); + break; + case R_390_GOT16: + case R_390_GOTPLT16: + check(G + A, 0, 1 << 16); + *(ub16 *)loc = G + A; + break; + case R_390_GOT20: + case R_390_GOTPLT20: + check(G + A, 0, 1 << 20); + write_mid20(loc, G + A); + break; + case R_390_GOT32: + case R_390_GOTPLT32: + check(G + A, 0, 1LL << 32); + *(ub32 *)loc = G + A; + break; + case R_390_GOT64: + case R_390_GOTPLT64: + *(ub64 *)loc = G + A; + break; + case R_390_GOTOFF16: + case R_390_PLTOFF16: + check(S + A - GOT, -(1 << 15), 1 << 15); + *(ub16 *)loc = S + A - GOT; + break; + case R_390_GOTOFF32: + case R_390_PLTOFF32: + check(S + A - GOT, -(1LL << 31), 1LL << 31); + *(ub32 *)loc = S + A - GOT; + break; + case R_390_GOTOFF64: + case R_390_PLTOFF64: + *(ub64 *)loc = S + A - GOT; + break; + case R_390_GOTPC: + *(ub64 *)loc = GOT + A - P; + break; + case R_390_GOTPCDBL: + check_dbl(GOT + A - P, -(1LL << 32), 1LL << 32); + *(ub32 *)loc = (GOT + A - P) >> 1; + break; + case R_390_GOTENT: + check(GOT + G + A - P, -(1LL << 32), 1LL << 32); + *(ub32 *)loc = (GOT + G + A - P) >> 1; + break; + case R_390_TLS_LE32: + *(ub32 *)loc = S + A - ctx.tp_addr; + break; + case R_390_TLS_LE64: + *(ub64 *)loc = S + A - ctx.tp_addr; + break; + case R_390_TLS_GOTIE20: + write_mid20(loc, sym.get_gottp_addr(ctx) + A - GOT); + break; + case R_390_TLS_IEENT: + *(ub32 *)loc = (sym.get_gottp_addr(ctx) + A - P) >> 1; + break; + case R_390_TLS_GD32: + if (sym.has_tlsgd(ctx)) + *(ub32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; + else if (sym.has_gottp(ctx)) + *(ub32 *)loc = sym.get_gottp_addr(ctx) + A - GOT; + else + *(ub32 *)loc = S + A - ctx.tp_addr; + break; + case R_390_TLS_GD64: + if (sym.has_tlsgd(ctx)) + *(ub64 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; + else if (sym.has_gottp(ctx)) + *(ub64 *)loc = sym.get_gottp_addr(ctx) + A - GOT; + else + *(ub64 *)loc = S + A - ctx.tp_addr; + break; + case R_390_TLS_GDCALL: + if (sym.has_tlsgd(ctx)) { + // do nothing + } else if (sym.has_gottp(ctx)) { + // lg %r2, 0(%r2, %r12) + static u8 insn[] = { 0xe3, 0x22, 0xc0, 0x00, 0x00, 0x04 }; + memcpy(loc, insn, sizeof(insn)); + } else { + // nop + static u8 insn[] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 }; + memcpy(loc, insn, sizeof(insn)); + } + break; + case R_390_TLS_LDM32: + if (ctx.got->has_tlsld(ctx)) + *(ub32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - 
GOT; + break; + case R_390_TLS_LDM64: + if (ctx.got->has_tlsld(ctx)) + *(ub64 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; + break; + case R_390_TLS_LDO32: + if (ctx.got->has_tlsld(ctx)) + *(ub32 *)loc = S + A - ctx.dtp_addr; + else + *(ub32 *)loc = S + A - ctx.tp_addr; + break; + case R_390_TLS_LDO64: + if (ctx.got->has_tlsld(ctx)) + *(ub64 *)loc = S + A - ctx.dtp_addr; + else + *(ub64 *)loc = S + A - ctx.tp_addr; + break; + case R_390_TLS_LDCALL: + if (!ctx.got->has_tlsld(ctx)) { + // nop + static u8 insn[] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 }; + memcpy(loc, insn, sizeof(insn)); + } + break; + default: + unreachable(); + } + } +} + +template <> +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? frag_addend : (i64)rel.r_addend; + + switch (rel.r_type) { + case R_390_32: { + i64 val = S + A; + check(val, 0, 1LL << 32); + *(ub32 *)loc = val; + break; + } + case R_390_64: + if (std::optional val = get_tombstone(sym, frag)) + *(ub64 *)loc = *val; + else + *(ub64 *)loc = S + A; + break; + case R_390_TLS_LDO64: + if (std::optional val = get_tombstone(sym, frag)) + *(ub64 *)loc = *val; + else + *(ub64 *)loc = S + A - ctx.dtp_addr; + break; + default: + Fatal(ctx) << *this << ": apply_reloc_nonalloc: " << rel; + } + } +} + +template <> +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + // Scan relocations + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + + if (sym.is_ifunc()) + sym.flags |= NEEDS_GOT | NEEDS_PLT; + + switch (rel.r_type) { + case R_390_64: + scan_dyn_absrel(ctx, sym, rel); + break; + case R_390_8: + case R_390_12: + case R_390_16: + case R_390_20: + case R_390_32: + scan_absrel(ctx, sym, rel); + break; + case R_390_PC16: + case R_390_PC16DBL: + case R_390_PC32: + case R_390_PC32DBL: + case R_390_PC64: + scan_pcrel(ctx, sym, rel); + break; + case R_390_GOT12: + case R_390_GOT16: + case R_390_GOT20: + case R_390_GOT32: + case R_390_GOT64: + case R_390_GOTOFF16: + case R_390_GOTOFF32: + case R_390_GOTOFF64: + case R_390_GOTPLT12: + case R_390_GOTPLT16: + case R_390_GOTPLT20: + case R_390_GOTPLT32: + case R_390_GOTPLT64: + case R_390_GOTPC: + case R_390_GOTPCDBL: + case R_390_GOTENT: + sym.flags |= NEEDS_GOT; + break; + case R_390_PLT12DBL: + case R_390_PLT16DBL: + case R_390_PLT24DBL: + case R_390_PLT32: + case R_390_PLT32DBL: + case R_390_PLT64: + case R_390_PLTOFF16: + case R_390_PLTOFF32: + case R_390_PLTOFF64: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_390_TLS_GOTIE20: + case R_390_TLS_IEENT: + sym.flags |= NEEDS_GOTTP; + break; + case R_390_TLS_GD32: + case R_390_TLS_GD64: + // We always want to relax 
calls to __tls_get_offset() in statically- + // linked executables because __tls_get_offset() in libc.a just calls + // abort(). + if (ctx.arg.is_static || + (ctx.arg.relax && !sym.is_imported && !ctx.arg.shared)) { + // do nothing + } else if (ctx.arg.relax && !sym.is_imported && ctx.arg.shared && + !ctx.arg.z_dlopen) { + sym.flags |= NEEDS_GOTTP; + } else { + sym.flags |= NEEDS_TLSGD; + } + break; + case R_390_TLS_LDM32: + case R_390_TLS_LDM64: { + bool do_relax = ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared); + if (!do_relax) + ctx.needs_tlsld = true; + break; + } + case R_390_TLS_LE32: + case R_390_TLS_LE64: + check_tlsle(ctx, sym, rel); + break; + case R_390_TLS_LDO32: + case R_390_TLS_LDO64: + case R_390_TLS_GDCALL: + case R_390_TLS_LDCALL: + break; + default: + Fatal(ctx) << *this << ": scan_relocations: " << rel; + } + } +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/arch-sh4.cc b/third_party/mold/elf/arch-sh4.cc new file mode 100644 index 00000000000..dcce34df95b --- /dev/null +++ b/third_party/mold/elf/arch-sh4.cc @@ -0,0 +1,355 @@ +// clang-format off +// SH-4 (SuperH 4) is a 32-bit RISC ISA developed by Hitachi in the early +// '90s. Some relatively powerful systems were developed with SH-4. +// A notable example is Sega's Dreamcast game console which debuted in 1998. +// Hitachi later spun off its semiconductor division as an independent +// company, Renesas, and Renesas is still selling SH-4 processors for the +// embedded market. It has never been as popular as ARM is, and its +// popularity continues to decline though. +// +// SH-4's most distinctive feature compared to other RISC ISAs is that its +// instructions are 16 bits in length instead of more common 32 bits for +// better code density. This difference affects various aspects of its +// instruction set as shown below: +// +// - SH-4 has 16 general-purpose registers (GPRs) instead of the most +// commmon 32 GPR configuration to save one bit to specify a register. +// +// - Binary instructions such as ADD normally take three register in +// RISC ISAs (e.g. x ← y ⊕ z where x, y and z are registers), but +// SH-4's instructions take only two registers. The result of an +// operation is written to one of the source registers (e.g. x ← x ⊕ y). +// +// - Usual RISC ISAs have "load high" and "load low" instructions to set +// an immediate to most significant and least significant bits in a +// register to construct a full 32-bit value in a register. This +// technique is hard to use in SH-4, as 16 bit instructions are too +// small to contain large immediates. On SH-4, large immediates are +// loaded from memory using `mov.l` PC-relative load instruction. +// +// - Many RISC ISAs are, despite their name, actually fairly complex. +// They tend to have hundreds if not thousands of different instructions. +// SH-4 doesn't really have that many instructions because its 16-bit +// machine code simply can't encode many different opcodes. As a +// result, the number of relocations the linker has to support is also +// small. +// +// Beside these, SH-4 has a delay branch slot just like contemporary MIPS +// and SPARC. That is, one instruction after a branch instruction will +// always be executed even if the branch is taken. Delay branch slot allows +// a pipelined CPU to start and finish executing an instruction after a +// branch regardless of the branch's condition, simplifying the processor's +// implementation. It's considered a bad premature optimization nowadays, +// though. 
Modern RISC processors don't have it. +// +// Here are notes about the SH-4 psABI: +// +// - If a source file is compiled with -fPIC, each function starts +// with a piece of code to store the address of .got to %r12. +// We can use the register in our PLT for position-independent output. +// +// - Even though it uses the RELA-type relocations, relocation addends +// are stored not to the r_addend field but to the relocated section +// contents for some reason. Therefore, it's effectively REL. +// +// - It looks like the ecosystem has bit-rotted. Some tests, especially +// one using C++ exceptions, don't pass even with GNU ld. +// +// - GCC/SH4 tends to write dynamically-relocated data into .text, so the +// output from the linker contains lots of text relocations. That's not +// a problem with embedded programming, I guess. + +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +using E = SH4; + +// Even though SH-4 uses RELA-type relocations, addends are stored to +// relocated places for some reason. +template <> +i64 get_addend(u8 *loc, const ElfRel &rel) { + switch (rel.r_type) { + case R_SH_DIR32: + case R_SH_REL32: + case R_SH_TLS_GD_32: + case R_SH_TLS_LD_32: + case R_SH_TLS_LDO_32: + case R_SH_TLS_IE_32: + case R_SH_TLS_LE_32: + case R_SH_TLS_DTPMOD32: + case R_SH_TLS_DTPOFF32: + case R_SH_TLS_TPOFF32: + case R_SH_GOT32: + case R_SH_PLT32: + case R_SH_GOTOFF: + case R_SH_GOTPC: + case R_SH_GOTPLT32: + return *(ul32 *)loc; + default: + return 0; + } +} + +template <> +void write_plt_header(Context &ctx, u8 *buf) { + if (ctx.arg.pic) { + static const u8 insn[] = { + 0x02, 0xd2, // mov.l 1f, r2 + 0xcc, 0x32, // add r12, r2 + 0x22, 0x50, // mov.l @(8, r2), r0 + 0x21, 0x52, // mov.l @(4, r2), r2 + 0x2b, 0x40, // jmp @r0 + 0x00, 0xe0, // mov #0, r0 + 0, 0, 0, 0, // 1: .long GOTPLT + }; + + static_assert(sizeof(insn) == E::plt_hdr_size); + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 12) = ctx.gotplt->shdr.sh_addr - ctx.got->shdr.sh_addr; + } else { + static const u8 insn[] = { + 0x02, 0xd2, // mov.l 1f, r2 + 0x22, 0x50, // mov.l @(8, r2), r0 + 0x21, 0x52, // mov.l @(4, r2), r2 + 0x2b, 0x40, // jmp @r0 + 0x00, 0xe0, // mov #0, r0 + 0x09, 0x00, // nop + 0, 0, 0, 0, // 1: .long GOTPLT + }; + + static_assert(sizeof(insn) == E::plt_hdr_size); + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 12) = ctx.gotplt->shdr.sh_addr; + } +} + +template <> +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { + if (ctx.arg.pic) { + static const u8 insn[] = { + 0x01, 0xd0, // mov.l 1f, r0 + 0xce, 0x00, // mov.l @(r0, r12), r0 + 0x2b, 0x40, // jmp @r0 + 0x01, 0xd1, // mov.l 2f, r1 + 0, 0, 0, 0, // 1: .long GOTPLT_ENTRY + 0, 0, 0, 0, // 2: .long INDEX_IN_RELPLT + }; + + static_assert(sizeof(insn) == E::plt_size); + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 8) = sym.get_gotplt_addr(ctx) - ctx.got->shdr.sh_addr; + *(ul32 *)(buf + 12) = sym.get_plt_idx(ctx) * sizeof(ElfRel); + } else { + static const u8 insn[] = { + 0x01, 0xd0, // mov.l 1f, r0 + 0x02, 0x60, // mov.l @r0, r0 + 0x2b, 0x40, // jmp @r0 + 0x01, 0xd1, // mov.l 2f, r1 + 0, 0, 0, 0, // 1: .long GOTPLT_ENTRY + 0, 0, 0, 0, // 2: .long INDEX_IN_RELPLT + }; + + static_assert(sizeof(insn) == E::plt_size); + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 8) = sym.get_gotplt_addr(ctx); + *(ul32 *)(buf + 12) = sym.get_plt_idx(ctx) * sizeof(ElfRel); + } +} + +template <> +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { + if (ctx.arg.pic) { + static const u8 insn[] = { + 0x01, 0xd0, // mov.l 1f, r0 + 
0xce, 0x00, // mov.l @(r0, r12), r0 + 0x2b, 0x40, // jmp @r0 + 0x09, 0x00, // nop + 0, 0, 0, 0, // 1: .long GOT_ENTRY + }; + + static_assert(sizeof(insn) == E::pltgot_size); + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 8) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr; + } else { + static const u8 insn[] = { + 0x01, 0xd0, // mov.l 1f, r0 + 0x02, 0x60, // mov.l @r0, r0 + 0x2b, 0x40, // jmp @r0 + 0x09, 0x00, // nop + 0, 0, 0, 0, // 1: .long GOT_ENTRY + }; + + static_assert(sizeof(insn) == E::pltgot_size); + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 8) = sym.get_got_addr(ctx); + } +} + +template <> +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) { + u8 *loc = ctx.buf + this->shdr.sh_offset + offset; + + switch (rel.r_type) { + case R_NONE: + break; + case R_SH_DIR32: + *(ul32 *)loc = val; + break; + case R_SH_REL32: + *(ul32 *)loc = val - this->shdr.sh_addr - offset; + break; + default: + Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; + } +} + +template <> +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + u64 S = sym.get_addr(ctx); + u64 A = get_addend(loc, rel); + u64 P = get_addr() + rel.r_offset; + u64 G = sym.get_got_idx(ctx) * sizeof(Word); + u64 GOT = ctx.got->shdr.sh_addr; + + switch (rel.r_type) { + case R_SH_DIR32: + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_SH_REL32: + case R_SH_PLT32: + *(ul32 *)loc = S + A - P; + break; + case R_SH_GOT32: + *(ul32 *)loc = G; + break; + case R_SH_GOTPC: + *(ul32 *)loc = GOT + A - P; + break; + case R_SH_GOTOFF: + *(ul32 *)loc = S + A - GOT; + break; + case R_SH_TLS_GD_32: + *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; + break; + case R_SH_TLS_LD_32: + *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; + break; + case R_SH_TLS_LDO_32: + *(ul32 *)loc = S + A - ctx.dtp_addr; + break; + case R_SH_TLS_IE_32: + *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - GOT; + break; + case R_SH_TLS_LE_32: + *(ul32 *)loc = S + A - ctx.tp_addr; + break; + default: + unreachable(); + } + } +} + +template <> +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? 
frag_addend : get_addend(loc, rel); + + switch (rel.r_type) { + case R_SH_DIR32: + if (std::optional val = get_tombstone(sym, frag)) + *(ul32 *)loc = *val; + else + *(ul32 *)loc = S + A; + break; + default: + Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " + << rel; + } + } +} + +template <> +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + + if (sym.is_ifunc()) + Error(ctx) << sym << ": GNU ifunc symbol is not supported on sh4"; + + switch (rel.r_type) { + case R_SH_DIR32: + scan_dyn_absrel(ctx, sym, rel); + break; + case R_SH_REL32: + scan_pcrel(ctx, sym, rel); + break; + case R_SH_GOT32: + sym.flags |= NEEDS_GOT; + break; + case R_SH_PLT32: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_SH_TLS_GD_32: + sym.flags |= NEEDS_TLSGD; + break; + case R_SH_TLS_LD_32: + ctx.needs_tlsld = true; + break; + case R_SH_TLS_IE_32: + sym.flags |= NEEDS_GOTTP; + break; + case R_SH_TLS_LE_32: + check_tlsle(ctx, sym, rel); + break; + case R_SH_GOTPC: + case R_SH_GOTOFF: + case R_SH_TLS_LDO_32: + break; + default: + Fatal(ctx) << *this << ": unknown relocation: " << rel; + } + } +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/arch-sparc64.cc b/third_party/mold/elf/arch-sparc64.cc new file mode 100644 index 00000000000..35ac760b481 --- /dev/null +++ b/third_party/mold/elf/arch-sparc64.cc @@ -0,0 +1,622 @@ +// clang-format off +// SPARC is a RISC ISA developed by Sun Microsystems. +// +// The byte order of the processor is big-endian. Anything larger than a +// byte is stored in the "reverse" order compared to little-endian +// processors such as x86-64. +// +// All instructions are 4 bytes long and aligned to 4 bytes boundaries. +// +// A notable feature of SPARC is that, unlike other RISC ISAs, it doesn't +// need range extension thunks. It is because the SPARC's CALL instruction +// contains a whopping 30 bits immediate. The processor scales it by 4 to +// extend it to 32 bits (this is doable because all instructions are +// aligned to 4 bytes boundaries, so the least significant two bits are +// always zero). That means CALL's reach is PC ± 2 GiB, elinating the +// need of range extension thunks. It comes with the cost that the CALL +// instruction alone takes 1/4th of the instruction encoding space, +// though. +// +// SPARC has 32 general purpose registers. CALL instruction saves a return +// address to %o7, which is an alias for %r15. Thread pointer is stored to +// %g7 which is %r7. +// +// SPARC does not have PC-relative load/store instructions. To access data +// in the position-independent manner, we usually first set the address of +// .got to, for example, %l7, with the following piece of code +// +// sethi %hi(. - _GLOBAL_OFFSET_TABLE_), %l7 +// add %l7, %lo(. - _GLOBAL_OFFSET_TABLE_), %l7 +// call __sparc_get_pc_thunk.l7 +// nop +// +// where __sparc_get_pc_thunk.l7 is defined as +// +// retl +// add %o7, %l7, %l7 +// +// . SETHI and the following ADD materialize a 32 bits offset to .got. +// CALL instruction sets a return address to $o7, and the subsequent ADD +// adds it to the GOT offset to materialize the absolute address of .got. 
+// +// Note that we have a NOP after CALL and an ADD after RETL because of +// SPARC's delay branch slots. That is, the SPARC processor always +// executes one instruction after a branch even if the branch is taken. +// This may seem like an odd behavior, and indeed it is considered as such +// (that's a premature optimization for the early pipelined SPARC +// processors), but that's been a part of the ISA's spec so that's what it +// is. +// +// Note also that the .got address obtained this way is not shared between +// functions, so functions can use an arbitrary register to hold the .got +// address. That also means each function needs to execute the above piece +// of code to become position-independent. +// +// This scheme is very similar to i386. That may not be a coincidence +// because the i386 ELF psABI is created by Sun Microsystems too. +// +// https://github.com/rui314/psabi/blob/main/sparc.pdf + +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +using E = SPARC64; + +// SPARC's PLT section is writable despite containing executable code. +// We don't need to write the PLT header entry because the dynamic loader +// will do that for us. +// +// We also don't need a .got.plt section to store the result of lazy PLT +// symbol resolution because the dynamic symbol resolver directly mutates +// instructions in PLT so that they jump to the right places next time. +// That's why each PLT entry contains lots of NOPs; they are a placeholder +// for the runtime to add more instructions. +// +// Self-modifying code is nowadays considered really bad from the security +// point of view, though. +template <> +void write_plt_header(Context &ctx, u8 *buf) { + memset(buf, 0, E::plt_hdr_size); +} + +template <> +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { + static ub32 insn[] = { + 0x0300'0000, // sethi (. - .PLT0), %g1 + 0x3068'0000, // ba,a %xcc, .PLT1 + 0x0100'0000, // nop + 0x0100'0000, // nop + 0x0100'0000, // nop + 0x0100'0000, // nop + 0x0100'0000, // nop + 0x0100'0000, // nop + }; + + u64 plt0 = ctx.plt->shdr.sh_addr; + u64 plt1 = ctx.plt->shdr.sh_addr + E::plt_size; + u64 entry = sym.get_plt_addr(ctx); + + memcpy(buf, insn, sizeof(insn)); + *(ub32 *)buf |= bits(entry - plt0, 21, 0); + *(ub32 *)(buf + 4) |= bits(plt1 - entry - 4, 20, 2); +} + +template <> +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { + static ub32 entry[] = { + 0x8a10'000f, // mov %o7, %g5 + 0x4000'0002, // call . 
+ 8 + 0xc25b'e014, // ldx [ %o7 + 20 ], %g1 + 0xc25b'c001, // ldx [ %o7 + %g1 ], %g1 + 0x81c0'4000, // jmp %g1 + 0x9e10'0005, // mov %g5, %o7 + 0x0000'0000, // .quad $plt_entry - $got_entry + 0x0000'0000, + }; + + memcpy(buf, entry, sizeof(entry)); + *(ub64 *)(buf + 24) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 4; +} + +template <> +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) { + u8 *loc = ctx.buf + this->shdr.sh_offset + offset; + + switch (rel.r_type) { + case R_NONE: + break; + case R_SPARC_64: + case R_SPARC_UA64: + *(ub64 *)loc = val; + break; + case R_SPARC_DISP32: + *(ub32 *)loc = val - this->shdr.sh_addr - offset; + break; + default: + Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; + } +} + +template <> +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + u64 S = sym.get_addr(ctx); + u64 A = rel.r_addend; + u64 P = (get_addr() + rel.r_offset); + u64 G = (sym.get_got_idx(ctx) * sizeof(Word)); + u64 GOT = ctx.got->shdr.sh_addr; + + switch (rel.r_type) { + case R_SPARC_64: + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_SPARC_5: + check(S + A, 0, 1 << 5); + *(ub32 *)loc |= bits(S + A, 4, 0); + break; + case R_SPARC_6: + check(S + A, 0, 1 << 6); + *(ub32 *)loc |= bits(S + A, 5, 0); + break; + case R_SPARC_7: + check(S + A, 0, 1 << 7); + *(ub32 *)loc |= bits(S + A, 6, 0); + break; + case R_SPARC_8: + check(S + A, 0, 1 << 8); + *(u8 *)loc = S + A; + break; + case R_SPARC_10: + check(S + A, 0, 1 << 10); + *(ub32 *)loc |= bits(S + A, 9, 0); + break; + case R_SPARC_LO10: + case R_SPARC_LOPLT10: + *(ub32 *)loc |= bits(S + A, 9, 0); + break; + case R_SPARC_11: + check(S + A, 0, 1 << 11); + *(ub32 *)loc |= bits(S + A, 10, 0); + break; + case R_SPARC_13: + check(S + A, 0, 1 << 13); + *(ub32 *)loc |= bits(S + A, 12, 0); + break; + case R_SPARC_16: + case R_SPARC_UA16: + check(S + A, 0, 1 << 16); + *(ub16 *)loc = S + A; + break; + case R_SPARC_22: + check(S + A, 0, 1 << 22); + *(ub32 *)loc |= bits(S + A, 21, 0); + break; + case R_SPARC_32: + case R_SPARC_UA32: + case R_SPARC_PLT32: + check(S + A, 0, 1LL << 32); + *(ub32 *)loc = S + A; + break; + case R_SPARC_PLT64: + case R_SPARC_UA64: + case R_SPARC_REGISTER: + *(ub64 *)loc = S + A; + break; + case R_SPARC_DISP8: + check(S + A - P, -(1 << 7), 1 << 7); + *(u8 *)loc = S + A - P; + break; + case R_SPARC_DISP16: + check(S + A - P, -(1 << 15), 1 << 15); + *(ub16 *)loc = S + A - P; + break; + case R_SPARC_DISP32: + case R_SPARC_PCPLT32: + check(S + A - P, -(1LL << 31), 1LL << 31); + *(ub32 *)loc = S + A - P; + break; + case R_SPARC_DISP64: + *(ub64 *)loc = S + A - P; + break; + case R_SPARC_WDISP16: { + i64 val = S + A - P; + check(val, -(1 << 16), 1 << 16); + *(ub16 *)loc |= (bit(val, 16) << 21) | bits(val, 15, 2); + break; + } + case R_SPARC_WDISP19: + check(S + A - P, -(1 << 20), 1 << 20); + *(ub32 *)loc |= bits(S + A - P, 20, 2); + break; + 
case R_SPARC_WDISP22: + check(S + A - P, -(1 << 23), 1 << 23); + *(ub32 *)loc |= bits(S + A - P, 23, 2); + break; + case R_SPARC_WDISP30: + case R_SPARC_WPLT30: + check(S + A - P, -(1LL << 31), 1LL << 31); + *(ub32 *)loc |= bits(S + A - P, 31, 2); + break; + case R_SPARC_HI22: + case R_SPARC_HIPLT22: + case R_SPARC_LM22: + *(ub32 *)loc |= bits(S + A, 31, 10); + break; + case R_SPARC_GOT10: + *(ub32 *)loc |= bits(G, 9, 0); + break; + case R_SPARC_GOT13: + check(G, 0, 1 << 12); + *(ub32 *)loc |= bits(G, 12, 0); + break; + case R_SPARC_GOT22: + *(ub32 *)loc |= bits(G, 31, 10); + break; + case R_SPARC_GOTDATA_HIX22: { + i64 val = S + A - GOT; + *(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10); + break; + } + case R_SPARC_GOTDATA_LOX10: { + i64 val = S + A - GOT; + *(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0); + break; + } + case R_SPARC_GOTDATA_OP_HIX22: + // We always have to relax a GOT load to a load immediate if a + // symbol is local, because R_SPARC_GOTDATA_OP cannot represent + // an addend for a local symbol. + if (sym.is_imported || sym.is_ifunc()) { + *(ub32 *)loc |= bits(G, 31, 10); + } else if (sym.is_absolute()) { + i64 val = S + A; + *(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10); + } else { + i64 val = S + A - GOT; + *(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10); + } + break; + case R_SPARC_GOTDATA_OP_LOX10: { + if (sym.is_imported || sym.is_ifunc()) { + *(ub32 *)loc |= bits(G, 9, 0); + } else if (sym.is_absolute()) { + i64 val = S + A; + *(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0); + } else { + i64 val = S + A - GOT; + *(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0); + } + break; + } + case R_SPARC_GOTDATA_OP: + if (sym.is_imported || sym.is_ifunc()) + break; + + if (sym.is_absolute()) { + // ldx [ %g2 + %g1 ], %g1 → nop + *(ub32 *)loc = 0x0100'0000; + } else { + // ldx [ %g2 + %g1 ], %g1 → add %g2, %g1, %g1 + *(ub32 *)loc &= 0b00'11111'000000'11111'1'11111111'11111; + *(ub32 *)loc |= 0b10'00000'000000'00000'0'00000000'00000; + } + break; + case R_SPARC_PC10: + case R_SPARC_PCPLT10: + *(ub32 *)loc |= bits(S + A - P, 9, 0); + break; + case R_SPARC_PC22: + case R_SPARC_PCPLT22: + case R_SPARC_PC_LM22: + *(ub32 *)loc |= bits(S + A - P, 31, 10); + break; + case R_SPARC_OLO10: + *(ub32 *)loc |= bits(bits(S + A, 9, 0) + rel.r_type_data, 12, 0); + break; + case R_SPARC_HH22: + *(ub32 *)loc |= bits(S + A, 63, 42); + break; + case R_SPARC_HM10: + *(ub32 *)loc |= bits(S + A, 41, 32); + break; + case R_SPARC_PC_HH22: + *(ub32 *)loc |= bits(S + A - P, 63, 42); + break; + case R_SPARC_PC_HM10: + *(ub32 *)loc |= bits(S + A - P, 41, 32); + break; + case R_SPARC_HIX22: + *(ub32 *)loc |= bits(~(S + A), 31, 10); + break; + case R_SPARC_LOX10: + *(ub32 *)loc |= bits(S + A, 9, 0) | 0b1'1100'0000'0000; + break; + case R_SPARC_H44: + *(ub32 *)loc |= bits(S + A, 43, 22); + break; + case R_SPARC_M44: + *(ub32 *)loc |= bits(S + A, 21, 12); + break; + case R_SPARC_L44: + *(ub32 *)loc |= bits(S + A, 11, 0); + break; + case R_SPARC_TLS_GD_HI22: + *(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 31, 10); + break; + case R_SPARC_TLS_GD_LO10: + *(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 9, 0); + break; + case R_SPARC_TLS_GD_CALL: + case R_SPARC_TLS_LDM_CALL: { + u64 addr; + if (ctx.arg.is_static) + addr = ctx.extra.tls_get_addr_sec->shdr.sh_addr; + else + addr = ctx.extra.tls_get_addr_sym->get_addr(ctx); + + *(ub32 *)loc |= bits(addr + A - P, 31, 2); + break; + } + case R_SPARC_TLS_LDM_HI22: + *(ub32 *)loc |= 
bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 31, 10); + break; + case R_SPARC_TLS_LDM_LO10: + *(ub32 *)loc |= bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 9, 0); + break; + case R_SPARC_TLS_LDO_HIX22: + *(ub32 *)loc |= bits(S + A - ctx.dtp_addr, 31, 10); + break; + case R_SPARC_TLS_LDO_LOX10: + *(ub32 *)loc |= bits(S + A - ctx.dtp_addr, 9, 0); + break; + case R_SPARC_TLS_IE_HI22: + *(ub32 *)loc |= bits(sym.get_gottp_addr(ctx) + A - GOT, 31, 10); + break; + case R_SPARC_TLS_IE_LO10: + *(ub32 *)loc |= bits(sym.get_gottp_addr(ctx) + A - GOT, 9, 0); + break; + case R_SPARC_TLS_LE_HIX22: + *(ub32 *)loc |= bits(~(S + A - ctx.tp_addr), 31, 10); + break; + case R_SPARC_TLS_LE_LOX10: + *(ub32 *)loc |= bits(S + A - ctx.tp_addr, 9, 0) | 0b1'1100'0000'0000; + break; + case R_SPARC_SIZE32: + *(ub32 *)loc = sym.esym().st_size + A; + break; + case R_SPARC_TLS_GD_ADD: + case R_SPARC_TLS_LDM_ADD: + case R_SPARC_TLS_LDO_ADD: + case R_SPARC_TLS_IE_LD: + case R_SPARC_TLS_IE_LDX: + case R_SPARC_TLS_IE_ADD: + break; + default: + unreachable(); + } + } +} + +template <> +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? 
frag_addend : (i64)rel.r_addend; + + switch (rel.r_type) { + case R_SPARC_64: + case R_SPARC_UA64: + if (std::optional val = get_tombstone(sym, frag)) + *(ub64 *)loc = *val; + else + *(ub64 *)loc = S + A; + break; + case R_SPARC_32: + case R_SPARC_UA32: { + i64 val = S + A; + check(val, 0, 1LL << 32); + *(ub32 *)loc = val; + break; + } + case R_SPARC_TLS_DTPOFF32: + *(ub32 *)loc = S + A - ctx.dtp_addr; + break; + case R_SPARC_TLS_DTPOFF64: + *(ub64 *)loc = S + A - ctx.dtp_addr; + break; + default: + Fatal(ctx) << *this << ": apply_reloc_nonalloc: " << rel; + } + } +} + +template <> +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + // Scan relocations + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + + if (sym.is_ifunc()) + sym.flags |= NEEDS_GOT | NEEDS_PLT; + + switch (rel.r_type) { + case R_SPARC_64: + scan_dyn_absrel(ctx, sym, rel); + break; + case R_SPARC_8: + case R_SPARC_5: + case R_SPARC_6: + case R_SPARC_7: + case R_SPARC_10: + case R_SPARC_11: + case R_SPARC_13: + case R_SPARC_16: + case R_SPARC_22: + case R_SPARC_32: + case R_SPARC_REGISTER: + case R_SPARC_UA16: + case R_SPARC_UA32: + case R_SPARC_UA64: + case R_SPARC_PC_HM10: + case R_SPARC_OLO10: + case R_SPARC_LOX10: + case R_SPARC_HM10: + case R_SPARC_M44: + case R_SPARC_HIX22: + case R_SPARC_LO10: + case R_SPARC_L44: + case R_SPARC_LM22: + case R_SPARC_HI22: + case R_SPARC_H44: + case R_SPARC_HH22: + scan_absrel(ctx, sym, rel); + break; + case R_SPARC_PLT32: + case R_SPARC_WPLT30: + case R_SPARC_WDISP30: + case R_SPARC_HIPLT22: + case R_SPARC_LOPLT10: + case R_SPARC_PCPLT32: + case R_SPARC_PCPLT22: + case R_SPARC_PCPLT10: + case R_SPARC_PLT64: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_SPARC_GOT13: + case R_SPARC_GOT10: + case R_SPARC_GOT22: + case R_SPARC_GOTDATA_HIX22: + sym.flags |= NEEDS_GOT; + break; + case R_SPARC_GOTDATA_OP_HIX22: + if (sym.is_imported) + sym.flags |= NEEDS_GOT; + break; + case R_SPARC_DISP16: + case R_SPARC_DISP32: + case R_SPARC_DISP64: + case R_SPARC_DISP8: + case R_SPARC_PC10: + case R_SPARC_PC22: + case R_SPARC_PC_LM22: + case R_SPARC_WDISP16: + case R_SPARC_WDISP19: + case R_SPARC_WDISP22: + case R_SPARC_PC_HH22: + scan_pcrel(ctx, sym, rel); + break; + case R_SPARC_TLS_GD_HI22: + sym.flags |= NEEDS_TLSGD; + break; + case R_SPARC_TLS_LDM_HI22: + ctx.needs_tlsld = true; + break; + case R_SPARC_TLS_IE_HI22: + sym.flags |= NEEDS_GOTTP; + break; + case R_SPARC_TLS_GD_CALL: + case R_SPARC_TLS_LDM_CALL: + if (!ctx.arg.is_static && ctx.extra.tls_get_addr_sym->is_imported) + ctx.extra.tls_get_addr_sym->flags |= NEEDS_PLT; + break; + case R_SPARC_TLS_LE_HIX22: + case R_SPARC_TLS_LE_LOX10: + check_tlsle(ctx, sym, rel); + break; + case R_SPARC_GOTDATA_OP_LOX10: + case R_SPARC_GOTDATA_OP: + case R_SPARC_GOTDATA_LOX10: + case R_SPARC_TLS_GD_LO10: + case R_SPARC_TLS_GD_ADD: + case R_SPARC_TLS_LDM_LO10: + case R_SPARC_TLS_LDM_ADD: + case R_SPARC_TLS_LDO_HIX22: + case R_SPARC_TLS_LDO_LOX10: + case R_SPARC_TLS_LDO_ADD: + case R_SPARC_TLS_IE_ADD: + case R_SPARC_TLS_IE_LD: + case R_SPARC_TLS_IE_LDX: + case R_SPARC_TLS_IE_LO10: + case R_SPARC_SIZE32: + break; + default: + Fatal(ctx) << *this << ": scan_relocations: " << rel; + } + } +} + +// __tls_get_addr is not defined by libc.a, so we can't use that function +// 
in statically-linked executables. This section provides a replacement. +void SparcTlsGetAddrSection::copy_buf(Context &ctx) { + ub32 *buf = (ub32 *)(ctx.buf + this->shdr.sh_offset); + + static const ub32 insn[] = { + 0x0300'0000, // sethi %hi(TP_SIZE), %g1 + 0x8210'6000, // or %g1, %lo(TP_SIZE), %g1 + 0x8221'c001, // sub %g7, %g1, %g1 + 0xd05a'2008, // ldx [ %o0 + 8 ], %o0 + 0x81c3'e008, // retl + 0x9000'4008, // add %g1, %o0, %o0 + }; + + assert(this->shdr.sh_size == sizeof(insn)); + memcpy(buf, insn, sizeof(insn)); + + buf[0] |= bits(ctx.tp_addr - ctx.tls_begin, 31, 10); + buf[1] |= bits(ctx.tp_addr - ctx.tls_begin, 9, 0); +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/arch-x86-64.cc b/third_party/mold/elf/arch-x86-64.cc new file mode 100644 index 00000000000..05746b374c7 --- /dev/null +++ b/third_party/mold/elf/arch-x86-64.cc @@ -0,0 +1,773 @@ +// clang-format off +// Supporting x86-64 is straightforward. Unlike its predecessor, i386, +// x86-64 supports PC-relative addressing for position-independent code. +// Being CISC, its instructions are variable in size. Branch instructions +// take 4 bytes offsets, so we don't need range extension thunks. +// +// The psABI specifies %r11 as neither caller- nor callee-saved. It's +// intentionally left out so that we can use it as a scratch register in +// PLT. +// +// Thread Pointer (TP) is stored not to a general-purpose register but to +// FS segment register. Segment register is a 64-bits register which can +// be used as a base address for memory access. Each thread has a unique +// FS value, and they access their thread-local variables relative to FS +// as %fs:offset_from_tp. +// +// The value of a segment register itself is not generally readable from +// the user space. As a workaround, libc initializes %fs:0 (the first word +// referenced by FS) to the value of %fs itself. So we can obtain TP just +// by `mov %fs:0, %rax` if we need it. +// +// For historical reasons, TP points past the end of the TLS block on x86. +// This is contrary to other psABIs which usually use the beginning of the +// TLS block as TP (with some addend). As a result, offsets from TP to +// thread-local variables (TLVs) in the main executable are all negative. +// +// https://github.com/rui314/psabi/blob/main/x86-64.pdf +// https://github.com/rui314/psabi/blob/main/i386.pdf + +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +using E = X86_64; + +// This is a security-enhanced version of the regular PLT. The PLT +// header and each PLT entry starts with endbr64 for the Intel's +// control-flow enforcement security mechanism. +// +// Note that our IBT-enabled PLT instruction sequence is different +// from the one used in GNU ld. GNU's IBTPLT implementation uses two +// separate sections (.plt and .plt.sec) in which one PLT entry takes +// 32 bytes in total. Our IBTPLT consists of just .plt and each entry +// is 16 bytes long. +// +// Our PLT entry clobbers %r11, but that's fine because the resolver +// function (_dl_runtime_resolve) clobbers %r11 anyway. 
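+// A quick sanity check of the displacement math used below (an editor's
+// illustrative sketch, not part of the original mold sources): a
+// rip-relative operand is encoded relative to the address of the *next*
+// instruction. In the header, the "push GOTPLT+8(%rip)" stores its 4-byte
+// displacement at offset 8 and ends at offset 12, so the stored value is
+//
+//   (GOTPLT + 8) - (PLT + 12) == GOTPLT - PLT - 4
+//
+// and the "jmp *GOTPLT+16(%rip)" ends at offset 18, giving
+// GOTPLT - PLT - 2. The same reasoning explains the "- 16" in
+// write_plt_entry() (the jmp ends at the end of the 16-byte entry) and
+// the "- 10" in write_pltgot_entry().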
+template <> +void write_plt_header(Context &ctx, u8 *buf) { + static const u8 insn[] = { + 0xf3, 0x0f, 0x1e, 0xfa, // endbr64 + 0x41, 0x53, // push %r11 + 0xff, 0x35, 0, 0, 0, 0, // push GOTPLT+8(%rip) + 0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip) + 0xcc, 0xcc, 0xcc, 0xcc, // (padding) + 0xcc, 0xcc, 0xcc, 0xcc, // (padding) + 0xcc, 0xcc, 0xcc, 0xcc, // (padding) + 0xcc, 0xcc, // (padding) + }; + + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 8) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 4; + *(ul32 *)(buf + 14) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 2; +} + +template <> +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { + static const u8 insn[] = { + 0xf3, 0x0f, 0x1e, 0xfa, // endbr64 + 0x41, 0xbb, 0, 0, 0, 0, // mov $index_in_relplt, %r11d + 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOTPLT + }; + + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 6) = sym.get_plt_idx(ctx); + *(ul32 *)(buf + 12) = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 16; +} + +template <> +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { + static const u8 insn[] = { + 0xf3, 0x0f, 0x1e, 0xfa, // endbr64 + 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT + 0xcc, 0xcc, 0xcc, 0xcc, // (padding) + 0xcc, 0xcc, // (padding) + }; + + memcpy(buf, insn, sizeof(insn)); + *(ul32 *)(buf + 6) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 10; +} + +template <> +void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, + u64 offset, u64 val) { + u8 *loc = ctx.buf + this->shdr.sh_offset + offset; + + switch (rel.r_type) { + case R_NONE: + break; + case R_X86_64_32: + *(ul32 *)loc = val; + break; + case R_X86_64_64: + *(ul64 *)loc = val; + break; + case R_X86_64_PC32: + *(ul32 *)loc = val - this->shdr.sh_addr - offset; + break; + case R_X86_64_PC64: + *(ul64 *)loc = val - this->shdr.sh_addr - offset; + break; + default: + Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; + } +} + +static u32 relax_gotpcrelx(u8 *loc) { + switch ((loc[0] << 8) | loc[1]) { + case 0xff15: return 0x90e8; // call *0(%rip) -> call 0 + case 0xff25: return 0x90e9; // jmp *0(%rip) -> jmp 0 + } + return 0; +} + +static u32 relax_rex_gotpcrelx(u8 *loc) { + switch ((loc[0] << 16) | (loc[1] << 8) | loc[2]) { + case 0x488b05: return 0x488d05; // mov 0(%rip), %rax -> lea 0(%rip), %rax + case 0x488b0d: return 0x488d0d; // mov 0(%rip), %rcx -> lea 0(%rip), %rcx + case 0x488b15: return 0x488d15; // mov 0(%rip), %rdx -> lea 0(%rip), %rdx + case 0x488b1d: return 0x488d1d; // mov 0(%rip), %rbx -> lea 0(%rip), %rbx + case 0x488b25: return 0x488d25; // mov 0(%rip), %rsp -> lea 0(%rip), %rsp + case 0x488b2d: return 0x488d2d; // mov 0(%rip), %rbp -> lea 0(%rip), %rbp + case 0x488b35: return 0x488d35; // mov 0(%rip), %rsi -> lea 0(%rip), %rsi + case 0x488b3d: return 0x488d3d; // mov 0(%rip), %rdi -> lea 0(%rip), %rdi + case 0x4c8b05: return 0x4c8d05; // mov 0(%rip), %r8 -> lea 0(%rip), %r8 + case 0x4c8b0d: return 0x4c8d0d; // mov 0(%rip), %r9 -> lea 0(%rip), %r9 + case 0x4c8b15: return 0x4c8d15; // mov 0(%rip), %r10 -> lea 0(%rip), %r10 + case 0x4c8b1d: return 0x4c8d1d; // mov 0(%rip), %r11 -> lea 0(%rip), %r11 + case 0x4c8b25: return 0x4c8d25; // mov 0(%rip), %r12 -> lea 0(%rip), %r12 + case 0x4c8b2d: return 0x4c8d2d; // mov 0(%rip), %r13 -> lea 0(%rip), %r13 + case 0x4c8b35: return 0x4c8d35; // mov 0(%rip), %r14 -> lea 0(%rip), %r14 + case 0x4c8b3d: return 0x4c8d3d; // mov 0(%rip), %r15 -> lea 0(%rip), %r15 + } + return 0; +} + +static u32 relax_gottpoff(u8 *loc) { + switch ((loc[0] << 16) | (loc[1] 
<< 8) | loc[2]) { + case 0x488b05: return 0x48c7c0; // mov 0(%rip), %rax -> mov $0, %rax + case 0x488b0d: return 0x48c7c1; // mov 0(%rip), %rcx -> mov $0, %rcx + case 0x488b15: return 0x48c7c2; // mov 0(%rip), %rdx -> mov $0, %rdx + case 0x488b1d: return 0x48c7c3; // mov 0(%rip), %rbx -> mov $0, %rbx + case 0x488b25: return 0x48c7c4; // mov 0(%rip), %rsp -> mov $0, %rsp + case 0x488b2d: return 0x48c7c5; // mov 0(%rip), %rbp -> mov $0, %rbp + case 0x488b35: return 0x48c7c6; // mov 0(%rip), %rsi -> mov $0, %rsi + case 0x488b3d: return 0x48c7c7; // mov 0(%rip), %rdi -> mov $0, %rdi + case 0x4c8b05: return 0x49c7c0; // mov 0(%rip), %r8 -> mov $0, %r8 + case 0x4c8b0d: return 0x49c7c1; // mov 0(%rip), %r9 -> mov $0, %r9 + case 0x4c8b15: return 0x49c7c2; // mov 0(%rip), %r10 -> mov $0, %r10 + case 0x4c8b1d: return 0x49c7c3; // mov 0(%rip), %r11 -> mov $0, %r11 + case 0x4c8b25: return 0x49c7c4; // mov 0(%rip), %r12 -> mov $0, %r12 + case 0x4c8b2d: return 0x49c7c5; // mov 0(%rip), %r13 -> mov $0, %r13 + case 0x4c8b35: return 0x49c7c6; // mov 0(%rip), %r14 -> mov $0, %r14 + case 0x4c8b3d: return 0x49c7c7; // mov 0(%rip), %r15 -> mov $0, %r15 + } + return 0; +} + +static u32 relax_gotpc32_tlsdesc(u8 *loc) { + switch ((loc[0] << 16) | (loc[1] << 8) | loc[2]) { + case 0x488d05: return 0x48c7c0; // lea 0(%rip), %rax -> mov $0, %rax + case 0x488d0d: return 0x48c7c1; // lea 0(%rip), %rcx -> mov $0, %rcx + case 0x488d15: return 0x48c7c2; // lea 0(%rip), %rdx -> mov $0, %rdx + case 0x488d1d: return 0x48c7c3; // lea 0(%rip), %rbx -> mov $0, %rbx + case 0x488d25: return 0x48c7c4; // lea 0(%rip), %rsp -> mov $0, %rsp + case 0x488d2d: return 0x48c7c5; // lea 0(%rip), %rbp -> mov $0, %rbp + case 0x488d35: return 0x48c7c6; // lea 0(%rip), %rsi -> mov $0, %rsi + case 0x488d3d: return 0x48c7c7; // lea 0(%rip), %rdi -> mov $0, %rdi + case 0x4c8d05: return 0x49c7c0; // lea 0(%rip), %r8 -> mov $0, %r8 + case 0x4c8d0d: return 0x49c7c1; // lea 0(%rip), %r9 -> mov $0, %r9 + case 0x4c8d15: return 0x49c7c2; // lea 0(%rip), %r10 -> mov $0, %r10 + case 0x4c8d1d: return 0x49c7c3; // lea 0(%rip), %r11 -> mov $0, %r11 + case 0x4c8d25: return 0x49c7c4; // lea 0(%rip), %r12 -> mov $0, %r12 + case 0x4c8d2d: return 0x49c7c5; // lea 0(%rip), %r13 -> mov $0, %r13 + case 0x4c8d35: return 0x49c7c6; // lea 0(%rip), %r14 -> mov $0, %r14 + case 0x4c8d3d: return 0x49c7c7; // lea 0(%rip), %r15 -> mov $0, %r15 + } + return 0; +} + +// Rewrite a function call to __tls_get_addr to a cheaper instruction +// sequence. We can do this when we know the thread-local variable's TP- +// relative address at link-time. 
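+// For example (an illustration summarizing the byte sequences handled in
+// the function below), a general-dynamic access such as
+//
+//   lea    foo@tlsgd(%rip), %rdi
+//   call   __tls_get_addr@PLT     # returns &foo in %rax
+//
+// can be rewritten for local-exec as
+//
+//   mov    %fs:0, %rax            # %rax = TP
+//   add    $foo@tpoff, %rax       # %rax = &foo, no runtime call
+//
+// as long as foo's offset from the thread pointer is known at link-time.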
+static void relax_gd_to_le(u8 *loc, ElfRel rel, u64 val) { + switch (rel.r_type) { + case R_X86_64_PLT32: + case R_X86_64_PC32: + case R_X86_64_GOTPCREL: + case R_X86_64_GOTPCRELX: { + // The original instructions are the following: + // + // 66 48 8d 3d 00 00 00 00 lea foo@tlsgd(%rip), %rdi + // 66 66 48 e8 00 00 00 00 call __tls_get_addr + // + // or + // + // 66 48 8d 3d 00 00 00 00 lea foo@tlsgd(%rip), %rdi + // 66 48 ff 15 00 00 00 00 call *__tls_get_addr@GOT(%rip) + static const u8 insn[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0, // mov %fs:0, %rax + 0x48, 0x81, 0xc0, 0, 0, 0, 0, // add $tp_offset, %rax + }; + memcpy(loc - 4, insn, sizeof(insn)); + *(ul32 *)(loc + 8) = val; + break; + } + case R_X86_64_PLTOFF64: { + // The original instructions are the following: + // + // 48 8d 3d 00 00 00 00 lea foo@tlsgd(%rip), %rdi + // 48 b8 00 00 00 00 00 00 00 00 movabs __tls_get_addr, %rax + // 48 01 d8 add %rbx, %rax + // ff d0 call *%rax + static const u8 insn[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0, // mov %fs:0, %rax + 0x48, 0x81, 0xc0, 0, 0, 0, 0, // add $tp_offset, %rax + 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, // nop + }; + memcpy(loc - 3, insn, sizeof(insn)); + *(ul32 *)(loc + 9) = val; + break; + } + default: + unreachable(); + } +} + +static void relax_gd_to_ie(u8 *loc, ElfRel rel, u64 val) { + switch (rel.r_type) { + case R_X86_64_PLT32: + case R_X86_64_PC32: + case R_X86_64_GOTPCREL: + case R_X86_64_GOTPCRELX: { + static const u8 insn[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0, // mov %fs:0, %rax + 0x48, 0x03, 0x05, 0, 0, 0, 0, // add foo@gottpoff(%rip), %rax + }; + memcpy(loc - 4, insn, sizeof(insn)); + *(ul32 *)(loc + 8) = val - 12; + break; + } + case R_X86_64_PLTOFF64: { + static const u8 insn[] = { + 0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0, // mov %fs:0, %rax + 0x48, 0x03, 0x05, 0, 0, 0, 0, // add foo@gottpoff(%rip), %rax + 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, // nop + }; + memcpy(loc - 3, insn, sizeof(insn)); + *(ul32 *)(loc + 9) = val - 13; + break; + } + default: + unreachable(); + } +} + +// Rewrite a function call to __tls_get_addr to a cheaper instruction +// sequence. The difference from relax_gd_to_le is that we are +// materializing a Dynamic Thread Pointer for the current ELF module +// instead of an address for a particular thread-local variable. 
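+// For example (an illustration of the rewrite performed below), a
+// local-dynamic sequence such as
+//
+//   lea    foo@tlsld(%rip), %rdi
+//   call   __tls_get_addr@PLT     # returns the module's DTP base in %rax
+//
+// can be rewritten as
+//
+//   xor    %eax, %eax
+//   mov    %fs:(%rax), %rax       # %rax = TP
+//   sub    $tls_size, %rax        # %rax = start of the executable's TLS block
+//
+// because on x86-64 TP sits just past the main executable's TLS block, so
+// the DTP base is TP minus the distance from the block start to TP, which
+// is the value passed in as `val` below.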
+static void relax_ld_to_le(u8 *loc, ElfRel rel, u64 val) { + switch (rel.r_type) { + case R_X86_64_PLT32: + case R_X86_64_PC32: { + // The original instructions are the following: + // + // 48 8d 3d 00 00 00 00 lea foo@tlsld(%rip), %rdi + // e8 00 00 00 00 call __tls_get_addr + static const u8 insn[] = { + 0x31, 0xc0, // xor %eax, %eax + 0x64, 0x48, 0x8b, 0x00, // mov %fs:(%rax), %rax + 0x48, 0x2d, 0, 0, 0, 0, // sub $tls_size, %rax + }; + memcpy(loc - 3, insn, sizeof(insn)); + *(ul32 *)(loc + 5) = val; + break; + } + case R_X86_64_GOTPCREL: + case R_X86_64_GOTPCRELX: { + // The original instructions are the following: + // + // 48 8d 3d 00 00 00 00 lea foo@tlsld(%rip), %rdi + // ff 15 00 00 00 00 call *__tls_get_addr@GOT(%rip) + static const u8 insn[] = { + 0x31, 0xc0, // xor %eax, %eax + 0x64, 0x48, 0x8b, 0x00, // mov %fs:(%rax), %rax + 0x48, 0x2d, 0, 0, 0, 0, // sub $tls_size, %rax + 0x90, // nop + }; + memcpy(loc - 3, insn, sizeof(insn)); + *(ul32 *)(loc + 5) = val; + break; + } + case R_X86_64_PLTOFF64: { + // The original instructions are the following: + // + // 48 8d 3d 00 00 00 00 lea foo@tlsld(%rip), %rdi + // 48 b8 00 00 00 00 00 00 00 00 movabs __tls_get_addr@GOTOFF, %rax + // 48 01 d8 add %rbx, %rax + // ff d0 call *%rax + static const u8 insn[] = { + 0x31, 0xc0, // xor %eax, %eax + 0x64, 0x48, 0x8b, 0x00, // mov %fs:(%rax), %rax + 0x48, 0x2d, 0, 0, 0, 0, // sub $tls_size, %rax + 0x0f, 0x1f, 0x44, 0x00, 0x00, // nop + 0x0f, 0x1f, 0x44, 0x00, 0x00, // nop + }; + memcpy(loc - 3, insn, sizeof(insn)); + *(ul32 *)(loc + 5) = val; + break; + } + default: + unreachable(); + } +} + +// Apply relocations to SHF_ALLOC sections (i.e. sections that are +// mapped to memory at runtime) based on the result of +// scan_relocations(). +template <> +void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + ElfRel *dynrel = nullptr; + if (ctx.reldyn) + dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + file.reldyn_offset + this->reldyn_offset); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + auto write32 = [&](u64 val) { + check(val, 0, 1LL << 32); + *(ul32 *)loc = val; + }; + + auto write32s = [&](u64 val) { + check(val, -(1LL << 31), 1LL << 31); + *(ul32 *)loc = val; + }; + + u64 S = sym.get_addr(ctx); + u64 A = rel.r_addend; + u64 P = get_addr() + rel.r_offset; + u64 G = sym.get_got_addr(ctx) - ctx.gotplt->shdr.sh_addr; + u64 GOTPLT = ctx.gotplt->shdr.sh_addr; + + switch (rel.r_type) { + case R_X86_64_8: + check(S + A, 0, 1 << 8); + *loc = S + A; + break; + case R_X86_64_16: + check(S + A, 0, 1 << 16); + *(ul16 *)loc = S + A; + break; + case R_X86_64_32: + write32(S + A); + break; + case R_X86_64_32S: + write32s(S + A); + break; + case R_X86_64_64: + apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); + break; + case R_X86_64_PC8: + check(S + A - P, -(1 << 7), 1 << 7); + *loc = S + A - P; + break; + case R_X86_64_PC16: + check(S + A - P, -(1 << 15), 1 << 15); + *(ul16 *)loc = S + A - P; + break; + case R_X86_64_PC32: + case R_X86_64_PLT32: + write32s(S + A - P); + break; + case R_X86_64_PC64: + *(ul64 *)loc = S + A - P; + break; + case R_X86_64_GOT32: + write32s(G 
+ A); + break; + case R_X86_64_GOT64: + *(ul64 *)loc = G + A; + break; + case R_X86_64_GOTOFF64: + case R_X86_64_PLTOFF64: + *(ul64 *)loc = S + A - GOTPLT; + break; + case R_X86_64_GOTPC32: + write32s(GOTPLT + A - P); + break; + case R_X86_64_GOTPC64: + *(ul64 *)loc = GOTPLT + A - P; + break; + case R_X86_64_GOTPCREL: + write32s(G + GOTPLT + A - P); + break; + case R_X86_64_GOTPCREL64: + *(ul64 *)loc = G + GOTPLT + A - P; + break; + case R_X86_64_GOTPCRELX: + // We always want to relax GOTPCRELX relocs even if --no-relax + // was given because some static PIE runtime code depends on these + // relaxations. + if (!sym.is_imported && !sym.is_ifunc() && sym.is_relative()) { + u32 insn = relax_gotpcrelx(loc - 2); + i64 val = S + A - P; + if (insn && (i32)val == val) { + loc[-2] = insn >> 8; + loc[-1] = insn; + *(ul32 *)loc = val; + break; + } + } + write32s(G + GOTPLT + A - P); + break; + case R_X86_64_REX_GOTPCRELX: + if (!sym.is_imported && !sym.is_ifunc() && sym.is_relative()) { + u32 insn = relax_rex_gotpcrelx(loc - 3); + i64 val = S + A - P; + if (insn && (i32)val == val) { + loc[-3] = insn >> 16; + loc[-2] = insn >> 8; + loc[-1] = insn; + *(ul32 *)loc = val; + break; + } + } + write32s(G + GOTPLT + A - P); + break; + case R_X86_64_TLSGD: + if (sym.has_tlsgd(ctx)) { + write32s(sym.get_tlsgd_addr(ctx) + A - P); + } else if (sym.has_gottp(ctx)) { + relax_gd_to_ie(loc, rels[i + 1], sym.get_gottp_addr(ctx) - P); + i++; + } else { + relax_gd_to_le(loc, rels[i + 1], S - ctx.tp_addr); + i++; + } + break; + case R_X86_64_TLSLD: + if (ctx.got->has_tlsld(ctx)) { + write32s(ctx.got->get_tlsld_addr(ctx) + A - P); + } else { + relax_ld_to_le(loc, rels[i + 1], ctx.tp_addr - ctx.tls_begin); + i++; + } + break; + case R_X86_64_DTPOFF32: + write32s(S + A - ctx.dtp_addr); + break; + case R_X86_64_DTPOFF64: + *(ul64 *)loc = S + A - ctx.dtp_addr; + break; + case R_X86_64_TPOFF32: + write32s(S + A - ctx.tp_addr); + break; + case R_X86_64_TPOFF64: + *(ul64 *)loc = S + A - ctx.tp_addr; + break; + case R_X86_64_GOTTPOFF: + if (sym.has_gottp(ctx)) { + write32s(sym.get_gottp_addr(ctx) + A - P); + } else { + u32 insn = relax_gottpoff(loc - 3); + loc[-3] = insn >> 16; + loc[-2] = insn >> 8; + loc[-1] = insn; + write32s(S - ctx.tp_addr); + assert(A == -4); + } + break; + case R_X86_64_GOTPC32_TLSDESC: + if (sym.has_tlsdesc(ctx)) { + write32s(sym.get_tlsdesc_addr(ctx) + A - P); + } else { + u32 insn = relax_gotpc32_tlsdesc(loc - 3); + loc[-3] = insn >> 16; + loc[-2] = insn >> 8; + loc[-1] = insn; + write32s(S - ctx.tp_addr); + assert(A == -4); + } + break; + case R_X86_64_SIZE32: + write32(sym.esym().st_size + A); + break; + case R_X86_64_SIZE64: + *(ul64 *)loc = sym.esym().st_size + A; + break; + case R_X86_64_TLSDESC_CALL: + if (!sym.has_tlsdesc(ctx)) { + // call *(%rax) -> nop + loc[0] = 0x66; + loc[1] = 0x90; + } + break; + default: + unreachable(); + } + } +} + +// This function is responsible for applying relocations against +// non-SHF_ALLOC sections (i.e. sections that are not mapped to memory +// at runtime). +// +// Relocations against non-SHF_ALLOC sections are much easier to +// handle than that against SHF_ALLOC sections. It is because, since +// they are not mapped to memory, they don't contain any variable or +// function and never need PLT or GOT. Non-SHF_ALLOC sections are +// mostly debug info sections. +// +// Relocations against non-SHF_ALLOC sections are not scanned by +// scan_relocations. 
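// ----------------------------------------------------------------------
// A worked example (illustrative only; the addresses are invented) of the
// S/A/P formulas used by apply_reloc_alloc() above. For a PC-relative
// `call foo` (opcode e8 followed by a 32-bit displacement), the compiler
// emits an R_X86_64_PLT32 or R_X86_64_PC32 relocation whose r_offset
// points at the displacement field and whose r_addend is -4, because the
// CPU resolves the target relative to the *end* of the instruction,
// four bytes past that field:
//
//   u64 S = 0x401050;       // runtime address of foo
//   u64 P = 0x401021;       // address of the rel32 field (r_offset)
//   i64 A = -4;             // r_addend
//   i32 disp = S + A - P;   // 0x401050 - 0x401025 = 0x2b
//
// write32s() range-checks that result against [-2^31, 2^31), which is
// where the "out of range" diagnostic above comes from.
// ----------------------------------------------------------------------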
+template <> +void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { + std::span> rels = get_rels(ctx); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = base + rel.r_offset; + + auto check = [&](i64 val, i64 lo, i64 hi) { + if (val < lo || hi <= val) + Error(ctx) << *this << ": relocation " << rel << " against " + << sym << " out of range: " << val << " is not in [" + << lo << ", " << hi << ")"; + }; + + auto write32 = [&](u64 val) { + check(val, 0, 1LL << 32); + *(ul32 *)loc = val; + }; + + auto write32s = [&](u64 val) { + check(val, -(1LL << 31), 1LL << 31); + *(ul32 *)loc = val; + }; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = get_fragment(ctx, rel); + + u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); + u64 A = frag ? frag_addend : (i64)rel.r_addend; + + switch (rel.r_type) { + case R_X86_64_8: + check(S + A, 0, 1 << 8); + *loc = S + A; + break; + case R_X86_64_16: + check(S + A, 0, 1 << 16); + *(ul16 *)loc = S + A; + break; + case R_X86_64_32: + write32(S + A); + break; + case R_X86_64_32S: + write32s(S + A); + break; + case R_X86_64_64: + if (std::optional val = get_tombstone(sym, frag)) + *(ul64 *)loc = *val; + else + *(ul64 *)loc = S + A; + break; + case R_X86_64_DTPOFF32: + if (std::optional val = get_tombstone(sym, frag)) + *(ul32 *)loc = *val; + else + write32s(S + A - ctx.dtp_addr); + break; + case R_X86_64_DTPOFF64: + if (std::optional val = get_tombstone(sym, frag)) + *(ul64 *)loc = *val; + else + *(ul64 *)loc = S + A - ctx.dtp_addr; + break; + case R_X86_64_GOTOFF64: + *(ul64 *)loc = S + A - ctx.gotplt->shdr.sh_addr; + break; + case R_X86_64_GOTPC64: + // PC-relative relocation doesn't make sense for non-memory-allocated + // section, but GCC 6.3.0 seems to create this reloc for + // _GLOBAL_OFFSET_TABLE_. + *(ul64 *)loc = ctx.gotplt->shdr.sh_addr + A; + break; + case R_X86_64_SIZE32: + write32(sym.esym().st_size + A); + break; + case R_X86_64_SIZE64: + *(ul64 *)loc = sym.esym().st_size + A; + break; + default: + Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " + << rel; + break; + } + } +} + +// Linker has to create data structures in an output file to apply +// some type of relocations. For example, if a relocation refers a GOT +// or a PLT entry of a symbol, linker has to create an entry in .got +// or in .plt for that symbol. In order to fix the file layout, we +// need to scan relocations. 
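// A simplified sketch (not mold's actual pipeline code) of why the scan
// below has to happen before the file layout is fixed:
//
//   1. scan:   scan_relocations() marks what each symbol needs,
//              e.g. sym.flags |= NEEDS_GOT or NEEDS_PLT;
//   2. layout: .got, .plt and .rela.dyn are sized from those flags and
//              every section and symbol gets its final address;
//   3. apply:  apply_reloc_alloc() writes values such as G + GOTPLT + A - P,
//              which only exist once the layout is known.
//
// The steps cannot be merged: the size of .got depends on the scan, and
// the values written for the relocations depend on where .got ends up.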
+template <> +void InputSection::scan_relocations(Context &ctx) { + assert(shdr().sh_flags & SHF_ALLOC); + + this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); + std::span> rels = get_rels(ctx); + + // Scan relocations + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) + continue; + + Symbol &sym = *file.symbols[rel.r_sym]; + u8 *loc = (u8 *)(contents.data() + rel.r_offset); + + if (sym.is_ifunc()) + sym.flags |= NEEDS_GOT | NEEDS_PLT; + + switch (rel.r_type) { + case R_X86_64_8: + case R_X86_64_16: + case R_X86_64_32: + case R_X86_64_32S: + scan_absrel(ctx, sym, rel); + break; + case R_X86_64_64: + scan_dyn_absrel(ctx, sym, rel); + break; + case R_X86_64_PC8: + case R_X86_64_PC16: + case R_X86_64_PC32: + case R_X86_64_PC64: + scan_pcrel(ctx, sym, rel); + break; + case R_X86_64_GOT32: + case R_X86_64_GOT64: + case R_X86_64_GOTPC32: + case R_X86_64_GOTPC64: + case R_X86_64_GOTPCREL: + case R_X86_64_GOTPCREL64: + case R_X86_64_GOTPCRELX: + case R_X86_64_REX_GOTPCRELX: + sym.flags |= NEEDS_GOT; + break; + case R_X86_64_PLT32: + case R_X86_64_PLTOFF64: + if (sym.is_imported) + sym.flags |= NEEDS_PLT; + break; + case R_X86_64_TLSGD: + if (rel.r_addend != -4) + Fatal(ctx) << *this << ": bad r_addend for R_X86_64_TLSGD"; + + if (i + 1 == rels.size()) + Fatal(ctx) << *this << ": TLSGD reloc must be followed by PLT or GOTPCREL"; + + if (u32 ty = rels[i + 1].r_type; + ty != R_X86_64_PLT32 && ty != R_X86_64_PC32 && + ty != R_X86_64_PLTOFF64 && ty != R_X86_64_GOTPCREL && + ty != R_X86_64_GOTPCRELX) + Fatal(ctx) << *this << ": TLSGD reloc must be followed by PLT or GOTPCREL"; + + if (ctx.arg.is_static || + (ctx.arg.relax && !sym.is_imported && !ctx.arg.shared)) { + // We always relax if -static because libc.a doesn't contain + // __tls_get_addr(). + i++; + } else if (ctx.arg.relax && !sym.is_imported && ctx.arg.shared && + !ctx.arg.z_dlopen) { + sym.flags |= NEEDS_GOTTP; + i++; + } else { + sym.flags |= NEEDS_TLSGD; + } + break; + case R_X86_64_TLSLD: + if (rel.r_addend != -4) + Fatal(ctx) << *this << ": bad r_addend for R_X86_64_TLSLD"; + + if (i + 1 == rels.size()) + Fatal(ctx) << *this << ": TLSLD reloc must be followed by PLT or GOTPCREL"; + + if (u32 ty = rels[i + 1].r_type; + ty != R_X86_64_PLT32 && ty != R_X86_64_PC32 && + ty != R_X86_64_PLTOFF64 && ty != R_X86_64_GOTPCREL && + ty != R_X86_64_GOTPCRELX) + Fatal(ctx) << *this << ": TLSLD reloc must be followed by PLT or GOTPCREL"; + + // We always relax if -static because libc.a doesn't contain + // __tls_get_addr(). 
+ if (ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared)) + i++; + else + ctx.needs_tlsld = true; + break; + case R_X86_64_GOTTPOFF: { + if (rel.r_addend != -4) + Fatal(ctx) << *this << ": bad r_addend for R_X86_64_GOTTPOFF"; + + bool do_relax = ctx.arg.relax && !ctx.arg.shared && + !sym.is_imported && relax_gottpoff(loc - 3); + if (!do_relax) + sym.flags |= NEEDS_GOTTP; + break; + } + case R_X86_64_GOTPC32_TLSDESC: { + if (rel.r_addend != -4) + Fatal(ctx) << *this << ": bad r_addend for R_X86_64_GOTPC32_TLSDESC"; + + if (relax_gotpc32_tlsdesc(loc - 3) == 0) + Fatal(ctx) << *this << ": GOTPC32_TLSDESC relocation is used" + << " against an invalid code sequence"; + + if (!relax_tlsdesc(ctx, sym)) + sym.flags |= NEEDS_TLSDESC; + break; + } + case R_X86_64_TPOFF32: + case R_X86_64_TPOFF64: + check_tlsle(ctx, sym, rel); + break; + case R_X86_64_GOTOFF64: + case R_X86_64_DTPOFF32: + case R_X86_64_DTPOFF64: + case R_X86_64_SIZE32: + case R_X86_64_SIZE64: + case R_X86_64_TLSDESC_CALL: + break; + default: + Error(ctx) << *this << ": unknown relocation: " << rel; + } + } +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/cmdline.cc b/third_party/mold/elf/cmdline.cc new file mode 100644 index 00000000000..685f41469e2 --- /dev/null +++ b/third_party/mold/elf/cmdline.cc @@ -0,0 +1,1278 @@ +// clang-format off +#include "third_party/mold/elf/mold.h" +// MISSING #include "../common/cmdline.h" + +#include "third_party/libcxx/regex" +#include "third_party/libcxx/sstream" +#include "libc/calls/calls.h" +#include "libc/calls/struct/stat.h" +#include "libc/calls/struct/stat.macros.h" +#include "libc/calls/struct/timespec.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/s.h" +#include "libc/sysv/consts/utime.h" +#include "libc/time/time.h" +#include "libc/calls/makedev.h" +#include "libc/calls/weirdtypes.h" +#include "libc/thread/thread.h" +#include "libc/calls/typedef/u.h" +#include "libc/calls/weirdtypes.h" +#include "libc/intrin/newbie.h" +#include "libc/sock/select.h" +#include "libc/sysv/consts/endian.h" +#include "third_party/libcxx/unordered_set" + +#ifdef _WIN32 +# define _isatty isatty +# define STDERR_FILENO (_fileno(stderr)) +#else +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "libc/time/time.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/lockf.h" +#endif + +namespace mold::elf { + +inline const char helpmsg[] = R"( +Options: + --help Report usage information + -v, --version Report version information + -V Report version and target information + -(, --start-group Ignored + -), --end-group Ignored + -C DIR, --directory DIR Change to DIR before doing anything + -E, --export-dynamic Put symbols in the dynamic symbol table + --no-export-dynamic + -F LIBNAME, --filter LIBNAME + Set DT_FILTER to the specified value + -I FILE, --dynamic-linker FILE + Set dynamic linker path + --no-dynamic-linker + -L DIR, --library-path DIR Add DIR to library search path + -M, --print-map Write map file to stdout + -N, --omagic Do not page align data, do not make text readonly + --no-omagic + -O NUMBER Ignored + -S, --strip-debug Strip .debug_* sections + -T FILE, --script FILE Read linker script + -X, --discard-locals Discard temporary local symbols + -e 
SYMBOL, --entry SYMBOL Set program entry point + -f SHLIB, --auxiliary SHLIB Set DT_AUXILIARY to the specified value + -h LIBNAME, --soname LIBNAME + Set shared library name + -l LIBNAME Search for a given library + -m TARGET Set target + -o FILE, --output FILE Set output filename + -q, --emit-relocs Leaves relocation sections in the output + -r, --relocatable Generate relocatable output + -s, --strip-all Strip .symtab section + -u SYMBOL, --undefined SYMBOL + Force to resolve SYMBOL + --Bdynamic, --dy Link against shared libraries (default) + --Bstatic, --dn, --static Do not link against shared libraries + --Bsymbolic Bind global symbols locally + --Bsymbolic-functions Bind global functions locally + --Bno-symbolic Cancel --Bsymbolic and --Bsymbolic-functions + --Map FILE Write map file to a given file + --Tbss=ADDR Set address to .bss + --Tdata Set address to .data + --Ttext Set address to .text + --allow-multiple-definition Allow multiple definitions + --apply-dynamic-relocs Apply link-time values for dynamic relocations (defualt) + --no-apply-dynamic-relocs + --as-needed Only set DT_NEEDED if used + --no-as-needed + --build-id [none,md5,sha1,sha256,uuid,HEXSTRING] + Generate build ID + --no-build-id + --chroot DIR Set a given path to root directory + --color-diagnostics=[auto,always,never] + Use colors in diagnostics + --color-diagnostics Alias for --color-diagnostics=always + --compress-debug-sections [none,zlib,zlib-gabi,zstd] + Compress .debug_* sections + --dc Ignored + --dependency-file=FILE Write Makefile-style dependency rules to FILE + --defsym=SYMBOL=VALUE Define a symbol alias + --demangle Demangle C++ symbols in log messages (default) + --no-demangle + --enable-new-dtags Emit DT_RUNPATH for --rpath (default) + --disable-new-dtags Emit DT_RPATH for --rpath + --execute-only Make executable segments unreadable + --dp Ignored + --dynamic-list Read a list of dynamic symbols (implies -Bsymbolic) + --eh-frame-hdr Create .eh_frame_hdr section + --no-eh-frame-hdr + --exclude-libs LIB,LIB,.. 
Mark all symbols in given libraries hidden + --export-dynamic-symbol Put symbols matching glob in the dynamic symbol table + --export-dynamic-symbol-list + Read a list of dynamic symbols + --fatal-warnings Treat warnings as errors + --no-fatal-warnings Do not treat warnings as errors (default) + --fini SYMBOL Call SYMBOL at unload-time + --fork Spawn a child process (default) + --no-fork + --gc-sections Remove unreferenced sections + --no-gc-sections + --gdb-index Create .gdb_index for faster gdb startup + --hash-style [sysv,gnu,both,none] + Set hash style + --icf=[all,safe,none] Fold identical code + --no-icf + --ignore-data-address-equality + Allow merging non-executable sections with --icf + --image-base ADDR Set the base address to a given value + --init SYMBOL Call SYMBOL at load-time + --no-undefined Report undefined symbols (even with --shared) + --noinhibit-exec Create an output file even if errors occur + --oformat=binary Omit ELF, section and program headers + --pack-dyn-relocs=[relr,none] + Pack dynamic relocations + --package-metadata=STRING Set a given string to .note.package + --perf Print performance statistics + --pie, --pic-executable Create a position independent executable + --no-pie, --no-pic-executable + --pop-state Restore state of flags governing input file handling + --print-gc-sections Print removed unreferenced sections + --no-print-gc-sections + --print-icf-sections Print folded identical sections + --no-print-icf-sections + --push-state Save state of flags governing input file handling + --quick-exit Use quick_exit to exit (default) + --no-quick-exit + --relax Optimize instructions (default) + --no-relax + --repro Embed input files to .repro section + --require-defined SYMBOL Require SYMBOL be defined in the final output + --retain-symbols-file FILE Keep only symbols listed in FILE + --reverse-sections Reverses input sections in the output file + --rosegment Put read-only non-executable sections in their own segment (default) + --no-rosegment Put read-only non-executable sections in an executable segment + --rpath DIR Add DIR to runtime search path + --rpath-link DIR Ignored + --run COMMAND ARG... 
Run COMMAND with mold as /usr/bin/ld + --section-start=SECTION=ADDR Set address to section + --shared, --Bshareable Create a share library + --shuffle-sections[=SEED] Randomize the output by shuffling input sections + --sort-common Ignored + --sort-section Ignored + --spare-dynamic-tags NUMBER Reserve give number of tags in .dynamic section + --start-lib Give following object files in-archive-file semantics + --end-lib End the effect of --start-lib + --stats Print input statistics + --sysroot DIR Set target system root directory + --thread-count COUNT, --threads=COUNT + Use COUNT number of threads + --threads Use multiple threads (default) + --no-threads + --trace Print name of each input file + --undefined-version Do not report version scripts that refer undefined symbols + --no-undefined-version Report version scripts that refer undefined symbols (default) + --unique PATTERN Don't merge input sections that match a given pattern + --unresolved-symbols [report-all,ignore-all,ignore-in-object-files,ignore-in-shared-libs] + How to handle unresolved symbols + --version-script FILE Read version script + --warn-common Warn about common symbols + --no-warn-common + --warn-once Only warn once for each undefined symbol + --warn-shared-textrel Warn if the output .so needs text relocations + --warn-textrel Warn if the output file needs text relocations + --warn-unresolved-symbols Report unresolved symbols as warnings + --error-unresolved-symbols + Report unresolved symbols as errors (default) + --whole-archive Include all objects from static archives + --no-whole-archive + --wrap SYMBOL Use wrapper function for a given symbol + -z defs Report undefined symbols (even with --shared) + -z nodefs + -z common-page-size=VALUE Ignored + -z execstack Require executable stack + -z noexecstack + -z execstack-if-needed Make the stack area execuable if an input file explicitly requests it + -z initfirst Mark DSO to be initialized first at runtime + -z interpose Mark object to interpose all DSOs but executable + -z keep-text-section-prefix Keep .text.{hot,unknown,unlikely,startup,exit} as separate sections in the final binary + -z nokeep-text-section-prefix + -z lazy Enable lazy function resolution (default) + -z max-page-size=VALUE Use VALUE as the memory page size + -z nocopyreloc Do not create copy relocations + -z nodefaultlib Make the dynamic loader to ignore default search paths + -z nodelete Mark DSO non-deletable at runtime + -z nodlopen Mark DSO not available to dlopen + -z nodump Mark DSO not available to dldump + -z now Disable lazy function resolution + -z origin Mark object requiring immediate $ORIGIN processing at runtime + -z pack-relative-relocs Alias for --pack-dyn-relocs=relr + -z nopack-relative-relocs + -z separate-loadable-segments + Separate all loadable segments to different pages + -z separate-code Separate code and data into different pages + -z noseparate-code Allow overlap in pages + -z relro Make some sections read-only after relocation (default) + -z norelro + -z text Report error if DT_TEXTREL is set + -z notext + -z textoff + +mold: supported targets: elf32-i386 elf64-x86-64 elf32-littlearm elf64-littleaarch64 elf32-littleriscv elf32-bigriscv elf64-littleriscv elf64-bigriscv elf32-powerpc elf64-powerpc elf64-powerpc elf64-powerpcle elf64-s390 elf64-sparc elf32-m68k elf32-sh-linux elf64-alpha +mold: supported emulations: elf_i386 elf_x86_64 armelf_linux_eabi aarch64linux aarch64elf elf32lriscv elf32briscv elf64lriscv elf64briscv elf32ppc elf32ppclinux elf64ppc elf64lppc elf64_s390 
elf64_sparc m68kelf shlelf_linux elf64alpha)"; + +static std::vector add_dashes(std::string name) { + // Single-letter option + if (name.size() == 1) + return {"-" + name}; + + // Multi-letter linker options can be preceded by either a single + // dash or double dashes except ones starting with "o", which must + // be preceded by double dashes. For example, "-omagic" is + // interpreted as "-o magic". If you really want to specify the + // "omagic" option, you have to pass "--omagic". + if (name[0] == 'o') + return {"--" + name}; + return {"-" + name, "--" + name}; +} + +template +static i64 parse_hex(Context &ctx, std::string opt, std::string_view value) { + auto flags = std::regex_constants::optimize | std::regex_constants::ECMAScript; + static std::regex re(R"((?:0x|0X)?([0-9a-fA-F]+))", flags); + + std::cmatch m; + if (!std::regex_match(value.data(), value.data() + value.size(), m, re)) + Fatal(ctx) << "option -" << opt << ": not a hexadecimal number"; + return std::stoul(m[1], nullptr, 16); +} + +template +static i64 parse_number(Context &ctx, std::string opt, + std::string_view value) { + size_t nread; + + if (value.starts_with('-')) { + i64 ret = std::stoul(std::string(value.substr(1)), &nread, 0); + if (value.size() - 1 != nread) + Fatal(ctx) << "option -" << opt << ": not a number: " << value; + return -ret; + } + + i64 ret = std::stoul(std::string(value), &nread, 0); + if (value.size() != nread) + Fatal(ctx) << "option -" << opt << ": not a number: " << value; + return ret; +} + +template +static std::vector parse_hex_build_id(Context &ctx, std::string_view arg) { + auto flags = std::regex_constants::optimize | std::regex_constants::ECMAScript; + static std::regex re(R"(0[xX]([0-9a-fA-F][0-9a-fA-F])+)", flags); + + if (!std::regex_match(arg.begin(), arg.end(), re)) + Fatal(ctx) << "invalid build-id: " << arg; + + arg = arg.substr(2); + + auto fn = [](char c) { + if ('0' <= c && c <= '9') + return c - '0'; + if ('a' <= c && c <= 'f') + return c - 'a' + 10; + assert('A' <= c && c <= 'F'); + return c - 'A' + 10; + }; + + std::vector vec; + for (i64 i = 0; i < arg.size(); i += 2) + vec.push_back((fn(arg[i]) << 4) | fn(arg[i + 1])); + return vec; +} + +static std::vector +split_by_comma_or_colon(std::string_view str) { + std::vector vec; + + for (;;) { + i64 pos = str.find_first_of(",:"); + if (pos == str.npos) { + vec.push_back(str); + break; + } + vec.push_back(str.substr(0, pos)); + str = str.substr(pos); + } + return vec; +} + +template +static void read_retain_symbols_file(Context &ctx, std::string_view path) { + MappedFile> *mf = + MappedFile>::must_open(ctx, std::string(path)); + std::string_view data((char *)mf->data, mf->size); + + ctx.arg.retain_symbols_file.reset(new std::unordered_set); + + while (!data.empty()) { + size_t pos = data.find('\n'); + std::string_view name; + + if (pos == data.npos) { + name = data; + data = ""; + } else { + name = data.substr(0, pos); + data = data.substr(pos + 1); + } + + name = string_trim(name); + if (!name.empty()) + ctx.arg.retain_symbols_file->insert(name); + } +} + +static bool is_file(std::string_view path) { + struct stat st; + return stat(std::string(path).c_str(), &st) == 0 && + (st.st_mode & S_IFMT) != S_IFDIR; +} + +template +static std::vector +parse_section_order(Context &ctx, std::string_view arg) { + auto flags = std::regex_constants::ECMAScript | std::regex_constants::icase | + std::regex_constants::optimize; + static std::regex re1(R"(^\s*(TEXT|DATA|RODATA|BSS)(?:\s|$))", flags); + static std::regex 
re2(R"(^\s*([a-zA-Z0-9_.][^\s]*|EHDR|PHDR)(?:\s|$))", flags); + static std::regex re3(R"(^\s*=(0x[0-9a-f]+|\d+)(?:\s|$))", flags); + static std::regex re4(R"(^\s*%(0x[0-9a-f]+|\d*)(?:\s|$))", flags); + static std::regex re5(R"(^\s*!(\S+)(?:\s|$))", flags); + + std::vector vec; + arg = string_trim(arg); + + while (!arg.empty()) { + SectionOrder ord; + std::cmatch m; + + if (std::regex_search(arg.data(), arg.data() + arg.size(), m, re1)) { + ord.type = SectionOrder::GROUP; + ord.name = m[1].str(); + } else if (std::regex_search(arg.data(), arg.data() + arg.size(), m, re2)) { + ord.type = SectionOrder::SECTION; + ord.name = m[1].str(); + } else if (std::regex_search(arg.data(), arg.data() + arg.size(), m, re3)) { + ord.type = SectionOrder::ADDR; + std::string s = m[1]; + ord.value = std::stoull(s, nullptr, s.starts_with("0x") ? 16 : 10); + } else if (std::regex_search(arg.data(), arg.data() + arg.size(), m, re4)) { + ord.type = SectionOrder::ALIGN; + std::string s = m[1]; + ord.value = std::stoull(s, nullptr, s.starts_with("0x") ? 16 : 10); + } else if (std::regex_search(arg.data(), arg.data() + arg.size(), m, re5)) { + ord.type = SectionOrder::SYMBOL; + ord.name = m[1].str(); + } else { + Fatal(ctx) << "--section-order: parse error: " << arg; + } + + vec.push_back(ord); + arg = arg.substr(m[0].length()); + } + + bool is_first = true; + for (SectionOrder &ord : vec) { + if (ord.type == SectionOrder::SECTION) { + if (is_first) { + is_first = false; + } else if (ord.name == "EHDR") + Fatal(ctx) << "--section-order: EHDR must be the first " + << "section specifier: " << arg; + } + } + + return vec; +} + +template +static std::variant *, u64> +parse_defsym_value(Context &ctx, std::string_view s) { + if (s.starts_with("0x") || s.starts_with("0X")) { + size_t nread; + u64 addr = std::stoull(std::string(s), &nread, 16); + if (s.size() != nread) + return {}; + return addr; + } + + if (s.find_first_not_of("0123456789") == s.npos) + return (u64)std::stoull(std::string(s), nullptr, 10); + return get_symbol(ctx, s); +} + +template +std::vector parse_nonpositional_args(Context &ctx) { + std::span args = ctx.cmdline_args; + args = args.subspan(1); + + std::vector remaining; + std::string_view arg; + + ctx.page_size = E::page_size; + ctx.arg.color_diagnostics = isatty(STDERR_FILENO); + + bool version_shown = false; + bool warn_shared_textrel = false; + std::optional z_separate_code; + std::optional z_relro; + std::unordered_set rpaths; + + auto add_rpath = [&](std::string_view arg) { + if (rpaths.insert(arg).second) { + if (!ctx.arg.rpaths.empty()) + ctx.arg.rpaths += ':'; + ctx.arg.rpaths += arg; + } + }; + + // RISC-V object files contains lots of local symbols, so by default + // we discard them. This is compatible with GNU ld. + if constexpr (is_riscv) + ctx.arg.discard_locals = true; + + // It looks like the SPARC's dynamic linker takes both RELA's r_addend + // and the value at the relocated place. So we don't want to write + // values to relocated places. + if (is_sparc) + ctx.arg.apply_dynamic_relocs = false; + + auto read_arg = [&](std::string name) { + for (std::string opt : add_dashes(name)) { + if (args[0] == opt) { + if (args.size() == 1) + Fatal(ctx) << "option -" << name << ": argument missing"; + arg = args[1]; + args = args.subspan(2); + return true; + } + + std::string prefix = (name.size() == 1) ? 
opt : opt + "="; + if (args[0].starts_with(prefix)) { + arg = args[0].substr(prefix.size()); + args = args.subspan(1); + return true; + } + } + return false; + }; + + auto read_eq = [&](std::string name) { + for (std::string opt : add_dashes(name)) { + if (args[0].starts_with(opt + "=")) { + arg = args[0].substr(opt.size() + 1); + args = args.subspan(1); + return true; + } + } + return false; + }; + + auto read_flag = [&](std::string name) { + for (std::string opt : add_dashes(name)) { + if (args[0] == opt) { + args = args.subspan(1); + return true; + } + } + return false; + }; + + auto read_z_flag = [&](std::string name) { + if (args.size() >= 2 && args[0] == "-z" && args[1] == name) { + args = args.subspan(2); + return true; + } + + if (!args.empty() && args[0] == "-z" + name) { + args = args.subspan(1); + return true; + } + return false; + }; + + auto read_z_arg = [&](std::string name) { + if (args.size() >= 2 && args[0] == "-z" && args[1].starts_with(name + "=")) { + arg = args[1].substr(name.size() + 1); + args = args.subspan(2); + return true; + } + + if (!args.empty() && args[0].starts_with("-z" + name + "=")) { + arg = args[0].substr(name.size() + 3); + args = args.subspan(1); + return true; + } + return false; + }; + + while (!args.empty()) { + if (read_flag("help")) { + SyncOut(ctx) << "Usage: " << ctx.cmdline_args[0] + << " [options] file...\n" << helpmsg; + exit(0); + } + + if (read_arg("o") || read_arg("output")) { + ctx.arg.output = arg; + } else if (read_arg("dynamic-linker") || read_arg("I")) { + ctx.arg.dynamic_linker = arg; + } else if (read_flag("no-dynamic-linker")) { + ctx.arg.dynamic_linker = ""; + } else if (read_flag("v")) { + SyncOut(ctx) << mold_version; + version_shown = true; + } else if (read_flag("version")) { + SyncOut(ctx) << mold_version; + exit(0); + } else if (read_flag("V")) { + SyncOut(ctx) << mold_version + << "\n Supported emulations:\n elf_x86_64\n elf_i386\n" + << " aarch64linux\n armelf_linux_eabi\n elf64lriscv\n" + << " elf64briscv\n elf32lriscv\n elf32briscv\n" + << " elf32ppc\n elf64ppc\n elf64lppc\n elf64_s390\n" + << " elf64_sparc\n m68kelf\n shlelf_linux\n elf64alpha"; + version_shown = true; + } else if (read_arg("m")) { + if (arg == "elf_x86_64") { + ctx.arg.emulation = X86_64::target_name; + } else if (arg == "elf_i386") { + ctx.arg.emulation = I386::target_name; + } else if (arg == "aarch64linux") { + ctx.arg.emulation = ARM64::target_name; + } else if (arg == "armelf_linux_eabi") { + ctx.arg.emulation = ARM32::target_name; + } else if (arg == "elf64lriscv") { + ctx.arg.emulation = RV64LE::target_name; + } else if (arg == "elf64briscv") { + ctx.arg.emulation = RV64BE::target_name; + } else if (arg == "elf32lriscv") { + ctx.arg.emulation = RV32LE::target_name; + } else if (arg == "elf32briscv") { + ctx.arg.emulation = RV32BE::target_name; + } else if (arg == "elf32ppc" || arg == "elf32ppclinux") { + ctx.arg.emulation = PPC32::target_name; + } else if (arg == "elf64ppc") { + ctx.arg.emulation = PPC64V1::target_name; + } else if (arg == "elf64lppc") { + ctx.arg.emulation = PPC64V2::target_name; + } else if (arg == "elf64_s390") { + ctx.arg.emulation = S390X::target_name; + } else if (arg == "elf64_sparc") { + ctx.arg.emulation = SPARC64::target_name; + } else if (arg == "m68kelf") { + ctx.arg.emulation = M68K::target_name; + } else if (arg == "shlelf_linux") { + ctx.arg.emulation = SH4::target_name; + } else if (arg == "elf64alpha") { + ctx.arg.emulation = ALPHA::target_name; + } else { + Fatal(ctx) << "unknown -m argument: " << arg; + } + } 
else if (read_flag("end-lib")) { + remaining.push_back("--end-lib"); + } else if (read_flag("export-dynamic") || read_flag("E")) { + ctx.arg.export_dynamic = true; + } else if (read_flag("no-export-dynamic")) { + ctx.arg.export_dynamic = false; + } else if (read_flag("Bsymbolic")) { + ctx.arg.Bsymbolic = true; + } else if (read_flag("Bsymbolic-functions")) { + ctx.arg.Bsymbolic_functions = true; + } else if (read_flag("Bno-symbolic")) { + ctx.arg.Bsymbolic = false; + ctx.arg.Bsymbolic_functions = false; + } else if (read_arg("exclude-libs")) { + append(ctx.arg.exclude_libs, split_by_comma_or_colon(arg)); + } else if (read_flag("q") || read_flag("emit-relocs")) { + ctx.arg.emit_relocs = true; + ctx.arg.discard_locals = false; + } else if (read_arg("e") || read_arg("entry")) { + ctx.arg.entry = arg; + } else if (read_arg("Map")) { + ctx.arg.Map = arg; + ctx.arg.print_map = true; + } else if (read_flag("print-dependencies")) { + ctx.arg.print_dependencies = true; + } else if (read_flag("print-map") || read_flag("M")) { + ctx.arg.print_map = true; + } else if (read_flag("Bstatic") || read_flag("dn") || read_flag("static")) { + ctx.arg.is_static = true; + remaining.push_back("--Bstatic"); + } else if (read_flag("Bdynamic") || read_flag("dy")) { + ctx.arg.is_static = false; + remaining.push_back("--Bdynamic"); + } else if (read_flag("shared") || read_flag("Bshareable")) { + ctx.arg.shared = true; + } else if (read_arg("spare-dynamic-tags")) { + ctx.arg.spare_dynamic_tags = parse_number(ctx, "spare-dynamic-tags", arg); + } else if (read_flag("start-lib")) { + remaining.push_back("--start-lib"); + } else if (read_flag("start-stop")) { + ctx.arg.start_stop = true; + } else if (read_arg("dependency-file")) { + ctx.arg.dependency_file = arg; + } else if (read_arg("defsym")) { + size_t pos = arg.find('='); + if (pos == arg.npos || pos == arg.size() - 1) + Fatal(ctx) << "-defsym: syntax error: " << arg; + ctx.arg.defsyms.emplace_back(get_symbol(ctx, arg.substr(0, pos)), + parse_defsym_value(ctx, arg.substr(pos + 1))); + } else if (read_flag(":lto-pass2")) { + ctx.arg.lto_pass2 = true; + } else if (read_arg(":ignore-ir-file")) { + ctx.arg.ignore_ir_file.insert(arg); + } else if (read_flag("demangle")) { + ctx.arg.demangle = true; + } else if (read_flag("no-demangle")) { + ctx.arg.demangle = false; + } else if (read_flag("default-symver")) { + ctx.arg.default_symver = true; + } else if (read_flag("noinhibit-exec")) { + ctx.arg.noinhibit_exec = true; + } else if (read_flag("shuffle-sections")) { + ctx.arg.shuffle_sections = SHUFFLE_SECTIONS_SHUFFLE; + } else if (read_eq("shuffle-sections")) { + ctx.arg.shuffle_sections = SHUFFLE_SECTIONS_SHUFFLE; + ctx.arg.shuffle_sections_seed = parse_number(ctx, "shuffle-sections", arg); + } else if (read_flag("reverse-sections")) { + ctx.arg.shuffle_sections = SHUFFLE_SECTIONS_REVERSE; + } else if (read_flag("rosegment")) { + ctx.arg.rosegment = true; + } else if (read_flag("no-rosegment")) { + ctx.arg.rosegment = false; + } else if (read_arg("y") || read_arg("trace-symbol")) { + ctx.arg.trace_symbol.push_back(arg); + } else if (read_arg("filler")) { + ctx.arg.filler = parse_hex(ctx, "filler", arg); + } else if (read_arg("L") || read_arg("library-path")) { + ctx.arg.library_paths.push_back(std::string(arg)); + } else if (read_arg("sysroot")) { + ctx.arg.sysroot = arg; + } else if (read_arg("unique")) { + std::optional pat = Glob::compile(arg); + if (!pat) + Fatal(ctx) << "-unique: invalid glob pattern: " << arg; + ctx.arg.unique = std::move(*pat); + } else if 
(read_arg("unresolved-symbols")) { + if (arg == "report-all" || arg == "ignore-in-shared-libs") + ctx.arg.unresolved_symbols = UNRESOLVED_ERROR; + else if (arg == "ignore-all" || arg == "ignore-in-object-files") + ctx.arg.unresolved_symbols = UNRESOLVED_IGNORE; + else + Fatal(ctx) << "unknown --unresolved-symbols argument: " << arg; + } else if (read_arg("u") || read_arg("undefined")) { + ctx.arg.undefined.push_back(arg); + } else if (read_arg("require-defined")) { + ctx.arg.require_defined.push_back(arg); + } else if (read_arg("init")) { + ctx.arg.init = arg; + } else if (read_arg("fini")) { + ctx.arg.fini = arg; + } else if (read_arg("hash-style")) { + if (arg == "sysv") { + ctx.arg.hash_style_sysv = true; + ctx.arg.hash_style_gnu = false; + } else if (arg == "gnu") { + ctx.arg.hash_style_sysv = false; + ctx.arg.hash_style_gnu = true; + } else if (arg == "both") { + ctx.arg.hash_style_sysv = true; + ctx.arg.hash_style_gnu = true; + } else if (arg == "none") { + ctx.arg.hash_style_sysv = false; + ctx.arg.hash_style_gnu = false; + } else { + Fatal(ctx) << "invalid --hash-style argument: " << arg; + } + } else if (read_arg("soname") || + read_arg("h")) { + ctx.arg.soname = arg; + } else if (read_flag("allow-multiple-definition")) { + ctx.arg.allow_multiple_definition = true; + } else if (read_flag("apply-dynamic-relocs")) { + ctx.arg.apply_dynamic_relocs = true; + } else if (read_flag("no-apply-dynamic-relocs")) { + ctx.arg.apply_dynamic_relocs = false; + } else if (read_flag("trace")) { + ctx.arg.trace = true; + } else if (read_flag("eh-frame-hdr")) { + ctx.arg.eh_frame_hdr = true; + } else if (read_flag("no-eh-frame-hdr")) { + ctx.arg.eh_frame_hdr = false; + } else if (read_flag("pie") || read_flag("pic-executable")) { + ctx.arg.pic = true; + ctx.arg.pie = true; + } else if (read_flag("no-pie") || read_flag("no-pic-executable") || + read_flag("nopie")) { + ctx.arg.pic = false; + ctx.arg.pie = false; + } else if (read_flag("relax")) { + ctx.arg.relax = true; + } else if (read_flag("no-relax")) { + ctx.arg.relax = false; + } else if (read_flag("gdb-index")) { + ctx.arg.gdb_index = true; + } else if (read_flag("no-gdb-index")) { + ctx.arg.gdb_index = false; + } else if (read_flag("r") || read_flag("relocatable")) { + ctx.arg.relocatable = true; + ctx.arg.emit_relocs = true; + ctx.arg.discard_locals = false; + } else if (read_flag("relocatable-merge-sections")) { + ctx.arg.relocatable_merge_sections = true; + } else if (read_flag("perf")) { + ctx.arg.perf = true; + } else if (read_flag("pack-dyn-relocs=relr")) { + ctx.arg.pack_dyn_relocs_relr = true; + } else if (read_flag("pack-dyn-relocs=none")) { + ctx.arg.pack_dyn_relocs_relr = false; + } else if (read_arg("package-metadata")) { + ctx.arg.package_metadata = arg; + } else if (read_flag("stats")) { + ctx.arg.stats = true; + Counter::enabled = true; + } else if (read_arg("C") || read_arg("directory")) { + ctx.arg.directory = arg; + } else if (read_arg("chroot")) { + ctx.arg.chroot = arg; + } else if (read_flag("color-diagnostics") || + read_flag("color-diagnostics=auto")) { + ctx.arg.color_diagnostics = isatty(STDERR_FILENO); + } else if (read_flag("color-diagnostics=always")) { + ctx.arg.color_diagnostics = true; + } else if (read_flag("color-diagnostics=never")) { + ctx.arg.color_diagnostics = false; + } else if (read_flag("warn-common")) { + ctx.arg.warn_common = true; + } else if (read_flag("no-warn-common")) { + ctx.arg.warn_common = false; + } else if (read_flag("warn-once")) { + ctx.arg.warn_once = true; + } else if 
(read_flag("warn-shared-textrel")) { + warn_shared_textrel = true; + } else if (read_flag("warn-textrel")) { + ctx.arg.warn_textrel = true; + } else if (read_flag("enable-new-dtags")) { + ctx.arg.enable_new_dtags = true; + } else if (read_flag("disable-new-dtags")) { + ctx.arg.enable_new_dtags = false; + } else if (read_flag("execute-only")) { + ctx.arg.execute_only = true; + } else if (read_arg("compress-debug-sections")) { + if (arg == "zlib" || arg == "zlib-gabi") + ctx.arg.compress_debug_sections = COMPRESS_ZLIB; + else if (arg == "zstd") + ctx.arg.compress_debug_sections = COMPRESS_ZSTD; + else if (arg == "none") + ctx.arg.compress_debug_sections = COMPRESS_NONE; + else + Fatal(ctx) << "invalid --compress-debug-sections argument: " << arg; + } else if (read_arg("wrap")) { + ctx.arg.wrap.insert(arg); + } else if (read_flag("omagic") || read_flag("N")) { + ctx.arg.omagic = true; + ctx.arg.is_static = true; + } else if (read_flag("no-omagic")) { + ctx.arg.omagic = false; + } else if (read_arg("oformat")) { + if (arg != "binary") + Fatal(ctx) << "-oformat: " << arg << " is not supported"; + ctx.arg.oformat_binary = true; + } else if (read_arg("retain-symbols-file")) { + read_retain_symbols_file(ctx, arg); + } else if (read_arg("section-align")) { + size_t pos = arg.find('='); + if (pos == arg.npos || pos == arg.size() - 1) + Fatal(ctx) << "--section-align: syntax error: " << arg; + i64 value = parse_number(ctx, "section-align", arg.substr(pos + 1)); + if (!has_single_bit(value)) + Fatal(ctx) << "--section-align=" << arg << ": value must be a power of 2"; + ctx.arg.section_align[arg.substr(0, pos)] = value; + } else if (read_arg("section-start")) { + size_t pos = arg.find('='); + if (pos == arg.npos || pos == arg.size() - 1) + Fatal(ctx) << "--section-start: syntax error: " << arg; + ctx.arg.section_start[arg.substr(0, pos)] = + parse_hex(ctx, "section-start", arg.substr(pos + 1)); + } else if (read_arg("section-order")) { + ctx.arg.section_order = parse_section_order(ctx, arg); + } else if (read_arg("Tbss")) { + ctx.arg.section_start[".bss"] = parse_hex(ctx, "Tbss", arg); + } else if (read_arg("Tdata")) { + ctx.arg.section_start[".data"] = parse_hex(ctx, "Tdata", arg); + } else if (read_arg("Ttext")) { + ctx.arg.section_start[".text"] = parse_hex(ctx, "Ttext", arg); + } else if (read_flag("repro")) { + ctx.arg.repro = true; + } else if (read_z_flag("now")) { + ctx.arg.z_now = true; + } else if (read_z_flag("lazy")) { + ctx.arg.z_now = false; + } else if (read_z_flag("cet-report=none")) { + ctx.arg.z_cet_report = CET_REPORT_NONE; + } else if (read_z_flag("cet-report=warning")) { + ctx.arg.z_cet_report = CET_REPORT_WARNING; + } else if (read_z_flag("cet-report=error")) { + ctx.arg.z_cet_report = CET_REPORT_ERROR; + } else if (read_z_flag("execstack")) { + ctx.arg.z_execstack = true; + } else if (read_z_flag("execstack-if-needed")) { + ctx.arg.z_execstack_if_needed = true; + } else if (read_z_arg("max-page-size")) { + ctx.page_size = parse_number(ctx, "-z max-page-size", arg); + if (!has_single_bit(ctx.page_size)) + Fatal(ctx) << "-z max-page-size " << arg << ": value must be a power of 2"; + } else if (read_z_arg("start-stop-visibility")) { + if (arg != "hidden") + Fatal(ctx) << "-z start-stop-visibility: unsupported visibility: " << arg; + } else if (read_z_flag("noexecstack")) { + ctx.arg.z_execstack = false; + } else if (read_z_flag("relro")) { + z_relro = true; + } else if (read_z_flag("norelro")) { + z_relro = false; + } else if (read_z_flag("defs")) { + ctx.arg.z_defs = true; + } else 
if (read_z_flag("undefs")) { + ctx.arg.z_defs = false; + } else if (read_z_flag("nodlopen")) { + ctx.arg.z_dlopen = false; + } else if (read_z_flag("nodelete")) { + ctx.arg.z_delete = false; + } else if (read_z_flag("nocopyreloc")) { + ctx.arg.z_copyreloc = false; + } else if (read_z_flag("nodump")) { + ctx.arg.z_dump = false; + } else if (read_z_flag("initfirst")) { + ctx.arg.z_initfirst = true; + } else if (read_z_flag("interpose")) { + ctx.arg.z_interpose = true; + } else if (read_z_flag("ibt")) { + ctx.arg.z_ibt = true; + } else if (read_z_flag("ibtplt")) { + } else if (read_z_flag("muldefs")) { + ctx.arg.allow_multiple_definition = true; + } else if (read_z_flag("keep-text-section-prefix")) { + ctx.arg.z_keep_text_section_prefix = true; + } else if (read_z_flag("nokeep-text-section-prefix")) { + ctx.arg.z_keep_text_section_prefix = false; + } else if (read_z_flag("shstk")) { + ctx.arg.z_shstk = true; + } else if (read_z_flag("text")) { + ctx.arg.z_text = true; + } else if (read_z_flag("notext") || read_z_flag("textoff")) { + ctx.arg.z_text = false; + } else if (read_z_flag("origin")) { + ctx.arg.z_origin = true; + } else if (read_z_flag("nodefaultlib")) { + ctx.arg.z_nodefaultlib = true; + } else if (read_z_flag("pack-relative-relocs")) { + ctx.arg.pack_dyn_relocs_relr = true; + } else if (read_z_flag("nopack-relative-relocs")) { + ctx.arg.pack_dyn_relocs_relr = false; + } else if (read_z_flag("separate-loadable-segments")) { + z_separate_code = SEPARATE_LOADABLE_SEGMENTS; + } else if (read_z_flag("separate-code")) { + z_separate_code = SEPARATE_CODE; + } else if (read_z_flag("noseparate-code")) { + z_separate_code = NOSEPARATE_CODE; + } else if (read_z_flag("dynamic-undefined-weak")) { + ctx.arg.z_dynamic_undefined_weak = true; + } else if (read_z_flag("nodynamic-undefined-weak")) { + ctx.arg.z_dynamic_undefined_weak = false; + } else if (read_flag("no-undefined")) { + ctx.arg.z_defs = true; + } else if (read_flag("fatal-warnings")) { + ctx.arg.fatal_warnings = true; + } else if (read_flag("no-fatal-warnings")) { + ctx.arg.fatal_warnings = false; + } else if (read_flag("fork")) { + ctx.arg.fork = true; + } else if (read_flag("no-fork")) { + ctx.arg.fork = false; + } else if (read_flag("gc-sections")) { + ctx.arg.gc_sections = true; + } else if (read_flag("no-gc-sections")) { + ctx.arg.gc_sections = false; + } else if (read_flag("print-gc-sections")) { + ctx.arg.print_gc_sections = true; + } else if (read_flag("no-print-gc-sections")) { + ctx.arg.print_gc_sections = false; + } else if (read_arg("icf")) { + if (arg == "all") { + ctx.arg.icf = true; + ctx.arg.icf_all = true; + } else if (arg == "safe") { + ctx.arg.icf = true; + } else if (arg == "none") { + ctx.arg.icf = false; + } else { + Fatal(ctx) << "unknown --icf argument: " << arg; + } + } else if (read_flag("no-icf")) { + ctx.arg.icf = false; + } else if (read_flag("ignore-data-address-equality")) { + ctx.arg.ignore_data_address_equality = true; + } else if (read_arg("image-base")) { + ctx.arg.image_base = parse_number(ctx, "image-base", arg); + } else if (read_arg("physical-image-base")) { + ctx.arg.physical_image_base = parse_number(ctx, "physical-image-base", arg); + } else if (read_flag("print-icf-sections")) { + ctx.arg.print_icf_sections = true; + } else if (read_flag("no-print-icf-sections")) { + ctx.arg.print_icf_sections = false; + } else if (read_flag("quick-exit")) { + ctx.arg.quick_exit = true; + } else if (read_flag("no-quick-exit")) { + ctx.arg.quick_exit = false; + } else if (read_arg("plugin")) { + ctx.arg.plugin 
= arg; + } else if (read_arg("plugin-opt")) { + ctx.arg.plugin_opt.push_back(std::string(arg)); + } else if (read_flag("lto-cs-profile-generate")) { + ctx.arg.plugin_opt.push_back("cs-profile-generate"); + } else if (read_arg("lto-cs-profile-file")) { + ctx.arg.plugin_opt.push_back("cs-profile-path=" + std::string(arg)); + } else if (read_flag("lto-debug-pass-manager")) { + ctx.arg.plugin_opt.push_back("debug-pass-manager"); + } else if (read_flag("disable-verify")) { + ctx.arg.plugin_opt.push_back("disable-verify"); + } else if (read_flag("lto-emit-asm")) { + ctx.arg.plugin_opt.push_back("emit-asm"); + } else if (read_arg("thinlto-jobs")) { + ctx.arg.plugin_opt.push_back("jobs=" + std::string(arg)); + } else if (read_flag("no-legacy-pass-manager")) { + ctx.arg.plugin_opt.push_back("legacy-pass-manager"); + } else if (read_arg("lto-partitions")) { + ctx.arg.plugin_opt.push_back("lto-partitions=" + std::string(arg)); + } else if (read_flag("no-lto-legacy-pass-manager")) { + ctx.arg.plugin_opt.push_back("new-pass-manager"); + } else if (read_arg("lto-obj-path")) { + ctx.arg.plugin_opt.push_back("obj-path=" + std::string(arg)); + } else if (read_arg("opt-remarks-filename")) { + ctx.arg.plugin_opt.push_back("opt-remarks-filename=" + std::string(arg)); + } else if (read_arg("opt-remarks-format")) { + ctx.arg.plugin_opt.push_back("opt-remarks-format=" + std::string(arg)); + } else if (read_arg("opt-remarks-hotness-threshold")) { + ctx.arg.plugin_opt.push_back("opt-remarks-hotness-threshold=" + + std::string(arg)); + } else if (read_arg("opt-remarks-passes")) { + ctx.arg.plugin_opt.push_back("opt-remarks-passes=" + std::string(arg)); + } else if (read_flag("opt-remarks-with_hotness")) { + ctx.arg.plugin_opt.push_back("opt-remarks-with-hotness"); + } else if (args[0].starts_with("-lto-O")) { + ctx.arg.plugin_opt.push_back("O" + std::string(args[0].substr(6))); + args = args.subspan(1); + } else if (args[0].starts_with("--lto-O")) { + ctx.arg.plugin_opt.push_back("O" + std::string(args[0].substr(7))); + args = args.subspan(1); + } else if (read_arg("lto-pseudo-probe-for-profiling")) { + ctx.arg.plugin_opt.push_back("pseudo-probe-for-profiling=" + + std::string(arg)); + } else if (read_arg("lto-sample-profile")) { + ctx.arg.plugin_opt.push_back("sample-profile=" + std::string(arg)); + } else if (read_flag("save-temps")) { + ctx.arg.plugin_opt.push_back("save-temps"); + } else if (read_flag("thinlto-emit-imports-files")) { + ctx.arg.plugin_opt.push_back("thinlto-emit-imports-files"); + } else if (read_arg("thinlto-index-only")) { + ctx.arg.plugin_opt.push_back("thinlto-index-only=" + std::string(arg)); + } else if (read_flag("thinlto-index-only")) { + ctx.arg.plugin_opt.push_back("thinlto-index-only"); + } else if (read_arg("thinlto-object-suffix-replace")) { + ctx.arg.plugin_opt.push_back("thinlto-object-suffix-replace=" + + std::string(arg)); + } else if (read_arg("thinlto-prefix-replace")) { + ctx.arg.plugin_opt.push_back("thinlto-prefix-replace=" + std::string(arg)); + } else if (read_arg("thinlto-cache-dir")) { + ctx.arg.plugin_opt.push_back("cache-dir=" + std::string(arg)); + } else if (read_arg("thinlto-cache-policy")) { + ctx.arg.plugin_opt.push_back("cache-policy=" + std::string(arg)); + } else if (read_arg("thinlto-jobs")) { + ctx.arg.plugin_opt.push_back("jobs=" + std::string(arg)); + } else if (read_arg("thread-count")) { + ctx.arg.thread_count = parse_number(ctx, "thread-count", arg); + } else if (read_flag("threads")) { + ctx.arg.thread_count = 0; + } else if (read_flag("no-threads")) { 
+ ctx.arg.thread_count = 1; + } else if (read_eq("threads")) { + ctx.arg.thread_count = parse_number(ctx, "threads", arg); + } else if (read_flag("discard-all") || read_flag("x")) { + ctx.arg.discard_all = true; + } else if (read_flag("discard-locals") || read_flag("X")) { + ctx.arg.discard_locals = true; + } else if (read_flag("strip-all") || read_flag("s")) { + ctx.arg.strip_all = true; + } else if (read_flag("strip-debug") || read_flag("S")) { + ctx.arg.strip_all = true; + } else if (read_flag("warn-unresolved-symbols")) { + ctx.arg.unresolved_symbols = UNRESOLVED_WARN; + } else if (read_flag("error-unresolved-symbols")) { + ctx.arg.unresolved_symbols = UNRESOLVED_ERROR; + } else if (read_arg("rpath")) { + add_rpath(arg); + } else if (read_arg("R")) { + if (is_file(arg)) + Fatal(ctx) << "-R" << arg + << ": -R as an alias for --just-symbols is not supported"; + add_rpath(arg); + } else if (read_flag("undefined-version")) { + ctx.arg.undefined_version = true; + } else if (read_flag("no-undefined-version")) { + ctx.arg.undefined_version = false; + } else if (read_flag("build-id")) { + ctx.arg.build_id.kind = BuildId::HASH; + ctx.arg.build_id.hash_size = 20; + } else if (read_arg("build-id")) { + if (arg == "none") { + ctx.arg.build_id.kind = BuildId::NONE; + } else if (arg == "uuid") { + ctx.arg.build_id.kind = BuildId::UUID; + } else if (arg == "md5") { + ctx.arg.build_id.kind = BuildId::HASH; + ctx.arg.build_id.hash_size = 16; + } else if (arg == "sha1") { + ctx.arg.build_id.kind = BuildId::HASH; + ctx.arg.build_id.hash_size = 20; + } else if (arg == "sha256") { + ctx.arg.build_id.kind = BuildId::HASH; + ctx.arg.build_id.hash_size = 32; + } else if (arg.starts_with("0x") || arg.starts_with("0X")) { + ctx.arg.build_id.kind = BuildId::HEX; + ctx.arg.build_id.value = parse_hex_build_id(ctx, arg); + } else { + Fatal(ctx) << "invalid --build-id argument: " << arg; + } + } else if (read_flag("no-build-id")) { + ctx.arg.build_id.kind = BuildId::NONE; + } else if (read_arg("format") || read_arg("b")) { + if (arg == "binary") + Fatal(ctx) + << "mold does not support `-b binary`. 
If you want to convert a" + << " binary file into an object file, use `objcopy -I binary -O" + << " default ` instead."; + Fatal(ctx) << "unknown command line option: -b " << arg; + } else if (read_arg("auxiliary") || read_arg("f")) { + ctx.arg.auxiliary.push_back(arg); + } else if (read_arg("filter") || read_arg("F")) { + ctx.arg.filter.push_back(arg); + } else if (read_arg("O")) { + } else if (read_flag("O0")) { + } else if (read_flag("O1")) { + } else if (read_flag("O2")) { + } else if (read_flag("verbose")) { + } else if (read_flag("color-diagnostics")) { + } else if (read_flag("eh-frame-hdr")) { + } else if (read_flag("start-group")) { + } else if (read_flag("end-group")) { + } else if (read_flag("(")) { + } else if (read_flag(")")) { + } else if (read_flag("fatal-warnings")) { + } else if (read_flag("enable-new-dtags")) { + } else if (read_flag("disable-new-dtags")) { + } else if (read_flag("nostdlib")) { + } else if (read_flag("allow-shlib-undefined")) { + } else if (read_flag("no-allow-shlib-undefined")) { + } else if (read_flag("no-add-needed")) { + } else if (read_flag("no-call-graph-profile-sort")) { + } else if (read_flag("no-copy-dt-needed-entries")) { + } else if (read_arg("sort-section")) { + } else if (read_flag("sort-common")) { + } else if (read_flag("dc")) { + } else if (read_flag("dp")) { + } else if (read_flag("fix-cortex-a53-835769")) { + } else if (read_flag("fix-cortex-a53-843419")) { + } else if (read_flag("EL")) { + } else if (read_flag("warn-once")) { + } else if (read_flag("nodefaultlibs")) { + } else if (read_flag("warn-constructors")) { + } else if (read_flag("warn-execstack")) { + } else if (read_flag("no-warn-execstack")) { + } else if (read_flag("secure-plt")) { + } else if (read_arg("rpath-link")) { + } else if (read_z_flag("combreloc")) { + } else if (read_z_flag("nocombreloc")) { + } else if (read_z_arg("common-page-size")) { + } else if (read_flag("no-keep-memory")) { + } else if (read_arg("max-cache-size")) { + } else if (read_arg("version-script")) { + // --version-script, --dynamic-list and --export-dynamic-symbol[-list] + // are treated as positional arguments even if they are actually not + // positional. This is because linker scripts (a positional argument) + // can also specify a version script, and it's better to consolidate + // parsing in read_input_files. In particular, version scripts can + // modify ctx.default_version which we initialize *after* parsing + // non-positional args, so the parsing cannot be done right here. 
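// As an illustration (a hypothetical file, not taken from this patch), a
// version script is a small text file such as:
//
//   VER_1.0 {
//     global: foo; bar;
//     local: *;
//   };
//
// and a linker script given as a positional argument may embed the same
// information in a VERSION { ... } command, which is why both are parsed
// together with the input files rather than here.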
+ remaining.push_back("--version-script=" + std::string(arg)); + } else if (read_arg("dynamic-list")) { + ctx.arg.Bsymbolic = true; + remaining.push_back("--dynamic-list=" + std::string(arg)); + } else if (read_arg("export-dynamic-symbol")) { + remaining.push_back("--export-dynamic-symbol=" + std::string(arg)); + } else if (read_arg("export-dynamic-symbol-list")) { + remaining.push_back("--export-dynamic-symbol-list=" + std::string(arg)); + } else if (read_flag("as-needed")) { + remaining.push_back("--as-needed"); + } else if (read_flag("no-as-needed")) { + remaining.push_back("--no-as-needed"); + } else if (read_flag("whole-archive")) { + remaining.push_back("--whole-archive"); + } else if (read_flag("no-whole-archive")) { + remaining.push_back("--no-whole-archive"); + } else if (read_arg("l")) { + remaining.push_back("-l" + std::string(arg)); + } else if (read_arg("script") || read_arg("T")) { + remaining.push_back(std::string(arg)); + } else if (read_flag("push-state")) { + remaining.push_back("--push-state"); + } else if (read_flag("pop-state")) { + remaining.push_back("--pop-state"); + } else if (args[0].starts_with("-z") && args[0].size() > 2) { + Warn(ctx) << "unknown command line option: " << args[0]; + args = args.subspan(1); + } else if (args[0] == "-z" && args.size() >= 2) { + Warn(ctx) << "unknown command line option: -z " << args[1]; + args = args.subspan(2); + } else if (args[0] == "-dynamic") { + Fatal(ctx) << "unknown command line option: -dynamic;" + << " -dynamic is a macOS linker's option. If you are trying" + << " to build a binary for an Apple platform, you need to use" + << " ld64.mold instead of mold or ld.mold."; + } else { + if (args[0][0] == '-') + Fatal(ctx) << "unknown command line option: " << args[0]; + remaining.push_back(std::string(args[0])); + args = args.subspan(1); + } + } + + if (!ctx.arg.sysroot.empty()) { + for (std::string &path : ctx.arg.library_paths) { + if (std::string_view(path).starts_with('=')) + path = ctx.arg.sysroot + path.substr(1); + else if (std::string_view(path).starts_with("$SYSROOT")) + path = ctx.arg.sysroot + path.substr(8); + } + } + + // Clean library paths by removing redundant `/..` and `/.` + // so that they are easier to read in log messages. 
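// For example (paths invented for illustration), with --sysroot=/opt/cross
// the rewriting above and the cleanup below would yield:
//
//   -L=/usr/lib               ->  /opt/cross/usr/lib
//   -L$SYSROOT/usr/lib        ->  /opt/cross/usr/lib
//   /opt/cross/usr/../lib/.   ->  /opt/cross/lib
//
// assuming path_clean() does a purely lexical removal of "." and ".."
// components, as the comment above describes.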
+ for (std::string &path : ctx.arg.library_paths) + path = path_clean(path); + + if (ctx.arg.shared) + ctx.arg.pic = true; + + if (ctx.arg.pic) + ctx.arg.image_base = 0; + + if (ctx.arg.retain_symbols_file) { + ctx.arg.strip_all = false; + ctx.arg.discard_all = false; + } + + if (ctx.arg.relocatable) + ctx.arg.is_static = true; + + // --section-order implies `-z separate-loadable-segments` + if (z_separate_code) + ctx.arg.z_separate_code = *z_separate_code; + else if (!ctx.arg.section_order.empty()) + ctx.arg.z_separate_code = SEPARATE_LOADABLE_SEGMENTS; + + // --section-order implies `-z norelro` + if (z_relro) + ctx.arg.z_relro = *z_relro; + else if (!ctx.arg.section_order.empty()) + ctx.arg.z_relro = false; + + if (!ctx.arg.shared) { + if (!ctx.arg.filter.empty()) + Fatal(ctx) << "-filter may not be used without -shared"; + if (!ctx.arg.auxiliary.empty()) + Fatal(ctx) << "-auxiliary may not be used without -shared"; + } + + if (!ctx.arg.apply_dynamic_relocs && !E::is_rela) + Fatal(ctx) << "--no-apply-dynamic-relocs may not be used on " + << E::target_name; + + if (is_sparc && ctx.arg.apply_dynamic_relocs) + Fatal(ctx) << "--apply-dynamic-relocs may not be used on SPARC64"; + + if (!ctx.arg.section_start.empty() && !ctx.arg.section_order.empty()) + Fatal(ctx) << "--section-start may not be used with --section-order"; + + if (ctx.arg.image_base % ctx.page_size) + Fatal(ctx) << "-image-base must be a multiple of -max-page-size"; + + if (ctx.arg.thread_count == 0) + ctx.arg.thread_count = get_default_thread_count(); + + if (char *env = getenv("MOLD_REPRO"); env && env[0]) + ctx.arg.repro = true; + + if (ctx.arg.shared || ctx.arg.export_dynamic) + ctx.default_version = VER_NDX_GLOBAL; + else + ctx.default_version = VER_NDX_LOCAL; + + if (ctx.arg.default_symver) { + std::string ver = ctx.arg.soname.empty() ? + filepath(ctx.arg.output).filename().string() : std::string(ctx.arg.soname); + ctx.arg.version_definitions.push_back(ver); + ctx.default_version = VER_NDX_LAST_RESERVED + 1; + } + + if (ctx.arg.shared && warn_shared_textrel) + ctx.arg.warn_textrel = true; + + ctx.arg.undefined.push_back(ctx.arg.entry); + + // --oformat=binary implies --strip-all because without a section + // header, there's no way to identify the locations of a symbol + // table in an output file in the first place. + if (ctx.arg.oformat_binary) + ctx.arg.strip_all = true; + + // By default, mold tries to ovewrite to an output file if exists + // because at least on Linux, writing to an existing file is much + // faster than creating a fresh file and writing to it. + // + // However, if an existing file is in use, writing to it will mess + // up processes that are executing that file. Linux prevents a write + // to a running executable file; it returns ETXTBSY on open(2). + // However, that mechanism doesn't protect .so files. Therefore, we + // want to disable this optimization if we are creating a shared + // object file. + if (ctx.arg.shared) + ctx.overwrite_output_file = false; + + if (version_shown && remaining.empty()) + exit(0); + return remaining; +} + +using E = MOLD_TARGET; + +template std::vector parse_nonpositional_args(Context &ctx); + +} // namespace mold::elf diff --git a/third_party/mold/elf/dwarf.cc b/third_party/mold/elf/dwarf.cc new file mode 100644 index 00000000000..f8fcf2bcf49 --- /dev/null +++ b/third_party/mold/elf/dwarf.cc @@ -0,0 +1,555 @@ +// clang-format off +// This file contains code to read DWARF debug info to create .gdb_index. 
+//
+// .gdb_index is an optional section to speed up the GNU debugger. It contains
+// two maps: 1) a map from function/variable/type names to compunits, and
+// 2) a map from function address ranges to compunits. gdb uses these
+// maps to quickly find a compunit given a name or an instruction pointer.
+//
+// (Terminology: a compilation unit, which is often abbreviated as compunit
+// or cu, is a unit of debug info. An input .debug_info section usually
+// contains one compunit, and thus an output .debug_info contains as
+// many compunits as the number of input files.)
+//
+// .gdb_index is not mandatory. All the information in .gdb_index is
+// also in other debug info sections. You can actually create an
+// executable without .gdb_index and later add it using the `gdb-add-index`
+// post-processing tool that comes with gdb.
+//
+// The mapping from names to compunits is 1:n while the mapping from
+// address ranges to compunits is 1:1. That is, two object files may
+// define the same type name (with the same definition), while there
+// should be no two functions that overlap with each other in memory.
+//
+// .gdb_index contains an on-disk hash table for names, so gdb can
+// look up names without loading all strings into memory and constructing
+// an in-memory hash table.
+//
+// Names are in .debug_gnu_pubnames and .debug_gnu_pubtypes input
+// sections. These sections are created if `-ggnu-pubnames` is given.
+// Besides names, these sections contain attributes for each name so
+// that gdb can distinguish type names from function names, for example.
+//
+// A compunit contains one or more function address ranges. If an
+// object file is compiled without -ffunction-sections, it contains
+// only one .text section and therefore contains a single address range.
+// Such a range is typically stored directly in the compunit.
+//
+// If an object file is compiled with -ffunction-sections, it contains
+// more than one .text section, and it has as many address ranges as
+// the number of .text sections. Such discontiguous address ranges are
+// stored to .debug_ranges in DWARF 2/3/4 and
+// .debug_rnglists/.debug_addr in DWARF 5.
+//
+// The .debug_info section contains DWARF debug info. Although we don't need
+// to parse the whole .debug_info section to read address ranges, we
+// have to do a little bit. DWARF is complicated and often handled using
+// a library such as libdwarf. But we don't use any library because we
+// don't want to add an extra run-time dependency just for --gdb-index.
+//
+// This page explains the format of .gdb_index:
+// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
+
+#include "third_party/mold/elf/mold.h"
+
+namespace mold::elf {
+
+// The hash function for .gdb_index.
+static u32 gdb_hash(std::string_view name) {
+ u32 h = 0;
+ for (u8 c : name) {
+ if ('A' <= c && c <= 'Z')
+ c = 'a' + c - 'A';
+ h = h * 67 + c - 113;
+ }
+ return h;
+}
+
+// Split .debug_info into so-called "compilation units". A .debug_info
+// section usually contains one compunit unless it was created by `ld -r`.
+// This is for --gdb-index.
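+//
+// Each compunit record is self-delimiting: it starts with a 4-byte
+// "initial length" field holding the size of the rest of the record, so
+// we can split the section by repeatedly reading a length and slicing
+// off that many bytes plus the 4-byte length field itself. An initial
+// length of 0xffffffff would signal the 64-bit DWARF format, which is
+// rejected below.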
+template +std::vector +read_compunits(Context &ctx, ObjectFile &file) { + file.debug_info->uncompress(ctx); + std::string_view data = file.debug_info->contents; + std::vector vec; + + while (!data.empty()) { + if (data.size() < 4) + Fatal(ctx) << *file.debug_info << ": corrupted .debug_info"; + if (*(U32 *)data.data() == 0xffff'ffff) + Fatal(ctx) << *file.debug_info << ": --gdb-index: DWARF64 not supported"; + i64 len = *(U32 *)data.data() + 4; + vec.push_back(data.substr(0, len)); + data = data.substr(len); + } + return vec; +} + +// Parses .debug_gnu_pubnames and .debug_gnu_pubtypes. These sections +// start with a 14 bytes header followed by (4-byte offset, 1-byte type, +// null-terminated string) tuples. +// +// The 4-byte offset is an offset into .debug_info that contains details +// about the name. The 1-byte type is a type of the corresponding name +// (e.g. function, variable or datatype). The string is a name of a +// function, a variable or a type. +template +std::vector read_pubnames(Context &ctx, ObjectFile &file) { + std::vector vec; + + auto get_cu_idx = [&](InputSection &isec, i64 offset) { + i64 off = 0; + for (i64 i = 0; i < file.compunits.size(); i++) { + if (offset == off) + return file.compunits_idx + i; + off += file.compunits[i].size(); + } + Fatal(ctx) << isec << ": corrupted debug_info_offset"; + }; + + auto read = [&](InputSection &isec) { + isec.uncompress(ctx); + std::string_view contents = isec.contents; + + while (!contents.empty()) { + if (contents.size() < 14) + Fatal(ctx) << isec << ": corrupted header"; + + u32 len = *(U32 *)contents.data() + 4; + u32 debug_info_offset = *(U32 *)(contents.data() + 6); + u32 cu_idx = get_cu_idx(isec, debug_info_offset); + + std::string_view data = contents.substr(14, len - 14); + contents = contents.substr(len); + + while (!data.empty()) { + u32 offset = *(U32 *)data.data(); + data = data.substr(4); + if (offset == 0) + break; + + u8 type = data[0]; + data = data.substr(1); + + std::string_view name = data.data(); + data = data.substr(name.size() + 1); + + vec.push_back({name, gdb_hash(name), (type << 24) | cu_idx}); + } + } + }; + + if (file.debug_pubnames) + read(*file.debug_pubnames); + if (file.debug_pubtypes) + read(*file.debug_pubtypes); + + // Uniquify elements because GCC 11 seems to emit one record for each + // comdat group which results in having a lot of duplicate records. + auto less = [](const GdbIndexName &a, const GdbIndexName &b) { + return std::tuple{a.hash, a.attr, a.name} < + std::tuple{b.hash, b.attr, b.name}; + }; + + auto equal = [](const GdbIndexName &a, const GdbIndexName &b) { + return std::tuple{a.hash, a.attr, a.name} == + std::tuple{b.hash, b.attr, b.name}; + }; + + std::sort(vec.begin(), vec.end(), less); + vec.erase(std::unique(vec.begin(), vec.end(), equal), vec.end()); + return vec; +} + +template +static u8 *get_buffer(Context &ctx, Chunk *chunk) { + if (u8 *buf = chunk->get_uncompressed_data()) + return buf; + return ctx.buf + chunk->shdr.sh_offset; +} + +// Try to find a compilation unit from .debug_info and its +// corresponding record from .debug_abbrev and returns them. +template +static std::tuple +find_compunit(Context &ctx, ObjectFile &file, i64 offset) { + // Read .debug_info to find the record at a given offset. + u8 *cu = get_buffer(ctx, ctx.debug_info) + offset; + u32 dwarf_version = *(U16 *)(cu + 4); + u32 abbrev_offset; + + // Skip a header. 
+ switch (dwarf_version) { + case 2: + case 3: + case 4: + abbrev_offset = *(U32 *)(cu + 6); + if (u32 address_size = cu[10]; address_size != sizeof(Word)) + Fatal(ctx) << file << ": --gdb-index: unsupported address size " + << address_size; + cu += 11; + break; + case 5: { + abbrev_offset = *(U32 *)(cu + 8); + if (u32 address_size = cu[7]; address_size != sizeof(Word)) + Fatal(ctx) << file << ": --gdb-index: unsupported address size " + << address_size; + + switch (u32 unit_type = cu[6]; unit_type) { + case DW_UT_compile: + case DW_UT_partial: + cu += 12; + break; + case DW_UT_skeleton: + case DW_UT_split_compile: + cu += 20; + break; + default: + Fatal(ctx) << file << ": --gdb-index: unknown DW_UT_* value: 0x" + << std::hex << unit_type; + } + break; + } + default: + Fatal(ctx) << file << ": --gdb-index: unknown DWARF version: " + << dwarf_version; + } + + u32 abbrev_code = read_uleb(cu); + + // Find a .debug_abbrev record corresponding to the .debug_info record. + // We assume the .debug_info record at a given offset is of + // DW_TAG_compile_unit which describes a compunit. + u8 *abbrev = get_buffer(ctx, ctx.debug_abbrev) + abbrev_offset; + + for (;;) { + u32 code = read_uleb(abbrev); + if (code == 0) + Fatal(ctx) << file << ": --gdb-index: .debug_abbrev does not contain" + << " a record for the first .debug_info record"; + + if (code == abbrev_code) { + // Found a record + u64 abbrev_tag = read_uleb(abbrev); + if (abbrev_tag != DW_TAG_compile_unit && abbrev_tag != DW_TAG_skeleton_unit) + Fatal(ctx) << file << ": --gdb-index: the first entry's tag is not" + << " DW_TAG_compile_unit/DW_TAG_skeleton_unit but 0x" + << std::hex << abbrev_tag; + break; + } + + // Skip an uninteresting record + read_uleb(abbrev); // tag + abbrev++; // has_children byte + for (;;) { + u64 name = read_uleb(abbrev); + u64 form = read_uleb(abbrev); + if (name == 0 && form == 0) + break; + if (form == DW_FORM_implicit_const) + read_uleb(abbrev); + } + } + + abbrev++; // skip has_children byte + return {cu, abbrev, dwarf_version}; +} + +// Estimate the number of address ranges contained in a given file. +// It may over-estimate but never under-estimate. +template +i64 estimate_address_areas(Context &ctx, ObjectFile &file) { + // Each CU contains zero or one address area. + i64 ret = file.compunits.size(); + + // In DWARF 4, a CU can refer address ranges in .debug_ranges. + // .debug_ranges contains a vector of [begin, end) address pairs. + // The last entry must be a null terminator, so we do -1. + if (file.debug_ranges) + ret += file.debug_ranges->sh_size / sizeof(Word) / 2 - 1; + + // In DWARF 5, a CU can refer address ranges in .debug_rnglists, which + // contains variable-length entries. The smallest possible range entry + // is one byte for the code and two ULEB128 values (each can be as + // small as one byte), so 3 bytes. + if (file.debug_rnglists) + ret += file.debug_rnglists->sh_size / 3; + return ret; +} + +// .debug_info contains variable-length fields. This class reads them. +template +class DebugInfoReader { +public: + DebugInfoReader(Context &ctx, ObjectFile &file, u8 *cu) + : ctx(ctx), file(file), cu(cu) {} + + u64 read(u64 form); + + Context &ctx; + ObjectFile &file; + u8 *cu; +}; + +// Read value of the given DW_FORM_* form. If a value is not scalar, +// returns a dummy value 0. 
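+//
+// For example, DW_FORM_data2 is a 2-byte constant, DW_FORM_addrx1 is a
+// 1-byte index into the address table in .debug_addr, and DW_FORM_udata
+// is a ULEB128-encoded integer. DW_FORM_string is an inline
+// NUL-terminated string; it is skipped and 0 is returned because it is
+// not a scalar.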
+template +u64 DebugInfoReader::read(u64 form) { + switch (form) { + case DW_FORM_flag_present: + return 0; + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_strx1: + case DW_FORM_addrx1: + case DW_FORM_ref1: + return *cu++; + case DW_FORM_data2: + case DW_FORM_strx2: + case DW_FORM_addrx2: + case DW_FORM_ref2: { + u64 val = *(U16 *)cu; + cu += 2; + return val; + } + case DW_FORM_strx3: + case DW_FORM_addrx3: { + u64 val = *(U24 *)cu; + cu += 3; + return val; + } + case DW_FORM_data4: + case DW_FORM_strp: + case DW_FORM_sec_offset: + case DW_FORM_line_strp: + case DW_FORM_strx4: + case DW_FORM_addrx4: + case DW_FORM_ref4: { + u64 val = *(U32 *)cu; + cu += 4; + return val; + } + case DW_FORM_data8: + case DW_FORM_ref8: { + u64 val = *(U64 *)cu; + cu += 8; + return val; + } + case DW_FORM_addr: + case DW_FORM_ref_addr: { + u64 val = *(Word *)cu; + cu += sizeof(Word); + return val; + } + case DW_FORM_strx: + case DW_FORM_addrx: + case DW_FORM_udata: + case DW_FORM_ref_udata: + case DW_FORM_loclistx: + case DW_FORM_rnglistx: + return read_uleb(cu); + case DW_FORM_string: + cu += strlen((char *)cu) + 1; + return 0; + default: + Fatal(ctx) << file << ": --gdb-index: unhandled debug info form: 0x" + << std::hex << form; + return 0; + } +} + +// Read a range list from .debug_ranges starting at the given offset. +template +static std::vector +read_debug_range(Context &ctx, ObjectFile &file, Word *range) { + std::vector vec; + u64 base = 0; + + for (i64 i = 0; range[i] || range[i + 1]; i += 2) { + if (range[i] + 1 == 0) { + // base address selection entry + base = range[i + 1]; + } else { + vec.push_back(range[i] + base); + vec.push_back(range[i + 1] + base); + } + } + return vec; +} + +// Read a range list from .debug_rnglists starting at the given offset. +template +static std::vector +read_rnglist_range(Context &ctx, ObjectFile &file, u8 *rnglist, + Word *addrx) { + std::vector vec; + u64 base = 0; + + for (;;) { + switch (*rnglist++) { + case DW_RLE_end_of_list: + return vec; + case DW_RLE_base_addressx: + base = addrx[read_uleb(rnglist)]; + break; + case DW_RLE_startx_endx: + vec.push_back(addrx[read_uleb(rnglist)]); + vec.push_back(addrx[read_uleb(rnglist)]); + break; + case DW_RLE_startx_length: + vec.push_back(addrx[read_uleb(rnglist)]); + vec.push_back(vec.back() + read_uleb(rnglist)); + break; + case DW_RLE_offset_pair: + vec.push_back(base + read_uleb(rnglist)); + vec.push_back(base + read_uleb(rnglist)); + break; + case DW_RLE_base_address: + base = *(Word *)rnglist; + rnglist += sizeof(Word); + break; + case DW_RLE_start_end: + vec.push_back(*(Word *)rnglist); + rnglist += sizeof(Word); + vec.push_back(*(Word *)rnglist); + rnglist += sizeof(Word); + break; + case DW_RLE_start_length: + vec.push_back(*(Word *)rnglist); + rnglist += sizeof(Word); + vec.push_back(vec.back() + read_uleb(rnglist)); + break; + } + } +} + +// Returns a list of address ranges explained by a compunit at the +// `offset` in an output .debug_info section. +// +// .debug_info contains DWARF debug info records, so this function +// parses DWARF. If a designated compunit contains multiple ranges, the +// ranges are read from .debug_ranges (or .debug_rnglists for DWARF5). +// Otherwise, a range is read directly from .debug_info (or possibly +// from .debug_addr for DWARF5). 
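+//
+// Concretely, a contiguous range is described by the DW_AT_low_pc and
+// DW_AT_high_pc attributes of the compile unit DIE. DW_AT_high_pc may be
+// either an absolute end address or a size in bytes relative to
+// DW_AT_low_pc, depending on its form. Discontiguous ranges are
+// described by DW_AT_ranges, which refers to .debug_ranges (DWARF 4 or
+// earlier) or .debug_rnglists (DWARF 5). DW_FORM_addrx* values are
+// indices into .debug_addr, located via DW_AT_addr_base.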
+template +std::vector +read_address_areas(Context &ctx, ObjectFile &file, i64 offset) { + u8 *cu; + u8 *abbrev; + u32 dwarf_version; + std::tie(cu, abbrev, dwarf_version) = find_compunit(ctx, file, offset); + + DebugInfoReader reader{ctx, file, cu}; + + struct Record { + u64 form = 0; + u64 value = 0; + }; + + Record low_pc; + Record high_pc; + Record ranges; + std::optional rnglists_base; + Word *addrx = nullptr; + + // Read all interesting debug records. + for (;;) { + u64 name = read_uleb(abbrev); + u64 form = read_uleb(abbrev); + if (name == 0 && form == 0) + break; + + u64 val = reader.read(form); + + switch (name) { + case DW_AT_low_pc: + low_pc = {form, val}; + break; + case DW_AT_high_pc: + high_pc = {form, val}; + break; + case DW_AT_rnglists_base: + rnglists_base = val; + break; + case DW_AT_addr_base: + addrx = (Word *)(get_buffer(ctx, ctx.debug_addr) + val); + break; + case DW_AT_ranges: + ranges = {form, val}; + break; + } + } + + // Handle non-contiguous address ranges. + if (ranges.form) { + if (dwarf_version <= 4) { + Word *range_begin = + (Word *)(get_buffer(ctx, ctx.debug_ranges) + ranges.value); + return read_debug_range(ctx, file, range_begin); + } + + assert(dwarf_version == 5); + + u8 *buf = get_buffer(ctx, ctx.debug_rnglists); + if (ranges.form == DW_FORM_sec_offset) + return read_rnglist_range(ctx, file, buf + ranges.value, addrx); + + if (!rnglists_base) + Fatal(ctx) << file << ": --gdb-index: missing DW_AT_rnglists_base"; + + u8 *base = buf + *rnglists_base; + return read_rnglist_range(ctx, file, base + *(U32 *)base, addrx); + } + + // Handle a contiguous address range. + if (low_pc.form && high_pc.form) { + u64 lo; + + switch (low_pc.form) { + case DW_FORM_addr: + lo = low_pc.value; + break; + case DW_FORM_addrx: + case DW_FORM_addrx1: + case DW_FORM_addrx2: + case DW_FORM_addrx4: + lo = addrx[low_pc.value]; + break; + default: + Fatal(ctx) << file << ": --gdb-index: unhandled form for DW_AT_low_pc: 0x" + << std::hex << high_pc.form; + } + + switch (high_pc.form) { + case DW_FORM_addr: + return {lo, high_pc.value}; + case DW_FORM_addrx: + case DW_FORM_addrx1: + case DW_FORM_addrx2: + case DW_FORM_addrx4: + return {lo, addrx[high_pc.value]}; + case DW_FORM_udata: + case DW_FORM_data1: + case DW_FORM_data2: + case DW_FORM_data4: + case DW_FORM_data8: + return {lo, lo + high_pc.value}; + default: + Fatal(ctx) << file << ": --gdb-index: unhandled form for DW_AT_high_pc: 0x" + << std::hex << high_pc.form; + } + } + + return {}; +} + +using E = MOLD_TARGET; + +template std::vector read_compunits(Context &, ObjectFile &); +template std::vector read_pubnames(Context &, ObjectFile &); +template i64 estimate_address_areas(Context &, ObjectFile &); +template std::vector read_address_areas(Context &, ObjectFile &, i64); + +} // namespace mold::elf diff --git a/third_party/mold/elf/elf.cc b/third_party/mold/elf/elf.cc new file mode 100644 index 00000000000..6f96925eb17 --- /dev/null +++ b/third_party/mold/elf/elf.cc @@ -0,0 +1,922 @@ +// clang-format off +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +template <> +std::string rel_to_string(u32 r_type) { + switch (r_type) { + case R_X86_64_NONE: return "R_X86_64_NONE"; + case R_X86_64_64: return "R_X86_64_64"; + case R_X86_64_PC32: return "R_X86_64_PC32"; + case R_X86_64_GOT32: return "R_X86_64_GOT32"; + case R_X86_64_PLT32: return "R_X86_64_PLT32"; + case R_X86_64_COPY: return "R_X86_64_COPY"; + case R_X86_64_GLOB_DAT: return "R_X86_64_GLOB_DAT"; + case R_X86_64_JUMP_SLOT: return "R_X86_64_JUMP_SLOT"; + 
case R_X86_64_RELATIVE: return "R_X86_64_RELATIVE"; + case R_X86_64_GOTPCREL: return "R_X86_64_GOTPCREL"; + case R_X86_64_32: return "R_X86_64_32"; + case R_X86_64_32S: return "R_X86_64_32S"; + case R_X86_64_16: return "R_X86_64_16"; + case R_X86_64_PC16: return "R_X86_64_PC16"; + case R_X86_64_8: return "R_X86_64_8"; + case R_X86_64_PC8: return "R_X86_64_PC8"; + case R_X86_64_DTPMOD64: return "R_X86_64_DTPMOD64"; + case R_X86_64_DTPOFF64: return "R_X86_64_DTPOFF64"; + case R_X86_64_TPOFF64: return "R_X86_64_TPOFF64"; + case R_X86_64_TLSGD: return "R_X86_64_TLSGD"; + case R_X86_64_TLSLD: return "R_X86_64_TLSLD"; + case R_X86_64_DTPOFF32: return "R_X86_64_DTPOFF32"; + case R_X86_64_GOTTPOFF: return "R_X86_64_GOTTPOFF"; + case R_X86_64_TPOFF32: return "R_X86_64_TPOFF32"; + case R_X86_64_PC64: return "R_X86_64_PC64"; + case R_X86_64_GOTOFF64: return "R_X86_64_GOTOFF64"; + case R_X86_64_GOTPC32: return "R_X86_64_GOTPC32"; + case R_X86_64_GOT64: return "R_X86_64_GOT64"; + case R_X86_64_GOTPCREL64: return "R_X86_64_GOTPCREL64"; + case R_X86_64_GOTPC64: return "R_X86_64_GOTPC64"; + case R_X86_64_GOTPLT64: return "R_X86_64_GOTPLT64"; + case R_X86_64_PLTOFF64: return "R_X86_64_PLTOFF64"; + case R_X86_64_SIZE32: return "R_X86_64_SIZE32"; + case R_X86_64_SIZE64: return "R_X86_64_SIZE64"; + case R_X86_64_GOTPC32_TLSDESC: return "R_X86_64_GOTPC32_TLSDESC"; + case R_X86_64_TLSDESC_CALL: return "R_X86_64_TLSDESC_CALL"; + case R_X86_64_TLSDESC: return "R_X86_64_TLSDESC"; + case R_X86_64_IRELATIVE: return "R_X86_64_IRELATIVE"; + case R_X86_64_GOTPCRELX: return "R_X86_64_GOTPCRELX"; + case R_X86_64_REX_GOTPCRELX: return "R_X86_64_REX_GOTPCRELX"; + } + return "unknown (" + std::to_string(r_type) + ")"; +} + +template <> +std::string rel_to_string(u32 r_type) { + switch (r_type) { + case R_386_NONE: return "R_386_NONE"; + case R_386_32: return "R_386_32"; + case R_386_PC32: return "R_386_PC32"; + case R_386_GOT32: return "R_386_GOT32"; + case R_386_PLT32: return "R_386_PLT32"; + case R_386_COPY: return "R_386_COPY"; + case R_386_GLOB_DAT: return "R_386_GLOB_DAT"; + case R_386_JUMP_SLOT: return "R_386_JUMP_SLOT"; + case R_386_RELATIVE: return "R_386_RELATIVE"; + case R_386_GOTOFF: return "R_386_GOTOFF"; + case R_386_GOTPC: return "R_386_GOTPC"; + case R_386_32PLT: return "R_386_32PLT"; + case R_386_TLS_TPOFF: return "R_386_TLS_TPOFF"; + case R_386_TLS_IE: return "R_386_TLS_IE"; + case R_386_TLS_GOTIE: return "R_386_TLS_GOTIE"; + case R_386_TLS_LE: return "R_386_TLS_LE"; + case R_386_TLS_GD: return "R_386_TLS_GD"; + case R_386_TLS_LDM: return "R_386_TLS_LDM"; + case R_386_16: return "R_386_16"; + case R_386_PC16: return "R_386_PC16"; + case R_386_8: return "R_386_8"; + case R_386_PC8: return "R_386_PC8"; + case R_386_TLS_GD_32: return "R_386_TLS_GD_32"; + case R_386_TLS_GD_PUSH: return "R_386_TLS_GD_PUSH"; + case R_386_TLS_GD_CALL: return "R_386_TLS_GD_CALL"; + case R_386_TLS_GD_POP: return "R_386_TLS_GD_POP"; + case R_386_TLS_LDM_32: return "R_386_TLS_LDM_32"; + case R_386_TLS_LDM_PUSH: return "R_386_TLS_LDM_PUSH"; + case R_386_TLS_LDM_CALL: return "R_386_TLS_LDM_CALL"; + case R_386_TLS_LDM_POP: return "R_386_TLS_LDM_POP"; + case R_386_TLS_LDO_32: return "R_386_TLS_LDO_32"; + case R_386_TLS_IE_32: return "R_386_TLS_IE_32"; + case R_386_TLS_LE_32: return "R_386_TLS_LE_32"; + case R_386_TLS_DTPMOD32: return "R_386_TLS_DTPMOD32"; + case R_386_TLS_DTPOFF32: return "R_386_TLS_DTPOFF32"; + case R_386_TLS_TPOFF32: return "R_386_TLS_TPOFF32"; + case R_386_SIZE32: return "R_386_SIZE32"; + case R_386_TLS_GOTDESC: return 
"R_386_TLS_GOTDESC"; + case R_386_TLS_DESC_CALL: return "R_386_TLS_DESC_CALL"; + case R_386_TLS_DESC: return "R_386_TLS_DESC"; + case R_386_IRELATIVE: return "R_386_IRELATIVE"; + case R_386_GOT32X: return "R_386_GOT32X"; + } + return "unknown (" + std::to_string(r_type) + ")"; +} + +template <> +std::string rel_to_string(u32 r_type) { + switch (r_type) { + case R_AARCH64_NONE: return "R_AARCH64_NONE"; + case R_AARCH64_ABS64: return "R_AARCH64_ABS64"; + case R_AARCH64_ABS32: return "R_AARCH64_ABS32"; + case R_AARCH64_ABS16: return "R_AARCH64_ABS16"; + case R_AARCH64_PREL64: return "R_AARCH64_PREL64"; + case R_AARCH64_PREL32: return "R_AARCH64_PREL32"; + case R_AARCH64_PREL16: return "R_AARCH64_PREL16"; + case R_AARCH64_MOVW_UABS_G0: return "R_AARCH64_MOVW_UABS_G0"; + case R_AARCH64_MOVW_UABS_G0_NC: return "R_AARCH64_MOVW_UABS_G0_NC"; + case R_AARCH64_MOVW_UABS_G1: return "R_AARCH64_MOVW_UABS_G1"; + case R_AARCH64_MOVW_UABS_G1_NC: return "R_AARCH64_MOVW_UABS_G1_NC"; + case R_AARCH64_MOVW_UABS_G2: return "R_AARCH64_MOVW_UABS_G2"; + case R_AARCH64_MOVW_UABS_G2_NC: return "R_AARCH64_MOVW_UABS_G2_NC"; + case R_AARCH64_MOVW_UABS_G3: return "R_AARCH64_MOVW_UABS_G3"; + case R_AARCH64_MOVW_SABS_G0: return "R_AARCH64_MOVW_SABS_G0"; + case R_AARCH64_MOVW_SABS_G1: return "R_AARCH64_MOVW_SABS_G1"; + case R_AARCH64_MOVW_SABS_G2: return "R_AARCH64_MOVW_SABS_G2"; + case R_AARCH64_LD_PREL_LO19: return "R_AARCH64_LD_PREL_LO19"; + case R_AARCH64_ADR_PREL_LO21: return "R_AARCH64_ADR_PREL_LO21"; + case R_AARCH64_ADR_PREL_PG_HI21: return "R_AARCH64_ADR_PREL_PG_HI21"; + case R_AARCH64_ADR_PREL_PG_HI21_NC: return "R_AARCH64_ADR_PREL_PG_HI21_NC"; + case R_AARCH64_ADD_ABS_LO12_NC: return "R_AARCH64_ADD_ABS_LO12_NC"; + case R_AARCH64_LDST8_ABS_LO12_NC: return "R_AARCH64_LDST8_ABS_LO12_NC"; + case R_AARCH64_TSTBR14: return "R_AARCH64_TSTBR14"; + case R_AARCH64_CONDBR19: return "R_AARCH64_CONDBR19"; + case R_AARCH64_JUMP26: return "R_AARCH64_JUMP26"; + case R_AARCH64_CALL26: return "R_AARCH64_CALL26"; + case R_AARCH64_LDST16_ABS_LO12_NC: return "R_AARCH64_LDST16_ABS_LO12_NC"; + case R_AARCH64_LDST32_ABS_LO12_NC: return "R_AARCH64_LDST32_ABS_LO12_NC"; + case R_AARCH64_LDST64_ABS_LO12_NC: return "R_AARCH64_LDST64_ABS_LO12_NC"; + case R_AARCH64_MOVW_PREL_G0: return "R_AARCH64_MOVW_PREL_G0"; + case R_AARCH64_MOVW_PREL_G0_NC: return "R_AARCH64_MOVW_PREL_G0_NC"; + case R_AARCH64_MOVW_PREL_G1: return "R_AARCH64_MOVW_PREL_G1"; + case R_AARCH64_MOVW_PREL_G1_NC: return "R_AARCH64_MOVW_PREL_G1_NC"; + case R_AARCH64_MOVW_PREL_G2: return "R_AARCH64_MOVW_PREL_G2"; + case R_AARCH64_MOVW_PREL_G2_NC: return "R_AARCH64_MOVW_PREL_G2_NC"; + case R_AARCH64_MOVW_PREL_G3: return "R_AARCH64_MOVW_PREL_G3"; + case R_AARCH64_LDST128_ABS_LO12_NC: return "R_AARCH64_LDST128_ABS_LO12_NC"; + case R_AARCH64_ADR_GOT_PAGE: return "R_AARCH64_ADR_GOT_PAGE"; + case R_AARCH64_LD64_GOT_LO12_NC: return "R_AARCH64_LD64_GOT_LO12_NC"; + case R_AARCH64_LD64_GOTPAGE_LO15: return "R_AARCH64_LD64_GOTPAGE_LO15"; + case R_AARCH64_PLT32: return "R_AARCH64_PLT32"; + case R_AARCH64_TLSGD_ADR_PREL21: return "R_AARCH64_TLSGD_ADR_PREL21"; + case R_AARCH64_TLSGD_ADR_PAGE21: return "R_AARCH64_TLSGD_ADR_PAGE21"; + case R_AARCH64_TLSGD_ADD_LO12_NC: return "R_AARCH64_TLSGD_ADD_LO12_NC"; + case R_AARCH64_TLSGD_MOVW_G1: return "R_AARCH64_TLSGD_MOVW_G1"; + case R_AARCH64_TLSGD_MOVW_G0_NC: return "R_AARCH64_TLSGD_MOVW_G0_NC"; + case R_AARCH64_TLSLD_ADR_PREL21: return "R_AARCH64_TLSLD_ADR_PREL21"; + case R_AARCH64_TLSLD_ADR_PAGE21: return "R_AARCH64_TLSLD_ADR_PAGE21"; + case 
R_AARCH64_TLSLD_ADD_LO12_NC: return "R_AARCH64_TLSLD_ADD_LO12_NC"; + case R_AARCH64_TLSLD_MOVW_G1: return "R_AARCH64_TLSLD_MOVW_G1"; + case R_AARCH64_TLSLD_MOVW_G0_NC: return "R_AARCH64_TLSLD_MOVW_G0_NC"; + case R_AARCH64_TLSLD_LD_PREL19: return "R_AARCH64_TLSLD_LD_PREL19"; + case R_AARCH64_TLSLD_MOVW_DTPREL_G2: return "R_AARCH64_TLSLD_MOVW_DTPREL_G2"; + case R_AARCH64_TLSLD_MOVW_DTPREL_G1: return "R_AARCH64_TLSLD_MOVW_DTPREL_G1"; + case R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: return "R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC"; + case R_AARCH64_TLSLD_MOVW_DTPREL_G0: return "R_AARCH64_TLSLD_MOVW_DTPREL_G0"; + case R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: return "R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC"; + case R_AARCH64_TLSLD_ADD_DTPREL_HI12: return "R_AARCH64_TLSLD_ADD_DTPREL_HI12"; + case R_AARCH64_TLSLD_ADD_DTPREL_LO12: return "R_AARCH64_TLSLD_ADD_DTPREL_LO12"; + case R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: return "R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC"; + case R_AARCH64_TLSLD_LDST8_DTPREL_LO12: return "R_AARCH64_TLSLD_LDST8_DTPREL_LO12"; + case R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC: return "R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC"; + case R_AARCH64_TLSLD_LDST16_DTPREL_LO12: return "R_AARCH64_TLSLD_LDST16_DTPREL_LO12"; + case R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC: return "R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC"; + case R_AARCH64_TLSLD_LDST32_DTPREL_LO12: return "R_AARCH64_TLSLD_LDST32_DTPREL_LO12"; + case R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC: return "R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC"; + case R_AARCH64_TLSLD_LDST64_DTPREL_LO12: return "R_AARCH64_TLSLD_LDST64_DTPREL_LO12"; + case R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC: return "R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC"; + case R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: return "R_AARCH64_TLSIE_MOVW_GOTTPREL_G1"; + case R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: return "R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC"; + case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: return "R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21"; + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: return "R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC"; + case R_AARCH64_TLSIE_LD_GOTTPREL_PREL19: return "R_AARCH64_TLSIE_LD_GOTTPREL_PREL19"; + case R_AARCH64_TLSLE_MOVW_TPREL_G2: return "R_AARCH64_TLSLE_MOVW_TPREL_G2"; + case R_AARCH64_TLSLE_MOVW_TPREL_G1: return "R_AARCH64_TLSLE_MOVW_TPREL_G1"; + case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: return "R_AARCH64_TLSLE_MOVW_TPREL_G1_NC"; + case R_AARCH64_TLSLE_MOVW_TPREL_G0: return "R_AARCH64_TLSLE_MOVW_TPREL_G0"; + case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: return "R_AARCH64_TLSLE_MOVW_TPREL_G0_NC"; + case R_AARCH64_TLSLE_ADD_TPREL_HI12: return "R_AARCH64_TLSLE_ADD_TPREL_HI12"; + case R_AARCH64_TLSLE_ADD_TPREL_LO12: return "R_AARCH64_TLSLE_ADD_TPREL_LO12"; + case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: return "R_AARCH64_TLSLE_ADD_TPREL_LO12_NC"; + case R_AARCH64_TLSLE_LDST8_TPREL_LO12: return "R_AARCH64_TLSLE_LDST8_TPREL_LO12"; + case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC: return "R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC"; + case R_AARCH64_TLSLE_LDST16_TPREL_LO12: return "R_AARCH64_TLSLE_LDST16_TPREL_LO12"; + case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: return "R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC"; + case R_AARCH64_TLSLE_LDST32_TPREL_LO12: return "R_AARCH64_TLSLE_LDST32_TPREL_LO12"; + case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: return "R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC"; + case R_AARCH64_TLSLE_LDST64_TPREL_LO12: return "R_AARCH64_TLSLE_LDST64_TPREL_LO12"; + case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: return "R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC"; + case R_AARCH64_TLSDESC_ADR_PAGE21: return "R_AARCH64_TLSDESC_ADR_PAGE21"; + case 
R_AARCH64_TLSDESC_LD64_LO12: return "R_AARCH64_TLSDESC_LD64_LO12"; + case R_AARCH64_TLSDESC_ADD_LO12: return "R_AARCH64_TLSDESC_ADD_LO12"; + case R_AARCH64_TLSDESC_CALL: return "R_AARCH64_TLSDESC_CALL"; + case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC: return "R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC"; + case R_AARCH64_COPY: return "R_AARCH64_COPY"; + case R_AARCH64_GLOB_DAT: return "R_AARCH64_GLOB_DAT"; + case R_AARCH64_JUMP_SLOT: return "R_AARCH64_JUMP_SLOT"; + case R_AARCH64_RELATIVE: return "R_AARCH64_RELATIVE"; + case R_AARCH64_TLS_DTPMOD64: return "R_AARCH64_TLS_DTPMOD64"; + case R_AARCH64_TLS_DTPREL64: return "R_AARCH64_TLS_DTPREL64"; + case R_AARCH64_TLS_TPREL64: return "R_AARCH64_TLS_TPREL64"; + case R_AARCH64_TLSDESC: return "R_AARCH64_TLSDESC"; + case R_AARCH64_IRELATIVE: return "R_AARCH64_IRELATIVE"; + } + return "unknown (" + std::to_string(r_type) + ")"; +} + +template <> +std::string rel_to_string(u32 r_type) { + switch (r_type) { + case R_ARM_NONE: return "R_ARM_NONE"; + case R_ARM_PC24: return "R_ARM_PC24"; + case R_ARM_ABS32: return "R_ARM_ABS32"; + case R_ARM_REL32: return "R_ARM_REL32"; + case R_ARM_LDR_PC_G0: return "R_ARM_LDR_PC_G0"; + case R_ARM_ABS16: return "R_ARM_ABS16"; + case R_ARM_ABS12: return "R_ARM_ABS12"; + case R_ARM_THM_ABS5: return "R_ARM_THM_ABS5"; + case R_ARM_ABS8: return "R_ARM_ABS8"; + case R_ARM_SBREL32: return "R_ARM_SBREL32"; + case R_ARM_THM_CALL: return "R_ARM_THM_CALL"; + case R_ARM_THM_PC8: return "R_ARM_THM_PC8"; + case R_ARM_BREL_ADJ: return "R_ARM_BREL_ADJ"; + case R_ARM_TLS_DESC: return "R_ARM_TLS_DESC"; + case R_ARM_THM_SWI8: return "R_ARM_THM_SWI8"; + case R_ARM_XPC25: return "R_ARM_XPC25"; + case R_ARM_THM_XPC22: return "R_ARM_THM_XPC22"; + case R_ARM_TLS_DTPMOD32: return "R_ARM_TLS_DTPMOD32"; + case R_ARM_TLS_DTPOFF32: return "R_ARM_TLS_DTPOFF32"; + case R_ARM_TLS_TPOFF32: return "R_ARM_TLS_TPOFF32"; + case R_ARM_COPY: return "R_ARM_COPY"; + case R_ARM_GLOB_DAT: return "R_ARM_GLOB_DAT"; + case R_ARM_JUMP_SLOT: return "R_ARM_JUMP_SLOT"; + case R_ARM_RELATIVE: return "R_ARM_RELATIVE"; + case R_ARM_GOTOFF32: return "R_ARM_GOTOFF32"; + case R_ARM_BASE_PREL: return "R_ARM_BASE_PREL"; + case R_ARM_GOT_BREL: return "R_ARM_GOT_BREL"; + case R_ARM_PLT32: return "R_ARM_PLT32"; + case R_ARM_CALL: return "R_ARM_CALL"; + case R_ARM_JUMP24: return "R_ARM_JUMP24"; + case R_ARM_THM_JUMP24: return "R_ARM_THM_JUMP24"; + case R_ARM_BASE_ABS: return "R_ARM_BASE_ABS"; + case R_ARM_ALU_PCREL_7_0: return "R_ARM_ALU_PCREL_7_0"; + case R_ARM_ALU_PCREL_15_8: return "R_ARM_ALU_PCREL_15_8"; + case R_ARM_ALU_PCREL_23_15: return "R_ARM_ALU_PCREL_23_15"; + case R_ARM_LDR_SBREL_11_0_NC: return "R_ARM_LDR_SBREL_11_0_NC"; + case R_ARM_ALU_SBREL_19_12_NC: return "R_ARM_ALU_SBREL_19_12_NC"; + case R_ARM_ALU_SBREL_27_20_CK: return "R_ARM_ALU_SBREL_27_20_CK"; + case R_ARM_TARGET1: return "R_ARM_TARGET1"; + case R_ARM_SBREL31: return "R_ARM_SBREL31"; + case R_ARM_V4BX: return "R_ARM_V4BX"; + case R_ARM_TARGET2: return "R_ARM_TARGET2"; + case R_ARM_PREL31: return "R_ARM_PREL31"; + case R_ARM_MOVW_ABS_NC: return "R_ARM_MOVW_ABS_NC"; + case R_ARM_MOVT_ABS: return "R_ARM_MOVT_ABS"; + case R_ARM_MOVW_PREL_NC: return "R_ARM_MOVW_PREL_NC"; + case R_ARM_MOVT_PREL: return "R_ARM_MOVT_PREL"; + case R_ARM_THM_MOVW_ABS_NC: return "R_ARM_THM_MOVW_ABS_NC"; + case R_ARM_THM_MOVT_ABS: return "R_ARM_THM_MOVT_ABS"; + case R_ARM_THM_MOVW_PREL_NC: return "R_ARM_THM_MOVW_PREL_NC"; + case R_ARM_THM_MOVT_PREL: return "R_ARM_THM_MOVT_PREL"; + case R_ARM_THM_JUMP19: return "R_ARM_THM_JUMP19"; + case 
R_ARM_THM_JUMP6: return "R_ARM_THM_JUMP6"; + case R_ARM_THM_ALU_PREL_11_0: return "R_ARM_THM_ALU_PREL_11_0"; + case R_ARM_THM_PC12: return "R_ARM_THM_PC12"; + case R_ARM_ABS32_NOI: return "R_ARM_ABS32_NOI"; + case R_ARM_REL32_NOI: return "R_ARM_REL32_NOI"; + case R_ARM_ALU_PC_G0_NC: return "R_ARM_ALU_PC_G0_NC"; + case R_ARM_ALU_PC_G0: return "R_ARM_ALU_PC_G0"; + case R_ARM_ALU_PC_G1_NC: return "R_ARM_ALU_PC_G1_NC"; + case R_ARM_ALU_PC_G1: return "R_ARM_ALU_PC_G1"; + case R_ARM_ALU_PC_G2: return "R_ARM_ALU_PC_G2"; + case R_ARM_LDR_PC_G1: return "R_ARM_LDR_PC_G1"; + case R_ARM_LDR_PC_G2: return "R_ARM_LDR_PC_G2"; + case R_ARM_LDRS_PC_G0: return "R_ARM_LDRS_PC_G0"; + case R_ARM_LDRS_PC_G1: return "R_ARM_LDRS_PC_G1"; + case R_ARM_LDRS_PC_G2: return "R_ARM_LDRS_PC_G2"; + case R_ARM_LDC_PC_G0: return "R_ARM_LDC_PC_G0"; + case R_ARM_LDC_PC_G1: return "R_ARM_LDC_PC_G1"; + case R_ARM_LDC_PC_G2: return "R_ARM_LDC_PC_G2"; + case R_ARM_ALU_SB_G0_NC: return "R_ARM_ALU_SB_G0_NC"; + case R_ARM_ALU_SB_G0: return "R_ARM_ALU_SB_G0"; + case R_ARM_ALU_SB_G1_NC: return "R_ARM_ALU_SB_G1_NC"; + case R_ARM_ALU_SB_G1: return "R_ARM_ALU_SB_G1"; + case R_ARM_ALU_SB_G2: return "R_ARM_ALU_SB_G2"; + case R_ARM_LDR_SB_G0: return "R_ARM_LDR_SB_G0"; + case R_ARM_LDR_SB_G1: return "R_ARM_LDR_SB_G1"; + case R_ARM_LDR_SB_G2: return "R_ARM_LDR_SB_G2"; + case R_ARM_LDRS_SB_G0: return "R_ARM_LDRS_SB_G0"; + case R_ARM_LDRS_SB_G1: return "R_ARM_LDRS_SB_G1"; + case R_ARM_LDRS_SB_G2: return "R_ARM_LDRS_SB_G2"; + case R_ARM_LDC_SB_G0: return "R_ARM_LDC_SB_G0"; + case R_ARM_LDC_SB_G1: return "R_ARM_LDC_SB_G1"; + case R_ARM_LDC_SB_G2: return "R_ARM_LDC_SB_G2"; + case R_ARM_MOVW_BREL_NC: return "R_ARM_MOVW_BREL_NC"; + case R_ARM_MOVT_BREL: return "R_ARM_MOVT_BREL"; + case R_ARM_MOVW_BREL: return "R_ARM_MOVW_BREL"; + case R_ARM_THM_MOVW_BREL_NC: return "R_ARM_THM_MOVW_BREL_NC"; + case R_ARM_THM_MOVT_BREL: return "R_ARM_THM_MOVT_BREL"; + case R_ARM_THM_MOVW_BREL: return "R_ARM_THM_MOVW_BREL"; + case R_ARM_TLS_GOTDESC: return "R_ARM_TLS_GOTDESC"; + case R_ARM_TLS_CALL: return "R_ARM_TLS_CALL"; + case R_ARM_TLS_DESCSEQ: return "R_ARM_TLS_DESCSEQ"; + case R_ARM_THM_TLS_CALL: return "R_ARM_THM_TLS_CALL"; + case R_ARM_PLT32_ABS: return "R_ARM_PLT32_ABS"; + case R_ARM_GOT_ABS: return "R_ARM_GOT_ABS"; + case R_ARM_GOT_PREL: return "R_ARM_GOT_PREL"; + case R_ARM_GOT_BREL12: return "R_ARM_GOT_BREL12"; + case R_ARM_GOTOFF12: return "R_ARM_GOTOFF12"; + case R_ARM_GOTRELAX: return "R_ARM_GOTRELAX"; + case R_ARM_GNU_VTENTRY: return "R_ARM_GNU_VTENTRY"; + case R_ARM_GNU_VTINHERIT: return "R_ARM_GNU_VTINHERIT"; + case R_ARM_THM_JUMP11: return "R_ARM_THM_JUMP11"; + case R_ARM_THM_JUMP8: return "R_ARM_THM_JUMP8"; + case R_ARM_TLS_GD32: return "R_ARM_TLS_GD32"; + case R_ARM_TLS_LDM32: return "R_ARM_TLS_LDM32"; + case R_ARM_TLS_LDO32: return "R_ARM_TLS_LDO32"; + case R_ARM_TLS_IE32: return "R_ARM_TLS_IE32"; + case R_ARM_TLS_LE32: return "R_ARM_TLS_LE32"; + case R_ARM_TLS_LDO12: return "R_ARM_TLS_LDO12"; + case R_ARM_TLS_LE12: return "R_ARM_TLS_LE12"; + case R_ARM_TLS_IE12GP: return "R_ARM_TLS_IE12GP"; + case R_ARM_PRIVATE_0: return "R_ARM_PRIVATE_0"; + case R_ARM_PRIVATE_1: return "R_ARM_PRIVATE_1"; + case R_ARM_PRIVATE_2: return "R_ARM_PRIVATE_2"; + case R_ARM_PRIVATE_3: return "R_ARM_PRIVATE_3"; + case R_ARM_PRIVATE_4: return "R_ARM_PRIVATE_4"; + case R_ARM_PRIVATE_5: return "R_ARM_PRIVATE_5"; + case R_ARM_PRIVATE_6: return "R_ARM_PRIVATE_6"; + case R_ARM_PRIVATE_7: return "R_ARM_PRIVATE_7"; + case R_ARM_PRIVATE_8: return "R_ARM_PRIVATE_8"; + case 
R_ARM_PRIVATE_9: return "R_ARM_PRIVATE_9"; + case R_ARM_PRIVATE_10: return "R_ARM_PRIVATE_10"; + case R_ARM_PRIVATE_11: return "R_ARM_PRIVATE_11"; + case R_ARM_PRIVATE_12: return "R_ARM_PRIVATE_12"; + case R_ARM_PRIVATE_13: return "R_ARM_PRIVATE_13"; + case R_ARM_PRIVATE_14: return "R_ARM_PRIVATE_14"; + case R_ARM_PRIVATE_15: return "R_ARM_PRIVATE_15"; + case R_ARM_ME_TOO: return "R_ARM_ME_TOO"; + case R_ARM_THM_TLS_DESCSEQ16: return "R_ARM_THM_TLS_DESCSEQ16"; + case R_ARM_THM_TLS_DESCSEQ32: return "R_ARM_THM_TLS_DESCSEQ32"; + case R_ARM_THM_BF16: return "R_ARM_THM_BF16"; + case R_ARM_THM_BF12: return "R_ARM_THM_BF12"; + case R_ARM_THM_BF18: return "R_ARM_THM_BF18"; + case R_ARM_IRELATIVE: return "R_ARM_IRELATIVE"; + } + return "unknown (" + std::to_string(r_type) + ")"; +} + +template <> +std::string rel_to_string(u32 r_type) { + switch (r_type) { + case R_RISCV_NONE: return "R_RISCV_NONE"; + case R_RISCV_32: return "R_RISCV_32"; + case R_RISCV_64: return "R_RISCV_64"; + case R_RISCV_RELATIVE: return "R_RISCV_RELATIVE"; + case R_RISCV_COPY: return "R_RISCV_COPY"; + case R_RISCV_JUMP_SLOT: return "R_RISCV_JUMP_SLOT"; + case R_RISCV_TLS_DTPMOD32: return "R_RISCV_TLS_DTPMOD32"; + case R_RISCV_TLS_DTPMOD64: return "R_RISCV_TLS_DTPMOD64"; + case R_RISCV_TLS_DTPREL32: return "R_RISCV_TLS_DTPREL32"; + case R_RISCV_TLS_DTPREL64: return "R_RISCV_TLS_DTPREL64"; + case R_RISCV_TLS_TPREL32: return "R_RISCV_TLS_TPREL32"; + case R_RISCV_TLS_TPREL64: return "R_RISCV_TLS_TPREL64"; + case R_RISCV_BRANCH: return "R_RISCV_BRANCH"; + case R_RISCV_JAL: return "R_RISCV_JAL"; + case R_RISCV_CALL: return "R_RISCV_CALL"; + case R_RISCV_CALL_PLT: return "R_RISCV_CALL_PLT"; + case R_RISCV_GOT_HI20: return "R_RISCV_GOT_HI20"; + case R_RISCV_TLS_GOT_HI20: return "R_RISCV_TLS_GOT_HI20"; + case R_RISCV_TLS_GD_HI20: return "R_RISCV_TLS_GD_HI20"; + case R_RISCV_PCREL_HI20: return "R_RISCV_PCREL_HI20"; + case R_RISCV_PCREL_LO12_I: return "R_RISCV_PCREL_LO12_I"; + case R_RISCV_PCREL_LO12_S: return "R_RISCV_PCREL_LO12_S"; + case R_RISCV_HI20: return "R_RISCV_HI20"; + case R_RISCV_LO12_I: return "R_RISCV_LO12_I"; + case R_RISCV_LO12_S: return "R_RISCV_LO12_S"; + case R_RISCV_TPREL_HI20: return "R_RISCV_TPREL_HI20"; + case R_RISCV_TPREL_LO12_I: return "R_RISCV_TPREL_LO12_I"; + case R_RISCV_TPREL_LO12_S: return "R_RISCV_TPREL_LO12_S"; + case R_RISCV_TPREL_ADD: return "R_RISCV_TPREL_ADD"; + case R_RISCV_ADD8: return "R_RISCV_ADD8"; + case R_RISCV_ADD16: return "R_RISCV_ADD16"; + case R_RISCV_ADD32: return "R_RISCV_ADD32"; + case R_RISCV_ADD64: return "R_RISCV_ADD64"; + case R_RISCV_SUB8: return "R_RISCV_SUB8"; + case R_RISCV_SUB16: return "R_RISCV_SUB16"; + case R_RISCV_SUB32: return "R_RISCV_SUB32"; + case R_RISCV_SUB64: return "R_RISCV_SUB64"; + case R_RISCV_ALIGN: return "R_RISCV_ALIGN"; + case R_RISCV_RVC_BRANCH: return "R_RISCV_RVC_BRANCH"; + case R_RISCV_RVC_JUMP: return "R_RISCV_RVC_JUMP"; + case R_RISCV_RVC_LUI: return "R_RISCV_RVC_LUI"; + case R_RISCV_RELAX: return "R_RISCV_RELAX"; + case R_RISCV_SUB6: return "R_RISCV_SUB6"; + case R_RISCV_SET6: return "R_RISCV_SET6"; + case R_RISCV_SET8: return "R_RISCV_SET8"; + case R_RISCV_SET16: return "R_RISCV_SET16"; + case R_RISCV_SET32: return "R_RISCV_SET32"; + case R_RISCV_32_PCREL: return "R_RISCV_32_PCREL"; + case R_RISCV_IRELATIVE: return "R_RISCV_IRELATIVE"; + case R_RISCV_PLT32: return "R_RISCV_PLT32"; + case R_RISCV_SET_ULEB128: return "R_RISCV_SET_ULEB128"; + case R_RISCV_SUB_ULEB128: return "R_RISCV_SUB_ULEB128"; + } + return "unknown (" + std::to_string(r_type) + 
")"; +} + +template <> +std::string rel_to_string(u32 r_type) { + return rel_to_string(r_type); +} + +template <> +std::string rel_to_string(u32 r_type) { + return rel_to_string(r_type); +} + +template <> +std::string rel_to_string(u32 r_type) { + return rel_to_string(r_type); +} + +template <> +std::string rel_to_string(u32 r_type) { + switch (r_type) { + case R_PPC_NONE: return "R_PPC_NONE"; + case R_PPC_ADDR32: return "R_PPC_ADDR32"; + case R_PPC_ADDR24: return "R_PPC_ADDR24"; + case R_PPC_ADDR16: return "R_PPC_ADDR16"; + case R_PPC_ADDR16_LO: return "R_PPC_ADDR16_LO"; + case R_PPC_ADDR16_HI: return "R_PPC_ADDR16_HI"; + case R_PPC_ADDR16_HA: return "R_PPC_ADDR16_HA"; + case R_PPC_ADDR14: return "R_PPC_ADDR14"; + case R_PPC_ADDR14_BRTAKEN: return "R_PPC_ADDR14_BRTAKEN"; + case R_PPC_ADDR14_BRNTAKEN: return "R_PPC_ADDR14_BRNTAKEN"; + case R_PPC_REL24: return "R_PPC_REL24"; + case R_PPC_REL14: return "R_PPC_REL14"; + case R_PPC_REL14_BRTAKEN: return "R_PPC_REL14_BRTAKEN"; + case R_PPC_REL14_BRNTAKEN: return "R_PPC_REL14_BRNTAKEN"; + case R_PPC_GOT16: return "R_PPC_GOT16"; + case R_PPC_GOT16_LO: return "R_PPC_GOT16_LO"; + case R_PPC_GOT16_HI: return "R_PPC_GOT16_HI"; + case R_PPC_GOT16_HA: return "R_PPC_GOT16_HA"; + case R_PPC_PLTREL24: return "R_PPC_PLTREL24"; + case R_PPC_COPY: return "R_PPC_COPY"; + case R_PPC_GLOB_DAT: return "R_PPC_GLOB_DAT"; + case R_PPC_JMP_SLOT: return "R_PPC_JMP_SLOT"; + case R_PPC_RELATIVE: return "R_PPC_RELATIVE"; + case R_PPC_LOCAL24PC: return "R_PPC_LOCAL24PC"; + case R_PPC_UADDR32: return "R_PPC_UADDR32"; + case R_PPC_UADDR16: return "R_PPC_UADDR16"; + case R_PPC_REL32: return "R_PPC_REL32"; + case R_PPC_PLT32: return "R_PPC_PLT32"; + case R_PPC_PLTREL32: return "R_PPC_PLTREL32"; + case R_PPC_PLT16_LO: return "R_PPC_PLT16_LO"; + case R_PPC_PLT16_HI: return "R_PPC_PLT16_HI"; + case R_PPC_PLT16_HA: return "R_PPC_PLT16_HA"; + case R_PPC_SDAREL16: return "R_PPC_SDAREL16"; + case R_PPC_SECTOFF: return "R_PPC_SECTOFF"; + case R_PPC_SECTOFF_LO: return "R_PPC_SECTOFF_LO"; + case R_PPC_SECTOFF_HI: return "R_PPC_SECTOFF_HI"; + case R_PPC_SECTOFF_HA: return "R_PPC_SECTOFF_HA"; + case R_PPC_ADDR30: return "R_PPC_ADDR30"; + case R_PPC_TLS: return "R_PPC_TLS"; + case R_PPC_DTPMOD32: return "R_PPC_DTPMOD32"; + case R_PPC_TPREL16: return "R_PPC_TPREL16"; + case R_PPC_TPREL16_LO: return "R_PPC_TPREL16_LO"; + case R_PPC_TPREL16_HI: return "R_PPC_TPREL16_HI"; + case R_PPC_TPREL16_HA: return "R_PPC_TPREL16_HA"; + case R_PPC_TPREL32: return "R_PPC_TPREL32"; + case R_PPC_DTPREL16: return "R_PPC_DTPREL16"; + case R_PPC_DTPREL16_LO: return "R_PPC_DTPREL16_LO"; + case R_PPC_DTPREL16_HI: return "R_PPC_DTPREL16_HI"; + case R_PPC_DTPREL16_HA: return "R_PPC_DTPREL16_HA"; + case R_PPC_DTPREL32: return "R_PPC_DTPREL32"; + case R_PPC_GOT_TLSGD16: return "R_PPC_GOT_TLSGD16"; + case R_PPC_GOT_TLSGD16_LO: return "R_PPC_GOT_TLSGD16_LO"; + case R_PPC_GOT_TLSGD16_HI: return "R_PPC_GOT_TLSGD16_HI"; + case R_PPC_GOT_TLSGD16_HA: return "R_PPC_GOT_TLSGD16_HA"; + case R_PPC_GOT_TLSLD16: return "R_PPC_GOT_TLSLD16"; + case R_PPC_GOT_TLSLD16_LO: return "R_PPC_GOT_TLSLD16_LO"; + case R_PPC_GOT_TLSLD16_HI: return "R_PPC_GOT_TLSLD16_HI"; + case R_PPC_GOT_TLSLD16_HA: return "R_PPC_GOT_TLSLD16_HA"; + case R_PPC_GOT_TPREL16: return "R_PPC_GOT_TPREL16"; + case R_PPC_GOT_TPREL16_LO: return "R_PPC_GOT_TPREL16_LO"; + case R_PPC_GOT_TPREL16_HI: return "R_PPC_GOT_TPREL16_HI"; + case R_PPC_GOT_TPREL16_HA: return "R_PPC_GOT_TPREL16_HA"; + case R_PPC_GOT_DTPREL16: return "R_PPC_GOT_DTPREL16"; + case 
R_PPC_GOT_DTPREL16_LO: return "R_PPC_GOT_DTPREL16_LO"; + case R_PPC_GOT_DTPREL16_HI: return "R_PPC_GOT_DTPREL16_HI"; + case R_PPC_GOT_DTPREL16_HA: return "R_PPC_GOT_DTPREL16_HA"; + case R_PPC_TLSGD: return "R_PPC_TLSGD"; + case R_PPC_TLSLD: return "R_PPC_TLSLD"; + case R_PPC_PLTSEQ: return "R_PPC_PLTSEQ"; + case R_PPC_PLTCALL: return "R_PPC_PLTCALL"; + case R_PPC_IRELATIVE: return "R_PPC_IRELATIVE"; + case R_PPC_REL16: return "R_PPC_REL16"; + case R_PPC_REL16_LO: return "R_PPC_REL16_LO"; + case R_PPC_REL16_HI: return "R_PPC_REL16_HI"; + case R_PPC_REL16_HA: return "R_PPC_REL16_HA"; + } + return "unknown (" + std::to_string(r_type) + ")"; +} + +template <> +std::string rel_to_string(u32 r_type) { + switch (r_type) { + case R_PPC64_NONE: return "R_PPC64_NONE"; + case R_PPC64_ADDR32: return "R_PPC64_ADDR32"; + case R_PPC64_ADDR24: return "R_PPC64_ADDR24"; + case R_PPC64_ADDR16: return "R_PPC64_ADDR16"; + case R_PPC64_ADDR16_LO: return "R_PPC64_ADDR16_LO"; + case R_PPC64_ADDR16_HI: return "R_PPC64_ADDR16_HI"; + case R_PPC64_ADDR16_HA: return "R_PPC64_ADDR16_HA"; + case R_PPC64_ADDR14: return "R_PPC64_ADDR14"; + case R_PPC64_ADDR14_BRTAKEN: return "R_PPC64_ADDR14_BRTAKEN"; + case R_PPC64_ADDR14_BRNTAKEN: return "R_PPC64_ADDR14_BRNTAKEN"; + case R_PPC64_REL24: return "R_PPC64_REL24"; + case R_PPC64_REL14: return "R_PPC64_REL14"; + case R_PPC64_REL14_BRTAKEN: return "R_PPC64_REL14_BRTAKEN"; + case R_PPC64_REL14_BRNTAKEN: return "R_PPC64_REL14_BRNTAKEN"; + case R_PPC64_GOT16: return "R_PPC64_GOT16"; + case R_PPC64_GOT16_LO: return "R_PPC64_GOT16_LO"; + case R_PPC64_GOT16_HI: return "R_PPC64_GOT16_HI"; + case R_PPC64_GOT16_HA: return "R_PPC64_GOT16_HA"; + case R_PPC64_COPY: return "R_PPC64_COPY"; + case R_PPC64_GLOB_DAT: return "R_PPC64_GLOB_DAT"; + case R_PPC64_JMP_SLOT: return "R_PPC64_JMP_SLOT"; + case R_PPC64_RELATIVE: return "R_PPC64_RELATIVE"; + case R_PPC64_REL32: return "R_PPC64_REL32"; + case R_PPC64_PLT16_LO: return "R_PPC64_PLT16_LO"; + case R_PPC64_PLT16_HI: return "R_PPC64_PLT16_HI"; + case R_PPC64_PLT16_HA: return "R_PPC64_PLT16_HA"; + case R_PPC64_ADDR64: return "R_PPC64_ADDR64"; + case R_PPC64_ADDR16_HIGHER: return "R_PPC64_ADDR16_HIGHER"; + case R_PPC64_ADDR16_HIGHERA: return "R_PPC64_ADDR16_HIGHERA"; + case R_PPC64_ADDR16_HIGHEST: return "R_PPC64_ADDR16_HIGHEST"; + case R_PPC64_ADDR16_HIGHESTA: return "R_PPC64_ADDR16_HIGHESTA"; + case R_PPC64_REL64: return "R_PPC64_REL64"; + case R_PPC64_TOC16: return "R_PPC64_TOC16"; + case R_PPC64_TOC16_LO: return "R_PPC64_TOC16_LO"; + case R_PPC64_TOC16_HI: return "R_PPC64_TOC16_HI"; + case R_PPC64_TOC16_HA: return "R_PPC64_TOC16_HA"; + case R_PPC64_TOC: return "R_PPC64_TOC"; + case R_PPC64_ADDR16_DS: return "R_PPC64_ADDR16_DS"; + case R_PPC64_ADDR16_LO_DS: return "R_PPC64_ADDR16_LO_DS"; + case R_PPC64_GOT16_DS: return "R_PPC64_GOT16_DS"; + case R_PPC64_GOT16_LO_DS: return "R_PPC64_GOT16_LO_DS"; + case R_PPC64_PLT16_LO_DS: return "R_PPC64_PLT16_LO_DS"; + case R_PPC64_TOC16_DS: return "R_PPC64_TOC16_DS"; + case R_PPC64_TOC16_LO_DS: return "R_PPC64_TOC16_LO_DS"; + case R_PPC64_TLS: return "R_PPC64_TLS"; + case R_PPC64_DTPMOD64: return "R_PPC64_DTPMOD64"; + case R_PPC64_TPREL16: return "R_PPC64_TPREL16"; + case R_PPC64_TPREL16_LO: return "R_PPC64_TPREL16_LO"; + case R_PPC64_TPREL16_HI: return "R_PPC64_TPREL16_HI"; + case R_PPC64_TPREL16_HA: return "R_PPC64_TPREL16_HA"; + case R_PPC64_TPREL64: return "R_PPC64_TPREL64"; + case R_PPC64_DTPREL16: return "R_PPC64_DTPREL16"; + case R_PPC64_DTPREL16_LO: return "R_PPC64_DTPREL16_LO"; + case 
R_PPC64_DTPREL16_HI: return "R_PPC64_DTPREL16_HI"; + case R_PPC64_DTPREL16_HA: return "R_PPC64_DTPREL16_HA"; + case R_PPC64_DTPREL64: return "R_PPC64_DTPREL64"; + case R_PPC64_GOT_TLSGD16: return "R_PPC64_GOT_TLSGD16"; + case R_PPC64_GOT_TLSGD16_LO: return "R_PPC64_GOT_TLSGD16_LO"; + case R_PPC64_GOT_TLSGD16_HI: return "R_PPC64_GOT_TLSGD16_HI"; + case R_PPC64_GOT_TLSGD16_HA: return "R_PPC64_GOT_TLSGD16_HA"; + case R_PPC64_GOT_TLSLD16: return "R_PPC64_GOT_TLSLD16"; + case R_PPC64_GOT_TLSLD16_LO: return "R_PPC64_GOT_TLSLD16_LO"; + case R_PPC64_GOT_TLSLD16_HI: return "R_PPC64_GOT_TLSLD16_HI"; + case R_PPC64_GOT_TLSLD16_HA: return "R_PPC64_GOT_TLSLD16_HA"; + case R_PPC64_GOT_TPREL16_DS: return "R_PPC64_GOT_TPREL16_DS"; + case R_PPC64_GOT_TPREL16_LO_DS: return "R_PPC64_GOT_TPREL16_LO_DS"; + case R_PPC64_GOT_TPREL16_HI: return "R_PPC64_GOT_TPREL16_HI"; + case R_PPC64_GOT_TPREL16_HA: return "R_PPC64_GOT_TPREL16_HA"; + case R_PPC64_GOT_DTPREL16_DS: return "R_PPC64_GOT_DTPREL16_DS"; + case R_PPC64_GOT_DTPREL16_LO_DS: return "R_PPC64_GOT_DTPREL16_LO_DS"; + case R_PPC64_GOT_DTPREL16_HI: return "R_PPC64_GOT_DTPREL16_HI"; + case R_PPC64_GOT_DTPREL16_HA: return "R_PPC64_GOT_DTPREL16_HA"; + case R_PPC64_TPREL16_DS: return "R_PPC64_TPREL16_DS"; + case R_PPC64_TPREL16_LO_DS: return "R_PPC64_TPREL16_LO_DS"; + case R_PPC64_TPREL16_HIGHER: return "R_PPC64_TPREL16_HIGHER"; + case R_PPC64_TPREL16_HIGHERA: return "R_PPC64_TPREL16_HIGHERA"; + case R_PPC64_TPREL16_HIGHEST: return "R_PPC64_TPREL16_HIGHEST"; + case R_PPC64_TPREL16_HIGHESTA: return "R_PPC64_TPREL16_HIGHESTA"; + case R_PPC64_DTPREL16_DS: return "R_PPC64_DTPREL16_DS"; + case R_PPC64_DTPREL16_LO_DS: return "R_PPC64_DTPREL16_LO_DS"; + case R_PPC64_DTPREL16_HIGHER: return "R_PPC64_DTPREL16_HIGHER"; + case R_PPC64_DTPREL16_HIGHERA: return "R_PPC64_DTPREL16_HIGHERA"; + case R_PPC64_DTPREL16_HIGHEST: return "R_PPC64_DTPREL16_HIGHEST"; + case R_PPC64_DTPREL16_HIGHESTA: return "R_PPC64_DTPREL16_HIGHESTA"; + case R_PPC64_TLSGD: return "R_PPC64_TLSGD"; + case R_PPC64_TLSLD: return "R_PPC64_TLSLD"; + case R_PPC64_ADDR16_HIGH: return "R_PPC64_ADDR16_HIGH"; + case R_PPC64_ADDR16_HIGHA: return "R_PPC64_ADDR16_HIGHA"; + case R_PPC64_TPREL16_HIGH: return "R_PPC64_TPREL16_HIGH"; + case R_PPC64_TPREL16_HIGHA: return "R_PPC64_TPREL16_HIGHA"; + case R_PPC64_DTPREL16_HIGH: return "R_PPC64_DTPREL16_HIGH"; + case R_PPC64_DTPREL16_HIGHA: return "R_PPC64_DTPREL16_HIGHA"; + case R_PPC64_REL24_NOTOC: return "R_PPC64_REL24_NOTOC"; + case R_PPC64_PLTSEQ: return "R_PPC64_PLTSEQ"; + case R_PPC64_PLTCALL: return "R_PPC64_PLTCALL"; + case R_PPC64_PLTSEQ_NOTOC: return "R_PPC64_PLTSEQ_NOTOC"; + case R_PPC64_PLTCALL_NOTOC: return "R_PPC64_PLTCALL_NOTOC"; + case R_PPC64_PCREL_OPT: return "R_PPC64_PCREL_OPT"; + case R_PPC64_PCREL34: return "R_PPC64_PCREL34"; + case R_PPC64_GOT_PCREL34: return "R_PPC64_GOT_PCREL34"; + case R_PPC64_PLT_PCREL34: return "R_PPC64_PLT_PCREL34"; + case R_PPC64_PLT_PCREL34_NOTOC: return "R_PPC64_PLT_PCREL34_NOTOC"; + case R_PPC64_TPREL34: return "R_PPC64_TPREL34"; + case R_PPC64_DTPREL34: return "R_PPC64_DTPREL34"; + case R_PPC64_GOT_TLSGD_PCREL34: return "R_PPC64_GOT_TLSGD_PCREL34"; + case R_PPC64_GOT_TLSLD_PCREL34: return "R_PPC64_GOT_TLSLD_PCREL34"; + case R_PPC64_GOT_TPREL_PCREL34: return "R_PPC64_GOT_TPREL_PCREL34"; + case R_PPC64_IRELATIVE: return "R_PPC64_IRELATIVE"; + case R_PPC64_REL16: return "R_PPC64_REL16"; + case R_PPC64_REL16_LO: return "R_PPC64_REL16_LO"; + case R_PPC64_REL16_HI: return "R_PPC64_REL16_HI"; + case R_PPC64_REL16_HA: return 
"R_PPC64_REL16_HA"; + } + return "unknown (" + std::to_string(r_type) + ")"; +} + +template <> +std::string rel_to_string(u32 r_type) { + return rel_to_string(r_type); +} + +template <> +std::string rel_to_string(u32 r_type) { + switch (r_type) { + case R_SPARC_NONE: return "R_SPARC_NONE"; + case R_SPARC_8: return "R_SPARC_8"; + case R_SPARC_16: return "R_SPARC_16"; + case R_SPARC_32: return "R_SPARC_32"; + case R_SPARC_DISP8: return "R_SPARC_DISP8"; + case R_SPARC_DISP16: return "R_SPARC_DISP16"; + case R_SPARC_DISP32: return "R_SPARC_DISP32"; + case R_SPARC_WDISP30: return "R_SPARC_WDISP30"; + case R_SPARC_WDISP22: return "R_SPARC_WDISP22"; + case R_SPARC_HI22: return "R_SPARC_HI22"; + case R_SPARC_22: return "R_SPARC_22"; + case R_SPARC_13: return "R_SPARC_13"; + case R_SPARC_LO10: return "R_SPARC_LO10"; + case R_SPARC_GOT10: return "R_SPARC_GOT10"; + case R_SPARC_GOT13: return "R_SPARC_GOT13"; + case R_SPARC_GOT22: return "R_SPARC_GOT22"; + case R_SPARC_PC10: return "R_SPARC_PC10"; + case R_SPARC_PC22: return "R_SPARC_PC22"; + case R_SPARC_WPLT30: return "R_SPARC_WPLT30"; + case R_SPARC_COPY: return "R_SPARC_COPY"; + case R_SPARC_GLOB_DAT: return "R_SPARC_GLOB_DAT"; + case R_SPARC_JMP_SLOT: return "R_SPARC_JMP_SLOT"; + case R_SPARC_RELATIVE: return "R_SPARC_RELATIVE"; + case R_SPARC_UA32: return "R_SPARC_UA32"; + case R_SPARC_PLT32: return "R_SPARC_PLT32"; + case R_SPARC_HIPLT22: return "R_SPARC_HIPLT22"; + case R_SPARC_LOPLT10: return "R_SPARC_LOPLT10"; + case R_SPARC_PCPLT32: return "R_SPARC_PCPLT32"; + case R_SPARC_PCPLT22: return "R_SPARC_PCPLT22"; + case R_SPARC_PCPLT10: return "R_SPARC_PCPLT10"; + case R_SPARC_10: return "R_SPARC_10"; + case R_SPARC_11: return "R_SPARC_11"; + case R_SPARC_64: return "R_SPARC_64"; + case R_SPARC_OLO10: return "R_SPARC_OLO10"; + case R_SPARC_HH22: return "R_SPARC_HH22"; + case R_SPARC_HM10: return "R_SPARC_HM10"; + case R_SPARC_LM22: return "R_SPARC_LM22"; + case R_SPARC_PC_HH22: return "R_SPARC_PC_HH22"; + case R_SPARC_PC_HM10: return "R_SPARC_PC_HM10"; + case R_SPARC_PC_LM22: return "R_SPARC_PC_LM22"; + case R_SPARC_WDISP16: return "R_SPARC_WDISP16"; + case R_SPARC_WDISP19: return "R_SPARC_WDISP19"; + case R_SPARC_7: return "R_SPARC_7"; + case R_SPARC_5: return "R_SPARC_5"; + case R_SPARC_6: return "R_SPARC_6"; + case R_SPARC_DISP64: return "R_SPARC_DISP64"; + case R_SPARC_PLT64: return "R_SPARC_PLT64"; + case R_SPARC_HIX22: return "R_SPARC_HIX22"; + case R_SPARC_LOX10: return "R_SPARC_LOX10"; + case R_SPARC_H44: return "R_SPARC_H44"; + case R_SPARC_M44: return "R_SPARC_M44"; + case R_SPARC_L44: return "R_SPARC_L44"; + case R_SPARC_REGISTER: return "R_SPARC_REGISTER"; + case R_SPARC_UA64: return "R_SPARC_UA64"; + case R_SPARC_UA16: return "R_SPARC_UA16"; + case R_SPARC_TLS_GD_HI22: return "R_SPARC_TLS_GD_HI22"; + case R_SPARC_TLS_GD_LO10: return "R_SPARC_TLS_GD_LO10"; + case R_SPARC_TLS_GD_ADD: return "R_SPARC_TLS_GD_ADD"; + case R_SPARC_TLS_GD_CALL: return "R_SPARC_TLS_GD_CALL"; + case R_SPARC_TLS_LDM_HI22: return "R_SPARC_TLS_LDM_HI22"; + case R_SPARC_TLS_LDM_LO10: return "R_SPARC_TLS_LDM_LO10"; + case R_SPARC_TLS_LDM_ADD: return "R_SPARC_TLS_LDM_ADD"; + case R_SPARC_TLS_LDM_CALL: return "R_SPARC_TLS_LDM_CALL"; + case R_SPARC_TLS_LDO_HIX22: return "R_SPARC_TLS_LDO_HIX22"; + case R_SPARC_TLS_LDO_LOX10: return "R_SPARC_TLS_LDO_LOX10"; + case R_SPARC_TLS_LDO_ADD: return "R_SPARC_TLS_LDO_ADD"; + case R_SPARC_TLS_IE_HI22: return "R_SPARC_TLS_IE_HI22"; + case R_SPARC_TLS_IE_LO10: return "R_SPARC_TLS_IE_LO10"; + case R_SPARC_TLS_IE_LD: return 
"R_SPARC_TLS_IE_LD"; + case R_SPARC_TLS_IE_LDX: return "R_SPARC_TLS_IE_LDX"; + case R_SPARC_TLS_IE_ADD: return "R_SPARC_TLS_IE_ADD"; + case R_SPARC_TLS_LE_HIX22: return "R_SPARC_TLS_LE_HIX22"; + case R_SPARC_TLS_LE_LOX10: return "R_SPARC_TLS_LE_LOX10"; + case R_SPARC_TLS_DTPMOD32: return "R_SPARC_TLS_DTPMOD32"; + case R_SPARC_TLS_DTPMOD64: return "R_SPARC_TLS_DTPMOD64"; + case R_SPARC_TLS_DTPOFF32: return "R_SPARC_TLS_DTPOFF32"; + case R_SPARC_TLS_DTPOFF64: return "R_SPARC_TLS_DTPOFF64"; + case R_SPARC_TLS_TPOFF32: return "R_SPARC_TLS_TPOFF32"; + case R_SPARC_TLS_TPOFF64: return "R_SPARC_TLS_TPOFF64"; + case R_SPARC_GOTDATA_HIX22: return "R_SPARC_GOTDATA_HIX22"; + case R_SPARC_GOTDATA_LOX10: return "R_SPARC_GOTDATA_LOX10"; + case R_SPARC_GOTDATA_OP_HIX22: return "R_SPARC_GOTDATA_OP_HIX22"; + case R_SPARC_GOTDATA_OP_LOX10: return "R_SPARC_GOTDATA_OP_LOX10"; + case R_SPARC_GOTDATA_OP: return "R_SPARC_GOTDATA_OP"; + case R_SPARC_IRELATIVE: return "R_SPARC_IRELATIVE"; + } + return "unknown (" + std::to_string(r_type) + ")"; +} + +template <> +std::string rel_to_string(u32 r_type) { + switch (r_type) { + case R_390_NONE: return "R_390_NONE"; + case R_390_8: return "R_390_8"; + case R_390_12: return "R_390_12"; + case R_390_16: return "R_390_16"; + case R_390_32: return "R_390_32"; + case R_390_PC32: return "R_390_PC32"; + case R_390_GOT12: return "R_390_GOT12"; + case R_390_GOT32: return "R_390_GOT32"; + case R_390_PLT32: return "R_390_PLT32"; + case R_390_COPY: return "R_390_COPY"; + case R_390_GLOB_DAT: return "R_390_GLOB_DAT"; + case R_390_JMP_SLOT: return "R_390_JMP_SLOT"; + case R_390_RELATIVE: return "R_390_RELATIVE"; + case R_390_GOTOFF32: return "R_390_GOTOFF32"; + case R_390_GOTPC: return "R_390_GOTPC"; + case R_390_GOT16: return "R_390_GOT16"; + case R_390_PC16: return "R_390_PC16"; + case R_390_PC16DBL: return "R_390_PC16DBL"; + case R_390_PLT16DBL: return "R_390_PLT16DBL"; + case R_390_PC32DBL: return "R_390_PC32DBL"; + case R_390_PLT32DBL: return "R_390_PLT32DBL"; + case R_390_GOTPCDBL: return "R_390_GOTPCDBL"; + case R_390_64: return "R_390_64"; + case R_390_PC64: return "R_390_PC64"; + case R_390_GOT64: return "R_390_GOT64"; + case R_390_PLT64: return "R_390_PLT64"; + case R_390_GOTENT: return "R_390_GOTENT"; + case R_390_GOTOFF16: return "R_390_GOTOFF16"; + case R_390_GOTOFF64: return "R_390_GOTOFF64"; + case R_390_GOTPLT12: return "R_390_GOTPLT12"; + case R_390_GOTPLT16: return "R_390_GOTPLT16"; + case R_390_GOTPLT32: return "R_390_GOTPLT32"; + case R_390_GOTPLT64: return "R_390_GOTPLT64"; + case R_390_GOTPLTENT: return "R_390_GOTPLTENT"; + case R_390_PLTOFF16: return "R_390_PLTOFF16"; + case R_390_PLTOFF32: return "R_390_PLTOFF32"; + case R_390_PLTOFF64: return "R_390_PLTOFF64"; + case R_390_TLS_LOAD: return "R_390_TLS_LOAD"; + case R_390_TLS_GDCALL: return "R_390_TLS_GDCALL"; + case R_390_TLS_LDCALL: return "R_390_TLS_LDCALL"; + case R_390_TLS_GD32: return "R_390_TLS_GD32"; + case R_390_TLS_GD64: return "R_390_TLS_GD64"; + case R_390_TLS_GOTIE12: return "R_390_TLS_GOTIE12"; + case R_390_TLS_GOTIE32: return "R_390_TLS_GOTIE32"; + case R_390_TLS_GOTIE64: return "R_390_TLS_GOTIE64"; + case R_390_TLS_LDM32: return "R_390_TLS_LDM32"; + case R_390_TLS_LDM64: return "R_390_TLS_LDM64"; + case R_390_TLS_IE32: return "R_390_TLS_IE32"; + case R_390_TLS_IE64: return "R_390_TLS_IE64"; + case R_390_TLS_IEENT: return "R_390_TLS_IEENT"; + case R_390_TLS_LE32: return "R_390_TLS_LE32"; + case R_390_TLS_LE64: return "R_390_TLS_LE64"; + case R_390_TLS_LDO32: return "R_390_TLS_LDO32"; + case 
R_390_TLS_LDO64: return "R_390_TLS_LDO64"; + case R_390_TLS_DTPMOD: return "R_390_TLS_DTPMOD"; + case R_390_TLS_DTPOFF: return "R_390_TLS_DTPOFF"; + case R_390_TLS_TPOFF: return "R_390_TLS_TPOFF"; + case R_390_20: return "R_390_20"; + case R_390_GOT20: return "R_390_GOT20"; + case R_390_GOTPLT20: return "R_390_GOTPLT20"; + case R_390_TLS_GOTIE20: return "R_390_TLS_GOTIE20"; + case R_390_IRELATIVE: return "R_390_IRELATIVE"; + case R_390_PC12DBL: return "R_390_PC12DBL"; + case R_390_PLT12DBL: return "R_390_PLT12DBL"; + case R_390_PC24DBL: return "R_390_PC24DBL"; + case R_390_PLT24DBL: return "R_390_PLT24DBL"; + } + return "unknown (" + std::to_string(r_type) + ")"; +} + +template <> +std::string rel_to_string(u32 r_type) { + switch (r_type) { + case R_68K_NONE: return "R_68K_NONE"; + case R_68K_32: return "R_68K_32"; + case R_68K_16: return "R_68K_16"; + case R_68K_8: return "R_68K_8"; + case R_68K_PC32: return "R_68K_PC32"; + case R_68K_PC16: return "R_68K_PC16"; + case R_68K_PC8: return "R_68K_PC8"; + case R_68K_GOTPCREL32: return "R_68K_GOTPCREL32"; + case R_68K_GOTPCREL16: return "R_68K_GOTPCREL16"; + case R_68K_GOTPCREL8: return "R_68K_GOTPCREL8"; + case R_68K_GOTOFF32: return "R_68K_GOTOFF32"; + case R_68K_GOTOFF16: return "R_68K_GOTOFF16"; + case R_68K_GOTOFF8: return "R_68K_GOTOFF8"; + case R_68K_PLT32: return "R_68K_PLT32"; + case R_68K_PLT16: return "R_68K_PLT16"; + case R_68K_PLT8: return "R_68K_PLT8"; + case R_68K_PLTOFF32: return "R_68K_PLTOFF32"; + case R_68K_PLTOFF16: return "R_68K_PLTOFF16"; + case R_68K_PLTOFF8: return "R_68K_PLTOFF8"; + case R_68K_COPY: return "R_68K_COPY"; + case R_68K_GLOB_DAT: return "R_68K_GLOB_DAT"; + case R_68K_JMP_SLOT: return "R_68K_JMP_SLOT"; + case R_68K_RELATIVE: return "R_68K_RELATIVE"; + case R_68K_TLS_GD32: return "R_68K_TLS_GD32"; + case R_68K_TLS_GD16: return "R_68K_TLS_GD16"; + case R_68K_TLS_GD8: return "R_68K_TLS_GD8"; + case R_68K_TLS_LDM32: return "R_68K_TLS_LDM32"; + case R_68K_TLS_LDM16: return "R_68K_TLS_LDM16"; + case R_68K_TLS_LDM8: return "R_68K_TLS_LDM8"; + case R_68K_TLS_LDO32: return "R_68K_TLS_LDO32"; + case R_68K_TLS_LDO16: return "R_68K_TLS_LDO16"; + case R_68K_TLS_LDO8: return "R_68K_TLS_LDO8"; + case R_68K_TLS_IE32: return "R_68K_TLS_IE32"; + case R_68K_TLS_IE16: return "R_68K_TLS_IE16"; + case R_68K_TLS_IE8: return "R_68K_TLS_IE8"; + case R_68K_TLS_LE32: return "R_68K_TLS_LE32"; + case R_68K_TLS_LE16: return "R_68K_TLS_LE16"; + case R_68K_TLS_LE8: return "R_68K_TLS_LE8"; + case R_68K_TLS_DTPMOD32: return "R_68K_TLS_DTPMOD32"; + case R_68K_TLS_DTPREL32: return "R_68K_TLS_DTPREL32"; + case R_68K_TLS_TPREL32: return "R_68K_TLS_TPREL32"; + } + return "unknown (" + std::to_string(r_type) + ")"; +} + +template <> +std::string rel_to_string(u32 r_type) { + switch (r_type) { + case R_SH_NONE: return "R_SH_NONE"; + case R_SH_DIR32: return "R_SH_DIR32"; + case R_SH_REL32: return "R_SH_REL32"; + case R_SH_DIR8WPN: return "R_SH_DIR8WPN"; + case R_SH_IND12W: return "R_SH_IND12W"; + case R_SH_DIR8WPL: return "R_SH_DIR8WPL"; + case R_SH_DIR8WPZ: return "R_SH_DIR8WPZ"; + case R_SH_DIR8BP: return "R_SH_DIR8BP"; + case R_SH_DIR8W: return "R_SH_DIR8W"; + case R_SH_DIR8L: return "R_SH_DIR8L"; + case R_SH_TLS_GD_32: return "R_SH_TLS_GD_32"; + case R_SH_TLS_LD_32: return "R_SH_TLS_LD_32"; + case R_SH_TLS_LDO_32: return "R_SH_TLS_LDO_32"; + case R_SH_TLS_IE_32: return "R_SH_TLS_IE_32"; + case R_SH_TLS_LE_32: return "R_SH_TLS_LE_32"; + case R_SH_TLS_DTPMOD32: return "R_SH_TLS_DTPMOD32"; + case R_SH_TLS_DTPOFF32: return "R_SH_TLS_DTPOFF32"; + 
case R_SH_TLS_TPOFF32: return "R_SH_TLS_TPOFF32"; + case R_SH_GOT32: return "R_SH_GOT32"; + case R_SH_PLT32: return "R_SH_PLT32"; + case R_SH_COPY: return "R_SH_COPY"; + case R_SH_GLOB_DAT: return "R_SH_GLOB_DAT"; + case R_SH_JMP_SLOT: return "R_SH_JMP_SLOT"; + case R_SH_RELATIVE: return "R_SH_RELATIVE"; + case R_SH_GOTOFF: return "R_SH_GOTOFF"; + case R_SH_GOTPC: return "R_SH_GOTPC"; + case R_SH_GOTPLT32: return "R_SH_GOTPLT32"; + } + return "unknown (" + std::to_string(r_type) + ")"; +} + +template <> +std::string rel_to_string(u32 r_type) { + switch (r_type) { + case R_ALPHA_NONE: return "R_ALPHA_NONE"; + case R_ALPHA_REFLONG: return "R_ALPHA_REFLONG"; + case R_ALPHA_REFQUAD: return "R_ALPHA_REFQUAD"; + case R_ALPHA_GPREL32: return "R_ALPHA_GPREL32"; + case R_ALPHA_LITERAL: return "R_ALPHA_LITERAL"; + case R_ALPHA_LITUSE: return "R_ALPHA_LITUSE"; + case R_ALPHA_GPDISP: return "R_ALPHA_GPDISP"; + case R_ALPHA_BRADDR: return "R_ALPHA_BRADDR"; + case R_ALPHA_HINT: return "R_ALPHA_HINT"; + case R_ALPHA_SREL16: return "R_ALPHA_SREL16"; + case R_ALPHA_SREL32: return "R_ALPHA_SREL32"; + case R_ALPHA_SREL64: return "R_ALPHA_SREL64"; + case R_ALPHA_GPRELHIGH: return "R_ALPHA_GPRELHIGH"; + case R_ALPHA_GPRELLOW: return "R_ALPHA_GPRELLOW"; + case R_ALPHA_GPREL16: return "R_ALPHA_GPREL16"; + case R_ALPHA_COPY: return "R_ALPHA_COPY"; + case R_ALPHA_GLOB_DAT: return "R_ALPHA_GLOB_DAT"; + case R_ALPHA_JMP_SLOT: return "R_ALPHA_JMP_SLOT"; + case R_ALPHA_RELATIVE: return "R_ALPHA_RELATIVE"; + case R_ALPHA_BRSGP: return "R_ALPHA_BRSGP"; + case R_ALPHA_TLSGD: return "R_ALPHA_TLSGD"; + case R_ALPHA_TLSLDM: return "R_ALPHA_TLSLDM"; + case R_ALPHA_DTPMOD64: return "R_ALPHA_DTPMOD64"; + case R_ALPHA_GOTDTPREL: return "R_ALPHA_GOTDTPREL"; + case R_ALPHA_DTPREL64: return "R_ALPHA_DTPREL64"; + case R_ALPHA_DTPRELHI: return "R_ALPHA_DTPRELHI"; + case R_ALPHA_DTPRELLO: return "R_ALPHA_DTPRELLO"; + case R_ALPHA_DTPREL16: return "R_ALPHA_DTPREL16"; + case R_ALPHA_GOTTPREL: return "R_ALPHA_GOTTPREL"; + case R_ALPHA_TPREL64: return "R_ALPHA_TPREL64"; + case R_ALPHA_TPRELHI: return "R_ALPHA_TPRELHI"; + case R_ALPHA_TPRELLO: return "R_ALPHA_TPRELLO"; + case R_ALPHA_TPREL16: return "R_ALPHA_TPREL16"; + } + return "unknown (" + std::to_string(r_type) + ")"; +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/elf.h b/third_party/mold/elf/elf.h new file mode 100644 index 00000000000..0f0a2c72063 --- /dev/null +++ b/third_party/mold/elf/elf.h @@ -0,0 +1,2053 @@ +// clang-format off +#pragma once + +// MISSING #include "../common/integers.h" + +#include "third_party/libcxx/ostream" +#include "third_party/libcxx/string" +#include "third_party/libcxx/type_traits" + +namespace mold::elf { + +struct X86_64; +struct I386; +struct ARM64; +struct ARM32; +struct RV64LE; +struct RV64BE; +struct RV32LE; +struct RV32BE; +struct PPC32; +struct PPC64V1; +struct PPC64V2; +struct S390X; +struct SPARC64; +struct M68K; +struct SH4; +struct ALPHA; + +template struct ElfSym; +template struct ElfShdr; +template struct ElfEhdr; +template struct ElfPhdr; +template struct ElfRel; +template struct ElfDyn; +template struct ElfVerneed; +template struct ElfVernaux; +template struct ElfVerdef; +template struct ElfVerdaux; +template struct ElfChdr; +template struct ElfNhdr; + +template +std::string rel_to_string(u32 r_type); + +template +std::ostream &operator<<(std::ostream &out, const ElfRel &rel) { + out << rel_to_string(rel.r_type); + return out; +} + +enum : u32 { + R_NONE = 0, +}; + +enum : u32 { + SHN_UNDEF = 0, + SHN_ABS = 0xfff1, + 
SHN_COMMON = 0xfff2, + SHN_LORESERVE = 0xff00, + SHN_XINDEX = 0xffff, +}; + +enum : u32 { + SHT_NULL = 0, + SHT_PROGBITS = 1, + SHT_SYMTAB = 2, + SHT_STRTAB = 3, + SHT_RELA = 4, + SHT_HASH = 5, + SHT_DYNAMIC = 6, + SHT_NOTE = 7, + SHT_NOBITS = 8, + SHT_REL = 9, + SHT_SHLIB = 10, + SHT_DYNSYM = 11, + SHT_INIT_ARRAY = 14, + SHT_FINI_ARRAY = 15, + SHT_PREINIT_ARRAY = 16, + SHT_GROUP = 17, + SHT_SYMTAB_SHNDX = 18, + SHT_RELR = 19, + SHT_LLVM_ADDRSIG = 0x6fff4c03, + SHT_GNU_HASH = 0x6ffffff6, + SHT_GNU_VERDEF = 0x6ffffffd, + SHT_GNU_VERNEED = 0x6ffffffe, + SHT_GNU_VERSYM = 0x6fffffff, + SHT_X86_64_UNWIND = 0x70000001, + SHT_ARM_EXIDX = 0x70000001, + SHT_ARM_ATTRIBUTES = 0x70000003, +}; + +enum : u32 { + SHF_WRITE = 0x1, + SHF_ALLOC = 0x2, + SHF_EXECINSTR = 0x4, + SHF_MERGE = 0x10, + SHF_STRINGS = 0x20, + SHF_INFO_LINK = 0x40, + SHF_LINK_ORDER = 0x80, + SHF_GROUP = 0x200, + SHF_TLS = 0x400, + SHF_COMPRESSED = 0x800, + SHF_GNU_RETAIN = 0x200000, + SHF_EXCLUDE = 0x80000000, +}; + +enum : u32 { + GRP_COMDAT = 1, +}; + +enum : u32 { + STT_NOTYPE = 0, + STT_OBJECT = 1, + STT_FUNC = 2, + STT_SECTION = 3, + STT_FILE = 4, + STT_COMMON = 5, + STT_TLS = 6, + STT_GNU_IFUNC = 10, + STT_SPARC_REGISTER = 13, +}; + +template +inline std::string stt_to_string(u32 st_type) { + switch (st_type) { + case STT_NOTYPE: return "STT_NOTYPE"; + case STT_OBJECT: return "STT_OBJECT"; + case STT_FUNC: return "STT_FUNC"; + case STT_SECTION: return "STT_SECTION"; + case STT_FILE: return "STT_FILE"; + case STT_COMMON: return "STT_COMMON"; + case STT_TLS: return "STT_TLS"; + case STT_GNU_IFUNC: return "STT_GNU_IFUNC"; + } + + if constexpr (std::is_same_v) + if (st_type == STT_SPARC_REGISTER) + return "STT_SPARC_REGISTER"; + + return "unknown st_type (" + std::to_string(st_type) + ")"; +} + +enum : u32 { + STB_LOCAL = 0, + STB_GLOBAL = 1, + STB_WEAK = 2, + STB_GNU_UNIQUE = 10, +}; + +enum : u32 { + STV_DEFAULT = 0, + STV_INTERNAL = 1, + STV_HIDDEN = 2, + STV_PROTECTED = 3, +}; + +enum : u32 { + VER_NDX_LOCAL = 0, + VER_NDX_GLOBAL = 1, + VER_NDX_LAST_RESERVED = 1, +}; + +enum : u32 { + VER_FLG_BASE = 1, + VER_FLG_WEAK = 2, + VER_FLG_INFO = 4, +}; + +enum : u32 { + VERSYM_HIDDEN = 0x8000, +}; + +enum : u32 { + PT_NULL = 0, + PT_LOAD = 1, + PT_DYNAMIC = 2, + PT_INTERP = 3, + PT_NOTE = 4, + PT_SHLIB = 5, + PT_PHDR = 6, + PT_TLS = 7, + PT_GNU_EH_FRAME = 0x6474e550, + PT_GNU_STACK = 0x6474e551, + PT_GNU_RELRO = 0x6474e552, + PT_OPENBSD_RANDOMIZE = 0x65a3dbe6, + PT_ARM_EXIDX = 0x70000001, +}; + +enum : u32 { + PF_NONE = 0, + PF_X = 1, + PF_W = 2, + PF_R = 4, +}; + +enum : u32 { + ET_NONE = 0, + ET_REL = 1, + ET_EXEC = 2, + ET_DYN = 3, +}; + +enum : u32 { + ELFDATA2LSB = 1, + ELFDATA2MSB = 2, +}; + +enum : u32 { + ELFCLASS32 = 1, + ELFCLASS64 = 2, +}; + +enum : u32 { + EV_CURRENT = 1, +}; + +enum : u32 { + EM_NONE = 0, + EM_386 = 3, + EM_68K = 4, + EM_PPC = 20, + EM_PPC64 = 21, + EM_S390X = 22, + EM_ARM = 40, + EM_SH = 42, + EM_SPARC64 = 43, + EM_X86_64 = 62, + EM_AARCH64 = 183, + EM_RISCV = 243, + EM_ALPHA = 0x9026, +}; + +enum : u32 { + EI_CLASS = 4, + EI_DATA = 5, + EI_VERSION = 6, + EI_OSABI = 7, + EI_ABIVERSION = 8, +}; + +enum : u32 { + DT_NULL = 0, + DT_NEEDED = 1, + DT_PLTRELSZ = 2, + DT_PLTGOT = 3, + DT_HASH = 4, + DT_STRTAB = 5, + DT_SYMTAB = 6, + DT_RELA = 7, + DT_RELASZ = 8, + DT_RELAENT = 9, + DT_STRSZ = 10, + DT_SYMENT = 11, + DT_INIT = 12, + DT_FINI = 13, + DT_SONAME = 14, + DT_RPATH = 15, + DT_SYMBOLIC = 16, + DT_REL = 17, + DT_RELSZ = 18, + DT_RELENT = 19, + DT_PLTREL = 20, + DT_DEBUG = 21, + DT_TEXTREL = 22, + 
DT_JMPREL = 23, + DT_BIND_NOW = 24, + DT_INIT_ARRAY = 25, + DT_FINI_ARRAY = 26, + DT_INIT_ARRAYSZ = 27, + DT_FINI_ARRAYSZ = 28, + DT_RUNPATH = 29, + DT_FLAGS = 30, + DT_PREINIT_ARRAY = 32, + DT_PREINIT_ARRAYSZ = 33, + DT_RELRSZ = 35, + DT_RELR = 36, + DT_RELRENT = 37, + DT_GNU_HASH = 0x6ffffef5, + DT_VERSYM = 0x6ffffff0, + DT_RELACOUNT = 0x6ffffff9, + DT_RELCOUNT = 0x6ffffffa, + DT_FLAGS_1 = 0x6ffffffb, + DT_VERDEF = 0x6ffffffc, + DT_VERDEFNUM = 0x6ffffffd, + DT_VERNEED = 0x6ffffffe, + DT_VERNEEDNUM = 0x6fffffff, + DT_PPC_GOT = 0x70000000, + DT_PPC64_GLINK = 0x70000000, + DT_AUXILIARY = 0x7ffffffd, + DT_FILTER = 0x7fffffff, +}; + +enum : u32 { + DF_ORIGIN = 0x01, + DF_SYMBOLIC = 0x02, + DF_TEXTREL = 0x04, + DF_BIND_NOW = 0x08, + DF_STATIC_TLS = 0x10, +}; + +enum : u32 { + DF_1_NOW = 0x00000001, + DF_1_NODELETE = 0x00000008, + DF_1_INITFIRST = 0x00000020, + DF_1_NOOPEN = 0x00000040, + DF_1_ORIGIN = 0x00000080, + DF_1_INTERPOSE = 0x00000400, + DF_1_NODEFLIB = 0x00000800, + DF_1_NODUMP = 0x00001000, + DF_1_PIE = 0x08000000, +}; + +enum : u32 { + NT_GNU_ABI_TAG = 1, + NT_GNU_HWCAP = 2, + NT_GNU_BUILD_ID = 3, + NT_GNU_GOLD_VERSION = 4, + NT_GNU_PROPERTY_TYPE_0 = 5, + NT_FDO_PACKAGING_METADATA = 0xcafe1a7e, +}; + +enum : u32 { + GNU_PROPERTY_X86_UINT32_AND_LO = 0xc0000002, + GNU_PROPERTY_X86_UINT32_AND_HI = 0xc0007fff, + GNU_PROPERTY_X86_UINT32_OR_LO = 0xc0008000, + GNU_PROPERTY_X86_UINT32_OR_HI = 0xc000ffff, + GNU_PROPERTY_X86_UINT32_OR_AND_LO = 0xc0010000, + GNU_PROPERTY_X86_UINT32_OR_AND_HI = 0xc0017fff, + + GNU_PROPERTY_X86_FEATURE_1_IBT = 1, + GNU_PROPERTY_X86_FEATURE_1_SHSTK = 2, + GNU_PROPERTY_X86_FEATURE_1_AND = 0xc0000002, +}; + +enum : u32 { + ELFCOMPRESS_ZLIB = 1, + ELFCOMPRESS_ZSTD = 2, +}; + +enum : u32 { + EF_ARM_ABI_FLOAT_SOFT = 0x00000200, + EF_ARM_ABI_FLOAT_HARD = 0x00000400, + EF_ARM_EABI_VER5 = 0x05000000, +}; + +enum : u32 { + EF_RISCV_RVC = 1, + EF_RISCV_FLOAT_ABI = 6, + EF_RISCV_FLOAT_ABI_SOFT = 0, + EF_RISCV_FLOAT_ABI_SINGLE = 2, + EF_RISCV_FLOAT_ABI_DOUBLE = 4, + EF_RISCV_FLOAT_ABI_QUAD = 6, + EF_RISCV_RVE = 8, + EF_RISCV_TSO = 16, +}; + +enum : u32 { + EF_SPARC64_MM = 0x3, + EF_SPARC64_TSO = 0x0, + EF_SPARC64_PSO = 0x1, + EF_SPARC64_RMO = 0x2, + EF_SPARC_EXT_MASK = 0xffff00, + EF_SPARC_SUN_US1 = 0x000200, + EF_SPARC_HAL_R1 = 0x000400, + EF_SPARC_SUN_US3 = 0x000800, +}; + +enum : u32 { + STO_RISCV_VARIANT_CC = 0x80, + STO_ALPHA_NOPV = 0x20, + STO_ALPHA_STD_GPLOAD = 0x22, +}; + +// +// Relocation types +// + +enum : u32 { + R_X86_64_NONE = 0, + R_X86_64_64 = 1, + R_X86_64_PC32 = 2, + R_X86_64_GOT32 = 3, + R_X86_64_PLT32 = 4, + R_X86_64_COPY = 5, + R_X86_64_GLOB_DAT = 6, + R_X86_64_JUMP_SLOT = 7, + R_X86_64_RELATIVE = 8, + R_X86_64_GOTPCREL = 9, + R_X86_64_32 = 10, + R_X86_64_32S = 11, + R_X86_64_16 = 12, + R_X86_64_PC16 = 13, + R_X86_64_8 = 14, + R_X86_64_PC8 = 15, + R_X86_64_DTPMOD64 = 16, + R_X86_64_DTPOFF64 = 17, + R_X86_64_TPOFF64 = 18, + R_X86_64_TLSGD = 19, + R_X86_64_TLSLD = 20, + R_X86_64_DTPOFF32 = 21, + R_X86_64_GOTTPOFF = 22, + R_X86_64_TPOFF32 = 23, + R_X86_64_PC64 = 24, + R_X86_64_GOTOFF64 = 25, + R_X86_64_GOTPC32 = 26, + R_X86_64_GOT64 = 27, + R_X86_64_GOTPCREL64 = 28, + R_X86_64_GOTPC64 = 29, + R_X86_64_GOTPLT64 = 30, + R_X86_64_PLTOFF64 = 31, + R_X86_64_SIZE32 = 32, + R_X86_64_SIZE64 = 33, + R_X86_64_GOTPC32_TLSDESC = 34, + R_X86_64_TLSDESC_CALL = 35, + R_X86_64_TLSDESC = 36, + R_X86_64_IRELATIVE = 37, + R_X86_64_GOTPCRELX = 41, + R_X86_64_REX_GOTPCRELX = 42, +}; + +enum : u32 { + R_386_NONE = 0, + R_386_32 = 1, + R_386_PC32 = 2, + R_386_GOT32 = 3, + 
R_386_PLT32 = 4, + R_386_COPY = 5, + R_386_GLOB_DAT = 6, + R_386_JUMP_SLOT = 7, + R_386_RELATIVE = 8, + R_386_GOTOFF = 9, + R_386_GOTPC = 10, + R_386_32PLT = 11, + R_386_TLS_TPOFF = 14, + R_386_TLS_IE = 15, + R_386_TLS_GOTIE = 16, + R_386_TLS_LE = 17, + R_386_TLS_GD = 18, + R_386_TLS_LDM = 19, + R_386_16 = 20, + R_386_PC16 = 21, + R_386_8 = 22, + R_386_PC8 = 23, + R_386_TLS_GD_32 = 24, + R_386_TLS_GD_PUSH = 25, + R_386_TLS_GD_CALL = 26, + R_386_TLS_GD_POP = 27, + R_386_TLS_LDM_32 = 28, + R_386_TLS_LDM_PUSH = 29, + R_386_TLS_LDM_CALL = 30, + R_386_TLS_LDM_POP = 31, + R_386_TLS_LDO_32 = 32, + R_386_TLS_IE_32 = 33, + R_386_TLS_LE_32 = 34, + R_386_TLS_DTPMOD32 = 35, + R_386_TLS_DTPOFF32 = 36, + R_386_TLS_TPOFF32 = 37, + R_386_SIZE32 = 38, + R_386_TLS_GOTDESC = 39, + R_386_TLS_DESC_CALL = 40, + R_386_TLS_DESC = 41, + R_386_IRELATIVE = 42, + R_386_GOT32X = 43, +}; + +enum : u32 { + R_AARCH64_NONE = 0, + R_AARCH64_ABS64 = 0x101, + R_AARCH64_ABS32 = 0x102, + R_AARCH64_ABS16 = 0x103, + R_AARCH64_PREL64 = 0x104, + R_AARCH64_PREL32 = 0x105, + R_AARCH64_PREL16 = 0x106, + R_AARCH64_MOVW_UABS_G0 = 0x107, + R_AARCH64_MOVW_UABS_G0_NC = 0x108, + R_AARCH64_MOVW_UABS_G1 = 0x109, + R_AARCH64_MOVW_UABS_G1_NC = 0x10a, + R_AARCH64_MOVW_UABS_G2 = 0x10b, + R_AARCH64_MOVW_UABS_G2_NC = 0x10c, + R_AARCH64_MOVW_UABS_G3 = 0x10d, + R_AARCH64_MOVW_SABS_G0 = 0x10e, + R_AARCH64_MOVW_SABS_G1 = 0x10f, + R_AARCH64_MOVW_SABS_G2 = 0x110, + R_AARCH64_LD_PREL_LO19 = 0x111, + R_AARCH64_ADR_PREL_LO21 = 0x112, + R_AARCH64_ADR_PREL_PG_HI21 = 0x113, + R_AARCH64_ADR_PREL_PG_HI21_NC = 0x114, + R_AARCH64_ADD_ABS_LO12_NC = 0x115, + R_AARCH64_LDST8_ABS_LO12_NC = 0x116, + R_AARCH64_TSTBR14 = 0x117, + R_AARCH64_CONDBR19 = 0x118, + R_AARCH64_JUMP26 = 0x11a, + R_AARCH64_CALL26 = 0x11b, + R_AARCH64_LDST16_ABS_LO12_NC = 0x11c, + R_AARCH64_LDST32_ABS_LO12_NC = 0x11d, + R_AARCH64_LDST64_ABS_LO12_NC = 0x11e, + R_AARCH64_MOVW_PREL_G0 = 0x11f, + R_AARCH64_MOVW_PREL_G0_NC = 0x120, + R_AARCH64_MOVW_PREL_G1 = 0x121, + R_AARCH64_MOVW_PREL_G1_NC = 0x122, + R_AARCH64_MOVW_PREL_G2 = 0x123, + R_AARCH64_MOVW_PREL_G2_NC = 0x124, + R_AARCH64_MOVW_PREL_G3 = 0x125, + R_AARCH64_LDST128_ABS_LO12_NC = 0x12b, + R_AARCH64_ADR_GOT_PAGE = 0x137, + R_AARCH64_LD64_GOT_LO12_NC = 0x138, + R_AARCH64_LD64_GOTPAGE_LO15 = 0x139, + R_AARCH64_PLT32 = 0x13a, + R_AARCH64_TLSGD_ADR_PREL21 = 0x200, + R_AARCH64_TLSGD_ADR_PAGE21 = 0x201, + R_AARCH64_TLSGD_ADD_LO12_NC = 0x202, + R_AARCH64_TLSGD_MOVW_G1 = 0x203, + R_AARCH64_TLSGD_MOVW_G0_NC = 0x204, + R_AARCH64_TLSLD_ADR_PREL21 = 0x205, + R_AARCH64_TLSLD_ADR_PAGE21 = 0x206, + R_AARCH64_TLSLD_ADD_LO12_NC = 0x207, + R_AARCH64_TLSLD_MOVW_G1 = 0x208, + R_AARCH64_TLSLD_MOVW_G0_NC = 0x209, + R_AARCH64_TLSLD_LD_PREL19 = 0x20a, + R_AARCH64_TLSLD_MOVW_DTPREL_G2 = 0x20b, + R_AARCH64_TLSLD_MOVW_DTPREL_G1 = 0x20c, + R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC = 0x20d, + R_AARCH64_TLSLD_MOVW_DTPREL_G0 = 0x20e, + R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC = 0x20f, + R_AARCH64_TLSLD_ADD_DTPREL_HI12 = 0x210, + R_AARCH64_TLSLD_ADD_DTPREL_LO12 = 0x211, + R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC = 0x212, + R_AARCH64_TLSLD_LDST8_DTPREL_LO12 = 0x213, + R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC = 0x214, + R_AARCH64_TLSLD_LDST16_DTPREL_LO12 = 0x215, + R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC = 0x216, + R_AARCH64_TLSLD_LDST32_DTPREL_LO12 = 0x217, + R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC = 0x218, + R_AARCH64_TLSLD_LDST64_DTPREL_LO12 = 0x219, + R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC = 0x21a, + R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 = 0x21b, + R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC = 0x21c, + 
R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 = 0x21d, + R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC = 0x21e, + R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 = 0x21f, + R_AARCH64_TLSLE_MOVW_TPREL_G2 = 0x220, + R_AARCH64_TLSLE_MOVW_TPREL_G1 = 0x221, + R_AARCH64_TLSLE_MOVW_TPREL_G1_NC = 0x222, + R_AARCH64_TLSLE_MOVW_TPREL_G0 = 0x223, + R_AARCH64_TLSLE_MOVW_TPREL_G0_NC = 0x224, + R_AARCH64_TLSLE_ADD_TPREL_HI12 = 0x225, + R_AARCH64_TLSLE_ADD_TPREL_LO12 = 0x226, + R_AARCH64_TLSLE_ADD_TPREL_LO12_NC = 0x227, + R_AARCH64_TLSLE_LDST8_TPREL_LO12 = 0x228, + R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC = 0x229, + R_AARCH64_TLSLE_LDST16_TPREL_LO12 = 0x22a, + R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC = 0x22b, + R_AARCH64_TLSLE_LDST32_TPREL_LO12 = 0x22c, + R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC = 0x22d, + R_AARCH64_TLSLE_LDST64_TPREL_LO12 = 0x22e, + R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC = 0x22f, + R_AARCH64_TLSDESC_ADR_PAGE21 = 0x232, + R_AARCH64_TLSDESC_LD64_LO12 = 0x233, + R_AARCH64_TLSDESC_ADD_LO12 = 0x234, + R_AARCH64_TLSDESC_CALL = 0x239, + R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC = 0x23b, + R_AARCH64_COPY = 0x400, + R_AARCH64_GLOB_DAT = 0x401, + R_AARCH64_JUMP_SLOT = 0x402, + R_AARCH64_RELATIVE = 0x403, + R_AARCH64_TLS_DTPMOD64 = 0x404, + R_AARCH64_TLS_DTPREL64 = 0x405, + R_AARCH64_TLS_TPREL64 = 0x406, + R_AARCH64_TLSDESC = 0x407, + R_AARCH64_IRELATIVE = 0x408, +}; + +enum : u32 { + R_ARM_NONE = 0x0, + R_ARM_PC24 = 0x1, + R_ARM_ABS32 = 0x2, + R_ARM_REL32 = 0x3, + R_ARM_LDR_PC_G0 = 0x4, + R_ARM_ABS16 = 0x5, + R_ARM_ABS12 = 0x6, + R_ARM_THM_ABS5 = 0x7, + R_ARM_ABS8 = 0x8, + R_ARM_SBREL32 = 0x9, + R_ARM_THM_CALL = 0xa, + R_ARM_THM_PC8 = 0xb, + R_ARM_BREL_ADJ = 0xc, + R_ARM_TLS_DESC = 0xd, + R_ARM_THM_SWI8 = 0xe, + R_ARM_XPC25 = 0xf, + R_ARM_THM_XPC22 = 0x10, + R_ARM_TLS_DTPMOD32 = 0x11, + R_ARM_TLS_DTPOFF32 = 0x12, + R_ARM_TLS_TPOFF32 = 0x13, + R_ARM_COPY = 0x14, + R_ARM_GLOB_DAT = 0x15, + R_ARM_JUMP_SLOT = 0x16, + R_ARM_RELATIVE = 0x17, + R_ARM_GOTOFF32 = 0x18, + R_ARM_BASE_PREL = 0x19, + R_ARM_GOT_BREL = 0x1a, + R_ARM_PLT32 = 0x1b, + R_ARM_CALL = 0x1c, + R_ARM_JUMP24 = 0x1d, + R_ARM_THM_JUMP24 = 0x1e, + R_ARM_BASE_ABS = 0x1f, + R_ARM_ALU_PCREL_7_0 = 0x20, + R_ARM_ALU_PCREL_15_8 = 0x21, + R_ARM_ALU_PCREL_23_15 = 0x22, + R_ARM_LDR_SBREL_11_0_NC = 0x23, + R_ARM_ALU_SBREL_19_12_NC = 0x24, + R_ARM_ALU_SBREL_27_20_CK = 0x25, + R_ARM_TARGET1 = 0x26, + R_ARM_SBREL31 = 0x27, + R_ARM_V4BX = 0x28, + R_ARM_TARGET2 = 0x29, + R_ARM_PREL31 = 0x2a, + R_ARM_MOVW_ABS_NC = 0x2b, + R_ARM_MOVT_ABS = 0x2c, + R_ARM_MOVW_PREL_NC = 0x2d, + R_ARM_MOVT_PREL = 0x2e, + R_ARM_THM_MOVW_ABS_NC = 0x2f, + R_ARM_THM_MOVT_ABS = 0x30, + R_ARM_THM_MOVW_PREL_NC = 0x31, + R_ARM_THM_MOVT_PREL = 0x32, + R_ARM_THM_JUMP19 = 0x33, + R_ARM_THM_JUMP6 = 0x34, + R_ARM_THM_ALU_PREL_11_0 = 0x35, + R_ARM_THM_PC12 = 0x36, + R_ARM_ABS32_NOI = 0x37, + R_ARM_REL32_NOI = 0x38, + R_ARM_ALU_PC_G0_NC = 0x39, + R_ARM_ALU_PC_G0 = 0x3a, + R_ARM_ALU_PC_G1_NC = 0x3b, + R_ARM_ALU_PC_G1 = 0x3c, + R_ARM_ALU_PC_G2 = 0x3d, + R_ARM_LDR_PC_G1 = 0x3e, + R_ARM_LDR_PC_G2 = 0x3f, + R_ARM_LDRS_PC_G0 = 0x40, + R_ARM_LDRS_PC_G1 = 0x41, + R_ARM_LDRS_PC_G2 = 0x42, + R_ARM_LDC_PC_G0 = 0x43, + R_ARM_LDC_PC_G1 = 0x44, + R_ARM_LDC_PC_G2 = 0x45, + R_ARM_ALU_SB_G0_NC = 0x46, + R_ARM_ALU_SB_G0 = 0x47, + R_ARM_ALU_SB_G1_NC = 0x48, + R_ARM_ALU_SB_G1 = 0x49, + R_ARM_ALU_SB_G2 = 0x4a, + R_ARM_LDR_SB_G0 = 0x4b, + R_ARM_LDR_SB_G1 = 0x4c, + R_ARM_LDR_SB_G2 = 0x4d, + R_ARM_LDRS_SB_G0 = 0x4e, + R_ARM_LDRS_SB_G1 = 0x4f, + R_ARM_LDRS_SB_G2 = 0x50, + R_ARM_LDC_SB_G0 = 0x51, + R_ARM_LDC_SB_G1 = 0x52, + R_ARM_LDC_SB_G2 = 0x53, + 
R_ARM_MOVW_BREL_NC = 0x54, + R_ARM_MOVT_BREL = 0x55, + R_ARM_MOVW_BREL = 0x56, + R_ARM_THM_MOVW_BREL_NC = 0x57, + R_ARM_THM_MOVT_BREL = 0x58, + R_ARM_THM_MOVW_BREL = 0x59, + R_ARM_TLS_GOTDESC = 0x5a, + R_ARM_TLS_CALL = 0x5b, + R_ARM_TLS_DESCSEQ = 0x5c, + R_ARM_THM_TLS_CALL = 0x5d, + R_ARM_PLT32_ABS = 0x5e, + R_ARM_GOT_ABS = 0x5f, + R_ARM_GOT_PREL = 0x60, + R_ARM_GOT_BREL12 = 0x61, + R_ARM_GOTOFF12 = 0x62, + R_ARM_GOTRELAX = 0x63, + R_ARM_GNU_VTENTRY = 0x64, + R_ARM_GNU_VTINHERIT = 0x65, + R_ARM_THM_JUMP11 = 0x66, + R_ARM_THM_JUMP8 = 0x67, + R_ARM_TLS_GD32 = 0x68, + R_ARM_TLS_LDM32 = 0x69, + R_ARM_TLS_LDO32 = 0x6a, + R_ARM_TLS_IE32 = 0x6b, + R_ARM_TLS_LE32 = 0x6c, + R_ARM_TLS_LDO12 = 0x6d, + R_ARM_TLS_LE12 = 0x6e, + R_ARM_TLS_IE12GP = 0x6f, + R_ARM_PRIVATE_0 = 0x70, + R_ARM_PRIVATE_1 = 0x71, + R_ARM_PRIVATE_2 = 0x72, + R_ARM_PRIVATE_3 = 0x73, + R_ARM_PRIVATE_4 = 0x74, + R_ARM_PRIVATE_5 = 0x75, + R_ARM_PRIVATE_6 = 0x76, + R_ARM_PRIVATE_7 = 0x77, + R_ARM_PRIVATE_8 = 0x78, + R_ARM_PRIVATE_9 = 0x79, + R_ARM_PRIVATE_10 = 0x7a, + R_ARM_PRIVATE_11 = 0x7b, + R_ARM_PRIVATE_12 = 0x7c, + R_ARM_PRIVATE_13 = 0x7d, + R_ARM_PRIVATE_14 = 0x7e, + R_ARM_PRIVATE_15 = 0x7f, + R_ARM_ME_TOO = 0x80, + R_ARM_THM_TLS_DESCSEQ16 = 0x81, + R_ARM_THM_TLS_DESCSEQ32 = 0x82, + R_ARM_THM_BF16 = 0x88, + R_ARM_THM_BF12 = 0x89, + R_ARM_THM_BF18 = 0x8a, + R_ARM_IRELATIVE = 0xa0, +}; + +enum : u32 { + R_RISCV_NONE = 0, + R_RISCV_32 = 1, + R_RISCV_64 = 2, + R_RISCV_RELATIVE = 3, + R_RISCV_COPY = 4, + R_RISCV_JUMP_SLOT = 5, + R_RISCV_TLS_DTPMOD32 = 6, + R_RISCV_TLS_DTPMOD64 = 7, + R_RISCV_TLS_DTPREL32 = 8, + R_RISCV_TLS_DTPREL64 = 9, + R_RISCV_TLS_TPREL32 = 10, + R_RISCV_TLS_TPREL64 = 11, + R_RISCV_BRANCH = 16, + R_RISCV_JAL = 17, + R_RISCV_CALL = 18, + R_RISCV_CALL_PLT = 19, + R_RISCV_GOT_HI20 = 20, + R_RISCV_TLS_GOT_HI20 = 21, + R_RISCV_TLS_GD_HI20 = 22, + R_RISCV_PCREL_HI20 = 23, + R_RISCV_PCREL_LO12_I = 24, + R_RISCV_PCREL_LO12_S = 25, + R_RISCV_HI20 = 26, + R_RISCV_LO12_I = 27, + R_RISCV_LO12_S = 28, + R_RISCV_TPREL_HI20 = 29, + R_RISCV_TPREL_LO12_I = 30, + R_RISCV_TPREL_LO12_S = 31, + R_RISCV_TPREL_ADD = 32, + R_RISCV_ADD8 = 33, + R_RISCV_ADD16 = 34, + R_RISCV_ADD32 = 35, + R_RISCV_ADD64 = 36, + R_RISCV_SUB8 = 37, + R_RISCV_SUB16 = 38, + R_RISCV_SUB32 = 39, + R_RISCV_SUB64 = 40, + R_RISCV_ALIGN = 43, + R_RISCV_RVC_BRANCH = 44, + R_RISCV_RVC_JUMP = 45, + R_RISCV_RVC_LUI = 46, + R_RISCV_RELAX = 51, + R_RISCV_SUB6 = 52, + R_RISCV_SET6 = 53, + R_RISCV_SET8 = 54, + R_RISCV_SET16 = 55, + R_RISCV_SET32 = 56, + R_RISCV_32_PCREL = 57, + R_RISCV_IRELATIVE = 58, + R_RISCV_PLT32 = 59, + R_RISCV_SET_ULEB128 = 60, + R_RISCV_SUB_ULEB128 = 61, +}; + +enum : u32 { + R_PPC_NONE = 0, + R_PPC_ADDR32 = 1, + R_PPC_ADDR24 = 2, + R_PPC_ADDR16 = 3, + R_PPC_ADDR16_LO = 4, + R_PPC_ADDR16_HI = 5, + R_PPC_ADDR16_HA = 6, + R_PPC_ADDR14 = 7, + R_PPC_ADDR14_BRTAKEN = 8, + R_PPC_ADDR14_BRNTAKEN = 9, + R_PPC_REL24 = 10, + R_PPC_REL14 = 11, + R_PPC_REL14_BRTAKEN = 12, + R_PPC_REL14_BRNTAKEN = 13, + R_PPC_GOT16 = 14, + R_PPC_GOT16_LO = 15, + R_PPC_GOT16_HI = 16, + R_PPC_GOT16_HA = 17, + R_PPC_PLTREL24 = 18, + R_PPC_COPY = 19, + R_PPC_GLOB_DAT = 20, + R_PPC_JMP_SLOT = 21, + R_PPC_RELATIVE = 22, + R_PPC_LOCAL24PC = 23, + R_PPC_UADDR32 = 24, + R_PPC_UADDR16 = 25, + R_PPC_REL32 = 26, + R_PPC_PLT32 = 27, + R_PPC_PLTREL32 = 28, + R_PPC_PLT16_LO = 29, + R_PPC_PLT16_HI = 30, + R_PPC_PLT16_HA = 31, + R_PPC_SDAREL16 = 32, + R_PPC_SECTOFF = 33, + R_PPC_SECTOFF_LO = 34, + R_PPC_SECTOFF_HI = 35, + R_PPC_SECTOFF_HA = 36, + R_PPC_ADDR30 = 37, + R_PPC_TLS = 67, + 
R_PPC_DTPMOD32 = 68, + R_PPC_TPREL16 = 69, + R_PPC_TPREL16_LO = 70, + R_PPC_TPREL16_HI = 71, + R_PPC_TPREL16_HA = 72, + R_PPC_TPREL32 = 73, + R_PPC_DTPREL16 = 74, + R_PPC_DTPREL16_LO = 75, + R_PPC_DTPREL16_HI = 76, + R_PPC_DTPREL16_HA = 77, + R_PPC_DTPREL32 = 78, + R_PPC_GOT_TLSGD16 = 79, + R_PPC_GOT_TLSGD16_LO = 80, + R_PPC_GOT_TLSGD16_HI = 81, + R_PPC_GOT_TLSGD16_HA = 82, + R_PPC_GOT_TLSLD16 = 83, + R_PPC_GOT_TLSLD16_LO = 84, + R_PPC_GOT_TLSLD16_HI = 85, + R_PPC_GOT_TLSLD16_HA = 86, + R_PPC_GOT_TPREL16 = 87, + R_PPC_GOT_TPREL16_LO = 88, + R_PPC_GOT_TPREL16_HI = 89, + R_PPC_GOT_TPREL16_HA = 90, + R_PPC_GOT_DTPREL16 = 91, + R_PPC_GOT_DTPREL16_LO = 92, + R_PPC_GOT_DTPREL16_HI = 93, + R_PPC_GOT_DTPREL16_HA = 94, + R_PPC_TLSGD = 95, + R_PPC_TLSLD = 96, + R_PPC_PLTSEQ = 119, + R_PPC_PLTCALL = 120, + R_PPC_IRELATIVE = 248, + R_PPC_REL16 = 249, + R_PPC_REL16_LO = 250, + R_PPC_REL16_HI = 251, + R_PPC_REL16_HA = 252, +}; + +enum : u32 { + R_PPC64_NONE = 0, + R_PPC64_ADDR32 = 1, + R_PPC64_ADDR24 = 2, + R_PPC64_ADDR16 = 3, + R_PPC64_ADDR16_LO = 4, + R_PPC64_ADDR16_HI = 5, + R_PPC64_ADDR16_HA = 6, + R_PPC64_ADDR14 = 7, + R_PPC64_ADDR14_BRTAKEN = 8, + R_PPC64_ADDR14_BRNTAKEN = 9, + R_PPC64_REL24 = 10, + R_PPC64_REL14 = 11, + R_PPC64_REL14_BRTAKEN = 12, + R_PPC64_REL14_BRNTAKEN = 13, + R_PPC64_GOT16 = 14, + R_PPC64_GOT16_LO = 15, + R_PPC64_GOT16_HI = 16, + R_PPC64_GOT16_HA = 17, + R_PPC64_COPY = 19, + R_PPC64_GLOB_DAT = 20, + R_PPC64_JMP_SLOT = 21, + R_PPC64_RELATIVE = 22, + R_PPC64_REL32 = 26, + R_PPC64_PLT16_LO = 29, + R_PPC64_PLT16_HI = 30, + R_PPC64_PLT16_HA = 31, + R_PPC64_ADDR64 = 38, + R_PPC64_ADDR16_HIGHER = 39, + R_PPC64_ADDR16_HIGHERA = 40, + R_PPC64_ADDR16_HIGHEST = 41, + R_PPC64_ADDR16_HIGHESTA = 42, + R_PPC64_REL64 = 44, + R_PPC64_TOC16 = 47, + R_PPC64_TOC16_LO = 48, + R_PPC64_TOC16_HI = 49, + R_PPC64_TOC16_HA = 50, + R_PPC64_TOC = 51, + R_PPC64_ADDR16_DS = 56, + R_PPC64_ADDR16_LO_DS = 57, + R_PPC64_GOT16_DS = 58, + R_PPC64_GOT16_LO_DS = 59, + R_PPC64_PLT16_LO_DS = 60, + R_PPC64_TOC16_DS = 63, + R_PPC64_TOC16_LO_DS = 64, + R_PPC64_TLS = 67, + R_PPC64_DTPMOD64 = 68, + R_PPC64_TPREL16 = 69, + R_PPC64_TPREL16_LO = 70, + R_PPC64_TPREL16_HI = 71, + R_PPC64_TPREL16_HA = 72, + R_PPC64_TPREL64 = 73, + R_PPC64_DTPREL16 = 74, + R_PPC64_DTPREL16_LO = 75, + R_PPC64_DTPREL16_HI = 76, + R_PPC64_DTPREL16_HA = 77, + R_PPC64_DTPREL64 = 78, + R_PPC64_GOT_TLSGD16 = 79, + R_PPC64_GOT_TLSGD16_LO = 80, + R_PPC64_GOT_TLSGD16_HI = 81, + R_PPC64_GOT_TLSGD16_HA = 82, + R_PPC64_GOT_TLSLD16 = 83, + R_PPC64_GOT_TLSLD16_LO = 84, + R_PPC64_GOT_TLSLD16_HI = 85, + R_PPC64_GOT_TLSLD16_HA = 86, + R_PPC64_GOT_TPREL16_DS = 87, + R_PPC64_GOT_TPREL16_LO_DS = 88, + R_PPC64_GOT_TPREL16_HI = 89, + R_PPC64_GOT_TPREL16_HA = 90, + R_PPC64_GOT_DTPREL16_DS = 91, + R_PPC64_GOT_DTPREL16_LO_DS = 92, + R_PPC64_GOT_DTPREL16_HI = 93, + R_PPC64_GOT_DTPREL16_HA = 94, + R_PPC64_TPREL16_DS = 95, + R_PPC64_TPREL16_LO_DS = 96, + R_PPC64_TPREL16_HIGHER = 97, + R_PPC64_TPREL16_HIGHERA = 98, + R_PPC64_TPREL16_HIGHEST = 99, + R_PPC64_TPREL16_HIGHESTA = 100, + R_PPC64_DTPREL16_DS = 101, + R_PPC64_DTPREL16_LO_DS = 102, + R_PPC64_DTPREL16_HIGHER = 103, + R_PPC64_DTPREL16_HIGHERA = 104, + R_PPC64_DTPREL16_HIGHEST = 105, + R_PPC64_DTPREL16_HIGHESTA = 106, + R_PPC64_TLSGD = 107, + R_PPC64_TLSLD = 108, + R_PPC64_ADDR16_HIGH = 110, + R_PPC64_ADDR16_HIGHA = 111, + R_PPC64_TPREL16_HIGH = 112, + R_PPC64_TPREL16_HIGHA = 113, + R_PPC64_DTPREL16_HIGH = 114, + R_PPC64_DTPREL16_HIGHA = 115, + R_PPC64_REL24_NOTOC = 116, + R_PPC64_PLTSEQ = 119, + R_PPC64_PLTCALL = 120, 
+ R_PPC64_PLTSEQ_NOTOC = 121, + R_PPC64_PLTCALL_NOTOC = 122, + R_PPC64_PCREL_OPT = 123, + R_PPC64_PCREL34 = 132, + R_PPC64_GOT_PCREL34 = 133, + R_PPC64_PLT_PCREL34 = 134, + R_PPC64_PLT_PCREL34_NOTOC = 135, + R_PPC64_TPREL34 = 146, + R_PPC64_DTPREL34 = 147, + R_PPC64_GOT_TLSGD_PCREL34 = 148, + R_PPC64_GOT_TLSLD_PCREL34 = 149, + R_PPC64_GOT_TPREL_PCREL34 = 150, + R_PPC64_IRELATIVE = 248, + R_PPC64_REL16 = 249, + R_PPC64_REL16_LO = 250, + R_PPC64_REL16_HI = 251, + R_PPC64_REL16_HA = 252, +}; + +enum : u32 { + R_SPARC_NONE = 0, + R_SPARC_8 = 1, + R_SPARC_16 = 2, + R_SPARC_32 = 3, + R_SPARC_DISP8 = 4, + R_SPARC_DISP16 = 5, + R_SPARC_DISP32 = 6, + R_SPARC_WDISP30 = 7, + R_SPARC_WDISP22 = 8, + R_SPARC_HI22 = 9, + R_SPARC_22 = 10, + R_SPARC_13 = 11, + R_SPARC_LO10 = 12, + R_SPARC_GOT10 = 13, + R_SPARC_GOT13 = 14, + R_SPARC_GOT22 = 15, + R_SPARC_PC10 = 16, + R_SPARC_PC22 = 17, + R_SPARC_WPLT30 = 18, + R_SPARC_COPY = 19, + R_SPARC_GLOB_DAT = 20, + R_SPARC_JMP_SLOT = 21, + R_SPARC_RELATIVE = 22, + R_SPARC_UA32 = 23, + R_SPARC_PLT32 = 24, + R_SPARC_HIPLT22 = 25, + R_SPARC_LOPLT10 = 26, + R_SPARC_PCPLT32 = 27, + R_SPARC_PCPLT22 = 28, + R_SPARC_PCPLT10 = 29, + R_SPARC_10 = 30, + R_SPARC_11 = 31, + R_SPARC_64 = 32, + R_SPARC_OLO10 = 33, + R_SPARC_HH22 = 34, + R_SPARC_HM10 = 35, + R_SPARC_LM22 = 36, + R_SPARC_PC_HH22 = 37, + R_SPARC_PC_HM10 = 38, + R_SPARC_PC_LM22 = 39, + R_SPARC_WDISP16 = 40, + R_SPARC_WDISP19 = 41, + R_SPARC_7 = 43, + R_SPARC_5 = 44, + R_SPARC_6 = 45, + R_SPARC_DISP64 = 46, + R_SPARC_PLT64 = 47, + R_SPARC_HIX22 = 48, + R_SPARC_LOX10 = 49, + R_SPARC_H44 = 50, + R_SPARC_M44 = 51, + R_SPARC_L44 = 52, + R_SPARC_REGISTER = 53, + R_SPARC_UA64 = 54, + R_SPARC_UA16 = 55, + R_SPARC_TLS_GD_HI22 = 56, + R_SPARC_TLS_GD_LO10 = 57, + R_SPARC_TLS_GD_ADD = 58, + R_SPARC_TLS_GD_CALL = 59, + R_SPARC_TLS_LDM_HI22 = 60, + R_SPARC_TLS_LDM_LO10 = 61, + R_SPARC_TLS_LDM_ADD = 62, + R_SPARC_TLS_LDM_CALL = 63, + R_SPARC_TLS_LDO_HIX22 = 64, + R_SPARC_TLS_LDO_LOX10 = 65, + R_SPARC_TLS_LDO_ADD = 66, + R_SPARC_TLS_IE_HI22 = 67, + R_SPARC_TLS_IE_LO10 = 68, + R_SPARC_TLS_IE_LD = 69, + R_SPARC_TLS_IE_LDX = 70, + R_SPARC_TLS_IE_ADD = 71, + R_SPARC_TLS_LE_HIX22 = 72, + R_SPARC_TLS_LE_LOX10 = 73, + R_SPARC_TLS_DTPMOD32 = 74, + R_SPARC_TLS_DTPMOD64 = 75, + R_SPARC_TLS_DTPOFF32 = 76, + R_SPARC_TLS_DTPOFF64 = 77, + R_SPARC_TLS_TPOFF32 = 78, + R_SPARC_TLS_TPOFF64 = 79, + R_SPARC_GOTDATA_HIX22 = 80, + R_SPARC_GOTDATA_LOX10 = 81, + R_SPARC_GOTDATA_OP_HIX22 = 82, + R_SPARC_GOTDATA_OP_LOX10 = 83, + R_SPARC_GOTDATA_OP = 84, + R_SPARC_SIZE32 = 86, + R_SPARC_JMP_IREL = 248, + R_SPARC_IRELATIVE = 249, +}; + +enum : u32 { + R_390_NONE = 0, + R_390_8 = 1, + R_390_12 = 2, + R_390_16 = 3, + R_390_32 = 4, + R_390_PC32 = 5, + R_390_GOT12 = 6, + R_390_GOT32 = 7, + R_390_PLT32 = 8, + R_390_COPY = 9, + R_390_GLOB_DAT = 10, + R_390_JMP_SLOT = 11, + R_390_RELATIVE = 12, + R_390_GOTOFF32 = 13, + R_390_GOTPC = 14, + R_390_GOT16 = 15, + R_390_PC16 = 16, + R_390_PC16DBL = 17, + R_390_PLT16DBL = 18, + R_390_PC32DBL = 19, + R_390_PLT32DBL = 20, + R_390_GOTPCDBL = 21, + R_390_64 = 22, + R_390_PC64 = 23, + R_390_GOT64 = 24, + R_390_PLT64 = 25, + R_390_GOTENT = 26, + R_390_GOTOFF16 = 27, + R_390_GOTOFF64 = 28, + R_390_GOTPLT12 = 29, + R_390_GOTPLT16 = 30, + R_390_GOTPLT32 = 31, + R_390_GOTPLT64 = 32, + R_390_GOTPLTENT = 33, + R_390_PLTOFF16 = 34, + R_390_PLTOFF32 = 35, + R_390_PLTOFF64 = 36, + R_390_TLS_LOAD = 37, + R_390_TLS_GDCALL = 38, + R_390_TLS_LDCALL = 39, + R_390_TLS_GD32 = 40, + R_390_TLS_GD64 = 41, + R_390_TLS_GOTIE12 = 42, + R_390_TLS_GOTIE32 
= 43, + R_390_TLS_GOTIE64 = 44, + R_390_TLS_LDM32 = 45, + R_390_TLS_LDM64 = 46, + R_390_TLS_IE32 = 47, + R_390_TLS_IE64 = 48, + R_390_TLS_IEENT = 49, + R_390_TLS_LE32 = 50, + R_390_TLS_LE64 = 51, + R_390_TLS_LDO32 = 52, + R_390_TLS_LDO64 = 53, + R_390_TLS_DTPMOD = 54, + R_390_TLS_DTPOFF = 55, + R_390_TLS_TPOFF = 56, + R_390_20 = 57, + R_390_GOT20 = 58, + R_390_GOTPLT20 = 59, + R_390_TLS_GOTIE20 = 60, + R_390_IRELATIVE = 61, + R_390_PC12DBL = 62, + R_390_PLT12DBL = 63, + R_390_PC24DBL = 64, + R_390_PLT24DBL = 65, +}; + +enum : u32 { + R_68K_NONE = 0, + R_68K_32 = 1, + R_68K_16 = 2, + R_68K_8 = 3, + R_68K_PC32 = 4, + R_68K_PC16 = 5, + R_68K_PC8 = 6, + R_68K_GOTPCREL32 = 7, + R_68K_GOTPCREL16 = 8, + R_68K_GOTPCREL8 = 9, + R_68K_GOTOFF32 = 10, + R_68K_GOTOFF16 = 11, + R_68K_GOTOFF8 = 12, + R_68K_PLT32 = 13, + R_68K_PLT16 = 14, + R_68K_PLT8 = 15, + R_68K_PLTOFF32 = 16, + R_68K_PLTOFF16 = 17, + R_68K_PLTOFF8 = 18, + R_68K_COPY = 19, + R_68K_GLOB_DAT = 20, + R_68K_JMP_SLOT = 21, + R_68K_RELATIVE = 22, + R_68K_TLS_GD32 = 25, + R_68K_TLS_GD16 = 26, + R_68K_TLS_GD8 = 27, + R_68K_TLS_LDM32 = 28, + R_68K_TLS_LDM16 = 29, + R_68K_TLS_LDM8 = 30, + R_68K_TLS_LDO32 = 31, + R_68K_TLS_LDO16 = 32, + R_68K_TLS_LDO8 = 33, + R_68K_TLS_IE32 = 34, + R_68K_TLS_IE16 = 35, + R_68K_TLS_IE8 = 36, + R_68K_TLS_LE32 = 37, + R_68K_TLS_LE16 = 38, + R_68K_TLS_LE8 = 39, + R_68K_TLS_DTPMOD32 = 40, + R_68K_TLS_DTPREL32 = 41, + R_68K_TLS_TPREL32 = 42, +}; + +enum : u32 { + R_SH_NONE = 0, + R_SH_DIR32 = 1, + R_SH_REL32 = 2, + R_SH_DIR8WPN = 3, + R_SH_IND12W = 4, + R_SH_DIR8WPL = 5, + R_SH_DIR8WPZ = 6, + R_SH_DIR8BP = 7, + R_SH_DIR8W = 8, + R_SH_DIR8L = 9, + R_SH_TLS_GD_32 = 144, + R_SH_TLS_LD_32 = 145, + R_SH_TLS_LDO_32 = 146, + R_SH_TLS_IE_32 = 147, + R_SH_TLS_LE_32 = 148, + R_SH_TLS_DTPMOD32 = 149, + R_SH_TLS_DTPOFF32 = 150, + R_SH_TLS_TPOFF32 = 151, + R_SH_GOT32 = 160, + R_SH_PLT32 = 161, + R_SH_COPY = 162, + R_SH_GLOB_DAT = 163, + R_SH_JMP_SLOT = 164, + R_SH_RELATIVE = 165, + R_SH_GOTOFF = 166, + R_SH_GOTPC = 167, + R_SH_GOTPLT32 = 168, +}; + +enum : u32 { + R_ALPHA_NONE = 0, + R_ALPHA_REFLONG = 1, + R_ALPHA_REFQUAD = 2, + R_ALPHA_GPREL32 = 3, + R_ALPHA_LITERAL = 4, + R_ALPHA_LITUSE = 5, + R_ALPHA_GPDISP = 6, + R_ALPHA_BRADDR = 7, + R_ALPHA_HINT = 8, + R_ALPHA_SREL16 = 9, + R_ALPHA_SREL32 = 10, + R_ALPHA_SREL64 = 11, + R_ALPHA_GPRELHIGH = 17, + R_ALPHA_GPRELLOW = 18, + R_ALPHA_GPREL16 = 19, + R_ALPHA_COPY = 24, + R_ALPHA_GLOB_DAT = 25, + R_ALPHA_JMP_SLOT = 26, + R_ALPHA_RELATIVE = 27, + R_ALPHA_BRSGP = 28, + R_ALPHA_TLSGD = 29, + R_ALPHA_TLSLDM = 30, + R_ALPHA_DTPMOD64 = 31, + R_ALPHA_GOTDTPREL = 32, + R_ALPHA_DTPREL64 = 33, + R_ALPHA_DTPRELHI = 34, + R_ALPHA_DTPRELLO = 35, + R_ALPHA_DTPREL16 = 36, + R_ALPHA_GOTTPREL = 37, + R_ALPHA_TPREL64 = 38, + R_ALPHA_TPRELHI = 39, + R_ALPHA_TPRELLO = 40, + R_ALPHA_TPREL16 = 41, +}; + +// +// DWARF data types +// + +enum : u32 { + DW_EH_PE_absptr = 0, + DW_EH_PE_omit = 0xff, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 = 0x04, + DW_EH_PE_signed = 0x08, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0a, + DW_EH_PE_sdata4 = 0x0b, + DW_EH_PE_sdata8 = 0x0c, + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, +}; + +enum : u32 { + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12, + DW_AT_producer = 0x25, + DW_AT_ranges = 0x55, + DW_AT_addr_base = 0x73, + DW_AT_rnglists_base = 0x74, +}; + +enum : u32 { + DW_TAG_compile_unit = 0x11, + DW_TAG_skeleton_unit = 0x4a, +}; 
+ +enum : u32 { + DW_UT_compile = 0x01, + DW_UT_partial = 0x03, + DW_UT_skeleton = 0x04, + DW_UT_split_compile = 0x05, +}; + +enum : u32 { + DW_FORM_addr = 0x01, + DW_FORM_block2 = 0x03, + DW_FORM_block4 = 0x04, + DW_FORM_data2 = 0x05, + DW_FORM_data4 = 0x06, + DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, + DW_FORM_block = 0x09, + DW_FORM_block1 = 0x0a, + DW_FORM_data1 = 0x0b, + DW_FORM_flag = 0x0c, + DW_FORM_sdata = 0x0d, + DW_FORM_strp = 0x0e, + DW_FORM_udata = 0x0f, + DW_FORM_ref_addr = 0x10, + DW_FORM_ref1 = 0x11, + DW_FORM_ref2 = 0x12, + DW_FORM_ref4 = 0x13, + DW_FORM_ref8 = 0x14, + DW_FORM_ref_udata = 0x15, + DW_FORM_indirect = 0x16, + DW_FORM_sec_offset = 0x17, + DW_FORM_exprloc = 0x18, + DW_FORM_flag_present = 0x19, + DW_FORM_strx = 0x1a, + DW_FORM_addrx = 0x1b, + DW_FORM_ref_sup4 = 0x1c, + DW_FORM_strp_sup = 0x1d, + DW_FORM_data16 = 0x1e, + DW_FORM_line_strp = 0x1f, + DW_FORM_ref_sig8 = 0x20, + DW_FORM_implicit_const = 0x21, + DW_FORM_loclistx = 0x22, + DW_FORM_rnglistx = 0x23, + DW_FORM_ref_sup8 = 0x24, + DW_FORM_strx1 = 0x25, + DW_FORM_strx2 = 0x26, + DW_FORM_strx3 = 0x27, + DW_FORM_strx4 = 0x28, + DW_FORM_addrx1 = 0x29, + DW_FORM_addrx2 = 0x2a, + DW_FORM_addrx3 = 0x2b, + DW_FORM_addrx4 = 0x2c, +}; + +enum : u32 { + DW_RLE_end_of_list = 0x00, + DW_RLE_base_addressx = 0x01, + DW_RLE_startx_endx = 0x02, + DW_RLE_startx_length = 0x03, + DW_RLE_offset_pair = 0x04, + DW_RLE_base_address = 0x05, + DW_RLE_start_end = 0x06, + DW_RLE_start_length = 0x07, +}; + +// +// ELF types +// + +template using I16 = std::conditional_t; +template using I32 = std::conditional_t; +template using I64 = std::conditional_t; +template using U16 = std::conditional_t; +template using U24 = std::conditional_t; +template using U32 = std::conditional_t; +template using U64 = std::conditional_t; + +template using Word = std::conditional_t, U32>; +template using IWord = std::conditional_t, I32>; + +template requires E::is_64 +struct ElfSym { + bool is_undef() const { return st_shndx == SHN_UNDEF; } + bool is_abs() const { return st_shndx == SHN_ABS; } + bool is_common() const { return st_shndx == SHN_COMMON; } + bool is_weak() const { return st_bind == STB_WEAK; } + bool is_undef_weak() const { return is_undef() && is_weak(); } + + U32 st_name; + +#ifdef __LITTLE_ENDIAN__ + u8 st_type : 4; + u8 st_bind : 4; + u8 st_visibility : 2; + u8 : 6; +#else + u8 st_bind : 4; + u8 st_type : 4; + u8 : 6; + u8 st_visibility : 2; +#endif + + U16 st_shndx; + U64 st_value; + U64 st_size; +}; + +template requires (!E::is_64) +struct ElfSym { + bool is_undef() const { return st_shndx == SHN_UNDEF; } + bool is_abs() const { return st_shndx == SHN_ABS; } + bool is_common() const { return st_shndx == SHN_COMMON; } + bool is_weak() const { return st_bind == STB_WEAK; } + bool is_undef_weak() const { return is_undef() && is_weak(); } + + U32 st_name; + U32 st_value; + U32 st_size; + +#ifdef __LITTLE_ENDIAN__ + u8 st_type : 4; + u8 st_bind : 4; + u8 st_visibility : 2; + u8 : 6; +#else + u8 st_bind : 4; + u8 st_type : 4; + u8 : 6; + u8 st_visibility : 2; +#endif + + U16 st_shndx; +}; + +template +struct ElfShdr { + U32 sh_name; + U32 sh_type; + Word sh_flags; + Word sh_addr; + Word sh_offset; + Word sh_size; + U32 sh_link; + U32 sh_info; + Word sh_addralign; + Word sh_entsize; +}; + +template +struct ElfEhdr { + u8 e_ident[16]; + U16 e_type; + U16 e_machine; + U32 e_version; + Word e_entry; + Word e_phoff; + Word e_shoff; + U32 e_flags; + U16 e_ehsize; + U16 e_phentsize; + U16 e_phnum; + U16 e_shentsize; + U16 e_shnum; + U16 
e_shstrndx; +}; + +template requires E::is_64 +struct ElfPhdr { + U32 p_type; + U32 p_flags; + U64 p_offset; + U64 p_vaddr; + U64 p_paddr; + U64 p_filesz; + U64 p_memsz; + U64 p_align; +}; + +template requires (!E::is_64) +struct ElfPhdr { + U32 p_type; + U32 p_offset; + U32 p_vaddr; + U32 p_paddr; + U32 p_filesz; + U32 p_memsz; + U32 p_flags; + U32 p_align; +}; + +// Depending on the target, ElfRel may or may not contain r_addend member. +// The relocation record containing r_addend is called RELA, and that +// without r_addend is called REL. +// +// If REL, relocation addends are stored as parts of section contents. +// That means we add a computed value to an existing value when writing a +// relocated value if REL. If RELA, we just overwrite an existing value +// with a newly computed value. +// +// We don't want to have too many `if (REL)`s and `if (RELA)`s in our +// codebase, so ElfRel always takes r_addend as a constructor argument. +// If it's REL, the argument will simply be ignored. +template requires E::is_le && E::is_rela +struct ElfRel { + ElfRel() = default; + ElfRel(u64 offset, u32 type, u32 sym, i64 addend) + : r_offset(offset), r_type(type), r_sym(sym), r_addend(addend) {} + + Word r_offset; + std::conditional_t, u8> r_type; + std::conditional_t, U24> r_sym; + IWord r_addend; +}; + +template requires (!E::is_le) && E::is_rela +struct ElfRel { + ElfRel() = default; + ElfRel(u64 offset, u32 type, u32 sym, i64 addend) + : r_offset(offset), r_sym(sym), r_type(type), r_addend(addend) {} + + Word r_offset; + std::conditional_t, U24> r_sym; + std::conditional_t, u8> r_type; + IWord r_addend; +}; + +template requires E::is_le && (!E::is_rela) +struct ElfRel { + ElfRel() = default; + ElfRel(u64 offset, u32 type, u32 sym, i64 addend = 0) + : r_offset(offset), r_type(type), r_sym(sym) {} + + Word r_offset; + std::conditional_t, u8> r_type; + std::conditional_t, U24> r_sym; +}; + +template requires (!E::is_le) && (!E::is_rela) +struct ElfRel { + ElfRel() = default; + ElfRel(u64 offset, u32 type, u32 sym, i64 addend = 0) + : r_offset(offset), r_sym(sym), r_type(type) {} + + Word r_offset; + std::conditional_t, U24> r_sym; + std::conditional_t, u8> r_type; +}; + +template +struct ElfDyn { + Word d_tag; + Word d_val; +}; + +template +struct ElfVerneed { + U16 vn_version; + U16 vn_cnt; + U32 vn_file; + U32 vn_aux; + U32 vn_next; +}; + +template +struct ElfVernaux { + U32 vna_hash; + U16 vna_flags; + U16 vna_other; + U32 vna_name; + U32 vna_next; +}; + +template +struct ElfVerdef { + U16 vd_version; + U16 vd_flags; + U16 vd_ndx; + U16 vd_cnt; + U32 vd_hash; + U32 vd_aux; + U32 vd_next; +}; + +template +struct ElfVerdaux { + U32 vda_name; + U32 vda_next; +}; + +template requires E::is_64 +struct ElfChdr { + U32 ch_type; + U32 ch_reserved; + U64 ch_size; + U64 ch_addralign; +}; + +template requires (!E::is_64) +struct ElfChdr { + U32 ch_type; + U32 ch_size; + U32 ch_addralign; +}; + +template +struct ElfNhdr { + U32 n_namesz; + U32 n_descsz; + U32 n_type; +}; + +// +// Target-specific ELF data types +// + +template <> +struct ElfSym { + bool is_undef() const { return st_shndx == SHN_UNDEF; } + bool is_abs() const { return st_shndx == SHN_ABS; } + bool is_common() const { return st_shndx == SHN_COMMON; } + bool is_weak() const { return st_bind == STB_WEAK; } + bool is_undef_weak() const { return is_undef() && is_weak(); } + + bool preserves_r2() const { return ppc_local_entry != 1; } + bool uses_toc() const { return ppc_local_entry > 1; } + + ul32 st_name; + +#ifdef __LITTLE_ENDIAN__ + u8 
st_type : 4; + u8 st_bind : 4; + u8 st_visibility : 2; + u8 : 3; + u8 ppc_local_entry : 3; // This is PPC64V2-specific +#else + u8 st_bind : 4; + u8 st_type : 4; + u8 ppc_local_entry : 3; + u8 : 3; + u8 st_visibility : 2; +#endif + + ul16 st_shndx; + ul64 st_value; + ul64 st_size; +}; + +template <> +struct ElfSym { + bool is_undef() const { return st_shndx == SHN_UNDEF; } + bool is_abs() const { return st_shndx == SHN_ABS; } + bool is_common() const { return st_shndx == SHN_COMMON; } + bool is_weak() const { return st_bind == STB_WEAK; } + bool is_undef_weak() const { return is_undef() && is_weak(); } + + ul32 st_name; + +#ifdef __LITTLE_ENDIAN__ + u8 st_type : 4; + u8 st_bind : 4; + u8 st_visibility : 2; + u8 alpha_st_other : 6; // contains STO_ALPHA_NOPV, STO_ALPHA_STD_GPLOAD or 0 +#else + u8 st_bind : 4; + u8 st_type : 4; + u8 alpha_st_other : 6; + u8 st_visibility : 2; +#endif + + ul16 st_shndx; + ul64 st_value; + ul64 st_size; +}; + +template <> +struct ElfRel { + ElfRel() = default; + ElfRel(u64 offset, u32 type, u32 sym, i64 addend) + : r_offset(offset), r_sym(sym), r_type_data(0), r_type(type), + r_addend(addend) {} + + ub64 r_offset; + ub32 r_sym; + ub24 r_type_data; // SPARC-specific: used for R_SPARC_OLO10 + u8 r_type; + ib64 r_addend; +}; + +// +// Machine descriptions +// + +template +static constexpr bool supports_ifunc = requires { E::R_IRELATIVE; }; + +template +static constexpr bool supports_tlsdesc = requires { E::R_TLSDESC; }; + +template +static constexpr bool needs_thunk = requires { E::thunk_size; }; + +template static constexpr bool is_x86_64 = std::is_same_v; +template static constexpr bool is_i386 = std::is_same_v; +template static constexpr bool is_arm64 = std::is_same_v; +template static constexpr bool is_arm32 = std::is_same_v; +template static constexpr bool is_rv64le = std::is_same_v; +template static constexpr bool is_rv64be = std::is_same_v; +template static constexpr bool is_rv32le = std::is_same_v; +template static constexpr bool is_rv32be = std::is_same_v; +template static constexpr bool is_ppc32 = std::is_same_v; +template static constexpr bool is_ppc64v1 = std::is_same_v; +template static constexpr bool is_ppc64v2 = std::is_same_v; +template static constexpr bool is_s390x = std::is_same_v; +template static constexpr bool is_sparc64 = std::is_same_v; +template static constexpr bool is_m68k = std::is_same_v; +template static constexpr bool is_sh4 = std::is_same_v; +template static constexpr bool is_alpha = std::is_same_v; + +template static constexpr bool is_x86 = is_x86_64 || is_i386; +template static constexpr bool is_arm = is_arm64 || is_arm32; +template static constexpr bool is_rv64 = is_rv64le || is_rv64be; +template static constexpr bool is_rv32 = is_rv32le || is_rv32be; +template static constexpr bool is_riscv = is_rv64 || is_rv32; +template static constexpr bool is_ppc64 = is_ppc64v1 || is_ppc64v2; +template static constexpr bool is_ppc = is_ppc64 || is_ppc32; +template static constexpr bool is_sparc = is_sparc64; + +struct X86_64 { + static constexpr std::string_view target_name = "x86_64"; + static constexpr bool is_64 = true; + static constexpr bool is_le = true; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 4096; + static constexpr u32 e_machine = EM_X86_64; + static constexpr u32 plt_hdr_size = 32; + static constexpr u32 plt_size = 16; + static constexpr u32 pltgot_size = 16; + + static constexpr u32 R_COPY = R_X86_64_COPY; + static constexpr u32 R_GLOB_DAT = R_X86_64_GLOB_DAT; + static constexpr u32 R_JUMP_SLOT 
= R_X86_64_JUMP_SLOT; + static constexpr u32 R_ABS = R_X86_64_64; + static constexpr u32 R_RELATIVE = R_X86_64_RELATIVE; + static constexpr u32 R_IRELATIVE = R_X86_64_IRELATIVE; + static constexpr u32 R_DTPOFF = R_X86_64_DTPOFF64; + static constexpr u32 R_TPOFF = R_X86_64_TPOFF64; + static constexpr u32 R_DTPMOD = R_X86_64_DTPMOD64; + static constexpr u32 R_TLSDESC = R_X86_64_TLSDESC; +}; + +struct I386 { + static constexpr std::string_view target_name = "i386"; + static constexpr bool is_64 = false; + static constexpr bool is_le = true; + static constexpr bool is_rela = false; + static constexpr u32 page_size = 4096; + static constexpr u32 e_machine = EM_386; + static constexpr u32 plt_hdr_size = 16; + static constexpr u32 plt_size = 16; + static constexpr u32 pltgot_size = 16; + + static constexpr u32 R_COPY = R_386_COPY; + static constexpr u32 R_GLOB_DAT = R_386_GLOB_DAT; + static constexpr u32 R_JUMP_SLOT = R_386_JUMP_SLOT; + static constexpr u32 R_ABS = R_386_32; + static constexpr u32 R_RELATIVE = R_386_RELATIVE; + static constexpr u32 R_IRELATIVE = R_386_IRELATIVE; + static constexpr u32 R_DTPOFF = R_386_TLS_DTPOFF32; + static constexpr u32 R_TPOFF = R_386_TLS_TPOFF; + static constexpr u32 R_DTPMOD = R_386_TLS_DTPMOD32; + static constexpr u32 R_TLSDESC = R_386_TLS_DESC; +}; + +struct ARM64 { + static constexpr std::string_view target_name = "arm64"; + static constexpr bool is_64 = true; + static constexpr bool is_le = true; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 65536; + static constexpr u32 e_machine = EM_AARCH64; + static constexpr u32 plt_hdr_size = 32; + static constexpr u32 plt_size = 16; + static constexpr u32 pltgot_size = 16; + static constexpr u32 thunk_hdr_size = 0; + static constexpr u32 thunk_size = 12; + + static constexpr u32 R_COPY = R_AARCH64_COPY; + static constexpr u32 R_GLOB_DAT = R_AARCH64_GLOB_DAT; + static constexpr u32 R_JUMP_SLOT = R_AARCH64_JUMP_SLOT; + static constexpr u32 R_ABS = R_AARCH64_ABS64; + static constexpr u32 R_RELATIVE = R_AARCH64_RELATIVE; + static constexpr u32 R_IRELATIVE = R_AARCH64_IRELATIVE; + static constexpr u32 R_DTPOFF = R_AARCH64_TLS_DTPREL64; + static constexpr u32 R_TPOFF = R_AARCH64_TLS_TPREL64; + static constexpr u32 R_DTPMOD = R_AARCH64_TLS_DTPMOD64; + static constexpr u32 R_TLSDESC = R_AARCH64_TLSDESC; +}; + +struct ARM32 { + static constexpr std::string_view target_name = "arm32"; + static constexpr bool is_64 = false; + static constexpr bool is_le = true; + static constexpr bool is_rela = false; + static constexpr u32 page_size = 4096; + static constexpr u32 e_machine = EM_ARM; + static constexpr u32 plt_hdr_size = 32; + static constexpr u32 plt_size = 16; + static constexpr u32 pltgot_size = 16; + static constexpr u32 thunk_hdr_size = 12; + static constexpr u32 thunk_size = 20; + + static constexpr u32 R_COPY = R_ARM_COPY; + static constexpr u32 R_GLOB_DAT = R_ARM_GLOB_DAT; + static constexpr u32 R_JUMP_SLOT = R_ARM_JUMP_SLOT; + static constexpr u32 R_ABS = R_ARM_ABS32; + static constexpr u32 R_RELATIVE = R_ARM_RELATIVE; + static constexpr u32 R_IRELATIVE = R_ARM_IRELATIVE; + static constexpr u32 R_DTPOFF = R_ARM_TLS_DTPOFF32; + static constexpr u32 R_TPOFF = R_ARM_TLS_TPOFF32; + static constexpr u32 R_DTPMOD = R_ARM_TLS_DTPMOD32; + static constexpr u32 R_TLSDESC = R_ARM_TLS_DESC; +}; + +struct RV64LE { + static constexpr std::string_view target_name = "riscv64"; + static constexpr bool is_64 = true; + static constexpr bool is_le = true; + static constexpr bool is_rela = true; + static 
constexpr u32 page_size = 4096; + static constexpr u32 e_machine = EM_RISCV; + static constexpr u32 plt_hdr_size = 32; + static constexpr u32 plt_size = 16; + static constexpr u32 pltgot_size = 16; + + static constexpr u32 R_COPY = R_RISCV_COPY; + static constexpr u32 R_GLOB_DAT = R_RISCV_64; + static constexpr u32 R_JUMP_SLOT = R_RISCV_JUMP_SLOT; + static constexpr u32 R_ABS = R_RISCV_64; + static constexpr u32 R_RELATIVE = R_RISCV_RELATIVE; + static constexpr u32 R_IRELATIVE = R_RISCV_IRELATIVE; + static constexpr u32 R_DTPOFF = R_RISCV_TLS_DTPREL64; + static constexpr u32 R_TPOFF = R_RISCV_TLS_TPREL64; + static constexpr u32 R_DTPMOD = R_RISCV_TLS_DTPMOD64; +}; + +struct RV64BE { + static constexpr std::string_view target_name = "riscv64be"; + static constexpr bool is_64 = true; + static constexpr bool is_le = false; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 4096; + static constexpr u32 e_machine = EM_RISCV; + static constexpr u32 plt_hdr_size = 32; + static constexpr u32 plt_size = 16; + static constexpr u32 pltgot_size = 16; + + static constexpr u32 R_COPY = R_RISCV_COPY; + static constexpr u32 R_GLOB_DAT = R_RISCV_64; + static constexpr u32 R_JUMP_SLOT = R_RISCV_JUMP_SLOT; + static constexpr u32 R_ABS = R_RISCV_64; + static constexpr u32 R_RELATIVE = R_RISCV_RELATIVE; + static constexpr u32 R_IRELATIVE = R_RISCV_IRELATIVE; + static constexpr u32 R_DTPOFF = R_RISCV_TLS_DTPREL64; + static constexpr u32 R_TPOFF = R_RISCV_TLS_TPREL64; + static constexpr u32 R_DTPMOD = R_RISCV_TLS_DTPMOD64; +}; + +struct RV32LE { + static constexpr std::string_view target_name = "riscv32"; + static constexpr bool is_64 = false; + static constexpr bool is_le = true; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 4096; + static constexpr u32 e_machine = EM_RISCV; + static constexpr u32 plt_hdr_size = 32; + static constexpr u32 plt_size = 16; + static constexpr u32 pltgot_size = 16; + + static constexpr u32 R_COPY = R_RISCV_COPY; + static constexpr u32 R_GLOB_DAT = R_RISCV_32; + static constexpr u32 R_JUMP_SLOT = R_RISCV_JUMP_SLOT; + static constexpr u32 R_ABS = R_RISCV_32; + static constexpr u32 R_RELATIVE = R_RISCV_RELATIVE; + static constexpr u32 R_IRELATIVE = R_RISCV_IRELATIVE; + static constexpr u32 R_DTPOFF = R_RISCV_TLS_DTPREL32; + static constexpr u32 R_TPOFF = R_RISCV_TLS_TPREL32; + static constexpr u32 R_DTPMOD = R_RISCV_TLS_DTPMOD32; +}; + +struct RV32BE { + static constexpr std::string_view target_name = "riscv32be"; + static constexpr bool is_64 = false; + static constexpr bool is_le = false; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 4096; + static constexpr u32 e_machine = EM_RISCV; + static constexpr u32 plt_hdr_size = 32; + static constexpr u32 plt_size = 16; + static constexpr u32 pltgot_size = 16; + + static constexpr u32 R_COPY = R_RISCV_COPY; + static constexpr u32 R_GLOB_DAT = R_RISCV_32; + static constexpr u32 R_JUMP_SLOT = R_RISCV_JUMP_SLOT; + static constexpr u32 R_ABS = R_RISCV_32; + static constexpr u32 R_RELATIVE = R_RISCV_RELATIVE; + static constexpr u32 R_IRELATIVE = R_RISCV_IRELATIVE; + static constexpr u32 R_DTPOFF = R_RISCV_TLS_DTPREL32; + static constexpr u32 R_TPOFF = R_RISCV_TLS_TPREL32; + static constexpr u32 R_DTPMOD = R_RISCV_TLS_DTPMOD32; +}; + +struct PPC32 { + static constexpr std::string_view target_name = "ppc32"; + static constexpr bool is_64 = false; + static constexpr bool is_le = false; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 65536; + 
static constexpr u32 e_machine = EM_PPC; + static constexpr u32 plt_hdr_size = 64; + static constexpr u32 plt_size = 36; + static constexpr u32 pltgot_size = 36; + static constexpr u32 thunk_hdr_size = 0; + static constexpr u32 thunk_size = 36; + + static constexpr u32 R_COPY = R_PPC_COPY; + static constexpr u32 R_GLOB_DAT = R_PPC_GLOB_DAT; + static constexpr u32 R_JUMP_SLOT = R_PPC_JMP_SLOT; + static constexpr u32 R_ABS = R_PPC_ADDR32; + static constexpr u32 R_RELATIVE = R_PPC_RELATIVE; + static constexpr u32 R_IRELATIVE = R_PPC_IRELATIVE; + static constexpr u32 R_DTPOFF = R_PPC_DTPREL32; + static constexpr u32 R_TPOFF = R_PPC_TPREL32; + static constexpr u32 R_DTPMOD = R_PPC_DTPMOD32; +}; + +struct PPC64V1 { + static constexpr std::string_view target_name = "ppc64v1"; + static constexpr bool is_64 = true; + static constexpr bool is_le = false; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 65536; + static constexpr u32 e_machine = EM_PPC64; + static constexpr u32 plt_hdr_size = 52; + static constexpr u32 pltgot_size = 0; + static constexpr u32 thunk_hdr_size = 0; + static constexpr u32 thunk_size = 28; + + static constexpr u32 R_COPY = R_PPC64_COPY; + static constexpr u32 R_GLOB_DAT = R_PPC64_GLOB_DAT; + static constexpr u32 R_JUMP_SLOT = R_PPC64_JMP_SLOT; + static constexpr u32 R_ABS = R_PPC64_ADDR64; + static constexpr u32 R_RELATIVE = R_PPC64_RELATIVE; + static constexpr u32 R_IRELATIVE = R_PPC64_IRELATIVE; + static constexpr u32 R_DTPOFF = R_PPC64_DTPREL64; + static constexpr u32 R_TPOFF = R_PPC64_TPREL64; + static constexpr u32 R_DTPMOD = R_PPC64_DTPMOD64; +}; + +struct PPC64V2 { + static constexpr std::string_view target_name = "ppc64v2"; + static constexpr bool is_64 = true; + static constexpr bool is_le = true; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 65536; + static constexpr u32 e_machine = EM_PPC64; + static constexpr u32 plt_hdr_size = 60; + static constexpr u32 plt_size = 4; + static constexpr u32 pltgot_size = 0; + static constexpr u32 thunk_hdr_size = 0; + static constexpr u32 thunk_size = 20; + + static constexpr u32 R_COPY = R_PPC64_COPY; + static constexpr u32 R_GLOB_DAT = R_PPC64_GLOB_DAT; + static constexpr u32 R_JUMP_SLOT = R_PPC64_JMP_SLOT; + static constexpr u32 R_ABS = R_PPC64_ADDR64; + static constexpr u32 R_RELATIVE = R_PPC64_RELATIVE; + static constexpr u32 R_IRELATIVE = R_PPC64_IRELATIVE; + static constexpr u32 R_DTPOFF = R_PPC64_DTPREL64; + static constexpr u32 R_TPOFF = R_PPC64_TPREL64; + static constexpr u32 R_DTPMOD = R_PPC64_DTPMOD64; +}; + +struct S390X { + static constexpr std::string_view target_name = "s390x"; + static constexpr bool is_64 = true; + static constexpr bool is_le = false; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 4096; + static constexpr u32 e_machine = EM_S390X; + static constexpr u32 plt_hdr_size = 32; + static constexpr u32 plt_size = 32; + static constexpr u32 pltgot_size = 16; + + static constexpr u32 R_COPY = R_390_COPY; + static constexpr u32 R_GLOB_DAT = R_390_GLOB_DAT; + static constexpr u32 R_JUMP_SLOT = R_390_JMP_SLOT; + static constexpr u32 R_ABS = R_390_64; + static constexpr u32 R_RELATIVE = R_390_RELATIVE; + static constexpr u32 R_IRELATIVE = R_390_IRELATIVE; + static constexpr u32 R_DTPOFF = R_390_TLS_DTPOFF; + static constexpr u32 R_TPOFF = R_390_TLS_TPOFF; + static constexpr u32 R_DTPMOD = R_390_TLS_DTPMOD; +}; + +struct SPARC64 { + static constexpr std::string_view target_name = "sparc64"; + static constexpr bool is_64 = true; 
+ static constexpr bool is_le = false; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 8192; + static constexpr u32 e_machine = EM_SPARC64; + static constexpr u32 plt_hdr_size = 128; + static constexpr u32 plt_size = 32; + static constexpr u32 pltgot_size = 32; + + static constexpr u32 R_COPY = R_SPARC_COPY; + static constexpr u32 R_GLOB_DAT = R_SPARC_GLOB_DAT; + static constexpr u32 R_JUMP_SLOT = R_SPARC_JMP_SLOT; + static constexpr u32 R_ABS = R_SPARC_64; + static constexpr u32 R_RELATIVE = R_SPARC_RELATIVE; + static constexpr u32 R_IRELATIVE = R_SPARC_IRELATIVE; + static constexpr u32 R_DTPOFF = R_SPARC_TLS_DTPOFF64; + static constexpr u32 R_TPOFF = R_SPARC_TLS_TPOFF64; + static constexpr u32 R_DTPMOD = R_SPARC_TLS_DTPMOD64; +}; + +struct M68K { + static constexpr std::string_view target_name = "m68k"; + static constexpr bool is_64 = false; + static constexpr bool is_le = false; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 8192; + static constexpr u32 e_machine = EM_68K; + static constexpr u32 plt_hdr_size = 18; + static constexpr u32 plt_size = 14; + static constexpr u32 pltgot_size = 8; + + static constexpr u32 R_COPY = R_68K_COPY; + static constexpr u32 R_GLOB_DAT = R_68K_GLOB_DAT; + static constexpr u32 R_JUMP_SLOT = R_68K_JMP_SLOT; + static constexpr u32 R_ABS = R_68K_32; + static constexpr u32 R_RELATIVE = R_68K_RELATIVE; + static constexpr u32 R_DTPOFF = R_68K_TLS_DTPREL32; + static constexpr u32 R_TPOFF = R_68K_TLS_TPREL32; + static constexpr u32 R_DTPMOD = R_68K_TLS_DTPMOD32; +}; + +struct SH4 { + static constexpr std::string_view target_name = "sh4"; + static constexpr bool is_64 = false; + static constexpr bool is_le = true; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 4096; + static constexpr u32 e_machine = EM_SH; + static constexpr u32 plt_hdr_size = 16; + static constexpr u32 plt_size = 16; + static constexpr u32 pltgot_size = 12; + + static constexpr u32 R_COPY = R_SH_COPY; + static constexpr u32 R_GLOB_DAT = R_SH_GLOB_DAT; + static constexpr u32 R_JUMP_SLOT = R_SH_JMP_SLOT; + static constexpr u32 R_ABS = R_SH_DIR32; + static constexpr u32 R_RELATIVE = R_SH_RELATIVE; + static constexpr u32 R_DTPOFF = R_SH_TLS_DTPOFF32; + static constexpr u32 R_TPOFF = R_SH_TLS_TPOFF32; + static constexpr u32 R_DTPMOD = R_SH_TLS_DTPMOD32; +}; + +struct ALPHA { + static constexpr std::string_view target_name = "alpha"; + static constexpr bool is_64 = true; + static constexpr bool is_le = true; + static constexpr bool is_rela = true; + static constexpr u32 page_size = 65536; + static constexpr u32 e_machine = EM_ALPHA; + static constexpr u32 plt_hdr_size = 0; + static constexpr u32 plt_size = 0; + static constexpr u32 pltgot_size = 0; + + static constexpr u32 R_COPY = R_ALPHA_COPY; + static constexpr u32 R_GLOB_DAT = R_ALPHA_GLOB_DAT; + static constexpr u32 R_JUMP_SLOT = R_ALPHA_JMP_SLOT; + static constexpr u32 R_ABS = R_ALPHA_REFQUAD; + static constexpr u32 R_RELATIVE = R_ALPHA_RELATIVE; + static constexpr u32 R_DTPOFF = R_ALPHA_DTPREL64; + static constexpr u32 R_TPOFF = R_ALPHA_TPREL64; + static constexpr u32 R_DTPMOD = R_ALPHA_DTPMOD64; +}; + +} // namespace mold::elf diff --git a/third_party/mold/elf/gc-sections.cc b/third_party/mold/elf/gc-sections.cc new file mode 100644 index 00000000000..fcb69e575f7 --- /dev/null +++ b/third_party/mold/elf/gc-sections.cc @@ -0,0 +1,180 @@ +// clang-format off +// This file implements a mark-sweep garbage collector for -gc-sections. 
+// In this algorithm, vertices are sections and edges are relocations. +// Any section that is reachable from a root section is considered alive. + +#include "third_party/mold/elf/mold.h" + +// MISSING #include +// MISSING #include + +namespace mold::elf { + +template +static bool should_keep(const InputSection &isec) { + u32 type = isec.shdr().sh_type; + u32 flags = isec.shdr().sh_flags; + std::string_view name = isec.name(); + + return (flags & SHF_GNU_RETAIN) || + type == SHT_NOTE || + type == SHT_INIT_ARRAY || + type == SHT_FINI_ARRAY || + type == SHT_PREINIT_ARRAY || + (is_arm32 && type == SHT_ARM_EXIDX) || + name.starts_with(".ctors") || + name.starts_with(".dtors") || + name.starts_with(".init") || + name.starts_with(".fini") || + is_c_identifier(name); +} + +template +static bool mark_section(InputSection *isec) { + return isec && isec->is_alive && !isec->is_visited.test_and_set(); +} + +template +static void visit(Context &ctx, InputSection *isec, + tbb::feeder *> &feeder, i64 depth) { + assert(isec->is_visited); + + // If this is a text section, .eh_frame may contain records + // describing how to handle exceptions for that function. + // We want to keep associated .eh_frame records. + for (FdeRecord &fde : isec->get_fdes()) + for (const ElfRel &rel : fde.get_rels(isec->file).subspan(1)) + if (Symbol *sym = isec->file.symbols[rel.r_sym]) + if (mark_section(sym->get_input_section())) + feeder.add(sym->get_input_section()); + + for (const ElfRel &rel : isec->get_rels(ctx)) { + Symbol &sym = *isec->file.symbols[rel.r_sym]; + + // A symbol can refer to either a section fragment or an input section. + // Mark a fragment as alive. + if (SectionFragment *frag = sym.get_frag()) { + frag->is_alive = true; + continue; + } + + // Mark a section alive. For better performance, we don't call + // `feeder.add` too often. + if (mark_section(sym.get_input_section())) { + if (depth < 3) + visit(ctx, sym.get_input_section(), feeder, depth + 1); + else + feeder.add(sym.get_input_section()); + } + } +} + +template +static void collect_root_set(Context &ctx, + tbb::concurrent_vector *> &rootset) { + Timer t(ctx, "collect_root_set"); + + auto enqueue_section = [&](InputSection *isec) { + if (mark_section(isec)) + rootset.push_back(isec); + }; + + auto enqueue_symbol = [&](Symbol *sym) { + if (sym) { + if (SectionFragment *frag = sym->get_frag()) + frag->is_alive = true; + else + enqueue_section(sym->get_input_section()); + } + }; + + // Add sections that are not subject to garbage collection. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (std::unique_ptr> &isec : file->sections) { + if (!isec || !isec->is_alive) + continue; + + // --gc-sections discards only SHF_ALLOC sections. If you want to + // reduce the amount of non-memory-mapped segments, you should + // use the `strip` command, compile without debug info, or use the + // --strip-all linker option. + u32 flags = isec->shdr().sh_flags; + if (!(flags & SHF_ALLOC)) + isec->is_visited = true; + + if (should_keep(*isec)) + enqueue_section(isec.get()); + } + }); + + // Add sections containing exported symbols. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (Symbol *sym : file->symbols) + if (sym->file == file && sym->is_exported) + enqueue_symbol(sym); + }); + + // Add sections referenced by root symbols.
+ enqueue_symbol(get_symbol(ctx, ctx.arg.entry)); + + for (std::string_view name : ctx.arg.undefined) + enqueue_symbol(get_symbol(ctx, name)); + + for (std::string_view name : ctx.arg.require_defined) + enqueue_symbol(get_symbol(ctx, name)); + + // .eh_frame consists of variable-length records called CIE and FDE + // records, and they are a unit of inclusion or exclusion. + // We just keep all CIEs and everything that are referenced by them. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (CieRecord &cie : file->cies) + for (const ElfRel &rel : cie.get_rels()) + enqueue_symbol(file->symbols[rel.r_sym]); + }); +} + +// Mark all reachable sections +template +static void mark(Context &ctx, + tbb::concurrent_vector *> &rootset) { + Timer t(ctx, "mark"); + + tbb::parallel_for_each(rootset, [&](InputSection *isec, + tbb::feeder *> &feeder) { + visit(ctx, isec, feeder, 0); + }); +} + +// Remove unreachable sections +template +static void sweep(Context &ctx) { + Timer t(ctx, "sweep"); + static Counter counter("garbage_sections"); + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (std::unique_ptr> &isec : file->sections) { + if (isec && isec->is_alive && !isec->is_visited) { + if (ctx.arg.print_gc_sections) + SyncOut(ctx) << "removing unused section " << *isec; + isec->kill(); + counter++; + } + } + }); +} + +template +void gc_sections(Context &ctx) { + Timer t(ctx, "gc"); + + tbb::concurrent_vector *> rootset; + collect_root_set(ctx, rootset); + mark(ctx, rootset); + sweep(ctx); +} + +using E = MOLD_TARGET; + +template void gc_sections(Context &ctx); + +} // namespace mold::elf diff --git a/third_party/mold/elf/icf.cc b/third_party/mold/elf/icf.cc new file mode 100644 index 00000000000..5d6a643e552 --- /dev/null +++ b/third_party/mold/elf/icf.cc @@ -0,0 +1,615 @@ +// clang-format off +// This file implements the Identical Code Folding feature which can +// reduce the output file size of a typical program by a few percent. +// ICF identifies read-only input sections that happen to be identical +// and thus can be used interchangeably. ICF leaves one of them and discards +// the others. +// +// ICF is usually used in combination with -ffunction-sections and +// -fdata-sections compiler options, so that object files have one section +// for each function or variable instead of having one large .text or .data. +// The unit of ICF merging is section. +// +// Two sections are considered identical by ICF if they have the exact +// same contents, metadata such as section flags, exception handling +// records, and relocations. The last one is interesting because two +// relocations are considered identical if they point to the _same_ +// section in terms of ICF. +// +// To see what that means, consider two sections, A and B, which are +// identical except for one pair of relocations. Say, A has a relocation to +// section C, and B has a relocation to D. In this case, A and B are +// considered identical if C and D are considered identical. C and D can be +// either really the same section or two different sections that are +// considered identical by ICF. Below is an example of such inputs, A, B, C +// and D: +// +// void A() { C(); } +// void B() { D(); } +// void C() { A(); } +// void D() { B(); } +// +// If we assume A and B are mergeable, we can merge C and D, which makes A +// and B mergeable. There's no contradiction in our assumption, so we can +// conclude that A and B as well as C and D are mergeable. +// +// This problem boils down to one in graph theory. 
Input to ICF can be +// considered as a directed graph in which vertices are sections and edges +// are relocations. Vertices have labels (section contents, etc.), and so +// are edges (relocation offsets, etc.). Two vertices are considered +// identical if and only if the (possibly infinite) their unfoldings into +// regular trees are equal. Given this formulation, we want to find as +// many identical vertices as possible. +// +// Just like a lot of problems with graph, this problem doesn't have a +// straightforward "optimal" solution, and we need to resort to heuristics. +// +// mold approaches this problem by hashing program trees with increasing depth +// on each iteration. +// For example, when we start, we only hash individual functions with +// their call into other functions omitted. From the second iteration, we +// put the function they call into the hash by appending the hash of those +// functions from the previous iteration. This means that the nth iteration +// hashes call chain up to (n-1) levels deep. +// We use a cryptographic hash function, so the unique number of hashes will +// only monotonically increase as we take into account of deeper trees with +// iterations (otherwise, that means we have found a hash collision). We stop +// when the unique number of hashes stop increasing; this is based on the fact +// that once we observe an iteration with the same amount of unique hashes as +// the previous iteration, it will remain unchanged for further iterations. +// This is provable, but here we omit the proof for brevity. +// +// When compared to other approaches, mold's approach has a relatively cheaper +// cost per iteration, and as a bonus, is highly parallelizable. +// For Chromium, mold's ICF finishes in less than 1 second with 20 threads, +// whereas lld takes 5 seconds and gold takes 50 seconds under the same +// conditions. 
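+//
+// A minimal sketch of that iteration, assuming hypothetical helpers
+// `content_hash` (per-vertex digests), `edges` (successor lists built from
+// relocations), `combine_hash` and `count_unique`; none of these names are
+// part of mold, this is illustration only:
+//
+//   std::vector<u64> cur = content_hash;       // round-0: vertex-only hashes
+//   i64 num_unique = count_unique(cur);
+//   for (;;) {
+//     std::vector<u64> next(cur.size());
+//     for (i64 v = 0; v < (i64)cur.size(); v++) {
+//       u64 h = content_hash[v];               // this vertex's own label
+//       for (u32 succ : edges[v])              // one level deeper per round
+//         h = combine_hash(h, cur[succ]);
+//       next[v] = h;
+//     }
+//     cur = std::move(next);
+//     i64 n = count_unique(cur);
+//     if (n == num_unique)                     // no new distinctions appeared
+//       break;
+//     num_unique = n;
+//   }
+//
+// The implementation below follows the same idea, but runs the rounds in
+// parallel and uses a cryptographic hash so that equal digests can be
+// treated as equal trees.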
+ +#include "third_party/mold/elf/mold.h" +// MISSING #include "../common/sha.h" + +#include "third_party/libcxx/array" +#include "third_party/libcxx/cstdio" +// MISSING #include +// MISSING #include +// MISSING #include +// MISSING #include +// MISSING #include +// MISSING #include + +static constexpr int64_t HASH_SIZE = 16; + +typedef std::array Digest; + +namespace std { +template<> struct hash { + size_t operator()(const Digest &k) const { + return *(int64_t *)&k[0]; + } +}; +} + +namespace mold::elf { + +template +static void uniquify_cies(Context &ctx) { + Timer t(ctx, "uniquify_cies"); + std::vector *> cies; + + for (ObjectFile *file : ctx.objs) { + for (CieRecord &cie : file->cies) { + for (i64 i = 0; i < cies.size(); i++) { + if (cie.equals(*cies[i])) { + cie.icf_idx = i; + goto found; + } + } + cie.icf_idx = cies.size(); + cies.push_back(&cie); + found:; + } + } +} + +template +static bool is_eligible(Context &ctx, InputSection &isec) { + const ElfShdr &shdr = isec.shdr(); + std::string_view name = isec.name(); + + bool is_alloc = (shdr.sh_flags & SHF_ALLOC); + bool is_exec = (shdr.sh_flags & SHF_EXECINSTR) || + ctx.arg.ignore_data_address_equality; + bool is_relro = (name == ".data.rel.ro" || + name.starts_with(".data.rel.ro.")); + bool is_readonly = !(shdr.sh_flags & SHF_WRITE) || is_relro; + bool is_bss = (shdr.sh_type == SHT_NOBITS); + bool is_empty = (shdr.sh_size == 0); + bool is_init = (shdr.sh_type == SHT_INIT_ARRAY || name == ".init"); + bool is_fini = (shdr.sh_type == SHT_FINI_ARRAY || name == ".fini"); + bool is_enumerable = is_c_identifier(name); + bool is_addr_taken = !ctx.arg.icf_all && isec.address_significant; + + return is_alloc && is_exec && is_readonly && !is_bss && !is_empty && + !is_init && !is_fini && !is_enumerable && !is_addr_taken; +} + +static Digest digest_final(SHA256Hash &sha) { + u8 buf[SHA256_SIZE]; + sha.finish(buf); + + Digest digest; + memcpy(digest.data(), buf, HASH_SIZE); + return digest; +} + +template +static bool is_leaf(Context &ctx, InputSection &isec) { + if (!isec.get_rels(ctx).empty()) + return false; + + for (FdeRecord &fde : isec.get_fdes()) + if (fde.get_rels(isec.file).size() > 1) + return false; + + return true; +} + +template +struct LeafHasher { + size_t operator()(InputSection *isec) const { + u64 h = hash_string(isec->contents); + for (FdeRecord &fde : isec->get_fdes()) { + u64 h2 = hash_string(fde.get_contents(isec->file).substr(8)); + h = combine_hash(h, h2); + } + return h; + } +}; + +template +struct LeafEq { + bool operator()(InputSection *a, InputSection *b) const { + if (a->contents != b->contents) + return false; + + std::span> x = a->get_fdes(); + std::span> y = b->get_fdes(); + + if (x.size() != y.size()) + return false; + + for (i64 i = 0; i < x.size(); i++) + if (x[i].get_contents(a->file).substr(8) != + y[i].get_contents(b->file).substr(8)) + return false; + return true; + } +}; + +// Early merge of leaf nodes, which can be processed without constructing the +// entire graph. This reduces the vertex count and improves memory efficiency. 
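+//
+// As an illustration (not taken from the mold sources): with
+// -ffunction-sections, a function that references nothing else, e.g.
+//
+//   int forty_two() { return 42; }
+//
+// is emitted as a .text.forty_two section with no relocations. If two object
+// files contain byte-identical copies of such a section, they can be folded
+// right away by content comparison, before any hash propagation runs.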
+template +static void merge_leaf_nodes(Context &ctx) { + Timer t(ctx, "merge_leaf_nodes"); + + static Counter eligible("icf_eligibles"); + static Counter non_eligible("icf_non_eligibles"); + static Counter leaf("icf_leaf_nodes"); + + tbb::concurrent_unordered_map *, InputSection *, + LeafHasher, LeafEq> map; + + tbb::parallel_for((i64)0, (i64)ctx.objs.size(), [&](i64 i) { + for (std::unique_ptr> &isec : ctx.objs[i]->sections) { + if (!isec || !isec->is_alive) + continue; + + if (!is_eligible(ctx, *isec)) { + non_eligible++; + continue; + } + + if (is_leaf(ctx, *isec)) { + leaf++; + isec->icf_leaf = true; + auto [it, inserted] = map.insert({isec.get(), isec.get()}); + if (!inserted && isec->get_priority() < it->second->get_priority()) + it->second = isec.get(); + } else { + eligible++; + isec->icf_eligible = true; + } + } + }); + + tbb::parallel_for((i64)0, (i64)ctx.objs.size(), [&](i64 i) { + for (std::unique_ptr> &isec : ctx.objs[i]->sections) { + if (isec && isec->is_alive && isec->icf_leaf) { + auto it = map.find(isec.get()); + assert(it != map.end()); + isec->leader = it->second; + } + } + }); +} + +template +static Digest compute_digest(Context &ctx, InputSection &isec) { + SHA256Hash sha; + + auto hash = [&](auto val) { + sha.update((u8 *)&val, sizeof(val)); + }; + + auto hash_string = [&](std::string_view str) { + hash(str.size()); + sha.update((u8 *)str.data(), str.size()); + }; + + auto hash_symbol = [&](Symbol &sym) { + InputSection *isec = sym.get_input_section(); + + if (!sym.file) { + hash('1'); + hash((u64)&sym); + } else if (SectionFragment *frag = sym.get_frag()) { + hash('2'); + hash((u64)frag); + } else if (!isec) { + hash('3'); + } else if (isec->leader) { + hash('4'); + hash((u64)isec->leader); + } else if (isec->icf_eligible) { + hash('5'); + } else { + hash('6'); + hash((u64)isec); + } + hash(sym.value); + }; + + hash_string(isec.contents); + hash(isec.shdr().sh_flags); + hash(isec.get_fdes().size()); + hash(isec.get_rels(ctx).size()); + + for (FdeRecord &fde : isec.get_fdes()) { + hash(isec.file.cies[fde.cie_idx].icf_idx); + + // Bytes 0 to 4 contain the length of this record, and + // bytes 4 to 8 contain an offset to CIE. + hash_string(fde.get_contents(isec.file).substr(8)); + + hash(fde.get_rels(isec.file).size()); + + for (const ElfRel &rel : fde.get_rels(isec.file).subspan(1)) { + hash_symbol(*isec.file.symbols[rel.r_sym]); + hash(rel.r_type); + hash(rel.r_offset - fde.input_offset); + hash(get_addend(isec.file.cies[fde.cie_idx].input_section, rel)); + } + } + + for (i64 i = 0; i < isec.get_rels(ctx).size(); i++) { + const ElfRel &rel = isec.get_rels(ctx)[i]; + hash(rel.r_offset); + hash(rel.r_type); + hash(get_addend(isec, rel)); + hash_symbol(*isec.file.symbols[rel.r_sym]); + } + + return digest_final(sha); +} + +template +static std::vector *> gather_sections(Context &ctx) { + Timer t(ctx, "gather_sections"); + + // Count the number of input sections for each input file. + std::vector num_sections(ctx.objs.size()); + + tbb::parallel_for((i64)0, (i64)ctx.objs.size(), [&](i64 i) { + for (std::unique_ptr> &isec : ctx.objs[i]->sections) + if (isec && isec->is_alive && isec->icf_eligible) + num_sections[i]++; + }); + + std::vector section_indices(ctx.objs.size()); + for (i64 i = 0; i < ctx.objs.size() - 1; i++) + section_indices[i + 1] = section_indices[i] + num_sections[i]; + + std::vector *> sections( + section_indices.back() + num_sections.back()); + + // Fill `sections` contents. 
+ tbb::parallel_for((i64)0, (i64)ctx.objs.size(), [&](i64 i) { + i64 idx = section_indices[i]; + for (std::unique_ptr> &isec : ctx.objs[i]->sections) + if (isec && isec->is_alive && isec->icf_eligible) + sections[idx++] = isec.get(); + }); + + tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) { + sections[i]->icf_idx = i; + }); + + return sections; +} + +template +static std::vector +compute_digests(Context &ctx, std::span *> sections) { + Timer t(ctx, "compute_digests"); + + std::vector digests(sections.size()); + tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) { + digests[i] = compute_digest(ctx, *sections[i]); + }); + return digests; +} + +// Build a graph, treating every function as a vertex and every function call +// as an edge. See the description at the top for a more detailed formulation. +// We use u32 indices here to improve cache locality. +template +static void gather_edges(Context &ctx, + std::span *> sections, + std::vector &edges, + std::vector &edge_indices) { + Timer t(ctx, "gather_edges"); + + if (sections.empty()) + return; + + std::vector num_edges(sections.size()); + edge_indices.resize(sections.size()); + + tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) { + InputSection &isec = *sections[i]; + assert(isec.icf_eligible); + + for (i64 j = 0; j < isec.get_rels(ctx).size(); j++) { + const ElfRel &rel = isec.get_rels(ctx)[j]; + Symbol &sym = *isec.file.symbols[rel.r_sym]; + if (!sym.get_frag()) + if (InputSection *isec = sym.get_input_section()) + if (isec->icf_eligible) + num_edges[i]++; + } + }); + + for (i64 i = 0; i < num_edges.size() - 1; i++) + edge_indices[i + 1] = edge_indices[i] + num_edges[i]; + + edges.resize(edge_indices.back() + num_edges.back()); + + tbb::parallel_for((i64)0, (i64)num_edges.size(), [&](i64 i) { + InputSection &isec = *sections[i]; + i64 idx = edge_indices[i]; + + for (ElfRel &rel : isec.get_rels(ctx)) { + Symbol &sym = *isec.file.symbols[rel.r_sym]; + if (InputSection *isec = sym.get_input_section()) + if (isec->icf_eligible) + edges[idx++] = isec->icf_idx; + } + }); +} + +template +static i64 propagate(std::span> digests, + std::span edges, std::span edge_indices, + bool &slot, BitVector &converged, + tbb::affinity_partitioner &ap) { + static Counter round("icf_round"); + round++; + + i64 num_digests = digests[0].size(); + tbb::enumerable_thread_specific changed; + + tbb::parallel_for((i64)0, num_digests, [&](i64 i) { + if (converged.get(i)) + return; + + SHA256Hash sha; + sha.update(digests[2][i].data(), HASH_SIZE); + + i64 begin = edge_indices[i]; + i64 end = (i + 1 == num_digests) ? edges.size() : edge_indices[i + 1]; + + for (i64 j : edges.subspan(begin, end - begin)) + sha.update(digests[slot][j].data(), HASH_SIZE); + + digests[!slot][i] = digest_final(sha); + + if (digests[slot][i] == digests[!slot][i]) { + // This node has converged. Skip further iterations as it will + // yield the same hash. 
+ converged.set(i); + } else { + changed.local()++; + } + }, ap); + + slot = !slot; + return changed.combine(std::plus()); +} + +template +static i64 count_num_classes(std::span digests, + tbb::affinity_partitioner &ap) { + std::vector vec(digests.begin(), digests.end()); + tbb::parallel_sort(vec); + + tbb::enumerable_thread_specific num_classes; + tbb::parallel_for((i64)0, (i64)vec.size() - 1, [&](i64 i) { + if (vec[i] != vec[i + 1]) + num_classes.local()++; + }, ap); + return num_classes.combine(std::plus()); +} + +template +static void print_icf_sections(Context &ctx) { + tbb::concurrent_vector *> leaders; + tbb::concurrent_unordered_multimap *, InputSection *> map; + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (std::unique_ptr> &isec : file->sections) { + if (isec && isec->is_alive && isec->leader) { + if (isec.get() == isec->leader) + leaders.push_back(isec.get()); + else + map.insert({isec->leader, isec.get()}); + } + } + }); + + tbb::parallel_sort(leaders.begin(), leaders.end(), + [&](InputSection *a, InputSection *b) { + return a->get_priority() < b->get_priority(); + }); + + i64 saved_bytes = 0; + + for (InputSection *leader : leaders) { + auto [begin, end] = map.equal_range(leader); + if (begin == end) + continue; + + SyncOut(ctx) << "selected section " << *leader; + + i64 n = 0; + for (auto it = begin; it != end; it++) { + SyncOut(ctx) << " removing identical section " << *it->second; + n++; + } + saved_bytes += leader->contents.size() * n; + } + + SyncOut(ctx) << "ICF saved " << saved_bytes << " bytes"; +} + +template +void icf_sections(Context &ctx) { + Timer t(ctx, "icf"); + if (ctx.objs.empty()) + return; + + uniquify_cies(ctx); + merge_leaf_nodes(ctx); + + // Prepare for the propagation rounds. + std::vector *> sections = gather_sections(ctx); + + // We allocate 3 arrays to store hashes for each vertex. + // + // Index 0 and 1 are used for tree hashes from the previous + // iteration and the current iteration. They switch roles every + // iteration. See `slot` below. + // + // Index 2 stores the initial, single-vertex hash. This is combined + // with hashes from the connected vertices to form the tree hash + // described above. + std::vector> digests(3); + digests[0] = compute_digests(ctx, sections); + digests[1].resize(digests[0].size()); + digests[2] = digests[0]; + + std::vector edges; + std::vector edge_indices; + gather_edges(ctx, sections, edges, edge_indices); + + BitVector converged(digests[0].size()); + bool slot = 0; + + // Execute the propagation rounds until convergence is obtained. + { + Timer t(ctx, "propagate"); + tbb::affinity_partitioner ap; + + // A cheap test that the graph hasn't converged yet. + // The loop after this one uses a strict condition, but it's expensive + // as it requires sorting the entire hash collection. + // + // For nodes that have a cycle in downstream (i.e. recursive + // functions and functions that calls recursive functions) will always + // change with the iterations. Nodes that doesn't (i.e. non-recursive + // functions) will stop changing as soon as the propagation depth reaches + // the call tree depth. + // Here, we test whether we have reached sufficient depth for the latter, + // which is a necessary (but not sufficient) condition for convergence. 
+ i64 num_changed = -1; + for (;;) { + i64 n = propagate(digests, edges, edge_indices, slot, converged, ap); + if (n == num_changed) + break; + num_changed = n; + } + + // Run the pass until the unique number of hashes stop increasing, at which + // point we have achieved convergence (proof omitted for brevity). + i64 num_classes = -1; + for (;;) { + // count_num_classes requires sorting which is O(n log n), so do a little + // more work beforehand to amortize that log factor. + for (i64 i = 0; i < 10; i++) + propagate(digests, edges, edge_indices, slot, converged, ap); + + i64 n = count_num_classes(digests[slot], ap); + if (n == num_classes) + break; + num_classes = n; + } + } + + // Group sections by SHA digest. + { + Timer t(ctx, "group"); + + auto *map = new tbb::concurrent_unordered_map *>; + std::span digest = digests[slot]; + + tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) { + InputSection *isec = sections[i]; + auto [it, inserted] = map->insert({digest[i], isec}); + if (!inserted && isec->get_priority() < it->second->get_priority()) + it->second = isec; + }); + + tbb::parallel_for((i64)0, (i64)sections.size(), [&](i64 i) { + auto it = map->find(digest[i]); + assert(it != map->end()); + sections[i]->leader = it->second; + }); + + // Since free'ing the map is slow, postpone it. + ctx.on_exit.push_back([=] { delete map; }); + } + + if (ctx.arg.print_icf_sections) + print_icf_sections(ctx); + + // Eliminate duplicate sections. + // Symbols pointing to eliminated sections will be redirected on the fly when + // exporting to the symtab. + { + Timer t(ctx, "sweep"); + static Counter eliminated("icf_eliminated"); + tbb::parallel_for_each(ctx.objs, [](ObjectFile *file) { + for (std::unique_ptr> &isec : file->sections) { + if (isec && isec->is_alive && isec->is_killed_by_icf()) { + isec->kill(); + eliminated++; + } + } + }); + } +} + +using E = MOLD_TARGET; + +template void icf_sections(Context &ctx); + +} // namespace mold::elf diff --git a/third_party/mold/elf/input-files.cc b/third_party/mold/elf/input-files.cc new file mode 100644 index 00000000000..5638cee4d52 --- /dev/null +++ b/third_party/mold/elf/input-files.cc @@ -0,0 +1,1497 @@ +// clang-format off +#include "third_party/mold/elf/mold.h" + +#include "third_party/libcxx/bit" +#include "third_party/libcxx/cstring" +// MISSING #include + +#ifndef _WIN32 +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "libc/time/time.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/lockf.h" +#endif + +namespace mold::elf { + +template +InputFile::InputFile(Context &ctx, MappedFile> *mf) + : mf(mf), filename(mf->name) { + if (mf->size < sizeof(ElfEhdr)) + Fatal(ctx) << *this << ": file too small"; + if (memcmp(mf->data, "\177ELF", 4)) + Fatal(ctx) << *this << ": not an ELF file"; + + ElfEhdr &ehdr = *(ElfEhdr *)mf->data; + is_dso = (ehdr.e_type == ET_DYN); + + ElfShdr *sh_begin = (ElfShdr *)(mf->data + ehdr.e_shoff); + + // e_shnum contains the total number of sections in an object file. + // Since it is a 16-bit integer field, it's not large enough to + // represent >65535 sections. If an object file contains more than 65535 + // sections, the actual number is stored to sh_size field. 
+ i64 num_sections = (ehdr.e_shnum == 0) ? sh_begin->sh_size : ehdr.e_shnum; + + if (mf->data + mf->size < (u8 *)(sh_begin + num_sections)) + Fatal(ctx) << mf->name << ": e_shoff or e_shnum corrupted: " + << mf->size << " " << num_sections; + elf_sections = {sh_begin, sh_begin + num_sections}; + + // e_shstrndx is a 16-bit field. If .shstrtab's section index is + // too large, the actual number is stored to sh_link field. + i64 shstrtab_idx = (ehdr.e_shstrndx == SHN_XINDEX) + ? sh_begin->sh_link : ehdr.e_shstrndx; + + shstrtab = this->get_string(ctx, shstrtab_idx); +} + +template +std::span> InputFile::get_phdrs() { + ElfEhdr &ehdr = get_ehdr(); + return {(ElfPhdr *)(mf->data + ehdr.e_phoff), ehdr.e_phnum}; +} + +template +ElfShdr *InputFile::find_section(i64 type) { + for (ElfShdr &sec : elf_sections) + if (sec.sh_type == type) + return &sec; + return nullptr; +} + +template +void InputFile::clear_symbols() { + for (Symbol *sym : get_global_syms()) { + if (__atomic_load_n(&sym->file, __ATOMIC_ACQUIRE) == this) { + sym->origin = 0; + sym->value = -1; + sym->sym_idx = -1; + sym->ver_idx = 0; + sym->is_weak = false; + sym->is_imported = false; + sym->is_exported = false; + __atomic_store_n(&sym->file, nullptr, __ATOMIC_RELEASE); + } + } +} + +// Find the source filename. It should be listed in symtab as STT_FILE. +template +std::string_view InputFile::get_source_name() const { + for (i64 i = 0; i < first_global; i++) + if (Symbol *sym = symbols[i]; sym->get_type() == STT_FILE) + return sym->name(); + return ""; +} + +template +ObjectFile::ObjectFile(Context &ctx, MappedFile> *mf, + std::string archive_name, bool is_in_lib) + : InputFile(ctx, mf), archive_name(archive_name), is_in_lib(is_in_lib) { + this->is_alive = !is_in_lib; +} + +template +ObjectFile * +ObjectFile::create(Context &ctx, MappedFile> *mf, + std::string archive_name, bool is_in_lib) { + ObjectFile *obj = new ObjectFile(ctx, mf, archive_name, is_in_lib); + ctx.obj_pool.emplace_back(obj); + return obj; +} + +template +static bool is_debug_section(const ElfShdr &shdr, std::string_view name) { + return !(shdr.sh_flags & SHF_ALLOC) && name.starts_with(".debug"); +} + +template +void +ObjectFile::read_note_gnu_property(Context &ctx, const ElfShdr &shdr) { + std::string_view data = this->get_string(ctx, shdr); + + while (!data.empty()) { + ElfNhdr &hdr = *(ElfNhdr *)data.data(); + data = data.substr(sizeof(hdr)); + + std::string_view name = data.substr(0, hdr.n_namesz - 1); + data = data.substr(align_to(hdr.n_namesz, 4)); + + std::string_view desc = data.substr(0, hdr.n_descsz); + data = data.substr(align_to(hdr.n_descsz, sizeof(Word))); + + if (hdr.n_type != NT_GNU_PROPERTY_TYPE_0 || name != "GNU") + continue; + + while (!desc.empty()) { + u32 type = *(U32 *)desc.data(); + u32 size = *(U32 *)(desc.data() + 4); + desc = desc.substr(8); + + // The majority of currently defined .note.gnu.property + // use 32-bit values. + // We don't know how to handle anything else, so if we encounter + // one, skip it. 
+ // + // The following properties have a different size: + // - GNU_PROPERTY_STACK_SIZE + // - GNU_PROPERTY_NO_COPY_ON_PROTECTED + if (size == 4) + gnu_properties[type] |= *(U32 *)desc.data(); + desc = desc.substr(align_to(size, sizeof(Word))); + } + } +} + +template +void ObjectFile::initialize_sections(Context &ctx) { + // Read sections + for (i64 i = 0; i < this->elf_sections.size(); i++) { + const ElfShdr &shdr = this->elf_sections[i]; + + if ((shdr.sh_flags & SHF_EXCLUDE) && !(shdr.sh_flags & SHF_ALLOC) && + shdr.sh_type != SHT_LLVM_ADDRSIG && !ctx.arg.relocatable) + continue; + + switch (shdr.sh_type) { + case SHT_GROUP: { + // Get the signature of this section group. + if (shdr.sh_info >= this->elf_syms.size()) + Fatal(ctx) << *this << ": invalid symbol index"; + const ElfSym &esym = this->elf_syms[shdr.sh_info]; + + std::string_view signature; + if (esym.st_type == STT_SECTION) { + signature = this->shstrtab.data() + + this->elf_sections[esym.st_shndx].sh_name; + } else { + signature = this->symbol_strtab.data() + esym.st_name; + } + + // Ignore a broken comdat group GCC emits for .debug_macros. + // https://github.com/rui314/mold/issues/438 + if (signature.starts_with("wm4.")) + continue; + + // Get comdat group members. + std::span> entries = this->template get_data>(ctx, shdr); + + if (entries.empty()) + Fatal(ctx) << *this << ": empty SHT_GROUP"; + if (entries[0] == 0) + continue; + if (entries[0] != GRP_COMDAT) + Fatal(ctx) << *this << ": unsupported SHT_GROUP format"; + + typename decltype(ctx.comdat_groups)::const_accessor acc; + ctx.comdat_groups.insert(acc, {signature, ComdatGroup()}); + ComdatGroup *group = const_cast(&acc->second); + comdat_groups.push_back({group, (u32)i, entries.subspan(1)}); + break; + } + case SHT_SYMTAB_SHNDX: + symtab_shndx_sec = this->template get_data>(ctx, shdr); + break; + case SHT_SYMTAB: + case SHT_STRTAB: + case SHT_REL: + case SHT_RELA: + case SHT_NULL: + case SHT_ARM_ATTRIBUTES: + break; + default: { + std::string_view name = this->shstrtab.data() + shdr.sh_name; + + // .note.GNU-stack section controls executable-ness of the stack + // area in GNU linkers. We ignore that section because silently + // making the stack area executable is too dangerous. Tell our + // users about the difference if that matters. + if (name == ".note.GNU-stack" && !ctx.arg.relocatable) { + if (shdr.sh_flags & SHF_EXECINSTR) { + if (!ctx.arg.z_execstack && !ctx.arg.z_execstack_if_needed) + Warn(ctx) << *this << ": this file may cause a segmentation" + " fault because it requires an executable stack. See" + " https://github.com/rui314/mold/tree/main/docs/execstack.md" + " for more info."; + needs_executable_stack = true; + } + continue; + } + + if (name == ".note.gnu.property") { + read_note_gnu_property(ctx, shdr); + continue; + } + + // Ignore a build-id section in an input file. This doesn't normally + // happen, but you can create such object file with + // `ld.bfd -r --build-id`. + if (name == ".note.gnu.build-id") + continue; + + // Ignore these sections for compatibility with old glibc i386 CRT files. + if (name == ".gnu.linkonce.t.__x86.get_pc_thunk.bx" || + name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx") + continue; + + // Also ignore this for compatibility with ICC + if (name == ".gnu.linkonce.d.DW.ref.__gxx_personality_v0") + continue; + + // Ignore debug sections if --strip-all or --strip-debug is given. + if ((ctx.arg.strip_all || ctx.arg.strip_debug) && + is_debug_section(shdr, name)) + continue; + + // Save .llvm_addrsig for --icf=safe. 
+ if (shdr.sh_type == SHT_LLVM_ADDRSIG && !ctx.arg.relocatable) { + llvm_addrsig = std::make_unique>(ctx, *this, name, i); + continue; + } + + // If an output file doesn't have a section header (i.e. + // --oformat=binary is given), we discard all non-memory-allocated + // sections. This is because without a section header, we can't find + // their places in an output file in the first place. + if (ctx.arg.oformat_binary && !(shdr.sh_flags & SHF_ALLOC)) + continue; + + this->sections[i] = std::make_unique>(ctx, *this, name, i); + + if constexpr (is_ppc32) + if (name == ".got2") + ppc32_got2 = this->sections[i].get(); + + // Save debug sections for --gdb-index. + if (ctx.arg.gdb_index) { + InputSection *isec = this->sections[i].get(); + + if (name == ".debug_info") + debug_info = isec; + if (name == ".debug_ranges") + debug_ranges = isec; + if (name == ".debug_rnglists") + debug_rnglists = isec; + + // If --gdb-index is given, contents of .debug_gnu_pubnames and + // .debug_gnu_pubtypes are copied to .gdb_index, so keeping them + // in an output file is just a waste of space. + if (name == ".debug_gnu_pubnames") { + debug_pubnames = isec; + isec->is_alive = false; + } + + if (name == ".debug_gnu_pubtypes") { + debug_pubtypes = isec; + isec->is_alive = false; + } + + // .debug_types is similar to .debug_info but contains type info + // only. It exists only in DWARF 4, has been removed in DWARF 5 and + // neither GCC nor Clang generate it by default + // (-fdebug-types-section is needed). As such there is probably + // little need to support it. + if (name == ".debug_types") + Fatal(ctx) << *this << ": mold's --gdb-index is not compatible" + " with .debug_types; to fix this error, remove" + " -fdebug-types-section and recompile"; + } + + static Counter counter("regular_sections"); + counter++; + break; + } + } + } + + // Attach relocation sections to their target sections. + for (i64 i = 0; i < this->elf_sections.size(); i++) { + const ElfShdr &shdr = this->elf_sections[i]; + if (shdr.sh_type != (E::is_rela ? SHT_RELA : SHT_REL)) + continue; + + if (shdr.sh_info >= sections.size()) + Fatal(ctx) << *this << ": invalid relocated section index: " + << (u32)shdr.sh_info; + + if (std::unique_ptr> &target = sections[shdr.sh_info]) { + assert(target->relsec_idx == -1); + target->relsec_idx = i; + } + } +} + +template +void ObjectFile::initialize_ehframe_sections(Context &ctx) { + for (i64 i = 0; i < sections.size(); i++) { + std::unique_ptr> &isec = sections[i]; + if (isec && isec->is_alive && isec->name() == ".eh_frame") { + read_ehframe(ctx, *isec); + } + } +} + +// .eh_frame contains data records explaining how to handle exceptions. +// When an exception is thrown, the runtime searches a record from +// .eh_frame with the current program counter as a key. A record that +// covers the current PC explains how to find a handler and how to +// transfer the control ot it. +// +// Unlike the most other sections, linker has to parse .eh_frame contents +// because of the following reasons: +// +// - There's usually only one .eh_frame section for each object file, +// which explains how to handle exceptions for all functions in the same +// object. If we just copy them, the resulting .eh_frame section will +// contain lots of records for dead sections (i.e. de-duplicated inline +// functions). We want to copy only records for live functions. +// +// - .eh_frame contains two types of records: CIE and FDE. There's usually +// only one CIE at beginning of .eh_frame section followed by FDEs. 
+// Compiler usually emits the identical CIE record for all object files. +// We want to merge identical CIEs in an output .eh_frame section to +// reduce the section size. +// +// - Scanning a .eh_frame section to find a record is an O(n) operation +// where n is the number of records in the section. To reduce it to +// O(log n), linker creates a .eh_frame_hdr section. The section +// contains a sorted list of [an address in .text, an FDE address whose +// coverage starts at the .text address] to make binary search doable. +// In order to create .eh_frame_hdr, linker has to read .eh_frame. +// +// This function parses an input .eh_frame section. +template +void ObjectFile::read_ehframe(Context &ctx, InputSection &isec) { + std::span> rels = isec.get_rels(ctx); + i64 cies_begin = cies.size(); + i64 fdes_begin = fdes.size(); + + // Read CIEs and FDEs until empty. + std::string_view contents = this->get_string(ctx, isec.shdr()); + i64 rel_idx = 0; + + for (std::string_view data = contents; !data.empty();) { + i64 size = *(U32 *)data.data(); + if (size == 0) + break; + + i64 begin_offset = data.data() - contents.data(); + i64 end_offset = begin_offset + size + 4; + i64 id = *(U32 *)(data.data() + 4); + data = data.substr(size + 4); + + i64 rel_begin = rel_idx; + while (rel_idx < rels.size() && rels[rel_idx].r_offset < end_offset) + rel_idx++; + assert(rel_idx == rels.size() || begin_offset <= rels[rel_begin].r_offset); + + if (id == 0) { + // This is CIE. + cies.emplace_back(ctx, *this, isec, begin_offset, rels, rel_begin); + } else { + // This is FDE. + if (rel_begin == rel_idx || rels[rel_begin].r_sym == 0) { + // FDE has no valid relocation, which means FDE is dead from + // the beginning. Compilers usually don't create such FDE, but + // `ld -r` tend to generate such dead FDEs. + continue; + } + + if (rels[rel_begin].r_offset - begin_offset != 8) + Fatal(ctx) << isec << ": FDE's first relocation should have offset 8"; + + fdes.emplace_back(begin_offset, rel_begin); + } + } + + // Associate CIEs to FDEs. + auto find_cie = [&](i64 offset) { + for (i64 i = cies_begin; i < cies.size(); i++) + if (cies[i].input_offset == offset) + return i; + Fatal(ctx) << isec << ": bad FDE pointer"; + }; + + for (i64 i = fdes_begin; i < fdes.size(); i++) { + i64 cie_offset = *(I32 *)(contents.data() + fdes[i].input_offset + 4); + fdes[i].cie_idx = find_cie(fdes[i].input_offset + 4 - cie_offset); + } + + auto get_isec = [&](const FdeRecord &fde) -> InputSection * { + return get_section(this->elf_syms[rels[fde.rel_idx].r_sym]); + }; + + // We assume that FDEs for the same input sections are contiguous + // in `fdes` vector. + std::stable_sort(fdes.begin() + fdes_begin, fdes.end(), + [&](const FdeRecord &a, const FdeRecord &b) { + return get_isec(a)->get_priority() < get_isec(b)->get_priority(); + }); + + // Associate FDEs to input sections. + for (i64 i = fdes_begin; i < fdes.size();) { + InputSection *isec = get_isec(fdes[i]); + assert(isec->fde_begin == -1); + isec->fde_begin = i++; + + while (i < fdes.size() && isec == get_isec(fdes[i])) + i++; + isec->fde_end = i; + } +} + +// Returns a symbol object for a given key. This function handles +// the -wrap option. 
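+//
+// For example, with `--wrap=malloc`, an undefined reference to `malloc` is
+// redirected to `__wrap_malloc`, while a reference to `__real_malloc` is
+// redirected to the real `malloc`. A user-supplied wrapper (shown here only
+// as an illustrative sketch, not part of mold) then looks like:
+//
+//   void *__real_malloc(size_t size);      // resolved to the real malloc
+//
+//   void *__wrap_malloc(size_t size) {
+//     // ... instrumentation goes here ...
+//     return __real_malloc(size);
+//   }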
+template +static Symbol *insert_symbol(Context &ctx, const ElfSym &esym, + std::string_view key, std::string_view name) { + if (esym.is_undef() && name.starts_with("__real_") && + ctx.arg.wrap.contains(name.substr(7))) { + return get_symbol(ctx, key.substr(7), name.substr(7)); + } + + Symbol *sym = get_symbol(ctx, key, name); + + if (esym.is_undef() && sym->is_wrapped) { + key = save_string(ctx, "__wrap_" + std::string(key)); + name = save_string(ctx, "__wrap_" + std::string(name)); + return get_symbol(ctx, key, name); + } + return sym; +} + +template +void ObjectFile::initialize_symbols(Context &ctx) { + if (!symtab_sec) + return; + + static Counter counter("all_syms"); + counter += this->elf_syms.size(); + + // Initialize local symbols + this->local_syms.resize(this->first_global); + this->local_syms[0].file = this; + this->local_syms[0].sym_idx = 0; + + for (i64 i = 1; i < this->first_global; i++) { + const ElfSym &esym = this->elf_syms[i]; + if (esym.is_common()) + Fatal(ctx) << *this << ": common local symbol?"; + + std::string_view name; + if (esym.st_type == STT_SECTION) + name = this->shstrtab.data() + this->elf_sections[get_shndx(esym)].sh_name; + else + name = this->symbol_strtab.data() + esym.st_name; + + Symbol &sym = this->local_syms[i]; + sym.set_name(name); + sym.file = this; + sym.value = esym.st_value; + sym.sym_idx = i; + + if (!esym.is_abs()) + sym.set_input_section(sections[get_shndx(esym)].get()); + } + + this->symbols.resize(this->elf_syms.size()); + + i64 num_globals = this->elf_syms.size() - this->first_global; + has_symver.resize(num_globals); + + for (i64 i = 0; i < this->first_global; i++) + this->symbols[i] = &this->local_syms[i]; + + // Initialize global symbols + for (i64 i = this->first_global; i < this->elf_syms.size(); i++) { + const ElfSym &esym = this->elf_syms[i]; + + // Get a symbol name + std::string_view key = this->symbol_strtab.data() + esym.st_name; + std::string_view name = key; + + // Parse symbol version after atsign + if (i64 pos = name.find('@'); pos != name.npos) { + std::string_view ver = name.substr(pos); + name = name.substr(0, pos); + + if (ver != "@" && ver != "@@") { + if (ver.starts_with("@@")) + key = name; + has_symver.set(i - this->first_global); + } + } + + this->symbols[i] = insert_symbol(ctx, esym, key, name); + if (esym.is_common()) + has_common_symbol = true; + } +} + +// Relocations are usually sorted by r_offset in relocation tables, +// but for some reason only RISC-V does not follow that convention. +// We expect them to be sorted, so sort them if necessary. +template +void ObjectFile::sort_relocations(Context &ctx) { + if constexpr (is_riscv) { + auto less = [&](const ElfRel &a, const ElfRel &b) { + return a.r_offset < b.r_offset; + }; + + for (i64 i = 1; i < sections.size(); i++) { + std::unique_ptr> &isec = sections[i]; + if (!isec || !isec->is_alive || !(isec->shdr().sh_flags & SHF_ALLOC)) + continue; + + std::span> rels = isec->get_rels(ctx); + if (!std::is_sorted(rels.begin(), rels.end(), less)) + sort(rels, less); + } + } +} + +static size_t find_null(std::string_view data, u64 entsize) { + if (entsize == 1) + return data.find('\0'); + + for (i64 i = 0; i <= data.size() - entsize; i += entsize) + if (data.substr(i, entsize).find_first_not_of('\0') == data.npos) + return i; + + return data.npos; +} + +// Mergeable sections (sections with SHF_MERGE bit) typically contain +// string literals. 
Linker is expected to split the section contents +// into null-terminated strings, merge them with mergeable strings +// from other object files, and emit uniquified strings to an output +// file. +// +// This mechanism reduces the size of an output file. If two source +// files happen to contain the same string literal, the output will +// contain only a single copy of it. +// +// It is less common than string literals, but mergeable sections can +// contain fixed-sized read-only records too. +// +// This function splits the section contents into small pieces that we +// call "section fragments". Section fragment is a unit of merging. +// +// We do not support mergeable sections that have relocations. +template +static std::unique_ptr> +split_section(Context &ctx, InputSection &sec) { + if (!sec.is_alive || sec.relsec_idx != -1) + return nullptr; + + const ElfShdr &shdr = sec.shdr(); + if (!(shdr.sh_flags & SHF_MERGE)) + return nullptr; + + std::unique_ptr> rec(new MergeableSection); + rec->parent = MergedSection::get_instance(ctx, sec.name(), shdr.sh_type, + shdr.sh_flags); + rec->p2align = sec.p2align; + + if (sec.sh_size == 0) + return rec; + + // If thes section contents are compressed, uncompress them. + sec.uncompress(ctx); + + std::string_view data = sec.contents; + const char *begin = data.data(); + u64 entsize = shdr.sh_entsize; + HyperLogLog estimator; + + // Split sections + if (shdr.sh_flags & SHF_STRINGS) { + if (entsize == 0) { + // GHC (Glasgow Haskell Compiler) sometimes creates a mergeable + // string section with entsize of 0 instead of 1, though such + // entsize is technically wrong. This is a workaround for the issue. + entsize = 1; + } + + while (!data.empty()) { + size_t end = find_null(data, entsize); + if (end == data.npos) + Fatal(ctx) << sec << ": string is not null terminated"; + + std::string_view substr = data.substr(0, end + entsize); + data = data.substr(end + entsize); + + rec->strings.push_back(substr); + rec->frag_offsets.push_back(substr.data() - begin); + + u64 hash = hash_string(substr); + rec->hashes.push_back(hash); + estimator.insert(hash); + } + } else { + // OCaml compiler seems to create a mergeable non-string section with + // entisze of 0. Such section is malformed. We do not split such section. + if (entsize == 0) + return nullptr; + + if (data.size() % entsize) + Fatal(ctx) << sec << ": section size is not multiple of sh_entsize"; + + while (!data.empty()) { + std::string_view substr = data.substr(0, entsize); + data = data.substr(entsize); + + rec->strings.push_back(substr); + rec->frag_offsets.push_back(substr.data() - begin); + + u64 hash = hash_string(substr); + rec->hashes.push_back(hash); + estimator.insert(hash); + } + } + + rec->parent->estimator.merge(estimator); + + static Counter counter("string_fragments"); + counter += rec->fragments.size(); + return rec; +} + +// Usually a section is an atomic unit of inclusion or exclusion. +// Linker doesn't care about its contents. However, if a section is a +// mergeable section (a section with SHF_MERGE bit set), the linker is +// expected to split it into smaller pieces and merge each piece with +// other pieces from different object files. In mold, we call the +// atomic unit of mergeable section "section pieces". +// +// This feature is typically used for string literals. String literals +// are usually put into a mergeable section by a compiler. 
If the same +// string literal happen to occur in two different translation units, +// a linker merges them into a single instance of a string, so that +// a linker's output doesn't contain duplicate string literals. +// +// Handling symbols in mergeable sections is a bit tricky. Assume that +// we have a mergeable section with the following contents and symbols: +// +// Hello world\0foo bar\0 +// ^ ^ +// .rodata .L.str1 +// .L.str0 +// +// '\0' represents a NUL byte. This mergeable section contains two +// section pieces, "Hello world" and "foo bar". The first string is +// referred by two symbols, .rodata and .L.str0, and the second by +// .L.str1. .rodata is a section symbol and therefore a local symbol +// and refers the begining of the section. +// +// In this example, there are actually two different ways to point to +// string "foo bar", because .rodata+12 and .L.str1+0 refer the same +// place in the section. This kind of "out-of-bound" reference occurs +// only when a symbol is a section symbol. In other words, compiler +// may use an offset from the beginning of a section to refer any +// section piece in a section, but it doesn't do for any other types +// of symbols. +// +// In mold, we attach section pieces symbols. If a relocation refers a +// section symbol whose section is a mergeable section, we create a +// new dummy symbol with a section piece and redirect the relocation +// to the symbol. If a non-section symbol refers a section piece, the +// section piece is attached to the symbol. +template +void ObjectFile::initialize_mergeable_sections(Context &ctx) { + mergeable_sections.resize(sections.size()); + + for (i64 i = 0; i < sections.size(); i++) { + if (std::unique_ptr> &isec = sections[i]) { + if (std::unique_ptr> m = split_section(ctx, *isec)) { + mergeable_sections[i] = std::move(m); + isec->is_alive = false; + } + } + } +} + +template +void ObjectFile::resolve_section_pieces(Context &ctx) { + for (std::unique_ptr> &m : mergeable_sections) { + if (m) { + m->fragments.reserve(m->strings.size()); + for (i64 i = 0; i < m->strings.size(); i++) + m->fragments.push_back(m->parent->insert(ctx, m->strings[i], m->hashes[i], + m->p2align)); + + // Shrink vectors that we will never use again to reclaim memory. + m->strings.clear(); + m->hashes.clear(); + } + } + + // Attach section pieces to symbols. + for (i64 i = 1; i < this->elf_syms.size(); i++) { + Symbol &sym = *this->symbols[i]; + const ElfSym &esym = this->elf_syms[i]; + + if (esym.is_abs() || esym.is_common() || esym.is_undef()) + continue; + + std::unique_ptr> &m = mergeable_sections[get_shndx(esym)]; + if (!m || m->fragments.empty()) + continue; + + SectionFragment *frag; + i64 frag_offset; + std::tie(frag, frag_offset) = m->get_fragment(esym.st_value); + + if (!frag) + Fatal(ctx) << *this << ": bad symbol value: " << esym.st_value; + + sym.set_frag(frag); + sym.value = frag_offset; + } + + // Compute the size of frag_syms. + i64 nfrag_syms = 0; + for (std::unique_ptr> &isec : sections) + if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC)) + for (ElfRel &r : isec->get_rels(ctx)) + if (const ElfSym &esym = this->elf_syms[r.r_sym]; + esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)]) + nfrag_syms++; + + this->frag_syms.resize(nfrag_syms); + + // For each relocation referring a mergeable section symbol, we create + // a new dummy non-section symbol and redirect the relocation to the + // newly-created symbol. 
+ i64 idx = 0; + for (std::unique_ptr> &isec : sections) { + if (!isec || !isec->is_alive || !(isec->shdr().sh_flags & SHF_ALLOC)) + continue; + + for (ElfRel &r : isec->get_rels(ctx)) { + const ElfSym &esym = this->elf_syms[r.r_sym]; + if (esym.st_type != STT_SECTION) + continue; + + std::unique_ptr> &m = mergeable_sections[get_shndx(esym)]; + if (!m) + continue; + + i64 r_addend = get_addend(*isec, r); + + SectionFragment *frag; + i64 in_frag_offset; + std::tie(frag, in_frag_offset) = m->get_fragment(esym.st_value + r_addend); + + if (!frag) + Fatal(ctx) << *this << ": bad relocation at " << r.r_sym; + + Symbol &sym = this->frag_syms[idx]; + sym.file = this; + sym.set_name(""); + sym.sym_idx = r.r_sym; + sym.visibility = STV_HIDDEN; + sym.set_frag(frag); + sym.value = in_frag_offset - r_addend; + r.r_sym = this->elf_syms.size() + idx; + idx++; + } + } + + assert(idx == this->frag_syms.size()); + + for (Symbol &sym : this->frag_syms) + this->symbols.push_back(&sym); +} + +template +void ObjectFile::mark_addrsig(Context &ctx) { + // Parse a .llvm_addrsig section. + if (llvm_addrsig) { + u8 *cur = (u8 *)llvm_addrsig->contents.data(); + u8 *end = cur + llvm_addrsig->contents.size(); + + while (cur != end) { + Symbol &sym = *this->symbols[read_uleb(cur)]; + if (sym.file == this) + if (InputSection *isec = sym.get_input_section()) + isec->address_significant = true; + } + } + + // We treat a symbol's address as significant if + // + // 1. we have no address significance information for the symbol, or + // 2. the symbol can be referenced from the outside in an address- + // significant manner. + for (Symbol *sym : this->symbols) + if (sym->file == this) + if (InputSection *isec = sym->get_input_section()) + if (!llvm_addrsig || sym->is_exported) + isec->address_significant = true; +} + +template +void ObjectFile::parse(Context &ctx) { + sections.resize(this->elf_sections.size()); + symtab_sec = this->find_section(SHT_SYMTAB); + + if (symtab_sec) { + // In ELF, all local symbols precede global symbols in the symbol table. + // sh_info has an index of the first global symbol. + this->first_global = symtab_sec->sh_info; + this->elf_syms = this->template get_data>(ctx, *symtab_sec); + this->symbol_strtab = this->get_string(ctx, symtab_sec->sh_link); + } + + initialize_sections(ctx); + initialize_symbols(ctx); + sort_relocations(ctx); + initialize_ehframe_sections(ctx); +} + +// Symbols with higher priorities overwrites symbols with lower priorities. +// Here is the list of priorities, from the highest to the lowest. +// +// 1. Strong defined symbol +// 2. Weak defined symbol +// 3. Strong defined symbol in a DSO/archive +// 4. Weak Defined symbol in a DSO/archive +// 5. Common symbol +// 6. Common symbol in an archive +// 7. Unclaimed (nonexistent) symbol +// +// Ties are broken by file priority. +template +static u64 get_rank(InputFile *file, const ElfSym &esym, bool is_in_archive) { + auto get_sym_rank = [&] { + if (esym.is_common()) { + assert(!file->is_dso); + return is_in_archive ? 6 : 5; + } + + if (file->is_dso || is_in_archive) + return (esym.st_bind == STB_WEAK) ? 4 : 3; + + if (esym.st_bind == STB_WEAK) + return 2; + return 1; + }; + + return (get_sym_rank() << 24) + file->priority; +} + +template +static u64 get_rank(const Symbol &sym) { + if (!sym.file) + return 7 << 24; + return get_rank(sym.file, sym.esym(), !sym.file->is_alive); +} + +// Symbol's visibility is set to the most restrictive one. 
For example, +// if one input file has a defined symbol `foo` with the default +// visibility and the other input file has an undefined symbol `foo` +// with the hidden visibility, the resulting symbol is a hidden defined +// symbol. +template +void ObjectFile::merge_visibility(Context &ctx, Symbol &sym, + u8 visibility) { + // Canonicalize visibility + if (visibility == STV_INTERNAL) + visibility = STV_HIDDEN; + + auto priority = [&](u8 visibility) { + switch (visibility) { + case STV_HIDDEN: + return 1; + case STV_PROTECTED: + return 2; + case STV_DEFAULT: + return 3; + } + Fatal(ctx) << *this << ": unknown symbol visibility: " << sym; + }; + + update_minimum(sym.visibility, visibility, [&](u8 a, u8 b) { + return priority(a) < priority(b); + }); +} + +template +static void print_trace_symbol(Context &ctx, InputFile &file, + const ElfSym &esym, Symbol &sym) { + if (!esym.is_undef()) + SyncOut(ctx) << "trace-symbol: " << file << ": definition of " << sym; + else if (esym.is_weak()) + SyncOut(ctx) << "trace-symbol: " << file << ": weak reference to " << sym; + else + SyncOut(ctx) << "trace-symbol: " << file << ": reference to " << sym; +} + +template +void ObjectFile::resolve_symbols(Context &ctx) { + for (i64 i = this->first_global; i < this->elf_syms.size(); i++) { + Symbol &sym = *this->symbols[i]; + const ElfSym &esym = this->elf_syms[i]; + + if (esym.is_undef()) + continue; + + InputSection *isec = nullptr; + if (!esym.is_abs() && !esym.is_common()) { + isec = get_section(esym); + if (!isec || !isec->is_alive) + continue; + } + + std::scoped_lock lock(sym.mu); + if (get_rank(this, esym, !this->is_alive) < get_rank(sym)) { + sym.file = this; + sym.set_input_section(isec); + sym.value = esym.st_value; + sym.sym_idx = i; + sym.ver_idx = ctx.default_version; + sym.is_weak = esym.is_weak(); + } + } +} + +template +void +ObjectFile::mark_live_objects(Context &ctx, + std::function *)> feeder) { + assert(this->is_alive); + + for (i64 i = this->first_global; i < this->elf_syms.size(); i++) { + const ElfSym &esym = this->elf_syms[i]; + Symbol &sym = *this->symbols[i]; + + if (!esym.is_undef() && exclude_libs) + merge_visibility(ctx, sym, STV_HIDDEN); + else + merge_visibility(ctx, sym, esym.st_visibility); + + if (sym.is_traced) + print_trace_symbol(ctx, *this, esym, sym); + + if (esym.is_weak()) + continue; + + if (!sym.file) + continue; + + bool keep = esym.is_undef() || (esym.is_common() && !sym.esym().is_common()); + if (keep && !sym.file->is_alive.test_and_set()) { + feeder(sym.file); + + if (sym.is_traced) + SyncOut(ctx) << "trace-symbol: " << *this << " keeps " << *sym.file + << " for " << sym; + } + } +} + +template +void ObjectFile::scan_relocations(Context &ctx) { + // Scan relocations against seciton contents + for (std::unique_ptr> &isec : sections) + if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC)) + isec->scan_relocations(ctx); + + // Scan relocations against exception frames + for (CieRecord &cie : cies) { + for (ElfRel &rel : cie.get_rels()) { + Symbol &sym = *this->symbols[rel.r_sym]; + + if (sym.is_imported) { + if (sym.get_type() != STT_FUNC) + Fatal(ctx) << *this << ": " << sym + << ": .eh_frame CIE record with an external data reference" + << " is not supported"; + sym.flags |= NEEDS_PLT; + } + } + } +} + +// Common symbols are used by C's tantative definitions. Tentative +// definition is an obscure C feature which allows users to omit `extern` +// from global variable declarations in a header file. 
For example, if you +// have a tentative definition `int foo;` in a header which is included +// into multiple translation units, `foo` will be included into multiple +// object files, but it won't cause the duplicate symbol error. Instead, +// the linker will merge them into a single instance of `foo`. +// +// If a header file contains a tentative definition `int foo;` and one of +// a C file contains a definition with initial value such as `int foo = 5;`, +// then the "real" definition wins. The symbol for the tentative definition +// will be resolved to the real definition. If there is no "real" +// definition, the tentative definition gets the default initial value 0. +// +// Tentative definitions are represented as "common symbols" in an object +// file. In this function, we allocate spaces in .common or .tls_common +// for remaining common symbols that were not resolved to usual defined +// symbols in previous passes. +template +void ObjectFile::convert_common_symbols(Context &ctx) { + if (!has_common_symbol) + return; + + for (i64 i = this->first_global; i < this->elf_syms.size(); i++) { + if (!this->elf_syms[i].is_common()) + continue; + + Symbol &sym = *this->symbols[i]; + std::scoped_lock lock(sym.mu); + + if (sym.file != this) { + if (ctx.arg.warn_common) + Warn(ctx) << *this << ": multiple common symbols: " << sym; + continue; + } + + elf_sections2.push_back({}); + ElfShdr &shdr = elf_sections2.back(); + memset(&shdr, 0, sizeof(shdr)); + + std::string_view name; + + if (sym.get_type() == STT_TLS) { + name = ".tls_common"; + shdr.sh_flags = SHF_ALLOC | SHF_WRITE | SHF_TLS; + } else { + name = ".common"; + shdr.sh_flags = SHF_ALLOC | SHF_WRITE; + } + + shdr.sh_type = SHT_NOBITS; + shdr.sh_size = this->elf_syms[i].st_size; + shdr.sh_addralign = this->elf_syms[i].st_value; + + i64 idx = this->elf_sections.size() + elf_sections2.size() - 1; + std::unique_ptr> isec = + std::make_unique>(ctx, *this, name, idx); + + sym.file = this; + sym.set_input_section(isec.get()); + sym.value = 0; + sym.sym_idx = i; + sym.ver_idx = ctx.default_version; + sym.is_weak = false; + + sections.push_back(std::move(isec)); + } +} + +template +static bool should_write_to_local_symtab(Context &ctx, Symbol &sym) { + if (sym.get_type() == STT_SECTION) + return false; + + // Local symbols are discarded if --discard-local is given or they + // are in a mergeable section. I *believe* we exclude symbols in + // mergeable sections because (1) there are too many and (2) they are + // merged, so their origins shouldn't matter, but I don't really + // know the rationale. Anyway, this is the behavior of the + // traditional linkers. 
+ if (sym.name().starts_with(".L")) { + if (ctx.arg.discard_locals) + return false; + + if (InputSection *isec = sym.get_input_section()) + if (isec->shdr().sh_flags & SHF_MERGE) + return false; + } + + return true; +} + +template +void ObjectFile::compute_symtab_size(Context &ctx) { + if (ctx.arg.strip_all) + return; + + this->output_sym_indices.resize(this->elf_syms.size(), -1); + + auto is_alive = [&](Symbol &sym) -> bool { + if (!ctx.arg.gc_sections) + return true; + + if (SectionFragment *frag = sym.get_frag()) + return frag->is_alive; + if (InputSection *isec = sym.get_input_section()) + return isec->is_alive; + return true; + }; + + // Compute the size of local symbols + if (!ctx.arg.discard_all && !ctx.arg.strip_all && !ctx.arg.retain_symbols_file) { + for (i64 i = 1; i < this->first_global; i++) { + Symbol &sym = *this->symbols[i]; + + if (is_alive(sym) && should_write_to_local_symtab(ctx, sym)) { + this->strtab_size += sym.name().size() + 1; + this->output_sym_indices[i] = this->num_local_symtab++; + sym.write_to_symtab = true; + } + } + } + + // Compute the size of global symbols. + for (i64 i = this->first_global; i < this->elf_syms.size(); i++) { + Symbol &sym = *this->symbols[i]; + + if (sym.file == this && is_alive(sym) && + (!ctx.arg.retain_symbols_file || sym.write_to_symtab)) { + this->strtab_size += sym.name().size() + 1; + // Global symbols can be demoted to local symbols based on visibility, + // version scripts etc. + if (sym.is_local(ctx)) + this->output_sym_indices[i] = this->num_local_symtab++; + else + this->output_sym_indices[i] = this->num_global_symtab++; + sym.write_to_symtab = true; + } + } +} + +template +void ObjectFile::populate_symtab(Context &ctx) { + ElfSym *symtab_base = (ElfSym *)(ctx.buf + ctx.symtab->shdr.sh_offset); + + u8 *strtab_base = ctx.buf + ctx.strtab->shdr.sh_offset; + i64 strtab_off = this->strtab_offset; + + auto write_sym = [&](Symbol &sym, i64 &symtab_idx) { + U32 *xindex = nullptr; + if (ctx.symtab_shndx) + xindex = (U32 *)(ctx.buf + ctx.symtab_shndx->shdr.sh_offset + + symtab_idx * 4); + + symtab_base[symtab_idx++] = to_output_esym(ctx, sym, strtab_off, xindex); + strtab_off += write_string(strtab_base + strtab_off, sym.name()); + }; + + i64 local_symtab_idx = this->local_symtab_idx; + i64 global_symtab_idx = this->global_symtab_idx; + for (i64 i = 1; i < this->first_global; i++) { + Symbol &sym = *this->symbols[i]; + if (sym.write_to_symtab) + write_sym(sym, local_symtab_idx); + } + + for (i64 i = this->first_global; i < this->elf_syms.size(); i++) { + Symbol &sym = *this->symbols[i]; + if (sym.file == this && sym.write_to_symtab) { + if (sym.is_local(ctx)) + write_sym(sym, local_symtab_idx); + else + write_sym(sym, global_symtab_idx); + } + } +} + +template +std::ostream &operator<<(std::ostream &out, const InputFile &file) { + if (file.is_dso) { + out << path_clean(file.filename); + return out; + } + + ObjectFile *obj = (ObjectFile *)&file; + if (obj->archive_name == "") + out << path_clean(obj->filename); + else + out << path_clean(obj->archive_name) << "(" << obj->filename + ")"; + return out; +} + +template +SharedFile * +SharedFile::create(Context &ctx, MappedFile> *mf) { + SharedFile *obj = new SharedFile(ctx, mf); + ctx.dso_pool.emplace_back(obj); + return obj; +} + +template +SharedFile::SharedFile(Context &ctx, MappedFile> *mf) + : InputFile(ctx, mf) { + this->is_needed = ctx.as_needed; + this->is_alive = !ctx.as_needed; +} + +template +std::string SharedFile::get_soname(Context &ctx) { + if (ElfShdr *sec = 
this->find_section(SHT_DYNAMIC)) + for (ElfDyn &dyn : this->template get_data>(ctx, *sec)) + if (dyn.d_tag == DT_SONAME) + return this->symbol_strtab.data() + dyn.d_val; + + if (this->mf->given_fullpath) + return this->filename; + + return filepath(this->filename).filename().string(); +} + +template +void SharedFile::parse(Context &ctx) { + symtab_sec = this->find_section(SHT_DYNSYM); + if (!symtab_sec) + return; + + this->symbol_strtab = this->get_string(ctx, symtab_sec->sh_link); + soname = get_soname(ctx); + version_strings = read_verdef(ctx); + + // Read a symbol table. + std::span> esyms = this->template get_data>(ctx, *symtab_sec); + + std::span> vers; + if (ElfShdr *sec = this->find_section(SHT_GNU_VERSYM)) + vers = this->template get_data>(ctx, *sec); + + for (i64 i = symtab_sec->sh_info; i < esyms.size(); i++) { + u16 ver; + if (vers.empty() || esyms[i].is_undef()) + ver = VER_NDX_GLOBAL; + else + ver = (vers[i] & ~VERSYM_HIDDEN); + + if (ver == VER_NDX_LOCAL) + continue; + + std::string_view name = this->symbol_strtab.data() + esyms[i].st_name; + bool is_hidden = (!vers.empty() && (vers[i] & VERSYM_HIDDEN)); + + this->elf_syms2.push_back(esyms[i]); + this->versyms.push_back(ver); + + if (is_hidden) { + std::string_view mangled_name = save_string( + ctx, std::string(name) + "@" + std::string(version_strings[ver])); + this->symbols.push_back(get_symbol(ctx, mangled_name, name)); + } else { + this->symbols.push_back(get_symbol(ctx, name)); + } + } + + this->elf_syms = elf_syms2; + this->first_global = 0; + + static Counter counter("dso_syms"); + counter += this->elf_syms.size(); +} + +// Symbol versioning is a GNU extension to the ELF file format. I don't +// particularly like the feature as it complicates the semantics of +// dynamic linking, but we need to support it anyway because it is +// mandatory on glibc-based systems such as most Linux distros. +// +// Let me explain what symbol versioning is. Symbol versioning is a +// mechanism to allow multiple symbols of the same name but of different +// versions live together in a shared object file. It's convenient if you +// want to make an API-breaking change to some function but want to keep +// old programs working with the newer libraries. +// +// With symbol versioning, dynamic symbols are resolved by (name, version) +// tuple instead of just by name. For example, glibc 2.35 defines two +// different versions of `posix_spawn`, `posix_spawn` of version +// "GLIBC_2.15" and that of version "GLIBC_2.2.5". Any executable that +// uses `posix_spawn` is linked either to that of "GLIBC_2.15" or that of +// "GLIBC_2.2.5" +// +// Versions are just strings, and no ordering is defined between them. +// For example, "GLIBC_2.15" is not considered a newer version of +// "GLIBC_2.2.5" or vice versa. They are considered just different. +// +// If a shared object file has versioned symbols, it contains a parallel +// array for the symbol table. Version strings can be found in that +// parallel table. +// +// One version is considered the "default" version for each shared object. +// If an undefiend symbol `foo` is resolved to a symbol defined by the +// shared object, it's marked so that it'll be resolved to (`foo`, the +// default version of the library) at load-time. 
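+//
+// As an aside, a program can also bind to a non-default version explicitly,
+// either at build time with the assembler-level `.symver` directive or at
+// run time with the glibc extension dlvsym(). An illustrative snippet (not
+// something mold itself does):
+//
+//   __asm__(".symver posix_spawn, posix_spawn@GLIBC_2.2.5");
+//
+//   void *handle = dlopen("libc.so.6", RTLD_NOW);
+//   void *old_fn = dlvsym(handle, "posix_spawn", "GLIBC_2.2.5");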
+template +std::vector SharedFile::read_verdef(Context &ctx) { + std::vector ret(VER_NDX_LAST_RESERVED + 1); + + ElfShdr *verdef_sec = this->find_section(SHT_GNU_VERDEF); + if (!verdef_sec) + return ret; + + std::string_view verdef = this->get_string(ctx, *verdef_sec); + std::string_view strtab = this->get_string(ctx, verdef_sec->sh_link); + + ElfVerdef *ver = (ElfVerdef *)verdef.data(); + + for (;;) { + if (ret.size() <= ver->vd_ndx) + ret.resize(ver->vd_ndx + 1); + + ElfVerdaux *aux = (ElfVerdaux *)((u8 *)ver + ver->vd_aux); + ret[ver->vd_ndx] = strtab.data() + aux->vda_name; + if (!ver->vd_next) + break; + + ver = (ElfVerdef *)((u8 *)ver + ver->vd_next); + } + return ret; +} + +template +void SharedFile::resolve_symbols(Context &ctx) { + for (i64 i = 0; i < this->symbols.size(); i++) { + Symbol &sym = *this->symbols[i]; + const ElfSym &esym = this->elf_syms[i]; + if (esym.is_undef()) + continue; + + std::scoped_lock lock(sym.mu); + + if (get_rank(this, esym, false) < get_rank(sym)) { + sym.file = this; + sym.origin = 0; + sym.value = esym.st_value; + sym.sym_idx = i; + sym.ver_idx = versyms[i]; + sym.is_weak = false; + } + } +} + +template +void +SharedFile::mark_live_objects(Context &ctx, + std::function *)> feeder) { + for (i64 i = 0; i < this->elf_syms.size(); i++) { + const ElfSym &esym = this->elf_syms[i]; + Symbol &sym = *this->symbols[i]; + + if (sym.is_traced) + print_trace_symbol(ctx, *this, esym, sym); + + if (esym.is_undef() && sym.file && !sym.file->is_alive.test_and_set()) { + feeder(sym.file); + + if (sym.is_traced) + SyncOut(ctx) << "trace-symbol: " << *this << " keeps " << *sym.file + << " for " << sym; + } + } +} + +template +std::span *> SharedFile::find_aliases(Symbol *sym) { + assert(sym->file == this); + + std::call_once(init_aliases, [&] { + for (Symbol *sym : this->symbols) + if (sym->file == this) + aliases.push_back(sym); + + tbb::parallel_sort(aliases.begin(), aliases.end(), + [](Symbol *a, Symbol *b) { + const ElfSym &x = a->esym(); + const ElfSym &y = b->esym(); + return std::tuple{x.st_value, &x} < std::tuple{y.st_value, &y}; + }); + }); + + auto [begin, end] = std::equal_range(aliases.begin(), aliases.end(), sym, + [&](Symbol *x, Symbol *y) { + return x->esym().st_value < y->esym().st_value; + }); + + return {&*begin, &*end}; +} + +// Infer an alignment of a DSO symbol. An alignment of a symbol in other +// .so is not something we usually care about, but when we create a copy +// relocation for a symbol, we need to preserve its alignment requirement. +// +// Symbol alignment is not explicitly represented in an ELF file. In this +// function, we conservatively infer it from a symbol address and a +// section alignment requirement. +template +i64 SharedFile::get_alignment(Symbol *sym) { + ElfShdr &shdr = this->elf_sections[sym->esym().st_shndx]; + i64 align = std::max(1, shdr.sh_addralign); + if (sym->value) + align = std::min(align, 1LL << std::countr_zero(sym->value)); + return align; +} + +template +bool SharedFile::is_readonly(Symbol *sym) { + u64 val = sym->esym().st_value; + + for (ElfPhdr &phdr : this->get_phdrs()) + if (phdr.p_type == PT_LOAD && !(phdr.p_flags & PF_W) && + phdr.p_vaddr <= val && val < phdr.p_vaddr + phdr.p_memsz) + return true; + return false; +} + +template +void SharedFile::compute_symtab_size(Context &ctx) { + if (ctx.arg.strip_all) + return; + + this->output_sym_indices.resize(this->elf_syms.size(), -1); + + // Compute the size of global symbols. 
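+  // For a DSO we only keep symbols that this link actually imports from it
+  // or exports to it; the rest of the DSO's dynamic symbol table is
+  // irrelevant to the output file.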
+ for (i64 i = this->first_global; i < this->symbols.size(); i++) { + Symbol &sym = *this->symbols[i]; + + if (sym.file == this && (sym.is_imported || sym.is_exported) && + (!ctx.arg.retain_symbols_file || sym.write_to_symtab)) { + this->strtab_size += sym.name().size() + 1; + this->output_sym_indices[i] = this->num_global_symtab++; + sym.write_to_symtab = true; + } + } +} + +template +void SharedFile::populate_symtab(Context &ctx) { + ElfSym *symtab = + (ElfSym *)(ctx.buf + ctx.symtab->shdr.sh_offset) + this->global_symtab_idx; + + u8 *strtab = ctx.buf + ctx.strtab->shdr.sh_offset; + i64 strtab_off = this->strtab_offset; + + for (i64 i = 0, j = this->first_global; j < this->elf_syms.size(); i++, j++) { + Symbol &sym = *this->symbols[j]; + if (sym.file != this || !sym.write_to_symtab) + continue; + + U32 *xindex = nullptr; + if (ctx.symtab_shndx) + xindex = (U32 *)(ctx.buf + ctx.symtab_shndx->shdr.sh_offset + + (this->global_symtab_idx + i) * 4); + + *symtab++ = to_output_esym(ctx, sym, strtab_off, xindex); + strtab_off += write_string(strtab + strtab_off, sym.name()); + } +} + +using E = MOLD_TARGET; + +template class InputFile; +template class ObjectFile; +template class SharedFile; +template std::ostream &operator<<(std::ostream &, const InputFile &); + +} // namespace mold::elf diff --git a/third_party/mold/elf/input-sections.cc b/third_party/mold/elf/input-sections.cc new file mode 100644 index 00000000000..2446c2d6100 --- /dev/null +++ b/third_party/mold/elf/input-sections.cc @@ -0,0 +1,498 @@ +// clang-format off +#include "third_party/mold/elf/mold.h" + +#include "third_party/libcxx/limits" +// MISSING #include +// MISSING #include + +namespace mold::elf { + +typedef enum { + NONE, ERROR, COPYREL, DYN_COPYREL, PLT, CPLT, DYN_CPLT, DYNREL, BASEREL, IFUNC, +} Action; + +template +bool CieRecord::equals(const CieRecord &other) const { + if (get_contents() != other.get_contents()) + return false; + + std::span> x = get_rels(); + std::span> y = other.get_rels(); + if (x.size() != y.size()) + return false; + + for (i64 i = 0; i < x.size(); i++) { + if (x[i].r_offset - input_offset != y[i].r_offset - other.input_offset || + x[i].r_type != y[i].r_type || + file.symbols[x[i].r_sym] != other.file.symbols[y[i].r_sym] || + get_addend(input_section, x[i]) != get_addend(other.input_section, y[i])) + return false; + } + return true; +} + +static i64 to_p2align(u64 alignment) { + if (alignment == 0) + return 0; + return std::countr_zero(alignment); +} + +template +InputSection::InputSection(Context &ctx, ObjectFile &file, + std::string_view name, i64 shndx) + : file(file), shndx(shndx) { + if (shndx < file.elf_sections.size()) + contents = {(char *)file.mf->data + shdr().sh_offset, (size_t)shdr().sh_size}; + + if (shdr().sh_flags & SHF_COMPRESSED) { + ElfChdr &chdr = *(ElfChdr *)&contents[0]; + sh_size = chdr.ch_size; + p2align = to_p2align(chdr.ch_addralign); + } else { + sh_size = shdr().sh_size; + p2align = to_p2align(shdr().sh_addralign); + } + + // Sections may have been compressed. We usually uncompress them + // directly into the mmap'ed output file, but we want to uncompress + // early for REL-type ELF types to read relocation addends from + // section contents. For RELA-type, we don't need to do this because + // addends are in relocations. + // + // SH-4 stores addends to sections despite being RELA, which is a + // special (and buggy) case. 
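+  //
+  // (Recall the layout difference: a REL entry has no addend field, so the
+  // addend lives in the bytes being relocated, while a RELA entry carries
+  // it explicitly:
+  //
+  //   typedef struct { Elf64_Addr r_offset; Elf64_Xword r_info; } Elf64_Rel;
+  //   typedef struct { Elf64_Addr r_offset; Elf64_Xword r_info;
+  //                    Elf64_Sxword r_addend; } Elf64_Rela;
+  //
+  // That is why REL-type targets need section contents decompressed early.)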
+ if constexpr (!E::is_rela || is_sh4) + uncompress(ctx); +} + +template +void InputSection::uncompress(Context &ctx) { + if (!(shdr().sh_flags & SHF_COMPRESSED) || uncompressed) + return; + + u8 *buf = new u8[sh_size]; + uncompress_to(ctx, buf); + contents = std::string_view((char *)buf, sh_size); + ctx.string_pool.emplace_back(buf); + uncompressed = true; +} + +template +void InputSection::uncompress_to(Context &ctx, u8 *buf) { + if (!(shdr().sh_flags & SHF_COMPRESSED) || uncompressed) { + memcpy(buf, contents.data(), contents.size()); + return; + } + + if (contents.size() < sizeof(ElfChdr)) + Fatal(ctx) << *this << ": corrupted compressed section"; + + ElfChdr &hdr = *(ElfChdr *)&contents[0]; + std::string_view data = contents.substr(sizeof(ElfChdr)); + + switch (hdr.ch_type) { + case ELFCOMPRESS_ZLIB: { + unsigned long size = sh_size; + if (::uncompress(buf, &size, (u8 *)data.data(), data.size()) != Z_OK) + Fatal(ctx) << *this << ": uncompress failed"; + assert(size == sh_size); + break; + } + case ELFCOMPRESS_ZSTD: + if (ZSTD_decompress(buf, sh_size, (u8 *)data.data(), data.size()) != sh_size) + Fatal(ctx) << *this << ": ZSTD_decompress failed"; + break; + default: + Fatal(ctx) << *this << ": unsupported compression type: 0x" + << std::hex << hdr.ch_type; + } +} + +template +static Action get_rel_action(Context &ctx, Symbol &sym, + const Action table[3][4]) { + auto get_output_type = [&] { + if (ctx.arg.shared) + return 0; + if (ctx.arg.pie) + return 1; + return 2; + }; + + auto get_sym_type = [&] { + if (sym.is_absolute()) + return 0; + if (!sym.is_imported) + return 1; + if (sym.get_type() != STT_FUNC) + return 2; + return 3; + }; + + return table[get_output_type()][get_sym_type()]; +} + +template +static void scan_rel(Context &ctx, InputSection &isec, Symbol &sym, + const ElfRel &rel, Action action) { + bool writable = (isec.shdr().sh_flags & SHF_WRITE); + + auto error = [&] { + std::string msg = sym.is_absolute() ? 
"-fno-PIC" : "-fPIC"; + Error(ctx) << isec << ": " << rel << " relocation at offset 0x" + << std::hex << rel.r_offset << " against symbol `" + << sym << "' can not be used; recompile with " << msg; + }; + + auto check_textrel = [&] { + if (!writable) { + if (ctx.arg.z_text) { + error(); + } else if (ctx.arg.warn_textrel) { + Warn(ctx) << isec << ": relocation against symbol `" << sym + << "' in read-only section"; + } + ctx.has_textrel = true; + } + }; + + auto copyrel = [&] { + assert(sym.is_imported); + if (sym.esym().st_visibility == STV_PROTECTED) { + Error(ctx) << isec + << ": cannot make copy relocation for protected symbol '" << sym + << "', defined in " << *sym.file << "; recompile with -fPIC"; + } + sym.flags |= NEEDS_COPYREL; + }; + + auto dynrel = [&] { + check_textrel(); + isec.file.num_dynrel++; + }; + + switch (action) { + case NONE: + break; + case ERROR: + error(); + break; + case COPYREL: + if (!ctx.arg.z_copyreloc) + error(); + copyrel(); + break; + case DYN_COPYREL: + if (writable || !ctx.arg.z_copyreloc) + dynrel(); + else + copyrel(); + break; + case PLT: + sym.flags |= NEEDS_PLT; + break; + case CPLT: + sym.flags |= NEEDS_CPLT; + break; + case DYN_CPLT: + if (writable) + dynrel(); + else + sym.flags |= NEEDS_CPLT; + break; + case DYNREL: + dynrel(); + break; + case BASEREL: + check_textrel(); + if (!isec.is_relr_reloc(ctx, rel)) + isec.file.num_dynrel++; + break; + case IFUNC: + dynrel(); + ctx.num_ifunc_dynrels++; + break; + default: + unreachable(); + } +} + +template +static Action get_pcrel_action(Context &ctx, Symbol &sym) { + // This is for PC-relative relocations (e.g. R_X86_64_PC32). + // We cannot promote them to dynamic relocations because the dynamic + // linker generally does not support PC-relative relocations. + constexpr static Action table[3][4] = { + // Absolute Local Imported data Imported code + { ERROR, NONE, ERROR, PLT }, // Shared object + { ERROR, NONE, COPYREL, PLT }, // Position-independent exec + { NONE, NONE, COPYREL, CPLT }, // Position-dependent exec + }; + + return get_rel_action(ctx, sym, table); +} + +template +static Action get_absrel_action(Context &ctx, Symbol &sym) { + // This is a decision table for absolute relocations that is smaller + // than the word size (e.g. R_X86_64_32). Since the dynamic linker + // generally does not support dynamic relocations smaller than the + // word size, we need to report an error if a relocation cannot be + // resolved at link-time. + constexpr static Action table[3][4] = { + // Absolute Local Imported data Imported code + { NONE, ERROR, ERROR, ERROR }, // Shared object + { NONE, ERROR, ERROR, ERROR }, // Position-independent exec + { NONE, NONE, COPYREL, CPLT }, // Position-dependent exec + }; + + return get_rel_action(ctx, sym, table); +} + +template +static Action get_dyn_absrel_action(Context &ctx, Symbol &sym) { + if (sym.is_ifunc()) + return IFUNC; + + // This is a decision table for absolute relocations for the word + // size data (e.g. R_X86_64_64). Unlike the absrel_table, we can emit + // a dynamic relocation if we cannot resolve an address at link-time. 
+ constexpr static Action table[3][4] = { + // Absolute Local Imported data Imported code + { NONE, BASEREL, DYNREL, DYNREL }, // Shared object + { NONE, BASEREL, DYNREL, DYNREL }, // Position-independent exec + { NONE, NONE, DYN_COPYREL, DYN_CPLT }, // Position-dependent exec + }; + + return get_rel_action(ctx, sym, table); +} + +template +static Action get_ppc64_toc_action(Context &ctx, Symbol &sym) { + if (sym.is_ifunc()) + return IFUNC; + + // As a special case, we do not create copy relocations nor canonical + // PLTs for .toc sections. PPC64's .toc is a compiler-generated + // GOT-like section, and no user-generated code directly uses values + // in it. + constexpr static Action table[3][4] = { + // Absolute Local Imported data Imported code + { NONE, BASEREL, DYNREL, DYNREL }, // Shared object + { NONE, BASEREL, DYNREL, DYNREL }, // Position-independent exec + { NONE, NONE, DYNREL, DYNREL }, // Position-dependent exec + }; + + return get_rel_action(ctx, sym, table); +} + +template +void InputSection::scan_pcrel(Context &ctx, Symbol &sym, + const ElfRel &rel) { + scan_rel(ctx, *this, sym, rel, get_pcrel_action(ctx, sym)); +} + +template +void InputSection::scan_absrel(Context &ctx, Symbol &sym, + const ElfRel &rel) { + scan_rel(ctx, *this, sym, rel, get_absrel_action(ctx, sym)); +} + +template +void InputSection::scan_dyn_absrel(Context &ctx, Symbol &sym, + const ElfRel &rel) { + scan_rel(ctx, *this, sym, rel, get_dyn_absrel_action(ctx, sym)); +} + +template +void InputSection::scan_toc_rel(Context &ctx, Symbol &sym, + const ElfRel &rel) { + scan_rel(ctx, *this, sym, rel, get_ppc64_toc_action(ctx, sym)); +} + +template +void InputSection::check_tlsle(Context &ctx, Symbol &sym, + const ElfRel &rel) { + if (ctx.arg.shared) + Error(ctx) << *this << ": relocation " << rel << " against `" << sym + << "` can not be used when making a shared object;" + << " recompile with -fPIC"; +} + +template +static void apply_absrel(Context &ctx, InputSection &isec, + Symbol &sym, const ElfRel &rel, u8 *loc, + u64 S, i64 A, u64 P, ElfRel *&dynrel, + Action action) { + bool writable = (isec.shdr().sh_flags & SHF_WRITE); + + auto apply_dynrel = [&] { + *dynrel++ = ElfRel(P, E::R_ABS, sym.get_dynsym_idx(ctx), A); + if (ctx.arg.apply_dynamic_relocs) + *(Word *)loc = A; + }; + + switch (action) { + case COPYREL: + case CPLT: + case NONE: + *(Word *)loc = S + A; + break; + case BASEREL: + if (isec.is_relr_reloc(ctx, rel)) { + *(Word *)loc = S + A; + } else { + *dynrel++ = ElfRel(P, E::R_RELATIVE, 0, S + A); + if (ctx.arg.apply_dynamic_relocs) + *(Word *)loc = S + A; + } + break; + case DYN_COPYREL: + if (writable || !ctx.arg.z_copyreloc) + apply_dynrel(); + else + *(Word *)loc = S + A; + break; + case DYN_CPLT: + if (writable) + apply_dynrel(); + else + *(Word *)loc = S + A; + break; + case DYNREL: + apply_dynrel(); + break; + case IFUNC: + if constexpr (supports_ifunc) { + u64 addr = sym.get_addr(ctx, NO_PLT) + A; + *dynrel++ = ElfRel(P, E::R_IRELATIVE, 0, addr); + if (ctx.arg.apply_dynamic_relocs) + *(Word *)loc = addr; + } else { + unreachable(); + } + break; + default: + unreachable(); + } +} + +template +void InputSection::apply_dyn_absrel(Context &ctx, Symbol &sym, + const ElfRel &rel, u8 *loc, + u64 S, i64 A, u64 P, + ElfRel *&dynrel) { + apply_absrel(ctx, *this, sym, rel, loc, S, A, P, dynrel, + get_dyn_absrel_action(ctx, sym)); +} + +template +void InputSection::apply_toc_rel(Context &ctx, Symbol &sym, + const ElfRel &rel, u8 *loc, + u64 S, i64 A, u64 P, + ElfRel *&dynrel) { + apply_absrel(ctx, 
*this, sym, rel, loc, S, A, P, dynrel, + get_ppc64_toc_action(ctx, sym)); +} + +template +void InputSection::write_to(Context &ctx, u8 *buf) { + if (shdr().sh_type == SHT_NOBITS || sh_size == 0) + return; + + // Copy data + if constexpr (is_riscv) + copy_contents_riscv(ctx, buf); + else + uncompress_to(ctx, buf); + + // Apply relocations + if (!ctx.arg.relocatable) { + if (shdr().sh_flags & SHF_ALLOC) + apply_reloc_alloc(ctx, buf); + else + apply_reloc_nonalloc(ctx, buf); + } +} + +// Get the name of a function containin a given offset. +template +std::string_view InputSection::get_func_name(Context &ctx, i64 offset) const { + for (const ElfSym &esym : file.elf_syms) { + if (esym.st_shndx == shndx && esym.st_type == STT_FUNC && + esym.st_value <= offset && offset < esym.st_value + esym.st_size) { + std::string_view name = file.symbol_strtab.data() + esym.st_name; + if (ctx.arg.demangle) + return demangle(name); + return name; + } + } + return ""; +} + +// Test if the symbol a given relocation refers to has already been resolved. +// If not, record that error and returns true. +template +bool InputSection::record_undef_error(Context &ctx, const ElfRel &rel) { + // If a relocation refers to a linker-synthesized symbol for a + // section fragment, it's always been resolved. + if (file.elf_syms.size() <= rel.r_sym) + return false; + + Symbol &sym = *file.symbols[rel.r_sym]; + const ElfSym &esym = file.elf_syms[rel.r_sym]; + + // If a symbol is defined in a comdat group, and the comdat group is + // discarded, the symbol may not have an owner. It is technically an + // violation of the One Definition Rule, so it is a programmer's fault. + if (!sym.file) { + Error(ctx) << *this << ": " << sym << " refers to a discarded COMDAT section" + << " probably due to an ODR violation"; + return true; + } + + auto record = [&] { + std::stringstream ss; + if (std::string_view source = file.get_source_name(); !source.empty()) + ss << ">>> referenced by " << source << "\n"; + else + ss << ">>> referenced by " << *this << "\n"; + + ss << ">>> " << file; + if (std::string_view func = get_func_name(ctx, rel.r_offset); !func.empty()) + ss << ":(" << func << ")"; + + typename decltype(ctx.undef_errors)::accessor acc; + ctx.undef_errors.insert(acc, {sym.name(), {}}); + acc->second.push_back(ss.str()); + }; + + // A non-weak undefined symbol must be promoted to an imported + // symbol or resolved to an defined symbol. Otherwise, it's an + // undefined symbol error. + // + // Every ELF file has an absolute local symbol as its first symbol. + // Referring to that symbol is always valid. + bool is_undef = esym.is_undef() && !esym.is_weak() && sym.sym_idx; + if (!sym.is_imported && is_undef && sym.esym().is_undef()) { + record(); + return true; + } + + // If a protected/hidden undefined symbol is resolved to other .so, + // it's handled as if no symbols were found. 
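+  //
+  // (Rationale: STV_PROTECTED and STV_HIDDEN promise that a reference binds
+  // within the component being linked, so a definition that lives in some
+  // other shared object cannot legitimately satisfy it.)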
+ if (sym.file->is_dso && + (sym.visibility == STV_PROTECTED || sym.visibility == STV_HIDDEN)) { + record(); + return true; + } + + return false; +} + +using E = MOLD_TARGET; + +template struct CieRecord; +template class InputSection; + +} // namespace mold::elf diff --git a/third_party/mold/elf/jobs.cc b/third_party/mold/elf/jobs.cc new file mode 100644 index 00000000000..451419c8ff5 --- /dev/null +++ b/third_party/mold/elf/jobs.cc @@ -0,0 +1,85 @@ +// clang-format off +#include "third_party/mold/elf/mold.h" + +#ifndef _WIN32 +#include "libc/calls/calls.h" +#include "libc/calls/struct/flock.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/at.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fd.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/posix.h" +#include "libc/sysv/consts/s.h" +#include "libc/sysv/consts/splice.h" +#include "third_party/musl/passwd.h" +#include "libc/calls/calls.h" +#include "libc/calls/struct/stat.h" +#include "libc/calls/struct/stat.macros.h" +#include "libc/calls/struct/timespec.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/s.h" +#include "libc/sysv/consts/utime.h" +#include "libc/time/time.h" +#include "libc/calls/makedev.h" +#include "libc/calls/weirdtypes.h" +#include "libc/thread/thread.h" +#include "libc/calls/typedef/u.h" +#include "libc/calls/weirdtypes.h" +#include "libc/intrin/newbie.h" +#include "libc/sock/select.h" +#include "libc/sysv/consts/endian.h" +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "libc/time/time.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/lockf.h" +#endif + +namespace mold::elf { + +template +void acquire_global_lock(Context &ctx) { +#ifndef _WIN32 + char *jobs = getenv("MOLD_JOBS"); + if (!jobs || std::string(jobs) != "1") + return; + + char *home = getenv("HOME"); + if (!home) + home = getpwuid(getuid())->pw_dir; + + std::string path = std::string(home) + "/.mold-lock"; + int fd = open(path.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC, 0600); + if (fd == -1) + return; + + if (lockf(fd, F_LOCK, 0) == -1) + return; + + ctx.global_lock_fd = fd; +#endif +} + +template +void release_global_lock(Context &ctx) { +#ifndef _WIN32 + if (ctx.global_lock_fd) + close(*ctx.global_lock_fd); +#endif +} + +using E = MOLD_TARGET; + +template void acquire_global_lock(Context &); +template void release_global_lock(Context &); + +} // namespace mold::elf diff --git a/third_party/mold/elf/linker-script.cc b/third_party/mold/elf/linker-script.cc new file mode 100644 index 00000000000..ad3390c475b --- /dev/null +++ b/third_party/mold/elf/linker-script.cc @@ -0,0 +1,425 @@ +// clang-format off +// On Linux, /usr/lib/x86_64-linux-gnu/libc.so is not actually +// a shared object file but an ASCII text file containing a linker +// script to include a "real" libc.so file. Therefore, we need to +// support a (very limited) subset of the linker script language. 
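+//
+// For reference, such a libc.so typically looks something like the
+// following (exact paths differ between distros and glibc versions):
+//
+//   /* GNU ld script */
+//   OUTPUT_FORMAT(elf64-x86-64)
+//   GROUP ( /lib/x86_64-linux-gnu/libc.so.6
+//           /usr/lib/x86_64-linux-gnu/libc_nonshared.a
+//           AS_NEEDED ( /lib64/ld-linux-x86-64.so.2 ) )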
+ +#include "third_party/mold/elf/mold.h" + +#include "third_party/libcxx/cctype" +#include "third_party/libcxx/iomanip" + +namespace mold::elf { + +template +static thread_local MappedFile> *current_file; + +template +void read_version_script(Context &ctx, std::span &tok); + +static std::string_view get_line(std::string_view input, const char *pos) { + assert(input.data() <= pos); + assert(pos < input.data() + input.size()); + + i64 start = input.rfind('\n', pos - input.data()); + if (start == input.npos) + start = 0; + else + start++; + + i64 end = input.find('\n', pos - input.data()); + if (end == input.npos) + end = input.size(); + + return input.substr(start, end - start); +} + +template +class SyntaxError { +public: + SyntaxError(Context &ctx, std::string_view errpos) : out(ctx) { + std::string_view contents = current_file->get_contents(); + std::string_view line = get_line(contents, errpos.data()); + + i64 lineno = 1; + for (i64 i = 0; contents.data() + i < line.data(); i++) + if (contents[i] == '\n') + lineno++; + + i64 column = errpos.data() - line.data(); + + std::stringstream ss; + ss << current_file->name << ":" << lineno << ": "; + i64 indent = (i64)ss.tellp() + strlen("mold: "); + ss << line << "\n" << std::setw(indent + column) << " " << "^ "; + out << ss.str(); + } + + template SyntaxError &operator<<(T &&val) { + out << std::forward(val); + return *this; + } + + [[noreturn]] ~SyntaxError() = default; + + Fatal> out; +}; + +template +static std::vector +tokenize(Context &ctx, std::string_view input) { + std::vector vec; + while (!input.empty()) { + if (isspace(input[0])) { + input = input.substr(1); + continue; + } + + if (input.starts_with("/*")) { + i64 pos = input.find("*/", 2); + if (pos == std::string_view::npos) + SyntaxError(ctx, input) << "unclosed comment"; + input = input.substr(pos + 2); + continue; + } + + if (input[0] == '#') { + i64 pos = input.find("\n", 1); + if (pos == std::string_view::npos) + break; + input = input.substr(pos + 1); + continue; + } + + if (input[0] == '"') { + i64 pos = input.find('"', 1); + if (pos == std::string_view::npos) + SyntaxError(ctx, input) << "unclosed string literal"; + vec.push_back(input.substr(0, pos + 1)); + input = input.substr(pos + 1); + continue; + } + + i64 pos = input.find_first_not_of( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789_.$/\\~=+[]*?-!^:"); + + if (pos == 0) + pos = 1; + else if (pos == input.npos) + pos = input.size(); + + vec.push_back(input.substr(0, pos)); + input = input.substr(pos); + } + return vec; +} + +template +static std::span +skip(Context &ctx, std::span tok, std::string_view str) { + if (tok.empty()) + Fatal(ctx) << current_file->name << ": expected '" << str + << "', but got EOF"; + if (tok[0] != str) + SyntaxError(ctx, tok[0]) << "expected '" << str << "'"; + return tok.subspan(1); +} + +static std::string_view unquote(std::string_view s) { + if (s.size() > 0 && s[0] == '"') { + assert(s[s.size() - 1] == '"'); + return s.substr(1, s.size() - 2); + } + return s; +} + +template +static std::span +read_output_format(Context &ctx, std::span tok) { + tok = skip(ctx, tok, "("); + while (!tok.empty() && tok[0] != ")") + tok = tok.subspan(1); + if (tok.empty()) + Fatal(ctx) << current_file->name << ": expected ')', but got EOF"; + return tok.subspan(1); +} + +template +static bool is_in_sysroot(Context &ctx, std::string path) { + std::string rel = to_abs_path(path) + .lexically_relative(to_abs_path(ctx.arg.sysroot)) + .string(); + return rel != "." 
&& !rel.starts_with("../"); +} + +template +static MappedFile> *resolve_path(Context &ctx, std::string_view tok) { + std::string str(unquote(tok)); + + // GNU ld prepends the sysroot if a pathname starts with '/' and the + // script being processed is in the sysroot. We do the same. + if (str.starts_with('/') && is_in_sysroot(ctx, current_file->name)) + return MappedFile>::must_open(ctx, ctx.arg.sysroot + str); + + if (str.starts_with('=')) { + std::string path; + if (ctx.arg.sysroot.empty()) + path = str.substr(1); + else + path = ctx.arg.sysroot + str.substr(1); + return MappedFile>::must_open(ctx, path); + } + + if (str.starts_with("-l")) + return find_library(ctx, str.substr(2)); + + if (MappedFile> *mf = open_library(ctx, str)) + return mf; + + for (std::string_view dir : ctx.arg.library_paths) { + std::string path = std::string(dir) + "/" + str; + if (MappedFile> *mf = open_library(ctx, path)) + return mf; + } + + SyntaxError(ctx, tok) << "library not found: " << str; +} + +template +static std::span +read_group(Context &ctx, std::span tok) { + tok = skip(ctx, tok, "("); + + while (!tok.empty() && tok[0] != ")") { + if (tok[0] == "AS_NEEDED") { + bool orig = ctx.as_needed; + ctx.as_needed = true; + tok = read_group(ctx, tok.subspan(1)); + ctx.as_needed = orig; + continue; + } + + MappedFile> *mf = resolve_path(ctx, tok[0]); + read_file(ctx, mf); + tok = tok.subspan(1); + } + + if (tok.empty()) + Fatal(ctx) << current_file->name << ": expected ')', but got EOF"; + return tok.subspan(1); +} + +template +void parse_linker_script(Context &ctx, MappedFile> *mf) { + current_file = mf; + + std::vector vec = tokenize(ctx, mf->get_contents()); + std::span tok = vec; + + while (!tok.empty()) { + if (tok[0] == "OUTPUT_FORMAT") { + tok = read_output_format(ctx, tok.subspan(1)); + } else if (tok[0] == "INPUT" || tok[0] == "GROUP") { + tok = read_group(ctx, tok.subspan(1)); + } else if (tok[0] == "VERSION") { + tok = tok.subspan(1); + tok = skip(ctx, tok, "{"); + read_version_script(ctx, tok); + tok = skip(ctx, tok, "}"); + } else if (tok.size() > 3 && tok[1] == "=" && tok[3] == ";") { + ctx.arg.defsyms.emplace_back(get_symbol(ctx, unquote(tok[0])), + get_symbol(ctx, unquote(tok[2]))); + tok = tok.subspan(4); + } else if (tok[0] == ";") { + tok = tok.subspan(1); + } else { + SyntaxError(ctx, tok[0]) << "unknown linker script token"; + } + } +} + +template +std::string_view +get_script_output_type(Context &ctx, MappedFile> *mf) { + current_file = mf; + + std::vector vec = tokenize(ctx, mf->get_contents()); + std::span tok = vec; + + if (tok.size() >= 3 && tok[0] == "OUTPUT_FORMAT" && tok[1] == "(") { + if (tok[2] == "elf64-x86-64") + return X86_64::target_name; + if (tok[2] == "elf32-i386") + return I386::target_name; + } + + if (tok.size() >= 3 && (tok[0] == "INPUT" || tok[0] == "GROUP") && + tok[1] == "(") + if (MappedFile> *mf = + MappedFile>::open(ctx, std::string(unquote(tok[2])))) + return get_machine_type(ctx, mf); + + return ""; +} + +static bool read_label(std::span &tok, + std::string label) { + if (tok.size() >= 1 && tok[0] == label + ":") { + tok = tok.subspan(1); + return true; + } + + if (tok.size() >= 2 && tok[0] == label && tok[1] == ":") { + tok = tok.subspan(2); + return true; + } + return false; +} + +template +static void +read_version_script_commands(Context &ctx, std::span &tok, + std::string_view ver_str, u16 ver_idx, bool is_cpp) { + bool is_global = true; + + while (!tok.empty() && tok[0] != "}") { + if (read_label(tok, "global")) { + is_global = true; + continue; + } + + 
if (read_label(tok, "local")) { + is_global = false; + continue; + } + + if (tok[0] == "extern") { + tok = tok.subspan(1); + + if (!tok.empty() && tok[0] == "\"C\"") { + tok = tok.subspan(1); + tok = skip(ctx, tok, "{"); + read_version_script_commands( ctx, tok, ver_str, ver_idx, false); + } else { + tok = skip(ctx, tok, "\"C++\""); + tok = skip(ctx, tok, "{"); + read_version_script_commands(ctx, tok, ver_str, ver_idx, true); + } + + tok = skip(ctx, tok, "}"); + tok = skip(ctx, tok, ";"); + continue; + } + + if (tok[0] == "*") { + ctx.default_version = (is_global ? ver_idx : (u32)VER_NDX_LOCAL); + ctx.default_version_from_version_script = true; + } else if (is_global) { + ctx.version_patterns.push_back({unquote(tok[0]), current_file->name, + ver_str, ver_idx, is_cpp}); + } else { + ctx.version_patterns.push_back({unquote(tok[0]), current_file->name, + ver_str, VER_NDX_LOCAL, is_cpp}); + } + + tok = tok.subspan(1); + + if (!tok.empty() && tok[0] == "}") + return; + tok = skip(ctx, tok, ";"); + } +} + +template +void read_version_script(Context &ctx, std::span &tok) { + u16 next_ver = VER_NDX_LAST_RESERVED + ctx.arg.version_definitions.size() + 1; + + while (!tok.empty() && tok[0] != "}") { + std::string_view ver_str; + u16 ver_idx; + + if (tok[0] == "{") { + ver_str = "global"; + ver_idx = VER_NDX_GLOBAL; + } else { + ver_str = tok[0]; + ver_idx = next_ver++; + ctx.arg.version_definitions.push_back(std::string(tok[0])); + tok = tok.subspan(1); + } + + tok = skip(ctx, tok, "{"); + read_version_script_commands(ctx, tok, ver_str, ver_idx, false); + tok = skip(ctx, tok, "}"); + if (!tok.empty() && tok[0] != ";") + tok = tok.subspan(1); + tok = skip(ctx, tok, ";"); + } +} + +template +void parse_version_script(Context &ctx, MappedFile> *mf) { + current_file = mf; + std::vector vec = tokenize(ctx, mf->get_contents()); + std::span tok = vec; + read_version_script(ctx, tok); + if (!tok.empty()) + SyntaxError(ctx, tok[0]) << "trailing garbage token"; +} + +template +void read_dynamic_list_commands(Context &ctx, std::span &tok, + bool is_cpp) { + while (!tok.empty() && tok[0] != "}") { + if (tok[0] == "extern") { + tok = tok.subspan(1); + + if (!tok.empty() && tok[0] == "\"C\"") { + tok = tok.subspan(1); + tok = skip(ctx, tok, "{"); + read_dynamic_list_commands(ctx, tok, false); + } else { + tok = skip(ctx, tok, "\"C++\""); + tok = skip(ctx, tok, "{"); + read_dynamic_list_commands(ctx, tok, true); + } + + tok = skip(ctx, tok, "}"); + tok = skip(ctx, tok, ";"); + continue; + } + + if (tok[0] == "*") + ctx.default_version = VER_NDX_GLOBAL; + else + ctx.version_patterns.push_back({unquote(tok[0]), current_file->name, + "global", VER_NDX_GLOBAL, is_cpp}); + + tok = skip(ctx, tok.subspan(1), ";"); + } +} + +template +void parse_dynamic_list(Context &ctx, MappedFile> *mf) { + current_file = mf; + std::vector vec = tokenize(ctx, mf->get_contents()); + std::span tok = vec; + + tok = skip(ctx, tok, "{"); + read_dynamic_list_commands(ctx, tok, false); + tok = skip(ctx, tok, "}"); + tok = skip(ctx, tok, ";"); + + if (!tok.empty()) + SyntaxError(ctx, tok[0]) << "trailing garbage token"; +} + +using E = MOLD_TARGET; + +template void parse_linker_script(Context &, MappedFile> *); +template std::string_view get_script_output_type(Context &, MappedFile> *); +template void parse_version_script(Context &, MappedFile> *); +template void parse_dynamic_list(Context &, MappedFile> *); + +} // namespace mold::elf diff --git a/third_party/mold/elf/lto-unix.cc b/third_party/mold/elf/lto-unix.cc new file mode 100644 index 
00000000000..eec570251b6 --- /dev/null +++ b/third_party/mold/elf/lto-unix.cc @@ -0,0 +1,739 @@ +// clang-format off +// This file handles the linker plugin to support LTO (Link-Time +// Optimization). +// +// LTO is a technique to do whole-program optimization to a program. Since +// a linker sees the whole program as opposed to a single compilation +// unit, it in theory can do some optimizations that cannot be done in the +// usual separate compilation model. For example, LTO should be able to +// inline functions that are defined in other compilation unit. +// +// In GCC and Clang, all you have to do to enable LTO is adding the +// `-flto` flag to the compiler and the linker command lines. If `-flto` +// is given, the compiler generates a file that contains not machine code +// but the compiler's IR (intermediate representation). In GCC, the output +// is an ELF file which wraps GCC's IR. In LLVM, it's not even an ELF file +// but just a raw LLVM IR file. +// +// Here is what we have to do if at least one input file is not a usual +// ELF file but an IR object file: +// +// 1. Read symbols both from usual ELF files and from IR object files and +// resolve symbols as usual. +// +// 2. Pass all IR objects to the compiler backend. The compiler backend +// compiles the IRs and returns a few big ELF object files as a +// result. +// +// 3. Parse the returned ELF files and overwrite IR object symbols with +// the returned ones, discarding IR object files. +// +// 4. Continue the rest of the linking process as usual. +// +// When gcc or clang inovkes ld, they pass `-plugin linker-plugin.so` to +// the linker. The given .so file provides a way to call the compiler +// backend. +// +// The linker plugin API is documented at +// https://gcc.gnu.org/wiki/whopr/driver, though the document is a bit +// outdated. +// +// Frankly, the linker plugin API is peculiar and is not very easy to use. +// For some reason, the API functions don't return the result of a +// function call as a return value but instead calls other function with +// the result as its argument to "return" the result. +// +// For example, the first thing you need to do after dlopen()'ing a linker +// plugin .so is to call `onload` function with a list of callback +// functions. `onload` calls callbacks to notify about the pointers to +// other functions the linker plugin provides. I don't know why `onload` +// can't just return a list of functions or why the linker plugin can't +// define not only `onload` but other functions, but that's what it is. +// +// Here is the steps to use the linker plugin: +// +// 1. dlopen() the linker plugin .so and call `onload` to obtain pointers +// to other functions provided by the plugin. +// +// 2. Call `claim_file_hook` with an IR object file to read its symbol +// table. `claim_file_hook` calls the `add_symbols` callback to +// "return" a list of symbols. +// +// 3. `claim_file_hook` returns LDPT_OK only when the plugin wants to +// handle a given file. Since we pass only IR object files to the +// plugin in mold, it always returns LDPT_OK in our case. +// +// 4. Once we made a decision as to which object file to include into the +// output file, we call `all_symbols_read_hook` to compile IR objects +// into a few big ELF files. That function calls the `get_symbols` +// callback to ask us about the symbol resolution results. 
(The +// compiler backend needs to know whether an undefined symbol in an IR +// object was resolved to a regular object file or a shared object to +// do whole program optimization, for example.) +// +// 5. `all_symbols_read_hook` "returns" the result by calling the +// `add_input_file` callback. The callback is called with a path to an +// LTO'ed ELF file. We parse that ELF file and override symbols +// defined by IR objects with the ELF file's ones. +// +// 6. Lastly, we call `cleanup_hook` to remove temporary files created by +// the compiler backend. + +#include "third_party/mold/elf/mold.h" +#include "third_party/mold/elf/lto.h" + +#include "third_party/libcxx/cstdarg" +#include "third_party/libcxx/cstring" +#include "libc/runtime/dlfcn.h" +#include "libc/calls/calls.h" +#include "libc/calls/struct/flock.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/at.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fd.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/posix.h" +#include "libc/sysv/consts/s.h" +#include "libc/sysv/consts/splice.h" +#include "third_party/libcxx/sstream" +// MISSING #include +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "libc/time/time.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/lockf.h" + +#if 0 +# define LOG std::cerr +#else +# define LOG std::ostringstream() +#endif + +namespace mold::elf { + +// Global variables +// We store LTO-related information to global variables, +// as the LTO plugin is not thread-safe by design anyway. + +template static Context *gctx; +template static std::vector *> lto_objects; + +static int phase = 0; +static std::vector plugin_symbols; +static ClaimFileHandler *claim_file_hook; +static AllSymbolsReadHandler *all_symbols_read_hook; +static CleanupHandler *cleanup_hook; +static bool is_gcc_linker_api_v1 = false; + +// Event handlers + +template +static PluginStatus message(PluginLevel level, const char *fmt, ...) 
{ + LOG << "message\n"; + Context &ctx = *gctx; + + char buf[1000]; + va_list ap; + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + switch (level) { + case LDPL_INFO: + SyncOut(ctx) << buf; + break; + case LDPL_WARNING: + Warn(ctx) << buf; + break; + case LDPL_ERROR: + case LDPL_FATAL: + Fatal(ctx) << buf; + } + + return LDPS_OK; +} + +template +static PluginStatus register_claim_file_hook(ClaimFileHandler fn) { + LOG << "register_claim_file_hook\n"; + claim_file_hook = fn; + return LDPS_OK; +} + +template +static PluginStatus +register_all_symbols_read_hook(AllSymbolsReadHandler fn) { + LOG << "register_all_symbols_read_hook\n"; + all_symbols_read_hook = fn; + return LDPS_OK; +} + +template +static PluginStatus register_cleanup_hook(CleanupHandler fn) { + LOG << "register_cleanup_hook\n"; + cleanup_hook = fn; + return LDPS_OK; +} + +static PluginStatus +add_symbols(void *handle, int nsyms, const PluginSymbol *psyms) { + LOG << "add_symbols: " << nsyms << "\n"; + assert(phase == 1); + plugin_symbols = {psyms, psyms + nsyms}; + return LDPS_OK; +} + +template +static PluginStatus add_input_file(const char *path) { + LOG << "add_input_file: " << path << "\n"; + + Context &ctx = *gctx; + static i64 file_priority = 100; + + MappedFile> *mf = MappedFile>::must_open(ctx, path); + + ObjectFile *file = ObjectFile::create(ctx, mf, "", false); + ctx.obj_pool.emplace_back(file); + lto_objects.push_back(file); + + file->priority = file_priority++; + file->is_alive = true; + file->parse(ctx); + file->resolve_symbols(ctx); + return LDPS_OK; +} + +static PluginStatus +get_input_file(const void *handle, struct PluginInputFile *file) { + LOG << "get_input_file\n"; + return LDPS_OK; +} + +template +static PluginStatus release_input_file(const void *handle) { + LOG << "release_input_file\n"; + + ObjectFile &file = *(ObjectFile *)handle; + if (file.mf->fd != -1) { + close(file.mf->fd); + file.mf->fd = -1; + } + return LDPS_OK; +} + +static PluginStatus add_input_library(const char *path) { + LOG << "add_input_library\n"; + return LDPS_OK; +} + +static PluginStatus set_extra_library_path(const char *path) { + LOG << "set_extra_library_path\n"; + return LDPS_OK; +} + +template +static PluginStatus get_view(const void *handle, const void **view) { + LOG << "get_view\n"; + + ObjectFile &file = *(ObjectFile *)handle; + *view = (void *)file.mf->data; + return LDPS_OK; +} + +static PluginStatus +get_input_section_count(const void *handle, int *count) { + LOG << "get_input_section_count\n"; + return LDPS_OK; +} + +static PluginStatus +get_input_section_type(const PluginSection section, int *type) { + LOG << "get_input_section_type\n"; + return LDPS_OK; +} + +static PluginStatus +get_input_section_name(const PluginSection section, + char **section_name) { + LOG << "get_input_section_name\n"; + return LDPS_OK; +} + +static PluginStatus +get_input_section_contents(const PluginSection section, + const char **section_contents, + size_t *len) { + LOG << "get_input_section_contents\n"; + return LDPS_OK; +} + +static PluginStatus +update_section_order(const PluginSection *section_list, + int num_sections) { + LOG << "update_section_order\n"; + return LDPS_OK; +} + +static PluginStatus allow_section_ordering() { + LOG << "allow_section_ordering\n"; + return LDPS_OK; +} + +static PluginStatus +get_symbols_v1(const void *handle, int nsyms, PluginSymbol *psyms) { + unreachable(); +} + +// get_symbols teaches the LTO plugin as to how we have resolved symbols. 
+// The plugin uses the symbol resolution info to optimize the program. +// +// For example, if a definition in an IR file is not referenced by +// non-IR objects at all, the plugin may choose to completely inline +// that definition within the IR objects and remove the symbol from the +// LTO result. On the other hand, if a definition is referenced by a +// non-IR object, it has to keep the symbol in the LTO result. +template +static PluginStatus +get_symbols(const void *handle, int nsyms, PluginSymbol *psyms, bool is_v2) { + ObjectFile &file = *(ObjectFile *)handle; + assert(file.is_lto_obj); + + // If file is an archive member which was not chose to be included in + // to the final result, we need to make the plugin to ignore all + // symbols. + if (!file.is_alive) { + assert(!is_v2); + for (int i = 0; i < nsyms; i++) + psyms[i].resolution = LDPR_PREEMPTED_REG; + return LDPS_NO_SYMS; + } + + auto get_resolution = [&](ElfSym &esym, Symbol &sym) { + if (!sym.file) + return LDPR_UNDEF; + + if (sym.file == &file) { + if (sym.referenced_by_regular_obj) + return LDPR_PREVAILING_DEF; + if (sym.is_exported) + return is_v2 ? LDPR_PREVAILING_DEF : LDPR_PREVAILING_DEF_IRONLY_EXP; + return LDPR_PREVAILING_DEF_IRONLY; + } + + if (sym.file->is_dso) + return LDPR_RESOLVED_DYN; + + if (((ObjectFile *)sym.file)->is_lto_obj && !sym.is_wrapped) + return esym.is_undef() ? LDPR_RESOLVED_IR : LDPR_PREEMPTED_IR; + return esym.is_undef() ? LDPR_RESOLVED_EXEC : LDPR_PREEMPTED_REG; + }; + + // Set the symbol resolution results to psyms. + for (i64 i = 0; i < nsyms; i++) { + ElfSym &esym = file.elf_syms[i + 1]; + Symbol &sym = *file.symbols[i + 1]; + psyms[i].resolution = get_resolution(esym, sym); + } + return LDPS_OK; +} + +// This function restarts mold itself with `--:lto-pass2` and +// `--:ignore-ir-file` flags. We do this as a workaround for the old +// linker plugins that do not support the get_symbols_v3 API. +// +// get_symbols_v1 and get_symbols_v2 don't provide a way to ignore an +// object file we previously passed to the linker plugin. So we can't +// "unload" object files in archives that we ended up not choosing to +// include into the final output. +// +// As a workaround, we restart the linker with a list of object files +// the linker has to ignore, so that it won't read the object files +// from archives next time. +// +// This is an ugly hack and should be removed once GCC adopts the v3 API. 
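+//
+// Concretely, the restarted invocation looks roughly like
+//
+//   mold ...original arguments... --:ignore-ir-file=FILE ... --:lto-pass2
+//
+// with one --:ignore-ir-file option per archive member that we decided not
+// to pull into the link.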
+template +static void restart_process(Context &ctx) { + std::vector args; + + for (std::string_view arg : ctx.cmdline_args) + args.push_back(strdup(std::string(arg).c_str())); + + for (std::unique_ptr> &file : ctx.obj_pool) + if (file->is_lto_obj && !file->is_alive) + args.push_back(strdup(("--:ignore-ir-file=" + + file->mf->get_identifier()).c_str())); + + args.push_back("--:lto-pass2"); + args.push_back(nullptr); + + std::cout << std::flush; + std::cerr << std::flush; + + std::string self = get_self_path(); + execv(self.c_str(), (char * const *)args.data()); + std::cerr << "execv failed: " << errno_string() << "\n"; + _exit(1); +} + +template +static PluginStatus +get_symbols_v2(const void *handle, int nsyms, PluginSymbol *psyms) { + LOG << "get_symbols_v2\n"; + return get_symbols(handle, nsyms, psyms, true); +} + +template +static PluginStatus +get_symbols_v3(const void *handle, int nsyms, PluginSymbol *psyms) { + LOG << "get_symbols_v3\n"; + return get_symbols(handle, nsyms, psyms, false); +} + +static PluginStatus allow_unique_segment_for_sections() { + LOG << "allow_unique_segment_for_sections\n"; + return LDPS_OK; +} + +static PluginStatus +unique_segment_for_sections(const char *segment_name, + uint64_t flags, + uint64_t align, + const PluginSection *section_list, + int num_sections) { + LOG << "unique_segment_for_sections\n"; + return LDPS_OK; +} + +static PluginStatus +get_input_section_alignment(const PluginSection section, + int *addralign) { + LOG << "get_input_section_alignment\n"; + return LDPS_OK; +} + +static PluginStatus +get_input_section_size(const PluginSection section, uint64_t *size) { + LOG << "get_input_section_size\n"; + return LDPS_OK; +} + +template +static PluginStatus +register_new_input_hook(NewInputHandler fn) { + LOG << "register_new_input_hook\n"; + return LDPS_OK; +} + +static PluginStatus +get_wrap_symbols(uint64_t *num_symbols, const char ***wrap_symbols) { + LOG << "get_wrap_symbols\n"; + return LDPS_OK; +} + +template +static PluginLinkerAPIVersion +get_api_version(const char *plugin_identifier, + unsigned plugin_version, + int minimal_api_supported, + int maximal_api_supported, + const char **linker_identifier, + const char **linker_version) { + if (LAPI_V1 < minimal_api_supported) + Fatal(*gctx) << "LTO plugin does not support V0 or V1 API"; + + std::string version = mold_version + "\0"s; + + *linker_identifier = "mold"; + *linker_version = version.data(); + + if (LAPI_V1 <= maximal_api_supported) { + is_gcc_linker_api_v1 = true; + return LAPI_V1; + } + return LAPI_V0; +} + +template +static void load_plugin(Context &ctx) { + assert(phase == 0); + phase = 1; + gctx = &ctx; + + void *handle = dlopen(ctx.arg.plugin.c_str(), RTLD_NOW | RTLD_GLOBAL); + if (!handle) + Fatal(ctx) << "could not open plugin file: " << dlerror(); + + OnloadFn *onload = (OnloadFn *)dlsym(handle, "onload"); + if (!onload) + Fatal(ctx) << "failed to load plugin " << ctx.arg.plugin << ": " + << dlerror(); + + auto save = [&](std::string_view str) { + return save_string(ctx, std::string(str).c_str()).data(); + }; + + std::vector tv; + tv.emplace_back(LDPT_MESSAGE, message); + + if (ctx.arg.shared) + tv.emplace_back(LDPT_LINKER_OUTPUT, LDPO_DYN); + else if (ctx.arg.pie) + tv.emplace_back(LDPT_LINKER_OUTPUT, LDPO_PIE); + else + tv.emplace_back(LDPT_LINKER_OUTPUT, LDPO_EXEC); + + for (std::string_view opt : ctx.arg.plugin_opt) + tv.emplace_back(LDPT_OPTION, save(opt)); + + tv.emplace_back(LDPT_REGISTER_CLAIM_FILE_HOOK, register_claim_file_hook); + 
tv.emplace_back(LDPT_REGISTER_ALL_SYMBOLS_READ_HOOK, + register_all_symbols_read_hook); + tv.emplace_back(LDPT_REGISTER_CLEANUP_HOOK, register_cleanup_hook); + tv.emplace_back(LDPT_ADD_SYMBOLS, add_symbols); + tv.emplace_back(LDPT_GET_SYMBOLS, get_symbols_v1); + tv.emplace_back(LDPT_ADD_INPUT_FILE, add_input_file); + tv.emplace_back(LDPT_GET_INPUT_FILE, get_input_file); + tv.emplace_back(LDPT_RELEASE_INPUT_FILE, release_input_file); + tv.emplace_back(LDPT_ADD_INPUT_LIBRARY, add_input_library); + tv.emplace_back(LDPT_OUTPUT_NAME, save(ctx.arg.output)); + tv.emplace_back(LDPT_SET_EXTRA_LIBRARY_PATH, set_extra_library_path); + tv.emplace_back(LDPT_GET_VIEW, get_view); + tv.emplace_back(LDPT_GET_INPUT_SECTION_COUNT, get_input_section_count); + tv.emplace_back(LDPT_GET_INPUT_SECTION_TYPE, get_input_section_type); + tv.emplace_back(LDPT_GET_INPUT_SECTION_NAME, get_input_section_name); + tv.emplace_back(LDPT_GET_INPUT_SECTION_CONTENTS, get_input_section_contents); + tv.emplace_back(LDPT_UPDATE_SECTION_ORDER, update_section_order); + tv.emplace_back(LDPT_ALLOW_SECTION_ORDERING, allow_section_ordering); + tv.emplace_back(LDPT_ADD_SYMBOLS_V2, add_symbols); + tv.emplace_back(LDPT_GET_SYMBOLS_V2, get_symbols_v2); + tv.emplace_back(LDPT_ALLOW_UNIQUE_SEGMENT_FOR_SECTIONS, + allow_unique_segment_for_sections); + tv.emplace_back(LDPT_UNIQUE_SEGMENT_FOR_SECTIONS, unique_segment_for_sections); + tv.emplace_back(LDPT_GET_SYMBOLS_V3, get_symbols_v3); + tv.emplace_back(LDPT_GET_INPUT_SECTION_ALIGNMENT, get_input_section_alignment); + tv.emplace_back(LDPT_GET_INPUT_SECTION_SIZE, get_input_section_size); + tv.emplace_back(LDPT_REGISTER_NEW_INPUT_HOOK, register_new_input_hook); + tv.emplace_back(LDPT_GET_WRAP_SYMBOLS, get_wrap_symbols); + tv.emplace_back(LDPT_GET_API_VERSION, get_api_version); + tv.emplace_back(LDPT_NULL, 0); + + [[maybe_unused]] PluginStatus status = onload(tv.data()); + assert(status == LDPS_OK); +} + +template +static ElfSym to_elf_sym(PluginSymbol &psym) { + ElfSym esym; + memset(&esym, 0, sizeof(esym)); + + switch (psym.def) { + case LDPK_DEF: + esym.st_shndx = SHN_ABS; + break; + case LDPK_WEAKDEF: + esym.st_shndx = SHN_ABS; + esym.st_bind = STB_WEAK; + break; + case LDPK_UNDEF: + esym.st_shndx = SHN_UNDEF; + break; + case LDPK_WEAKUNDEF: + esym.st_shndx = SHN_UNDEF; + esym.st_bind = STB_WEAK; + break; + case LDPK_COMMON: + esym.st_shndx = SHN_COMMON; + break; + } + + switch (psym.symbol_type) { + case LDST_UNKNOWN: + break; + case LDST_FUNCTION: + esym.st_type = STT_FUNC; + break; + case LDST_VARIABLE: + esym.st_type = STT_OBJECT; + break; + }; + + switch (psym.visibility) { + case LDPV_DEFAULT: + break; + case LDPV_PROTECTED: + esym.st_visibility = STV_PROTECTED; + break; + case LDPV_INTERNAL: + esym.st_visibility = STV_INTERNAL; + break; + case LDPV_HIDDEN: + esym.st_visibility = STV_HIDDEN; + break; + } + + esym.st_size = psym.size; + return esym; +} + +// Returns true if a given linker plugin looks like LLVM's one. +// Returns false if it's GCC. +template +static bool is_llvm(Context &ctx) { + return ctx.arg.plugin.ends_with("LLVMgold.so"); +} + +// Returns true if a given linker plugin supports the get_symbols_v3 API. +// Any version of LLVM and GCC 12 or newer support it. +template +static bool supports_v3_api(Context &ctx) { + return is_gcc_linker_api_v1 || is_llvm(ctx); +} + +template +ObjectFile *read_lto_object(Context &ctx, MappedFile> *mf) { + // V0 API's claim_file is not thread-safe. 
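+  // With the V0 API we therefore serialize calls into the plugin with a
+  // process-wide mutex. The lock is skipped when the plugin negotiated the
+  // V1 API, which this code treats as thread-safe.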
+ static std::mutex mu; + std::unique_lock lock(mu, std::defer_lock); + if (!is_gcc_linker_api_v1) + lock.lock(); + + if (ctx.arg.plugin.empty()) + Fatal(ctx) << mf->name << ": don't know how to handle this LTO object file " + << "because no -plugin option was given. Please make sure you " + << "added -flto not only for creating object files but also for " + << "creating the final executable."; + + // dlopen the linker plugin file + static std::once_flag flag; + std::call_once(flag, [&] { load_plugin(ctx); }); + + // Create mold's object instance + ObjectFile *obj = new ObjectFile; + ctx.obj_pool.emplace_back(obj); + + obj->filename = mf->name; + obj->symbols.push_back(new Symbol); + obj->first_global = 1; + obj->is_lto_obj = true; + obj->mf = mf; + + // Create plugin's object instance + PluginInputFile file = {}; + + MappedFile> *mf2 = mf->parent ? mf->parent : mf; + file.name = save_string(ctx, mf2->name).data(); + if (mf2->fd == -1) + mf2->fd = open(file.name, O_RDONLY); + file.fd = mf2->fd; + if (file.fd == -1) + Fatal(ctx) << "cannot open " << file.name << ": " << errno_string(); + + if (mf->parent) + obj->archive_name = mf->parent->name; + + file.offset = mf->get_offset(); + file.filesize = mf->size; + file.handle = (void *)obj; + + LOG << "read_lto_symbols: "<< mf->name << "\n"; + + // claim_file_hook() calls add_symbols() which initializes `plugin_symbols` + int claimed = false; + claim_file_hook(&file, &claimed); + if (!claimed) + Fatal(ctx) << mf->name << ": not claimed by the LTO plugin;" + << " please make sure you are using the same compiler of the" + << " same version for all object files"; + + // It looks like GCC doesn't need fd after claim_file_hook() while + // LLVM needs it and takes the ownership of fd. To prevent "too many + // open files" issue, we close fd only for GCC. This is ugly, though. + if (!is_llvm(ctx)) { + close(mf2->fd); + mf2->fd = -1; + } + + // Initialize object symbols + std::vector> *esyms = new std::vector>(1); + + for (PluginSymbol &psym : plugin_symbols) { + esyms->push_back(to_elf_sym(psym)); + obj->symbols.push_back(get_symbol(ctx, save_string(ctx, psym.name))); + } + + obj->elf_syms = *esyms; + obj->has_symver.resize(esyms->size()); + plugin_symbols.clear(); + return obj; +} + +// Entry point +template +std::vector *> do_lto(Context &ctx) { + Timer t(ctx, "do_lto"); + + if (!ctx.arg.lto_pass2 && !supports_v3_api(ctx)) + restart_process(ctx); + + assert(phase == 1); + phase = 2; + + // Set `referenced_by_regular_obj` bit. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + if (file->is_lto_obj) + return; + + for (i64 i = file->first_global; i < file->symbols.size(); i++) { + Symbol &sym = *file->symbols[i]; + + if (sym.file && !sym.file->is_dso && + ((ObjectFile *)sym.file)->is_lto_obj) { + std::scoped_lock lock(sym.mu); + sym.referenced_by_regular_obj = true; + } + } + }); + + // Symbols specified by the --wrap option needs to be visible from + // regular object files. 
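+  // (--wrap=foo rewrites references to `foo` so that they resolve to
+  // `__wrap_foo`, while `__real_foo` refers to the original definition.
+  // That is why the wrapped name and both generated names are flagged here.)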
+ for (std::string_view name : ctx.arg.wrap) { + get_symbol(ctx, name)->referenced_by_regular_obj = true; + + std::string_view x = save_string(ctx, "__wrap_" + std::string(name)); + std::string_view y = save_string(ctx, "__real_" + std::string(name)); + + get_symbol(ctx, x)->referenced_by_regular_obj = true; + get_symbol(ctx, y)->referenced_by_regular_obj = true; + } + + // all_symbols_read_hook() calls add_input_file() and add_input_library() + LOG << "all symbols read\n"; + if (PluginStatus st = all_symbols_read_hook(); st != LDPS_OK) + Fatal(ctx) << "LTO: all_symbols_read_hook returns " << st; + + return lto_objects; +} + +template +void lto_cleanup(Context &ctx) { + Timer t(ctx, "lto_cleanup"); + + if (cleanup_hook) + cleanup_hook(); +} + +using E = MOLD_TARGET; + +template ObjectFile *read_lto_object(Context &, MappedFile> *); +template std::vector *> do_lto(Context &); +template void lto_cleanup(Context &); + +} // namespace mold::elf diff --git a/third_party/mold/elf/lto-win32.cc b/third_party/mold/elf/lto-win32.cc new file mode 100644 index 00000000000..29da7888b78 --- /dev/null +++ b/third_party/mold/elf/lto-win32.cc @@ -0,0 +1,26 @@ +// clang-format off +#include "third_party/mold/elf/mold.h" +#include "third_party/mold/elf/lto.h" + +namespace mold::elf { + +template +ObjectFile *read_lto_object(Context &ctx, MappedFile> *mf) { + Fatal(ctx) << "LTO is not supported on Windows"; +} + +template +std::vector *> do_lto(Context &ctx) { + return {}; +} + +template +void lto_cleanup(Context &ctx) {} + +using E = MOLD_TARGET; + +template ObjectFile *read_lto_object(Context &, MappedFile> *); +template std::vector *> do_lto(Context &); +template void lto_cleanup(Context &); + +} // namespace mold::elf diff --git a/third_party/mold/elf/lto.cc b/third_party/mold/elf/lto.cc new file mode 100644 index 00000000000..6899c4c6065 --- /dev/null +++ b/third_party/mold/elf/lto.cc @@ -0,0 +1,6 @@ +// clang-format off +#ifdef _WIN32 +#include "third_party/mold/elf/lto-win32.cc" +#else +#include "third_party/mold/elf/lto-unix.cc" +#endif diff --git a/third_party/mold/elf/lto.h b/third_party/mold/elf/lto.h new file mode 100644 index 00000000000..51a0cd12fc1 --- /dev/null +++ b/third_party/mold/elf/lto.h @@ -0,0 +1,167 @@ +// clang-format off +#pragma once + +// MISSING #include "../common/integers.h" + +namespace mold { + +enum PluginStatus { + LDPS_OK, + LDPS_NO_SYMS, + LDPS_BAD_HANDLE, + LDPS_ERR, +}; + +enum PluginTag { + LDPT_NULL, + LDPT_API_VERSION, + LDPT_GOLD_VERSION, + LDPT_LINKER_OUTPUT, + LDPT_OPTION, + LDPT_REGISTER_CLAIM_FILE_HOOK, + LDPT_REGISTER_ALL_SYMBOLS_READ_HOOK, + LDPT_REGISTER_CLEANUP_HOOK, + LDPT_ADD_SYMBOLS, + LDPT_GET_SYMBOLS, + LDPT_ADD_INPUT_FILE, + LDPT_MESSAGE, + LDPT_GET_INPUT_FILE, + LDPT_RELEASE_INPUT_FILE, + LDPT_ADD_INPUT_LIBRARY, + LDPT_OUTPUT_NAME, + LDPT_SET_EXTRA_LIBRARY_PATH, + LDPT_GNU_LD_VERSION, + LDPT_GET_VIEW, + LDPT_GET_INPUT_SECTION_COUNT, + LDPT_GET_INPUT_SECTION_TYPE, + LDPT_GET_INPUT_SECTION_NAME, + LDPT_GET_INPUT_SECTION_CONTENTS, + LDPT_UPDATE_SECTION_ORDER, + LDPT_ALLOW_SECTION_ORDERING, + LDPT_GET_SYMBOLS_V2, + LDPT_ALLOW_UNIQUE_SEGMENT_FOR_SECTIONS, + LDPT_UNIQUE_SEGMENT_FOR_SECTIONS, + LDPT_GET_SYMBOLS_V3, + LDPT_GET_INPUT_SECTION_ALIGNMENT, + LDPT_GET_INPUT_SECTION_SIZE, + LDPT_REGISTER_NEW_INPUT_HOOK, + LDPT_GET_WRAP_SYMBOLS, + LDPT_ADD_SYMBOLS_V2, + LDPT_GET_API_VERSION, +}; + +enum PluginApiVersion { + LD_PLUGIN_API_VERSION = 1, +}; + +struct PluginTagValue { + PluginTagValue(PluginTag tag, int val) : tag(tag), val(val) {} + + template + 
PluginTagValue(PluginTag tag, T *ptr) : tag(tag), ptr((void *)ptr) {} + + PluginTag tag; + union { + int val; + void *ptr; + }; +}; + +enum PluginOutputFileType { + LDPO_REL, + LDPO_EXEC, + LDPO_DYN, + LDPO_PIE, +}; + +struct PluginInputFile { + const char *name; + i32 fd; + u64 offset; + u64 filesize; + void *handle; +}; + +struct PluginSection { + const void *handle; + u32 shndx; +}; + +struct PluginSymbol { + char *name; + char *version; +#ifdef __LITTLE_ENDIAN__ + u8 def; + u8 symbol_type; + u8 section_kind; + u8 padding; +#else + u8 padding; + u8 section_kind; + u8 symbol_type; + u8 def; +#endif + i32 visibility; + u64 size; + char *comdat_key; + i32 resolution; +}; + +enum PluginSymbolKind { + LDPK_DEF, + LDPK_WEAKDEF, + LDPK_UNDEF, + LDPK_WEAKUNDEF, + LDPK_COMMON, +}; + +enum PluginSymbolVisibility { + LDPV_DEFAULT, + LDPV_PROTECTED, + LDPV_INTERNAL, + LDPV_HIDDEN, +}; + +enum PluginSymbolType { + LDST_UNKNOWN, + LDST_FUNCTION, + LDST_VARIABLE, +}; + +enum PluginSymbolSectionKind { + LDSSK_DEFAULT, + LDSSK_BSS, +}; + +enum PluginSymbolResolution { + LDPR_UNKNOWN, + LDPR_UNDEF, + LDPR_PREVAILING_DEF, + LDPR_PREVAILING_DEF_IRONLY, + LDPR_PREEMPTED_REG, + LDPR_PREEMPTED_IR, + LDPR_RESOLVED_IR, + LDPR_RESOLVED_EXEC, + LDPR_RESOLVED_DYN, + LDPR_PREVAILING_DEF_IRONLY_EXP, +}; + +enum PluginLevel { + LDPL_INFO, + LDPL_WARNING, + LDPL_ERROR, + LDPL_FATAL, +}; + +enum PluginLinkerAPIVersion { + LAPI_V0, + LAPI_V1, +}; + +typedef PluginStatus OnloadFn(PluginTagValue *tv); +typedef PluginStatus ClaimFileHandler(const PluginInputFile *, int *); +typedef PluginStatus AllSymbolsReadHandler(); +typedef PluginStatus CleanupHandler(); +typedef PluginStatus NewInputHandler(const PluginInputFile *); + +} // namespace mold diff --git a/third_party/mold/elf/main.cc b/third_party/mold/elf/main.cc new file mode 100644 index 00000000000..d8d512ab53c --- /dev/null +++ b/third_party/mold/elf/main.cc @@ -0,0 +1,812 @@ +// clang-format off +#include "third_party/mold/elf/mold.h" +// MISSING #include "../common/archive-file.h" +// MISSING #include "../common/cmdline.h" +// MISSING #include "../common/output-file.h" + +#include "third_party/libcxx/cstring" +#include "third_party/libcxx/functional" +#include "third_party/libcxx/iomanip" +#include "third_party/libcxx/map" +#include "third_party/libcxx/regex" +#include "libc/calls/calls.h" +#include "libc/calls/sigtimedwait.h" +#include "libc/calls/struct/sigaction.h" +#include "libc/calls/struct/siginfo.h" +#include "libc/sysv/consts/sa.h" +#include "libc/sysv/consts/sicode.h" +#include "libc/sysv/consts/ss.h" +#include "libc/calls/calls.h" +#include "libc/calls/struct/stat.h" +#include "libc/calls/struct/stat.macros.h" +#include "libc/calls/struct/timespec.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/s.h" +#include "libc/sysv/consts/utime.h" +#include "libc/time/time.h" +#include "libc/calls/makedev.h" +#include "libc/calls/weirdtypes.h" +#include "libc/thread/thread.h" +#include "libc/calls/typedef/u.h" +#include "libc/calls/weirdtypes.h" +#include "libc/intrin/newbie.h" +#include "libc/sock/select.h" +#include "libc/sysv/consts/endian.h" +// MISSING #include +// MISSING #include +#include "third_party/libcxx/unordered_set" + +#ifdef _WIN32 +// MISSING #include +# define _chdir chdir +#else +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fileno.h" +#include 
"libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "libc/time/time.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/lockf.h" +#endif + +namespace mold::elf { + +// Read the beginning of a given file and returns its machine type +// (e.g. EM_X86_64 or EM_386). +template +std::string_view get_machine_type(Context &ctx, MappedFile> *mf) { + auto get_elf_type = [&](u8 *buf) -> std::string_view { + bool is_le = (((ElfEhdr *)buf)->e_ident[EI_DATA] == ELFDATA2LSB); + bool is_64; + u32 e_machine; + + if (is_le) { + auto &ehdr = *(ElfEhdr *)buf; + is_64 = (ehdr.e_ident[EI_CLASS] == ELFCLASS64); + e_machine = ehdr.e_machine; + } else { + auto &ehdr = *(ElfEhdr *)buf; + is_64 = (ehdr.e_ident[EI_CLASS] == ELFCLASS64); + e_machine = ehdr.e_machine; + } + + switch (e_machine) { + case EM_386: + return I386::target_name; + case EM_X86_64: + return X86_64::target_name; + case EM_ARM: + return ARM32::target_name; + case EM_AARCH64: + return ARM64::target_name; + case EM_RISCV: + if (is_le) + return is_64 ? RV64LE::target_name : RV32LE::target_name; + return is_64 ? RV64BE::target_name : RV32BE::target_name; + case EM_PPC: + return PPC32::target_name; + case EM_PPC64: + return is_le ? PPC64V2::target_name : PPC64V1::target_name; + case EM_S390X: + return S390X::target_name; + case EM_SPARC64: + return SPARC64::target_name; + case EM_68K: + return M68K::target_name; + case EM_SH: + return SH4::target_name; + case EM_ALPHA: + return ALPHA::target_name; + default: + return ""; + } + }; + + switch (get_file_type(ctx, mf)) { + case FileType::ELF_OBJ: + case FileType::ELF_DSO: + case FileType::GCC_LTO_OBJ: + return get_elf_type(mf->data); + case FileType::AR: + for (MappedFile> *child : read_fat_archive_members(ctx, mf)) + if (get_file_type(ctx, child) == FileType::ELF_OBJ) + return get_elf_type(child->data); + return ""; + case FileType::THIN_AR: + for (MappedFile> *child : read_thin_archive_members(ctx, mf)) + if (get_file_type(ctx, child) == FileType::ELF_OBJ) + return get_elf_type(child->data); + return ""; + case FileType::TEXT: + return get_script_output_type(ctx, mf); + default: + return ""; + } +} + +template +static void +check_file_compatibility(Context &ctx, MappedFile> *mf) { + std::string_view target = get_machine_type(ctx, mf); + if (target != ctx.arg.emulation) + Fatal(ctx) << mf->name << ": incompatible file type: " + << ctx.arg.emulation << " is expected but got " << target; +} + +template +static ObjectFile *new_object_file(Context &ctx, MappedFile> *mf, + std::string archive_name) { + static Counter count("parsed_objs"); + count++; + + check_file_compatibility(ctx, mf); + + bool in_lib = ctx.in_lib || (!archive_name.empty() && !ctx.whole_archive); + ObjectFile *file = ObjectFile::create(ctx, mf, archive_name, in_lib); + file->priority = ctx.file_priority++; + ctx.tg.run([file, &ctx] { file->parse(ctx); }); + if (ctx.arg.trace) + SyncOut(ctx) << "trace: " << *file; + return file; +} + +template +static ObjectFile *new_lto_obj(Context &ctx, MappedFile> *mf, + std::string archive_name) { + static Counter count("parsed_lto_objs"); + count++; + + if (ctx.arg.ignore_ir_file.count(mf->get_identifier())) + return nullptr; + + ObjectFile *file = read_lto_object(ctx, mf); + file->priority = ctx.file_priority++; + file->archive_name = archive_name; + file->is_in_lib = ctx.in_lib || (!archive_name.empty() && !ctx.whole_archive); + file->is_alive = !file->is_in_lib; + ctx.has_lto_object = true; + if (ctx.arg.trace) + SyncOut(ctx) << 
"trace: " << *file; + return file; +} + +template +static SharedFile * +new_shared_file(Context &ctx, MappedFile> *mf) { + check_file_compatibility(ctx, mf); + + SharedFile *file = SharedFile::create(ctx, mf); + file->priority = ctx.file_priority++; + ctx.tg.run([file, &ctx] { file->parse(ctx); }); + if (ctx.arg.trace) + SyncOut(ctx) << "trace: " << *file; + return file; +} + +template +void read_file(Context &ctx, MappedFile> *mf) { + if (ctx.visited.contains(mf->name)) + return; + + switch (get_file_type(ctx, mf)) { + case FileType::ELF_OBJ: + ctx.objs.push_back(new_object_file(ctx, mf, "")); + return; + case FileType::ELF_DSO: + ctx.dsos.push_back(new_shared_file(ctx, mf)); + ctx.visited.insert(mf->name); + return; + case FileType::AR: + case FileType::THIN_AR: + for (MappedFile> *child : read_archive_members(ctx, mf)) { + switch (get_file_type(ctx, child)) { + case FileType::ELF_OBJ: + ctx.objs.push_back(new_object_file(ctx, child, mf->name)); + break; + case FileType::GCC_LTO_OBJ: + case FileType::LLVM_BITCODE: + if (ObjectFile *file = new_lto_obj(ctx, child, mf->name)) + ctx.objs.push_back(file); + break; + case FileType::ELF_DSO: + Warn(ctx) << mf->name << "(" << child->name + << "): shared object file in an archive is ignored"; + break; + default: + break; + } + } + ctx.visited.insert(mf->name); + return; + case FileType::TEXT: + parse_linker_script(ctx, mf); + return; + case FileType::GCC_LTO_OBJ: + case FileType::LLVM_BITCODE: + if (ObjectFile *file = new_lto_obj(ctx, mf, "")) + ctx.objs.push_back(file); + return; + default: + Fatal(ctx) << mf->name << ": unknown file type"; + } +} + +template +static std::string_view +deduce_machine_type(Context &ctx, std::span args) { + for (std::string_view arg : args) + if (!arg.starts_with('-')) + if (auto *mf = MappedFile>::open(ctx, std::string(arg))) + if (std::string_view target = get_machine_type(ctx, mf); + !target.empty()) + return target; + Fatal(ctx) << "-m option is missing"; +} + +template +MappedFile> *open_library(Context &ctx, std::string path) { + MappedFile> *mf = MappedFile>::open(ctx, path); + if (!mf) + return nullptr; + + std::string_view target = get_machine_type(ctx, mf); + if (target.empty() || target == E::target_name) + return mf; + Warn(ctx) << path << ": skipping incompatible file " << target + << " " << (int)E::e_machine; + return nullptr; +} + +template +MappedFile> *find_library(Context &ctx, std::string name) { + if (name.starts_with(':')) { + for (std::string_view dir : ctx.arg.library_paths) { + std::string path = std::string(dir) + "/" + name.substr(1); + if (MappedFile> *mf = open_library(ctx, path)) + return mf; + } + Fatal(ctx) << "library not found: " << name; + } + + for (std::string_view dir : ctx.arg.library_paths) { + std::string stem = std::string(dir) + "/lib" + name; + if (!ctx.is_static) + if (MappedFile> *mf = open_library(ctx, stem + ".so")) + return mf; + if (MappedFile> *mf = open_library(ctx, stem + ".a")) + return mf; + } + Fatal(ctx) << "library not found: " << name; +} + +template +MappedFile> *find_from_search_paths(Context &ctx, std::string name) { + if (MappedFile> *mf = MappedFile>::open(ctx, name)) + return mf; + + for (std::string_view dir : ctx.arg.library_paths) + if (MappedFile> *mf = + MappedFile>::open(ctx, std::string(dir) + "/" + name)) + return mf; + return nullptr; +} + +template +static void read_input_files(Context &ctx, std::span args) { + Timer t(ctx, "read_input_files"); + + std::vector> state; + ctx.is_static = ctx.arg.is_static; + + while (!args.empty()) { + 
std::string_view arg = args[0]; + args = args.subspan(1); + + if (arg == "--as-needed") { + ctx.as_needed = true; + } else if (arg == "--no-as-needed") { + ctx.as_needed = false; + } else if (arg == "--whole-archive") { + ctx.whole_archive = true; + } else if (arg == "--no-whole-archive") { + ctx.whole_archive = false; + } else if (arg == "--Bstatic") { + ctx.is_static = true; + } else if (arg == "--Bdynamic") { + ctx.is_static = false; + } else if (arg == "--start-lib") { + ctx.in_lib = true; + } else if (arg == "--end-lib") { + ctx.in_lib = false; + } else if (remove_prefix(arg, "--version-script=")) { + MappedFile> *mf = find_from_search_paths(ctx, std::string(arg)); + if (!mf) + Fatal(ctx) << "--version-script: file not found: " << arg; + parse_version_script(ctx, mf); + } else if (remove_prefix(arg, "--dynamic-list=")) { + MappedFile> *mf = find_from_search_paths(ctx, std::string(arg)); + if (!mf) + Fatal(ctx) << "--dynamic-list: file not found: " << arg; + parse_dynamic_list(ctx, mf); + } else if (remove_prefix(arg, "--export-dynamic-symbol=")) { + if (arg == "*") + ctx.default_version = VER_NDX_GLOBAL; + else + ctx.version_patterns.push_back({arg, "--export-dynamic-symbol", + "global", VER_NDX_GLOBAL, false}); + } else if (remove_prefix(arg, "--export-dynamic-symbol-list=")) { + MappedFile> *mf = find_from_search_paths(ctx, std::string(arg)); + if (!mf) + Fatal(ctx) << "--export-dynamic-symbol-list: file not found: " << arg; + parse_dynamic_list(ctx, mf); + } else if (arg == "--push-state") { + state.push_back({ctx.as_needed, ctx.whole_archive, ctx.is_static, + ctx.in_lib}); + } else if (arg == "--pop-state") { + if (state.empty()) + Fatal(ctx) << "no state pushed before popping"; + std::tie(ctx.as_needed, ctx.whole_archive, ctx.is_static, ctx.in_lib) = + state.back(); + state.pop_back(); + } else if (remove_prefix(arg, "-l")) { + MappedFile> *mf = find_library(ctx, std::string(arg)); + mf->given_fullpath = false; + read_file(ctx, mf); + } else { + read_file(ctx, MappedFile>::must_open(ctx, std::string(arg))); + } + } + + if (ctx.objs.empty()) + Fatal(ctx) << "no input files"; + + ctx.tg.wait(); +} + +// Since elf_main is a template, we can't run it without a type parameter. +// We speculatively run elf_main with X86_64, and if the speculation was +// wrong, re-run it with an actual machine type. 
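(Editorial aside, not part of the patch: the comment above describes a speculate-then-redispatch driver. A standalone sketch of that pattern, with hypothetical run_linker/dispatch names rather than mold's own entry points:)

    #include <string_view>

    struct X86_64 { static constexpr std::string_view target_name = "x86_64"; };
    struct ARM64  { static constexpr std::string_view target_name = "aarch64"; };

    template <typename E>
    int run_linker(int argc, char **argv) {
      (void)argc; (void)argv;
      // per-target option parsing, section layout, relocation handling ...
      return 0;
    }

    // Start with the most common target; re-run the templated driver only
    // when the deduced emulation turns out to be something else.
    int dispatch(std::string_view target, int argc, char **argv) {
      if (target == ARM64::target_name)
        return run_linker<ARM64>(argc, argv);
      return run_linker<X86_64>(argc, argv);
    }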
+template +static int redo_main(int argc, char **argv, std::string_view target) { + if (target == I386::target_name) + return elf_main(argc, argv); + if (target == ARM64::target_name) + return elf_main(argc, argv); + if (target == ARM32::target_name) + return elf_main(argc, argv); + if (target == RV64LE::target_name) + return elf_main(argc, argv); + if (target == RV64BE::target_name) + return elf_main(argc, argv); + if (target == RV32LE::target_name) + return elf_main(argc, argv); + if (target == RV32BE::target_name) + return elf_main(argc, argv); + if (target == PPC32::target_name) + return elf_main(argc, argv); + if (target == PPC64V1::target_name) + return elf_main(argc, argv); + if (target == PPC64V2::target_name) + return elf_main(argc, argv); + if (target == S390X::target_name) + return elf_main(argc, argv); + if (target == SPARC64::target_name) + return elf_main(argc, argv); + if (target == M68K::target_name) + return elf_main(argc, argv); + if (target == SH4::target_name) + return elf_main(argc, argv); + if (target == ALPHA::target_name) + return elf_main(argc, argv); + unreachable(); +} + +template +int elf_main(int argc, char **argv) { + Context ctx; + + // Process -run option first. process_run_subcommand() does not return. + if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv)) { +#if defined(_WIN32) || defined(__APPLE__) + Fatal(ctx) << "-run is supported only on Unix"; +#endif + process_run_subcommand(ctx, argc, argv); + } + + // Parse non-positional command line options + ctx.cmdline_args = expand_response_files(ctx, argv); + std::vector file_args = parse_nonpositional_args(ctx); + + // If no -m option is given, deduce it from input files. + if (ctx.arg.emulation.empty()) + ctx.arg.emulation = deduce_machine_type(ctx, file_args); + + // Redo if -m is not x86-64. + if constexpr (is_x86_64) + if (ctx.arg.emulation != X86_64::target_name) + return redo_main(argc, argv, ctx.arg.emulation); + + Timer t_all(ctx, "all"); + + install_signal_handler(); + + if (!ctx.arg.directory.empty()) + if (chdir(ctx.arg.directory.c_str()) == -1) + Fatal(ctx) << "chdir failed: " << ctx.arg.directory + << ": " << errno_string(); + + // Fork a subprocess unless --no-fork is given. + std::function on_complete; + +#if !defined(_WIN32) && !defined(__APPLE__) + if (ctx.arg.fork) + on_complete = fork_child(); +#endif + + acquire_global_lock(ctx); + + tbb::global_control tbb_cont(tbb::global_control::max_allowed_parallelism, + ctx.arg.thread_count); + + // Handle --wrap options if any. + for (std::string_view name : ctx.arg.wrap) + get_symbol(ctx, name)->is_wrapped = true; + + // Handle --retain-symbols-file options if any. + if (ctx.arg.retain_symbols_file) + for (std::string_view name : *ctx.arg.retain_symbols_file) + get_symbol(ctx, name)->write_to_symtab = true; + + for (std::string_view arg : ctx.arg.trace_symbol) + get_symbol(ctx, arg)->is_traced = true; + + // Parse input files + read_input_files(ctx, file_args); + + // Uniquify shared object files by soname + { + std::unordered_set seen; + std::erase_if(ctx.dsos, [&](SharedFile *file) { + return !seen.insert(file->soname).second; + }); + } + + Timer t_total(ctx, "total"); + Timer t_before_copy(ctx, "before_copy"); + + // Apply -exclude-libs + apply_exclude_libs(ctx); + + // Create a dummy file containing linker-synthesized symbols. + if (!ctx.arg.relocatable) + create_internal_file(ctx); + + // resolve_symbols is 4 things in 1 phase: + // + // - Determine the set of object files to extract from archives. 
+ // - Remove redundant COMDAT sections (e.g. duplicate inline functions). + // - Finally, the actual symbol resolution. + // - LTO, which requires preliminary symbol resolution before running + // and a follow-up re-resolution after the LTO objects are emitted. + // + // These passes have complex interactions, and unfortunately has to be + // put together in a single phase. + resolve_symbols(ctx); + + // "Kill" .eh_frame input sections after symbol resolution. + kill_eh_frame_sections(ctx); + + // Resolve mergeable section pieces to merge them. + resolve_section_pieces(ctx); + + // Handle --relocatable. Since the linker's behavior is quite different + // from the normal one when the option is given, the logic is implemented + // to a separate file. + if (ctx.arg.relocatable) { + combine_objects(ctx); + return 0; + } + + // Create .bss sections for common symbols. + convert_common_symbols(ctx); + + // Apply version scripts. + apply_version_script(ctx); + + // Parse symbol version suffixes (e.g. "foo@ver1"). + parse_symbol_version(ctx); + + // Set is_imported and is_exported bits for each symbol. + compute_import_export(ctx); + + // Read address-significant section information. + if (ctx.arg.icf && !ctx.arg.icf_all) + mark_addrsig(ctx); + + // Garbage-collect unreachable sections. + if (ctx.arg.gc_sections) + gc_sections(ctx); + + // Merge identical read-only sections. + if (ctx.arg.icf) + icf_sections(ctx); + + // Compute sizes of sections containing mergeable strings. + compute_merged_section_sizes(ctx); + + // Create linker-synthesized sections such as .got or .plt. + create_synthetic_sections(ctx); + + // Make sure that there's no duplicate symbol + if (!ctx.arg.allow_multiple_definition) + check_duplicate_symbols(ctx); + + // Warn if symbols with different types are defined under the same name. + check_symbol_types(ctx); + + if constexpr (is_ppc64v1) + ppc64v1_rewrite_opd(ctx); + + // Bin input sections into output sections. + create_output_sections(ctx); + + // Add synthetic symbols such as __ehdr_start or __end. + add_synthetic_symbols(ctx); + + // Beyond this point, no new files will be added to ctx.objs + // or ctx.dsos. + + // Handle `-z cet-report`. + if (ctx.arg.z_cet_report != CET_REPORT_NONE) + check_cet_errors(ctx); + + // Handle `-z execstack-if-needed`. + if (ctx.arg.z_execstack_if_needed) + for (ObjectFile *file : ctx.objs) + if (file->needs_executable_stack) + ctx.arg.z_execstack = true; + + // If we are linking a .so file, remaining undefined symbols does + // not cause a linker error. Instead, they are treated as if they + // were imported symbols. + // + // If we are linking an executable, weak undefs are converted to + // weakly imported symbols so that they'll have another chance to be + // resolved. + claim_unresolved_symbols(ctx); + + // Beyond this point, no new symbols will be added to the result. + + // Handle --print-dependencies + if (ctx.arg.print_dependencies) + print_dependencies(ctx); + + // Handle -repro + if (ctx.arg.repro) + write_repro_file(ctx); + + // Handle --require-defined + for (std::string_view name : ctx.arg.require_defined) + if (!get_symbol(ctx, name)->file) + Error(ctx) << "--require-defined: undefined symbol: " << name; + + // .init_array and .fini_array contents have to be sorted by + // a special rule. Sort them. + sort_init_fini(ctx); + + // Likewise, .ctors and .dtors have to be sorted. They are rare + // because they are superceded by .init_array/.fini_array, though. 
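(Editorial aside, not part of the patch: the .init_array/.fini_array and .ctors/.dtors sorting mentioned above keys on the numeric priority suffix in section names. A rough sketch of extracting that priority, assuming the GNU ".init_array.NNNNN" naming convention; the get_priority helper and the 65536 "no suffix" default are illustrative assumptions, chosen so unprioritized sections sort last:)

    #include <cstdint>
    #include <string_view>

    // ".init_array.00065" -> 65; plain ".init_array" -> the default priority.
    int64_t get_priority(std::string_view name, int64_t dflt = 65536) {
      auto pos = name.rfind('.');
      if (pos == std::string_view::npos || pos + 1 == name.size())
        return dflt;
      int64_t val = 0;
      for (char c : name.substr(pos + 1)) {
        if (c < '0' || c > '9')
          return dflt;               // suffix is not a number
        val = val * 10 + (c - '0');
      }
      return val;
    }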
+ sort_ctor_dtor(ctx); + + // Handle --shuffle-sections + if (ctx.arg.shuffle_sections != SHUFFLE_SECTIONS_NONE) + shuffle_sections(ctx); + + // Copy string referred by .dynamic to .dynstr. + for (SharedFile *file : ctx.dsos) + ctx.dynstr->add_string(file->soname); + for (std::string_view str : ctx.arg.auxiliary) + ctx.dynstr->add_string(str); + for (std::string_view str : ctx.arg.filter) + ctx.dynstr->add_string(str); + if (!ctx.arg.rpaths.empty()) + ctx.dynstr->add_string(ctx.arg.rpaths); + if (!ctx.arg.soname.empty()) + ctx.dynstr->add_string(ctx.arg.soname); + + if constexpr (is_ppc64v1) + ppc64v1_scan_symbols(ctx); + + // Scan relocations to find symbols that need entries in .got, .plt, + // .got.plt, .dynsym, .dynstr, etc. + scan_relocations(ctx); + + // Compute sizes of output sections while assigning offsets + // within an output section to input sections. + compute_section_sizes(ctx); + + // Sort sections by section attributes so that we'll have to + // create as few segments as possible. + sort_output_sections(ctx); + + // If --packed_dyn_relocs=relr was given, base relocations are stored + // to a .relr.dyn section in a compressed form. Construct a compressed + // relocations now so that we can fix section sizes and file layout. + if (ctx.arg.pack_dyn_relocs_relr) + construct_relr(ctx); + + // Reserve a space for dynamic symbol strings in .dynstr and sort + // .dynsym contents if necessary. Beyond this point, no symbol will + // be added to .dynsym. + ctx.dynsym->finalize(ctx); + + // Print reports about undefined symbols, if needed. + if (ctx.arg.unresolved_symbols == UNRESOLVED_ERROR) + report_undef_errors(ctx); + + // Fill .gnu.version_d section contents. + if (ctx.verdef) + ctx.verdef->construct(ctx); + + // Fill .gnu.version_r section contents. + ctx.verneed->construct(ctx); + + // Compute .symtab and .strtab sizes for each file. + create_output_symtab(ctx); + + // .eh_frame is a special section from the linker's point of view, + // as its contents are parsed and reconstructed by the linker, + // unlike other sections that are regarded as opaque bytes. + // Here, we construct output .eh_frame contents. + ctx.eh_frame->construct(ctx); + + // Handle --gdb-index. + if (ctx.arg.gdb_index) + ctx.gdb_index->construct(ctx); + + // If --emit-relocs is given, we'll copy relocation sections from input + // files to an output file. + if (ctx.arg.emit_relocs) + create_reloc_sections(ctx); + + // Compute the section header values for all sections. + compute_section_headers(ctx); + + // Assign offsets to output sections + i64 filesize = set_osec_offsets(ctx); + + // On RISC-V, branches are encode using multiple instructions so + // that they can jump to anywhere in ±2 GiB by default. They may + // be replaced with shorter instruction sequences if destinations + // are close enough. Do this optimization. + if constexpr (is_riscv) + filesize = riscv_resize_sections(ctx); + + // At this point, memory layout is fixed. + + // Set actual addresses to linker-synthesized symbols. + fix_synthetic_symbols(ctx); + + // Beyond this, you can assume that symbol addresses including their + // GOT or PLT addresses have a correct final value. + + // If --compress-debug-sections is given, compress .debug_* sections + // using zlib. + if (ctx.arg.compress_debug_sections != COMPRESS_NONE) + filesize = compress_debug_sections(ctx); + + // At this point, both memory and file layouts are fixed. 
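(Editorial aside, not part of the patch: several of the steps above, notably compute_section_sizes and set_osec_offsets, boil down to the same alignment arithmetic. A minimal sketch of that arithmetic, assuming power-of-two alignments and hypothetical Sec/assign_offsets names:)

    #include <cstdint>
    #include <vector>

    struct Sec { uint64_t size, align, offset; };

    // Assign offsets in order, rounding each section up to its alignment
    // (assumed to be a nonzero power of two). Returns the end offset.
    uint64_t assign_offsets(std::vector<Sec> &sections, uint64_t start) {
      uint64_t off = start;
      for (Sec &s : sections) {
        off = (off + s.align - 1) & ~(s.align - 1);
        s.offset = off;
        off += s.size;
      }
      return off;
    }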
+ + t_before_copy.stop(); + + // Create an output file + ctx.output_file = + OutputFile>::open(ctx, ctx.arg.output, filesize, 0777); + ctx.buf = ctx.output_file->buf; + + Timer t_copy(ctx, "copy"); + + // Copy input sections to the output file and apply relocations. + copy_chunks(ctx); + + // Some part of .gdb_index couldn't be computed until other debug + // sections are complete. We have complete debug sections now, so + // write the rest of .gdb_index. + if (ctx.gdb_index) + ctx.gdb_index->write_address_areas(ctx); + + // Dynamic linker works better with sorted .rela.dyn section, + // so we sort them. + ctx.reldyn->sort(ctx); + + // Zero-clear paddings between sections + clear_padding(ctx); + + // .note.gnu.build-id section contains a cryptographic hash of the + // entire output file. Now that we wrote everything except build-id, + // we can compute it. + if (ctx.buildid) + ctx.buildid->write_buildid(ctx); + + t_copy.stop(); + ctx.checkpoint(); + + // Close the output file. This is the end of the linker's main job. + ctx.output_file->close(ctx); + + // Handle --dependency-file + if (!ctx.arg.dependency_file.empty()) + write_dependency_file(ctx); + + if (ctx.has_lto_object) + lto_cleanup(ctx); + + t_total.stop(); + t_all.stop(); + + if (ctx.arg.print_map) + print_map(ctx); + + // Show stats numbers + if (ctx.arg.stats) + show_stats(ctx); + + if (ctx.arg.perf) + print_timer_records(ctx.timer_records); + + std::cout << std::flush; + std::cerr << std::flush; + if (on_complete) + on_complete(); + + release_global_lock(ctx); + + if (ctx.arg.quick_exit) + _exit(0); + + for (std::function &fn : ctx.on_exit) + fn(); + ctx.checkpoint(); + return 0; +} + +using E = MOLD_TARGET; + +template void read_file(Context &, MappedFile> *); +template MappedFile> *open_library(Context &, std::string); + +#ifdef MOLD_X86_64 + +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); +extern template int elf_main(int, char **); + +int main(int argc, char **argv) { + return elf_main(argc, argv); +} + +#else + +template int elf_main(int, char **); + +#endif + +} // namespace mold::elf diff --git a/third_party/mold/elf/mapfile.cc b/third_party/mold/elf/mapfile.cc new file mode 100644 index 00000000000..79c0bfe0f7e --- /dev/null +++ b/third_party/mold/elf/mapfile.cc @@ -0,0 +1,117 @@ +// clang-format off +#include "third_party/mold/elf/mold.h" + +#include "third_party/libcxx/fstream" +#include "third_party/libcxx/iomanip" +#include "third_party/libcxx/ios" +#include "third_party/libcxx/sstream" +// MISSING #include +#include "third_party/libcxx/unordered_map" + +namespace mold::elf { + +template +using Map = + tbb::concurrent_hash_map *, std::vector *>>; + +template +static std::unique_ptr open_output_file(Context &ctx) { + std::unique_ptr file(new std::ofstream); + file->open(ctx.arg.Map.c_str()); + if (!file->is_open()) + Fatal(ctx) << "cannot open " << ctx.arg.Map << ": " << errno_string(); + return file; +} + +template +static Map get_map(Context 
&ctx) { + Map map; + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (Symbol *sym : file->symbols) { + if (sym->file != file || sym->get_type() == STT_SECTION) + continue; + + if (InputSection *isec = sym->get_input_section()) { + assert(file == &isec->file); + typename Map::accessor acc; + map.insert(acc, {isec, {}}); + acc->second.push_back(sym); + } + } + }); + + if (map.size() <= 1) + return map; + + tbb::parallel_for(map.range(), [](const typename Map::range_type &range) { + for (auto it = range.begin(); it != range.end(); it++) { + std::vector *> &vec = it->second; + sort(vec, [](Symbol *a, Symbol *b) { return a->value < b->value; }); + } + }); + return map; +} + +template +void print_map(Context &ctx) { + std::ostream *out = &std::cout; + std::unique_ptr file; + + if (!ctx.arg.Map.empty()) { + file = open_output_file(ctx); + out = file.get(); + } + + // Construct a section-to-symbol map. + Map map = get_map(ctx); + + // Print a mapfile. + *out << " VMA Size Align Out In Symbol\n"; + + for (Chunk *osec : ctx.chunks) { + *out << std::showbase + << std::setw(18) << std::hex << (u64)osec->shdr.sh_addr << std::dec + << std::setw(11) << (u64)osec->shdr.sh_size + << std::setw(6) << (u64)osec->shdr.sh_addralign + << " " << osec->name << "\n"; + + if (osec->kind() != OUTPUT_SECTION) + continue; + + std::span *> members = ((OutputSection *)osec)->members; + std::vector bufs(members.size()); + + tbb::parallel_for((i64)0, (i64)members.size(), [&](i64 i) { + InputSection *mem = members[i]; + std::ostringstream ss; + opt_demangle = ctx.arg.demangle; + u64 addr = osec->shdr.sh_addr + mem->offset; + + ss << std::showbase + << std::setw(18) << std::hex << addr << std::dec + << std::setw(11) << (u64)mem->sh_size + << std::setw(6) << (1 << (u64)mem->p2align) + << " " << *mem << "\n"; + + typename Map::const_accessor acc; + if (map.find(acc, mem)) + for (Symbol *sym : acc->second) + ss << std::showbase + << std::setw(18) << std::hex << sym->get_addr(ctx) << std::dec + << " 0 0 " + << *sym << "\n"; + + bufs[i] = ss.str(); + }); + + for (std::string &str : bufs) + *out << str; + } +} + +using E = MOLD_TARGET; + +template void print_map(Context &ctx); + +} // namespace mold::elf diff --git a/third_party/mold/elf/mold-wrapper.c b/third_party/mold/elf/mold-wrapper.c new file mode 100644 index 00000000000..db3df53063b --- /dev/null +++ b/third_party/mold/elf/mold-wrapper.c @@ -0,0 +1,171 @@ +// clang-format off +#define _GNU_SOURCE 1 + +#if !defined(__OpenBSD__) && !defined(__FreeBSD__) +#include "libc/mem/alloca.h" +#endif +#include "libc/runtime/dlfcn.h" +#include "libc/calls/weirdtypes.h" +#include "libc/stdio/posix_spawn.h" + + +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +#include "libc/calls/calls.h" +#include "libc/calls/termios.h" +#include "libc/fmt/conv.h" +#include "libc/limits.h" +#include "libc/mem/alg.h" +#include "libc/mem/alloca.h" +#include "libc/mem/mem.h" +#include "libc/runtime/runtime.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/rand.h" +#include "libc/stdio/temp.h" +#include "libc/str/str.h" +#include "libc/sysv/consts/exit.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/rand48.h" +#include "libc/mem/alg.h" +#include "libc/mem/mem.h" +#include "libc/str/str.h" +#include "libc/calls/calls.h" +#include 
"libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "libc/time/time.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/lockf.h" + +extern char **environ; + +static char *get_mold_path() { + char *path = getenv("MOLD_PATH"); + if (path) + return path; + fprintf(stderr, "MOLD_PATH is not set\n"); + exit(1); +} + +static void debug_print(const char *fmt, ...) { + if (!getenv("MOLD_WRAPPER_DEBUG")) + return; + + va_list ap; + va_start(ap, fmt); + fprintf(stderr, "mold-wrapper.so: "); + vfprintf(stderr, fmt, ap); + fflush(stderr); + va_end(ap); +} + +static int count_args(va_list *ap) { + va_list aq; + va_copy(aq, *ap); + + int i = 0; + while (va_arg(aq, char *)) + i++; + va_end(aq); + return i; +} + +static void copy_args(char **argv, const char *arg0, va_list *ap) { + int i = 1; + char *arg; + while ((arg = va_arg(*ap, char *))) + argv[i++] = arg; + + ((const char **)argv)[0] = arg0; + ((const char **)argv)[i] = NULL; +} + +static bool is_ld(const char *path) { + const char *ptr = path + strlen(path); + while (path < ptr && ptr[-1] != '/') + ptr--; + + return !strcmp(ptr, "ld") || !strcmp(ptr, "ld.lld") || + !strcmp(ptr, "ld.gold") || !strcmp(ptr, "ld.bfd") || + !strcmp(ptr, "ld.mold"); +} + +int execvpe(const char *file, char *const *argv, char *const *envp) { + debug_print("execvpe %s\n", file); + + if (!strcmp(file, "ld") || is_ld(file)) + file = get_mold_path(); + + for (int i = 0; envp[i]; i++) + putenv(envp[i]); + + typeof(execvpe) *real = dlsym(RTLD_NEXT, "execvp"); + return real(file, argv, environ); +} + +int execve(const char *path, char *const *argv, char *const *envp) { + debug_print("execve %s\n", path); + if (is_ld(path)) + path = get_mold_path(); + typeof(execve) *real = dlsym(RTLD_NEXT, "execve"); + return real(path, argv, envp); +} + +int execl(const char *path, const char *arg0, ...) { + va_list ap; + va_start(ap, arg0); + char **argv = alloca((count_args(&ap) + 2) * sizeof(char *)); + copy_args(argv, arg0, &ap); + va_end(ap); + return execve(path, argv, environ); +} + +int execlp(const char *file, const char *arg0, ...) { + va_list ap; + va_start(ap, arg0); + char **argv = alloca((count_args(&ap) + 2) * sizeof(char *)); + copy_args(argv, arg0, &ap); + va_end(ap); + return execvpe(file, argv, environ); +} + +int execle(const char *path, const char *arg0, ...) 
{ + va_list ap; + va_start(ap, arg0); + char **argv = alloca((count_args(&ap) + 2) * sizeof(char *)); + copy_args(argv, arg0, &ap); + char **env = va_arg(ap, char **); + va_end(ap); + return execve(path, argv, env); +} + +int execv(const char *path, char *const *argv) { + return execve(path, argv, environ); +} + +int execvp(const char *file, char *const *argv) { + return execvpe(file, argv, environ); +} + +int posix_spawn(pid_t *pid, const char *path, + const posix_spawn_file_actions_t *file_actions, + const posix_spawnattr_t *attrp, + char *const *argv, char *const *envp) { + debug_print("posix_spawn %s\n", path); + if (is_ld(path)) + path = get_mold_path(); + typeof(posix_spawn) *real = dlsym(RTLD_NEXT, "posix_spawn"); + return real(pid, path, file_actions, attrp, argv, envp); +} diff --git a/third_party/mold/elf/mold.h b/third_party/mold/elf/mold.h new file mode 100644 index 00000000000..07ee9dc315d --- /dev/null +++ b/third_party/mold/elf/mold.h @@ -0,0 +1,2852 @@ +// clang-format off +#pragma once + +#include "third_party/mold/elf/elf.h" +// MISSING #include "../common/common.h" + +#include "third_party/libcxx/atomic" +#include "third_party/libcxx/bitset" +#include "third_party/libcxx/cassert" +#include "third_party/libcxx/cstdint" +#include "third_party/libcxx/fstream" +#include "third_party/libcxx/functional" +#include "third_party/libcxx/iostream" +#include "third_party/libcxx/map" +#include "third_party/libcxx/memory" +#include "third_party/libcxx/mutex" +#include "third_party/libcxx/optional" +// MISSING #include +#include "third_party/libcxx/sstream" +#include "third_party/libcxx/string" +#include "third_party/libcxx/string_view" +// MISSING #include +// MISSING #include +// MISSING #include +// MISSING #include +// MISSING #include +// MISSING #include +#include "third_party/libcxx/type_traits" +#include "third_party/libcxx/unordered_map" +#include "third_party/libcxx/unordered_set" +#include "third_party/libcxx/variant" +#include "third_party/libcxx/vector" + +#ifndef _WIN32 +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "libc/time/time.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/lockf.h" +#endif + +namespace mold::elf { + +static constexpr i32 SHA256_SIZE = 32; + +template class InputFile; +template class InputSection; +template class MergedSection; +template class ObjectFile; +template class Chunk; +template class OutputSection; +template class SharedFile; +template class Symbol; +template struct CieRecord; +template struct Context; +template struct FdeRecord; +template class RelocSection; + +template +std::ostream &operator<<(std::ostream &out, const Symbol &sym); + +// +// Mergeable section fragments +// + +template +struct SectionFragment { + SectionFragment(MergedSection *sec, bool is_alive) + : output_section(*sec), is_alive(is_alive) {} + + u64 get_addr(Context &ctx) const; + + MergedSection &output_section; + u32 offset = -1; + Atomic p2align = 0; + Atomic is_alive = false; +}; + +// Additional class members for dynamic symbols. Because most symbols +// don't need them and we allocate tens of millions of symbol objects +// for large programs, we separate them from `Symbol` class to save +// memory. 
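(Editorial aside, not part of the patch: the comment above describes splitting rarely-needed bookkeeping out of Symbol into a side table. A minimal sketch of that idea with hypothetical Aux/get_aux names; whether the table is filled lazily as shown is an assumption of the sketch, not a statement about mold:)

    #include <cstdint>
    #include <vector>

    struct Aux { int32_t got_idx = -1, plt_idx = -1, dynsym_idx = -1; };
    struct Sym { int32_t aux_idx = -1; /* name, value, ... */ };
    struct Ctx { std::vector<Aux> aux; };

    // Only symbols that actually need dynamic-linking bookkeeping pay for it.
    Aux &get_aux(Ctx &ctx, Sym &sym) {
      if (sym.aux_idx == -1) {
        sym.aux_idx = (int32_t)ctx.aux.size();
        ctx.aux.emplace_back();
      }
      return ctx.aux[sym.aux_idx];
    }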
+template +struct SymbolAux { + i32 got_idx = -1; + i32 gottp_idx = -1; + i32 tlsgd_idx = -1; + i32 tlsdesc_idx = -1; + i32 plt_idx = -1; + i32 pltgot_idx = -1; + i32 dynsym_idx = -1; + u32 djb_hash = 0; +}; + +template <> +struct SymbolAux : SymbolAux { + i32 opd_idx = -1; +}; + +// +// thunks.cc +// + +template +class RangeExtensionThunk {}; + +template requires needs_thunk +class RangeExtensionThunk { +public: + RangeExtensionThunk(OutputSection &osec, i64 thunk_idx, i64 offset) + : output_section(osec), thunk_idx(thunk_idx), offset(offset) {} + + i64 size() const { return E::thunk_hdr_size + symbols.size() * E::thunk_size; } + void copy_buf(Context &ctx); + + u64 get_addr(i64 idx) const { + return output_section.shdr.sh_addr + offset + E::thunk_hdr_size + + idx * E::thunk_size; + } + + static constexpr i64 alignment = 4; + + OutputSection &output_section; + i64 thunk_idx; + i64 offset; + std::mutex mu; + std::vector *> symbols; +}; + +struct RangeExtensionRef { + i16 thunk_idx = -1; + i16 sym_idx = -1; +}; + +template +void create_range_extension_thunks(Context &ctx, OutputSection &osec); + +// +// input-sections.cc +// + +// .eh_frame section contains CIE and FDE records to teach the runtime +// how to handle exceptions. Usually, a .eh_frame contains one CIE +// followed by as many FDEs as the number of functions defined by the +// file. CIE contains common information for FDEs (it is actually +// short for Common Information Entry). FDE contains the start address +// of a function and its length as well as how to handle exceptions +// for that function. +// +// Unlike other sections, the linker has to parse .eh_frame for optimal +// output for the following reasons: +// +// - Compilers tend to emit the same CIE as long as the programming +// language is the same, so CIEs in input object files are almost +// always identical. We want to merge them to make a resulting +// .eh_frame smaller. +// +// - If we eliminate a function (e.g. when we see two object files +// containing the duplicate definition of an inlined function), we +// want to also eliminate a corresponding FDE so that a resulting +// .eh_frame doesn't contain a dead FDE entry. +// +// - If we need to compare two function definitions for equality for +// ICF, we need to compare not only the function body but also its +// exception handlers. +// +// Note that we assume that the first relocation entry for an FDE +// always points to the function that the FDE is associated to. 
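(Editorial aside, not part of the patch: the CIE/FDE records described above are length-prefixed, which is what CieRecord::size() below relies on. A minimal sketch of walking .eh_frame by those 4-byte length prefixes, assuming no 0xffffffff extended-length records and using a hypothetical walk_eh_frame helper:)

    #include <cstdint>
    #include <cstring>
    #include <string_view>

    void walk_eh_frame(std::string_view contents) {
      std::size_t off = 0;
      while (off + 8 <= contents.size()) {
        uint32_t len, id;
        memcpy(&len, contents.data() + off, 4);   // length, excluding itself
        if (len == 0)
          break;                                  // terminator record
        memcpy(&id, contents.data() + off + 4, 4);
        bool is_cie = (id == 0);                  // a zero CIE id marks a CIE
        (void)is_cie;                             // an FDE would be parsed further here
        off += len + 4;                           // advance to the next record
      }
    }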
+template +struct CieRecord { + CieRecord(Context &ctx, ObjectFile &file, InputSection &isec, + u32 input_offset, std::span> rels, u32 rel_idx) + : file(file), input_section(isec), input_offset(input_offset), + rel_idx(rel_idx), rels(rels), contents(file.get_string(ctx, isec.shdr())) {} + + i64 size() const { + return *(U32 *)(contents.data() + input_offset) + 4; + } + + std::string_view get_contents() const { + return contents.substr(input_offset, size()); + } + + std::span> get_rels() const { + i64 end = rel_idx; + while (end < rels.size() && rels[end].r_offset < input_offset + size()) + end++; + return rels.subspan(rel_idx, end - rel_idx); + } + + bool equals(const CieRecord &other) const; + + ObjectFile &file; + InputSection &input_section; + u32 input_offset = -1; + u32 output_offset = -1; + u32 rel_idx = -1; + u32 icf_idx = -1; + bool is_leader = false; + std::span> rels; + std::string_view contents; +}; + +template +struct FdeRecord { + FdeRecord(u32 input_offset, u32 rel_idx) + : input_offset(input_offset), rel_idx(rel_idx) {} + + i64 size(ObjectFile &file) const; + std::string_view get_contents(ObjectFile &file) const; + std::span> get_rels(ObjectFile &file) const; + + u32 input_offset = -1; + u32 output_offset = -1; + u32 rel_idx = -1; + u16 cie_idx = -1; + Atomic is_alive = true; +}; + +// A struct to hold target-dependent input section members. +template +struct InputSectionExtras {}; + +template requires needs_thunk +struct InputSectionExtras { + std::vector range_extn; +}; + +template requires is_riscv +struct InputSectionExtras { + std::vector r_deltas; +}; + +// InputSection represents a section in an input object file. +template +class InputSection { +public: + InputSection(Context &ctx, ObjectFile &file, std::string_view name, + i64 shndx); + + void uncompress(Context &ctx); + void uncompress_to(Context &ctx, u8 *buf); + void scan_relocations(Context &ctx); + void write_to(Context &ctx, u8 *buf); + void apply_reloc_alloc(Context &ctx, u8 *base); + void apply_reloc_nonalloc(Context &ctx, u8 *base); + void kill(); + + std::string_view name() const; + i64 get_priority() const; + u64 get_addr() const; + const ElfShdr &shdr() const; + std::span> get_rels(Context &ctx) const; + std::span> get_fdes() const; + std::string_view get_func_name(Context &ctx, i64 offset) const; + bool is_relr_reloc(Context &ctx, const ElfRel &rel) const; + bool is_killed_by_icf() const; + + bool record_undef_error(Context &ctx, const ElfRel &rel); + + ObjectFile &file; + OutputSection *output_section = nullptr; + u64 sh_size = -1; + + std::string_view contents; + + [[no_unique_address]] InputSectionExtras extra; + + i32 fde_begin = -1; + i32 fde_end = -1; + + u32 offset = -1; + u32 shndx = -1; + u32 relsec_idx = -1; + u32 reldyn_offset = 0; + + // For COMDAT de-duplication and garbage collection + std::atomic_bool is_alive = true; + u8 p2align = 0; + + bool address_significant : 1 = false; + bool uncompressed : 1 = false; + + // For garbage collection + Atomic is_visited = false; + + // For ICF + // + // `leader` is the section that this section has been merged with. + // Three kind of values are possible: + // - `leader == nullptr`: This section was not eligible for ICF. + // - `leader == this`: This section was retained. + // - `leader != this`: This section was merged with another identical section. 
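(Editorial aside, not part of the patch: the three `leader` states described in the comment above translate directly into predicates. The snippet below is a standalone illustration, not necessarily mold's member-function bodies:)

    struct Sec {
      Sec *leader = nullptr;
      bool killed_by_icf() const { return leader && leader != this; }  // folded into another section
      bool icf_root() const      { return leader == this; }            // kept as the representative
      bool icf_skipped() const   { return leader == nullptr; }         // was never eligible for ICF
    };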
+ InputSection *leader = nullptr; + u32 icf_idx = -1; + bool icf_eligible = false; + bool icf_leaf = false; + +private: + void scan_pcrel(Context &ctx, Symbol &sym, const ElfRel &rel); + void scan_absrel(Context &ctx, Symbol &sym, const ElfRel &rel); + void scan_dyn_absrel(Context &ctx, Symbol &sym, const ElfRel &rel); + void scan_toc_rel(Context &ctx, Symbol &sym, const ElfRel &rel); + + void check_tlsle(Context &ctx, Symbol &sym, const ElfRel &rel); + + void apply_dyn_absrel(Context &ctx, Symbol &sym, const ElfRel &rel, + u8 *loc, u64 S, i64 A, u64 P, ElfRel *&dynrel); + + void apply_toc_rel(Context &ctx, Symbol &sym, const ElfRel &rel, + u8 *loc, u64 S, i64 A, u64 P, ElfRel *&dynrel); + + void copy_contents_riscv(Context &ctx, u8 *buf); + + std::pair *, i64> + get_fragment(Context &ctx, const ElfRel &rel); + + u64 get_thunk_addr(i64 idx); + + std::optional get_tombstone(Symbol &sym, SectionFragment *frag); +}; + +// +// tls.cc +// + +template u64 get_tls_begin(Context &); +template u64 get_tp_addr(Context &); +template u64 get_dtp_addr(Context &); + +// +// output-chunks.cc +// + +template +u64 get_eflags(Context &ctx); + +template +i64 to_phdr_flags(Context &ctx, Chunk *chunk); + +template +bool is_relro(Context &ctx, Chunk *chunk); + +template +std::string_view get_output_name(Context &ctx, std::string_view name, u64 flags); + +template +void write_plt_header(Context &ctx, u8 *buf); + +template +void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym); + +template +void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym); + +typedef enum { HEADER, OUTPUT_SECTION, SYNTHETIC } ChunkKind; + +// Chunk represents a contiguous region in an output file. +template +class Chunk { +public: + virtual ~Chunk() = default; + virtual ChunkKind kind() { return SYNTHETIC; } + virtual OutputSection *to_osec() { return nullptr; } + virtual i64 get_reldyn_size(Context &ctx) const { return 0; } + virtual void copy_buf(Context &ctx) {} + virtual void write_to(Context &ctx, u8 *buf) { unreachable(); } + virtual void update_shdr(Context &ctx) {} + + // For --gdb-index + virtual u8 *get_uncompressed_data() { return nullptr; } + + std::string_view name; + ElfShdr shdr = { .sh_addralign = 1 }; + i64 shndx = 0; + + // Some synethetic sections add local symbols to the output. + // For example, range extension thunks adds function_name@thunk + // symbol for each thunk entry. The following members are used + // for such synthesizing symbols. 
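(Editorial aside, not part of the patch: compute_symtab_size/populate_symtab below follow a measure-then-fill pattern so that global .symtab/.strtab offsets can be assigned between the two passes. A rough standalone sketch with hypothetical names:)

    #include <cstddef>
    #include <string>
    #include <vector>

    struct SynthChunk {
      std::vector<std::string> local_syms;   // e.g. "foo@thunk" entries
      std::size_t num_local = 0, strtab_bytes = 0;

      void compute_symtab_size() {           // pass 1: sizes only
        num_local = local_syms.size();
        strtab_bytes = 0;
        for (const std::string &s : local_syms)
          strtab_bytes += s.size() + 1;      // NUL-terminated names
      }

      void populate_symtab(std::string &strtab) const {  // pass 2: emit names
        for (const std::string &s : local_syms) {
          strtab += s;
          strtab += '\0';
        }
      }
    };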
+ virtual void compute_symtab_size(Context &ctx) {}; + virtual void populate_symtab(Context &ctx) {}; + + i64 local_symtab_idx = 0; + i64 num_local_symtab = 0; + i64 strtab_size = 0; + i64 strtab_offset = 0; + + // Offset in .rel.dyn + i64 reldyn_offset = 0; + + // For --section-order + i64 sect_order = 0; +}; + +// ELF header +template +class OutputEhdr : public Chunk { +public: + OutputEhdr(u32 sh_flags) { + this->name = "EHDR"; + this->shdr.sh_flags = sh_flags; + this->shdr.sh_size = sizeof(ElfEhdr); + this->shdr.sh_addralign = sizeof(Word); + } + + ChunkKind kind() override { return HEADER; } + void copy_buf(Context &ctx) override; +}; + +// Section header +template +class OutputShdr : public Chunk { +public: + OutputShdr() { + this->name = "SHDR"; + this->shdr.sh_size = 1; + this->shdr.sh_addralign = sizeof(Word); + } + + ChunkKind kind() override { return HEADER; } + void copy_buf(Context &ctx) override; +}; + +// Program header +template +class OutputPhdr : public Chunk { +public: + OutputPhdr(u32 sh_flags) { + this->name = "PHDR"; + this->shdr.sh_flags = sh_flags; + this->shdr.sh_addralign = sizeof(Word); + } + + ChunkKind kind() override { return HEADER; } + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + + std::vector> phdrs; +}; + +template +class InterpSection : public Chunk { +public: + InterpSection() { + this->name = ".interp"; + this->shdr.sh_type = SHT_PROGBITS; + this->shdr.sh_flags = SHF_ALLOC; + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; +}; + +// Sections +template +class OutputSection : public Chunk { +public: + OutputSection(std::string_view name, u32 type, u64 flags) { + this->name = name; + this->shdr.sh_type = type; + this->shdr.sh_flags = flags; + } + + ChunkKind kind() override { return OUTPUT_SECTION; } + OutputSection *to_osec() override { return this; } + void copy_buf(Context &ctx) override; + void write_to(Context &ctx, u8 *buf) override; + + void compute_symtab_size(Context &ctx) override; + void populate_symtab(Context &ctx) override; + + std::vector *> members; + + void construct_relr(Context &ctx); + std::vector relr; + + std::vector>> thunks; + std::unique_ptr> reloc_sec; +}; + +template +class GotSection : public Chunk { +public: + GotSection() { + this->name = ".got"; + this->shdr.sh_type = SHT_PROGBITS; + this->shdr.sh_flags = SHF_ALLOC | SHF_WRITE; + this->shdr.sh_addralign = sizeof(Word); + + // We always create a .got so that _GLOBAL_OFFSET_TABLE_ has + // something to point to. s390x psABI defines GOT[1] as a + // reserved slot, so we allocate one more on s390x. + this->shdr.sh_size = (is_s390x ? 
2 : 1) * sizeof(Word); + } + + void add_got_symbol(Context &ctx, Symbol *sym); + void add_gottp_symbol(Context &ctx, Symbol *sym); + void add_tlsgd_symbol(Context &ctx, Symbol *sym); + void add_tlsdesc_symbol(Context &ctx, Symbol *sym); + void add_tlsld(Context &ctx); + + u64 get_tlsld_addr(Context &ctx) const; + bool has_tlsld(Context &ctx) const { return tlsld_idx != -1; } + i64 get_reldyn_size(Context &ctx) const override; + void copy_buf(Context &ctx) override; + + void compute_symtab_size(Context &ctx) override; + void populate_symtab(Context &ctx) override; + + std::vector *> got_syms; + std::vector *> gottp_syms; + std::vector *> tlsgd_syms; + std::vector *> tlsdesc_syms; + u32 tlsld_idx = -1; + + void construct_relr(Context &ctx); + std::vector relr; +}; + +template +class GotPltSection : public Chunk { +public: + GotPltSection() { + this->name = ".got.plt"; + this->shdr.sh_type = is_ppc64 ? SHT_NOBITS : SHT_PROGBITS; + this->shdr.sh_flags = SHF_ALLOC | SHF_WRITE; + this->shdr.sh_addralign = sizeof(Word); + this->shdr.sh_size = HDR_SIZE; + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + + static constexpr i64 HDR_SIZE = (is_ppc64v2 ? 2 : 3) * sizeof(Word); + static constexpr i64 ENTRY_SIZE = (is_ppc64v1 ? 3 : 1) * sizeof(Word); +}; + +template +class PltSection : public Chunk { +public: + PltSection() { + this->name = ".plt"; + this->shdr.sh_type = SHT_PROGBITS; + + if constexpr (is_sparc) { + this->shdr.sh_flags = SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE; + this->shdr.sh_addralign = 256; + } else { + this->shdr.sh_flags = SHF_ALLOC | SHF_EXECINSTR; + this->shdr.sh_addralign = 16; + } + } + + void add_symbol(Context &ctx, Symbol *sym); + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + + void compute_symtab_size(Context &ctx) override; + void populate_symtab(Context &ctx) override; + + std::vector *> symbols; +}; + +template +class PltGotSection : public Chunk { +public: + PltGotSection() { + this->name = ".plt.got"; + this->shdr.sh_type = SHT_PROGBITS; + this->shdr.sh_flags = SHF_ALLOC | SHF_EXECINSTR; + this->shdr.sh_addralign = 16; + } + + void add_symbol(Context &ctx, Symbol *sym); + void copy_buf(Context &ctx) override; + + void compute_symtab_size(Context &ctx) override; + void populate_symtab(Context &ctx) override; + + std::vector *> symbols; +}; + +template +class RelPltSection : public Chunk { +public: + RelPltSection() { + this->name = E::is_rela ? ".rela.plt" : ".rel.plt"; + this->shdr.sh_type = E::is_rela ? SHT_RELA : SHT_REL; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_entsize = sizeof(ElfRel); + this->shdr.sh_addralign = sizeof(Word); + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; +}; + +template +class RelDynSection : public Chunk { +public: + RelDynSection() { + this->name = E::is_rela ? ".rela.dyn" : ".rel.dyn"; + this->shdr.sh_type = E::is_rela ? 
SHT_RELA : SHT_REL; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_entsize = sizeof(ElfRel); + this->shdr.sh_addralign = sizeof(Word); + } + + void update_shdr(Context &ctx) override; + void sort(Context &ctx); +}; + +template +class RelrDynSection : public Chunk { +public: + RelrDynSection() { + this->name = ".relr.dyn"; + this->shdr.sh_type = SHT_RELR; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_entsize = sizeof(Word); + this->shdr.sh_addralign = sizeof(Word); + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; +}; + +template +class StrtabSection : public Chunk { +public: + StrtabSection() { + this->name = ".strtab"; + this->shdr.sh_type = SHT_STRTAB; + } + + void update_shdr(Context &ctx) override; +}; + +template +class ShstrtabSection : public Chunk { +public: + ShstrtabSection() { + this->name = ".shstrtab"; + this->shdr.sh_type = SHT_STRTAB; + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; +}; + +template +class DynstrSection : public Chunk { +public: + DynstrSection() { + this->name = ".dynstr"; + this->shdr.sh_type = SHT_STRTAB; + this->shdr.sh_flags = SHF_ALLOC; + } + + void keep() { this->shdr.sh_size = 1; } + i64 add_string(std::string_view str); + i64 find_string(std::string_view str); + void copy_buf(Context &ctx) override; + + i64 dynsym_offset = -1; + +private: + std::unordered_map strings; +}; + +template +class DynamicSection : public Chunk { +public: + DynamicSection() { + this->name = ".dynamic"; + this->shdr.sh_type = SHT_DYNAMIC; + this->shdr.sh_flags = SHF_ALLOC | SHF_WRITE; + this->shdr.sh_addralign = sizeof(Word); + this->shdr.sh_entsize = sizeof(ElfDyn); + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; +}; + +template +ElfSym to_output_esym(Context &ctx, Symbol &sym, u32 st_name, + U32 *shndx); + +template +class SymtabSection : public Chunk { +public: + SymtabSection() { + this->name = ".symtab"; + this->shdr.sh_type = SHT_SYMTAB; + this->shdr.sh_entsize = sizeof(ElfSym); + this->shdr.sh_addralign = sizeof(Word); + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; +}; + +template +class SymtabShndxSection : public Chunk { +public: + SymtabShndxSection() { + this->name = ".symtab_shndx"; + this->shdr.sh_type = SHT_SYMTAB_SHNDX; + this->shdr.sh_entsize = 4; + this->shdr.sh_addralign = 4; + } +}; + +template +class DynsymSection : public Chunk { +public: + DynsymSection() { + this->name = ".dynsym"; + this->shdr.sh_type = SHT_DYNSYM; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_entsize = sizeof(ElfSym); + this->shdr.sh_addralign = sizeof(Word); + } + + void keep() { this->symbols.resize(1); } + void add_symbol(Context &ctx, Symbol *sym); + void finalize(Context &ctx); + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + + std::vector *> symbols; +}; + +template +class HashSection : public Chunk { +public: + HashSection() { + this->name = ".hash"; + this->shdr.sh_type = SHT_HASH; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_entsize = 4; + this->shdr.sh_addralign = 4; + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; +}; + +template +class GnuHashSection : public Chunk { +public: + GnuHashSection() { + this->name = ".gnu.hash"; + this->shdr.sh_type = SHT_GNU_HASH; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_addralign = sizeof(Word); + } + + std::span *> get_exported_symbols(Context &ctx); + void update_shdr(Context 
&ctx) override; + void copy_buf(Context &ctx) override; + + static constexpr i64 LOAD_FACTOR = 8; + static constexpr i64 HEADER_SIZE = 16; + static constexpr i64 BLOOM_SHIFT = 26; + + u32 num_buckets = -1; + u32 num_bloom = 1; +}; + +template +class MergedSection : public Chunk { +public: + static MergedSection * + get_instance(Context &ctx, std::string_view name, u64 type, u64 flags); + + SectionFragment *insert(Context &ctx, std::string_view data, + u64 hash, i64 p2align); + + void assign_offsets(Context &ctx); + void copy_buf(Context &ctx) override; + void write_to(Context &ctx, u8 *buf) override; + void print_stats(Context &ctx); + + HyperLogLog estimator; + +private: + MergedSection(std::string_view name, u64 flags, u32 type); + + ConcurrentMap> map; + std::vector shard_offsets; + std::once_flag once_flag; +}; + +template +class EhFrameSection : public Chunk { +public: + EhFrameSection() { + this->name = ".eh_frame"; + this->shdr.sh_type = SHT_PROGBITS; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_addralign = sizeof(Word); + } + + void construct(Context &ctx); + void apply_reloc(Context &ctx, const ElfRel &rel, u64 offset, u64 val); + void copy_buf(Context &ctx) override; +}; + +template +class EhFrameHdrSection : public Chunk { +public: + EhFrameHdrSection() { + this->name = ".eh_frame_hdr"; + this->shdr.sh_type = SHT_PROGBITS; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_addralign = 4; + this->shdr.sh_size = HEADER_SIZE; + } + + static constexpr i64 HEADER_SIZE = 12; + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + + u32 num_fdes = 0; +}; + +template +class EhFrameRelocSection : public Chunk { +public: + EhFrameRelocSection() { + this->name = E::is_rela ? ".rela.eh_frame" : ".rel.eh_frame"; + this->shdr.sh_type = E::is_rela ? SHT_RELA : SHT_REL; + this->shdr.sh_flags = SHF_INFO_LINK; + this->shdr.sh_addralign = sizeof(Word); + this->shdr.sh_entsize = sizeof(ElfRel); + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; +}; + +template +class CopyrelSection : public Chunk { +public: + CopyrelSection(bool is_relro) : is_relro(is_relro) { + this->name = is_relro ? 
".copyrel.rel.ro" : ".copyrel"; + this->shdr.sh_type = SHT_NOBITS; + this->shdr.sh_flags = SHF_ALLOC | SHF_WRITE; + } + + void add_symbol(Context &ctx, Symbol *sym); + void update_shdr(Context &ctx) override; + i64 get_reldyn_size(Context &ctx) const override { return symbols.size(); } + void copy_buf(Context &ctx) override; + + bool is_relro; + std::vector *> symbols; +}; + +template +class VersymSection : public Chunk { +public: + VersymSection() { + this->name = ".gnu.version"; + this->shdr.sh_type = SHT_GNU_VERSYM; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_entsize = 2; + this->shdr.sh_addralign = 2; + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + + std::vector> contents; +}; + +template +class VerneedSection : public Chunk { +public: + VerneedSection() { + this->name = ".gnu.version_r"; + this->shdr.sh_type = SHT_GNU_VERNEED; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_addralign = sizeof(Word); + } + + void construct(Context &ctx); + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + + std::vector contents; +}; + +template +class VerdefSection : public Chunk { +public: + VerdefSection() { + this->name = ".gnu.version_d"; + this->shdr.sh_type = SHT_GNU_VERDEF; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_addralign = 8; + } + + void construct(Context &ctx); + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + + std::vector contents; +}; + +template +class BuildIdSection : public Chunk { +public: + BuildIdSection() { + this->name = ".note.gnu.build-id"; + this->shdr.sh_type = SHT_NOTE; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_addralign = 4; + this->shdr.sh_size = 1; + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + void write_buildid(Context &ctx); + + static constexpr i64 HEADER_SIZE = 16; +}; + +template +class NotePackageSection : public Chunk { +public: + NotePackageSection() { + this->name = ".note.package"; + this->shdr.sh_type = SHT_NOTE; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_addralign = 4; + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; +}; + +template +class NotePropertySection : public Chunk { +public: + NotePropertySection() { + this->name = ".note.gnu.property"; + this->shdr.sh_type = SHT_NOTE; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_addralign = sizeof(Word); + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + +private: + static constexpr i64 ENTRY_SIZE = E::is_64 ? 
16 : 12; + + std::map properties; +}; + +struct GdbIndexName { + std::string_view name; + u32 hash = 0; + u32 attr = 0; + u32 entry_idx = 0; +}; + +template +class GdbIndexSection : public Chunk { +public: + GdbIndexSection() { + this->name = ".gdb_index"; + this->shdr.sh_type = SHT_PROGBITS; + this->shdr.sh_addralign = 4; + } + + void construct(Context &ctx); + void copy_buf(Context &ctx) override; + void write_address_areas(Context &ctx); + +private: + struct SectionHeader { + ul32 version = 7; + ul32 cu_list_offset = 0; + ul32 cu_types_offset = 0; + ul32 areas_offset = 0; + ul32 symtab_offset = 0; + ul32 const_pool_offset = 0; + }; + + struct MapEntry { + MapEntry(ObjectFile *owner, u32 hash) : owner(owner), hash(hash) {} + + MapEntry(const MapEntry &other) + : owner(other.owner.load()), num_attrs(other.num_attrs.load()), + hash(other.hash), name_offset(other.name_offset), + attr_offset(other.attr_offset) {} + + std::atomic *> owner; + std::atomic_uint32_t num_attrs = 0; + u32 hash = 0; + u32 name_offset = -1; + u32 attr_offset = -1; + }; + + SectionHeader header; + ConcurrentMap map; +}; + +template +class CompressedSection : public Chunk { +public: + CompressedSection(Context &ctx, Chunk &chunk); + void copy_buf(Context &ctx) override; + u8 *get_uncompressed_data() override { return uncompressed.get(); } + +private: + ElfChdr chdr = {}; + std::unique_ptr compressed; + std::unique_ptr uncompressed; +}; + +template +class RelocSection : public Chunk { +public: + RelocSection(Context &ctx, OutputSection &osec); + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + +private: + OutputSection &output_section; + std::vector offsets; +}; + +// PT_GNU_RELRO works on page granularity. We want to align its end to +// a page boundary. We append this section at end of a segment so that +// the segment always ends at a page boundary. +template +class RelroPaddingSection : public Chunk { +public: + RelroPaddingSection() { + this->name = ".relro_padding"; + this->shdr.sh_type = SHT_NOBITS; + this->shdr.sh_flags = SHF_ALLOC | SHF_WRITE; + this->shdr.sh_addralign = 1; + this->shdr.sh_size = 1; + } +}; + +template +class ComdatGroupSection : public Chunk { +public: + ComdatGroupSection(Symbol &sym, std::vector *> members) + : sym(sym), members(std::move(members)) { + this->name = ".group"; + this->shdr.sh_type = SHT_GROUP; + this->shdr.sh_entsize = 4; + this->shdr.sh_addralign = 4; + this->shdr.sh_size = this->members.size() * 4 + 4; + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + +private: + Symbol &sym; + std::vector *> members; +}; + +// +// dwarf.cc +// + +template +std::vector +read_compunits(Context &ctx, ObjectFile &file); + +template +std::vector read_pubnames(Context &ctx, ObjectFile &file); + +template +i64 estimate_address_areas(Context &ctx, ObjectFile &file); + +template +std::vector +read_address_areas(Context &ctx, ObjectFile &file, i64 offset); + +// +// input-files.cc +// + +// A comdat section typically represents an inline function, +// which are de-duplicated by the linker. +// +// For each inline function, there's one comdat section, which +// contains section indices of the function code and its data such as +// string literals, if any. +// +// Comdat sections are identified by its signature. If two comdat +// sections have the same signature, the linker picks up one and +// discards the other by eliminating all sections that the other +// comdat section refers to. 
+struct ComdatGroup { + ComdatGroup() = default; + ComdatGroup(const ComdatGroup &other) : owner(other.owner.load()) {} + + // The file priority of the owner file of this comdat section. + std::atomic_uint32_t owner = -1; +}; + +template +struct ComdatGroupRef { + ComdatGroup *group; + u32 sect_idx; + std::span> members; +}; + +template +struct MergeableSection { + std::pair *, i64> get_fragment(i64 offset); + + MergedSection *parent; + u8 p2align = 0; + std::vector strings; + std::vector hashes; + std::vector frag_offsets; + std::vector *> fragments; +}; + +// InputFile is the base class of ObjectFile and SharedFile. +template +class InputFile { +public: + InputFile(Context &ctx, MappedFile> *mf); + InputFile() : filename("") {} + + virtual ~InputFile() = default; + + template std::span + get_data(Context &ctx, const ElfShdr &shdr); + + template std::span + get_data(Context &ctx, i64 idx); + + std::string_view get_string(Context &ctx, const ElfShdr &shdr); + std::string_view get_string(Context &ctx, i64 idx); + + ElfEhdr &get_ehdr() { return *(ElfEhdr *)mf->data; } + std::span> get_phdrs(); + + ElfShdr *find_section(i64 type); + + virtual void resolve_symbols(Context &ctx) = 0; + void clear_symbols(); + + virtual void + mark_live_objects(Context &ctx, + std::function *)> feeder) = 0; + + std::span *> get_global_syms(); + std::string_view get_source_name() const; + + MappedFile> *mf = nullptr; + std::span> elf_sections; + std::span> elf_syms; + std::vector *> symbols; + i64 first_global = 0; + + std::string filename; + bool is_dso = false; + u32 priority; + Atomic is_alive = false; + std::string_view shstrtab; + std::string_view symbol_strtab; + + // To create an output .symtab + u64 local_symtab_idx = 0; + u64 global_symtab_idx = 0; + u64 num_local_symtab = 0; + u64 num_global_symtab = 0; + u64 strtab_offset = 0; + u64 strtab_size = 0; + + // For --emit-relocs + std::vector output_sym_indices; + +protected: + std::vector> local_syms; + std::vector> frag_syms; +}; + +// ObjectFile represents an input .o file. 
+template +class ObjectFile : public InputFile { +public: + ObjectFile() = default; + + static ObjectFile *create(Context &ctx, MappedFile> *mf, + std::string archive_name, bool is_in_lib); + + void parse(Context &ctx); + void initialize_mergeable_sections(Context &ctx); + void resolve_section_pieces(Context &ctx); + void resolve_symbols(Context &ctx) override; + void mark_live_objects(Context &ctx, + std::function *)> feeder) override; + void convert_undefined_weak_symbols(Context &ctx); + void mark_addrsig(Context &ctx); + void scan_relocations(Context &ctx); + void convert_common_symbols(Context &ctx); + void compute_symtab_size(Context &ctx); + void populate_symtab(Context &ctx); + + i64 get_shndx(const ElfSym &esym); + InputSection *get_section(const ElfSym &esym); + + std::string archive_name; + std::vector>> sections; + std::vector>> mergeable_sections; + bool is_in_lib = false; + std::vector> elf_sections2; + std::vector> cies; + std::vector> fdes; + BitVector has_symver; + std::vector> comdat_groups; + bool exclude_libs = false; + std::map gnu_properties; + bool is_lto_obj = false; + bool needs_executable_stack = false; + + u64 num_dynrel = 0; + u64 reldyn_offset = 0; + + u64 fde_idx = 0; + u64 fde_offset = 0; + u64 fde_size = 0; + + // For ICF + std::unique_ptr> llvm_addrsig; + + // For .gdb_index + InputSection *debug_info = nullptr; + InputSection *debug_ranges = nullptr; + InputSection *debug_rnglists = nullptr; + InputSection *debug_pubnames = nullptr; + InputSection *debug_pubtypes = nullptr; + std::vector compunits; + std::vector gdb_names; + i64 compunits_idx = 0; + i64 attrs_size = 0; + i64 attrs_offset = 0; + i64 names_size = 0; + i64 names_offset = 0; + i64 num_areas = 0; + i64 area_offset = 0; + + // For PPC32 + InputSection *ppc32_got2 = nullptr; + +private: + ObjectFile(Context &ctx, MappedFile> *mf, + std::string archive_name, bool is_in_lib); + + void initialize_sections(Context &ctx); + void initialize_symbols(Context &ctx); + void sort_relocations(Context &ctx); + void initialize_ehframe_sections(Context &ctx); + void read_note_gnu_property(Context &ctx, const ElfShdr &shdr); + void read_ehframe(Context &ctx, InputSection &isec); + void override_symbol(Context &ctx, Symbol &sym, + const ElfSym &esym, i64 symidx); + void merge_visibility(Context &ctx, Symbol &sym, u8 visibility); + + bool has_common_symbol = false; + + const ElfShdr *symtab_sec; + std::span> symtab_shndx_sec; +}; + +// SharedFile represents an input .so file. 
+template +class SharedFile : public InputFile { +public: + static SharedFile *create(Context &ctx, MappedFile> *mf); + + void parse(Context &ctx); + void resolve_symbols(Context &ctx) override; + std::span *> find_aliases(Symbol *sym); + i64 get_alignment(Symbol *sym); + bool is_readonly(Symbol *sym); + + void mark_live_objects(Context &ctx, + std::function *)> feeder) override; + + void compute_symtab_size(Context &ctx); + void populate_symtab(Context &ctx); + + bool is_needed = false; + std::string soname; + std::vector version_strings; + std::vector> elf_syms2; + +private: + SharedFile(Context &ctx, MappedFile> *mf); + + std::string get_soname(Context &ctx); + void maybe_override_symbol(Symbol &sym, const ElfSym &esym); + std::vector read_verdef(Context &ctx); + + std::vector versyms; + const ElfShdr *symtab_sec; + + // Used by find_aliases() + std::once_flag init_aliases; + std::vector *> aliases; +}; + +// +// linker-script.cc +// + +template +void parse_linker_script(Context &ctx, MappedFile> *mf); + +template +std::string_view +get_script_output_type(Context &ctx, MappedFile> *mf); + +template +void parse_version_script(Context &ctx, MappedFile> *mf); + +template +void parse_dynamic_list(Context &ctx, MappedFile> *mf); + +// +// lto.cc +// + +template +ObjectFile *read_lto_object(Context &ctx, MappedFile> *mb); + +template +std::vector *> do_lto(Context &ctx); + +template +void lto_cleanup(Context &ctx); + +// +// gc-sections.cc +// + +template +void gc_sections(Context &ctx); + +// +// icf.cc +// + +template +void icf_sections(Context &ctx); + +// +// relocatable.cc +// + +template +void combine_objects(Context &ctx); + +// +// mapfile.cc +// + +template +void print_map(Context &ctx); + +// +// subprocess.cc +// + +std::function fork_child(); + +template +[[noreturn]] +void process_run_subcommand(Context &ctx, int argc, char **argv); + +// +// jobs.cc +// + +template void acquire_global_lock(Context &ctx); +template void release_global_lock(Context &ctx); + +// +// commandline.cc +// + +template +std::vector parse_nonpositional_args(Context &ctx); + +// +// passes.cc +// + +template void create_internal_file(Context &); +template void apply_exclude_libs(Context &); +template void create_synthetic_sections(Context &); +template void set_file_priority(Context &); +template void resolve_symbols(Context &); +template void kill_eh_frame_sections(Context &); +template void resolve_section_pieces(Context &); +template void convert_common_symbols(Context &); +template void compute_merged_section_sizes(Context &); +template void create_output_sections(Context &); +template void add_synthetic_symbols(Context &); +template void check_cet_errors(Context &); +template void print_dependencies(Context &); +template void write_repro_file(Context &); +template void check_duplicate_symbols(Context &); +template void check_symbol_types(Context &); +template void sort_init_fini(Context &); +template void sort_ctor_dtor(Context &); +template void shuffle_sections(Context &); +template void compute_section_sizes(Context &); +template void sort_output_sections(Context &); +template void claim_unresolved_symbols(Context &); +template void scan_relocations(Context &); +template void construct_relr(Context &); +template void create_output_symtab(Context &); +template void report_undef_errors(Context &); +template void create_reloc_sections(Context &); +template void copy_chunks(Context &); +template void apply_version_script(Context &); +template void parse_symbol_version(Context &); +template void 
compute_import_export(Context &); +template void mark_addrsig(Context &); +template void clear_padding(Context &); +template void compute_section_headers(Context &); +template i64 set_osec_offsets(Context &); +template void fix_synthetic_symbols(Context &); +template i64 compress_debug_sections(Context &); +template void write_dependency_file(Context &); +template void show_stats(Context &); + +// +// arch-arm32.cc +// + +void fixup_arm_exidx_section(Context &ctx); + +// +// arch-riscv64.cc +// + +template +i64 riscv_resize_sections(Context &ctx); + +// +// arch-ppc64v1.cc +// + +void ppc64v1_rewrite_opd(Context &ctx); +void ppc64v1_scan_symbols(Context &ctx); + +class PPC64OpdSection : public Chunk { +public: + PPC64OpdSection() { + this->name = ".opd"; + this->shdr.sh_type = SHT_PROGBITS; + this->shdr.sh_flags = SHF_ALLOC | SHF_WRITE; + this->shdr.sh_addralign = 8; + } + + void add_symbol(Context &ctx, Symbol *sym); + i64 get_reldyn_size(Context &ctx) const override; + void copy_buf(Context &ctx) override; + + static constexpr i64 ENTRY_SIZE = sizeof(Word) * 3; + + std::vector *> symbols; +}; + +// +// arch-sparc.cc +// + +class SparcTlsGetAddrSection : public Chunk { +public: + SparcTlsGetAddrSection() { + this->name = ".tls_get_addr"; + this->shdr.sh_type = SHT_PROGBITS; + this->shdr.sh_flags = SHF_ALLOC | SHF_EXECINSTR; + this->shdr.sh_addralign = 4; + this->shdr.sh_size = 24; + } + + void copy_buf(Context &ctx) override; +}; + +// +// arch-alpha.cc +// + +class AlphaGotSection : public Chunk { +public: + AlphaGotSection() { + this->name = ".alpha_got"; + this->shdr.sh_type = SHT_PROGBITS; + this->shdr.sh_flags = SHF_ALLOC | SHF_WRITE; + this->shdr.sh_addralign = 8; + } + + void add_symbol(Symbol &sym, i64 addend); + void finalize(); + u64 get_addr(Symbol &sym, i64 addend); + i64 get_reldyn_size(Context &ctx) const override; + void copy_buf(Context &ctx) override; + + struct Entry { + bool operator==(const Entry &) const = default; + Symbol *sym; + i64 addend; + }; + +private: + std::vector entries; + std::mutex mu; +}; + +// +// main.cc +// + +struct BuildId { + i64 size() const; + + enum { NONE, HEX, HASH, UUID } kind = NONE; + std::vector value; + i64 hash_size = 0; +}; + +typedef enum { COMPRESS_NONE, COMPRESS_ZLIB, COMPRESS_ZSTD } CompressKind; + +typedef enum { + UNRESOLVED_ERROR, + UNRESOLVED_WARN, + UNRESOLVED_IGNORE, +} UnresolvedKind; + +typedef enum { + SEPARATE_LOADABLE_SEGMENTS, + SEPARATE_CODE, + NOSEPARATE_CODE, +} SeparateCodeKind; + +typedef enum { + CET_REPORT_NONE, + CET_REPORT_WARNING, + CET_REPORT_ERROR, +} CetReportKind; + +typedef enum { + SHUFFLE_SECTIONS_NONE, + SHUFFLE_SECTIONS_SHUFFLE, + SHUFFLE_SECTIONS_REVERSE, +} ShuffleSectionsKind; + +struct VersionPattern { + std::string_view pattern; + std::string_view source; + std::string_view ver_str; + u16 ver_idx = -1; + bool is_cpp = false; +}; + +struct SectionOrder { + enum { NONE, SECTION, GROUP, ADDR, ALIGN, SYMBOL } type = NONE; + std::string name; + u64 value = 0; +}; + +// Target-specific context members +template struct ContextExtras {}; + +template <> struct ContextExtras { + Symbol *_SDA_BASE_ = nullptr; +}; + +template <> struct ContextExtras { + PPC64OpdSection *opd = nullptr; + Symbol *TOC = nullptr; +}; + +template <> struct ContextExtras { + Symbol *TOC = nullptr; + Atomic is_power10 = false; +}; + +template <> struct ContextExtras { + SparcTlsGetAddrSection *tls_get_addr_sec = nullptr; + Symbol *tls_get_addr_sym = nullptr; +}; + +template <> struct ContextExtras { + AlphaGotSection *got = 
nullptr; +}; + +// Context represents a context object for each invocation of the linker. +// It contains command line flags, pointers to singleton objects +// (such as linker-synthesized output sections), unique_ptrs for +// resource management, and other miscellaneous objects. +template +struct Context { + Context() = default; + Context(const Context &) = delete; + + void checkpoint() { + if (has_error) { + cleanup(); + _exit(1); + } + } + + // Command-line arguments + struct { + BuildId build_id; + CetReportKind z_cet_report = CET_REPORT_NONE; + CompressKind compress_debug_sections = COMPRESS_NONE; + SeparateCodeKind z_separate_code = NOSEPARATE_CODE; + ShuffleSectionsKind shuffle_sections = SHUFFLE_SECTIONS_NONE; + UnresolvedKind unresolved_symbols = UNRESOLVED_ERROR; + bool Bsymbolic = false; + bool Bsymbolic_functions = false; + bool allow_multiple_definition = false; + bool apply_dynamic_relocs = true; + bool color_diagnostics = false; + bool default_symver = false; + bool demangle = true; + bool discard_all = false; + bool discard_locals = false; + bool eh_frame_hdr = true; + bool emit_relocs = false; + bool enable_new_dtags = true; + bool execute_only = false; + bool export_dynamic = false; + bool fatal_warnings = false; + bool fork = true; + bool gc_sections = false; + bool gdb_index = false; + bool hash_style_gnu = true; + bool hash_style_sysv = true; + bool icf = false; + bool icf_all = false; + bool ignore_data_address_equality = false; + bool is_static = false; + bool lto_pass2 = false; + bool noinhibit_exec = false; + bool oformat_binary = false; + bool omagic = false; + bool pack_dyn_relocs_relr = false; + bool perf = false; + bool pic = false; + bool pie = false; + bool print_dependencies = false; + bool print_gc_sections = false; + bool print_icf_sections = false; + bool print_map = false; + bool quick_exit = true; + bool relax = true; + bool relocatable = false; + bool relocatable_merge_sections = false; + bool repro = false; + bool rosegment = true; + bool shared = false; + bool start_stop = false; + bool stats = false; + bool strip_all = false; + bool strip_debug = false; + bool suppress_warnings = false; + bool trace = false; + bool undefined_version = false; + bool warn_common = false; + bool warn_once = false; + bool warn_textrel = false; + bool z_copyreloc = true; + bool z_defs = false; + bool z_delete = true; + bool z_dlopen = true; + bool z_dump = true; + bool z_dynamic_undefined_weak = true; + bool z_execstack = false; + bool z_execstack_if_needed = false; + bool z_ibt = false; + bool z_initfirst = false; + bool z_interpose = false; + bool z_keep_text_section_prefix = false; + bool z_nodefaultlib = false; + bool z_now = false; + bool z_origin = false; + bool z_relro = true; + bool z_shstk = false; + bool z_text = false; + i64 filler = -1; + i64 spare_dynamic_tags = 5; + i64 thread_count = 0; + std::string_view emulation; + std::optional unique; + std::optional physical_image_base; + std::optional shuffle_sections_seed; + std::string Map; + std::string chroot; + std::string dependency_file; + std::string directory; + std::string dynamic_linker; + std::string entry = "_start"; + std::string fini = "_fini"; + std::string init = "_init"; + std::string output = "a.out"; + std::string package_metadata; + std::string plugin; + std::string rpaths; + std::string soname; + std::string sysroot; + std::unique_ptr> retain_symbols_file; + std::unordered_map section_align; + std::unordered_map section_start; + std::unordered_set ignore_ir_file; + std::unordered_set wrap; + 
std::vector section_order; + std::vector *, std::variant *, u64>>> defsyms; + std::vector library_paths; + std::vector plugin_opt; + std::vector version_definitions; + std::vector auxiliary; + std::vector exclude_libs; + std::vector filter; + std::vector require_defined; + std::vector trace_symbol; + std::vector undefined; + u64 image_base = 0x200000; + } arg; + + std::vector version_patterns; + u16 default_version = VER_NDX_GLOBAL; + i64 page_size = -1; + std::optional global_lock_fd; + + // true if default_version is set by a wildcard in version script. + bool default_version_from_version_script = false; + + // Reader context + bool as_needed = false; + bool whole_archive = false; + bool is_static; + bool in_lib = false; + i64 file_priority = 10000; + std::unordered_set visited; + tbb::task_group tg; + + bool has_error = false; + bool has_lto_object = false; + + // Symbol table + tbb::concurrent_hash_map, HashCmp> symbol_map; + tbb::concurrent_hash_map comdat_groups; + tbb::concurrent_vector>> merged_sections; + + tbb::concurrent_vector> timer_records; + tbb::concurrent_vector> on_exit; + + tbb::concurrent_vector>> obj_pool; + tbb::concurrent_vector>> dso_pool; + tbb::concurrent_vector> string_pool; + tbb::concurrent_vector>>> mf_pool; + tbb::concurrent_vector>> chunk_pool; + tbb::concurrent_vector>> osec_pool; + + // Symbol auxiliary data + std::vector> symbol_aux; + + // Fully-expanded command line args + std::vector cmdline_args; + + // Input files + std::vector *> objs; + std::vector *> dsos; + + ObjectFile *internal_obj = nullptr; + std::vector> internal_esyms; + + // Output buffer + std::unique_ptr>> output_file; + u8 *buf = nullptr; + bool overwrite_output_file = true; + + std::vector *> chunks; + std::atomic_bool needs_tlsld = false; + std::atomic_bool has_textrel = false; + std::atomic_uint32_t num_ifunc_dynrels = 0; + + tbb::concurrent_hash_map> undef_errors; + + // Output chunks + OutputEhdr *ehdr = nullptr; + OutputShdr *shdr = nullptr; + OutputPhdr *phdr = nullptr; + InterpSection *interp = nullptr; + GotSection *got = nullptr; + GotPltSection *gotplt = nullptr; + RelPltSection *relplt = nullptr; + RelDynSection *reldyn = nullptr; + RelrDynSection *relrdyn = nullptr; + DynamicSection *dynamic = nullptr; + StrtabSection *strtab = nullptr; + DynstrSection *dynstr = nullptr; + HashSection *hash = nullptr; + GnuHashSection *gnu_hash = nullptr; + ShstrtabSection *shstrtab = nullptr; + PltSection *plt = nullptr; + PltGotSection *pltgot = nullptr; + SymtabSection *symtab = nullptr; + SymtabShndxSection *symtab_shndx = nullptr; + DynsymSection *dynsym = nullptr; + EhFrameSection *eh_frame = nullptr; + EhFrameHdrSection *eh_frame_hdr = nullptr; + EhFrameRelocSection *eh_frame_reloc = nullptr; + CopyrelSection *copyrel = nullptr; + CopyrelSection *copyrel_relro = nullptr; + VersymSection *versym = nullptr; + VerneedSection *verneed = nullptr; + VerdefSection *verdef = nullptr; + BuildIdSection *buildid = nullptr; + NotePackageSection *note_package = nullptr; + NotePropertySection *note_property = nullptr; + GdbIndexSection *gdb_index = nullptr; + RelroPaddingSection *relro_padding = nullptr; + + [[no_unique_address]] ContextExtras extra; + + // For --gdb-index + Chunk *debug_info = nullptr; + Chunk *debug_abbrev = nullptr; + Chunk *debug_ranges = nullptr; + Chunk *debug_addr = nullptr; + Chunk *debug_rnglists = nullptr; + + // For thread-local variables + u64 tls_begin = 0; + u64 tp_addr = 0; + u64 dtp_addr = 0; + + // Linker-synthesized symbols + Symbol *_DYNAMIC = nullptr; + Symbol 
*_GLOBAL_OFFSET_TABLE_ = nullptr; + Symbol *_PROCEDURE_LINKAGE_TABLE_ = nullptr; + Symbol *_TLS_MODULE_BASE_ = nullptr; + Symbol *__GNU_EH_FRAME_HDR = nullptr; + Symbol *__bss_start = nullptr; + Symbol *__dso_handle = nullptr; + Symbol *__ehdr_start = nullptr; + Symbol *__executable_start = nullptr; + Symbol *__exidx_end = nullptr; + Symbol *__exidx_start = nullptr; + Symbol *__fini_array_end = nullptr; + Symbol *__fini_array_start = nullptr; + Symbol *__global_pointer = nullptr; + Symbol *__init_array_end = nullptr; + Symbol *__init_array_start = nullptr; + Symbol *__preinit_array_end = nullptr; + Symbol *__preinit_array_start = nullptr; + Symbol *__rel_iplt_end = nullptr; + Symbol *__rel_iplt_start = nullptr; + Symbol *_edata = nullptr; + Symbol *_end = nullptr; + Symbol *_etext = nullptr; + Symbol *edata = nullptr; + Symbol *end = nullptr; + Symbol *etext = nullptr; +}; + +template +std::string_view get_machine_type(Context &ctx, MappedFile> *mf); + +template +MappedFile> *open_library(Context &ctx, std::string path); + +template +MappedFile> *find_library(Context &ctx, std::string path); + +template +void read_file(Context &ctx, MappedFile> *mf); + +template +int elf_main(int argc, char **argv); + +int main(int argc, char **argv); + +template +std::ostream &operator<<(std::ostream &out, const InputFile &file); + +// +// Symbol +// + +enum { + NEEDS_GOT = 1 << 0, + NEEDS_PLT = 1 << 1, + NEEDS_CPLT = 1 << 2, + NEEDS_GOTTP = 1 << 3, + NEEDS_TLSGD = 1 << 4, + NEEDS_COPYREL = 1 << 5, + NEEDS_TLSDESC = 1 << 6, + NEEDS_PPC_OPD = 1 << 7, // for PPCv1 +}; + +// A struct to hold target-dependent symbol members. +template +struct SymbolExtras {}; + +template requires needs_thunk +struct SymbolExtras { + // For range extension thunks + i16 thunk_idx = -1; + i16 thunk_sym_idx = -1; +}; + +// Flags for Symbol::get_addr() +enum { + NO_PLT = 1 << 0, // Request an address other than .plt + NO_OPD = 1 << 1, // Request an address other than .opd (PPC64V1 only) +}; + +// Symbol class represents a defined symbol. +// +// A symbol has not only one but several different addresses if it +// has PLT or GOT entries. This class provides various functions to +// compute different addresses. 
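+//
+// For example, `get_addr()` returns the address of the symbol itself,
+// `get_got_addr()` the address of its GOT slot, `get_plt_addr()` the
+// address of its PLT entry, and `get_gottp_addr()`, `get_tlsgd_addr()`
+// and `get_tlsdesc_addr()` the addresses of its TLS-related GOT
+// entries. Code that applies relocations picks whichever of these
+// addresses the relocation type calls for.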
+template +class Symbol { +public: + Symbol() = default; + Symbol(std::string_view name) : nameptr(name.data()), namelen(name.size()) {} + Symbol(const Symbol &other) : Symbol(other.name()) {} + + u64 get_addr(Context &ctx, i64 flags = 0) const; + u64 get_got_addr(Context &ctx) const; + u64 get_gotplt_addr(Context &ctx) const; + u64 get_gottp_addr(Context &ctx) const; + u64 get_tlsgd_addr(Context &ctx) const; + u64 get_tlsdesc_addr(Context &ctx) const; + u64 get_plt_addr(Context &ctx) const; + u64 get_opd_addr(Context &ctx) const; + + void set_got_idx(Context &ctx, i32 idx); + void set_gottp_idx(Context &ctx, i32 idx); + void set_tlsgd_idx(Context &ctx, i32 idx); + void set_tlsdesc_idx(Context &ctx, i32 idx); + void set_plt_idx(Context &ctx, i32 idx); + void set_pltgot_idx(Context &ctx, i32 idx); + void set_opd_idx(Context &ctx, i32 idx); + void set_dynsym_idx(Context &ctx, i32 idx); + + i32 get_got_idx(Context &ctx) const; + i32 get_gottp_idx(Context &ctx) const; + i32 get_tlsgd_idx(Context &ctx) const; + i32 get_tlsdesc_idx(Context &ctx) const; + i32 get_plt_idx(Context &ctx) const; + i32 get_pltgot_idx(Context &ctx) const; + i32 get_opd_idx(Context &ctx) const; + i32 get_dynsym_idx(Context &ctx) const; + + bool has_plt(Context &ctx) const; + bool has_got(Context &ctx) const { return get_got_idx(ctx) != -1; } + bool has_gottp(Context &ctx) const { return get_gottp_idx(ctx) != -1; } + bool has_tlsgd(Context &ctx) const { return get_tlsgd_idx(ctx) != -1; } + bool has_tlsdesc(Context &ctx) const { return get_tlsdesc_idx(ctx) != -1; } + bool has_opd(Context &ctx) const { return get_opd_idx(ctx) != -1; } + + u32 get_djb_hash(Context &ctx) const; + void set_djb_hash(Context &ctx, u32 hash); + + bool is_absolute() const; + bool is_relative() const { return !is_absolute(); } + bool is_local(Context &ctx) const; + bool is_ifunc() const { return get_type() == STT_GNU_IFUNC; } + bool is_remaining_undef_weak() const; + + InputSection *get_input_section() const; + Chunk *get_output_section() const; + SectionFragment *get_frag() const; + + void set_input_section(InputSection *); + void set_output_section(Chunk *); + void set_frag(SectionFragment *); + + void set_name(std::string_view); + std::string_view name() const; + + u32 get_type() const; + std::string_view get_version() const; + i64 get_output_sym_idx(Context &ctx) const; + const ElfSym &esym() const; + void add_aux(Context &ctx); + + // A symbol is owned by a file. If two or more files define the + // same symbol, the one with the strongest definition owns the symbol. + // If `file` is null, the symbol is equivalent to nonexistent. + InputFile *file = nullptr; + + // A symbol usually belongs to an input section, but it can belong + // to a section fragment, an output section or nothing + // (i.e. absolute symbol). `origin` holds one of them. We use the + // least significant two bits to distinguish type. + enum : uintptr_t { + TAG_ABS = 0b00, + TAG_ISEC = 0b01, + TAG_OSEC = 0b10, + TAG_FRAG = 0b11, + TAG_MASK = 0b11, + }; + + uintptr_t origin = 0; + + // `value` contains symbol value. If it's an absolute symbol, it is + // equivalent to its address. If it belongs to an input section or a + // section fragment, value is added to the base of the input section + // to yield an address. + u64 value = 0; + + const char *nameptr = nullptr; + i32 namelen = 0; + + // Index into the symbol table of the owner file. + i32 sym_idx = -1; + + i32 aux_idx = -1; + u16 ver_idx = 0; + + // `flags` has NEEDS_ flags. 
+ Atomic flags = 0; + + tbb::spin_mutex mu; + Atomic visibility = STV_DEFAULT; + + bool is_weak : 1 = false; + bool write_to_symtab : 1 = false; // for --strip-all and the like + bool is_traced : 1 = false; // for --trace-symbol + bool is_wrapped : 1 = false; // for --wrap + + // If a symbol can be resolved to a symbol in a different ELF file at + // runtime, `is_imported` is true. If a symbol is a dynamic symbol and + // can be used by other ELF file at runtime, `is_exported` is true. + // + // Note that both can be true at the same time. Such symbol represents + // a function or data exported from this ELF file which can be + // imported by other definition at runtime. That is actually a usual + // exported symbol when creating a DSO. In other words, a dynamic + // symbol exported by a DSO is usually imported by itself. + // + // If is_imported is true and is_exported is false, it is a dynamic + // symbol just imported from other DSO. + // + // If is_imported is false and is_exported is true, there are two + // possible cases. If we are creating an executable, we know that + // exported symbols cannot be intercepted by any DSO (because the + // dynamic loader searches a dynamic symbol from an executable before + // examining any DSOs), so any exported symbol is export-only in an + // executable. If we are creating a DSO, export-only symbols + // represent a protected symbol (i.e. a symbol whose visibility is + // STV_PROTECTED). + bool is_imported : 1 = false; + bool is_exported : 1 = false; + + // `is_canonical` is true if this symbol represents a "canonical" PLT. + // Here is the explanation as to what the canonical PLT is. + // + // In C/C++, the process-wide function pointer equality is guaranteed. + // That is, if you take an address of a function `foo`, it's always + // evaluated to the same address wherever you do that. + // + // For the sake of explanation, assume that `libx.so` exports a + // function symbol `foo`, and there's a program that uses `libx.so`. + // Both `libx.so` and the main executable take the address of `foo`, + // which must be evaluated to the same address because of the above + // guarantee. + // + // If the main executable is position-independent code (PIC), `foo` is + // evaluated to the beginning of the function code, as you would have + // expected. The address of `foo` is stored to GOTs, and the machine + // code that takes the address of `foo` reads the GOT entries at + // runtime. + // + // However, if it's not PIC, the main executable's code was compiled + // to not use GOT (note that shared objects are always PIC, only + // executables can be non-PIC). It instead assumes that `foo` (and any + // other global variables/functions) has an address that is fixed at + // link-time. This assumption is correct if `foo` is in the same + // position-dependent executable, but it's not if `foo` is imported + // from some other DSO at runtime. + // + // In this case, we use the address of the `foo`'s PLT entry in the + // main executable (whose address is fixed at link-time) as its + // address. In order to guarantee pointer equality, we also need to + // fill foo's GOT entries in DSOs with the addres of the foo's PLT + // entry instead of `foo`'s real address. We can do that by setting a + // symbol value to `foo`'s dynamic symbol. If a symbol value is set, + // the dynamic loader initialize `foo`'s GOT entries with that value + // instead of the symbol's real address. + // + // We call such PLT entry in the main executable as "canonical". 
+ // If `foo` has a canonical PLT, its address is evaluated to its + // canonical PLT's address. Otherwise, it's evaluated to `foo`'s + // address. + // + // Only non-PIC main executables may have canonical PLTs. PIC + // executables and shared objects never have a canonical PLT. + // + // This bit manages if we need to make this symbol's PLT canonical. + // This bit is meaningful only when the symbol has a PLT entry. + bool is_canonical : 1 = false; + + // If an input object file is not compiled with -fPIC (or with + // -fno-PIC), the file not position independent. That means the + // machine code included in the object file does not use GOT to access + // global variables. Instead, it assumes that addresses of global + // variables are known at link-time. + // + // Let's say `libx.so` exports a global variable `foo`, and a main + // executable uses the variable. If the executable is not compiled + // with -fPIC, we can't simply apply a relocation that refers `foo` + // because `foo`'s address is not known at link-time. + // + // In this case, we could print out the "recompile with -fPIC" error + // message, but there's a way to workaround. + // + // The loader supports a feature so-called "copy relocations". + // A copy relocation instructs the loader to copy data from a DSO to a + // specified location in the main executable. By using this feature, + // we can copy `foo`'s data to a BSS region at runtime. With that, + // we can apply relocations agianst `foo` as if `foo` existed in the + // main executable's BSS area, whose address is known at link-time. + // + // Copy relocations are used only by position-dependent executables. + // Position-independent executables and DSOs don't need them because + // they use GOT to access global variables. + // + // `has_copyrel` is true if we need to emit a copy relocation for this + // symbol. If the original symbol in a DSO is in a read-only memory + // region, `is_copyrel_readonly` is set to true so that the copied data + // will become read-only at run-time. + bool has_copyrel : 1 = false; + bool is_copyrel_readonly : 1 = false; + + // For LTO. True if the symbol is referenced by a regular object (as + // opposed to IR object). + bool referenced_by_regular_obj : 1 = false; + + // Target-dependent extra members. + [[no_unique_address]] SymbolExtras extra; +}; + +// If we haven't seen the same `key` before, create a new instance +// of Symbol and returns it. Otherwise, returns the previously- +// instantiated object. `key` is usually the same as `name`. 
+template +Symbol *get_symbol(Context &ctx, std::string_view key, + std::string_view name) { + typename decltype(ctx.symbol_map)::const_accessor acc; + ctx.symbol_map.insert(acc, {key, Symbol(name)}); + return const_cast *>(&acc->second); +} + +template +Symbol *get_symbol(Context &ctx, std::string_view name) { + return get_symbol(ctx, name, name); +} + +template +std::ostream &operator<<(std::ostream &out, const Symbol &sym) { + if (opt_demangle) + out << demangle(sym.name()); + else + out << sym.name(); + return out; +} + +// +// Inline objects and functions +// + +template +inline i64 FdeRecord::size(ObjectFile &file) const { + return *(U32 *)(file.cies[cie_idx].contents.data() + input_offset) + 4; +} + +template +inline std::string_view FdeRecord::get_contents(ObjectFile &file) const { + return file.cies[cie_idx].contents.substr(input_offset, size(file)); +} + +template +inline std::span> +FdeRecord::get_rels(ObjectFile &file) const { + std::span> rels = file.cies[cie_idx].rels; + i64 end = rel_idx; + while (end < rels.size() && rels[end].r_offset < input_offset + size(file)) + end++; + return rels.subspan(rel_idx, end - rel_idx); +} + +template +inline std::ostream & +operator<<(std::ostream &out, const InputSection &isec) { + out << isec.file << ":(" << isec.name() << ")"; + return out; +} + +template +inline u64 SectionFragment::get_addr(Context &ctx) const { + return output_section.shdr.sh_addr + offset; +} + +template +inline void InputSection::kill() { + if (is_alive.exchange(false)) + for (FdeRecord &fde : get_fdes()) + fde.is_alive = false; +} + +template +inline u64 InputSection::get_addr() const { + return output_section->shdr.sh_addr + offset; +} + +template +inline std::string_view InputSection::name() const { + if (file.elf_sections.size() <= shndx) + return ".common"; + return file.shstrtab.data() + file.elf_sections[shndx].sh_name; +} + +template +inline i64 InputSection::get_priority() const { + return ((i64)file.priority << 32) | shndx; +} + +template +i64 get_addend(u8 *loc, const ElfRel &rel); + +template requires E::is_rela && (!is_sh4) +inline i64 get_addend(u8 *loc, const ElfRel &rel) { + return rel.r_addend; +} + +template +i64 get_addend(InputSection &isec, const ElfRel &rel) { + return get_addend((u8 *)isec.contents.data() + rel.r_offset, rel); +} + +template +void write_addend(u8 *loc, i64 val, const ElfRel &rel); + +template requires E::is_rela +void write_addend(u8 *loc, i64 val, const ElfRel &rel) {} + +template +inline const ElfShdr &InputSection::shdr() const { + if (shndx < file.elf_sections.size()) + return file.elf_sections[shndx]; + return file.elf_sections2[shndx - file.elf_sections.size()]; +} + +template +inline std::span> InputSection::get_rels(Context &ctx) const { + if (relsec_idx == -1) + return {}; + return file.template get_data>(ctx, file.elf_sections[relsec_idx]); +} + +template +inline std::span> InputSection::get_fdes() const { + if (fde_begin == -1) + return {}; + std::span> span(file.fdes); + return span.subspan(fde_begin, fde_end - fde_begin); +} + +template +std::pair *, i64> +InputSection::get_fragment(Context &ctx, const ElfRel &rel) { + assert(!(shdr().sh_flags & SHF_ALLOC)); + + const ElfSym &esym = file.elf_syms[rel.r_sym]; + if (esym.st_type == STT_SECTION) + if (std::unique_ptr> &m = + file.mergeable_sections[file.get_shndx(esym)]) + return m->get_fragment(esym.st_value + get_addend(*this, rel)); + + return {nullptr, 0}; +} + +template +u64 InputSection::get_thunk_addr(i64 idx) { + if constexpr (needs_thunk) { + 
RangeExtensionRef ref = extra.range_extn[idx]; + assert(ref.thunk_idx != -1); + return output_section->thunks[ref.thunk_idx]->get_addr(ref.sym_idx); + } + unreachable(); +} + +// Input object files may contain duplicate code for inline functions +// and such. Linkers de-duplicate them at link-time. However, linkers +// generaly don't remove debug info for de-duplicated functions because +// doing that requires parsing the entire debug section. +// +// Instead, linkers write "tombstone" values to dead debug info records +// instead of bogus values so that debuggers can skip them. +// +// This function returns a tombstone value for the symbol if the symbol +// refers a dead debug info section. +template +inline std::optional +InputSection::get_tombstone(Symbol &sym, SectionFragment *frag) { + if (frag) + return {}; + + InputSection *isec = sym.get_input_section(); + + // Setting a tombstone is a special feature for a dead debug section. + if (!isec || isec->is_alive) + return {}; + + std::string_view s = name(); + if (!s.starts_with(".debug")) + return {}; + + // If the section was dead due to ICF, we don't want to emit debug + // info for that section but want to set real values to .debug_line so + // that users can set a breakpoint inside a merged section. + if (isec->is_killed_by_icf() && s == ".debug_line") + return {}; + + // 0 is an invalid value in most debug info sections, so we use it + // as a tombstone value. .debug_loc and .debug_ranges reserve 0 as + // the terminator marker, so we use 1 if that's the case. + return (s == ".debug_loc" || s == ".debug_ranges") ? 1 : 0; +} + +template +inline bool +InputSection::is_relr_reloc(Context &ctx, const ElfRel &rel) const { + return ctx.arg.pack_dyn_relocs_relr && + !(shdr().sh_flags & SHF_EXECINSTR) && + (shdr().sh_addralign % sizeof(Word)) == 0 && + (rel.r_offset % sizeof(Word)) == 0; +} + +template +inline bool InputSection::is_killed_by_icf() const { + return this->leader && this->leader != this; +} + +template +std::pair *, i64> +MergeableSection::get_fragment(i64 offset) { + std::vector &vec = frag_offsets; + auto it = std::upper_bound(vec.begin(), vec.end(), offset); + i64 idx = it - 1 - vec.begin(); + return {fragments[idx], offset - vec[idx]}; +} + +template +template +inline std::span +InputFile::get_data(Context &ctx, const ElfShdr &shdr) { + std::string_view view = this->get_string(ctx, shdr); + if (view.size() % sizeof(T)) + Fatal(ctx) << *this << ": corrupted section"; + return {(T *)view.data(), view.size() / sizeof(T)}; +} + +template +template +inline std::span InputFile::get_data(Context &ctx, i64 idx) { + if (elf_sections.size() <= idx) + Fatal(ctx) << *this << ": invalid section index"; + return this->template get_data(elf_sections[idx]); +} + +template +inline std::string_view +InputFile::get_string(Context &ctx, const ElfShdr &shdr) { + u8 *begin = mf->data + shdr.sh_offset; + u8 *end = begin + shdr.sh_size; + if (mf->data + mf->size < end) + Fatal(ctx) << *this << ": section header is out of range: " << shdr.sh_offset; + return {(char *)begin, (size_t)(end - begin)}; +} + +template +inline std::string_view InputFile::get_string(Context &ctx, i64 idx) { + assert(idx < elf_sections.size()); + + if (elf_sections.size() <= idx) + Fatal(ctx) << *this << ": invalid section index: " << idx; + return this->get_string(ctx, elf_sections[idx]); +} + +template +inline std::span *> InputFile::get_global_syms() { + return std::span *>(this->symbols).subspan(this->first_global); +} + +template +inline i64 
ObjectFile::get_shndx(const ElfSym &esym) { + assert(&this->elf_syms[0] <= &esym); + assert(&esym <= &this->elf_syms[this->elf_syms.size() - 1]); + + if (esym.st_shndx == SHN_XINDEX) + return symtab_shndx_sec[&esym - &this->elf_syms[0]]; + return esym.st_shndx; +} + +template +inline InputSection *ObjectFile::get_section(const ElfSym &esym) { + return sections[get_shndx(esym)].get(); +} + +template +OutputSection *find_section(Context &ctx, u32 sh_type) { + for (Chunk *chunk : ctx.chunks) + if (OutputSection *osec = chunk->to_osec()) + if (osec->shdr.sh_type == sh_type) + return osec; + return nullptr; +} + +template +OutputSection *find_section(Context &ctx, std::string_view name) { + for (Chunk *chunk : ctx.chunks) + if (OutputSection *osec = chunk->to_osec()) + if (osec->name == name) + return osec; + return nullptr; +} + +template +u64 Symbol::get_addr(Context &ctx, i64 flags) const { + if (SectionFragment *frag = get_frag()) { + if (!frag->is_alive) { + // This condition is met if a non-alloc section refers an + // alloc section and if the referenced piece of data is + // garbage-collected. Typically, this condition occurs if a + // debug info section refers a string constant in .rodata. + return 0; + } + + return frag->get_addr(ctx) + value; + } + + if (has_copyrel) { + return is_copyrel_readonly + ? ctx.copyrel_relro->shdr.sh_addr + value + : ctx.copyrel->shdr.sh_addr + value; + } + + if constexpr (is_ppc64v1) + if (!(flags & NO_OPD) && has_opd(ctx)) + return get_opd_addr(ctx); + + if (!(flags & NO_PLT) && has_plt(ctx)) { + assert(is_imported || is_ifunc()); + return get_plt_addr(ctx); + } + + InputSection *isec = get_input_section(); + if (!isec) + return value; // absolute symbol + + if (!isec->is_alive) { + if (isec->is_killed_by_icf()) + return isec->leader->get_addr() + value; + + if (isec->name() == ".eh_frame") { + // .eh_frame contents are parsed and reconstructed by the linker, + // so pointing to a specific location in a source .eh_frame + // section doesn't make much sense. However, CRT files contain + // symbols pointing to the very beginning and ending of the section. + // + // If LTO is enabled, GCC may add `.lto_priv.` as a symbol + // suffix. That's why we use starts_with() instead of `==` here. + if (name().starts_with("__EH_FRAME_BEGIN__") || + name().starts_with("__EH_FRAME_LIST__") || + name().starts_with(".eh_frame_seg") || + esym().st_type == STT_SECTION) + return ctx.eh_frame->shdr.sh_addr; + + if (name().starts_with("__FRAME_END__") || + name().starts_with("__EH_FRAME_LIST_END__")) + return ctx.eh_frame->shdr.sh_addr + ctx.eh_frame->shdr.sh_size; + + // ARM object files contain "$d" local symbol at the beginning + // of data sections. Their values are not significant for .eh_frame, + // so we just treat them as offset 0. + if (name() == "$d" || name().starts_with("$d.")) + return ctx.eh_frame->shdr.sh_addr; + + Fatal(ctx) << "symbol referring to .eh_frame is not supported: " + << *this << " " << *file; + } + + // The control can reach here if there's a relocation that refers + // a local symbol belonging to a comdat group section. This is a + // violation of the spec, as all relocations should use only global + // symbols of comdat members. However, .eh_frame tends to have such + // relocations. 
+ return 0; + } + + return isec->get_addr() + value; +} + +template +inline u64 Symbol::get_got_addr(Context &ctx) const { + return ctx.got->shdr.sh_addr + get_got_idx(ctx) * sizeof(Word); +} + +template +inline u64 Symbol::get_gotplt_addr(Context &ctx) const { + assert(get_plt_idx(ctx) != -1); + return ctx.gotplt->shdr.sh_addr + GotPltSection::HDR_SIZE + + get_plt_idx(ctx) * GotPltSection::ENTRY_SIZE; +} + +template +inline u64 Symbol::get_gottp_addr(Context &ctx) const { + assert(get_gottp_idx(ctx) != -1); + return ctx.got->shdr.sh_addr + get_gottp_idx(ctx) * sizeof(Word); +} + +template +inline u64 Symbol::get_tlsgd_addr(Context &ctx) const { + assert(get_tlsgd_idx(ctx) != -1); + return ctx.got->shdr.sh_addr + get_tlsgd_idx(ctx) * sizeof(Word); +} + +template +inline u64 Symbol::get_tlsdesc_addr(Context &ctx) const { + assert(get_tlsdesc_idx(ctx) != -1); + return ctx.got->shdr.sh_addr + get_tlsdesc_idx(ctx) * sizeof(Word); +} + +template +inline u64 to_plt_offset(i32 pltidx) { + if constexpr (is_ppc64v1) { + // The PPC64 ELFv1 ABI requires PLT entries to vary in size + // depending on their indices. For entries whose PLT index is + // less than 32768, the entry size is 8 bytes. Other entries are + // 12 bytes long. + if (pltidx < 0x8000) + return E::plt_hdr_size + pltidx * 8; + return E::plt_hdr_size + 0x8000 * 8 + (pltidx - 0x8000) * 12; + } else { + return E::plt_hdr_size + pltidx * E::plt_size; + } +} + +template +inline u64 Symbol::get_plt_addr(Context &ctx) const { + if (i32 idx = get_plt_idx(ctx); idx != -1) + return ctx.plt->shdr.sh_addr + to_plt_offset(idx); + return ctx.pltgot->shdr.sh_addr + get_pltgot_idx(ctx) * E::pltgot_size; +} + +template +inline u64 Symbol::get_opd_addr(Context &ctx) const { + assert(get_opd_idx(ctx) != -1); + return ctx.extra.opd->shdr.sh_addr + + get_opd_idx(ctx) * PPC64OpdSection::ENTRY_SIZE; +} + +template +inline void Symbol::set_got_idx(Context &ctx, i32 idx) { + assert(aux_idx != -1); + assert(ctx.symbol_aux[aux_idx].got_idx < 0); + ctx.symbol_aux[aux_idx].got_idx = idx; +} + +template +inline void Symbol::set_gottp_idx(Context &ctx, i32 idx) { + assert(aux_idx != -1); + assert(ctx.symbol_aux[aux_idx].gottp_idx < 0); + ctx.symbol_aux[aux_idx].gottp_idx = idx; +} + +template +inline void Symbol::set_tlsgd_idx(Context &ctx, i32 idx) { + assert(aux_idx != -1); + assert(ctx.symbol_aux[aux_idx].tlsgd_idx < 0); + ctx.symbol_aux[aux_idx].tlsgd_idx = idx; +} + +template +inline void Symbol::set_tlsdesc_idx(Context &ctx, i32 idx) { + assert(aux_idx != -1); + assert(ctx.symbol_aux[aux_idx].tlsdesc_idx < 0); + ctx.symbol_aux[aux_idx].tlsdesc_idx = idx; +} + +template +inline void Symbol::set_plt_idx(Context &ctx, i32 idx) { + assert(aux_idx != -1); + assert(ctx.symbol_aux[aux_idx].plt_idx < 0); + ctx.symbol_aux[aux_idx].plt_idx = idx; +} + +template +inline void Symbol::set_pltgot_idx(Context &ctx, i32 idx) { + assert(aux_idx != -1); + assert(ctx.symbol_aux[aux_idx].pltgot_idx < 0); + ctx.symbol_aux[aux_idx].pltgot_idx = idx; +} + +template +inline void Symbol::set_opd_idx(Context &ctx, i32 idx) { + assert(aux_idx != -1); + assert(ctx.symbol_aux[aux_idx].opd_idx < 0); + ctx.symbol_aux[aux_idx].opd_idx = idx; +} + +template +inline void Symbol::set_dynsym_idx(Context &ctx, i32 idx) { + assert(aux_idx != -1); + ctx.symbol_aux[aux_idx].dynsym_idx = idx; +} + +template +inline i32 Symbol::get_got_idx(Context &ctx) const { + return (aux_idx == -1) ? 
-1 : ctx.symbol_aux[aux_idx].got_idx; +} + +template +inline i32 Symbol::get_gottp_idx(Context &ctx) const { + return (aux_idx == -1) ? -1 : ctx.symbol_aux[aux_idx].gottp_idx; +} + +template +inline i32 Symbol::get_tlsgd_idx(Context &ctx) const { + return (aux_idx == -1) ? -1 : ctx.symbol_aux[aux_idx].tlsgd_idx; +} + +template +inline i32 Symbol::get_tlsdesc_idx(Context &ctx) const { + return (aux_idx == -1) ? -1 : ctx.symbol_aux[aux_idx].tlsdesc_idx; +} + +template +inline i32 Symbol::get_plt_idx(Context &ctx) const { + return (aux_idx == -1) ? -1 : ctx.symbol_aux[aux_idx].plt_idx; +} + +template +inline i32 Symbol::get_pltgot_idx(Context &ctx) const { + return (aux_idx == -1) ? -1 : ctx.symbol_aux[aux_idx].pltgot_idx; +} + +template +inline i32 Symbol::get_opd_idx(Context &ctx) const { + return (aux_idx == -1) ? -1 : ctx.symbol_aux[aux_idx].opd_idx; +} + +template +inline i32 Symbol::get_dynsym_idx(Context &ctx) const { + return (aux_idx == -1) ? -1 : ctx.symbol_aux[aux_idx].dynsym_idx; +} + +template +inline u32 Symbol::get_djb_hash(Context &ctx) const { + assert(aux_idx != -1); + return ctx.symbol_aux[aux_idx].djb_hash; +} + +template +inline void Symbol::set_djb_hash(Context &ctx, u32 hash) { + assert(aux_idx != -1); + ctx.symbol_aux[aux_idx].djb_hash = hash; +} + +template +inline bool Symbol::has_plt(Context &ctx) const { + return get_plt_idx(ctx) != -1 || get_pltgot_idx(ctx) != -1; +} + +template +inline bool Symbol::is_absolute() const { + if (file && file->is_dso) + return esym().is_abs(); + + return !is_imported && !get_frag() && !get_input_section() && + !get_output_section(); +} + +template +inline bool Symbol::is_local(Context &ctx) const { + if (ctx.arg.relocatable) + return esym().st_bind == STB_LOCAL; + return !is_imported && !is_exported; +} + +// A remaining weak undefined symbol is promoted to a dynamic symbol +// in DSO and resolved to 0 in an executable. This function returns +// true if it's latter. 
+template +inline bool Symbol::is_remaining_undef_weak() const { + return !is_imported && esym().is_undef_weak(); +} + +template +inline InputSection *Symbol::get_input_section() const { + if ((origin & TAG_MASK) == TAG_ISEC) + return (InputSection *)(origin & ~TAG_MASK); + return nullptr; +} + +template +inline Chunk *Symbol::get_output_section() const { + if ((origin & TAG_MASK) == TAG_OSEC) + return (Chunk *)(origin & ~TAG_MASK); + return nullptr; +} + +template +inline SectionFragment *Symbol::get_frag() const { + if ((origin & TAG_MASK) == TAG_FRAG) + return (SectionFragment *)(origin & ~TAG_MASK); + return nullptr; +} + +template +inline void Symbol::set_input_section(InputSection *isec) { + uintptr_t addr = (uintptr_t)isec; + assert((addr & TAG_MASK) == 0); + origin = addr | TAG_ISEC; +} + +template +inline void Symbol::set_output_section(Chunk *osec) { + uintptr_t addr = (uintptr_t)osec; + assert((addr & TAG_MASK) == 0); + origin = addr | TAG_OSEC; +} + +template +inline void Symbol::set_frag(SectionFragment *frag) { + uintptr_t addr = (uintptr_t)frag; + assert((addr & TAG_MASK) == 0); + origin = addr | TAG_FRAG; +} + +template +inline u32 Symbol::get_type() const { + if (esym().st_type == STT_GNU_IFUNC && file->is_dso) + return STT_FUNC; + return esym().st_type; +} + +template +inline std::string_view Symbol::get_version() const { + if (file->is_dso) + return ((SharedFile *)file)->version_strings[ver_idx]; + return ""; +} + +template +inline i64 Symbol::get_output_sym_idx(Context &ctx) const { + i64 i = file->output_sym_indices[sym_idx]; + assert(i != -1); + if (is_local(ctx)) + return file->local_symtab_idx + i; + return file->global_symtab_idx + i; +} + +template +inline const ElfSym &Symbol::esym() const { + return file->elf_syms[sym_idx]; +} + +template +inline void Symbol::set_name(std::string_view name) { + nameptr = name.data(); + namelen = name.size(); +} + +template +inline std::string_view Symbol::name() const { + return {nameptr, (size_t)namelen}; +} + +template +inline void Symbol::add_aux(Context &ctx) { + if (aux_idx == -1) { + i64 sz = ctx.symbol_aux.size(); + aux_idx = sz; + ctx.symbol_aux.resize(sz + 1); + } +} + +inline bool is_c_identifier(std::string_view s) { + if (s.empty()) + return false; + + auto is_alpha = [](char c) { + return c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); + }; + + auto is_alnum = [&](char c) { + return is_alpha(c) || ('0' <= c && c <= '9'); + }; + + if (!is_alpha(s[0])) + return false; + for (i64 i = 1; i < s.size(); i++) + if (!is_alnum(s[i])) + return false; + return true; +} + +template +inline bool relax_tlsdesc(Context &ctx, Symbol &sym) { + // TLSDESC relocs must be always relaxed for statically-linked + // executables even if -no-relax is given. It is because a + // statically-linked executable doesn't contain a tranpoline + // function needed for TLSDESC. 
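+  //
+  // Here, "relaxing" means rewriting the compiler-emitted TLSDESC
+  // access sequence into a simpler form whose thread-pointer offset
+  // the linker computes itself, so that no runtime descriptor
+  // resolver is needed. Apart from the static case, the condition
+  // below permits relaxation only when relaxation is enabled, the
+  // output is not a shared object, and the symbol is not imported,
+  // i.e. only when that offset really is fixed at link time.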
+ if (ctx.arg.is_static) + return true; + return ctx.arg.relax && !ctx.arg.shared && !sym.is_imported; +} + +} // namespace mold::elf diff --git a/third_party/mold/elf/output-chunks.cc b/third_party/mold/elf/output-chunks.cc new file mode 100644 index 00000000000..40c06a9ad0b --- /dev/null +++ b/third_party/mold/elf/output-chunks.cc @@ -0,0 +1,3153 @@ +// clang-format off +#include "third_party/mold/elf/mold.h" +// MISSING #include "../common/sha.h" + +#include "third_party/libcxx/cctype" +#include "third_party/libcxx/set" +// MISSING #include +// MISSING #include +// MISSING #include +// MISSING #include +// MISSING #include + +#ifndef _WIN32 +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/runtime.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/mlock.h" +#include "libc/sysv/consts/msync.h" +#include "libc/sysv/consts/posix.h" +#include "libc/sysv/consts/prot.h" +#include "libc/sysv/consts/madv.h" +#include "libc/sysv/consts/mfd.h" +#include "libc/sysv/consts/mremap.h" +#endif + +namespace mold::elf { + +// The hash function for .hash. +static u32 elf_hash(std::string_view name) { + u32 h = 0; + for (u8 c : name) { + h = (h << 4) + c; + u32 g = h & 0xf0000000; + if (g != 0) + h ^= g >> 24; + h &= ~g; + } + return h; +} + +// The hash function for .gnu.hash. +static u32 djb_hash(std::string_view name) { + u32 h = 5381; + for (u8 c : name) + h = (h << 5) + h + c; + return h; +} + +template +u64 get_eflags(Context &ctx) { + if constexpr (is_arm32) + return EF_ARM_EABI_VER5; + + if constexpr (is_riscv) { + std::vector *> objs = ctx.objs; + std::erase(objs, ctx.internal_obj); + + if (objs.empty()) + return 0; + + u32 ret = objs[0]->get_ehdr().e_flags; + for (i64 i = 1; i < objs.size(); i++) { + u32 flags = objs[i]->get_ehdr().e_flags; + if (flags & EF_RISCV_RVC) + ret |= EF_RISCV_RVC; + + if ((flags & EF_RISCV_FLOAT_ABI) != (ret & EF_RISCV_FLOAT_ABI)) + Error(ctx) << *objs[i] + << ": cannot link object files with different floating-point ABI from " + << *objs[0]; + + if ((flags & EF_RISCV_RVE) != (ret & EF_RISCV_RVE)) + Error(ctx) << *objs[i] + << ": cannot link object files with different EF_RISCV_RVE from " + << *objs[0]; + + } + return ret; + } + + if constexpr (is_ppc64v2) + return 2; + return 0; +} + +template +void OutputEhdr::copy_buf(Context &ctx) { + ElfEhdr &hdr = *(ElfEhdr *)(ctx.buf + this->shdr.sh_offset); + memset(&hdr, 0, sizeof(hdr)); + + auto get_entry_addr = [&]() -> u64 { + if (ctx.arg.relocatable) + return 0; + + if (!ctx.arg.entry.empty()) + if (Symbol *sym = get_symbol(ctx, ctx.arg.entry); + sym->file && !sym->file->is_dso) + return sym->get_addr(ctx); + + if (OutputSection *osec = find_section(ctx, ".text")) + return osec->shdr.sh_addr; + return 0; + }; + + memcpy(&hdr.e_ident, "\177ELF", 4); + hdr.e_ident[EI_CLASS] = E::is_64 ? ELFCLASS64 : ELFCLASS32; + hdr.e_ident[EI_DATA] = E::is_le ? ELFDATA2LSB : ELFDATA2MSB; + hdr.e_ident[EI_VERSION] = EV_CURRENT; + hdr.e_machine = E::e_machine; + hdr.e_version = EV_CURRENT; + hdr.e_entry = get_entry_addr(); + hdr.e_flags = get_eflags(ctx); + hdr.e_ehsize = sizeof(ElfEhdr); + + // If e_shstrndx is too large, a dummy value is set to e_shstrndx. + // The real value is stored to the zero'th section's sh_link field. 
+ if (ctx.shstrtab->shndx < SHN_LORESERVE) + hdr.e_shstrndx = ctx.shstrtab->shndx; + else + hdr.e_shstrndx = SHN_XINDEX; + + if (ctx.arg.relocatable) + hdr.e_type = ET_REL; + else if (ctx.arg.pic) + hdr.e_type = ET_DYN; + else + hdr.e_type = ET_EXEC; + + if (ctx.phdr) { + hdr.e_phoff = ctx.phdr->shdr.sh_offset; + hdr.e_phentsize = sizeof(ElfPhdr); + hdr.e_phnum = ctx.phdr->shdr.sh_size / sizeof(ElfPhdr); + } + + if (ctx.shdr) { + hdr.e_shoff = ctx.shdr->shdr.sh_offset; + hdr.e_shentsize = sizeof(ElfShdr); + + // Since e_shnum is a 16-bit integer field, we can't store a very + // large value there. If it is >65535, the real value is stored to + // the zero'th section's sh_size field. + i64 shnum = ctx.shdr->shdr.sh_size / sizeof(ElfShdr); + hdr.e_shnum = (shnum <= UINT16_MAX) ? shnum : 0; + } +} + +template +void OutputShdr::copy_buf(Context &ctx) { + ElfShdr *hdr = (ElfShdr *)(ctx.buf + this->shdr.sh_offset); + memset(hdr, 0, this->shdr.sh_size); + + i64 shnum = ctx.shdr->shdr.sh_size / sizeof(ElfShdr); + if (UINT16_MAX < shnum) + hdr->sh_size = shnum; + + if (SHN_LORESERVE <= ctx.shstrtab->shndx) + hdr->sh_link = ctx.shstrtab->shndx; + + for (Chunk *chunk : ctx.chunks) + if (chunk->shndx) + hdr[chunk->shndx] = chunk->shdr; +} + +template +i64 to_phdr_flags(Context &ctx, Chunk *chunk) { + // All sections are put into a single RWX segment if --omagic + if (ctx.arg.omagic) + return PF_R | PF_W | PF_X; + + bool write = (chunk->shdr.sh_flags & SHF_WRITE); + bool exec = (chunk->shdr.sh_flags & SHF_EXECINSTR); + + // .text is not readable if --execute-only + if (exec && ctx.arg.execute_only) { + if (write) + Error(ctx) << "--execute-only is not compatible with writable section: " + << chunk->name; + return PF_X; + } + + // .rodata is merged with .text if --no-rosegment + if (!write && !ctx.arg.rosegment) + exec = true; + + return PF_R | (write ? PF_W : PF_NONE) | (exec ? PF_X : PF_NONE); +} + +// PT_GNU_RELRO segment is a security mechanism to make more pages +// read-only than we could have done without it. +// +// Traditionally, sections are either read-only or read-write. If a +// section contains dynamic relocations, it must have been put into a +// read-write segment so that the program loader can mutate its +// contents in memory, even if no one will write to it at runtime. +// +// RELRO segment allows us to make such pages writable only when a +// program is being loaded. After that, the page becomes read-only. +// +// Some sections, such as .init, .fini, .got, .dynamic, contain +// dynamic relocations but doesn't have to be writable at runtime, +// so they are put into a RELRO segment. 
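// NOTE (editorial, not part of mold or this patch): a rough sketch of what a
// dynamic loader such as glibc's ld.so does with this segment, to show why
// RELRO works at all:
//
//   apply_dynamic_relocations();                       // pages still writable
//   mprotect(relro_page_start, relro_len, PROT_READ);  // then sealed read-only
//
// This is also why "full RELRO" pairs -z relro with -z now: .got.plt can only
// be moved into the read-only range (see the z_now check in is_relro below)
// when every PLT slot is bound eagerly, so nothing needs to write it later.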
+template +bool is_relro(Context &ctx, Chunk *chunk) { + u64 flags = chunk->shdr.sh_flags; + u64 type = chunk->shdr.sh_type; + + if (flags & SHF_WRITE) + return (flags & SHF_TLS) || type == SHT_INIT_ARRAY || + type == SHT_FINI_ARRAY || type == SHT_PREINIT_ARRAY || + chunk == ctx.got || chunk == ctx.dynamic || + chunk == ctx.relro_padding || + (ctx.arg.z_now && ctx.gotplt && chunk == ctx.gotplt) || + chunk->name == ".alpha_got" || chunk->name == ".toc" || + chunk->name.ends_with(".rel.ro"); + return false; +} + +template +static std::vector> create_phdr(Context &ctx) { + std::vector> vec; + + auto define = [&](u64 type, u64 flags, i64 min_align, Chunk *chunk) { + vec.push_back({}); + ElfPhdr &phdr = vec.back(); + phdr.p_type = type; + phdr.p_flags = flags; + phdr.p_align = std::max(min_align, chunk->shdr.sh_addralign); + phdr.p_offset = chunk->shdr.sh_offset; + phdr.p_filesz = + (chunk->shdr.sh_type == SHT_NOBITS) ? 0 : (u64)chunk->shdr.sh_size; + phdr.p_vaddr = chunk->shdr.sh_addr; + phdr.p_paddr = chunk->shdr.sh_addr; + phdr.p_memsz = chunk->shdr.sh_size; + }; + + auto append = [&](Chunk *chunk) { + ElfPhdr &phdr = vec.back(); + phdr.p_align = std::max(phdr.p_align, chunk->shdr.sh_addralign); + if (!(chunk->shdr.sh_type == SHT_NOBITS)) + phdr.p_filesz = chunk->shdr.sh_addr + chunk->shdr.sh_size - phdr.p_vaddr; + phdr.p_memsz = chunk->shdr.sh_addr + chunk->shdr.sh_size - phdr.p_vaddr; + }; + + auto is_bss = [](Chunk *chunk) { + return chunk->shdr.sh_type == SHT_NOBITS && + !(chunk->shdr.sh_flags & SHF_TLS); + }; + + auto is_tbss = [](Chunk *chunk) { + return chunk->shdr.sh_type == SHT_NOBITS && + (chunk->shdr.sh_flags & SHF_TLS); + }; + + auto is_note = [](Chunk *chunk) { + ElfShdr &shdr = chunk->shdr; + return (shdr.sh_type == SHT_NOTE) && (shdr.sh_flags & SHF_ALLOC); + }; + + // Create a PT_PHDR for the program header itself. + if (ctx.phdr && (ctx.phdr->shdr.sh_flags & SHF_ALLOC)) + define(PT_PHDR, PF_R, sizeof(Word), ctx.phdr); + + // Create a PT_INTERP. + if (ctx.interp) + define(PT_INTERP, PF_R, 1, ctx.interp); + + // Create a PT_NOTE for SHF_NOTE sections. + for (i64 i = 0, end = ctx.chunks.size(); i < end;) { + Chunk *first = ctx.chunks[i++]; + if (!is_note(first)) + continue; + + i64 flags = to_phdr_flags(ctx, first); + i64 alignment = first->shdr.sh_addralign; + define(PT_NOTE, flags, alignment, first); + + while (i < end && is_note(ctx.chunks[i]) && + to_phdr_flags(ctx, ctx.chunks[i]) == flags) + append(ctx.chunks[i++]); + } + + // Create PT_LOAD segments. + { + i64 idx = vec.size(); + std::vector *> chunks = ctx.chunks; + std::erase_if(chunks, is_tbss); + + for (i64 i = 0, end = chunks.size(); i < end;) { + Chunk *first = chunks[i++]; + if (!(first->shdr.sh_flags & SHF_ALLOC)) + continue; + + i64 flags = to_phdr_flags(ctx, first); + define(PT_LOAD, flags, ctx.page_size, first); + + // Add contiguous ALLOC sections as long as they have the same + // section flags and there's no on-disk gap in between. + if (!is_bss(first)) + while (i < end && !is_bss(chunks[i]) && + to_phdr_flags(ctx, chunks[i]) == flags && + chunks[i]->shdr.sh_offset - first->shdr.sh_offset == + chunks[i]->shdr.sh_addr - first->shdr.sh_addr) + append(chunks[i++]); + + while (i < end && is_bss(chunks[i]) && + to_phdr_flags(ctx, chunks[i]) == flags) + append(chunks[i++]); + } + + // The ELF spec says that "loadable segment entries in the program + // header table appear in ascending order, sorted on the p_vaddr + // member". 
+ std::stable_sort(vec.begin() + idx, vec.end(), + [](const ElfPhdr &a, const ElfPhdr &b) { + return a.p_vaddr < b.p_vaddr; + }); + } + + // Create a PT_TLS. + for (i64 i = 0; i < ctx.chunks.size(); i++) { + if (ctx.chunks[i]->shdr.sh_flags & SHF_TLS) { + define(PT_TLS, PF_R, 1, ctx.chunks[i++]); + while (i < ctx.chunks.size() && (ctx.chunks[i]->shdr.sh_flags & SHF_TLS)) + append(ctx.chunks[i++]); + } + } + + // Add PT_DYNAMIC + if (ctx.dynamic && ctx.dynamic->shdr.sh_size) + define(PT_DYNAMIC, PF_R | PF_W, 1, ctx.dynamic); + + // Add PT_GNU_EH_FRAME + if (ctx.eh_frame_hdr) + define(PT_GNU_EH_FRAME, PF_R, 1, ctx.eh_frame_hdr); + + // Add PT_GNU_STACK, which is a marker segment that doesn't really + // contain any segments. It controls executable bit of stack area. + ElfPhdr phdr = {}; + phdr.p_type = PT_GNU_STACK, + phdr.p_flags = ctx.arg.z_execstack ? (PF_R | PF_W | PF_X) : (PF_R | PF_W), + phdr.p_align = 1; + vec.push_back(phdr); + + // Create a PT_GNU_RELRO. + if (ctx.arg.z_relro) { + for (i64 i = 0; i < ctx.chunks.size(); i++) { + if (!is_relro(ctx, ctx.chunks[i])) + continue; + + define(PT_GNU_RELRO, PF_R, 1, ctx.chunks[i++]); + while (i < ctx.chunks.size() && is_relro(ctx, ctx.chunks[i])) + append(ctx.chunks[i++]); + vec.back().p_align = 1; + } + } + + // Create a PT_ARM_EDXIDX + if constexpr (is_arm32) + if (OutputSection *osec = find_section(ctx, SHT_ARM_EXIDX)) + define(PT_ARM_EXIDX, PF_R, 4, osec); + + // Create a PT_OPENBSD_RANDOMIZE + for (Chunk *chunk : ctx.chunks) + if (chunk->name == ".openbsd.randomdata") + define(PT_OPENBSD_RANDOMIZE, PF_R | PF_W, 1, chunk); + + // Set p_paddr if --physical-image-base was given. --physical-image-base + // is typically used in embedded programming to specify the base address + // of a memory-mapped ROM area. In that environment, paddr refers to a + // segment's initial location in ROM and vaddr refers the its run-time + // address. + // + // When a device is turned on, it start executing code at a fixed + // location in the ROM area. At that location is a startup routine that + // copies data or code from ROM to RAM before using them. + // + // .data must have different paddr and vaddr because ROM is not writable. + // paddr of .rodata and .text may or may be equal to vaddr. They can be + // directly read or executed from ROM, but oftentimes they are copied + // from ROM to RAM because Flash or EEPROM are usually much slower than + // DRAM. + // + // We want to keep vaddr == pvaddr for as many segments as possible so + // that they can be directly read/executed from ROM. If a gap between + // two segments is two page size or larger, we give up and pack segments + // tightly so that we don't waste too much ROM area. 
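// NOTE (editorial, not part of mold or this patch): a worked example with
// made-up numbers, tracing the loop below. Assume --physical-image-base=0x10000000
// and a 4 KiB page size:
//
//   PT_LOAD #1  vaddr 0x10000000  memsz 0x3000  ->  paddr 0x10000000  (in sync)
//   PT_LOAD #2  vaddr 0x10004000  memsz 0x1000  ->  paddr 0x10004000
//               (gap after #1 is one page, less than two pages, stay in sync)
//   PT_LOAD #3  vaddr 0x10010000  memsz 0x2000  ->  paddr 0x10005000
//               (gap is at least two pages, fall out of sync and pack in ROM)
//
// Once out of sync, each remaining segment's paddr is packed immediately after
// the previous one regardless of its vaddr.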
+ if (ctx.arg.physical_image_base) { + for (i64 i = 0; i < vec.size(); i++) { + if (vec[i].p_type != PT_LOAD) + continue; + + u64 addr = *ctx.arg.physical_image_base; + bool in_sync = (vec[i].p_vaddr == addr); + + vec[i].p_paddr = addr; + addr += vec[i].p_memsz; + + for (i++; i < vec.size() && vec[i].p_type == PT_LOAD; i++) { + ElfPhdr &p = vec[i]; + if (in_sync && addr <= p.p_vaddr && p.p_vaddr < addr + ctx.page_size * 2) { + p.p_paddr = p.p_vaddr; + addr = p.p_vaddr + p.p_memsz; + } else { + in_sync = false; + p.p_paddr = addr; + addr += p.p_memsz; + } + } + break; + } + } + + return vec; +} + +template +void OutputPhdr::update_shdr(Context &ctx) { + phdrs = create_phdr(ctx); + this->shdr.sh_size = phdrs.size() * sizeof(ElfPhdr); + + ctx.tls_begin = get_tls_begin(ctx); + ctx.tp_addr = get_tp_addr(ctx); + ctx.dtp_addr = get_dtp_addr(ctx); +} + +template +void OutputPhdr::copy_buf(Context &ctx) { + write_vector(ctx.buf + this->shdr.sh_offset, phdrs); +} + +template +void InterpSection::update_shdr(Context &ctx) { + this->shdr.sh_size = ctx.arg.dynamic_linker.size() + 1; +} + +template +void InterpSection::copy_buf(Context &ctx) { + write_string(ctx.buf + this->shdr.sh_offset, ctx.arg.dynamic_linker); +} + +template +void RelDynSection::update_shdr(Context &ctx) { + i64 offset = 0; + + for (Chunk *chunk : ctx.chunks) { + chunk->reldyn_offset = offset; + offset += chunk->get_reldyn_size(ctx) * sizeof(ElfRel); + } + + for (ObjectFile *file : ctx.objs) { + file->reldyn_offset = offset; + offset += file->num_dynrel * sizeof(ElfRel); + } + + this->shdr.sh_size = offset; + this->shdr.sh_link = ctx.dynsym->shndx; +} + +template +void RelDynSection::sort(Context &ctx) { + Timer t(ctx, "sort_dynamic_relocs"); + + ElfRel *begin = (ElfRel *)(ctx.buf + this->shdr.sh_offset); + ElfRel *end = (ElfRel *)((u8 *)begin + this->shdr.sh_size); + + auto get_rank = [](u32 r_type) { + if (r_type == E::R_RELATIVE) + return 0; + if constexpr (supports_ifunc) + if (r_type == E::R_IRELATIVE) + return 2; + return 1; + }; + + // This is the reason why we sort dynamic relocations. Quote from + // https://www.airs.com/blog/archives/186: + // + // The dynamic linker in glibc uses a one element cache when processing + // relocs: if a relocation refers to the same symbol as the previous + // relocation, then the dynamic linker reuses the value rather than + // looking up the symbol again. Thus the dynamic linker gets the best + // results if the dynamic relocations are sorted so that all dynamic + // relocations for a given dynamic symbol are adjacent. + // + // Other than that, the linker sorts together all relative relocations, + // which don't have symbols. Two relative relocations, or two relocations + // against the same symbol, are sorted by the address in the output + // file. This tends to optimize paging and caching when there are two + // references from the same page. + // + // We group IFUNC relocations at the end of .rel.dyn because we want to + // apply all the other relocations before running user-supplied ifunc + // resolver functions. 
+ tbb::parallel_sort(begin, end, [&](const ElfRel &a, const ElfRel &b) { + return std::tuple(get_rank(a.r_type), a.r_sym, a.r_offset) < + std::tuple(get_rank(b.r_type), b.r_sym, b.r_offset); + }); +} + +template +void RelrDynSection::update_shdr(Context &ctx) { + this->shdr.sh_link = ctx.dynsym->shndx; + + i64 n = ctx.got->relr.size(); + for (Chunk *chunk : ctx.chunks) + if (OutputSection *osec = chunk->to_osec()) + n += osec->relr.size(); + this->shdr.sh_size = n * sizeof(Word); +} + +template +void RelrDynSection::copy_buf(Context &ctx) { + Word *buf = (Word *)(ctx.buf + this->shdr.sh_offset); + + for (u64 val : ctx.got->relr) + *buf++ = (val & 1) ? val : (ctx.got->shdr.sh_addr + val); + + for (Chunk *chunk : ctx.chunks) + if (OutputSection *osec = chunk->to_osec()) + for (u64 val : osec->relr) + *buf++ = (val & 1) ? val : (osec->shdr.sh_addr + val); +} + +template +void StrtabSection::update_shdr(Context &ctx) { + i64 offset = 1; + + for (Chunk *chunk : ctx.chunks) { + chunk->strtab_offset = offset; + offset += chunk->strtab_size; + } + + for (ObjectFile *file : ctx.objs) { + file->strtab_offset = offset; + offset += file->strtab_size; + } + + for (SharedFile *file : ctx.dsos) { + file->strtab_offset = offset; + offset += file->strtab_size; + } + + this->shdr.sh_size = (offset == 1) ? 0 : offset; +} + +template +void ShstrtabSection::update_shdr(Context &ctx) { + std::unordered_map map; + i64 offset = 1; + + for (Chunk *chunk : ctx.chunks) { + if (chunk->kind() != ChunkKind::HEADER && !chunk->name.empty()) { + auto [it, inserted] = map.insert({chunk->name, offset}); + chunk->shdr.sh_name = it->second; + if (inserted) + offset += chunk->name.size() + 1; + } + } + + this->shdr.sh_size = offset; +} + +template +void ShstrtabSection::copy_buf(Context &ctx) { + u8 *base = ctx.buf + this->shdr.sh_offset; + base[0] = '\0'; + + for (Chunk *chunk : ctx.chunks) + if (chunk->kind() != ChunkKind::HEADER && !chunk->name.empty()) + write_string(base + chunk->shdr.sh_name, chunk->name); +} + +template +i64 DynstrSection::add_string(std::string_view str) { + if (this->shdr.sh_size == 0) + this->shdr.sh_size = 1; + + if (str.empty()) + return 0; + + auto [it, inserted] = strings.insert({str, this->shdr.sh_size}); + if (inserted) + this->shdr.sh_size += str.size() + 1; + return it->second; +} + +template +i64 DynstrSection::find_string(std::string_view str) { + if (str.empty()) + return 0; + + auto it = strings.find(str); + assert(it != strings.end()); + return it->second; +} + +template +void DynstrSection::copy_buf(Context &ctx) { + u8 *base = ctx.buf + this->shdr.sh_offset; + base[0] = '\0'; + + for (std::pair pair : strings) + write_string(base + pair.second, pair.first); + + if (!ctx.dynsym->symbols.empty()) { + i64 offset = dynsym_offset; + for (Symbol *sym : std::span *>(ctx.dynsym->symbols).subspan(1)) + offset += write_string(base + offset, sym->name()); + } +} + +template +void SymtabSection::update_shdr(Context &ctx) { + i64 nsyms = 1; + + // Section symbols + for (Chunk *chunk : ctx.chunks) + if (chunk->shndx) + nsyms++; + + // Linker-synthesized symbols + for (Chunk *chunk : ctx.chunks) { + chunk->local_symtab_idx = nsyms; + nsyms += chunk->num_local_symtab; + } + + // File local symbols + for (ObjectFile *file : ctx.objs) { + file->local_symtab_idx = nsyms; + nsyms += file->num_local_symtab; + } + + // File global symbols + for (ObjectFile *file : ctx.objs) { + file->global_symtab_idx = nsyms; + nsyms += file->num_global_symtab; + } + + for (SharedFile *file : ctx.dsos) { + 
file->global_symtab_idx = nsyms; + nsyms += file->num_global_symtab; + } + + this->shdr.sh_info = ctx.objs[0]->global_symtab_idx; + this->shdr.sh_link = ctx.strtab->shndx; + this->shdr.sh_size = (nsyms == 1) ? 0 : nsyms * sizeof(ElfSym); +} + +template +void SymtabSection::copy_buf(Context &ctx) { + ElfSym *buf = (ElfSym *)(ctx.buf + this->shdr.sh_offset); + memset(buf, 0, sizeof(ElfSym)); + + // Write the initial NUL byte to .strtab. + ctx.buf[ctx.strtab->shdr.sh_offset] = '\0'; + + // Create section symbols + for (Chunk *chunk : ctx.chunks) { + if (chunk->shndx) { + ElfSym &sym = buf[chunk->shndx]; + memset(&sym, 0, sizeof(sym)); + sym.st_type = STT_SECTION; + sym.st_value = chunk->shdr.sh_addr; + sym.st_shndx = chunk->shndx; + } + } + + // Populate linker-synthesized symbols + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + chunk->populate_symtab(ctx); + }); + + // Copy symbols from input files + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + file->populate_symtab(ctx); + }); + + tbb::parallel_for_each(ctx.dsos, [&](SharedFile *file) { + file->populate_symtab(ctx); + }); +} + +template +static std::vector> create_dynamic_section(Context &ctx) { + std::vector> vec; + + auto define = [&](u64 tag, u64 val) { + vec.push_back(tag); + vec.push_back(val); + }; + + for (SharedFile *file : ctx.dsos) + define(DT_NEEDED, ctx.dynstr->find_string(file->soname)); + + if (!ctx.arg.rpaths.empty()) + define(ctx.arg.enable_new_dtags ? DT_RUNPATH : DT_RPATH, + ctx.dynstr->find_string(ctx.arg.rpaths)); + + if (!ctx.arg.soname.empty()) + define(DT_SONAME, ctx.dynstr->find_string(ctx.arg.soname)); + + for (std::string_view str : ctx.arg.auxiliary) + define(DT_AUXILIARY, ctx.dynstr->find_string(str)); + + for (std::string_view str : ctx.arg.filter) + define(DT_FILTER, ctx.dynstr->find_string(str)); + + if (ctx.reldyn->shdr.sh_size) { + define(E::is_rela ? DT_RELA : DT_REL, ctx.reldyn->shdr.sh_addr); + define(E::is_rela ? DT_RELASZ : DT_RELSZ, ctx.reldyn->shdr.sh_size); + define(E::is_rela ? DT_RELAENT : DT_RELENT, sizeof(ElfRel)); + } + + if (ctx.relrdyn) { + define(DT_RELR, ctx.relrdyn->shdr.sh_addr); + define(DT_RELRSZ, ctx.relrdyn->shdr.sh_size); + define(DT_RELRENT, ctx.relrdyn->shdr.sh_entsize); + } + + if (ctx.relplt->shdr.sh_size) { + define(DT_JMPREL, ctx.relplt->shdr.sh_addr); + define(DT_PLTRELSZ, ctx.relplt->shdr.sh_size); + define(DT_PLTREL, E::is_rela ? 
DT_RELA : DT_REL); + } + + if constexpr (is_sparc) { + if (ctx.plt->shdr.sh_size) + define(DT_PLTGOT, ctx.plt->shdr.sh_addr); + } else if constexpr (is_ppc32) { + if (ctx.gotplt->shdr.sh_size) + define(DT_PLTGOT, ctx.gotplt->shdr.sh_addr + GotPltSection::HDR_SIZE); + } else { + if (ctx.gotplt->shdr.sh_size) + define(DT_PLTGOT, ctx.gotplt->shdr.sh_addr); + } + + if (ctx.dynsym->shdr.sh_size) { + define(DT_SYMTAB, ctx.dynsym->shdr.sh_addr); + define(DT_SYMENT, sizeof(ElfSym)); + } + + if (ctx.dynstr->shdr.sh_size) { + define(DT_STRTAB, ctx.dynstr->shdr.sh_addr); + define(DT_STRSZ, ctx.dynstr->shdr.sh_size); + } + + if (find_section(ctx, SHT_INIT_ARRAY)) { + define(DT_INIT_ARRAY, ctx.__init_array_start->value); + define(DT_INIT_ARRAYSZ, + ctx.__init_array_end->value - ctx.__init_array_start->value); + } + + if (find_section(ctx, SHT_PREINIT_ARRAY)) { + define(DT_PREINIT_ARRAY, ctx.__preinit_array_start->value); + define(DT_PREINIT_ARRAYSZ, + ctx.__preinit_array_end->value - ctx.__preinit_array_start->value); + } + + if (find_section(ctx, SHT_FINI_ARRAY)) { + define(DT_FINI_ARRAY, ctx.__fini_array_start->value); + define(DT_FINI_ARRAYSZ, + ctx.__fini_array_end->value - ctx.__fini_array_start->value); + } + + if (ctx.versym->shdr.sh_size) + define(DT_VERSYM, ctx.versym->shdr.sh_addr); + + if (ctx.verneed->shdr.sh_size) { + define(DT_VERNEED, ctx.verneed->shdr.sh_addr); + define(DT_VERNEEDNUM, ctx.verneed->shdr.sh_info); + } + + if (ctx.verdef) { + define(DT_VERDEF, ctx.verdef->shdr.sh_addr); + define(DT_VERDEFNUM, ctx.verdef->shdr.sh_info); + } + + if (Symbol *sym = get_symbol(ctx, ctx.arg.init); + sym->file && !sym->file->is_dso) + define(DT_INIT, sym->get_addr(ctx)); + + if (Symbol *sym = get_symbol(ctx, ctx.arg.fini); + sym->file && !sym->file->is_dso) + define(DT_FINI, sym->get_addr(ctx)); + + if (ctx.hash) + define(DT_HASH, ctx.hash->shdr.sh_addr); + if (ctx.gnu_hash) + define(DT_GNU_HASH, ctx.gnu_hash->shdr.sh_addr); + if (ctx.has_textrel) + define(DT_TEXTREL, 0); + + i64 flags = 0; + i64 flags1 = 0; + + if (ctx.arg.pie) + flags1 |= DF_1_PIE; + + if (ctx.arg.z_now) { + flags |= DF_BIND_NOW; + flags1 |= DF_1_NOW; + } + + if (ctx.arg.z_origin) { + flags |= DF_ORIGIN; + flags1 |= DF_1_ORIGIN; + } + + if (!ctx.arg.z_dlopen) + flags1 |= DF_1_NOOPEN; + if (ctx.arg.z_nodefaultlib) + flags1 |= DF_1_NODEFLIB; + if (!ctx.arg.z_delete) + flags1 |= DF_1_NODELETE; + if (!ctx.arg.z_dump) + flags1 |= DF_1_NODUMP; + if (ctx.arg.z_initfirst) + flags1 |= DF_1_INITFIRST; + if (ctx.arg.z_interpose) + flags1 |= DF_1_INTERPOSE; + + if (!ctx.got->gottp_syms.empty()) + flags |= DF_STATIC_TLS; + if (ctx.has_textrel) + flags |= DF_TEXTREL; + + if (flags) + define(DT_FLAGS, flags); + if (flags1) + define(DT_FLAGS_1, flags1); + + if constexpr (is_ppc32) + define(DT_PPC_GOT, ctx.gotplt->shdr.sh_addr); + + if constexpr (is_ppc64) { + // PPC64_GLINK is defined by the psABI to refer 32 bytes before + // the first PLT entry. I don't know why it's 32 bytes off, but + // it's what it is. + define(DT_PPC64_GLINK, ctx.plt->shdr.sh_addr + E::plt_hdr_size - 32); + } + + // GDB needs a DT_DEBUG entry in an executable to store a word-size + // data for its own purpose. Its content is not important. 
+ if (!ctx.arg.shared) + define(DT_DEBUG, 0); + + define(DT_NULL, 0); + + for (i64 i = 0; i < ctx.arg.spare_dynamic_tags; i++) + define(DT_NULL, 0); + + return vec; +} + +template +void DynamicSection::update_shdr(Context &ctx) { + if (ctx.arg.is_static && !ctx.arg.pie) + return; + + this->shdr.sh_size = create_dynamic_section(ctx).size() * sizeof(Word); + this->shdr.sh_link = ctx.dynstr->shndx; +} + +template +void DynamicSection::copy_buf(Context &ctx) { + std::vector> contents = create_dynamic_section(ctx); + assert(this->shdr.sh_size == contents.size() * sizeof(contents[0])); + write_vector(ctx.buf + this->shdr.sh_offset, contents); +} + +template +void OutputSection::copy_buf(Context &ctx) { + if (this->shdr.sh_type != SHT_NOBITS) + write_to(ctx, ctx.buf + this->shdr.sh_offset); +} + +template +void OutputSection::write_to(Context &ctx, u8 *buf) { + auto clear = [&](u8 *loc, i64 size) { + // As a special case, .init and .fini are filled with NOPs for s390x + // because the runtime executes the sections as if they were a single + // function. .init and .fini are superceded by .init_array and + // .fini_array but being actively used only on s390x. + if constexpr (is_s390x) { + if (this->name == ".init" || this->name == ".fini") { + for (i64 i = 0; i < size; i += 2) + *(ub16 *)(loc + i) = 0x0700; // nop + return; + } + } + memset(loc, 0, size); + }; + + tbb::parallel_for((i64)0, (i64)members.size(), [&](i64 i) { + // Copy section contents to an output file + InputSection &isec = *members[i]; + isec.write_to(ctx, buf + isec.offset); + + // Clear trailing padding + u64 this_end = isec.offset + isec.sh_size; + u64 next_start = (i == members.size() - 1) ? + (u64)this->shdr.sh_size : members[i + 1]->offset; + clear(buf + this_end, next_start - this_end); + }); + + if constexpr (needs_thunk) { + tbb::parallel_for_each(thunks, + [&](std::unique_ptr> &thunk) { + thunk->copy_buf(ctx); + }); + } +} + +// .relr.dyn contains base relocations encoded in a space-efficient form. +// The contents of the section is essentially just a list of addresses +// that have to be fixed up at runtime. +// +// Here is the encoding scheme (we assume 64-bit ELF in this description +// for the sake of simplicity): .relr.dyn contains zero or more address +// groups. Each address group consists of a 64-bit start address followed +// by zero or more 63-bit bitmaps. Let A be the address of a start +// address. Then, the loader fixes address A. If Nth bit in the following +// bitmap is on, the loader also fixes address A + N * 8. In this scheme, +// one address and one bitmap can represent up to 64 base relocations in a +// 512 bytes range. +// +// A start address and a bitmap is distinguished by the lowest significant +// bit. An address must be even and thus its LSB is 0 (odd address is not +// representable in this encoding and such relocation must be stored to +// the .rel.dyn section). A bitmap has LSB 1. 
+static std::vector encode_relr(std::span pos, i64 word_size) { + std::vector vec; + u64 num_bits = word_size * 8 - 1; + u64 max_delta = num_bits * word_size; + + for (i64 i = 0; i < pos.size();) { + assert(i == 0 || pos[i - 1] <= pos[i]); + assert(pos[i] % word_size == 0); + + vec.push_back(pos[i]); + u64 base = pos[i] + word_size; + i++; + + for (;;) { + u64 bits = 0; + for (; i < pos.size() && pos[i] - base < max_delta; i++) + bits |= 1LL << ((pos[i] - base) / word_size); + + if (!bits) + break; + + vec.push_back((bits << 1) | 1); + base += max_delta; + } + } + return vec; +} + +template +void OutputSection::construct_relr(Context &ctx) { + if (!ctx.arg.pic) + return; + if (!(this->shdr.sh_flags & SHF_ALLOC)) + return; + if (this->shdr.sh_addralign % sizeof(Word)) + return; + + // Skip it if it is a text section because .text doesn't usually + // contain any dynamic relocations. + if (this->shdr.sh_flags & SHF_EXECINSTR) + return; + + // Collect base relocations + std::vector> shards(members.size()); + + tbb::parallel_for((i64)0, (i64)members.size(), [&](i64 i) { + InputSection &isec = *members[i]; + if ((1 << isec.p2align) < sizeof(Word)) + return; + + for (const ElfRel &r : isec.get_rels(ctx)) + if (r.r_type == E::R_ABS && (r.r_offset % sizeof(Word)) == 0) + if (Symbol &sym = *isec.file.symbols[r.r_sym]; + !sym.is_absolute() && !sym.is_imported) + shards[i].push_back(isec.offset + r.r_offset); + }); + + // Compress them + std::vector pos = flatten(shards); + relr = encode_relr(pos, sizeof(Word)); +} + +// Compute spaces needed for thunk symbols +template +void OutputSection::compute_symtab_size(Context &ctx) { + if (ctx.arg.strip_all || ctx.arg.retain_symbols_file || ctx.arg.relocatable) + return; + + if constexpr (needs_thunk) { + this->strtab_size = 0; + this->num_local_symtab = 0; + + if constexpr (is_arm32) + this->strtab_size = 9; // for "$t", "$a" and "$d" symbols + + for (std::unique_ptr> &thunk : thunks) { + // For ARM32, we emit additional symbol "$t", "$a" and "$d" for + // each thunk to mark the beginning of ARM code. + if constexpr (is_arm32) + this->num_local_symtab += thunk->symbols.size() * 4; + else + this->num_local_symtab += thunk->symbols.size(); + + for (Symbol *sym : thunk->symbols) + this->strtab_size += sym->name().size() + sizeof("$thunk"); + } + } +} + +// If we create range extension thunks, we also synthesize symbols to mark +// the locations of thunks. Creating such symbols is optional, but it helps +// disassembling and/or debugging our output. +template +void OutputSection::populate_symtab(Context &ctx) { + if (this->strtab_size == 0) + return; + + if constexpr (needs_thunk) { + ElfSym *esym = + (ElfSym *)(ctx.buf + ctx.symtab->shdr.sh_offset) + this->local_symtab_idx; + + u8 *strtab_base = ctx.buf + ctx.strtab->shdr.sh_offset; + u8 *strtab = strtab_base + this->strtab_offset; + + if constexpr (is_arm32) { + // ARM uses these symbols to mark the begining of Thumb code, ARM + // code and data, respectively. Our thunk contains all of them. 
+ strtab += write_string(strtab, "$t"); + strtab += write_string(strtab, "$a"); + strtab += write_string(strtab, "$d"); + } + + for (std::unique_ptr> &thunk : thunks) { + for (i64 i = 0; i < thunk->symbols.size(); i++) { + Symbol &sym = *thunk->symbols[i]; + + auto write_esym = [&](i64 st_name, i64 off) { + memset(esym, 0, sizeof(*esym)); + esym->st_name = st_name; + esym->st_type = STT_FUNC; + esym->st_shndx = this->shndx; + esym->st_value = thunk->get_addr(i) + off; + esym++; + }; + + write_esym(strtab - strtab_base, 0); + + strtab += write_string(strtab, sym.name()) - 1; + strtab += write_string(strtab, "$thunk"); + + // Emit "$t", "$a" and "$d" if ARM32. + if constexpr (is_arm32) { + write_esym(this->strtab_offset, 0); + write_esym(this->strtab_offset + 3, 4); + write_esym(this->strtab_offset + 6, 16); + } + } + } + } +} + +template +void GotSection::add_got_symbol(Context &ctx, Symbol *sym) { + sym->set_got_idx(ctx, this->shdr.sh_size / sizeof(Word)); + this->shdr.sh_size += sizeof(Word); + got_syms.push_back(sym); +} + +template +void GotSection::add_gottp_symbol(Context &ctx, Symbol *sym) { + sym->set_gottp_idx(ctx, this->shdr.sh_size / sizeof(Word)); + this->shdr.sh_size += sizeof(Word); + gottp_syms.push_back(sym); +} + +template +void GotSection::add_tlsgd_symbol(Context &ctx, Symbol *sym) { + sym->set_tlsgd_idx(ctx, this->shdr.sh_size / sizeof(Word)); + this->shdr.sh_size += sizeof(Word) * 2; + tlsgd_syms.push_back(sym); +} + +template +void GotSection::add_tlsdesc_symbol(Context &ctx, Symbol *sym) { + assert(supports_tlsdesc); + sym->set_tlsdesc_idx(ctx, this->shdr.sh_size / sizeof(Word)); + this->shdr.sh_size += sizeof(Word) * 2; + tlsdesc_syms.push_back(sym); + + if (sym != ctx._TLS_MODULE_BASE_) + ctx.dynsym->add_symbol(ctx, sym); +} + +template +void GotSection::add_tlsld(Context &ctx) { + if (tlsld_idx != -1) + return; + tlsld_idx = this->shdr.sh_size / sizeof(Word); + this->shdr.sh_size += sizeof(Word) * 2; +} + +template +u64 GotSection::get_tlsld_addr(Context &ctx) const { + assert(tlsld_idx != -1); + return this->shdr.sh_addr + tlsld_idx * sizeof(Word); +} + +template +struct GotEntry { + bool is_relr(Context &ctx) const { + return r_type == E::R_RELATIVE && ctx.arg.pack_dyn_relocs_relr; + } + + i64 idx = 0; + u64 val = 0; + i64 r_type = R_NONE; + Symbol *sym = nullptr; +}; + +// Get .got and .rel.dyn contents. +// +// .got is a linker-synthesized constant pool whose entry is of pointer +// size. If we know a correct value for an entry, we'll just set that value +// to the entry. Otherwise, we'll create a dynamic relocation and let the +// dynamic linker to fill the entry at load-time. +// +// Most GOT entries contain addresses of global variable. If a global +// variable is an imported symbol, we don't know its address until runtime. +// GOT contains the addresses of such variables at runtime so that we can +// access imported global variables via GOT. +// +// Thread-local variables (TLVs) also use GOT entries. We need them because +// TLVs are accessed in a different way than the ordinary global variables. +// Their addresses are not unique; each thread has its own copy of TLVs. +template +static std::vector> get_got_entries(Context &ctx) { + std::vector> entries; + + // Create GOT entries for ordinary symbols + for (Symbol *sym : ctx.got->got_syms) { + i64 idx = sym->get_got_idx(ctx); + + // If a symbol is imported, let the dynamic linker to resolve it. 
+ if (sym->is_imported) { + entries.push_back({idx, 0, E::R_GLOB_DAT, sym}); + continue; + } + + // IFUNC always needs to be fixed up by the dynamic linker. + if constexpr (supports_ifunc) { + if (sym->is_ifunc()) { + entries.push_back({idx, sym->get_addr(ctx, NO_PLT), E::R_IRELATIVE}); + continue; + } + } + + // If we know an address at link-time, fill that GOT entry now. + // It may need a base relocation, though. + if (ctx.arg.pic && sym->is_relative()) + entries.push_back({idx, sym->get_addr(ctx, NO_PLT), E::R_RELATIVE}); + else + entries.push_back({idx, sym->get_addr(ctx, NO_PLT)}); + } + + // Create GOT entries for TLVs. + for (Symbol *sym : ctx.got->tlsgd_syms) { + i64 idx = sym->get_tlsgd_idx(ctx); + + // If a symbol is imported, let the dynamic linker to resolve it. + if (sym->is_imported) { + entries.push_back({idx, 0, E::R_DTPMOD, sym}); + entries.push_back({idx + 1, 0, E::R_DTPOFF, sym}); + continue; + } + + // If we are creating a shared library, we know the TLV's offset + // within the current TLS block. We don't know the module ID though. + if (ctx.arg.shared) { + entries.push_back({idx, 0, E::R_DTPMOD}); + entries.push_back({idx + 1, sym->get_addr(ctx) - ctx.dtp_addr}); + continue; + } + + // If we are creating an executable, we know both the module ID and the + // offset. Module ID 1 indicates the main executable. + entries.push_back({idx, 1}); + entries.push_back({idx + 1, sym->get_addr(ctx) - ctx.dtp_addr}); + } + + if constexpr (supports_tlsdesc) { + for (Symbol *sym : ctx.got->tlsdesc_syms) { + // _TLS_MODULE_BASE_ is a linker-synthesized virtual symbol that + // refers the begining of the TLS block. + if (sym == ctx._TLS_MODULE_BASE_) + entries.push_back({sym->get_tlsdesc_idx(ctx), 0, E::R_TLSDESC}); + else + entries.push_back({sym->get_tlsdesc_idx(ctx), 0, E::R_TLSDESC, sym}); + } + } + + for (Symbol *sym : ctx.got->gottp_syms) { + i64 idx = sym->get_gottp_idx(ctx); + + // If we know nothing about the symbol, let the dynamic linker + // to fill the GOT entry. + if (sym->is_imported) { + entries.push_back({idx, 0, E::R_TPOFF, sym}); + continue; + } + + // If we know the offset within the current thread vector, + // let the dynamic linker to adjust it. + if (ctx.arg.shared) { + entries.push_back({idx, sym->get_addr(ctx) - ctx.tls_begin, E::R_TPOFF}); + continue; + } + + // Otherwise, we know the offset from the thread pointer (TP) at + // link-time, so we can fill the GOT entry directly. + entries.push_back({idx, sym->get_addr(ctx) - ctx.tp_addr}); + } + + if (ctx.got->tlsld_idx != -1) { + if (ctx.arg.shared) + entries.push_back({ctx.got->tlsld_idx, 0, E::R_DTPMOD}); + else + entries.push_back({ctx.got->tlsld_idx, 1}); // 1 means the main executable + } + + return entries; +} + +template +i64 GotSection::get_reldyn_size(Context &ctx) const { + i64 n = 0; + for (GotEntry &ent : get_got_entries(ctx)) + if (!ent.is_relr(ctx) && ent.r_type != R_NONE) + n++; + return n; +} + +// Fill .got and .rel.dyn. +template +void GotSection::copy_buf(Context &ctx) { + Word *buf = (Word *)(ctx.buf + this->shdr.sh_offset); + memset(buf, 0, this->shdr.sh_size); + + // s390x psABI requires GOT[0] to be set to the link-time value of _DYNAMIC. + if constexpr (is_s390x) + if (ctx.dynamic) + buf[0] = ctx.dynamic->shdr.sh_addr; + + // arm64 psABI doesn't say anything about GOT[0], but glibc/arm64's code + // path for -static-pie wrongly assumed that GOT[0] refers _DYNAMIC. 
+ // + // https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=43d06ed218fc8be5 + if constexpr (is_arm64) + if (ctx.dynamic && ctx.arg.is_static && ctx.arg.pie) + buf[0] = ctx.dynamic->shdr.sh_addr; + + ElfRel *rel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + this->reldyn_offset); + + for (GotEntry &ent : get_got_entries(ctx)) { + if (ent.is_relr(ctx) || ent.r_type == R_NONE) { + buf[ent.idx] = ent.val; + } else { + *rel++ = ElfRel(this->shdr.sh_addr + ent.idx * sizeof(Word), + ent.r_type, + ent.sym ? ent.sym->get_dynsym_idx(ctx) : 0, + ent.val); + + if (ctx.arg.apply_dynamic_relocs) + buf[ent.idx] = ent.val; + } + } +} + +template +void GotSection::construct_relr(Context &ctx) { + assert(ctx.arg.pack_dyn_relocs_relr); + + std::vector pos; + for (GotEntry &ent : get_got_entries(ctx)) + if (ent.is_relr(ctx)) + pos.push_back(ent.idx * sizeof(Word)); + + relr = encode_relr(pos, sizeof(Word)); +} + +template +void GotSection::compute_symtab_size(Context &ctx) { + if (ctx.arg.strip_all || ctx.arg.retain_symbols_file) + return; + + this->strtab_size = 0; + this->num_local_symtab = 0; + + for (Symbol *sym : got_syms) { + this->strtab_size += sym->name().size() + sizeof("$got"); + this->num_local_symtab++; + } + + for (Symbol *sym : gottp_syms) { + this->strtab_size += sym->name().size() + sizeof("$gottp"); + this->num_local_symtab++; + } + + for (Symbol *sym : tlsgd_syms) { + this->strtab_size += sym->name().size() + sizeof("$tlsgd"); + this->num_local_symtab++; + } + + for (Symbol *sym : tlsdesc_syms) { + this->strtab_size += sym->name().size() + sizeof("$tlsdesc"); + this->num_local_symtab++; + } + + if (tlsld_idx != -1) { + this->strtab_size += sizeof("$tlsld"); + this->num_local_symtab++; + } +} + +template +void GotSection::populate_symtab(Context &ctx) { + if (this->strtab_size == 0) + return; + + ElfSym *esym = + (ElfSym *)(ctx.buf + ctx.symtab->shdr.sh_offset) + this->local_symtab_idx; + + u8 *strtab_base = ctx.buf + ctx.strtab->shdr.sh_offset; + u8 *strtab = strtab_base + this->strtab_offset; + + auto write = [&](std::string_view name, std::string_view suffix, i64 value) { + memset(esym, 0, sizeof(*esym)); + esym->st_name = strtab - strtab_base; + esym->st_type = STT_OBJECT; + esym->st_shndx = this->shndx; + esym->st_value = value; + esym++; + + strtab += write_string(strtab, name) - 1; + strtab += write_string(strtab, suffix); + }; + + for (Symbol *sym : got_syms) + write(sym->name(), "$got", sym->get_got_addr(ctx)); + + for (Symbol *sym : gottp_syms) + write(sym->name(), "$gottp", sym->get_gottp_addr(ctx)); + + for (Symbol *sym : tlsgd_syms) + write(sym->name(), "$tlsgd", sym->get_tlsgd_addr(ctx)); + + for (Symbol *sym : tlsdesc_syms) + write(sym->name(), "$tlsdesc", sym->get_tlsdesc_addr(ctx)); + + if (tlsld_idx != -1) + write("", "$tlsld", get_tlsld_addr(ctx)); +} + +template +void GotPltSection::update_shdr(Context &ctx) { + this->shdr.sh_size = HDR_SIZE + ctx.plt->symbols.size() * ENTRY_SIZE; +} + +template +void GotPltSection::copy_buf(Context &ctx) { + // On PPC64, it's dynamic loader responsibility to fill the .got.plt + // section. Dynamic loader finds the address of the first PLT entry by + // DT_PPC64_GLINK and assumes that each PLT entry is 4 bytes long. + if constexpr (!is_ppc64) { + Word *buf = (Word *)(ctx.buf + this->shdr.sh_offset); + + // The first slot of .got.plt points to _DYNAMIC, as requested by + // the psABI. The second and the third slots are reserved by the psABI. + static_assert(HDR_SIZE / sizeof(Word) == 3); + + buf[0] = ctx.dynamic ? 
(u64)ctx.dynamic->shdr.sh_addr : 0; + buf[1] = 0; + buf[2] = 0; + + for (i64 i = 0; i < ctx.plt->symbols.size(); i++) + buf[i + 3] = ctx.plt->shdr.sh_addr; + } +} + +template +void PltSection::add_symbol(Context &ctx, Symbol *sym) { + assert(!sym->has_plt(ctx)); + + sym->set_plt_idx(ctx, symbols.size()); + symbols.push_back(sym); + ctx.dynsym->add_symbol(ctx, sym); +} + +template +void PltSection::update_shdr(Context &ctx) { + if (symbols.empty()) + this->shdr.sh_size = 0; + else + this->shdr.sh_size = to_plt_offset(symbols.size()); +} + +template +void PltSection::copy_buf(Context &ctx) { + u8 *buf = ctx.buf + ctx.plt->shdr.sh_offset; + write_plt_header(ctx, buf); + + for (i64 i = 0; i < symbols.size(); i++) + write_plt_entry(ctx, buf + to_plt_offset(i), *symbols[i]); +} + +template +void PltSection::compute_symtab_size(Context &ctx) { + if (ctx.arg.strip_all || ctx.arg.retain_symbols_file) + return; + + this->num_local_symtab = symbols.size(); + this->strtab_size = 0; + + for (Symbol *sym : symbols) + this->strtab_size += sym->name().size() + sizeof("$plt"); +} + +template +void PltSection::populate_symtab(Context &ctx) { + if (this->strtab_size == 0) + return; + + ElfSym *esym = + (ElfSym *)(ctx.buf + ctx.symtab->shdr.sh_offset) + this->local_symtab_idx; + + u8 *strtab_base = ctx.buf + ctx.strtab->shdr.sh_offset; + u8 *strtab = strtab_base + this->strtab_offset; + + for (Symbol *sym : symbols) { + memset(esym, 0, sizeof(*esym)); + esym->st_name = strtab - strtab_base; + esym->st_type = STT_FUNC; + esym->st_shndx = this->shndx; + esym->st_value = sym->get_plt_addr(ctx); + esym++; + + strtab += write_string(strtab, sym->name()) - 1; + strtab += write_string(strtab, "$plt"); + } +} + +template +void PltGotSection::add_symbol(Context &ctx, Symbol *sym) { + assert(!sym->has_plt(ctx)); + assert(sym->has_got(ctx)); + + sym->set_pltgot_idx(ctx, symbols.size()); + symbols.push_back(sym); + this->shdr.sh_size = symbols.size() * E::pltgot_size; +} + +template +void PltGotSection::copy_buf(Context &ctx) { + u8 *buf = ctx.buf + ctx.pltgot->shdr.sh_offset; + for (i64 i = 0; i < symbols.size(); i++) + write_pltgot_entry(ctx, buf + i * E::pltgot_size, *symbols[i]); +} + +template +void PltGotSection::compute_symtab_size(Context &ctx) { + if (ctx.arg.strip_all || ctx.arg.retain_symbols_file) + return; + + this->num_local_symtab = symbols.size(); + this->strtab_size = 0; + + for (Symbol *sym : symbols) + this->strtab_size += sym->name().size() + sizeof("$pltgot"); +} + +template +void PltGotSection::populate_symtab(Context &ctx) { + if (this->strtab_size == 0) + return; + + ElfSym *esym = + (ElfSym *)(ctx.buf + ctx.symtab->shdr.sh_offset) + this->local_symtab_idx; + + u8 *strtab_base = ctx.buf + ctx.strtab->shdr.sh_offset; + u8 *strtab = strtab_base + this->strtab_offset; + + for (Symbol *sym : symbols) { + memset(esym, 0, sizeof(*esym)); + esym->st_name = strtab - strtab_base; + esym->st_type = STT_FUNC; + esym->st_shndx = this->shndx; + esym->st_value = sym->get_plt_addr(ctx); + esym++; + + strtab += write_string(strtab, sym->name()) - 1; + strtab += write_string(strtab, "$pltgot"); + } +} + +template +void RelPltSection::update_shdr(Context &ctx) { + this->shdr.sh_size = ctx.plt->symbols.size() * sizeof(ElfRel); + this->shdr.sh_link = ctx.dynsym->shndx; + + if (!is_sparc) + this->shdr.sh_info = ctx.gotplt->shndx; +} + +template +void RelPltSection::copy_buf(Context &ctx) { + ElfRel *buf = (ElfRel *)(ctx.buf + this->shdr.sh_offset); + + for (Symbol *sym : ctx.plt->symbols) { + // SPARC doesn't have a 
.got.plt because its role is merged to .plt. + // On SPARC, .plt is writable (!) and the dynamic linker directly + // modifies .plt's machine instructions as it resolves dynamic symbols. + // Therefore, it doesn't need a separate section to store the symbol + // resolution results. That is of course horrible from the security + // point of view, though. + u64 addr = is_sparc ? sym->get_plt_addr(ctx) : sym->get_gotplt_addr(ctx); + *buf++ = ElfRel(addr, E::R_JUMP_SLOT, sym->get_dynsym_idx(ctx), 0); + } +} + +template +ElfSym to_output_esym(Context &ctx, Symbol &sym, u32 st_name, + U32 *shn_xindex) { + ElfSym esym; + memset(&esym, 0, sizeof(esym)); + + esym.st_name = st_name; + esym.st_type = sym.get_type(); + esym.st_size = sym.esym().st_size; + + if (sym.is_local(ctx)) + esym.st_bind = STB_LOCAL; + else if (sym.is_weak) + esym.st_bind = STB_WEAK; + else if (sym.file->is_dso) + esym.st_bind = STB_GLOBAL; + else + esym.st_bind = sym.esym().st_bind; + + if constexpr (is_ppc64v2) + esym.ppc_local_entry = sym.esym().ppc_local_entry; + + if constexpr (is_alpha) + esym.alpha_st_other = sym.esym().alpha_st_other; + + auto get_st_shndx = [&](Symbol &sym) -> u32 { + if (SectionFragment *frag = sym.get_frag()) + if (frag->is_alive) + return frag->output_section.shndx; + + if constexpr (is_ppc64v1) + if (sym.has_opd(ctx)) + return ctx.extra.opd->shndx; + + if (InputSection *isec = sym.get_input_section()) { + if (isec->is_alive) + return isec->output_section->shndx; + else if (isec->is_killed_by_icf()) + return isec->leader->output_section->shndx; + } + + return SHN_UNDEF; + }; + + u32 shndx = 0; + if (sym.has_copyrel) { + shndx = sym.is_copyrel_readonly ? ctx.copyrel_relro->shndx : ctx.copyrel->shndx; + esym.st_value = sym.get_addr(ctx); + } else if (sym.file->is_dso || sym.esym().is_undef()) { + shndx = SHN_UNDEF; + if (sym.is_canonical) + esym.st_value = sym.get_plt_addr(ctx); + } else if (Chunk *osec = sym.get_output_section()) { + // Linker-synthesized symbols + shndx = osec->shndx; + esym.st_value = sym.get_addr(ctx); + } else if (SectionFragment *frag = sym.get_frag()) { + // Section fragment + shndx = frag->output_section.shndx; + esym.st_value = sym.get_addr(ctx); + } else if (!sym.get_input_section()) { + // Absolute symbol + shndx = SHN_ABS; + esym.st_value = sym.get_addr(ctx); + } else if (sym.get_type() == STT_TLS) { + shndx = get_st_shndx(sym); + esym.st_value = sym.get_addr(ctx) - ctx.tls_begin; + } else { + shndx = get_st_shndx(sym); + esym.st_visibility = sym.visibility; + esym.st_value = sym.get_addr(ctx, NO_PLT); + } + + // Symbol's st_shndx is only 16 bits wide, so we can't store a large + // section index there. If the total number of sections is equal to + // or greater than SHN_LORESERVE (= 65280), the real index is stored + // to a SHT_SYMTAB_SHNDX section which contains a parallel array of + // the symbol table. + if (shn_xindex) { + *shn_xindex = shndx; + esym.st_shndx = SHN_XINDEX; + } else { + if (shndx >= SHN_LORESERVE && shndx != SHN_ABS && shndx != SHN_COMMON) + Fatal(ctx) << sym << ": internal error: output symbol index too large: " + << shndx; + esym.st_shndx = shndx; + } + + return esym; +} + +template +void DynsymSection::add_symbol(Context &ctx, Symbol *sym) { + if (symbols.empty()) + symbols.resize(1); + + if (sym->get_dynsym_idx(ctx) == -1) { + sym->set_dynsym_idx(ctx, -2); + symbols.push_back(sym); + } +} + +template +void DynsymSection::finalize(Context &ctx) { + Timer t(ctx, "DynsymSection::finalize"); + if (symbols.empty()) + return; + + // Sort symbols. 
In any symtab, local symbols must precede global symbols. + auto first_global = std::stable_partition(symbols.begin() + 1, symbols.end(), + [&](Symbol *sym) { + return sym->is_local(ctx); + }); + + // We also place undefined symbols before defined symbols for .gnu.hash. + // Defined symbols are sorted by their hashes for .gnu.hash. + if (ctx.gnu_hash) { + // Count the number of exported symbols to compute the size of .gnu.hash. + i64 num_exported = 0; + for (i64 i = 1; i < symbols.size(); i++) + if (symbols[i]->is_exported) + num_exported++; + + u32 num_buckets = num_exported / ctx.gnu_hash->LOAD_FACTOR + 1; + ctx.gnu_hash->num_buckets = num_buckets; + + tbb::parallel_for((i64)(first_global - symbols.begin()), (i64)symbols.size(), + [&](i64 i) { + Symbol &sym = *symbols[i]; + sym.set_dynsym_idx(ctx, i); + sym.set_djb_hash(ctx, djb_hash(sym.name())); + }); + + tbb::parallel_sort(first_global, symbols.end(), + [&](Symbol *a, Symbol *b) { + if (a->is_exported != b->is_exported) + return b->is_exported; + + u32 h1 = a->get_djb_hash(ctx) % num_buckets; + u32 h2 = b->get_djb_hash(ctx) % num_buckets; + return std::tuple(h1, a->get_dynsym_idx(ctx)) < + std::tuple(h2, b->get_dynsym_idx(ctx)); + }); + } + + // Compute .dynstr size + ctx.dynstr->dynsym_offset = ctx.dynstr->shdr.sh_size; + + tbb::enumerable_thread_specific size; + tbb::parallel_for((i64)1, (i64)symbols.size(), [&](i64 i) { + symbols[i]->set_dynsym_idx(ctx, i); + size.local() += symbols[i]->name().size() + 1; + }); + + ctx.dynstr->shdr.sh_size += size.combine(std::plus()); + + // ELF's symbol table sh_info holds the offset of the first global symbol. + this->shdr.sh_info = first_global - symbols.begin(); +} + +template +void DynsymSection::update_shdr(Context &ctx) { + this->shdr.sh_link = ctx.dynstr->shndx; + this->shdr.sh_size = sizeof(ElfSym) * symbols.size(); +} + +template +void DynsymSection::copy_buf(Context &ctx) { + u8 *base = ctx.buf + this->shdr.sh_offset; + memset(base, 0, sizeof(ElfSym)); + i64 name_offset = ctx.dynstr->dynsym_offset; + + for (i64 i = 1; i < symbols.size(); i++) { + Symbol &sym = *symbols[i]; + ElfSym &esym = + *(ElfSym *)(base + sym.get_dynsym_idx(ctx) * sizeof(ElfSym)); + + esym = to_output_esym(ctx, sym, name_offset, nullptr); + name_offset += sym.name().size() + 1; + assert(esym.st_bind != STB_LOCAL || i < this->shdr.sh_info); + } +} + +template +void HashSection::update_shdr(Context &ctx) { + if (ctx.dynsym->symbols.empty()) + return; + + i64 header_size = 8; + i64 num_slots = ctx.dynsym->symbols.size(); + this->shdr.sh_size = header_size + num_slots * 8; + this->shdr.sh_link = ctx.dynsym->shndx; +} + +template +void HashSection::copy_buf(Context &ctx) { + u8 *base = ctx.buf + this->shdr.sh_offset; + memset(base, 0, this->shdr.sh_size); + + std::span *> syms = ctx.dynsym->symbols; + U32 *hdr = (U32 *)base; + U32 *buckets = (U32 *)(base + 8); + U32 *chains = buckets + syms.size(); + + hdr[0] = hdr[1] = syms.size(); + + std::vector hashes(syms.size()); + tbb::parallel_for((i64)1, (i64)syms.size(), [&](i64 i) { + hashes[i] = elf_hash(syms[i]->name()) % syms.size(); + }); + + for (i64 i = 1; i < syms.size(); i++) { + i64 h = hashes[i]; + chains[syms[i]->get_dynsym_idx(ctx)] = buckets[h]; + buckets[h] = syms[i]->get_dynsym_idx(ctx); + } +} + +template +std::span *> +GnuHashSection::get_exported_symbols(Context &ctx) { + std::span *> syms = ctx.dynsym->symbols; + auto it = std::partition_point(syms.begin() + 1, syms.end(), [](Symbol *sym) { + return !sym->is_exported; + }); + return syms.subspan(it - 
syms.begin()); +} + +template +void GnuHashSection::update_shdr(Context &ctx) { + if (ctx.dynsym->symbols.empty()) + return; + + this->shdr.sh_link = ctx.dynsym->shndx; + + i64 num_exported = get_exported_symbols(ctx).size(); + if (num_exported) { + // We allocate 12 bits for each symbol in the bloom filter. + i64 num_bits = num_exported * 12; + num_bloom = bit_ceil(num_bits / (sizeof(Word) * 8)); + } + + this->shdr.sh_size = HEADER_SIZE; // Header + this->shdr.sh_size += num_bloom * sizeof(Word); // Bloom filter + this->shdr.sh_size += num_buckets * 4; // Hash buckets + this->shdr.sh_size += num_exported * 4; // Hash values +} + +template +void GnuHashSection::copy_buf(Context &ctx) { + u8 *base = ctx.buf + this->shdr.sh_offset; + memset(base, 0, this->shdr.sh_size); + + std::span *> syms = get_exported_symbols(ctx); + std::vector indices(syms.size()); + i64 exported_offset = ctx.dynsym->symbols.size() - syms.size(); + + *(U32 *)base = num_buckets; + *(U32 *)(base + 4) = exported_offset; + *(U32 *)(base + 8) = num_bloom; + *(U32 *)(base + 12) = BLOOM_SHIFT; + + // Write a bloom filter + Word *bloom = (Word *)(base + HEADER_SIZE); + + for (i64 i = 0; i < syms.size(); i++) { + constexpr i64 word_bits = sizeof(Word) * 8; + + i64 h = syms[i]->get_djb_hash(ctx); + indices[i] = h % num_buckets; + + i64 idx = (h / word_bits) % num_bloom; + bloom[idx] |= 1LL << (h % word_bits); + bloom[idx] |= 1LL << ((h >> BLOOM_SHIFT) % word_bits); + } + + // Write hash bucket indices + U32 *buckets = (U32 *)(bloom + num_bloom); + + for (i64 i = 0; i < syms.size(); i++) + if (!buckets[indices[i]]) + buckets[indices[i]] = i + exported_offset; + + // Write a hash table + U32 *table = buckets + num_buckets; + + for (i64 i = 0; i < syms.size(); i++) { + // The last entry in a chain must be terminated with an entry with + // least-significant bit 1. + u32 h = syms[i]->get_djb_hash(ctx); + if (i == syms.size() - 1 || indices[i] != indices[i + 1]) + table[i] = h | 1; + else + table[i] = h & ~1; + } +} + +template +std::string_view +get_merged_output_name(Context &ctx, std::string_view name, u64 flags) { + if (ctx.arg.relocatable && !ctx.arg.relocatable_merge_sections) + return name; + if (ctx.arg.unique && ctx.arg.unique->match(name)) + return name; + if (name == ".rodata" || name.starts_with(".rodata.")) + return (flags & SHF_STRINGS) ? ".rodata.str" : ".rodata.cst"; + return name; +} + +template +MergedSection::MergedSection(std::string_view name, u64 flags, u32 type) { + this->name = name; + this->shdr.sh_flags = flags; + this->shdr.sh_type = type; +} + +template +MergedSection * +MergedSection::get_instance(Context &ctx, std::string_view name, + u64 type, u64 flags) { + name = get_merged_output_name(ctx, name, flags); + flags = flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED; + + auto find = [&]() -> MergedSection * { + for (std::unique_ptr> &osec : ctx.merged_sections) + if (std::tuple(name, flags, type) == + std::tuple(osec->name, osec->shdr.sh_flags, osec->shdr.sh_type)) + return osec.get(); + return nullptr; + }; + + // Search for an exiting output section. + static std::shared_mutex mu; + { + std::shared_lock lock(mu); + if (MergedSection *osec = find()) + return osec; + } + + // Create a new output section. 
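// NOTE (editorial, not part of mold or this patch): together with the
// shared_lock block above, this is the classic double-checked get-or-create:
//
//   1. take a shared lock and do a cheap lookup (common case: already exists)
//   2. otherwise take the exclusive lock, look up again, and create only if
//      the section is still missing
//
// The second lookup under the exclusive lock is what keeps two threads from
// racing to create the same MergedSection.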
+ std::unique_lock lock(mu); + if (MergedSection *osec = find()) + return osec; + + MergedSection *osec = new MergedSection(name, flags, type); + ctx.merged_sections.emplace_back(osec); + return osec; +} + +template +SectionFragment * +MergedSection::insert(Context &ctx, std::string_view data, u64 hash, + i64 p2align) { + std::call_once(once_flag, [&] { + // We aim 2/3 occupation ratio + map.resize(estimator.get_cardinality() * 3 / 2); + }); + + // Even if GC is enabled, we garbage-collect only memory-mapped strings. + // Non-memory-allocated strings are typically identifiers used by debug info. + // To remove such strings, use the `strip` command. + bool is_alive = !ctx.arg.gc_sections || !(this->shdr.sh_flags & SHF_ALLOC); + + SectionFragment *frag; + bool inserted; + std::tie(frag, inserted) = + map.insert(data, hash, SectionFragment(this, is_alive)); + update_maximum(frag->p2align, p2align); + return frag; +} + +template +void MergedSection::assign_offsets(Context &ctx) { + std::vector sizes(map.NUM_SHARDS); + std::vector max_p2aligns(map.NUM_SHARDS); + shard_offsets.resize(map.NUM_SHARDS + 1); + + i64 shard_size = map.nbuckets / map.NUM_SHARDS; + + tbb::parallel_for((i64)0, map.NUM_SHARDS, [&](i64 i) { + struct KeyVal { + std::string_view key; + SectionFragment *val; + }; + + std::vector fragments; + fragments.reserve(shard_size); + + for (i64 j = shard_size * i; j < shard_size * (i + 1); j++) + if (const char *key = map.get_key(j)) + if (SectionFragment &frag = map.values[j]; frag.is_alive) + fragments.push_back({{key, map.key_sizes[j]}, &frag}); + + // Sort fragments to make output deterministic. + tbb::parallel_sort(fragments.begin(), fragments.end(), + [](const KeyVal &a, const KeyVal &b) { + return std::tuple{(u32)a.val->p2align, a.key.size(), a.key} < + std::tuple{(u32)b.val->p2align, b.key.size(), b.key}; + }); + + // Assign offsets. 
+ i64 offset = 0; + i64 p2align = 0; + + for (KeyVal &kv : fragments) { + SectionFragment &frag = *kv.val; + offset = align_to(offset, 1 << frag.p2align); + frag.offset = offset; + offset += kv.key.size(); + p2align = std::max(p2align, frag.p2align); + } + + sizes[i] = offset; + max_p2aligns[i] = p2align; + + static Counter merged_strings("merged_strings"); + merged_strings += fragments.size(); + }); + + i64 p2align = 0; + for (i64 x : max_p2aligns) + p2align = std::max(p2align, x); + + for (i64 i = 1; i < map.NUM_SHARDS + 1; i++) + shard_offsets[i] = + align_to(shard_offsets[i - 1] + sizes[i - 1], 1 << p2align); + + tbb::parallel_for((i64)1, map.NUM_SHARDS, [&](i64 i) { + for (i64 j = shard_size * i; j < shard_size * (i + 1); j++) + if (SectionFragment &frag = map.values[j]; frag.is_alive) + frag.offset += shard_offsets[i]; + }); + + this->shdr.sh_size = shard_offsets[map.NUM_SHARDS]; + this->shdr.sh_addralign = 1 << p2align; +} + +template +void MergedSection::copy_buf(Context &ctx) { + write_to(ctx, ctx.buf + this->shdr.sh_offset); +} + +template +void MergedSection::write_to(Context &ctx, u8 *buf) { + i64 shard_size = map.nbuckets / map.NUM_SHARDS; + + tbb::parallel_for((i64)0, map.NUM_SHARDS, [&](i64 i) { + memset(buf + shard_offsets[i], 0, shard_offsets[i + 1] - shard_offsets[i]); + + for (i64 j = shard_size * i; j < shard_size * (i + 1); j++) + if (const char *key = map.get_key(j)) + if (SectionFragment &frag = map.values[j]; frag.is_alive) + memcpy(buf + frag.offset, key, map.key_sizes[j]); + }); +} + +template +void MergedSection::print_stats(Context &ctx) { + i64 used = 0; + for (i64 i = 0; i < map.nbuckets; i++) + if (map.get_key(i)) + used++; + + SyncOut(ctx) << this->name + << " estimation=" << estimator.get_cardinality() + << " actual=" << used; +} + +template +void EhFrameSection::construct(Context &ctx) { + Timer t(ctx, "eh_frame"); + + // If .eh_frame is missing in all input files, we don't want to + // create an output .eh_frame section. + if (std::all_of(ctx.objs.begin(), ctx.objs.end(), + [](ObjectFile *file) { return file->cies.empty(); })) { + this->shdr.sh_size = 0; + return; + } + + // Remove dead FDEs and assign them offsets within their corresponding + // CIE group. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + std::erase_if(file->fdes, [](FdeRecord &fde) { return !fde.is_alive; }); + + i64 offset = 0; + for (FdeRecord &fde : file->fdes) { + fde.output_offset = offset; + offset += fde.size(*file); + } + file->fde_size = offset; + }); + + // Uniquify CIEs and assign offsets to them. + std::vector *> leaders; + auto find_leader = [&](CieRecord &cie) -> CieRecord * { + for (CieRecord *leader : leaders) + if (cie.equals(*leader)) + return leader; + return nullptr; + }; + + i64 offset = 0; + for (ObjectFile *file : ctx.objs) { + for (CieRecord &cie : file->cies) { + if (CieRecord *leader = find_leader(cie)) { + cie.output_offset = leader->output_offset; + } else { + cie.output_offset = offset; + cie.is_leader = true; + offset += cie.size(); + leaders.push_back(&cie); + } + } + } + + // Assign FDE offsets to files. + i64 idx = 0; + for (ObjectFile *file : ctx.objs) { + file->fde_idx = idx; + idx += file->fdes.size(); + + file->fde_offset = offset; + offset += file->fde_size; + } + + // .eh_frame must end with a null word. + this->shdr.sh_size = offset + 4; +} + +// Write to .eh_frame and .eh_frame_hdr. 
+template +void EhFrameSection::copy_buf(Context &ctx) { + u8 *base = ctx.buf + this->shdr.sh_offset; + + struct HdrEntry { + I32 init_addr; + I32 fde_addr; + }; + + HdrEntry *eh_hdr_begin = nullptr; + if (ctx.eh_frame_hdr) + eh_hdr_begin = (HdrEntry *)(ctx.buf + ctx.eh_frame_hdr->shdr.sh_offset + + EhFrameHdrSection::HEADER_SIZE); + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + // Copy CIEs. + for (CieRecord &cie : file->cies) { + if (!cie.is_leader) + continue; + + std::string_view contents = cie.get_contents(); + memcpy(base + cie.output_offset, contents.data(), contents.size()); + + if (ctx.arg.relocatable) + continue; + + for (const ElfRel &rel : cie.get_rels()) { + assert(rel.r_offset - cie.input_offset < contents.size()); + + Symbol &sym = *file->symbols[rel.r_sym]; + u64 loc = cie.output_offset + rel.r_offset - cie.input_offset; + u64 val = sym.get_addr(ctx) + get_addend(cie.input_section, rel); + apply_reloc(ctx, rel, loc, val); + } + } + + // Copy FDEs. + for (i64 i = 0; i < file->fdes.size(); i++) { + FdeRecord &fde = file->fdes[i]; + i64 offset = file->fde_offset + fde.output_offset; + + std::string_view contents = fde.get_contents(*file); + memcpy(base + offset, contents.data(), contents.size()); + + CieRecord &cie = file->cies[fde.cie_idx]; + *(U32 *)(base + offset + 4) = offset + 4 - cie.output_offset; + + if (ctx.arg.relocatable) + continue; + + bool is_first = true; + for (const ElfRel &rel : fde.get_rels(*file)) { + assert(rel.r_offset - fde.input_offset < contents.size()); + + Symbol &sym = *file->symbols[rel.r_sym]; + u64 loc = offset + rel.r_offset - fde.input_offset; + u64 val = sym.get_addr(ctx) + get_addend(cie.input_section, rel); + apply_reloc(ctx, rel, loc, val); + + if (eh_hdr_begin && is_first) { + // Write to .eh_frame_hdr + HdrEntry &ent = eh_hdr_begin[file->fde_idx + i]; + u64 sh_addr = ctx.eh_frame_hdr->shdr.sh_addr; + ent.init_addr = val - sh_addr; + ent.fde_addr = this->shdr.sh_addr + offset - sh_addr; + is_first = false; + } + } + } + }); + + // Write a terminator. + *(U32 *)(base + this->shdr.sh_size - 4) = 0; + + // Sort .eh_frame_hdr contents. + if (eh_hdr_begin) { + tbb::parallel_sort(eh_hdr_begin, eh_hdr_begin + ctx.eh_frame_hdr->num_fdes, + [](const HdrEntry &a, const HdrEntry &b) { + return a.init_addr < b.init_addr; + }); + } +} + +template +void EhFrameHdrSection::update_shdr(Context &ctx) { + num_fdes = 0; + for (ObjectFile *file : ctx.objs) + num_fdes += file->fdes.size(); + this->shdr.sh_size = HEADER_SIZE + num_fdes * 8; +} + +template +void EhFrameHdrSection::copy_buf(Context &ctx) { + u8 *base = ctx.buf + this->shdr.sh_offset; + + // Write a header. The actual table is written by EhFrameHdr::copy_buf. 
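+  // The search table entries themselves are filled in by
+  // EhFrameSection::copy_buf above. The four bytes below are the header
+  // version followed by three pointer encodings: the .eh_frame pointer
+  // (pcrel, sdata4), the FDE count (udata4) and the table entries
+  // (datarel, sdata4).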
+ base[0] = 1; + base[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; + base[2] = DW_EH_PE_udata4; + base[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; + + *(U32 *)(base + 4) = ctx.eh_frame->shdr.sh_addr - this->shdr.sh_addr - 4; + *(U32 *)(base + 8) = num_fdes; +} + +template +void EhFrameRelocSection::update_shdr(Context &ctx) { + tbb::enumerable_thread_specific count; + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (CieRecord &cie : file->cies) + if (cie.is_leader) + count.local() += cie.get_rels().size(); + + for (FdeRecord &fde : file->fdes) + count.local() += fde.get_rels(*file).size(); + }); + + this->shdr.sh_size = count.combine(std::plus()) * sizeof(ElfRel); + this->shdr.sh_link = ctx.symtab->shndx; + this->shdr.sh_info = ctx.eh_frame->shndx; +} + +template +void EhFrameRelocSection::copy_buf(Context &ctx) { + ElfRel *buf = (ElfRel *)(ctx.buf + this->shdr.sh_offset); + + auto copy = [&](ObjectFile &file, InputSection &isec, + const ElfRel &r, u64 offset) { + Symbol &sym = *file.symbols[r.r_sym]; + memset(buf, 0, sizeof(*buf)); + + if (sym.esym().st_type == STT_SECTION) { + // We discard section symbols in input files and re-create new + // ones for each output section. So we need to adjust relocations' + // addends if they refer a section symbol. + InputSection *target = sym.get_input_section(); + buf->r_sym = target->output_section->shndx; + + if constexpr (E::is_rela) + buf->r_addend = get_addend(isec, r) + target->offset; + else if (ctx.arg.relocatable) + write_addend(ctx.buf + ctx.eh_frame->shdr.sh_offset + offset, + get_addend(isec, r) + target->offset, r); + } else { + buf->r_sym = sym.get_output_sym_idx(ctx); + if constexpr (E::is_rela) + buf->r_addend = get_addend(isec, r); + } + + buf->r_offset = ctx.eh_frame->shdr.sh_addr + offset; + buf->r_type = r.r_type; + buf++; + }; + + for (ObjectFile *file : ctx.objs) { + for (CieRecord &cie : file->cies) + if (cie.is_leader) + for (const ElfRel &rel : cie.get_rels()) + copy(*file, cie.input_section, rel, + cie.output_offset + rel.r_offset - cie.input_offset); + + for (FdeRecord &fde : file->fdes) { + i64 offset = file->fde_offset + fde.output_offset; + for (const ElfRel &rel : fde.get_rels(*file)) + copy(*file, file->cies[fde.cie_idx].input_section, rel, + offset + rel.r_offset - fde.input_offset); + } + } +} + +template +void CopyrelSection::add_symbol(Context &ctx, Symbol *sym) { + if (sym->has_copyrel) + return; + + assert(!ctx.arg.shared); + assert(sym->file->is_dso); + + symbols.push_back(sym); + + SharedFile &file = *(SharedFile *)sym->file; + i64 alignment = file.get_alignment(sym); + u64 offset = align_to(this->shdr.sh_size, alignment); + + this->shdr.sh_size = offset + sym->esym().st_size; + this->shdr.sh_addralign = std::max(alignment, this->shdr.sh_addralign); + + // We need to create dynamic symbols not only for this particular symbol + // but also for its aliases (i.e. other symbols at the same address) + // becasue otherwise the aliases are broken apart at runtime. + // For example, `environ`, `_environ` and `__environ` in libc.so are + // aliases. If one of the symbols is copied by a copy relocation, other + // symbols have to refer to the copied place as well. + for (Symbol *sym2 : file.find_aliases(sym)) { + sym2->add_aux(ctx); + sym2->is_imported = true; + sym2->is_exported = true; + sym2->has_copyrel = true; + sym2->is_copyrel_readonly = is_relro; + sym2->value = offset; + ctx.dynsym->add_symbol(ctx, sym2); + } +} + +template +void CopyrelSection::update_shdr(Context &ctx) { + // SHT_NOBITS sections (i.e. 
BSS sections) have to be at the end of + // a segment, so a .copyrel.rel.ro usually requires one extra + // segment for it. We turn a .copyrel.rel.ro into a regular section + // if it is very small to avoid the cost of the extra segment. + constexpr i64 threshold = 4096; + if (is_relro && ctx.arg.z_relro && this->shdr.sh_size < threshold) + this->shdr.sh_type = SHT_PROGBITS; +} + +template +void CopyrelSection::copy_buf(Context &ctx) { + if (this->shdr.sh_type == SHT_PROGBITS) + memset(ctx.buf + this->shdr.sh_offset, 0, this->shdr.sh_size); + + ElfRel *rel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + this->reldyn_offset); + + for (Symbol *sym : symbols) + *rel++ = ElfRel(sym->get_addr(ctx), E::R_COPY, sym->get_dynsym_idx(ctx), + 0); +} + +template +void VersymSection::update_shdr(Context &ctx) { + this->shdr.sh_size = contents.size() * sizeof(contents[0]); + this->shdr.sh_link = ctx.dynsym->shndx; +} + +template +void VersymSection::copy_buf(Context &ctx) { + write_vector(ctx.buf + this->shdr.sh_offset, contents); +} + +template +void VerneedSection::construct(Context &ctx) { + Timer t(ctx, "fill_verneed"); + + if (ctx.dynsym->symbols.empty()) + return; + + // Create a list of versioned symbols and sort by file and version. + std::vector *> syms(ctx.dynsym->symbols.begin() + 1, + ctx.dynsym->symbols.end()); + + std::erase_if(syms, [](Symbol *sym) { + return !sym->file->is_dso || sym->ver_idx <= VER_NDX_LAST_RESERVED; + }); + + if (syms.empty()) + return; + + sort(syms, [](Symbol *a, Symbol *b) { + return std::tuple(((SharedFile *)a->file)->soname, a->ver_idx) < + std::tuple(((SharedFile *)b->file)->soname, b->ver_idx); + }); + + // Resize of .gnu.version + ctx.versym->contents.resize(ctx.dynsym->symbols.size(), 1); + ctx.versym->contents[0] = 0; + + // Allocate a large enough buffer for .gnu.version_r. + contents.resize((sizeof(ElfVerneed) + sizeof(ElfVernaux)) * syms.size()); + + // Fill .gnu.version_r. + u8 *buf = (u8 *)&contents[0]; + u8 *ptr = buf; + ElfVerneed *verneed = nullptr; + ElfVernaux *aux = nullptr; + + u16 veridx = VER_NDX_LAST_RESERVED + ctx.arg.version_definitions.size(); + + auto start_group = [&](InputFile *file) { + this->shdr.sh_info++; + if (verneed) + verneed->vn_next = ptr - (u8 *)verneed; + + verneed = (ElfVerneed *)ptr; + ptr += sizeof(*verneed); + verneed->vn_version = 1; + verneed->vn_file = ctx.dynstr->find_string(((SharedFile *)file)->soname); + verneed->vn_aux = sizeof(ElfVerneed); + aux = nullptr; + }; + + auto add_entry = [&](Symbol *sym) { + verneed->vn_cnt++; + + if (aux) + aux->vna_next = sizeof(ElfVernaux); + aux = (ElfVernaux *)ptr; + ptr += sizeof(*aux); + + std::string_view verstr = sym->get_version(); + aux->vna_hash = elf_hash(verstr); + aux->vna_other = ++veridx; + aux->vna_name = ctx.dynstr->add_string(verstr); + }; + + for (i64 i = 0; i < syms.size(); i++) { + if (i == 0 || syms[i - 1]->file != syms[i]->file) { + start_group(syms[i]->file); + add_entry(syms[i]); + } else if (syms[i - 1]->ver_idx != syms[i]->ver_idx) { + add_entry(syms[i]); + } + + ctx.versym->contents[syms[i]->get_dynsym_idx(ctx)] = veridx; + } + + // Resize .gnu.version_r to fit to its contents. 
+ contents.resize(ptr - buf); +} + +template +void VerneedSection::update_shdr(Context &ctx) { + this->shdr.sh_size = contents.size(); + this->shdr.sh_link = ctx.dynstr->shndx; +} + +template +void VerneedSection::copy_buf(Context &ctx) { + write_vector(ctx.buf + this->shdr.sh_offset, contents); +} + +template +void VerdefSection::construct(Context &ctx) { + Timer t(ctx, "fill_verdef"); + + if (ctx.arg.version_definitions.empty()) + return; + + // Resize .gnu.version + ctx.versym->contents.resize(ctx.dynsym->symbols.size(), 1); + ctx.versym->contents[0] = 0; + + // Allocate a buffer for .gnu.version_d. + contents.resize((sizeof(ElfVerdef) + sizeof(ElfVerdaux)) * + (ctx.arg.version_definitions.size() + 1)); + + u8 *buf = (u8 *)&contents[0]; + u8 *ptr = buf; + ElfVerdef *verdef = nullptr; + + auto write = [&](std::string_view verstr, i64 idx, i64 flags) { + this->shdr.sh_info++; + if (verdef) + verdef->vd_next = ptr - (u8 *)verdef; + + verdef = (ElfVerdef *)ptr; + ptr += sizeof(ElfVerdef); + + verdef->vd_version = 1; + verdef->vd_flags = flags; + verdef->vd_ndx = idx; + verdef->vd_cnt = 1; + verdef->vd_hash = elf_hash(verstr); + verdef->vd_aux = sizeof(ElfVerdef); + + ElfVerdaux *aux = (ElfVerdaux *)ptr; + ptr += sizeof(ElfVerdaux); + aux->vda_name = ctx.dynstr->add_string(verstr); + }; + + std::string_view basename = ctx.arg.soname.empty() ? + ctx.arg.output : ctx.arg.soname; + write(basename, 1, VER_FLG_BASE); + + i64 idx = 2; + for (std::string_view verstr : ctx.arg.version_definitions) + write(verstr, idx++, 0); + + for (Symbol *sym : std::span *>(ctx.dynsym->symbols).subspan(1)) + ctx.versym->contents[sym->get_dynsym_idx(ctx)] = sym->ver_idx; +} + +template +void VerdefSection::update_shdr(Context &ctx) { + this->shdr.sh_size = contents.size(); + this->shdr.sh_link = ctx.dynstr->shndx; +} + +template +void VerdefSection::copy_buf(Context &ctx) { + write_vector(ctx.buf + this->shdr.sh_offset, contents); +} + +inline i64 BuildId::size() const { + switch (kind) { + case HEX: + return value.size(); + case HASH: + return hash_size; + case UUID: + return 16; + default: + unreachable(); + } +} + +template +void BuildIdSection::update_shdr(Context &ctx) { + this->shdr.sh_size = HEADER_SIZE + ctx.arg.build_id.size(); +} + +template +void BuildIdSection::copy_buf(Context &ctx) { + U32 *base = (U32 *)(ctx.buf + this->shdr.sh_offset); + memset(base, 0, this->shdr.sh_size); + base[0] = 4; // Name size + base[1] = ctx.arg.build_id.size(); // Hash size + base[2] = NT_GNU_BUILD_ID; // Type + memcpy(base + 3, "GNU", 4); // Name string +} + +template +static void compute_sha256(Context &ctx, i64 offset) { + u8 *buf = ctx.buf; + i64 filesize = ctx.output_file->filesize; + + i64 shard_size = 4096 * 1024; + i64 num_shards = align_to(filesize, shard_size) / shard_size; + std::vector shards(num_shards * SHA256_SIZE); + + tbb::parallel_for((i64)0, num_shards, [&](i64 i) { + u8 *begin = buf + shard_size * i; + u8 *end = (i == num_shards - 1) ? buf + filesize : begin + shard_size; + sha256_hash(begin, end - begin, shards.data() + i * SHA256_SIZE); + +#ifndef _WIN32 + // We call munmap early for each chunk so that the last munmap + // gets cheaper. We assume that the .note.build-id section is + // at the beginning of an output file. This is an ugly performance + // hack, but we can save about 30 ms for a 2 GiB output. 
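+    // The first chunk (i == 0) is kept mapped because the final digest
+    // still has to be copied into the .note.gnu.build-id section near the
+    // beginning of the file; that chunk is unmapped separately below,
+    // after the digest has been written.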
+ if (i > 0 && ctx.output_file->is_mmapped) + munmap(begin, end - begin); +#endif + }); + + assert(ctx.arg.build_id.size() <= SHA256_SIZE); + + u8 digest[SHA256_SIZE]; + sha256_hash(shards.data(), shards.size(), digest); + memcpy(buf + offset, digest, ctx.arg.build_id.size()); + +#ifndef _WIN32 + if (ctx.output_file->is_mmapped) { + munmap(buf, std::min(filesize, shard_size)); + ctx.output_file->is_unmapped = true; + } +#endif +} + +template +void BuildIdSection::write_buildid(Context &ctx) { + Timer t(ctx, "build_id"); + + switch (ctx.arg.build_id.kind) { + case BuildId::HEX: + write_vector(ctx.buf + this->shdr.sh_offset + HEADER_SIZE, + ctx.arg.build_id.value); + return; + case BuildId::HASH: + // Modern x86 processors have purpose-built instructions to accelerate + // SHA256 computation, and SHA256 outperforms MD5 on such computers. + // So, we always compute SHA256 and truncate it if smaller digest was + // requested. + compute_sha256(ctx, this->shdr.sh_offset + HEADER_SIZE); + return; + case BuildId::UUID: { + std::array uuid = get_uuid_v4(); + memcpy(ctx.buf + this->shdr.sh_offset + HEADER_SIZE, uuid.data(), 16); + return; + } + default: + unreachable(); + } +} + +template +void NotePackageSection::update_shdr(Context &ctx) { + if (!ctx.arg.package_metadata.empty()) { + // +17 is for the header and the NUL terminator + this->shdr.sh_size = align_to(ctx.arg.package_metadata.size() + 17, 4); + } +} + +template +void NotePackageSection::copy_buf(Context &ctx) { + U32 *buf = (U32 *)(ctx.buf + this->shdr.sh_offset); + memset(buf, 0, this->shdr.sh_size); + + buf[0] = 4; // Name size + buf[1] = this->shdr.sh_size - 16; // Content size + buf[2] = NT_FDO_PACKAGING_METADATA; // Type + memcpy(buf + 3, "FDO", 4); // Name + write_string(buf + 4, ctx.arg.package_metadata); // Content +} + +// Merges input files' .note.gnu.property values. +template +void NotePropertySection::update_shdr(Context &ctx) { + // The rules we support are only specified for x86 psABI + if (!is_x86) + return; + + // Reset to the initial state so that this function is idempotent + properties.clear(); + + // Obtain the list of keys + std::vector *> files = ctx.objs; + std::erase(files, ctx.internal_obj); + std::set keys; + + for (ObjectFile *file : files) + for (std::pair kv : file->gnu_properties) + keys.insert(kv.first); + + auto get_value = [](ObjectFile *file, u32 key) -> u32 { + auto it = file->gnu_properties.find(key); + if (it != file->gnu_properties.end()) + return it->second; + return 0; + }; + + // Merge values for each key + for (u32 key : keys) { + auto has_key = [&](ObjectFile *file) { + return file->gnu_properties.contains(key); + }; + + if (GNU_PROPERTY_X86_UINT32_AND_LO <= key && + key <= GNU_PROPERTY_X86_UINT32_AND_HI) { + // An AND feature is set if all input objects have the property and + // the feature. + if (std::all_of(files.begin(), files.end(), has_key)) { + properties[key] = 0xffff'ffff; + for (ObjectFile *file : files) + properties[key] &= get_value(file, key); + } + } else if (GNU_PROPERTY_X86_UINT32_OR_LO <= key && + key <= GNU_PROPERTY_X86_UINT32_OR_HI) { + // An OR feature is set if some input object has the feature. + for (ObjectFile *file : files) + properties[key] |= get_value(file, key); + } else if (GNU_PROPERTY_X86_UINT32_OR_AND_LO <= key && + key <= GNU_PROPERTY_X86_UINT32_OR_AND_HI) { + // An OR-AND feature is set if all input object files have the property + // and some of them has the feature. 
+ if (std::all_of(files.begin(), files.end(), has_key)) + for (ObjectFile *file : files) + properties[key] |= get_value(file, key); + } + } + + if (ctx.arg.z_ibt) + properties[GNU_PROPERTY_X86_FEATURE_1_AND] |= GNU_PROPERTY_X86_FEATURE_1_IBT; + if (ctx.arg.z_shstk) + properties[GNU_PROPERTY_X86_FEATURE_1_AND] |= GNU_PROPERTY_X86_FEATURE_1_SHSTK; + + std::erase_if(properties, [](std::pair kv) { + return kv.second == 0; + }); + + if (properties.empty()) + this->shdr.sh_size = 0; + else + this->shdr.sh_size = 16 + ENTRY_SIZE * properties.size(); +} + +template +void NotePropertySection::copy_buf(Context &ctx) { + U32 *buf = (U32 *)(ctx.buf + this->shdr.sh_offset); + memset(buf, 0, this->shdr.sh_size); + + buf[0] = 4; // Name size + buf[1] = ENTRY_SIZE * properties.size(); // Content size + buf[2] = NT_GNU_PROPERTY_TYPE_0; // Type + memcpy(buf + 3, "GNU", 4); // Name + + i64 idx = 4; + for (std::pair kv : properties) { + buf[idx] = kv.first; // Feature type + buf[idx + 1] = 4; // Feature size + buf[idx + 2] = kv.second; // Feature flags + idx += ENTRY_SIZE / sizeof(U32); + } +} + +// This page explains the format of .gdb_index: +// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html +template +void GdbIndexSection::construct(Context &ctx) { + Timer t(ctx, "GdbIndexSection::construct"); + + std::atomic_bool has_debug_info = false; + + // Read debug sections + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + if (file->debug_info) { + // Read compilation units from .debug_info. + file->compunits = read_compunits(ctx, *file); + + // Count the number of address areas contained in this file. + file->num_areas = estimate_address_areas(ctx, *file); + has_debug_info = true; + } + }); + + if (!has_debug_info) + return; + + // Initialize `area_offset` and `compunits_idx`. + for (i64 i = 0; i < ctx.objs.size() - 1; i++) { + ctx.objs[i + 1]->area_offset = + ctx.objs[i]->area_offset + ctx.objs[i]->num_areas * 20; + ctx.objs[i + 1]->compunits_idx = + ctx.objs[i]->compunits_idx + ctx.objs[i]->compunits.size(); + } + + // Read .debug_gnu_pubnames and .debug_gnu_pubtypes. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + file->gdb_names = read_pubnames(ctx, *file); + }); + + // Estimate the unique number of pubnames. + HyperLogLog estimator; + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + HyperLogLog e; + for (GdbIndexName &name : file->gdb_names) + e.insert(name.hash); + estimator.merge(e); + }); + + // Uniquify pubnames by inserting all name strings into a concurrent + // hashmap. + map.resize(estimator.get_cardinality() * 2); + tbb::enumerable_thread_specific num_names; + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (GdbIndexName &name : file->gdb_names) { + MapEntry *ent; + bool inserted; + std::tie(ent, inserted) = map.insert(name.name, name.hash, {file, name.hash}); + if (inserted) + num_names.local()++; + + ObjectFile *old_val = ent->owner; + while (file->priority < old_val->priority && + !ent->owner.compare_exchange_weak(old_val, file)); + + ent->num_attrs++; + name.entry_idx = ent - map.values; + } + }); + + // Assign offsets for names and attributes within each file. 
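+  // Each unique name is owned by the referencing file with the lowest
+  // priority value. The owner reserves space both for the NUL-terminated
+  // name string and for the name's CU vector, which consists of a 32-bit
+  // count followed by one 32-bit attribute per reference to the name.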
+ tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (GdbIndexName &name : file->gdb_names) { + MapEntry &ent = map.values[name.entry_idx]; + if (ent.owner == file) { + ent.attr_offset = file->attrs_size; + file->attrs_size += (ent.num_attrs + 1) * 4; + ent.name_offset = file->names_size; + file->names_size += name.name.size() + 1; + } + } + }); + + // Compute per-file name and attributes offsets. + for (i64 i = 0; i < ctx.objs.size() - 1; i++) + ctx.objs[i + 1]->attrs_offset = + ctx.objs[i]->attrs_offset + ctx.objs[i]->attrs_size; + + ctx.objs[0]->names_offset = + ctx.objs.back()->attrs_offset + ctx.objs.back()->attrs_size; + + for (i64 i = 0; i < ctx.objs.size() - 1; i++) + ctx.objs[i + 1]->names_offset = + ctx.objs[i]->names_offset + ctx.objs[i]->names_size; + + // .gdb_index contains an on-disk hash table for pubnames and + // pubtypes. We aim 75% utilization. As per the format specification, + // It must be a power of two. + i64 num_symtab_entries = + std::max(bit_ceil(num_names.combine(std::plus()) * 4 / 3), 16); + + // Now that we can compute the size of this section. + ObjectFile &last = *ctx.objs.back(); + i64 compunits_size = (last.compunits_idx + last.compunits.size()) * 16; + i64 areas_size = last.area_offset + last.num_areas * 20; + i64 offset = sizeof(header); + + header.cu_list_offset = offset; + offset += compunits_size; + + header.cu_types_offset = offset; + header.areas_offset = offset; + offset += areas_size; + + header.symtab_offset = offset; + offset += num_symtab_entries * 8; + + header.const_pool_offset = offset; + offset += last.names_offset + last.names_size; + + this->shdr.sh_size = offset; +} + +template +void GdbIndexSection::copy_buf(Context &ctx) { + u8 *buf = ctx.buf + this->shdr.sh_offset; + + // Write section header. + memcpy(buf, &header, sizeof(header)); + buf += sizeof(header); + + // Write compilation unit list. + for (ObjectFile *file : ctx.objs) { + if (file->debug_info) { + u64 offset = file->debug_info->offset; + for (std::string_view cu : file->compunits) { + *(ul64 *)buf = offset; + *(ul64 *)(buf + 8) = cu.size(); + buf += 16; + offset += cu.size(); + } + } + } + + // Skip address areas. It'll be filled by write_address_areas. + buf += header.symtab_offset - header.areas_offset; + + // Write an on-disk hash table for names. 
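+  // The table uses open addressing: the number of slots is a power of
+  // two, the initial slot is (hash & mask), and collisions are resolved
+  // by probing with the odd step ((hash & mask) | 1), which is co-prime
+  // with the table size. Each 8-byte slot stores a name offset and a CU
+  // vector offset into the constant pool.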
+ u32 symtab_size = header.const_pool_offset - header.symtab_offset; + memset(buf, 0, symtab_size); + + assert(has_single_bit(symtab_size / 8)); + u32 mask = symtab_size / 8 - 1; + + for (i64 i = 0; i < map.nbuckets; i++) { + if (map.get_key(i)) { + u32 hash = map.values[i].hash; + u32 step = (hash & mask) | 1; + u32 j = hash & mask; + + while (*(U32 *)(buf + j * 8)) + j = (j + step) & mask; + + ObjectFile &file = *map.values[i].owner; + *(ul32 *)(buf + j * 8) = file.names_offset + map.values[i].name_offset; + *(ul32 *)(buf + j * 8 + 4) = file.attrs_offset + map.values[i].attr_offset; + } + } + + buf += symtab_size; + + // Write CU vector + memset(buf, 0, ctx.objs[0]->names_offset); + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + std::atomic_uint32_t *attrs = (std::atomic_uint32_t *)buf; + + for (GdbIndexName &name : file->gdb_names) { + MapEntry &ent = map.values[name.entry_idx]; + u32 idx = (ent.owner.load()->attrs_offset + ent.attr_offset) / 4; + attrs[idx + ++attrs[idx]] = name.attr; + } + }); + + // Sort CU vector for build reproducibility + const i64 shard_size = map.nbuckets / map.NUM_SHARDS; + + tbb::parallel_for((i64)0, (i64)map.NUM_SHARDS, [&](i64 i) { + u32 *attrs = (u32 *)buf; + + for (i64 j = shard_size * i; j < shard_size * (i + 1); j++) { + if (map.get_key(j)) { + MapEntry &ent = map.values[j]; + u32 idx = (ent.owner.load()->attrs_offset + ent.attr_offset) / 4; + u32 *start = attrs + idx + 1; + std::sort(start, start + attrs[idx]); + } + } + }); + + // .gdb_index contents are little-endian, so swap bytes if big-endian. + if constexpr (std::endian::native == std::endian::big) + for (i64 i = 0; i < ctx.objs[0]->names_offset; i += 4) + *(u32 *)(buf + i) = bswap(*(u32 *)(buf + i)); + + // Write pubnames and pubtypes. + tbb::parallel_for((i64)0, (i64)map.NUM_SHARDS, [&](i64 i) { + for (i64 j = shard_size * i; j < shard_size * (i + 1); j++) { + if (const char *key = map.get_key(j)) { + ObjectFile &file = *map.values[j].owner; + std::string_view name{key, map.key_sizes[j]}; + write_string(buf + file.names_offset + map.values[j].name_offset, name); + } + } + }); +} + +template +void GdbIndexSection::write_address_areas(Context &ctx) { + Timer t(ctx, "GdbIndexSection::write_address_areas"); + + if (this->shdr.sh_size == 0) + return; + + u8 *base = ctx.buf + this->shdr.sh_offset; + + for (Chunk *chunk : ctx.chunks) { + std::string_view name = chunk->name; + if (name == ".debug_info") + ctx.debug_info = chunk; + if (name == ".debug_abbrev") + ctx.debug_abbrev = chunk; + if (name == ".debug_ranges") + ctx.debug_ranges = chunk; + if (name == ".debug_addr") + ctx.debug_addr = chunk; + if (name == ".debug_rnglists") + ctx.debug_rnglists = chunk; + } + + assert(ctx.debug_info); + assert(ctx.debug_abbrev); + + struct Entry { + ul64 start; + ul64 end; + ul32 attr; + }; + + // Read address ranges from debug sections and copy them to .gdb_index. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + if (!file->debug_info) + return; + + Entry *begin = (Entry *)(base + header.areas_offset + file->area_offset); + Entry *e = begin; + u64 offset = file->debug_info->offset; + + for (i64 i = 0; i < file->compunits.size(); i++) { + std::vector addrs = read_address_areas(ctx, *file, offset); + + for (i64 j = 0; j < addrs.size(); j += 2) { + // Skip an empty range + if (addrs[j] == addrs[j + 1]) + continue; + + // Gdb crashes if there are entries with address 0. 
+ if (addrs[j] == 0) + continue; + + assert(e < begin + file->num_areas); + e->start = addrs[j]; + e->end = addrs[j + 1]; + e->attr = file->compunits_idx + i; + e++; + } + offset += file->compunits[i].size(); + } + + // Fill trailing null entries with dummy values because gdb + // crashes if there are entries with address 0. + u64 filler; + if (e == begin) + filler = ctx.etext->get_addr(ctx) - 1; + else + filler = e[-1].start; + + for (; e < begin + file->num_areas; e++) { + e->start = filler; + e->end = filler; + e->attr = file->compunits_idx; + } + }); +} + +template +CompressedSection::CompressedSection(Context &ctx, Chunk &chunk) { + assert(chunk.name.starts_with(".debug")); + this->name = chunk.name; + + uncompressed.reset(new u8[chunk.shdr.sh_size]); + chunk.write_to(ctx, uncompressed.get()); + + switch (ctx.arg.compress_debug_sections) { + case COMPRESS_ZLIB: + chdr.ch_type = ELFCOMPRESS_ZLIB; + compressed.reset(new ZlibCompressor(uncompressed.get(), chunk.shdr.sh_size)); + break; + case COMPRESS_ZSTD: + chdr.ch_type = ELFCOMPRESS_ZSTD; + compressed.reset(new ZstdCompressor(uncompressed.get(), chunk.shdr.sh_size)); + break; + default: + unreachable(); + } + + chdr.ch_size = chunk.shdr.sh_size; + chdr.ch_addralign = chunk.shdr.sh_addralign; + + this->shdr = chunk.shdr; + this->shdr.sh_flags |= SHF_COMPRESSED; + this->shdr.sh_addralign = 1; + this->shdr.sh_size = sizeof(chdr) + compressed->compressed_size; + this->shndx = chunk.shndx; + + // We don't need to keep the original data unless --gdb-index is given. + if (!ctx.arg.gdb_index) + uncompressed.reset(nullptr); +} + +template +void CompressedSection::copy_buf(Context &ctx) { + u8 *base = ctx.buf + this->shdr.sh_offset; + memcpy(base, &chdr, sizeof(chdr)); + compressed->write_to(base + sizeof(chdr)); +} + +template +RelocSection::RelocSection(Context &ctx, OutputSection &osec) + : output_section(osec) { + if constexpr (E::is_rela) { + this->name = save_string(ctx, ".rela" + std::string(osec.name)); + this->shdr.sh_type = SHT_RELA; + } else { + this->name = save_string(ctx, ".rel" + std::string(osec.name)); + this->shdr.sh_type = SHT_REL; + } + + this->shdr.sh_flags = SHF_INFO_LINK; + this->shdr.sh_addralign = sizeof(Word); + this->shdr.sh_entsize = sizeof(ElfRel); + + // Compute an offset for each input section + offsets.resize(osec.members.size()); + + auto scan = [&](const tbb::blocked_range &r, i64 sum, bool is_final) { + for (i64 i = r.begin(); i < r.end(); i++) { + InputSection &isec = *osec.members[i]; + if (is_final) + offsets[i] = sum; + sum += isec.get_rels(ctx).size(); + } + return sum; + }; + + i64 num_entries = tbb::parallel_scan( + tbb::blocked_range(0, osec.members.size()), 0, scan, std::plus()); + + this->shdr.sh_size = num_entries * sizeof(ElfRel); +} + +template +void RelocSection::update_shdr(Context &ctx) { + this->shdr.sh_link = ctx.symtab->shndx; + this->shdr.sh_info = output_section.shndx; +} + +template +void RelocSection::copy_buf(Context &ctx) { + auto write = [&](ElfRel &out, InputSection &isec, const ElfRel &rel) { + i64 symidx = 0; + i64 addend = 0; + + Symbol &sym = *isec.file.symbols[rel.r_sym]; + + if (sym.esym().st_type == STT_SECTION) { + if (SectionFragment *frag = sym.get_frag()) { + symidx = frag->output_section.shndx; + addend = frag->offset + sym.value + get_addend(isec, rel); + } else { + InputSection *target = sym.get_input_section(); + + if (OutputSection *osec = target->output_section) { + symidx = osec->shndx; + addend = get_addend(isec, rel) + target->offset; + } else if (isec.name() == 
".eh_frame") { + symidx = ctx.eh_frame->shndx; + addend = get_addend(isec, rel); + } else { + // This is usually a dead debug section referring a + // COMDAT-eliminated section. + } + } + } else { + if (sym.sym_idx) + symidx = sym.get_output_sym_idx(ctx); + addend = get_addend(isec, rel); + } + + if constexpr (is_alpha) + if (rel.r_type == R_ALPHA_GPDISP || rel.r_type == R_ALPHA_LITUSE) + addend = rel.r_addend; + + i64 r_offset = isec.output_section->shdr.sh_addr + isec.offset + rel.r_offset; + out = ElfRel(r_offset, rel.r_type, symidx, addend); + + if (ctx.arg.relocatable) { + u8 *base = ctx.buf + isec.output_section->shdr.sh_offset + isec.offset; + write_addend(base + rel.r_offset, addend, rel); + } + }; + + tbb::parallel_for((i64)0, (i64)output_section.members.size(), [&](i64 i) { + ElfRel *buf = (ElfRel *)(ctx.buf + this->shdr.sh_offset) + offsets[i]; + InputSection &isec = *output_section.members[i]; + std::span> rels = isec.get_rels(ctx); + + for (i64 j = 0; j < rels.size(); j++) + write(buf[j], isec, rels[j]); + }); +} + +template +void ComdatGroupSection::update_shdr(Context &ctx) { + assert(ctx.arg.relocatable); + this->shdr.sh_link = ctx.symtab->shndx; + this->shdr.sh_info = sym.get_output_sym_idx(ctx); +} + +template +void ComdatGroupSection::copy_buf(Context &ctx) { + U32 *buf = (U32 *)(ctx.buf + this->shdr.sh_offset); + *buf++ = GRP_COMDAT; + for (Chunk *chunk : members) + *buf++ = chunk->shndx; +} + +using E = MOLD_TARGET; + +template class Chunk; +template class OutputEhdr; +template class OutputShdr; +template class OutputPhdr; +template class InterpSection; +template class OutputSection; +template class GotSection; +template class GotPltSection; +template class PltSection; +template class PltGotSection; +template class RelPltSection; +template class RelDynSection; +template class RelrDynSection; +template class StrtabSection; +template class ShstrtabSection; +template class DynstrSection; +template class DynamicSection; +template class SymtabSection; +template class DynsymSection; +template class HashSection; +template class GnuHashSection; +template class MergedSection; +template class EhFrameSection; +template class EhFrameHdrSection; +template class EhFrameRelocSection; +template class CopyrelSection; +template class VersymSection; +template class VerneedSection; +template class VerdefSection; +template class BuildIdSection; +template class NotePackageSection; +template class NotePropertySection; +template class GdbIndexSection; +template class CompressedSection; +template class RelocSection; +template class ComdatGroupSection; +template i64 to_phdr_flags(Context &ctx, Chunk *chunk); +template bool is_relro(Context &, Chunk *); +template ElfSym to_output_esym(Context &, Symbol &, u32, U32 *); + +} // namespace mold::elf diff --git a/third_party/mold/elf/passes.cc b/third_party/mold/elf/passes.cc new file mode 100644 index 00000000000..06b3dc3b378 --- /dev/null +++ b/third_party/mold/elf/passes.cc @@ -0,0 +1,2653 @@ +// clang-format off +#include "third_party/mold/elf/mold.h" + +#include "third_party/libcxx/fstream" +#include "third_party/libcxx/functional" +#include "third_party/libcxx/map" +#include "third_party/libcxx/optional" +#include "third_party/libcxx/random" +#include "third_party/libcxx/regex" +// MISSING #include +// MISSING #include +// MISSING #include +// MISSING #include +#include "third_party/libcxx/unordered_set" + +namespace mold::elf { + +template +void apply_exclude_libs(Context &ctx) { + Timer t(ctx, "apply_exclude_libs"); + + if 
(ctx.arg.exclude_libs.empty()) + return; + + std::unordered_set set(ctx.arg.exclude_libs.begin(), + ctx.arg.exclude_libs.end()); + + for (ObjectFile *file : ctx.objs) { + if (!file->archive_name.empty()) + if (set.contains("ALL") || + set.contains(filepath(file->archive_name).filename().string())) + file->exclude_libs = true; + } +} + +template +void create_synthetic_sections(Context &ctx) { + auto push = [&](T *x) { + ctx.chunks.push_back(x); + ctx.chunk_pool.emplace_back(x); + return x; + }; + + if (!ctx.arg.oformat_binary) { + auto find = [&](std::string_view name) { + for (SectionOrder &ord : ctx.arg.section_order) + if (ord.type == SectionOrder::SECTION && ord.name == name) + return true; + return false; + }; + + if (ctx.arg.section_order.empty() || find("EHDR")) + ctx.ehdr = push(new OutputEhdr(SHF_ALLOC)); + else + ctx.ehdr = push(new OutputEhdr(0)); + + if (ctx.arg.section_order.empty() || find("PHDR")) + ctx.phdr = push(new OutputPhdr(SHF_ALLOC)); + else + ctx.phdr = push(new OutputPhdr(0)); + + ctx.shdr = push(new OutputShdr); + } + + ctx.got = push(new GotSection); + + if constexpr (!is_sparc) + ctx.gotplt = push(new GotPltSection); + + ctx.reldyn = push(new RelDynSection); + ctx.relplt = push(new RelPltSection); + + if (ctx.arg.pack_dyn_relocs_relr) + ctx.relrdyn = push(new RelrDynSection); + + ctx.strtab = push(new StrtabSection); + ctx.plt = push(new PltSection); + ctx.pltgot = push(new PltGotSection); + ctx.symtab = push(new SymtabSection); + ctx.dynsym = push(new DynsymSection); + ctx.dynstr = push(new DynstrSection); + ctx.eh_frame = push(new EhFrameSection); + ctx.copyrel = push(new CopyrelSection(false)); + ctx.copyrel_relro = push(new CopyrelSection(true)); + + if (!ctx.arg.oformat_binary) + ctx.shstrtab = push(new ShstrtabSection); + + if (!ctx.arg.dynamic_linker.empty()) + ctx.interp = push(new InterpSection); + if (ctx.arg.build_id.kind != BuildId::NONE) + ctx.buildid = push(new BuildIdSection); + if (ctx.arg.eh_frame_hdr) + ctx.eh_frame_hdr = push(new EhFrameHdrSection); + if (ctx.arg.gdb_index) + ctx.gdb_index = push(new GdbIndexSection); + if (ctx.arg.z_relro && ctx.arg.section_order.empty() && + ctx.arg.z_separate_code != SEPARATE_LOADABLE_SEGMENTS) + ctx.relro_padding = push(new RelroPaddingSection); + if (ctx.arg.hash_style_sysv) + ctx.hash = push(new HashSection); + if (ctx.arg.hash_style_gnu) + ctx.gnu_hash = push(new GnuHashSection); + if (!ctx.arg.version_definitions.empty()) + ctx.verdef = push(new VerdefSection); + if (ctx.arg.emit_relocs) + ctx.eh_frame_reloc = push(new EhFrameRelocSection); + + if (ctx.arg.shared || !ctx.dsos.empty() || ctx.arg.pie) + ctx.dynamic = push(new DynamicSection); + + ctx.versym = push(new VersymSection); + ctx.verneed = push(new VerneedSection); + ctx.note_package = push(new NotePackageSection); + ctx.note_property = push(new NotePropertySection); + + + if constexpr (is_ppc64v1) + ctx.extra.opd = push(new PPC64OpdSection); + + if constexpr (is_sparc) { + if (ctx.arg.is_static) + ctx.extra.tls_get_addr_sec = push(new SparcTlsGetAddrSection); + ctx.extra.tls_get_addr_sym = get_symbol(ctx, "__tls_get_addr"); + } + + if constexpr (is_alpha) + ctx.extra.got = push(new AlphaGotSection); + + // If .dynamic exists, .dynsym and .dynstr must exist as well + // since .dynamic refers them. 
+ if (ctx.dynamic) { + ctx.dynstr->keep(); + ctx.dynsym->keep(); + } +} + +template +static void mark_live_objects(Context &ctx) { + auto mark_symbol = [&](std::string_view name) { + if (InputFile *file = get_symbol(ctx, name)->file) + file->is_alive = true; + }; + + for (std::string_view name : ctx.arg.undefined) + mark_symbol(name); + for (std::string_view name : ctx.arg.require_defined) + mark_symbol(name); + + std::vector *> roots; + + for (InputFile *file : ctx.objs) + if (file->is_alive) + roots.push_back(file); + + for (InputFile *file : ctx.dsos) + if (file->is_alive) + roots.push_back(file); + + tbb::parallel_for_each(roots, [&](InputFile *file, + tbb::feeder *> &feeder) { + if (file->is_alive) + file->mark_live_objects(ctx, [&](InputFile *obj) { feeder.add(obj); }); + }); +} + +template +void do_resolve_symbols(Context &ctx) { + auto for_each_file = [&](std::function *)> fn) { + tbb::parallel_for_each(ctx.objs, fn); + tbb::parallel_for_each(ctx.dsos, fn); + }; + + // Due to legacy reasons, archive members will only get included in the final + // binary if they satisfy one of the undefined symbols in a non-archive object + // file. This is called archive extraction. In finalize_archive_extraction, + // this is processed as follows: + // + // 1. Do preliminary symbol resolution assuming all archive members + // are included. This matches the undefined symbols with ones to be + // extracted from archives. + // + // 2. Do a mark & sweep pass to eliminate unneeded archive members. + // + // Note that the symbol resolution inside finalize_archive_extraction uses a + // different rule. In order to prevent extracting archive members that can be + // satisfied by either non-archive object files or DSOs, the archive members + // are given a lower priority. This is not correct for the general case, where + // *extracted* object files have precedence over DSOs and even non-archive + // files that are passed earlier in the command line. Hence, the symbol + // resolution is thrown away once we determine which archive members to + // extract, and redone later with the formal rule. + { + Timer t(ctx, "extract_archive_members"); + + // Register symbols + for_each_file([&](InputFile *file) { file->resolve_symbols(ctx); }); + + // Mark reachable objects to decide which files to include into an output. + // This also merges symbol visibility. + mark_live_objects(ctx); + + // Cleanup. The rule used for archive extraction isn't accurate for the + // general case of symbol extraction, so reset the resolution to be redone + // later. + for_each_file([](InputFile *file) { file->clear_symbols(); }); + + // Now that the symbol references are gone, remove the eliminated files from + // the file list. + std::erase_if(ctx.objs, [](InputFile *file) { return !file->is_alive; }); + std::erase_if(ctx.dsos, [](InputFile *file) { return !file->is_alive; }); + } + + // COMDAT elimination needs to happen exactly here. + // + // It needs to be after archive extraction, otherwise we might assign COMDAT + // leader to an archive member that is not supposed to be extracted. + // + // It needs to happen before symbol resolution, otherwise we could eliminate + // a symbol that is already resolved to and cause dangling references. 
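+  // A COMDAT group (e.g. an inline function emitted by several
+  // translation units) is kept only in the file with the smallest
+  // priority value among the files that define it; the group members of
+  // every other copy are discarded below.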
+ { + Timer t(ctx, "eliminate_comdats"); + + tbb::parallel_for_each(ctx.objs, [](ObjectFile *file) { + for (ComdatGroupRef &ref : file->comdat_groups) + update_minimum(ref.group->owner, file->priority); + }); + + tbb::parallel_for_each(ctx.objs, [](ObjectFile *file) { + for (ComdatGroupRef &ref : file->comdat_groups) + if (ref.group->owner != file->priority) + for (u32 i : ref.members) + if (file->sections[i]) + file->sections[i]->kill(); + }); + } + + // Since we have turned on object files live bits, their symbols + // may now have higher priority than before. So run the symbol + // resolution pass again to get the final resolution result. + for_each_file([&](InputFile *file) { file->resolve_symbols(ctx); }); +} + +template +void resolve_symbols(Context &ctx) { + Timer t(ctx, "resolve_symbols"); + + std::vector *> objs = ctx.objs; + std::vector *> dsos = ctx.dsos; + + do_resolve_symbols(ctx); + + if (ctx.has_lto_object) { + // Do link-time optimization. We pass all IR object files to the + // compiler backend to compile them into a few ELF object files. + // + // The compiler backend needs to know how symbols are resolved, + // so compute symbol visibility, import/export bits, etc early. + mark_live_objects(ctx); + apply_version_script(ctx); + parse_symbol_version(ctx); + compute_import_export(ctx); + + // Do LTO. It compiles IR object files into a few big ELF files. + std::vector *> lto_objs = do_lto(ctx); + + // do_resolve_symbols() have removed unreferenced files. Restore the + // original files here because some of them may have to be resurrected + // because they are referenced by the ELF files returned from do_lto(). + ctx.objs = objs; + ctx.dsos = dsos; + + append(ctx.objs, lto_objs); + + // Redo name resolution from scratch. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + file->clear_symbols(); + }); + + tbb::parallel_for_each(ctx.dsos, [&](SharedFile *file) { + file->clear_symbols(); + }); + + // Remove IR object files. + for (ObjectFile *file : ctx.objs) + if (file->is_lto_obj) + file->is_alive = false; + + std::erase_if(ctx.objs, [](ObjectFile *file) { return file->is_lto_obj; }); + + do_resolve_symbols(ctx); + } +} + +// .eh_frame sections are parsed and regenerated by the linker for the purpose +// of deduplication and garbage collection. As such, the input sections should +// not be copied over. +// However, in very rare cases (e.g. GCC CRT compiled with LTO) we might need +// to resolve cross-object .eh_frame section references (they only point to +// begin or end and don't depend on the actual section contents). +// Therefore, the sections are "killed" after symbol resolution as a separate +// pass. 
+template +void kill_eh_frame_sections(Context &ctx) { + Timer t(ctx, "kill_eh_frame_sections"); + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (i64 i = 0; i < file->sections.size(); i++) { + if (std::unique_ptr> &isec = file->sections[i]) { + if (isec && isec->is_alive && isec->name() == ".eh_frame") { + isec->is_alive = false; + } + } + } + }); +} + +template +void resolve_section_pieces(Context &ctx) { + Timer t(ctx, "resolve_section_pieces"); + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + file->initialize_mergeable_sections(ctx); + }); + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + file->resolve_section_pieces(ctx); + }); +} + +template +void convert_common_symbols(Context &ctx) { + Timer t(ctx, "convert_common_symbols"); + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + file->convert_common_symbols(ctx); + }); +} + +template +static std::string get_cmdline_args(Context &ctx) { + std::stringstream ss; + ss << ctx.cmdline_args[1]; + for (i64 i = 2; i < ctx.cmdline_args.size(); i++) + ss << " " << ctx.cmdline_args[i]; + return ss.str(); +} + +template +void add_comment_string(Context &ctx, std::string str) { + MergedSection *sec = + MergedSection::get_instance(ctx, ".comment", SHT_PROGBITS, + SHF_MERGE | SHF_STRINGS); + + std::string_view buf = save_string(ctx, str); + std::string_view data(buf.data(), buf.size() + 1); + sec->insert(ctx, data, hash_string(data), 0); +} + +template +void compute_merged_section_sizes(Context &ctx) { + Timer t(ctx, "compute_merged_section_sizes"); + + // Add an identification string to .comment. + if (!ctx.arg.oformat_binary) + add_comment_string(ctx, mold_version); + + // Embed command line arguments for debugging. + if (char *env = getenv("MOLD_DEBUG"); env && env[0]) + add_comment_string(ctx, "mold command line: " + get_cmdline_args(ctx)); + + tbb::parallel_for_each(ctx.merged_sections, + [&](std::unique_ptr> &sec) { + sec->assign_offsets(ctx); + }); +} + +template +static std::vector> split(std::vector &input, i64 unit) { + std::span span(input); + std::vector> vec; + + while (span.size() >= unit) { + vec.push_back(span.subspan(0, unit)); + span = span.subspan(unit); + } + if (!span.empty()) + vec.push_back(span); + return vec; +} + +template +static u64 canonicalize_type(std::string_view name, u64 type) { + if (type == SHT_PROGBITS) { + if (name == ".init_array" || name.starts_with(".init_array.")) + return SHT_INIT_ARRAY; + if (name == ".fini_array" || name.starts_with(".fini_array.")) + return SHT_FINI_ARRAY; + } + + if constexpr (is_x86_64) + if (type == SHT_X86_64_UNWIND) + return SHT_PROGBITS; + return type; +} + +struct OutputSectionKey { + std::string_view name; + u64 type; + u64 flags; + + bool operator==(const OutputSectionKey &other) const { + return name == other.name && type == other.type && flags == other.flags; + } +}; + +template +std::string_view +get_output_name(Context &ctx, std::string_view name, u64 flags) { + if (ctx.arg.relocatable && !ctx.arg.relocatable_merge_sections) + return name; + if (ctx.arg.unique && ctx.arg.unique->match(name)) + return name; + if (flags & SHF_MERGE) + return name; + + if constexpr (is_arm32) { + if (name.starts_with(".ARM.exidx")) + return ".ARM.exidx"; + if (name.starts_with(".ARM.extab")) + return ".ARM.extab"; + } + + if constexpr (is_alpha) { + if (name.starts_with(".sdata.")) + return ".sdata"; + if (name.starts_with(".sbss.")) + return ".sbss"; + } + + if (ctx.arg.z_keep_text_section_prefix) { + static std::string_view prefixes[] = { 
+ ".text.hot.", ".text.unknown.", ".text.unlikely.", ".text.startup.", + ".text.exit." + }; + + for (std::string_view prefix : prefixes) { + std::string_view stem = prefix.substr(0, prefix.size() - 1); + if (name == stem || name.starts_with(prefix)) + return stem; + } + } + + static std::string_view prefixes[] = { + ".text.", ".data.rel.ro.", ".data.", ".rodata.", ".bss.rel.ro.", ".bss.", + ".init_array.", ".fini_array.", ".tbss.", ".tdata.", ".gcc_except_table.", + ".ctors.", ".dtors.", ".gnu.warning.", ".openbsd.randomdata.", + }; + + for (std::string_view prefix : prefixes) { + std::string_view stem = prefix.substr(0, prefix.size() - 1); + if (name == stem || name.starts_with(prefix)) + return stem; + } + + return name; +} + +template +static OutputSectionKey +get_output_section_key(Context &ctx, InputSection &isec) { + const ElfShdr &shdr = isec.shdr(); + std::string_view name = get_output_name(ctx, isec.name(), shdr.sh_flags); + u64 type = canonicalize_type(name, shdr.sh_type); + u64 flags = shdr.sh_flags & ~(u64)SHF_COMPRESSED; + + if (!ctx.arg.relocatable) + flags &= ~(u64)SHF_GROUP & ~(u64)SHF_GNU_RETAIN; + + // .init_array is usually writable. We don't want to create multiple + // .init_array output sections, so make it always writable. + // So is .fini_array. + if (type == SHT_INIT_ARRAY || type == SHT_FINI_ARRAY) + flags |= SHF_WRITE; + return {name, type, flags}; +} + +// Create output sections for input sections. +template +void create_output_sections(Context &ctx) { + Timer t(ctx, "create_output_sections"); + + struct Hash { + size_t operator()(const OutputSectionKey &k) const { + u64 h = hash_string(k.name); + h = combine_hash(h, std::hash{}(k.type)); + h = combine_hash(h, std::hash{}(k.flags)); + return h; + } + }; + + std::unordered_map *, Hash> map; + std::shared_mutex mu; + + // Instantiate output sections + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + // Make a per-thread cache of the main map to avoid lock contention. + // It makes a noticeable difference if we have millions of input sections. + decltype(map) cache; + { + std::shared_lock lock(mu); + cache = map; + } + + for (std::unique_ptr> &isec : file->sections) { + if (!isec || !isec->is_alive) + continue; + + OutputSectionKey key = get_output_section_key(ctx, *isec); + + if (auto it = cache.find(key); it != cache.end()) { + isec->output_section = it->second; + continue; + } + + auto get_or_insert = [&] { + { + std::shared_lock lock(mu); + if (auto it = map.find(key); it != map.end()) + return it->second; + } + + std::unique_ptr> osec = + std::make_unique>(key.name, key.type, key.flags); + + std::unique_lock lock(mu); + auto [it, inserted] = map.insert({key, osec.get()}); + OutputSection *ret = it->second; + lock.unlock(); + + if (inserted) + ctx.osec_pool.emplace_back(std::move(osec)); + return ret; + }; + + OutputSection *osec = get_or_insert(); + isec->output_section = osec; + cache.insert({key, osec}); + } + }); + + // Add input sections to output sections + for (ObjectFile *file : ctx.objs) + for (std::unique_ptr> &isec : file->sections) + if (isec && isec->is_alive) + isec->output_section->members.push_back(isec.get()); + + // Add output sections and mergeable sections to ctx.chunks + std::vector *> vec; + for (std::pair *> &kv : map) + vec.push_back(kv.second); + + for (std::unique_ptr> &osec : ctx.merged_sections) + if (osec->shdr.sh_size) + vec.push_back(osec.get()); + + // Sections are added to the section lists in an arbitrary order + // because they are created in parallel. 
Sort them to to make the + // output deterministic. + tbb::parallel_sort(vec.begin(), vec.end(), [](Chunk *x, Chunk *y) { + return std::tuple(x->name, x->shdr.sh_type, x->shdr.sh_flags) < + std::tuple(y->name, y->shdr.sh_type, y->shdr.sh_flags); + }); + + append(ctx.chunks, vec); +} + +// Create a dummy object file containing linker-synthesized +// symbols. +template +void create_internal_file(Context &ctx) { + ObjectFile *obj = new ObjectFile; + ctx.obj_pool.emplace_back(obj); + ctx.internal_obj = obj; + ctx.objs.push_back(obj); + + // Create linker-synthesized symbols. + ctx.internal_esyms.resize(1); + + obj->symbols.push_back(new Symbol); + obj->first_global = 1; + obj->is_alive = true; + obj->priority = 1; + + auto add = [&](Symbol *sym) { + obj->symbols.push_back(sym); + + // An actual value will be set to a linker-synthesized symbol by + // fix_synthetic_symbols(). Until then, `value` doesn't have a valid + // value. 0xdeadbeef is a unique dummy value to make debugging easier + // if the field is accidentally used before it gets a valid one. + sym->value = 0xdeadbeef; + + ElfSym esym; + memset(&esym, 0, sizeof(esym)); + esym.st_type = STT_NOTYPE; + esym.st_shndx = SHN_ABS; + esym.st_bind = STB_GLOBAL; + esym.st_visibility = STV_DEFAULT; + ctx.internal_esyms.push_back(esym); + }; + + auto add_undef = [&](Symbol *sym) { + obj->symbols.push_back(sym); + sym->value = 0xdeadbeef; + + ElfSym esym; + memset(&esym, 0, sizeof(esym)); + esym.st_type = STT_NOTYPE; + esym.st_shndx = SHN_UNDEF; + esym.st_bind = STB_GLOBAL; + esym.st_visibility = STV_DEFAULT; + ctx.internal_esyms.push_back(esym); + }; + + // Add --defsym symbols + for (i64 i = 0; i < ctx.arg.defsyms.size(); i++) { + Symbol *sym = ctx.arg.defsyms[i].first; + std::variant *, u64> val = ctx.arg.defsyms[i].second; + add(sym); + + if (Symbol **ref = std::get_if *>(&val)) { + // Add an undefined symbol to keep a reference to the defsym target. + // This prevents elimination by e.g. LTO or gc-sections. + // The undefined symbol will never make to the final object file; we + // double-check that the defsym target is not undefined in + // fix_synthetic_symbols. 
+ add_undef(*ref); + } + } + + // Add --section-order symbols + for (SectionOrder &ord : ctx.arg.section_order) + if (ord.type == SectionOrder::SYMBOL) + add(get_symbol(ctx, ord.name)); + + obj->elf_syms = ctx.internal_esyms; + obj->has_symver.resize(ctx.internal_esyms.size() - 1); +} + +template +static std::optional +get_start_stop_name(Context &ctx, Chunk &chunk) { + if ((chunk.shdr.sh_flags & SHF_ALLOC) && !chunk.name.empty()) { + if (is_c_identifier(chunk.name)) + return std::string(chunk.name); + + if (ctx.arg.start_stop) { + auto isalnum = [](char c) { + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || + ('0' <= c && c <= '9'); + }; + + std::string s{chunk.name}; + if (s.starts_with('.')) + s = s.substr(1); + + for (i64 i = 0; i < s.size(); i++) + if (!isalnum(s[i])) + s[i] = '_'; + return s; + } + } + + return {}; +} + +template +void add_synthetic_symbols(Context &ctx) { + ObjectFile &obj = *ctx.internal_obj; + + auto add = [&](std::string_view name) { + ElfSym esym; + memset(&esym, 0, sizeof(esym)); + esym.st_type = STT_NOTYPE; + esym.st_shndx = SHN_ABS; + esym.st_bind = STB_GLOBAL; + esym.st_visibility = STV_HIDDEN; + ctx.internal_esyms.push_back(esym); + + Symbol *sym = get_symbol(ctx, name); + sym->value = 0xdeadbeef; // unique dummy value + obj.symbols.push_back(sym); + return sym; + }; + + ctx.__ehdr_start = add("__ehdr_start"); + ctx.__init_array_start = add("__init_array_start"); + ctx.__init_array_end = add("__init_array_end"); + ctx.__fini_array_start = add("__fini_array_start"); + ctx.__fini_array_end = add("__fini_array_end"); + ctx.__preinit_array_start = add("__preinit_array_start"); + ctx.__preinit_array_end = add("__preinit_array_end"); + ctx._DYNAMIC = add("_DYNAMIC"); + ctx._GLOBAL_OFFSET_TABLE_ = add("_GLOBAL_OFFSET_TABLE_"); + ctx._PROCEDURE_LINKAGE_TABLE_ = add("_PROCEDURE_LINKAGE_TABLE_"); + ctx.__bss_start = add("__bss_start"); + ctx._end = add("_end"); + ctx._etext = add("_etext"); + ctx._edata = add("_edata"); + ctx.__executable_start = add("__executable_start"); + + ctx.__rel_iplt_start = + add(E::is_rela ? "__rela_iplt_start" : "__rel_iplt_start"); + ctx.__rel_iplt_end = + add(E::is_rela ? 
"__rela_iplt_end" : "__rel_iplt_end"); + + if (ctx.arg.eh_frame_hdr) + ctx.__GNU_EH_FRAME_HDR = add("__GNU_EH_FRAME_HDR"); + + if (!get_symbol(ctx, "end")->file) + ctx.end = add("end"); + if (!get_symbol(ctx, "etext")->file) + ctx.etext = add("etext"); + if (!get_symbol(ctx, "edata")->file) + ctx.edata = add("edata"); + if (!get_symbol(ctx, "__dso_handle")->file) + ctx.__dso_handle = add("__dso_handle"); + + if constexpr (supports_tlsdesc) + ctx._TLS_MODULE_BASE_ = add("_TLS_MODULE_BASE_"); + + if constexpr (is_riscv) + if (!ctx.arg.shared) + ctx.__global_pointer = add("__global_pointer$"); + + if constexpr (is_arm32) { + ctx.__exidx_start = add("__exidx_start"); + ctx.__exidx_end = add("__exidx_end"); + } + + if constexpr (is_ppc64) + ctx.extra.TOC = add(".TOC."); + + if constexpr (is_ppc32) + ctx.extra._SDA_BASE_ = add("_SDA_BASE_"); + + for (Chunk *chunk : ctx.chunks) { + if (std::optional name = get_start_stop_name(ctx, *chunk)) { + add(save_string(ctx, "__start_" + *name)); + add(save_string(ctx, "__stop_" + *name)); + + if (ctx.arg.physical_image_base) { + add(save_string(ctx, "__phys_start_" + *name)); + add(save_string(ctx, "__phys_stop_" + *name)); + } + } + } + + obj.elf_syms = ctx.internal_esyms; + obj.has_symver.resize(ctx.internal_esyms.size() - 1); + + obj.resolve_symbols(ctx); + + // Make all synthetic symbols relative ones by associating them to + // a dummy output section. + for (Symbol *sym : obj.symbols) + if (sym->file == &obj) + sym->set_output_section(ctx.symtab); + + // Handle --defsym symbols. + for (i64 i = 0; i < ctx.arg.defsyms.size(); i++) { + Symbol *sym = ctx.arg.defsyms[i].first; + std::variant *, u64> val = ctx.arg.defsyms[i].second; + + Symbol *target = nullptr; + if (Symbol **ref = std::get_if *>(&val)) + target = *ref; + + // If the alias refers another symobl, copy ELF symbol attributes. + if (target) { + ElfSym &esym = obj.elf_syms[i + 1]; + esym.st_type = target->esym().st_type; + if constexpr (is_ppc64v2) + esym.ppc_local_entry = target->esym().ppc_local_entry; + } + + // Make the target absolute if necessary. + if (!target || target->is_absolute()) + sym->origin = 0; + } + +} + +template +void check_cet_errors(Context &ctx) { + bool warning = (ctx.arg.z_cet_report == CET_REPORT_WARNING); + assert(warning || ctx.arg.z_cet_report == CET_REPORT_ERROR); + + auto has_feature = [](ObjectFile *file, u32 feature) { + return std::any_of(file->gnu_properties.begin(), file->gnu_properties.end(), + [&](std::pair kv) { + return kv.first == GNU_PROPERTY_X86_FEATURE_1_AND && + (kv.second & feature); + }); + }; + + for (ObjectFile *file : ctx.objs) { + if (file == ctx.internal_obj) + continue; + + if (!has_feature(file, GNU_PROPERTY_X86_FEATURE_1_IBT)) { + if (warning) + Warn(ctx) << *file << ": -cet-report=warning: " + << "missing GNU_PROPERTY_X86_FEATURE_1_IBT"; + else + Error(ctx) << *file << ": -cet-report=error: " + << "missing GNU_PROPERTY_X86_FEATURE_1_IBT"; + } + + if (!has_feature(file, GNU_PROPERTY_X86_FEATURE_1_SHSTK)) { + if (warning) + Warn(ctx) << *file << ": -cet-report=warning: " + << "missing GNU_PROPERTY_X86_FEATURE_1_SHSTK"; + else + Error(ctx) << *file << ": -cet-report=error: " + << "missing GNU_PROPERTY_X86_FEATURE_1_SHSTK"; + } + } +} + +template +void print_dependencies(Context &ctx) { + SyncOut(ctx) << +R"(# This is an output of the mold linker's --print-dependencies option. +# +# Each line consists of 4 fields, , , and +# , separated by tab characters. It indicates that depends +# on to use . 
is either "u" or "w" for +# regular undefined or weak undefined, respectively. +# +# If you want to obtain dependency information per function granularity, +# compile source files with the -ffunction-sections compiler flag. +)"; + + auto println = [&](auto &src, Symbol &sym, ElfSym &esym) { + if (InputSection *isec = sym.get_input_section()) + SyncOut(ctx) << src << "\t" << *isec + << "\t" << (esym.is_weak() ? 'w' : 'u') + << "\t" << sym; + else + SyncOut(ctx) << src << "\t" << *sym.file + << "\t" << (esym.is_weak() ? 'w' : 'u') + << "\t" << sym; + }; + + for (ObjectFile *file : ctx.objs) { + for (std::unique_ptr> &isec : file->sections) { + if (!isec) + continue; + + std::unordered_set visited; + + for (const ElfRel &r : isec->get_rels(ctx)) { + if (r.r_type == R_NONE) + continue; + + ElfSym &esym = file->elf_syms[r.r_sym]; + Symbol &sym = *file->symbols[r.r_sym]; + + if (esym.is_undef() && sym.file && sym.file != file && + visited.insert((void *)&sym).second) + println(*isec, sym, esym); + } + } + } + + for (SharedFile *file : ctx.dsos) { + for (i64 i = file->first_global; i < file->symbols.size(); i++) { + ElfSym &esym = file->elf_syms[i]; + Symbol &sym = *file->symbols[i]; + if (esym.is_undef() && sym.file && sym.file != file) + println(*file, sym, esym); + } + } +} + +template +static std::string create_response_file(Context &ctx) { + std::string buf; + std::stringstream out; + + std::string cwd = std::filesystem::current_path().string(); + out << "-C " << cwd.substr(1) << "\n"; + + if (cwd != "/") { + out << "--chroot .."; + i64 depth = std::count(cwd.begin(), cwd.end(), '/'); + for (i64 i = 1; i < depth; i++) + out << "/.."; + out << "\n"; + } + + for (i64 i = 1; i < ctx.cmdline_args.size(); i++) { + std::string_view arg = ctx.cmdline_args[i]; + if (arg != "-repro" && arg != "--repro") + out << arg << "\n"; + } + return out.str(); +} + +template +void write_repro_file(Context &ctx) { + std::string path = ctx.arg.output + ".repro.tar"; + + std::unique_ptr tar = + TarWriter::open(path, filepath(ctx.arg.output).filename().string() + ".repro"); + if (!tar) + Fatal(ctx) << "cannot open " << path << ": " << errno_string(); + + tar->append("response.txt", save_string(ctx, create_response_file(ctx))); + tar->append("version.txt", save_string(ctx, mold_version + "\n")); + + std::unordered_set seen; + for (std::unique_ptr>> &mf : ctx.mf_pool) { + if (!mf->parent) { + std::string path = to_abs_path(mf->name).string(); + if (seen.insert(path).second) { + // We reopen a file because we may have modified the contents of mf + // in memory, which is mapped with PROT_WRITE and MAP_PRIVATE. + MappedFile> *mf2 = MappedFile>::must_open(ctx, path); + tar->append(path, mf2->get_contents()); + mf2->unmap(); + } + } + } +} + +template +void check_duplicate_symbols(Context &ctx) { + Timer t(ctx, "check_duplicate_symbols"); + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (i64 i = file->first_global; i < file->elf_syms.size(); i++) { + const ElfSym &esym = file->elf_syms[i]; + Symbol &sym = *file->symbols[i]; + + // Skip if our symbol is undef or weak + if (sym.file == file || sym.file == ctx.internal_obj || + esym.is_undef() || esym.is_common() || (esym.st_bind == STB_WEAK)) + continue; + + // Skip if our symbol is in a dead section. In most cases, the + // section has been eliminated due to comdat deduplication. 
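+      // In that case the local definition was discarded along with its
+      // section, so resolving the name to another file's definition is
+      // expected and must not be reported as a duplicate symbol.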
+ if (!esym.is_abs()) { + InputSection *isec = file->get_section(esym); + if (!isec || !isec->is_alive) + continue; + } + + Error(ctx) << "duplicate symbol: " << *file << ": " << *sym.file + << ": " << sym; + } + }); + + ctx.checkpoint(); +} + +template +void check_symbol_types(Context &ctx) { + Timer t(ctx, "check_symbol_types"); + + auto check = [&](InputFile *file) { + for (i64 i = file->first_global; i < file->elf_syms.size(); i++) { + const ElfSym &esym = file->elf_syms[i]; + Symbol &sym = *file->symbols[i]; + + if (!sym.file) + continue; + + u32 x = sym.esym().st_type; + if (x == STT_GNU_IFUNC) + x = STT_FUNC; + + u32 y = esym.st_type; + if (y == STT_GNU_IFUNC) + y = STT_FUNC; + + if (x != STT_NOTYPE && y != STT_NOTYPE && x != y) + Warn(ctx) << "symbol type mismatch: " << sym << '\n' + << ">>> defined in " << *sym.file << " as " + << stt_to_string(sym.esym().st_type) << '\n' + << ">>> defined in " << *file << " as " + << stt_to_string(esym.st_type); + } + }; + + tbb::parallel_for_each(ctx.objs, check); + tbb::parallel_for_each(ctx.dsos, check); +} + +template +void sort_init_fini(Context &ctx) { + Timer t(ctx, "sort_init_fini"); + + auto get_priority = [](InputSection *isec) { + static std::regex re(R"(\.(\d+)$)", std::regex_constants::optimize); + std::string_view name = isec->name(); + std::cmatch m; + if (std::regex_search(name.data(), name.data() + name.size(), m, re)) + return std::stoi(m[1]); + return 65536; + }; + + for (Chunk *chunk : ctx.chunks) { + if (OutputSection *osec = chunk->to_osec()) { + if (osec->name == ".init_array" || osec->name == ".preinit_array" || + osec->name == ".fini_array") { + if (ctx.arg.shuffle_sections == SHUFFLE_SECTIONS_REVERSE) + std::reverse(osec->members.begin(), osec->members.end()); + + std::unordered_map *, i64> map; + for (InputSection *isec : osec->members) + map.insert({isec, get_priority(isec)}); + + sort(osec->members, [&](InputSection *a, InputSection *b) { + return map[a] < map[b]; + }); + } + } + } +} + +template +void sort_ctor_dtor(Context &ctx) { + Timer t(ctx, "sort_ctor_dtor"); + + auto get_priority = [](InputSection *isec) { + auto opts = std::regex_constants::optimize | std::regex_constants::ECMAScript; + static std::regex re1(R"((?:clang_rt\.)?crtbegin)", opts); + static std::regex re2(R"((?:clang_rt\.)?crtend)", opts); + static std::regex re3(R"(\.(\d+)$)", opts); + + // crtbegin.o and crtend.o contain marker symbols such as + // __CTOR_LIST__ or __DTOR_LIST__. So they have to be at the + // beginning or end of the section. + std::smatch m; + if (std::regex_search(isec->file.filename, m, re1)) + return -2; + if (std::regex_search(isec->file.filename, m, re2)) + return 65536; + + std::string name(isec->name()); + if (std::regex_search(name, m, re3)) + return std::stoi(m[1]); + return -1; + }; + + for (Chunk *chunk : ctx.chunks) { + if (OutputSection *osec = chunk->to_osec()) { + if (osec->name == ".ctors" || osec->name == ".dtors") { + if (ctx.arg.shuffle_sections != SHUFFLE_SECTIONS_REVERSE) + std::reverse(osec->members.begin(), osec->members.end()); + + std::unordered_map *, i64> map; + for (InputSection *isec : osec->members) + map.insert({isec, get_priority(isec)}); + + sort(osec->members, [&](InputSection *a, InputSection *b) { + return map[a] < map[b]; + }); + } + } + } +} + +template +static void shuffle(std::vector &vec, u64 seed) { + if (vec.empty()) + return; + + // Xorshift random number generator. We use this RNG because it is + // measurably faster than MT19937. 
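+  //
+  // As a concrete illustration, starting from seed = 1 the three shift-xor
+  // steps below produce
+  //
+  //   1      ^ (1      << 13) = 0x2001
+  //   0x2001 ^ (0x2001 >>  7) = 0x2041
+  //   0x2041 ^ (0x2041 << 17) = 0x40822041
+  //
+  // The (13, 7, 17) shift triple is a classic Marsaglia xorshift64
+  // generator with period 2^64 - 1 over nonzero seeds.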
+ auto rand = [&] { + seed ^= seed << 13; + seed ^= seed >> 7; + seed ^= seed << 17; + return seed; + }; + + // The Fisher-Yates shuffling algorithm. + // + // We don't want to use std::shuffle for build reproducibility. That is, + // std::shuffle's implementation is not guaranteed to be the same across + // platform, so even though the result is guaranteed to be randomly + // shuffled, the exact order may be different across implementations. + // + // We are not using std::uniform_int_distribution for the same reason. + for (i64 i = 0; i < vec.size() - 1; i++) + std::swap(vec[i], vec[i + rand() % (vec.size() - i)]); +} + +template +void shuffle_sections(Context &ctx) { + Timer t(ctx, "shuffle_sections"); + + auto is_eligible = [](OutputSection &osec) { + return osec.name != ".init" && osec.name != ".fini" && + osec.name != ".ctors" && osec.name != ".dtors" && + osec.name != ".init_array" && osec.name != ".preinit_array" && + osec.name != ".fini_array"; + }; + + switch (ctx.arg.shuffle_sections) { + case SHUFFLE_SECTIONS_NONE: + unreachable(); + case SHUFFLE_SECTIONS_SHUFFLE: { + u64 seed; + if (ctx.arg.shuffle_sections_seed) + seed = *ctx.arg.shuffle_sections_seed; + else + seed = ((u64)std::random_device()() << 32) | std::random_device()(); + + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + if (OutputSection *osec = chunk->to_osec()) + if (is_eligible(*osec)) + shuffle(osec->members, seed + hash_string(osec->name)); + }); + break; + } + case SHUFFLE_SECTIONS_REVERSE: + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + if (OutputSection *osec = chunk->to_osec()) + if (is_eligible(*osec)) + std::reverse(osec->members.begin(), osec->members.end()); + }); + break; + } +} + +template +void compute_section_sizes(Context &ctx) { + Timer t(ctx, "compute_section_sizes"); + + struct Group { + i64 size = 0; + i64 p2align = 0; + i64 offset = 0; + std::span *> members; + }; + + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + OutputSection *osec = chunk->to_osec(); + if (!osec) + return; + + // This pattern will be processed in the next loop. + if constexpr (needs_thunk) + if ((osec->shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable) + return; + + // Since one output section may contain millions of input sections, + // we first split input sections into groups and assign offsets to + // groups. + std::vector groups; + constexpr i64 group_size = 10000; + + for (std::span *> span : split(osec->members, group_size)) + groups.push_back(Group{.members = span}); + + tbb::parallel_for_each(groups, [](Group &group) { + for (InputSection *isec : group.members) { + group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size; + group.p2align = std::max(group.p2align, isec->p2align); + } + }); + + i64 offset = 0; + i64 p2align = 0; + + for (i64 i = 0; i < groups.size(); i++) { + offset = align_to(offset, 1 << groups[i].p2align); + groups[i].offset = offset; + offset += groups[i].size; + p2align = std::max(p2align, groups[i].p2align); + } + + osec->shdr.sh_size = offset; + osec->shdr.sh_addralign = 1 << p2align; + + // Assign offsets to input sections. 
+ tbb::parallel_for_each(groups, [](Group &group) { + i64 offset = group.offset; + for (InputSection *isec : group.members) { + offset = align_to(offset, 1 << isec->p2align); + isec->offset = offset; + offset += isec->sh_size; + } + }); + }); + + // On ARM32 or ARM64, we may need to create so-called "range extension + // thunks" to extend branch instructions reach, as they can jump only + // to ±16 MiB or ±128 MiB, respecitvely. + // + // In the following loop, We compute the sizes of sections while + // inserting thunks. This pass cannot be parallelized. That is, + // create_range_extension_thunks is parallelized internally, but the + // function itself is not thread-safe. + if constexpr (needs_thunk) { + for (Chunk *chunk : ctx.chunks) { + OutputSection *osec = chunk->to_osec(); + if (osec && (osec->shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable) { + create_range_extension_thunks(ctx, *osec); + + for (InputSection *isec : osec->members) + osec->shdr.sh_addralign = + std::max(osec->shdr.sh_addralign, 1 << isec->p2align); + } + } + } + + for (Chunk *chunk : ctx.chunks) + if (OutputSection *osec = chunk->to_osec()) + if (u32 align = ctx.arg.section_align[osec->name]) + osec->shdr.sh_addralign = std::max(osec->shdr.sh_addralign, align); +} + +// Find all unresolved symbols and attach them to the most appropriate files. +// Note that even a symbol that will be reported as an undefined symbol will +// get an owner file in this function. Such symbol will be reported by +// ObjectFile::scan_relocations(). +template +void claim_unresolved_symbols(Context &ctx) { + Timer t(ctx, "claim_unresolved_symbols"); + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + if (!file->is_alive) + return; + + for (i64 i = file->first_global; i < file->elf_syms.size(); i++) { + const ElfSym &esym = file->elf_syms[i]; + Symbol &sym = *file->symbols[i]; + if (!esym.is_undef()) + continue; + + std::scoped_lock lock(sym.mu); + + if (sym.file) + if (!sym.esym().is_undef() || sym.file->priority <= file->priority) + continue; + + // If a symbol name is in the form of "foo@version", search for + // symbol "foo" and check if the symbol has version "version". + if (file->has_symver.get(i - file->first_global)) { + std::string_view str = file->symbol_strtab.data() + esym.st_name; + i64 pos = str.find('@'); + assert(pos != str.npos); + + std::string_view name = str.substr(0, pos); + std::string_view ver = str.substr(pos + 1); + + Symbol *sym2 = get_symbol(ctx, name); + if (sym2->file && sym2->file->is_dso && sym2->get_version() == ver) { + file->symbols[i] = sym2; + continue; + } + } + + auto claim = [&](bool is_imported) { + if (sym.is_traced) + SyncOut(ctx) << "trace-symbol: " << *file << ": unresolved" + << (esym.is_weak() ? " weak" : "") + << " symbol " << sym; + + sym.file = file; + sym.origin = 0; + sym.value = 0; + sym.sym_idx = i; + sym.is_weak = false; + sym.is_imported = is_imported; + sym.is_exported = false; + sym.ver_idx = is_imported ? 0 : ctx.default_version; + }; + + if (esym.is_undef_weak()) { + if (ctx.arg.shared && sym.visibility != STV_HIDDEN && + ctx.arg.z_dynamic_undefined_weak) { + // Global weak undefined symbols are promoted to dynamic symbols + // when linking a DSO unless `-z nodynamic_undefined_weak` was given. + claim(true); + } else { + // Otherwise, weak undefs are converted to absolute symbols with value 0. + claim(false); + } + continue; + } + + // Traditionally, remaining undefined symbols cause a link failure + // only when we are creating an executable. 
Undefined symbols in + // shared objects are promoted to dynamic symbols, so that they'll + // get another chance to be resolved at run-time. You can change the + // behavior by passing `-z defs` to the linker. + // + // Even if `-z defs` is given, weak undefined symbols are still + // promoted to dynamic symbols for compatibility with other linkers. + // Some major programs, notably Firefox, depend on the behavior + // (they use this loophole to export symbols from libxul.so). + if (ctx.arg.shared && sym.visibility != STV_HIDDEN && !ctx.arg.z_defs) { + claim(true); + continue; + } + + // Convert remaining undefined symbols to absolute symbols with value 0. + claim(false); + } + }); +} + +template +void scan_relocations(Context &ctx) { + Timer t(ctx, "scan_relocations"); + + // Scan relocations to find dynamic symbols. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + file->scan_relocations(ctx); + }); + + // Exit if there was a relocation that refers an undefined symbol. + ctx.checkpoint(); + + // Aggregate dynamic symbols to a single vector. + std::vector *> files; + append(files, ctx.objs); + append(files, ctx.dsos); + + std::vector *>> vec(files.size()); + + tbb::parallel_for((i64)0, (i64)files.size(), [&](i64 i) { + for (Symbol *sym : files[i]->symbols) + if (sym->file == files[i]) + if (sym->flags || sym->is_imported || sym->is_exported) + vec[i].push_back(sym); + }); + + std::vector *> syms = flatten(vec); + ctx.symbol_aux.reserve(syms.size()); + + // Assign offsets in additional tables for each dynamic symbol. + for (Symbol *sym : syms) { + sym->add_aux(ctx); + + if (sym->is_imported || sym->is_exported) + ctx.dynsym->add_symbol(ctx, sym); + + if (sym->flags & NEEDS_GOT) + ctx.got->add_got_symbol(ctx, sym); + + if (sym->flags & NEEDS_CPLT) { + sym->is_canonical = true; + + // A canonical PLT needs to be visible from DSOs. + sym->is_exported = true; + + // We can't use .plt.got for a canonical PLT because otherwise + // .plt.got and .got would refer to each other, resulting in an + // infinite loop at runtime. + ctx.plt->add_symbol(ctx, sym); + } else if (sym->flags & NEEDS_PLT) { + if (sym->flags & NEEDS_GOT) + ctx.pltgot->add_symbol(ctx, sym); + else + ctx.plt->add_symbol(ctx, sym); + } + + if (sym->flags & NEEDS_GOTTP) + ctx.got->add_gottp_symbol(ctx, sym); + + if (sym->flags & NEEDS_TLSGD) + ctx.got->add_tlsgd_symbol(ctx, sym); + + if (sym->flags & NEEDS_TLSDESC) + ctx.got->add_tlsdesc_symbol(ctx, sym); + + if (sym->flags & NEEDS_COPYREL) { + if (((SharedFile *)sym->file)->is_readonly(sym)) + ctx.copyrel_relro->add_symbol(ctx, sym); + else + ctx.copyrel->add_symbol(ctx, sym); + } + + if constexpr (is_ppc64v1) + if (sym->flags & NEEDS_PPC_OPD) + ctx.extra.opd->add_symbol(ctx, sym); + + sym->flags = 0; + } + + if (ctx.needs_tlsld) + ctx.got->add_tlsld(ctx); + + if constexpr (is_alpha) + ctx.extra.got->finalize(); + + if (ctx.has_textrel && ctx.arg.warn_textrel) + Warn(ctx) << "creating a DT_TEXTREL in an output file"; +} + +// Report all undefined symbols, grouped by symbol. 
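+//
+// For a symbol referenced from more places than the per-symbol limit, the
+// report looks roughly like this (illustrative file names):
+//
+//   undefined symbol: foo
+//   >>> referenced by a.o
+//   >>> referenced by b.o
+//   >>> referenced by c.o
+//   >>> referenced 5 more times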
+template +void report_undef_errors(Context &ctx) { + constexpr i64 max_errors = 3; + + for (auto &pair : ctx.undef_errors) { + std::string_view sym_name = pair.first; + std::span errors = pair.second; + + if (ctx.arg.demangle) + sym_name = demangle(sym_name); + + std::stringstream ss; + ss << "undefined symbol: " << sym_name << "\n"; + + for (i64 i = 0; i < errors.size() && i < max_errors; i++) + ss << errors[i]; + + if (errors.size() > max_errors) + ss << ">>> referenced " << (errors.size() - max_errors) << " more times\n"; + + if (ctx.arg.unresolved_symbols == UNRESOLVED_ERROR) + Error(ctx) << ss.str(); + else if (ctx.arg.unresolved_symbols == UNRESOLVED_WARN) + Warn(ctx) << ss.str(); + } + + ctx.checkpoint(); +} + +template +void create_reloc_sections(Context &ctx) { + Timer t(ctx, "create_reloc_sections"); + + // Create .rela.* sections + tbb::parallel_for((i64)0, (i64)ctx.chunks.size(), [&](i64 i) { + if (OutputSection *osec = ctx.chunks[i]->to_osec()) + osec->reloc_sec.reset(new RelocSection(ctx, *osec)); + }); + + for (i64 i = 0, end = ctx.chunks.size(); i < end; i++) + if (OutputSection *osec = ctx.chunks[i]->to_osec()) + if (RelocSection *x = osec->reloc_sec.get()) + ctx.chunks.push_back(x); +} + +// Copy chunks to an output file +template +void copy_chunks(Context &ctx) { + Timer t(ctx, "copy_chunks"); + + auto copy = [&](Chunk &chunk) { + std::string name = chunk.name.empty() ? "(header)" : std::string(chunk.name); + Timer t2(ctx, name, &t); + chunk.copy_buf(ctx); + }; + + // For --relocatable and --emit-relocs, we want to copy non-relocation + // sections first. This is because REL-type relocation sections (as + // opposed to RELA-type) stores relocation addends to target sections. + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + if (chunk->shdr.sh_type != (E::is_rela ? SHT_RELA : SHT_REL)) + copy(*chunk); + }); + + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + if (chunk->shdr.sh_type == (E::is_rela ? SHT_RELA : SHT_REL)) + copy(*chunk); + }); + + // Undefined symbols in SHF_ALLOC sections are found by scan_relocations(), + // but those in non-SHF_ALLOC sections cannot be found until we copy section + // contents. So we need to call this function again to report possible + // undefined errors. + report_undef_errors(ctx); + + if constexpr (is_arm32) + fixup_arm_exidx_section(ctx); +} + +template +void construct_relr(Context &ctx) { + Timer t(ctx, "construct_relr"); + + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + if (OutputSection *osec = chunk->to_osec()) + osec->construct_relr(ctx); + }); + + ctx.got->construct_relr(ctx); +} + +template +void create_output_symtab(Context &ctx) { + Timer t(ctx, "compute_symtab_size"); + + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + chunk->compute_symtab_size(ctx); + }); + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + file->compute_symtab_size(ctx); + }); + + tbb::parallel_for_each(ctx.dsos, [&](SharedFile *file) { + file->compute_symtab_size(ctx); + }); +} + +template +void apply_version_script(Context &ctx) { + Timer t(ctx, "apply_version_script"); + + // If all patterns are simple (i.e. not containing any meta- + // characters and is not a C++ name), we can simply look up + // symbols. 
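+  //
+  // For example, a version-script entry like "foo" is simple and can be
+  // resolved with a single symbol lookup, whereas "foo*" (a glob) or a
+  // pattern from an extern "C++" block such as "ns::*" needs the matcher
+  // machinery below.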
+ auto is_simple = [&] { + for (VersionPattern &v : ctx.version_patterns) + if (v.is_cpp || v.pattern.find_first_of("*?[") != v.pattern.npos) + return false; + return true; + }; + + if (is_simple()) { + for (VersionPattern &v : ctx.version_patterns) { + Symbol *sym = get_symbol(ctx, v.pattern); + + if (!sym->file && !ctx.arg.undefined_version) + Warn(ctx) << v.source << ": cannot assign version `" << v.ver_str + << "` to symbol `" << *sym << "`: symbol not found"; + + if (sym->file && !sym->file->is_dso) + sym->ver_idx = v.ver_idx; + } + return; + } + + // Otherwise, use glob pattern matchers. + MultiGlob matcher; + MultiGlob cpp_matcher; + + for (i64 i = 0; i < ctx.version_patterns.size(); i++) { + VersionPattern &v = ctx.version_patterns[i]; + if (v.is_cpp) { + if (!cpp_matcher.add(v.pattern, i)) + Fatal(ctx) << "invalid version pattern: " << v.pattern; + } else { + if (!matcher.add(v.pattern, i)) + Fatal(ctx) << "invalid version pattern: " << v.pattern; + } + } + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (Symbol *sym : file->get_global_syms()) { + if (sym->file != file) + continue; + + std::string_view name = sym->name(); + i64 match = INT64_MAX; + + if (std::optional idx = matcher.find(name)) + match = std::min(match, *idx); + + // Match non-mangled symbols against the C++ pattern as well. + // Weird, but required to match other linkers' behavior. + if (!cpp_matcher.empty()) { + if (std::optional s = cpp_demangle(name)) + name = *s; + if (std::optional idx = cpp_matcher.find(name)) + match = std::min(match, *idx); + } + + if (match != INT64_MAX) + sym->ver_idx = ctx.version_patterns[match].ver_idx; + } + }); +} + +template +void parse_symbol_version(Context &ctx) { + if (!ctx.arg.shared) + return; + + Timer t(ctx, "parse_symbol_version"); + + std::unordered_map verdefs; + for (i64 i = 0; i < ctx.arg.version_definitions.size(); i++) + verdefs[ctx.arg.version_definitions[i]] = i + VER_NDX_LAST_RESERVED + 1; + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (i64 i = file->first_global; i < file->elf_syms.size(); i++) { + // Match VERSION part of symbol foo@VERSION with version definitions. + if (!file->has_symver.get(i - file->first_global)) + continue; + + Symbol *sym = file->symbols[i]; + if (sym->file != file) + continue; + + const char *name = file->symbol_strtab.data() + file->elf_syms[i].st_name; + std::string_view ver = strchr(name, '@') + 1; + + bool is_default = false; + if (ver.starts_with('@')) { + is_default = true; + ver = ver.substr(1); + } + + auto it = verdefs.find(ver); + if (it == verdefs.end()) { + Error(ctx) << *file << ": symbol " << *sym << " has undefined version " + << ver; + continue; + } + + sym->ver_idx = it->second; + if (!is_default) + sym->ver_idx |= VERSYM_HIDDEN; + + // If both symbol `foo` and `foo@VERSION` are defined, `foo@VERSION` + // hides `foo` so that all references to `foo` are resolved to a + // versioned symbol. Likewise, if `foo@VERSION` and `foo@@VERSION` are + // defined, the default one takes precedence. 
+ Symbol *sym2 = get_symbol(ctx, sym->name()); + if (sym2->file == file && + !file->has_symver.get(sym2->sym_idx - file->first_global)) + if (sym2->ver_idx == ctx.default_version || + (sym2->ver_idx & ~VERSYM_HIDDEN) == (sym->ver_idx & ~VERSYM_HIDDEN)) + sym2->ver_idx = VER_NDX_LOCAL; + } + }); +} + +template +void compute_import_export(Context &ctx) { + Timer t(ctx, "compute_import_export"); + + // If we are creating an executable, we want to export symbols referenced + // by DSOs unless they are explicitly marked as local by a version script. + if (!ctx.arg.shared) { + tbb::parallel_for_each(ctx.dsos, [&](SharedFile *file) { + for (Symbol *sym : file->symbols) { + if (sym->file && !sym->file->is_dso && sym->visibility != STV_HIDDEN) { + if (sym->ver_idx != VER_NDX_LOCAL || + !ctx.default_version_from_version_script) { + std::scoped_lock lock(sym->mu); + sym->is_exported = true; + } + } + } + }); + } + + // Export symbols that are not hidden or marked as local. + // We also want to mark imported symbols as such. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (Symbol *sym : file->get_global_syms()) { + if (!sym->file || sym->visibility == STV_HIDDEN || + sym->ver_idx == VER_NDX_LOCAL) + continue; + + // If we are using a symbol in a DSO, we need to import it at runtime. + if (sym->file != file && sym->file->is_dso && !sym->is_absolute()) { + std::scoped_lock lock(sym->mu); + sym->is_imported = true; + continue; + } + + // If we are creating a DSO, all global symbols are exported by default. + if (sym->file == file) { + std::scoped_lock lock(sym->mu); + sym->is_exported = true; + + if (ctx.arg.shared && sym->visibility != STV_PROTECTED && + !ctx.arg.Bsymbolic && + !(ctx.arg.Bsymbolic_functions && sym->get_type() == STT_FUNC)) + sym->is_imported = true; + } + } + }); +} + +template +void mark_addrsig(Context &ctx) { + Timer t(ctx, "mark_addrsig"); + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + file->mark_addrsig(ctx); + }); +} + +template +void clear_padding(Context &ctx) { + Timer t(ctx, "clear_padding"); + + auto zero = [&](Chunk *chunk, i64 next_start) { + i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size; + memset(ctx.buf + pos, 0, next_start - pos); + }; + + std::vector *> chunks = ctx.chunks; + + std::erase_if(chunks, [](Chunk *chunk) { + return chunk->shdr.sh_type == SHT_NOBITS; + }); + + for (i64 i = 1; i < chunks.size(); i++) + zero(chunks[i - 1], chunks[i]->shdr.sh_offset); + zero(chunks.back(), ctx.output_file->filesize); +} + +// We want to sort output chunks in the following order. +// +// +// +// .interp +// .note +// .hash +// .gnu.hash +// .dynsym +// .dynstr +// .gnu.version +// .gnu.version_r +// .rela.dyn +// .rela.plt +// +// +// +// +// +// .got +// .toc +// .alpha_got +// +// .relro_padding +// +// +// +//
+// +// .interp and some other linker-synthesized sections are placed at the +// beginning of a file because they are needed by loader. Especially on +// a hard drive with spinning disks, it is important to read these +// sections in a single seek. +// +// .note sections are also placed at the beginning so that they are +// included in a core crash dump even if it's truncated by ulimit. In +// particular, if .note.gnu.build-id is in a truncated core file, you +// can at least identify which executable has crashed. +// +// A PT_NOTE segment will contain multiple .note sections if exists, +// but there's no way to represent a gap between .note sections. +// Therefore, we sort .note sections by decreasing alignment +// requirement. I believe each .note section size is a multiple of its +// alignment, so by sorting them by alignment, we should be able to +// avoid a gap between .note sections. +// +// .toc is placed right after .got for PPC64. PPC-specific .toc section +// contains data that may be accessed with a 16-bit offset relative to +// %r2. %r2 is set to .got + 32 KiB. Therefore, .toc needs to be within +// [.got, .got + 64 KiB). +// +// Other file layouts are possible, but this layout is chosen to keep +// the number of segments as few as possible. +template +void sort_output_sections_regular(Context &ctx) { + auto get_rank1 = [&](Chunk *chunk) { + u64 type = chunk->shdr.sh_type; + u64 flags = chunk->shdr.sh_flags; + + if (chunk == ctx.ehdr) + return 0; + if (chunk == ctx.phdr) + return 1; + if (chunk == ctx.interp) + return 2; + if (type == SHT_NOTE && (flags & SHF_ALLOC)) + return 3; + if (chunk == ctx.hash) + return 4; + if (chunk == ctx.gnu_hash) + return 5; + if (chunk == ctx.dynsym) + return 6; + if (chunk == ctx.dynstr) + return 7; + if (chunk == ctx.versym) + return 8; + if (chunk == ctx.verneed) + return 9; + if (chunk == ctx.reldyn) + return 10; + if (chunk == ctx.relplt) + return 11; + if (chunk == ctx.shdr) + return INT32_MAX; + + bool alloc = (flags & SHF_ALLOC); + bool writable = (flags & SHF_WRITE); + bool exec = (flags & SHF_EXECINSTR); + bool tls = (flags & SHF_TLS); + bool relro = is_relro(ctx, chunk); + bool is_bss = (type == SHT_NOBITS); + + return (1 << 10) | (!alloc << 9) | (writable << 8) | (exec << 7) | + (!tls << 6) | (!relro << 5) | (is_bss << 4); + }; + + auto get_rank2 = [&](Chunk *chunk) -> i64 { + if (chunk->shdr.sh_type == SHT_NOTE) + return -chunk->shdr.sh_addralign; + + if (chunk == ctx.got) + return 1; + if (chunk->name == ".toc") + return 2; + if (chunk->name == ".alpha_got") + return 3; + if (chunk == ctx.relro_padding) + return INT_MAX; + return 0; + }; + + sort(ctx.chunks, [&](Chunk *a, Chunk *b) { + // Sort sections by segments + i64 x = get_rank1(a); + i64 y = get_rank1(b); + if (x != y) + return x < y; + + // Ties are broken by additional rules + return get_rank2(a) < get_rank2(b); + }); +} + +template +static std::string_view get_section_order_group(Chunk &chunk) { + if (chunk.shdr.sh_type == SHT_NOBITS) + return "BSS"; + if (chunk.shdr.sh_flags & SHF_EXECINSTR) + return "TEXT"; + if (chunk.shdr.sh_flags & SHF_WRITE) + return "DATA"; + return "RODATA"; +}; + +// Sort sections according to a --section-order argument. 
+template +void sort_output_sections_by_order(Context &ctx) { + auto get_rank = [&](Chunk *chunk) -> i64 { + u64 flags = chunk->shdr.sh_flags; + + if (chunk == ctx.ehdr && !(chunk->shdr.sh_flags & SHF_ALLOC)) + return -2; + if (chunk == ctx.phdr && !(chunk->shdr.sh_flags & SHF_ALLOC)) + return -1; + + if (chunk == ctx.shdr) + return INT32_MAX; + if (!(flags & SHF_ALLOC)) + return INT32_MAX - 1; + + for (i64 i = 0; const SectionOrder &arg : ctx.arg.section_order) { + if (arg.type == SectionOrder::SECTION && arg.name == chunk->name) + return i; + i++; + } + + std::string_view group = get_section_order_group(*chunk); + + for (i64 i = 0; i < ctx.arg.section_order.size(); i++) { + SectionOrder arg = ctx.arg.section_order[i]; + if (arg.type == SectionOrder::GROUP && arg.name == group) + return i; + } + + Error(ctx) << "--section-order: missing section specification for " + << chunk->name; + return 0; + }; + + // It is an error if a section order cannot be determined by a given + // section order list. + for (Chunk *chunk : ctx.chunks) + chunk->sect_order = get_rank(chunk); + + // Sort output sections by --section-order + sort(ctx.chunks, [&](Chunk *a, Chunk *b) { + return a->sect_order < b->sect_order; + }); +} + +template +void sort_output_sections(Context &ctx) { + if (ctx.arg.section_order.empty()) + sort_output_sections_regular(ctx); + else + sort_output_sections_by_order(ctx); +} + +template +static bool is_tbss(Chunk *chunk) { + return (chunk->shdr.sh_type == SHT_NOBITS) && (chunk->shdr.sh_flags & SHF_TLS); +} + +// This function assigns virtual addresses to output sections. Assigning +// addresses is a bit tricky because we want to pack sections as tightly +// as possible while not violating the constraints imposed by the hardware +// and the OS kernel. Specifically, we need to satisfy the following +// constraints: +// +// - Memory protection (readable, writable and executable) works at page +// granularity. Therefore, if we want to set different memory attributes +// to two sections, we need to place them into separate pages. +// +// - The ELF spec requires that a section's file offset is congruent to +// its virtual address modulo the page size. For example, a section at +// virtual address 0x401234 on x86-64 (4 KiB, or 0x1000 byte page +// system) can be at file offset 0x3234 or 0x50234 but not at 0x1000. +// +// We need to insert paddings between sections if we can't satisfy the +// above constraints without them. +// +// We don't want to waste too much memory and disk space for paddings. +// There are a few tricks we can use to minimize paddings as below: +// +// - We want to place sections with the same memory attributes +// contiguous as possible. +// +// - We can map the same file region to memory more than once. For +// example, we can write code (with R and X bits) and read-only data +// (with R bit) adjacent on file and map it twice as the last page of +// the executable segment and the first page of the read-only data +// segment. This doesn't save memory but saves disk space. 
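+//
+// As a sketch of the second trick with made-up offsets: if the executable
+// segment ends at file offset 0x20f00 and read-only data starts right after
+// it in the file, the page covering that boundary can be mapped twice --
+// once with R+X as the tail of the text segment and once with R as the head
+// of the read-only segment -- so no padding bytes are needed in the file
+// even though the two mappings have different protections.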
+template +static void set_virtual_addresses_regular(Context &ctx) { + constexpr i64 RELRO = 1LL << 32; + + auto get_flags = [&](Chunk *chunk) { + i64 flags = to_phdr_flags(ctx, chunk); + if (is_relro(ctx, chunk)) + return flags | RELRO; + return flags; + }; + + // Assign virtual addresses + std::vector *> &chunks = ctx.chunks; + u64 addr = ctx.arg.image_base; + + // TLS chunks alignments are special: in addition to having their virtual + // addresses aligned, they also have to be aligned when the region of + // tls_begin is copied to a new thread's storage area. In other words, their + // offset against tls_begin also has to be aligned. + // + // A good way to achieve this is to take the largest alignment requirement + // of all TLS sections and make tls_begin also aligned to that. + Chunk *first_tls_chunk = nullptr; + u64 tls_alignment = 1; + for (Chunk *chunk : chunks) { + if (chunk->shdr.sh_flags & SHF_TLS) { + if (!first_tls_chunk) + first_tls_chunk = chunk; + tls_alignment = std::max(tls_alignment, (u64)chunk->shdr.sh_addralign); + } + } + + auto alignment = [&](Chunk *chunk) { + return chunk == first_tls_chunk ? tls_alignment : (u64)chunk->shdr.sh_addralign; + }; + + for (i64 i = 0; i < chunks.size(); i++) { + if (!(chunks[i]->shdr.sh_flags & SHF_ALLOC)) + continue; + + // .relro_padding is a padding section to extend a PT_GNU_RELRO + // segment to cover an entire page. Technically, we don't need a + // .relro_padding section because we can leave a trailing part of a + // segment an unused space. However, the `strip` command would delete + // such an unused trailing part and make an executable invalid. + // So we add a dummy section. + if (chunks[i] == ctx.relro_padding) { + chunks[i]->shdr.sh_addr = addr; + chunks[i]->shdr.sh_size = align_to(addr, ctx.page_size) - addr; + addr += ctx.page_size; + continue; + } + + // Handle --section-start first + if (auto it = ctx.arg.section_start.find(chunks[i]->name); + it != ctx.arg.section_start.end()) { + addr = it->second; + chunks[i]->shdr.sh_addr = addr; + addr += chunks[i]->shdr.sh_size; + continue; + } + + // Memory protection works at page size granularity. We need to + // put sections with different memory attributes into different + // pages. We do it by inserting paddings here. + if (i > 0 && chunks[i - 1] != ctx.relro_padding) { + i64 flags1 = get_flags(chunks[i - 1]); + i64 flags2 = get_flags(chunks[i]); + + if (flags1 != flags2) { + switch (ctx.arg.z_separate_code) { + case SEPARATE_LOADABLE_SEGMENTS: + addr = align_to(addr, ctx.page_size); + break; + case SEPARATE_CODE: + if ((flags1 & PF_X) != (flags2 & PF_X)) { + addr = align_to(addr, ctx.page_size); + break; + } + [[fallthrough]]; + case NOSEPARATE_CODE: + if (addr % ctx.page_size != 0) + addr += ctx.page_size; + break; + default: + unreachable(); + } + } + } + + // TLS BSS sections are laid out so that they overlap with the + // subsequent non-tbss sections. Overlapping is fine because a STT_TLS + // segment contains an initialization image for newly-created threads, + // and no one except the runtime reads its contents. Even the runtime + // doesn't need a BSS part of a TLS initialization image; it just + // leaves zero-initialized bytes as-is instead of copying zeros. + // So no one really read tbss at runtime. + // + // We can instead allocate a dedicated virtual address space to tbss, + // but that would be just a waste of the address and disk space. 
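+    //
+    // Illustrative example: if .tbss would be placed at 0x404000 with size
+    // 0x100, the next non-TLS section may also be assigned 0x404000; the
+    // overlap is harmless because .tbss bytes exist only as zeros in each
+    // thread's freshly-allocated TLS block, never in the file or at that
+    // address in the process image.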
+ if (is_tbss(chunks[i])) { + u64 addr2 = addr; + for (;;) { + addr2 = align_to(addr2, alignment(chunks[i])); + chunks[i]->shdr.sh_addr = addr2; + addr2 += chunks[i]->shdr.sh_size; + if (i + 2 == chunks.size() || !is_tbss(chunks[i + 1])) + break; + i++; + } + continue; + } + + addr = align_to(addr, alignment(chunks[i])); + chunks[i]->shdr.sh_addr = addr; + addr += chunks[i]->shdr.sh_size; + } +} + +template +static void set_virtual_addresses_by_order(Context &ctx) { + std::vector *> &c = ctx.chunks; + u64 addr = ctx.arg.image_base; + i64 i = 0; + + while (i < c.size() && !(c[i]->shdr.sh_flags & SHF_ALLOC)) + i++; + + auto assign_addr = [&] { + if (i != 0) { + i64 flags1 = to_phdr_flags(ctx, c[i - 1]); + i64 flags2 = to_phdr_flags(ctx, c[i]); + + // Memory protection works at page size granularity. We need to + // put sections with different memory attributes into different + // pages. We do it by inserting paddings here. + if (flags1 != flags2) { + switch (ctx.arg.z_separate_code) { + case SEPARATE_LOADABLE_SEGMENTS: + addr = align_to(addr, ctx.page_size); + break; + case SEPARATE_CODE: + if ((flags1 & PF_X) != (flags2 & PF_X)) + addr = align_to(addr, ctx.page_size); + break; + default: + break; + } + } + } + + addr = align_to(addr, c[i]->shdr.sh_addralign); + c[i]->shdr.sh_addr = addr; + addr += c[i]->shdr.sh_size; + + do { + i++; + } while (i < c.size() && !(c[i]->shdr.sh_flags & SHF_ALLOC)); + }; + + for (i64 j = 0; j < ctx.arg.section_order.size(); j++) { + SectionOrder &ord = ctx.arg.section_order[j]; + switch (ord.type) { + case SectionOrder::SECTION: + if (i < c.size() && j == c[i]->sect_order) + assign_addr(); + break; + case SectionOrder::GROUP: + while (i < c.size() && j == c[i]->sect_order) + assign_addr(); + break; + case SectionOrder::ADDR: + addr = ord.value; + break; + case SectionOrder::ALIGN: + addr = align_to(addr, ord.value); + break; + case SectionOrder::SYMBOL: + get_symbol(ctx, ord.name)->value = addr; + break; + default: + unreachable(); + } + } +} + +// Returns the smallest integer N that satisfies N >= val and +// N mod align == skew mod align. +// +// Section's file offset must be congruent to its virtual address modulo +// the page size. We use this function to satisfy that requirement. +static u64 align_with_skew(u64 val, u64 align, u64 skew) { + u64 x = align_down(val, align) + skew % align; + return (val <= x) ? x : x + align; +} + +// Assign file offsets to output sections. +template +static i64 set_file_offsets(Context &ctx) { + std::vector *> &chunks = ctx.chunks; + u64 fileoff = 0; + i64 i = 0; + + while (i < chunks.size()) { + Chunk &first = *chunks[i]; + + if (!(first.shdr.sh_flags & SHF_ALLOC)) { + fileoff = align_to(fileoff, first.shdr.sh_addralign); + first.shdr.sh_offset = fileoff; + fileoff += first.shdr.sh_size; + i++; + continue; + } + + if (first.shdr.sh_type == SHT_NOBITS) { + i++; + continue; + } + + if (first.shdr.sh_addralign > ctx.page_size) + fileoff = align_to(fileoff, first.shdr.sh_addralign); + else + fileoff = align_with_skew(fileoff, ctx.page_size, first.shdr.sh_addr); + + // Assign ALLOC sections contiguous file offsets as long as they + // are contiguous in memory. + for (;;) { + chunks[i]->shdr.sh_offset = + fileoff + chunks[i]->shdr.sh_addr - first.shdr.sh_addr; + i++; + + if (i >= chunks.size() || + !(chunks[i]->shdr.sh_flags & SHF_ALLOC) || + chunks[i]->shdr.sh_type == SHT_NOBITS) + break; + + // If --start-section is given, addresses may not increase + // monotonically. 
+ if (chunks[i]->shdr.sh_addr < first.shdr.sh_addr) + break; + + i64 gap_size = chunks[i]->shdr.sh_addr - chunks[i - 1]->shdr.sh_addr - + chunks[i - 1]->shdr.sh_size; + + // If --start-section is given, there may be a large gap between + // sections. We don't want to allocate a disk space for a gap if + // exists. + if (gap_size >= ctx.page_size) + break; + } + + fileoff = chunks[i - 1]->shdr.sh_offset + chunks[i - 1]->shdr.sh_size; + + while (i < chunks.size() && + (chunks[i]->shdr.sh_flags & SHF_ALLOC) && + chunks[i]->shdr.sh_type == SHT_NOBITS) + i++; + } + + return fileoff; +} + +template +void compute_section_headers(Context &ctx) { + // Update sh_size for each chunk. + for (Chunk *chunk : ctx.chunks) + chunk->update_shdr(ctx); + + // Remove empty chunks. + std::erase_if(ctx.chunks, [](Chunk *chunk) { + return chunk->kind() != OUTPUT_SECTION && chunk->shdr.sh_size == 0; + }); + + // Set section indices. + i64 shndx = 1; + for (i64 i = 0; i < ctx.chunks.size(); i++) + if (ctx.chunks[i]->kind() != HEADER) + ctx.chunks[i]->shndx = shndx++; + + if (ctx.symtab && SHN_LORESERVE <= shndx) { + SymtabShndxSection *sec = new SymtabShndxSection; + sec->shndx = shndx++; + sec->shdr.sh_link = ctx.symtab->shndx; + ctx.symtab_shndx = sec; + ctx.chunks.push_back(sec); + ctx.chunk_pool.emplace_back(sec); + } + + if (ctx.shdr) + ctx.shdr->shdr.sh_size = shndx * sizeof(ElfShdr); + + // Some types of section header refer other section by index. + // Recompute the section header to fill such fields with correct values. + for (Chunk *chunk : ctx.chunks) + chunk->update_shdr(ctx); + + if (ctx.symtab_shndx) { + i64 symtab_size = ctx.symtab->shdr.sh_size / sizeof(ElfSym); + ctx.symtab_shndx->shdr.sh_size = symtab_size * 4; + } +} + +// Assign virtual addresses and file offsets to output sections. +template +i64 set_osec_offsets(Context &ctx) { + Timer t(ctx, "set_osec_offsets"); + + for (;;) { + if (ctx.arg.section_order.empty()) + set_virtual_addresses_regular(ctx); + else + set_virtual_addresses_by_order(ctx); + + i64 fileoff = set_file_offsets(ctx); + + // Assigning new offsets may change the contents and the length + // of the program header, so repeat it until converge. 
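+    //
+    // The loop terminates once a full pass leaves the program header's size
+    // unchanged: with an identical phdr size, the address and offset
+    // assignment above would reproduce the same layout on the next pass.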
+ if (!ctx.phdr) + return fileoff; + + i64 sz = ctx.phdr->shdr.sh_size; + ctx.phdr->update_shdr(ctx); + if (sz == ctx.phdr->shdr.sh_size) + return fileoff; + } +} + +template +static i64 get_num_irelative_relocs(Context &ctx) { + i64 n = std::count_if(ctx.got->got_syms.begin(), ctx.got->got_syms.end(), + [](Symbol *sym) { return sym->is_ifunc(); }); + return n + ctx.num_ifunc_dynrels; +} + +template +static u64 to_paddr(Context &ctx, u64 vaddr) { + for (ElfPhdr &phdr : ctx.phdr->phdrs) + if (phdr.p_type == PT_LOAD) + if (phdr.p_vaddr <= vaddr && vaddr < phdr.p_vaddr + phdr.p_memsz) + return phdr.p_paddr + (vaddr - phdr.p_vaddr); + return 0; +} + +template +void fix_synthetic_symbols(Context &ctx) { + auto start = [](Symbol *sym, auto &chunk, i64 bias = 0) { + if (sym && chunk) { + sym->set_output_section(chunk); + sym->value = chunk->shdr.sh_addr + bias; + } + }; + + auto stop = [](Symbol *sym, auto &chunk) { + if (sym && chunk) { + sym->set_output_section(chunk); + sym->value = chunk->shdr.sh_addr + chunk->shdr.sh_size; + } + }; + + std::vector *> sections; + for (Chunk *chunk : ctx.chunks) + if (chunk->kind() != HEADER && (chunk->shdr.sh_flags & SHF_ALLOC)) + sections.push_back(chunk); + + auto find = [&](std::string name) -> Chunk * { + for (Chunk *chunk : sections) + if (chunk->name == name) + return chunk; + return nullptr; + }; + + // __bss_start + if (Chunk *chunk = find(".bss")) + start(ctx.__bss_start, chunk); + + if (ctx.ehdr && (ctx.ehdr->shdr.sh_flags & SHF_ALLOC)) { + ctx.__ehdr_start->set_output_section(sections[0]); + ctx.__ehdr_start->value = ctx.ehdr->shdr.sh_addr; + ctx.__executable_start->set_output_section(sections[0]); + ctx.__executable_start->value = ctx.ehdr->shdr.sh_addr; + } + + if (ctx.__dso_handle) { + ctx.__dso_handle->set_output_section(sections[0]); + ctx.__dso_handle->value = sections[0]->shdr.sh_addr; + } + + // __rel_iplt_start and __rel_iplt_end. These symbols need to be + // defined in a statically-linked non-relocatable executable because + // such executable lacks the .dynamic section and thus there's no way + // to find ifunc relocations other than these symbols. + // + // We don't want to set values to these symbols if we are creating a + // static PIE due to a glibc bug. Static PIE has a dynamic section. + // If we set values to these symbols in a static PIE, glibc attempts + // to run ifunc initializers twice, with the second attempt with wrong + // function addresses, causing a segmentation fault. 
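+  //
+  // IRELATIVE relocations are emitted as the trailing entries of .rela.dyn,
+  // so both symbols are first set to the end of .rela.dyn and
+  // __rel_iplt_start is then moved back over just those entries.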
+ if (ctx.reldyn && ctx.arg.is_static && !ctx.arg.pie) { + stop(ctx.__rel_iplt_start, ctx.reldyn); + stop(ctx.__rel_iplt_end, ctx.reldyn); + + ctx.__rel_iplt_start->value -= + get_num_irelative_relocs(ctx) * sizeof(ElfRel); + } + + // __{init,fini}_array_{start,end} + for (Chunk *chunk : sections) { + switch (chunk->shdr.sh_type) { + case SHT_INIT_ARRAY: + start(ctx.__init_array_start, chunk); + stop(ctx.__init_array_end, chunk); + break; + case SHT_PREINIT_ARRAY: + start(ctx.__preinit_array_start, chunk); + stop(ctx.__preinit_array_end, chunk); + break; + case SHT_FINI_ARRAY: + start(ctx.__fini_array_start, chunk); + stop(ctx.__fini_array_end, chunk); + break; + } + } + + // _end, _etext, _edata and the like + for (Chunk *chunk : sections) { + if (chunk->shdr.sh_flags & SHF_ALLOC) { + stop(ctx._end, chunk); + stop(ctx.end, chunk); + } + + if (chunk->shdr.sh_flags & SHF_EXECINSTR) { + stop(ctx._etext, chunk); + stop(ctx.etext, chunk); + } + + if (chunk->shdr.sh_type != SHT_NOBITS && + (chunk->shdr.sh_flags & SHF_ALLOC)) { + stop(ctx._edata, chunk); + stop(ctx.edata, chunk); + } + } + + // _DYNAMIC + start(ctx._DYNAMIC, ctx.dynamic); + + // _GLOBAL_OFFSET_TABLE_. I don't know why, but for the sake of + // compatibility with existing code, it must be set to the beginning of + // .got.plt instead of .got only on i386 and x86-64. + if constexpr (is_x86) + start(ctx._GLOBAL_OFFSET_TABLE_, ctx.gotplt); + else + start(ctx._GLOBAL_OFFSET_TABLE_, ctx.got); + + // _PROCEDURE_LINKAGE_TABLE_. We need this on SPARC. + start(ctx._PROCEDURE_LINKAGE_TABLE_, ctx.plt); + + // _TLS_MODULE_BASE_. This symbol is used to obtain the address of + // the TLS block in the TLSDESC model. I believe GCC and Clang don't + // create a reference to it, but Intel compiler seems to be using + // this symbol. + if (ctx._TLS_MODULE_BASE_) { + ctx._TLS_MODULE_BASE_->set_output_section(sections[0]); + ctx._TLS_MODULE_BASE_->value = ctx.tls_begin; + } + + // __GNU_EH_FRAME_HDR + start(ctx.__GNU_EH_FRAME_HDR, ctx.eh_frame_hdr); + + // RISC-V's __global_pointer$ + if (ctx.__global_pointer) { + if (Chunk *chunk = find(".sdata")) { + start(ctx.__global_pointer, chunk, 0x800); + } else { + ctx.__global_pointer->set_output_section(sections[0]); + ctx.__global_pointer->value = 0; + } + } + + // ARM32's __exidx_{start,end} + if (ctx.__exidx_start) { + if (Chunk *chunk = find(".ARM.exidx")) { + start(ctx.__exidx_start, chunk); + stop(ctx.__exidx_end, chunk); + } + } + + // PPC64's ".TOC." symbol. 
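+  //
+  // .TOC. is biased by 0x8000 so that the full ±32 KiB range of signed
+  // 16-bit %r2-relative offsets can address data on both sides of it (see
+  // the layout comment above sort_output_sections_regular).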
+ if constexpr (is_ppc64) { + if (Chunk *chunk = find(".got")) { + start(ctx.extra.TOC, chunk, 0x8000); + } else if (Chunk *chunk = find(".toc")) { + start(ctx.extra.TOC, chunk, 0x8000); + } else { + ctx.extra.TOC->set_output_section(sections[0]); + ctx.extra.TOC->value = 0; + } + } + + // __start_ and __stop_ symbols + for (Chunk *chunk : sections) { + if (std::optional name = get_start_stop_name(ctx, *chunk)) { + start(get_symbol(ctx, save_string(ctx, "__start_" + *name)), chunk); + stop(get_symbol(ctx, save_string(ctx, "__stop_" + *name)), chunk); + + if (ctx.arg.physical_image_base) { + u64 paddr = to_paddr(ctx, chunk->shdr.sh_addr); + + Symbol *x = get_symbol(ctx, save_string(ctx, "__phys_start_" + *name)); + x->set_output_section(chunk); + x->value = paddr; + + Symbol *y = get_symbol(ctx, save_string(ctx, "__phys_stop_" + *name)); + y->set_output_section(chunk); + y->value = paddr + chunk->shdr.sh_size; + } + } + } + + // --defsym=sym=value symbols + for (i64 i = 0; i < ctx.arg.defsyms.size(); i++) { + Symbol *sym = ctx.arg.defsyms[i].first; + std::variant *, u64> val = ctx.arg.defsyms[i].second; + + if (u64 *addr = std::get_if(&val)) { + sym->origin = 0; + sym->value = *addr; + continue; + } + + Symbol *sym2 = std::get *>(val); + if (!sym2->file) { + Error(ctx) << "--defsym: undefined symbol: " << *sym2; + continue; + } + + sym->value = sym2->value; + sym->origin = sym2->origin; + sym->visibility = sym2->visibility.load(); + } + + + // --section-order symbols + for (SectionOrder &ord : ctx.arg.section_order) + if (ord.type == SectionOrder::SYMBOL) + get_symbol(ctx, ord.name)->set_output_section(sections[0]); +} + +template +i64 compress_debug_sections(Context &ctx) { + Timer t(ctx, "compress_debug_sections"); + + tbb::parallel_for((i64)0, (i64)ctx.chunks.size(), [&](i64 i) { + Chunk &chunk = *ctx.chunks[i]; + + if ((chunk.shdr.sh_flags & SHF_ALLOC) || chunk.shdr.sh_size == 0 || + !chunk.name.starts_with(".debug")) + return; + + Chunk *comp = new CompressedSection(ctx, chunk); + ctx.chunk_pool.emplace_back(comp); + ctx.chunks[i] = comp; + }); + + ctx.shstrtab->update_shdr(ctx); + + if (ctx.ehdr) + ctx.ehdr->update_shdr(ctx); + if (ctx.shdr) + ctx.shdr->update_shdr(ctx); + + return set_osec_offsets(ctx); +} + +// Write Makefile-style dependency rules to a file specified by +// --dependency-file. This is analogous to the compiler's -M flag. 
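+//
+// For example (illustrative paths), linking main.o and libfoo.so into a.out
+// writes a file of the form:
+//
+//   a.out: main.o libfoo.so
+//
+//   main.o:
+//
+//   libfoo.so:
+//
+// The empty per-input rules keep make from erroring out if an input file is
+// later deleted, in the same spirit as a compiler's -MP output.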
+template +void write_dependency_file(Context &ctx) { + std::vector deps; + std::unordered_set seen; + + for (std::unique_ptr>> &mf : ctx.mf_pool) + if (!mf->parent) + if (std::string path = path_clean(mf->name); seen.insert(path).second) + deps.push_back(path); + + std::ofstream out; + out.open(ctx.arg.dependency_file); + if (out.fail()) + Fatal(ctx) << "--dependency-file: cannot open " << ctx.arg.dependency_file + << ": " << errno_string(); + + out << ctx.arg.output << ":"; + for (std::string &s : deps) + out << " " << s; + out << "\n"; + + for (std::string &s : deps) + out << "\n" << s << ":\n"; + out.close(); +} + +template +void show_stats(Context &ctx) { + for (ObjectFile *obj : ctx.objs) { + static Counter defined("defined_syms"); + defined += obj->first_global - 1; + + static Counter undefined("undefined_syms"); + undefined += obj->symbols.size() - obj->first_global; + + for (std::unique_ptr> &sec : obj->sections) { + if (!sec || !sec->is_alive) + continue; + + static Counter alloc("reloc_alloc"); + static Counter nonalloc("reloc_nonalloc"); + + if (sec->shdr().sh_flags & SHF_ALLOC) + alloc += sec->get_rels(ctx).size(); + else + nonalloc += sec->get_rels(ctx).size(); + } + + static Counter comdats("comdats"); + comdats += obj->comdat_groups.size(); + + static Counter removed_comdats("removed_comdat_mem"); + for (ComdatGroupRef &ref : obj->comdat_groups) + if (ref.group->owner != obj->priority) + removed_comdats += ref.members.size(); + + static Counter num_cies("num_cies"); + num_cies += obj->cies.size(); + + static Counter num_unique_cies("num_unique_cies"); + for (CieRecord &cie : obj->cies) + if (cie.is_leader) + num_unique_cies++; + + static Counter num_fdes("num_fdes"); + num_fdes += obj->fdes.size(); + } + + static Counter num_bytes("total_input_bytes"); + for (std::unique_ptr>> &mf : ctx.mf_pool) + num_bytes += mf->size; + + static Counter num_input_sections("input_sections"); + for (ObjectFile *file : ctx.objs) + num_input_sections += file->sections.size(); + + static Counter num_output_chunks("output_chunks", ctx.chunks.size()); + static Counter num_objs("num_objs", ctx.objs.size()); + static Counter num_dsos("num_dsos", ctx.dsos.size()); + + if constexpr (needs_thunk) { + static Counter thunk_bytes("thunk_bytes"); + for (Chunk *chunk : ctx.chunks) + if (OutputSection *osec = chunk->to_osec()) + for (std::unique_ptr> &thunk : osec->thunks) + thunk_bytes += thunk->size(); + } + + Counter::print(); + + for (std::unique_ptr> &sec : ctx.merged_sections) + sec->print_stats(ctx); +} + +using E = MOLD_TARGET; + +template void create_internal_file(Context &); +template void apply_exclude_libs(Context &); +template void create_synthetic_sections(Context &); +template void resolve_symbols(Context &); +template void kill_eh_frame_sections(Context &); +template void resolve_section_pieces(Context &); +template void convert_common_symbols(Context &); +template void compute_merged_section_sizes(Context &); +template void create_output_sections(Context &); +template void add_synthetic_symbols(Context &); +template void check_cet_errors(Context &); +template void print_dependencies(Context &); +template void write_repro_file(Context &); +template void check_duplicate_symbols(Context &); +template void check_symbol_types(Context &); +template void sort_init_fini(Context &); +template void sort_ctor_dtor(Context &); +template void shuffle_sections(Context &); +template void compute_section_sizes(Context &); +template void sort_output_sections(Context &); +template void 
claim_unresolved_symbols(Context &); +template void scan_relocations(Context &); +template void report_undef_errors(Context &); +template void create_reloc_sections(Context &); +template void copy_chunks(Context &); +template void construct_relr(Context &); +template void create_output_symtab(Context &); +template void apply_version_script(Context &); +template void parse_symbol_version(Context &); +template void compute_import_export(Context &); +template void mark_addrsig(Context &); +template void clear_padding(Context &); +template void compute_section_headers(Context &); +template i64 set_osec_offsets(Context &); +template void fix_synthetic_symbols(Context &); +template i64 compress_debug_sections(Context &); +template void write_dependency_file(Context &); +template void show_stats(Context &); + +} // namespace mold::elf diff --git a/third_party/mold/elf/relocatable.cc b/third_party/mold/elf/relocatable.cc new file mode 100644 index 00000000000..5b487fbc414 --- /dev/null +++ b/third_party/mold/elf/relocatable.cc @@ -0,0 +1,198 @@ +// clang-format off +// This file implements -r or --relocatable. That option forces the linker +// to combine input object files into another single large object file. +// Since the behavior of the linker when the option is given is quite +// different from that of the normal execution mode, we separate code for +// the feature into this separate file. +// +// The --relocatable option isn't used very often. After all, if you want +// to combine object files into a single file, you could use `ar`. +// However, some programs use it in a creative manner which is hard to be +// substituted with static archives, so we need to support this option in +// the same way as GNU ld does. A notable example is GHC (Glasgow Haskell +// Compiler). GHC has its own dynamic linker which can load a .o file (as +// opposed to a .so) into memory. GHC's module is not a shared object file +// but a combined object file. +// +// There are many different ways to combine object files into a single file. +// The simplest approach would be to just copy all sections from input files +// to an output file as-is with a few exceptions for singleton sections such +// as the symbol table or the string table. That works, but that's not +// compatible with GNU ld. +// +// To be compatible with GNU ld, we need to do the followings: +// +// - Regular sections containing opaque data (e.g. ".text" or ".data") +// are just copied as-is. Two sections with the same name are merged. +// +// - .symtab, .strtab and .shstrtab are merged. +// +// - COMDAT groups are uniquified. +// +// - Relocations are copied, but we need to fix symbol indices. + +#include "third_party/mold/elf/mold.h" + +// MISSING #include +// MISSING #include + +namespace mold::elf { + +// Create linker-synthesized sections +template +static void r_create_synthetic_sections(Context &ctx) { + auto push = [&](T *x) { + ctx.chunks.push_back(x); + ctx.chunk_pool.emplace_back(x); + return x; + }; + + ctx.ehdr = push(new OutputEhdr(0)); + ctx.shdr = push(new OutputShdr); + ctx.eh_frame = push(new EhFrameSection); + ctx.eh_frame_reloc = push(new EhFrameRelocSection); + ctx.strtab = push(new StrtabSection); + ctx.symtab = push(new SymtabSection); + ctx.shstrtab = push(new ShstrtabSection); + ctx.note_property = push(new NotePropertySection); +} + +// Create SHT_GROUP (i.e. comdat group) sections. We uniquify comdat +// sections by signature. 
We want to propagate input comdat groups as +// output comdat groups if they are still alive after uniquification. +template +static void create_comdat_group_sections(Context &ctx) { + Timer t(ctx, "create_comdat_group_sections"); + + std::vector *>> vec{ctx.objs.size()}; + + tbb::parallel_for((i64)0, (i64)ctx.objs.size(), [&](i64 i) { + ObjectFile &file = *ctx.objs[i]; + + for (ComdatGroupRef &ref : file.comdat_groups) { + if (ref.group->owner != file.priority) + continue; + + Symbol *sym = file.symbols[file.elf_sections[ref.sect_idx].sh_info]; + assert(sym); + + std::vector *> members; + for (u32 j : ref.members) { + const ElfShdr &shdr = file.elf_sections[j]; + if (shdr.sh_type == (E::is_rela ? SHT_RELA : SHT_REL)) { + InputSection &isec = *file.sections[shdr.sh_info]; + members.push_back(isec.output_section->reloc_sec.get()); + } else { + InputSection &isec = *file.sections[j]; + members.push_back(isec.output_section); + } + } + + vec[i].push_back(new ComdatGroupSection(*sym, std::move(members))); + } + }); + + for (std::vector *> &vec2 : vec) { + for (Chunk *chunk : vec2) { + ctx.chunks.push_back(chunk); + ctx.chunk_pool.emplace_back(chunk); + } + } +} + +// Unresolved undefined symbols in the -r mode are simply propagated to an +// output file as undefined symbols. This function guarantees that +// unresolved undefined symbols belongs to some input file. +template +static void r_claim_unresolved_symbols(Context &ctx) { + Timer t(ctx, "r_claim_unresolved_symbols"); + + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + if (!file->is_alive) + return; + + for (i64 i = file->first_global; i < file->elf_syms.size(); i++) { + const ElfSym &esym = file->elf_syms[i]; + Symbol &sym = *file->symbols[i]; + if (!esym.is_undef()) + continue; + + std::scoped_lock lock(sym.mu); + + if (sym.file && + (!sym.esym().is_undef() || sym.file->priority <= file->priority)) + continue; + + sym.file = file; + sym.origin = 0; + sym.value = 0; + sym.sym_idx = i; + } + }); +} + +// Set output section in-file offsets. Output section memory addresses +// are left as zero. 
+template +static u64 r_set_osec_offsets(Context &ctx) { + u64 offset = 0; + for (Chunk *chunk : ctx.chunks) { + offset = align_to(offset, chunk->shdr.sh_addralign); + chunk->shdr.sh_offset = offset; + offset += chunk->shdr.sh_size; + } + return offset; +} + +template +void combine_objects(Context &ctx) { + compute_merged_section_sizes(ctx); + + create_output_sections(ctx); + + r_create_synthetic_sections(ctx); + + r_claim_unresolved_symbols(ctx); + + compute_section_sizes(ctx); + + sort_output_sections(ctx); + + create_output_symtab(ctx); + + ctx.eh_frame->construct(ctx); + + create_reloc_sections(ctx); + + create_comdat_group_sections(ctx); + + compute_section_headers(ctx); + + i64 filesize = r_set_osec_offsets(ctx); + ctx.output_file = + OutputFile>::open(ctx, ctx.arg.output, filesize, 0666); + ctx.buf = ctx.output_file->buf; + + copy_chunks(ctx); + clear_padding(ctx); + ctx.output_file->close(ctx); + ctx.checkpoint(); + + if (ctx.arg.print_map) + print_map(ctx); + + if (ctx.arg.stats) + show_stats(ctx); + + if (ctx.arg.perf) + print_timer_records(ctx.timer_records); + + if (ctx.arg.quick_exit) + _exit(0); +} + +using E = MOLD_TARGET; + +template void combine_objects(Context &); + +} // namespace mold::elf diff --git a/third_party/mold/elf/subprocess.cc b/third_party/mold/elf/subprocess.cc new file mode 100644 index 00000000000..3892bea5cde --- /dev/null +++ b/third_party/mold/elf/subprocess.cc @@ -0,0 +1,166 @@ +// clang-format off +#if !defined(_WIN32) && !defined(__APPLE__) + +#include "third_party/mold/elf/mold.h" +// MISSING #include "config.h" + +#include "third_party/libcxx/filesystem" +#include "libc/calls/calls.h" +#include "libc/calls/sigtimedwait.h" +#include "libc/calls/struct/sigaction.h" +#include "libc/calls/struct/siginfo.h" +#include "libc/sysv/consts/sa.h" +#include "libc/sysv/consts/sicode.h" +#include "libc/sysv/consts/ss.h" +#include "libc/calls/calls.h" +#include "libc/calls/struct/stat.h" +#include "libc/calls/struct/stat.macros.h" +#include "libc/calls/struct/timespec.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/s.h" +#include "libc/sysv/consts/utime.h" +#include "libc/time/time.h" +#include "libc/calls/struct/itimerval.h" +#include "libc/calls/struct/timeval.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sock/select.h" +#include "libc/sysv/consts/clock.h" +#include "libc/sysv/consts/itimer.h" +#include "libc/time/struct/timezone.h" +#include "libc/time/time.h" +#include "libc/calls/makedev.h" +#include "libc/calls/weirdtypes.h" +#include "libc/thread/thread.h" +#include "libc/calls/typedef/u.h" +#include "libc/calls/weirdtypes.h" +#include "libc/intrin/newbie.h" +#include "libc/sock/select.h" +#include "libc/sysv/consts/endian.h" +#include "libc/calls/calls.h" +#include "libc/calls/struct/siginfo.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/w.h" +#include "libc/sysv/consts/waitid.h" +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "libc/time/time.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/lockf.h" + +namespace mold::elf { + +#ifdef MOLD_X86_64 +// Exiting from a program with large memory usage is slow -- +// it may take a few hundred milliseconds. 
To hide the latency, +// we fork a child and let it do the actual linking work. +std::function fork_child() { + int pipefd[2]; + if (pipe(pipefd) == -1) { + perror("pipe"); + exit(1); + } + + pid_t pid = fork(); + if (pid == -1) { + perror("fork"); + exit(1); + } + + if (pid > 0) { + // Parent + close(pipefd[1]); + + char buf[1]; + if (read(pipefd[0], buf, 1) == 1) + _exit(0); + + int status; + waitpid(pid, &status, 0); + + if (WIFEXITED(status)) + _exit(WEXITSTATUS(status)); + if (WIFSIGNALED(status)) + raise(WTERMSIG(status)); + _exit(1); + } + + // Child + close(pipefd[0]); + + return [=] { + char buf[] = {1}; + [[maybe_unused]] int n = write(pipefd[1], buf, 1); + assert(n == 1); + }; +} +#endif + +template +static std::string find_dso(Context &ctx, std::filesystem::path self) { + // Look for mold-wrapper.so from the same directory as the executable is. + std::filesystem::path path = self.parent_path() / "mold-wrapper.so"; + std::error_code ec; + if (std::filesystem::is_regular_file(path, ec) && !ec) + return path; + + // If not found, search $(MOLD_LIBDIR)/mold, which is /usr/local/lib/mold + // by default. + path = MOLD_LIBDIR "/mold/mold-wrapper.so"; + if (std::filesystem::is_regular_file(path, ec) && !ec) + return path; + + // Look for ../lib/mold/mold-wrapper.so + path = self.parent_path() / "../lib/mold/mold-wrapper.so"; + if (std::filesystem::is_regular_file(path, ec) && !ec) + return path; + + Fatal(ctx) << "mold-wrapper.so is missing"; +} + +template +[[noreturn]] +void process_run_subcommand(Context &ctx, int argc, char **argv) { + assert(argv[1] == "-run"s || argv[1] == "--run"s); + + if (!argv[2]) + Fatal(ctx) << "-run: argument missing"; + + // Get the mold-wrapper.so path + std::string self = get_self_path(); + std::string dso_path = find_dso(ctx, self); + + // Set environment variables + putenv(strdup(("LD_PRELOAD=" + dso_path).c_str())); + putenv(strdup(("MOLD_PATH=" + self).c_str())); + + // If ld, ld.lld or ld.gold is specified, run mold itself + if (std::string cmd = filepath(argv[2]).filename(); + cmd == "ld" || cmd == "ld.lld" || cmd == "ld.gold") { + std::vector args; + args.push_back(argv[0]); + args.insert(args.end(), argv + 3, argv + argc); + args.push_back(nullptr); + execv(self.c_str(), args.data()); + Fatal(ctx) << "mold -run failed: " << self << ": " << errno_string(); + } + + // Execute a given command + execvp(argv[2], argv + 2); + Fatal(ctx) << "mold -run failed: " << argv[2] << ": " << errno_string(); +} + +using E = MOLD_TARGET; + +template void process_run_subcommand(Context &, int, char **); + +} // namespace mold::elf + +#endif diff --git a/third_party/mold/elf/thunks.cc b/third_party/mold/elf/thunks.cc new file mode 100644 index 00000000000..e8d0ebf644b --- /dev/null +++ b/third_party/mold/elf/thunks.cc @@ -0,0 +1,318 @@ +// clang-format off +// RISC instructions are usually up to 4 bytes long, so the immediates of +// their branch instructions are naturally smaller than 32 bits. This is +// contrary to x86-64 on which branch instructions take 4 bytes immediates +// and can jump to anywhere within PC ± 2 GiB. +// +// In fact, ARM32's branch instructions can jump only within ±16 MiB and +// ARM64's ±128 MiB, for example. If a branch target is further than that, +// we need to let it branch to a linker-synthesized code sequence that +// construct a full 32 bit address in a register and jump there. That +// linker-synthesized code is called "thunk". +// +// The function in this file creates thunks. 
+// +// Note that although thunks play an important role in an executable, they +// don't take up too much space in it. For example, among the clang-16's +// text segment whose size is ~300 MiB on ARM64, thunks in total occupy +// only ~30 KiB or 0.01%. Of course the number depends on an ISA; we would +// need more thunks on ARM32 whose branch range is shorter than ARM64. +// That said, the total size of thunks still isn't that much. Therefore, +// we don't need to try too hard to reduce thunk size to the absolute +// minimum. + +#if MOLD_ARM32 || MOLD_ARM64 || MOLD_PPC32 || MOLD_PPC64V1 || MOLD_PPC64V2 + +#include "third_party/mold/elf/mold.h" + +// MISSING #include +// MISSING #include + +namespace mold::elf { + +// Returns a branch reach in bytes for a given target. +template +static consteval i64 max_distance() { + // ARM64's branch has 26 bits immediate. The immediate is padded with + // implicit two-bit zeros because all instructions are 4 bytes aligned + // and therefore the least two bits are always zero. So the branch + // operand is effectively 28 bits long. That means the branch range is + // [-2^27, 2^27) or PC ± 128 MiB. + if (is_arm64) + return 1 << 27; + + // ARM32's Thumb branch has 24 bits immediate, and the instructions are + // aligned to 2, so it's effectively 25 bits. It's [-2^24, 2^24) or PC ± + // 16 MiB. + // + // ARM32's non-Thumb branches have twice longer range than its Thumb + // counterparts, but we conservatively use the Thumb's limitation. + if (is_arm32) + return 1 << 24; + + // PPC's branch has 24 bits immediate, and the instructions are aligned + // to 4, therefore the reach is [-2^25, 2^25) or PC ± 32 MiB. + assert(is_ppc); + return 1 << 25; +} + +// We create thunks for each 12.8/1.6/3.2 MiB code block for +// ARM64/ARM32/PPC, respectively. +template +static constexpr i64 batch_size = max_distance() / 10; + +// We assume that a single thunk group is smaller than 100 KiB. +static constexpr i64 max_thunk_size = 102400; + +// Returns true if a given relocation is of type used for function calls. +template +static bool needs_thunk_rel(const ElfRel &r) { + u32 ty = r.r_type; + + if constexpr (is_arm64) { + return ty == R_AARCH64_JUMP26 || ty == R_AARCH64_CALL26; + } else if constexpr (is_arm32) { + return ty == R_ARM_JUMP24 || ty == R_ARM_THM_JUMP24 || + ty == R_ARM_CALL || ty == R_ARM_THM_CALL || + ty == R_ARM_PLT32; + } else if constexpr (is_ppc32) { + return ty == R_PPC_REL24 || ty == R_PPC_PLTREL24 || ty == R_PPC_LOCAL24PC; + } else { + static_assert(is_ppc64); + return ty == R_PPC64_REL24 || ty == R_PPC64_REL24_NOTOC; + } +} + +template +static bool is_reachable(Context &ctx, InputSection &isec, + Symbol &sym, const ElfRel &rel) { + // We create thunks with a pessimistic assumption that all + // out-of-section relocations would be out-of-range. + InputSection *isec2 = sym.get_input_section(); + if (!isec2 || isec.output_section != isec2->output_section) + return false; + + // Even if the target is the same section, we branch to its PLT + // if it has one. So a symbol with a PLT is also considered an + // out-of-section reference. + if (sym.has_plt(ctx)) + return false; + + // If the target section is in the same output section but + // hasn't got any address yet, that's unreacahble. + if (isec2->offset == -1) + return false; + + // Thumb and ARM B instructions cannot be converted to BX, so we + // always have to make them jump to a thunk to switch processor mode + // even if their destinations are within their ranges. 
+ if constexpr (is_arm32) { + bool is_thumb = sym.get_addr(ctx) & 1; + if ((rel.r_type == R_ARM_THM_JUMP24 && !is_thumb) || + (rel.r_type == R_ARM_JUMP24 && is_thumb) || + (rel.r_type == R_ARM_PLT32 && is_thumb)) + return false; + } + + // PowerPC before Power9 lacks PC-relative load/store instructions. + // Functions compiled for Power9 or earlier assume that r2 points to + // GOT+0x8000, while those for Power10 uses r2 as a scratch register. + // We need to a thunk to recompute r2 for interworking. + if constexpr (is_ppc64v2) { + if (rel.r_type == R_PPC64_REL24 && !sym.esym().preserves_r2()) + return false; + if (rel.r_type == R_PPC64_REL24_NOTOC && sym.esym().uses_toc()) + return false; + } + + // Compute a distance between the relocated place and the symbol + // and check if they are within reach. + i64 S = sym.get_addr(ctx, NO_OPD); + i64 A = get_addend(isec, rel); + i64 P = isec.get_addr() + rel.r_offset; + i64 val = S + A - P; + return -max_distance() <= val && val < max_distance(); +} + +template +static void reset_thunk(RangeExtensionThunk &thunk) { + for (Symbol *sym : thunk.symbols) { + sym->extra.thunk_idx = -1; + sym->extra.thunk_sym_idx = -1; + sym->flags = 0; + } +} + +// Scan relocations to collect symbols that need thunks. +template +static void scan_rels(Context &ctx, InputSection &isec, + RangeExtensionThunk &thunk) { + std::span> rels = isec.get_rels(ctx); + std::vector &range_extn = isec.extra.range_extn; + range_extn.resize(rels.size()); + + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &rel = rels[i]; + if (!needs_thunk_rel(rel)) + continue; + + // Skip if the symbol is undefined. apply_reloc() will report an error. + Symbol &sym = *isec.file.symbols[rel.r_sym]; + if (!sym.file) + continue; + + // Skip if the destination is within reach. + if (is_reachable(ctx, isec, sym, rel)) + continue; + + // This relocation needs a thunk. If the symbol is already in a + // previous thunk, reuse it. + if (sym.extra.thunk_idx != -1) { + range_extn[i].thunk_idx = sym.extra.thunk_idx; + range_extn[i].sym_idx = sym.extra.thunk_sym_idx; + continue; + } + + // Otherwise, add the symbol to the current thunk if it's not + // added already. + range_extn[i].thunk_idx = thunk.thunk_idx; + range_extn[i].sym_idx = -1; + + if (sym.flags.exchange(-1) == 0) { + std::scoped_lock lock(thunk.mu); + thunk.symbols.push_back(&sym); + } + } +} + +template +void create_range_extension_thunks(Context &ctx, OutputSection &osec) { + std::span *> m = osec.members; + if (m.empty()) + return; + + m[0]->offset = 0; + + // Initialize input sections with a dummy offset so that we can + // distinguish sections that have got an address with the one who + // haven't. + tbb::parallel_for((i64)1, (i64)m.size(), [&](i64 i) { + m[i]->offset = -1; + }); + + // We create thunks from the beginning of the section to the end. + // We manage progress using four offsets which increase monotonically. + // The locations they point to are always A <= B <= C <= D. + // + // Input sections between B and C are in the current batch. + // + // A is the input section with the smallest address than can reach + // anywhere from the current batch. + // + // D is the input section with the largest address such that the thunk + // is reachable from the current batch if it's inserted right before D. + // + // ................................ ............ 
+ // A B C D + // ^ We insert a thunk for the current batch just before D + // <---> The current batch, which is smaller than batch_size + // <--------> Smaller than max_distance + // <--------> Smaller than max_distance + // <-------------> Reachable from the current batch + i64 a = 0; + i64 b = 0; + i64 c = 0; + i64 d = 0; + i64 offset = 0; + i64 thunk_idx = 0; + + while (b < m.size()) { + // Move D foward as far as we can jump from B to anywhere in a thunk at D. + while (d < m.size() && + align_to(offset, 1 << m[d]->p2align) + m[d]->sh_size + max_thunk_size < + m[b]->offset + max_distance()) { + offset = align_to(offset, 1 << m[d]->p2align); + m[d]->offset = offset; + offset += m[d]->sh_size; + d++; + } + + // Move C forward so that C is apart from B by BATCH_SIZE. We want + // to make sure that there's at least one section between B and C + // to ensure progress. + c = b + 1; + while (c < m.size() && + m[c]->offset + m[c]->sh_size < m[b]->offset + batch_size) + c++; + + // Move A forward so that A is reachable from C. + i64 c_offset = (c == m.size()) ? offset : m[c]->offset; + while (a < m.size() && m[a]->offset + max_distance() < c_offset) + a++; + + // Erase references to out-of-range thunks. + while (thunk_idx < osec.thunks.size() && + osec.thunks[thunk_idx]->offset < m[a]->offset) + reset_thunk(*osec.thunks[thunk_idx++]); + + // Create a thunk for input sections between B and C and place it at D. + offset = align_to(offset, RangeExtensionThunk::alignment); + RangeExtensionThunk *thunk = + new RangeExtensionThunk(osec, osec.thunks.size(), offset); + osec.thunks.emplace_back(thunk); + + // Scan relocations between B and C to collect symbols that need thunks. + tbb::parallel_for_each(m.begin() + b, m.begin() + c, + [&](InputSection *isec) { + scan_rels(ctx, *isec, *thunk); + }); + + // Now that we know the number of symbols in the thunk, we can compute + // its size. + assert(thunk->size() < max_thunk_size); + offset += thunk->size(); + + // Sort symbols added to the thunk to make the output deterministic. + sort(thunk->symbols, [](Symbol *a, Symbol *b) { + return std::tuple{a->file->priority, a->sym_idx} < + std::tuple{b->file->priority, b->sym_idx}; + }); + + // Assign offsets within the thunk to the symbols. + for (i64 i = 0; i < thunk->symbols.size(); i++) { + Symbol &sym = *thunk->symbols[i]; + sym.extra.thunk_idx = thunk->thunk_idx; + sym.extra.thunk_sym_idx = i; + } + + // Scan relocations again to fix symbol offsets in the last thunk. + tbb::parallel_for_each(m.begin() + b, m.begin() + c, + [&](InputSection *isec) { + std::span *> syms = isec->file.symbols; + std::span> rels = isec->get_rels(ctx); + std::span range_extn = isec->extra.range_extn; + + for (i64 i = 0; i < rels.size(); i++) + if (range_extn[i].thunk_idx == thunk->thunk_idx) + range_extn[i].sym_idx = syms[rels[i].r_sym]->extra.thunk_sym_idx; + }); + + // Move B forward to point to the begining of the next batch. 
+ b = c; + } + + while (thunk_idx < osec.thunks.size()) + reset_thunk(*osec.thunks[thunk_idx++]); + + osec.shdr.sh_size = offset; +} + +using E = MOLD_TARGET; + +static_assert(max_thunk_size / E::thunk_size < INT16_MAX); + +template void create_range_extension_thunks(Context &, OutputSection &); + +} // namespace mold::elf + +#endif diff --git a/third_party/mold/elf/tls.cc b/third_party/mold/elf/tls.cc new file mode 100644 index 00000000000..133749caf66 --- /dev/null +++ b/third_party/mold/elf/tls.cc @@ -0,0 +1,215 @@ +// clang-format off +// This file contains helper functions for thread-local storage (TLS). +// TLS is probably the most obscure feature the linker has to support, +// so I'll explain it in detail in this comment. +// +// TLS is a per-thread storage. Thread-local variables (TLVs) are in a TLS +// so that each thread has its own set of thread-local variables. Taking +// an address of a TLV returns a unique value for each thread. For example, +// `&foo` for the following code returns different pointer values for +// different threads. +// +// thread_local int foo; +// +// TLV is a relatively new feature. C for example didn't provide the +// official support for it through the keyword `thread_local` until C11. +// TLV needs a coordination between the compiler, the linker and the +// runtime to work correctly. +// +// An ELF exectuable or a shared library using TLV contains a "TLS template +// image" in the PT_TLS segment. For each newly created thread including the +// initial one, the runtime allocates a contiguous memory for an executable +// and its depending shared libraries and copies template images there. That +// per-thread memory is called the "TLS block". After allocating and +// initializing a TLS block, the runtime sets a register to refer to the TLS +// block, so that the thread-local variables are accessible relative to the +// register. +// +// The register referring to the per-thread storage is called the Thread +// Pointer (TP). TP is part of the thread's context. When the kernel +// scheduler switches threads, TP is saved and restored automatically just +// like other registers are. +// +// The TLS template image is read-only. It contains TLVs' initial values +// for new threads, and no one writes to it at runtime. +// +// Now, let's think about how to access a TLV. We need to know the TLV's +// address to access it which can be done in several different ways as +// follows: +// +// 1. If we are creating an executable, we know the exact size of the TLS +// template image we are creating, and we know where the TP will be +// set to after the template is copied to the TLS block. Therefore, +// the TP-relative address of a TLV in the main executable is known at +// link-time. That means, computing a TLV's address can be as easy as +// `add %dst, %tp, `. +// +// 2. If we are creating a shared library, we don't exactly know where +// its TLS template image will be copied to in terms of the +// TP-relative address, because we don't know how large is the main +// executable's and other libraries' TLS template images are. Only the +// runtime knows the exact TP-relative address. +// +// We can solve the problem with an indirection. Specifically, for +// each TLV whose TP-relative address is only known at process startup +// time, we create a GOT entry to store its TP-relative address. We +// also emit a dynamic relocation to let the runtime to fill the GOT +// entry with a TP-relative address. 
+// +// Computing a TLV address in this scheme needs at least two machine +// instructions in most ISAs; first instruction loads a value from a +// GOT entry, and the second one adds the loaded value to TP. +// +// 3. Now, think about libraries that you dynamically load with dlopen. +// The TLS block for such library has to be allocated separately from +// the initial TLS block, so we now have two or more discontiguous +// TLS blocks. There's no easy formula to compute an address of a TLV +// in a separate TLS block. +// +// The address of a TLV in a separate TLS block can be obtained by +// calling a libc-provided function, __tls_get_addr(). The function +// takes two arguments; a module ID to identify the ELF file and the +// TLV's offset within the ELF file's TLS template image. Accessing a +// TLV is sometimes compiled to a function call! The module ID and the +// offset are usually stored to GOT as two consecutive words. +// +// The last access method is the most generic, so the compiler emits such +// code by default. But that's the most expensive one, so the linker +// rewrites instructions if possible so that 3) is relaxed to 2) or even +// to 1). +// +// 1) is called the Local Exec access model. 2) is Initial Exec, and 3) is +// General Dynamic. +// +// There's another little trick that the compiler can use if it knows two +// TLVs are in the same ELF file (usually in the same file as the code is). +// In this case, we can call __tls_get_addr() only once with a module ID and +// the offset 0 to obtain the base address of the ELF file's TLS block. The +// base address obtained this way is sometimes called Dynamic Thread Pointer +// or DTP. We can then compute TLVs' addresses by adding their DTP-relative +// addresses to DTP. This access model is called the Local Dynamic. +// +// +// === TLS Descriptor access model === +// +// As described above, there are arguably too many different TLS access +// models from the most generic one you can use in any ELF file to the most +// efficient one you can use only when building a main executable. Compiling +// source code with an appropriate TLS access model is bothersome. To solve +// the problem, a new TLS access model was proposed. That is called the TLS +// Descriptor (TLSDESC) model. +// +// For a TLV compiled with TLSDESC, we allocate two consecutive GOT slots +// and create a TLSDESC dynamic relocation for them. The dynamic linker +// sets a function pointer to the first GOT slot and its argument to the +// second slot. +// +// To access the TLV, we call the function pointer with the argument we +// read from the second GOT slot. The function returns the TLV's +// TP-relative address. +// +// The runtime chooses the best access method depending on the situation +// and sets a pointer to the most efficient code to the first GOT slot. +// For example, if a TLV's TP-relative address is known at process startup +// time, the runtime sets that address to the second GOT slot and set a +// function that just returns its argument to the first GOT slot. +// +// With TLSDECS, the compiler can always emit the same code for TLVs +// without sacrificing runtime performance. +// +// TLSDESC is better than the traditional, non-TLSDESC TLS access models. +// It's the default on ARM64, but on other targets, TLSDESC is +// unfortunately either optional or even not supported at all. So we still +// need to support both the traditional TLS models and the TLSDESC model. 
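+//
+// To see concretely that `&foo` differs per thread (the motivating
+// example at the top of this comment), the following standalone snippet
+// can be used; it is an illustrative sketch, not from the original mold
+// sources, and assumes only the C++ standard library:
+//
+//   #include <cstdio>
+//   #include <thread>
+//
+//   thread_local int foo;
+//
+//   int main() {
+//     std::printf("main:   %p\n", (void *)&foo);
+//     std::thread([] { std::printf("worker: %p\n", (void *)&foo); }).join();
+//   }
+//
+// Under the Local Exec model the compiler lowers `&foo` to roughly
+// "TP + constant"; under General Dynamic it becomes a call to
+// __tls_get_addr(), as described above.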
+ +#include "third_party/mold/elf/mold.h" + +namespace mold::elf { + +template +static ElfPhdr *get_tls_segment(Context &ctx) { + if (ctx.phdr) + for (ElfPhdr &phdr : ctx.phdr->phdrs) + if (phdr.p_type == PT_TLS) + return &phdr; + return nullptr; +} + +template +u64 get_tls_begin(Context &ctx) { + if (ElfPhdr *phdr = get_tls_segment(ctx)) + return phdr->p_vaddr; + return 0; +} + +// Returns the TP address which can be used for efficient TLV accesses in +// the main executable. TP at runtime refers to a per-process TLS block +// whose address is not known at link-time. So the address returned from +// this function is the TP if the TLS template image were a TLS block. +template +u64 get_tp_addr(Context &ctx) { + ElfPhdr *phdr = get_tls_segment(ctx); + if (!phdr) + return 0; + + // On x86, SPARC and s390x, TP (%gs on i386, %fs on x86-64, %g7 on SPARC + // and %a0/%a1 on s390x) refers to past the end of the TLS block for + // historical reasons. TLVs are accessed with negative offsets from TP. + if constexpr (is_x86 || is_sparc || is_s390x) + return align_to(phdr->p_vaddr + phdr->p_memsz, phdr->p_align); + + // On ARM, SH4 and Alpha, the runtime appends two words at the beginning + // of TLV template image when copying TLVs to the TLS block, so we need + // to offset it. + if constexpr (is_arm || is_sh4 || is_alpha) + return align_down(phdr->p_vaddr - sizeof(Word) * 2, phdr->p_align); + + // On PPC and m68k, TP is 0x7000 (28 KiB) past the beginning of the TLV + // block to maximize the addressable range for load/store instructions + // with 16-bits signed immediates. It's not exactly 0x8000 (32 KiB) off + // because there's a small implementation-defined piece of data before + // the TLV block, and the runtime wants to access them efficiently too. + if constexpr (is_ppc || is_m68k) + return phdr->p_vaddr + 0x7000; + + // RISC-V just uses the beginning of the main executable's TLV block as + // TP. RISC-V load/store instructions usually take 12-bits signed + // immediates, so the beginning of TLV ± 2 KiB is accessible with a + // single load/store instruction. + assert(is_riscv); + return phdr->p_vaddr; +} + +// Returns the address __tls_get_addr() would return if it's called +// with offset 0. +template +u64 get_dtp_addr(Context &ctx) { + ElfPhdr *phdr = get_tls_segment(ctx); + if (!phdr) + return 0; + + // On PPC64 and m68k, R_DTPOFF is resolved to the address 0x8000 (32 + // KiB) past the start of the TLS block. The bias maximizes the + // accessible range for load/store instructions with 16-bits signed + // immediates. That is, if the offset were right at the beginning of + // the start of the TLS block, the half of addressible space (negative + // immediates) would have been wasted. + if constexpr (is_ppc || is_m68k) + return phdr->p_vaddr + 0x8000; + + // On RISC-V, the bias is 0x800 as the load/store instructions in the + // ISA usually have a 12-bit immediate. + if constexpr (is_riscv) + return phdr->p_vaddr + 0x800; + + // On other targets, DTP simply refers to the beginning of the TLS block. 
+ return phdr->p_vaddr; +} + +using E = MOLD_TARGET; + +template u64 get_tls_begin(Context &); +template u64 get_tp_addr(Context &); +template u64 get_dtp_addr(Context &); + +} // namespace mold::elf diff --git a/third_party/mold/fake_tbb.h b/third_party/mold/fake_tbb.h new file mode 100644 index 00000000000..63bfc2b2b6c --- /dev/null +++ b/third_party/mold/fake_tbb.h @@ -0,0 +1,15 @@ +#ifndef __TBB_FAKE_H +#define __TBB_FAKE_H + +namespace tbb { + + template + void parallel_for_each(InputIterator first, InputIterator last, const Function& f) { + } + + template + void parallel_for_each(Range& rng, const Body& body) { + } + +} +#endif diff --git a/third_party/mold/filepath.cc b/third_party/mold/filepath.cc new file mode 100644 index 00000000000..96782d3db26 --- /dev/null +++ b/third_party/mold/filepath.cc @@ -0,0 +1,37 @@ +// clang-format off +#include "third_party/mold/common.h" + +#include "third_party/libcxx/filesystem" +#include "libc/calls/calls.h" +#include "libc/calls/struct/stat.h" +#include "libc/calls/struct/stat.macros.h" +#include "libc/calls/struct/timespec.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/s.h" +#include "libc/sysv/consts/utime.h" +#include "libc/time/time.h" + +namespace mold { + +std::string get_realpath(std::string_view path) { + std::error_code ec; + std::filesystem::path link = std::filesystem::read_symlink(path, ec); + if (ec) + return std::string(path); + return (filepath(path) / ".." / link).lexically_normal().string(); +} + +// Removes redundant '/..' or '/.' from a given path. +// The transformation is done purely by lexical processing. +// This function does not access file system. +std::string path_clean(std::string_view path) { + return filepath(path).lexically_normal().string(); +} + +std::filesystem::path to_abs_path(std::filesystem::path path) { + if (path.is_absolute()) + return path.lexically_normal(); + return (std::filesystem::current_path() / path).lexically_normal(); +} + +} // namespace mold diff --git a/third_party/mold/filetype.h b/third_party/mold/filetype.h new file mode 100644 index 00000000000..41cfa59cbfe --- /dev/null +++ b/third_party/mold/filetype.h @@ -0,0 +1,194 @@ +// clang-format off +#pragma once + +#include "third_party/mold/common.h" +// MISSING #include "../elf/elf.h" + +namespace mold { + +enum class FileType { + UNKNOWN, + EMPTY, + ELF_OBJ, + ELF_DSO, + MACH_OBJ, + MACH_EXE, + MACH_DYLIB, + MACH_BUNDLE, + MACH_UNIVERSAL, + AR, + THIN_AR, + TAPI, + TEXT, + GCC_LTO_OBJ, + LLVM_BITCODE, +}; + +template +bool is_text_file(MappedFile *mf) { + u8 *data = mf->data; + return mf->size >= 4 && isprint(data[0]) && isprint(data[1]) && + isprint(data[2]) && isprint(data[3]); +} + +template +inline bool is_gcc_lto_obj(Context &ctx, MappedFile *mf) { + using namespace mold::elf; + + const char *data = mf->get_contents().data(); + ElfEhdr &ehdr = *(ElfEhdr *)data; + ElfShdr *sh_begin = (ElfShdr *)(data + ehdr.e_shoff); + std::span> shdrs{(ElfShdr *)(data + ehdr.e_shoff), ehdr.e_shnum}; + + // e_shstrndx is a 16-bit field. If .shstrtab's section index is + // too large, the actual number is stored to sh_link field. + i64 shstrtab_idx = (ehdr.e_shstrndx == SHN_XINDEX) + ? sh_begin->sh_link : ehdr.e_shstrndx; + + for (ElfShdr &sec : shdrs) { + // GCC FAT LTO objects contain both regular ELF sections and GCC- + // specific LTO sections, so that they can be linked as LTO objects if + // the LTO linker plugin is available and falls back as regular + // objects otherwise. 
GCC FAT LTO object can be identified by the + // presence of `.gcc.lto_.symtab` section. + if (!ctx.arg.plugin.empty()) { + std::string_view name = data + shdrs[shstrtab_idx].sh_offset + sec.sh_name; + if (name.starts_with(".gnu.lto_.symtab.")) + return true; + } + + if (sec.sh_type != SHT_SYMTAB) + continue; + + // GCC non-FAT LTO object contains only sections symbols followed by + // a common symbol whose name is `__gnu_lto_slim` (or `__gnu_lto_v1` + // for older GCC releases). + std::span> elf_syms{(ElfSym *)(data + sec.sh_offset), + (size_t)sec.sh_size / sizeof(ElfSym)}; + + auto skip = [](u8 type) { + return type == STT_NOTYPE || type == STT_FILE || type == STT_SECTION; + }; + + i64 i = 1; + while (i < elf_syms.size() && skip(elf_syms[i].st_type)) + i++; + + if (i < elf_syms.size() && elf_syms[i].st_shndx == SHN_COMMON) { + std::string_view name = + data + shdrs[sec.sh_link].sh_offset + elf_syms[i].st_name; + if (name.starts_with("__gnu_lto_")) + return true; + } + break; + } + + return false; +} + +template +FileType get_file_type(Context &ctx, MappedFile *mf) { + using namespace elf; + + std::string_view data = mf->get_contents(); + + if (data.empty()) + return FileType::EMPTY; + + if (data.starts_with("\177ELF")) { + u8 byte_order = ((ElfEhdr *)data.data())->e_ident[EI_DATA]; + + if (byte_order == ELFDATA2LSB) { + auto &ehdr = *(ElfEhdr *)data.data(); + + if (ehdr.e_type == ET_REL) { + if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) { + if (is_gcc_lto_obj(ctx, mf)) + return FileType::GCC_LTO_OBJ; + } else { + if (is_gcc_lto_obj(ctx, mf)) + return FileType::GCC_LTO_OBJ; + } + return FileType::ELF_OBJ; + } + + if (ehdr.e_type == ET_DYN) + return FileType::ELF_DSO; + } else { + auto &ehdr = *(ElfEhdr *)data.data(); + + if (ehdr.e_type == ET_REL) { + if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) { + if (is_gcc_lto_obj(ctx, mf)) + return FileType::GCC_LTO_OBJ; + } else { + if (is_gcc_lto_obj(ctx, mf)) + return FileType::GCC_LTO_OBJ; + } + return FileType::ELF_OBJ; + } + + if (ehdr.e_type == ET_DYN) + return FileType::ELF_DSO; + } + return FileType::UNKNOWN; + } + + if (data.starts_with("\xcf\xfa\xed\xfe")) { + switch (*(ul32 *)(data.data() + 12)) { + case 1: // MH_OBJECT + return FileType::MACH_OBJ; + case 2: // MH_EXECUTE + return FileType::MACH_EXE; + case 6: // MH_DYLIB + return FileType::MACH_DYLIB; + case 8: // MH_BUNDLE + return FileType::MACH_BUNDLE; + } + return FileType::UNKNOWN; + } + + if (data.starts_with("!\n")) + return FileType::AR; + if (data.starts_with("!\n")) + return FileType::THIN_AR; + if (data.starts_with("--- !tapi-tbd")) + return FileType::TAPI; + if (data.starts_with("\xca\xfe\xba\xbe")) + return FileType::MACH_UNIVERSAL; + if (is_text_file(mf)) + return FileType::TEXT; + if (data.starts_with("\xde\xc0\x17\x0b")) + return FileType::LLVM_BITCODE; + if (data.starts_with("BC\xc0\xde")) + return FileType::LLVM_BITCODE; + return FileType::UNKNOWN; +} + +inline std::string filetype_to_string(FileType type) { + switch (type) { + case FileType::UNKNOWN: return "UNKNOWN"; + case FileType::EMPTY: return "EMPTY"; + case FileType::ELF_OBJ: return "ELF_OBJ"; + case FileType::ELF_DSO: return "ELF_DSO"; + case FileType::MACH_EXE: return "MACH_EXE"; + case FileType::MACH_OBJ: return "MACH_OBJ"; + case FileType::MACH_DYLIB: return "MACH_DYLIB"; + case FileType::MACH_BUNDLE: return "MACH_BUNDLE"; + case FileType::MACH_UNIVERSAL: return "MACH_UNIVERSAL"; + case FileType::AR: return "AR"; + case FileType::THIN_AR: return "THIN_AR"; + case FileType::TAPI: return "TAPI"; + case 
FileType::TEXT: return "TEXT"; + case FileType::GCC_LTO_OBJ: return "GCC_LTO_OBJ"; + case FileType::LLVM_BITCODE: return "LLVM_BITCODE"; + } + return "UNKNOWN"; +} + +inline std::ostream &operator<<(std::ostream &out, FileType type) { + out << filetype_to_string(type); + return out; +} + +} // namespace mold diff --git a/third_party/mold/glob.cc b/third_party/mold/glob.cc new file mode 100644 index 00000000000..55037654b63 --- /dev/null +++ b/third_party/mold/glob.cc @@ -0,0 +1,150 @@ +// clang-format off +#include "third_party/mold/common.h" + +#include "third_party/libcxx/cstring" + +namespace mold { + +std::optional Glob::compile(std::string_view pat) { + std::vector vec; + + while (!pat.empty()) { + u8 c = pat[0]; + pat = pat.substr(1); + + switch (c) { + case '[': { + // Here are a few bracket pattern examples: + // + // [abc]: a, b or c + // [$\]!]: $, ] or ! + // [a-czg-i]: a, b, c, z, g, h, or i + // [^a-z]: Any character except lowercase letters + vec.push_back({BRACKET}); + std::bitset<256> &bitset = vec.back().bitset; + + bool negate = false; + if (!pat.empty() && pat[0] == '^') { + negate = true; + pat = pat.substr(1); + } + + bool closed = false; + + while (!pat.empty()) { + if (pat[0] == ']') { + pat = pat.substr(1); + closed = true; + break; + } + + if (pat[0] == '\\') { + pat = pat.substr(1); + if (pat.empty()) + return {}; + } + + if (pat.size() >= 3 && pat[1] == '-') { + u8 start = pat[0]; + u8 end = pat[2]; + pat = pat.substr(3); + + if (end == '\\') { + if (pat.empty()) + return {}; + end = pat[0]; + pat = pat.substr(1); + } + + if (end < start) + return {}; + + for (i64 i = start; i <= end; i++) + bitset[i] = true; + } else { + bitset[(u8)pat[0]] = true; + pat = pat.substr(1); + } + } + + if (!closed) + return {}; + + if (negate) + bitset.flip(); + break; + } + case '?': + vec.push_back({QUESTION}); + break; + case '*': + vec.push_back({STAR}); + break; + default: + if (vec.empty() || vec.back().kind != STRING) + vec.push_back({STRING}); + vec.back().str += c; + break; + } + } + + return {Glob{std::move(vec)}}; +} + +bool Glob::match(std::string_view str) { + return do_match(str, elements); +} + +bool Glob::do_match(std::string_view str, std::span elements) { + while (!elements.empty()) { + Element &e = elements[0]; + elements = elements.subspan(1); + + switch (e.kind) { + case STRING: + if (str.empty() || !str.starts_with(e.str)) + return false; + str = str.substr(e.str.size()); + break; + case STAR: + if (elements.empty()) + return true; + + // Patterns like "*foo*bar*" should be much more common than more + // complex ones like "*foo*[abc]*" or "*foo**?bar*", so we optimize + // the former case here. + if (elements[0].kind == STRING) { + for (;;) { + size_t pos = str.find(elements[0].str); + if (pos == str.npos) + break; + if (do_match(str.substr(pos + elements[0].str.size()), + elements.subspan(1))) + return true; + str = str.substr(pos + 1); + } + return false; + } + + // Other cases are handled here. 
+ for (i64 j = 0; j < str.size(); j++) + if (do_match(str.substr(j), elements)) + return true; + return false; + case QUESTION: + if (str.empty()) + return false; + str = str.substr(1); + break; + case BRACKET: + if (str.empty() || !e.bitset[str[0]]) + return false; + str = str.substr(1); + break; + } + } + + return str.empty(); +} + +} // namespace mold diff --git a/third_party/mold/hyperloglog.cc b/third_party/mold/hyperloglog.cc new file mode 100644 index 00000000000..7c10616a4e4 --- /dev/null +++ b/third_party/mold/hyperloglog.cc @@ -0,0 +1,21 @@ +// clang-format off +// This file implements HyperLogLog algorithm, which estimates +// the number of unique items in a given multiset. +// +// For more info, read +// https://engineering.fb.com/2018/12/13/data-infrastructure/hyperloglog + +#include "third_party/mold/common.h" + +#include "third_party/libcxx/cmath" + +namespace mold { + +i64 HyperLogLog::get_cardinality() const { + double z = 0; + for (i64 val : buckets) + z += pow(2, -val); + return ALPHA * NBUCKETS * NBUCKETS / z; +} + +} // namespace mold diff --git a/third_party/mold/integers.h b/third_party/mold/integers.h new file mode 100644 index 00000000000..089be2b223a --- /dev/null +++ b/third_party/mold/integers.h @@ -0,0 +1,222 @@ +// clang-format off +// This file defines integral types for file input/output. We need to use +// these types instead of the plain integers (such as uint32_t or int32_t) +// when reading from/writing to an mmap'ed file area for the following +// reasons: +// +// 1. mold is always a cross linker and should not depend on what host it +// is running on. Users should be able to run mold on a big-endian +// SPARC machine to create a little-endian RV64 binary, for example. +// +// 2. Even though data members in all ELF data strucutres are naturally +// aligned, they are not guaranteed to be aligned on memory. Because +// archive file (.a file) aligns each member only to a 2 byte boundary, +// anything larger than 2 bytes may be unaligned in an mmap'ed memory. +// Unaligned access is an undefined behavior in C/C++, so we shouldn't +// cast an arbitrary pointer to a uint32_t, for example, to read a +// 32-bits value. +// +// The data types defined in this file don't depend on host byte order and +// don't do unaligned access. 
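+//
+// For example, to read a 32-bit little-endian field from a possibly
+// unaligned, mmap'ed buffer, code is expected to go through the ul32
+// type defined below rather than casting to a plain uint32_t
+// (illustrative sketch, not from the original mold sources):
+//
+//   u8 *buf = ...;                   // e.g. an mmap'ed archive member
+//   u32 bad  = *(u32 *)(buf + 1);    // unaligned, host-endian: undefined behavior
+//   u32 good = *(ul32 *)(buf + 1);   // ul32 has alignment 1 and loads via memcpy,
+//                                    // so this is well-defined and always little-endian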
+ +#pragma once + +#include "third_party/libcxx/bit" +#include "third_party/libcxx/cstdint" +#include "third_party/libcxx/cstring" + +#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define __LITTLE_ENDIAN__ 1 +# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define __BIG_ENDIAN__ 1 +# else +# error "unknown host byte order" +# endif +#endif + +namespace mold { + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +typedef int8_t i8; +typedef int16_t i16; +typedef int32_t i32; +typedef int64_t i64; + +template +static inline T bswap(T val) { + switch (sizeof(T)) { + case 2: return __builtin_bswap16(val); + case 4: return __builtin_bswap32(val); + case 8: return __builtin_bswap64(val); + default: __builtin_unreachable(); + } +} + +template +class LittleEndian { +public: + LittleEndian() = default; + LittleEndian(T x) { *this = x; } + + operator T() const { + if constexpr (sizeof(T) == SIZE) { + T x; + memcpy(&x, val, sizeof(T)); + if constexpr (std::endian::native == std::endian::big) + x = bswap(x); + return x; + } else { + static_assert(SIZE == 3); + return (val[2] << 16) | (val[1] << 8) | val[0]; + } + } + + LittleEndian &operator=(T x) { + if constexpr (sizeof(T) == SIZE) { + if constexpr (std::endian::native == std::endian::big) + x = bswap(x); + memcpy(val, &x, sizeof(T)); + } else { + static_assert(SIZE == 3); + val[2] = x >> 16; + val[1] = x >> 8; + val[0] = x; + } + return *this; + } + + LittleEndian &operator++() { + return *this = *this + 1; + } + + LittleEndian operator++(int) { + T ret = *this; + *this = *this + 1; + return ret; + } + + LittleEndian &operator--() { + return *this = *this - 1; + } + + LittleEndian operator--(int) { + T ret = *this; + *this = *this - 1; + return ret; + } + + LittleEndian &operator+=(T x) { + return *this = *this + x; + } + + LittleEndian &operator-=(T x) { + return *this = *this - x; + } + + LittleEndian &operator&=(T x) { + return *this = *this & x; + } + + LittleEndian &operator|=(T x) { + return *this = *this | x; + } + +private: + u8 val[SIZE]; +}; + +using il16 = LittleEndian; +using il32 = LittleEndian; +using il64 = LittleEndian; +using ul16 = LittleEndian; +using ul24 = LittleEndian; +using ul32 = LittleEndian; +using ul64 = LittleEndian; + +template +class BigEndian { +public: + BigEndian() = default; + BigEndian(T x) { *this = x; } + + operator T() const { + if constexpr (sizeof(T) == SIZE) { + T x; + memcpy(&x, val, sizeof(T)); + if constexpr (std::endian::native == std::endian::little) + x = bswap(x); + return x; + } else { + static_assert(SIZE == 3); + return (val[0] << 16) | (val[1] << 8) | val[2]; + } + } + + BigEndian &operator=(T x) { + if constexpr (sizeof(T) == SIZE) { + if constexpr (std::endian::native == std::endian::little) + x = bswap(x); + memcpy(val, &x, sizeof(T)); + } else { + static_assert(SIZE == 3); + val[0] = x >> 16; + val[1] = x >> 8; + val[2] = x; + } + return *this; + } + + BigEndian &operator++() { + return *this = *this + 1; + } + + BigEndian operator++(int) { + T ret = *this; + *this = *this + 1; + return ret; + } + + BigEndian &operator--() { + return *this = *this - 1; + } + + BigEndian operator--(int) { + T ret = *this; + *this = *this - 1; + return ret; + } + + BigEndian &operator+=(T x) { + return *this = *this + x; + } + + BigEndian &operator-=(T x) { + return *this = *this - x; + } + + BigEndian &operator&=(T x) { + return *this = *this & x; + } + + BigEndian &operator|=(T x) { + return *this = *this 
| x; + } + +private: + u8 val[SIZE]; +}; + +using ib16 = BigEndian; +using ib32 = BigEndian; +using ib64 = BigEndian; +using ub16 = BigEndian; +using ub24 = BigEndian; +using ub32 = BigEndian; +using ub64 = BigEndian; + +} // namespace mold diff --git a/third_party/mold/main.cc b/third_party/mold/main.cc new file mode 100644 index 00000000000..0012c58b5e0 --- /dev/null +++ b/third_party/mold/main.cc @@ -0,0 +1,188 @@ +// clang-format off +#include "third_party/mold/common.h" +// MISSING #include "config.h" + +#include "third_party/libcxx/cstring" +#include "third_party/libcxx/filesystem" +#include "libc/calls/calls.h" +#include "libc/calls/sigtimedwait.h" +#include "libc/calls/struct/sigaction.h" +#include "libc/calls/struct/siginfo.h" +#include "libc/sysv/consts/sa.h" +#include "libc/sysv/consts/sicode.h" +#include "libc/sysv/consts/ss.h" +// MISSING #include + +#ifdef USE_SYSTEM_MIMALLOC +// MISSING #include +#endif + +#ifdef __FreeBSD__ +// MISSING #include +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "libc/time/time.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/lockf.h" +#endif + +namespace mold { + +std::string mold_version_string = MOLD_VERSION; + +namespace elf { +int main(int argc, char **argv); +} + +namespace macho { +int main(int argc, char **argv); +} + +static std::string get_mold_version() { + std::string name = MOLD_IS_SOLD ? "mold (sold) " : "mold "; + if (mold_git_hash.empty()) + return name + MOLD_VERSION + " (compatible with GNU ld)"; + return name + MOLD_VERSION + " (" + mold_git_hash + "; compatible with GNU ld)"; +} + +void cleanup() { + if (output_tmpfile) + unlink(output_tmpfile); +} + +std::string errno_string() { + // strerror is not thread-safe, so guard it with a lock. + static std::mutex mu; + std::scoped_lock lock(mu); + return strerror(errno); +} + +// Returns the path of the mold executable itself +std::string get_self_path() { +#ifdef __FreeBSD__ + // /proc may not be mounted on FreeBSD. The proper way to get the + // current executable's path is to use sysctl(2). + int mib[4]; + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PATHNAME; + mib[3] = -1; + + size_t size; + sysctl(mib, 4, NULL, &size, NULL, 0); + + std::string path; + path.resize(size); + sysctl(mib, 4, path.data(), &size, NULL, 0); + return path; +#else + return std::filesystem::read_symlink("/proc/self/exe").string(); +#endif +} + +// mold mmap's an output file, and the mmap succeeds even if there's +// no enough space left on the filesystem. The actual disk blocks are +// not allocated on the mmap call but when the program writes to it +// for the first time. +// +// If a disk becomes full as a result of a write to an mmap'ed memory +// region, the failure of the write is reported as a SIGBUS or structured +// exeption with code EXCEPTION_IN_PAGE_ERROR on Windows. This +// signal handler catches that signal and prints out a user-friendly +// error message. Without this, it is very hard to realize that the +// disk might be full. 
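+//
+// The failure mode can be reproduced with a sketch like the following
+// (illustrative only; it assumes a nearly full POSIX filesystem mounted
+// at the hypothetical path /mnt/full):
+//
+//   int fd = open("/mnt/full/out", O_RDWR | O_CREAT, 0644);
+//   ftruncate(fd, 1 << 30);            // succeeds: blocks are not allocated yet
+//   char *p = (char *)mmap(NULL, 1 << 30, PROT_READ | PROT_WRITE,
+//                          MAP_SHARED, fd, 0);
+//   memset(p, 0xcc, 1 << 30);          // faults with SIGBUS once the disk fills up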
+#ifdef _WIN32 + +static LONG WINAPI vectored_handler(_EXCEPTION_POINTERS *exception_info) { + static std::mutex mu; + std::scoped_lock lock{mu}; + + PEXCEPTION_RECORD exception_record = exception_info->ExceptionRecord; + ULONG_PTR *exception_information = exception_record->ExceptionInformation; + if (exception_record->ExceptionCode == EXCEPTION_IN_PAGE_ERROR && + (ULONG_PTR)output_buffer_start <= exception_information[1] && + exception_information[1] < (ULONG_PTR)output_buffer_end) { + + const char msg[] = "mold: failed to write to an output file. Disk full?\n"; + (void)!write(_fileno(stderr), msg, sizeof(msg) - 1); + } + + cleanup(); + _exit(1); +} + +void install_signal_handler() { + AddVectoredExceptionHandler(0, vectored_handler); +} + +#else + +static void sighandler(int signo, siginfo_t *info, void *ucontext) { + static std::mutex mu; + std::scoped_lock lock{mu}; + + switch (signo) { + case SIGSEGV: + case SIGBUS: + if (output_buffer_start <= info->si_addr && + info->si_addr < output_buffer_end) { + const char msg[] = "mold: failed to write to an output file. Disk full?\n"; + (void)!write(STDERR_FILENO, msg, sizeof(msg) - 1); + } + break; + case SIGABRT: { + const char msg[] = + "mold: aborted\n" + "mold: If mold failed due to a spurious failure of pthread_create, " + "it's likely because of https://github.com/oneapi-src/oneTBB/pull/824. " + "You should ensure that you are using 2021.9.0 or newer version of libtbb.\n"; + (void)!write(STDERR_FILENO, msg, sizeof(msg) - 1); + break; + } + } + + _exit(1); +} + +void install_signal_handler() { + struct sigaction action; + action.sa_sigaction = sighandler; + sigemptyset(&action.sa_mask); + action.sa_flags = SA_SIGINFO; + + sigaction(SIGABRT, &action, NULL); + sigaction(SIGINT, &action, NULL); + sigaction(SIGTERM, &action, NULL); + sigaction(SIGBUS, &action, NULL); +} + +#endif + +i64 get_default_thread_count() { + // mold doesn't scale well above 32 threads. 
+ int n = tbb::global_control::active_value( + tbb::global_control::max_allowed_parallelism); + return std::min(n, 32); +} + +} // namespace mold + +int main(int argc, char **argv) { + mold::mold_version = mold::get_mold_version(); + +#if MOLD_IS_SOLD + std::string cmd = mold::filepath(argv[0]).filename().string(); + if (cmd == "ld64" || cmd.starts_with("ld64.")) + return mold::macho::main(argc, argv); +#endif + + return mold::elf::main(argc, argv); +} diff --git a/third_party/mold/mold.mk b/third_party/mold/mold.mk new file mode 100644 index 00000000000..e109116a722 --- /dev/null +++ b/third_party/mold/mold.mk @@ -0,0 +1,61 @@ +#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐ +#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘ + +PKGS += THIRD_PARTY_MOLD + +private CPPFLAGS += -std=c++20 + +THIRD_PARTY_MOLD_ARTIFACTS += THIRD_PARTY_MOLD_A +THIRD_PARTY_MOLD = $(THIRD_PARTY_MOLD_A_DEPS) $(THIRD_PARTY_MOLD_A) +THIRD_PARTY_MOLD_A = o/$(MODE)/third_party/mold/mold.a +THIRD_PARTY_MOLD_FILES := $(wildcard third_party/mold/*) +THIRD_PARTY_MOLD_HDRS = $(filter %.h,$(THIRD_PARTY_MOLD_FILES)) +THIRD_PARTY_MOLD_SRCS = $(filter %.cc,$(THIRD_PARTY_MOLD_FILES)) +THIRD_PARTY_MOLD_OBJS = $(THIRD_PARTY_MOLD_SRCS:%.cc=o/$(MODE)/%.o) + +THIRD_PARTY_MOLD_A_DIRECTDEPS = \ + THIRD_PARTY_LIBCXX \ + THIRD_PARTY_XXHASH + +THIRD_PARTY_MOLD_A_DEPS := \ + $(call uniq,$(foreach x,$(THIRD_PARTY_MOLD_A_DIRECTDEPS),$($(x)))) + +# https://github.com/rui314/mold/blob/d4d93d7fb72dd19c44aafa4dd5397e35787d33ad/CMakeLists.txt#L62 +$(THIRD_PARTY_MOLD_OBJS): private \ + CPPFLAGS += \ + -std=gnu++20 \ + -fno-exceptions \ + -fno-unwind-tables \ + -fno-asynchronous-unwind-tables \ + -Wno-sign-compare \ + -Wno-unused-function \ + +THIRD_PARTY_MOLD_CHECKS = \ + $(THIRD_PARTY_MOLD_A).pkg \ + $(THIRD_PARTY_MOLD_HDRS:%=o/$(MODE)/%.ok) + +$(THIRD_PARTY_MOLD_A): \ + third_party/mold/ \ + $(THIRD_PARTY_MOLD_A).pkg \ + $(THIRD_PARTY_MOLD_OBJS) + +$(THIRD_PARTY_MOLD_A).pkg: \ + $(THIRD_PARTY_MOLD_OBJS) \ + $(foreach x,$(THIRD_PARTY_MOLD_A_DIRECTDEPS),$($(x)_A).pkg) + +o/$(MODE)/third_party/mold/mold.com.dbg: \ + $(THIRD_PARTY_MOLD) \ + o/$(MODE)/third_party/awk/main.o \ + $(CRT) \ + $(APE_NO_MODIFY_SELF) + @$(APELINK) + +THIRD_PARTY_MOLD_COMS = o/$(MODE)/third_party/mold/mold.com +THIRD_PARTY_MOLD_BINS = $(THIRD_PARTY_MOLD_COMS) $(THIRD_PARTY_MOLD_COMS:%=%.dbg) +THIRD_PARTY_MOLD_LIBS = $(THIRD_PARTY_MOLD_A) +$(THIRD_PARTY_MOLD_OBJS): $(BUILD_FILES) third_party/mold/mold.mk + +.PHONY: o/$(MODE)/third_party/mold +o/$(MODE)/third_party/mold: \ + $(THIRD_PARTY_MOLD_BINS) \ + $(THIRD_PARTY_AWK_CHECKS) diff --git a/third_party/mold/multi-glob.cc b/third_party/mold/multi-glob.cc new file mode 100644 index 00000000000..115bdea7cc5 --- /dev/null +++ b/third_party/mold/multi-glob.cc @@ -0,0 +1,167 @@ +// clang-format off +// This file implements the Aho-Corasick algorithm to match multiple +// glob patterns to symbol strings as quickly as possible. +// +// Here are some examples of glob patterns: +// +// qt_private_api_tag* +// *16QAccessibleCache* +// *32QAbstractFileIconProviderPrivate* +// *17QPixmapIconEngine* +// +// `*` is a wildcard that matches any substring. We sometimes have +// hundreds of glob patterns and have to match them against millions +// of symbol strings. +// +// Aho-Corasick cannot handle complex patterns such as `*foo*bar*`. +// We handle such patterns with the Glob class. Glob is relatively +// slow, but complex patterns are rare in practice, so it should be +// OK. 
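+//
+// Typical usage of this class looks roughly like this (illustrative
+// sketch, not from the original mold sources):
+//
+//   MultiGlob matcher;
+//   matcher.add("qt_private_api_tag*", 1);  // simple pattern -> Aho-Corasick trie
+//   matcher.add("*foo*bar*", 2);            // complex pattern -> Glob fallback
+//
+//   if (std::optional<u32> val = matcher.find("foo_and_bar"))
+//     ;  // *val is the smallest value among all matching patterns (here, 2)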
+ +#include "third_party/mold/common.h" + +#include "third_party/libcxx/queue" +#include "third_party/libcxx/regex" + +namespace mold { + +std::optional MultiGlob::find(std::string_view str) { + std::call_once(once, [&] { compile(); }); + u32 val = UINT32_MAX; + + if (root) { + // Match against simple glob patterns + TrieNode *node = root.get(); + + auto walk = [&](u8 c) { + for (;;) { + if (node->children[c]) { + node = node->children[c].get(); + val = std::min(val, node->value); + return; + } + + if (!node->suffix_link) + return; + node = node->suffix_link; + } + }; + + walk('\0'); + for (u8 c : str) + walk(c); + walk('\0'); + } + + // Match against complex glob patterns + for (std::pair &glob : globs) + if (glob.first.match(str)) + val = std::min(val, glob.second); + + if (val == UINT32_MAX) + return {}; + return val; +} + +static bool is_simple_pattern(std::string_view pat) { + static std::regex re(R"(\*?[^*[?]+\*?)", std::regex_constants::optimize); + return std::regex_match(pat.begin(), pat.end(), re); +} + +static std::string handle_stars(std::string_view pat) { + std::string str(pat); + + // Convert "foo" -> "\0foo\0", "*foo" -> "foo\0", "foo*" -> "\0foo" + // and "*foo*" -> "foo". Aho-Corasick can do only substring matching, + // so we use \0 as beginning/end-of-string markers. + if (str.starts_with('*') && str.ends_with('*')) + return str.substr(1, str.size() - 2); + if (str.starts_with('*')) + return str.substr(1) + "\0"s; + if (str.ends_with('*')) + return "\0"s + str.substr(0, str.size() - 1); + return "\0"s + str + "\0"s; +} + +bool MultiGlob::add(std::string_view pat, u32 val) { + assert(!is_compiled); + assert(!pat.empty()); + + strings.push_back(std::string(pat)); + + // Complex glob pattern + if (!is_simple_pattern(pat)) { + if (std::optional glob = Glob::compile(pat)) { + globs.push_back({std::move(*glob), val}); + return true; + } + return false; + } + + // Simple glob pattern + if (!root) + root.reset(new TrieNode); + TrieNode *node = root.get(); + + for (u8 c : handle_stars(pat)) { + if (!node->children[c]) + node->children[c].reset(new TrieNode); + node = node->children[c].get(); + } + + node->value = std::min(node->value, val); + return true; +} + +void MultiGlob::compile() { + is_compiled = true; + if (root) { + fix_suffix_links(*root); + fix_values(); + } +} + +void MultiGlob::fix_suffix_links(TrieNode &node) { + for (i64 i = 0; i < 256; i++) { + if (!node.children[i]) + continue; + + TrieNode &child = *node.children[i]; + + TrieNode *cur = node.suffix_link; + for (;;) { + if (!cur) { + child.suffix_link = root.get(); + break; + } + + if (cur->children[i]) { + child.suffix_link = cur->children[i].get(); + break; + } + + cur = cur->suffix_link; + } + + fix_suffix_links(child); + } +} + +void MultiGlob::fix_values() { + std::queue queue; + queue.push(root.get()); + + do { + TrieNode *node = queue.front(); + queue.pop(); + + for (std::unique_ptr &child : node->children) { + if (!child) + continue; + child->value = std::min(child->value, child->suffix_link->value); + queue.push(child.get()); + } + } while (!queue.empty()); +} + +} // namespace mold diff --git a/third_party/mold/output-file-unix.h b/third_party/mold/output-file-unix.h new file mode 100644 index 00000000000..4763586c2f9 --- /dev/null +++ b/third_party/mold/output-file-unix.h @@ -0,0 +1,203 @@ +// clang-format off +#include "third_party/mold/common.h" + +#include "libc/calls/calls.h" +#include "libc/calls/struct/flock.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/at.h" +#include 
"libc/sysv/consts/f.h" +#include "libc/sysv/consts/fd.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/posix.h" +#include "libc/sysv/consts/s.h" +#include "libc/sysv/consts/splice.h" +#include "third_party/libcxx/filesystem" +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/runtime.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/mlock.h" +#include "libc/sysv/consts/msync.h" +#include "libc/sysv/consts/posix.h" +#include "libc/sysv/consts/prot.h" +#include "libc/sysv/consts/madv.h" +#include "libc/sysv/consts/mfd.h" +#include "libc/sysv/consts/mremap.h" +#include "libc/calls/calls.h" +#include "libc/calls/struct/stat.h" +#include "libc/calls/struct/stat.macros.h" +#include "libc/calls/struct/timespec.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/s.h" +#include "libc/sysv/consts/utime.h" +#include "libc/time/time.h" +#include "libc/calls/makedev.h" +#include "libc/calls/weirdtypes.h" +#include "libc/thread/thread.h" +#include "libc/calls/typedef/u.h" +#include "libc/calls/weirdtypes.h" +#include "libc/intrin/newbie.h" +#include "libc/sock/select.h" +#include "libc/sysv/consts/endian.h" + +namespace mold { + +inline u32 get_umask() { + u32 orig_umask = umask(0); + umask(orig_umask); + return orig_umask; +} + +template +static std::pair +open_or_create_file(Context &ctx, std::string path, i64 filesize, i64 perm) { + std::string tmpl = filepath(path).parent_path() / ".mold-XXXXXX"; + char *path2 = (char *)save_string(ctx, tmpl).data(); + + i64 fd = mkstemp(path2); + if (fd == -1) + Fatal(ctx) << "cannot open " << path2 << ": " << errno_string(); + + // Reuse an existing file if exists and writable because on Linux, + // writing to an existing file is much faster than creating a fresh + // file and writing to it. + if (ctx.overwrite_output_file && rename(path.c_str(), path2) == 0) { + ::close(fd); + fd = ::open(path2, O_RDWR | O_CREAT, perm); + if (fd != -1 && !ftruncate(fd, filesize) && !fchmod(fd, perm & ~get_umask())) + return {fd, path2}; + + unlink(path2); + fd = ::open(path2, O_RDWR | O_CREAT, perm); + if (fd == -1) + Fatal(ctx) << "cannot open " << path2 << ": " << errno_string(); + } + + if (ftruncate(fd, filesize)) + Fatal(ctx) << "ftruncate failed: " << errno_string(); + + if (fchmod(fd, (perm & ~get_umask())) == -1) + Fatal(ctx) << "fchmod failed: " << errno_string(); + return {fd, path2}; +} + +template +class MemoryMappedOutputFile : public OutputFile { +public: + MemoryMappedOutputFile(Context &ctx, std::string path, i64 filesize, i64 perm) + : OutputFile(path, filesize, true) { + i64 fd; + std::tie(fd, output_tmpfile) = open_or_create_file(ctx, path, filesize, perm); + + this->buf = (u8 *)mmap(nullptr, filesize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if (this->buf == MAP_FAILED) + Fatal(ctx) << path << ": mmap failed: " << errno_string(); + ::close(fd); + + mold::output_buffer_start = this->buf; + mold::output_buffer_end = this->buf + filesize; + } + + ~MemoryMappedOutputFile() { + if (fd2 != -1) + ::close(fd2); + } + + void close(Context &ctx) override { + Timer t(ctx, "close_file"); + + if (!this->is_unmapped) + munmap(this->buf, this->filesize); + + // If an output file already exists, open a file and then remove it. + // This is the fastest way to unlink a file, as it does not make the + // system to immediately release disk blocks occupied by the file. 
+ fd2 = ::open(this->path.c_str(), O_RDONLY); + if (fd2 != -1) + unlink(this->path.c_str()); + + if (rename(output_tmpfile, this->path.c_str()) == -1) + Fatal(ctx) << this->path << ": rename failed: " << errno_string(); + output_tmpfile = nullptr; + } + +private: + int fd2 = -1; +}; + +template +class MallocOutputFile : public OutputFile { +public: + MallocOutputFile(Context &ctx, std::string path, i64 filesize, i64 perm) + : OutputFile(path, filesize, false), perm(perm) { + this->buf = (u8 *)mmap(NULL, filesize, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (this->buf == MAP_FAILED) + Fatal(ctx) << "mmap failed: " << errno_string(); + } + + void close(Context &ctx) override { + Timer t(ctx, "close_file"); + + if (this->path == "-") { + fwrite(this->buf, this->filesize, 1, stdout); + fclose(stdout); + return; + } + + i64 fd = ::open(this->path.c_str(), O_RDWR | O_CREAT, perm); + if (fd == -1) + Fatal(ctx) << "cannot open " << this->path << ": " << errno_string(); + + FILE *fp = fdopen(fd, "w"); + fwrite(this->buf, this->filesize, 1, fp); + fclose(fp); + } + +private: + i64 perm; +}; + +template +std::unique_ptr> +OutputFile::open(Context &ctx, std::string path, i64 filesize, i64 perm) { + Timer t(ctx, "open_file"); + + if (path.starts_with('/') && !ctx.arg.chroot.empty()) + path = ctx.arg.chroot + "/" + path_clean(path); + + bool is_special = false; + if (path == "-") { + is_special = true; + } else { + struct stat st; + if (stat(path.c_str(), &st) == 0 && (st.st_mode & S_IFMT) != S_IFREG) + is_special = true; + } + + OutputFile *file; + if (is_special) + file = new MallocOutputFile(ctx, path, filesize, perm); + else + file = new MemoryMappedOutputFile(ctx, path, filesize, perm); + +#ifdef MADV_HUGEPAGE + // Enable transparent huge page for an output memory-mapped file. + // On Linux, it has an effect only on tmpfs mounted with `huge=advise`, + // but it can make the linker ~10% faster. You can try it by creating + // a tmpfs with the following commands + // + // $ mkdir tmp + // $ sudo mount -t tmpfs -o size=2G,huge=advise none tmp + // + // and then specifying a path under the directory as an output file. 
+ madvise(file->buf, filesize, MADV_HUGEPAGE); +#endif + + if (ctx.arg.filler != -1) + memset(file->buf, ctx.arg.filler, filesize); + return std::unique_ptr(file); +} + +} // namespace mold diff --git a/third_party/mold/output-file-win32.h b/third_party/mold/output-file-win32.h new file mode 100644 index 00000000000..4541bc8fff9 --- /dev/null +++ b/third_party/mold/output-file-win32.h @@ -0,0 +1,85 @@ +// clang-format off +#include "third_party/mold/common.h" + +#include "libc/calls/calls.h" +#include "libc/calls/struct/flock.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sysv/consts/at.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fd.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/posix.h" +#include "libc/sysv/consts/s.h" +#include "libc/sysv/consts/splice.h" +#include "third_party/libcxx/filesystem" +#include "libc/nt/accounting.h" +#include "libc/nt/automation.h" +#include "libc/nt/console.h" +#include "libc/nt/debug.h" +#include "libc/nt/dll.h" +#include "libc/nt/enum/keyaccess.h" +#include "libc/nt/enum/regtype.h" +#include "libc/nt/errors.h" +#include "libc/nt/events.h" +#include "libc/nt/files.h" +#include "libc/nt/ipc.h" +#include "libc/nt/memory.h" +#include "libc/nt/paint.h" +#include "libc/nt/process.h" +#include "libc/nt/registry.h" +#include "libc/nt/synchronization.h" +#include "libc/nt/thread.h" +#include "libc/nt/windows.h" +#include "libc/nt/winsock.h" + +namespace mold { + +template +class MallocOutputFile : public OutputFile { +public: + MallocOutputFile(Context &ctx, std::string path, i64 filesize, i64 perm) + : OutputFile(path, filesize, false), perm(perm) { + this->buf = (u8 *)malloc(filesize); + if (!this->buf) + Fatal(ctx) << "malloc failed"; + } + + void close(Context &ctx) override { + Timer t(ctx, "close_file"); + + if (this->path == "-") { + fwrite(this->buf, this->filesize, 1, stdout); + fclose(stdout); + return; + } + + i64 fd = ::open(this->path.c_str(), O_RDWR | O_CREAT, perm); + if (fd == -1) + Fatal(ctx) << "cannot open " << this->path << ": " << errno_string(); + + FILE *fp = fdopen(fd, "w"); + fwrite(this->buf, this->filesize, 1, fp); + fclose(fp); + free(this->buf); + } + +private: + i64 perm; +}; + +template +std::unique_ptr> +OutputFile::open(Context &ctx, std::string path, i64 filesize, i64 perm) { + Timer t(ctx, "open_file"); + + if (path.starts_with('/') && !ctx.arg.chroot.empty()) + path = ctx.arg.chroot + "/" + path_clean(path); + + OutputFile *file = new MallocOutputFile(ctx, path, filesize, perm); + + if (ctx.arg.filler != -1) + memset(file->buf, ctx.arg.filler, filesize); + return std::unique_ptr>(file); +} + +} // namespace mold diff --git a/third_party/mold/output-file.h b/third_party/mold/output-file.h new file mode 100644 index 00000000000..5e16cf31162 --- /dev/null +++ b/third_party/mold/output-file.h @@ -0,0 +1,6 @@ +// clang-format off +#if _WIN32 +#include "third_party/mold/output-file-win32.h" +#else +#include "third_party/mold/output-file-unix.h" +#endif diff --git a/third_party/mold/perf.cc b/third_party/mold/perf.cc new file mode 100644 index 00000000000..7da0df840f6 --- /dev/null +++ b/third_party/mold/perf.cc @@ -0,0 +1,140 @@ +// clang-format off +#include "third_party/mold/common.h" + +#include "third_party/libcxx/functional" +#include "third_party/libcxx/iomanip" +#include "third_party/libcxx/ios" + +#ifndef _WIN32 +#include "libc/calls/calls.h" +#include "libc/calls/struct/rlimit.h" +#include "libc/calls/struct/rusage.h" +#include "libc/calls/weirdtypes.h" +#include 
"libc/sysv/consts/prio.h" +#include "libc/sysv/consts/rlim.h" +#include "libc/sysv/consts/rlimit.h" +#include "libc/sysv/consts/rusage.h" +#include "libc/calls/struct/itimerval.h" +#include "libc/calls/struct/timeval.h" +#include "libc/calls/weirdtypes.h" +#include "libc/sock/select.h" +#include "libc/sysv/consts/clock.h" +#include "libc/sysv/consts/itimer.h" +#include "libc/time/struct/timezone.h" +#include "libc/time/time.h" +#endif + +namespace mold { + +i64 Counter::get_value() { + return values.combine(std::plus()); +} + +void Counter::print() { + sort(instances, [](Counter *a, Counter *b) { + return a->get_value() > b->get_value(); + }); + + for (Counter *c : instances) + std::cout << std::setw(20) << std::right << c->name + << "=" << c->get_value() << "\n"; +} + +static i64 now_nsec() { +#ifdef _WIN32 + return (i64)std::chrono::steady_clock::now().time_since_epoch().count(); +#else + struct timespec t; + clock_gettime(CLOCK_MONOTONIC, &t); + return (i64)t.tv_sec * 1'000'000'000 + t.tv_nsec; +#endif +} + +static std::pair get_usage() { +#ifdef _WIN32 + auto to_nsec = [](FILETIME t) -> i64 { + return ((u64)t.dwHighDateTime << 32 + (u64)t.dwLowDateTime) * 100; + }; + + FILETIME creation, exit, kernel, user; + GetProcessTimes(GetCurrentProcess(), &creation, &exit, &kernel, &user); + return {to_nsec(user), to_nsec(kernel)}; +#else + auto to_nsec = [](struct timeval t) -> i64 { + return (i64)t.tv_sec * 1'000'000'000 + t.tv_usec * 1'000; + }; + + struct rusage ru; + getrusage(RUSAGE_SELF, &ru); + return {to_nsec(ru.ru_utime), to_nsec(ru.ru_stime)}; +#endif +} + +TimerRecord::TimerRecord(std::string name, TimerRecord *parent) + : name(name), parent(parent) { + start = now_nsec(); + std::tie(user, sys) = get_usage(); + if (parent) + parent->children.push_back(this); +} + +void TimerRecord::stop() { + if (stopped) + return; + stopped = true; + + i64 user2; + i64 sys2; + std::tie(user2, sys2) = get_usage(); + + end = now_nsec(); + user = user2 - user; + sys = sys2 - sys; +} + +static void print_rec(TimerRecord &rec, i64 indent) { + printf(" % 8.3f % 8.3f % 8.3f %s%s\n", + ((double)rec.user / 1'000'000'000), + ((double)rec.sys / 1'000'000'000), + (((double)rec.end - rec.start) / 1'000'000'000), + std::string(indent * 2, ' ').c_str(), + rec.name.c_str()); + + sort(rec.children, [](TimerRecord *a, TimerRecord *b) { + return a->start < b->start; + }); + + for (TimerRecord *child : rec.children) + print_rec(*child, indent + 1); +} + +void print_timer_records( + tbb::concurrent_vector> &records) { + for (i64 i = records.size() - 1; i >= 0; i--) + records[i]->stop(); + + for (i64 i = 0; i < records.size(); i++) { + TimerRecord &inner = *records[i]; + if (inner.parent) + continue; + + for (i64 j = i - 1; j >= 0; j--) { + TimerRecord &outer = *records[j]; + if (outer.start <= inner.start && inner.end <= outer.end) { + inner.parent = &outer; + outer.children.push_back(&inner); + break; + } + } + } + + std::cout << " User System Real Name\n"; + + for (std::unique_ptr &rec : records) + if (!rec->parent) + print_rec(*rec, 0); + + std::cout << std::flush; +} + +} // namespace mold diff --git a/third_party/mold/sha.h b/third_party/mold/sha.h new file mode 100644 index 00000000000..03a45934739 --- /dev/null +++ b/third_party/mold/sha.h @@ -0,0 +1,82 @@ +// clang-format off +#pragma once + +#include "third_party/libcxx/cstdint" + +typedef uint8_t u8; +static constexpr int64_t SHA256_SIZE = 32; + +#ifdef _WIN32 +// On Windows, we use Microsoft CNG. 
+ +// MISSING #include +// MISSING #include +// MISSING #include + +inline static BCRYPT_ALG_HANDLE get_sha256_handle() { + static std::once_flag once; + static BCRYPT_ALG_HANDLE alg; + + std::call_once(once, [&] { + BCryptOpenAlgorithmProvider(&alg, BCRYPT_SHA256_ALGORITHM, nullptr, 0); + }); + return alg; +} + +inline void sha256_hash(u8 *in, size_t len, u8 *out) { + BCryptHash(get_sha256_handle(), nullptr, 0, in, len, out, SHA256_SIZE); +} + +class SHA256Hash { +public: + SHA256Hash() { + BCryptCreateHash(get_sha256_handle(), &handle, nullptr, 0, nullptr, 0, 0); + } + + void update(u8 *data, size_t len) { + BCryptHashData(handle, data, len, 0); + } + + void finish(u8 *out) { + BCryptFinishHash(handle, out, SHA256_SIZE, 0); + } + +private: + BCRYPT_HASH_HANDLE handle; +}; + +#else +// On Unix, we use OpenSSL or the Apple's OpenSSL-compatible API. + +#ifdef __APPLE__ +# define COMMON_DIGEST_FOR_OPENSSL +// MISSING #include +# define SHA256(data, len, md) CC_SHA256(data, len, md) +#else +# define OPENSSL_SUPPRESS_DEPRECATED 1 +// MISSING #include +#endif + +inline void sha256_hash(u8 *in, size_t len, u8 *out) { + SHA256(in, len, out); +} + +class SHA256Hash { +public: + SHA256Hash() { + SHA256_Init(&ctx); + } + + void update(u8 *data, size_t len) { + SHA256_Update(&ctx, data, len); + } + + void finish(u8 *out) { + SHA256_Final(out, &ctx); + } + +private: + SHA256_CTX ctx; +}; + +#endif diff --git a/third_party/mold/tar.cc b/third_party/mold/tar.cc new file mode 100644 index 00000000000..8c855348d1e --- /dev/null +++ b/third_party/mold/tar.cc @@ -0,0 +1,113 @@ +// clang-format off +#include "third_party/mold/common.h" + +namespace mold { + +// A tar file consists of one or more Ustar header followed by data. +// Each Ustar header represents a single file in an archive. +// +// tar is an old file format, and its `name` field is only 100 bytes long. +// If `name` is longer than 100 bytes, we can emit a PAX header before a +// Ustar header to store a long filename. +// +// For simplicity, we always emit a PAX header even for a short filename. +struct UstarHeader { + UstarHeader() { + memset(this, 0, sizeof(*this)); + } + + void finalize() { + memset(checksum, ' ', sizeof(checksum)); + memcpy(magic, "ustar", 5); + memcpy(version, "00", 2); + + // Compute checksum + int sum = 0; + for (i64 i = 0; i < sizeof(*this); i++) + sum += ((u8 *)this)[i]; + + // We need to convince the compiler that sum isn't too big to silence + // -Werror=format-truncation. + ASSUME(sum < 01'000'000); + snprintf(checksum, sizeof(checksum), "%06o", sum); + } + + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char checksum[8]; + char typeflag[1]; + char linkname[100]; + char magic[6]; + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char prefix[155]; + char pad[12]; +}; + +static std::string encode_path(std::string basedir, std::string path) { + path = path_clean(basedir + "/" + path); + + // Construct a string which contains something like + // "16 path=foo/bar\n" where 16 is the size of the string + // including the size string itself. 
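+  //
+  // Worked example: for path "foo/bar" the record body " path=foo/bar\n"
+  // is 14 bytes, a first guess of the prefix gives 14 + strlen("14") = 16,
+  // and "16 path=foo/bar\n" is indeed 16 bytes long. The second round
+  // below re-adds the prefix width in case writing the prefix itself
+  // pushes the total into one more digit (e.g. a 98-byte body gets the
+  // prefix 101).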
+ i64 len = std::string(" path=\n").size() + path.size(); + i64 total = std::to_string(len).size() + len; + total = std::to_string(total).size() + len; + return std::to_string(total) + " path=" + path + "\n"; +} + +std::unique_ptr +TarWriter::open(std::string output_path, std::string basedir) { + FILE *out = fopen(output_path.c_str(), "w"); + if (!out) + return nullptr; + return std::unique_ptr(new TarWriter(out, basedir)); +} + +TarWriter::~TarWriter() { + fclose(out); +} + +void TarWriter::append(std::string path, std::string_view data) { + // Write PAX header + static_assert(sizeof(UstarHeader) == BLOCK_SIZE); + UstarHeader pax; + + std::string attr = encode_path(basedir, path); + snprintf(pax.size, sizeof(pax.size), "%011zo", attr.size()); + pax.typeflag[0] = 'x'; + pax.finalize(); + fwrite(&pax, sizeof(pax), 1, out); + + // Write pathname + fwrite(attr.data(), attr.size(), 1, out); + fseek(out, align_to(ftell(out), BLOCK_SIZE), SEEK_SET); + + // Write Ustar header + UstarHeader ustar; + memcpy(ustar.mode, "0000664", 8); + snprintf(ustar.size, sizeof(ustar.size), "%011zo", data.size()); + ustar.finalize(); + fwrite(&ustar, sizeof(ustar), 1, out); + + // Write file contents + fwrite(data.data(), data.size(), 1, out); + fseek(out, align_to(ftell(out), BLOCK_SIZE), SEEK_SET); + + // A tar file must ends with two empty blocks, so write such + // terminator and seek back. + u8 terminator[BLOCK_SIZE * 2] = {}; + fwrite(&terminator, BLOCK_SIZE * 2, 1, out); + fseek(out, -BLOCK_SIZE * 2, SEEK_END); + + assert(ftell(out) % BLOCK_SIZE == 0); +} + +} // namespace mold diff --git a/third_party/mold/test/elf/CMakeLists.txt b/third_party/mold/test/elf/CMakeLists.txt new file mode 100644 index 00000000000..ff8cbd009d0 --- /dev/null +++ b/third_party/mold/test/elf/CMakeLists.txt @@ -0,0 +1,74 @@ +// clang-format off +option(MOLD_ENABLE_QEMU_TESTS "Enable tests on non-native targets" OFF) +option(MOLD_ENABLE_QEMU_TESTS_RV32 "Enable tests for RV32" OFF) +option(MOLD_ENABLE_QEMU_TESTS_POWER10 "Enable tests for Power10" OFF) + +function(add_target TRIPLE) + set(HOST ${CMAKE_HOST_SYSTEM_PROCESSOR}) + + if(${HOST} MATCHES "amd64") + set(HOST x86_64) + elseif(${HOST} MATCHES "arm.*") + set(HOST arm) + elseif(${HOST} STREQUAL "ppc64") + set(HOST powerpc64) + endif() + + if(${TRIPLE} MATCHES "${HOST}-.*") + set(IS_NATIVE 1) + endif() + + if(${TRIPLE} MATCHES "([^-]+)-.") + set(MACHINE ${CMAKE_MATCH_1}) + endif() + + if(IS_NATIVE OR MOLD_ENABLE_QEMU_TESTS) + file(GLOB ALL_TESTS RELATIVE ${CMAKE_CURRENT_LIST_DIR} CONFIGURE_DEPENDS + "*.sh") + + list(FILTER ALL_TESTS EXCLUDE REGEX "_") + + file(GLOB TESTS RELATIVE ${CMAKE_CURRENT_LIST_DIR} CONFIGURE_DEPENDS + "${MACHINE}_*.sh") + + list(APPEND TESTS ${ALL_TESTS}) + + foreach(TEST IN LISTS TESTS) + string(REGEX REPLACE "\\.sh$" "" TESTNAME "${MACHINE}-${TEST}") + + add_test(NAME ${TESTNAME} + COMMAND bash -x ${CMAKE_CURRENT_LIST_DIR}/${TEST} + WORKING_DIRECTORY ${mold_BINARY_DIR}) + + if(IS_NATIVE) + set_tests_properties(${TESTNAME} PROPERTIES + SKIP_REGULAR_EXPRESSION "skipped") + else() + set_tests_properties(${TESTNAME} PROPERTIES + ENVIRONMENT "TRIPLE=${TRIPLE}") + endif() + endforeach() + endif() +endfunction() + +add_target(x86_64-linux-gnu) +add_target(i686-linux-gnu) +add_target(aarch64-linux-gnu) +add_target(arm-linux-gnueabihf) +add_target(riscv64-linux-gnu) +add_target(powerpc-linux-gnu) +add_target(powerpc64-linux-gnu) +add_target(powerpc64le-linux-gnu) +add_target(sparc64-linux-gnu) +add_target(s390x-linux-gnu) +add_target(m68k-linux-gnu) 
+add_target(sh4-linux-gnu) +add_target(alpha-linux-gnu) + +if(MOLD_ENABLE_QEMU_TESTS_RV32) + add_target(riscv32-linux-gnu) +endif() + +if(MOLD_ENABLE_QEMU_TESTS_POWER10) + add_target(powerpc64le_power10-linux-gnu) +endif() diff --git a/third_party/mold/test/elf/aarch64_range-extension-thunk-disassembly.sh b/third_party/mold/test/elf/aarch64_range-extension-thunk-disassembly.sh new file mode 100755 index 00000000000..3fa905a1b69 --- /dev/null +++ b/third_party/mold/test/elf/aarch64_range-extension-thunk-disassembly.sh @@ -0,0 +1,30 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = aarch64 ] || skip + +cat <:' diff --git a/third_party/mold/test/elf/abs-error.sh b/third_party/mold/test/elf/abs-error.sh new file mode 100755 index 00000000000..7364bd4a51b --- /dev/null +++ b/third_party/mold/test/elf/abs-error.sh @@ -0,0 +1,29 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = aarch64 ] && skip +[ $MACHINE = ppc64 ] && skip +[ $MACHINE = ppc64le ] && skip +[ $MACHINE = s390x ] && skip +[ $MACHINE = alpha ] && skip + +cat <& $t/log +grep -q 'recompile with -fPIC' $t/log diff --git a/third_party/mold/test/elf/absolute-symbols.sh b/third_party/mold/test/elf/absolute-symbols.sh new file mode 100755 index 00000000000..60ada3ec697 --- /dev/null +++ b/third_party/mold/test/elf/absolute-symbols.sh @@ -0,0 +1,67 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# This test crashes only on qemu-sparc64 running on GitHub Actions, +# even though it works on a local x86-64 machine and on an actual +# SPARC machine. +[ $MACHINE = sparc64 ] && skip + +cat < + +void handler(int signum, siginfo_t *info, void *ptr) { + printf("ip=%p\n", info->si_addr); + exit(0); +} + +extern volatile int foo; + +int main() { + struct sigaction act; + act.sa_flags = SA_SIGINFO | SA_RESETHAND; + act.sa_sigaction = handler; + sigemptyset(&act.sa_mask); + sigaction(SIGSEGV, &act, 0); + foo = 5; +} +EOF + +$CC -B. -o $t/exe -no-pie $t/a.o $t/b.o +$QEMU $t/exe | grep -q '^ip=0x80000.$' diff --git a/third_party/mold/test/elf/allow-multiple-definition.sh b/third_party/mold/test/elf/allow-multiple-definition.sh new file mode 100755 index 00000000000..257ee143783 --- /dev/null +++ b/third_party/mold/test/elf/allow-multiple-definition.sh @@ -0,0 +1,10 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +echo 'int main() { return 0; }' | $CC -c -o $t/a.o -xc - +echo 'int main() { return 1; }' | $CC -c -o $t/b.o -xc - + +! $CC -B. -o $t/exe $t/a.o $t/b.o 2> /dev/null || false +$CC -B. -o $t/exe $t/a.o $t/b.o -Wl,-allow-multiple-definition +$CC -B. -o $t/exe $t/a.o $t/b.o -Wl,-z,muldefs diff --git a/third_party/mold/test/elf/ar-alignment.sh b/third_party/mold/test/elf/ar-alignment.sh new file mode 100755 index 00000000000..276e8307fc6 --- /dev/null +++ b/third_party/mold/test/elf/ar-alignment.sh @@ -0,0 +1,35 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <> $t/a.o + +cat <:' > $t/log + +grep -Eq 'mov\s+ip, pc' $t/log +grep -Eq 'bx\s+ip' $t/log +grep -Eq 'add\s+ip, ip, pc' $t/log diff --git a/third_party/mold/test/elf/arm_range-extension-thunk.sh b/third_party/mold/test/elf/arm_range-extension-thunk.sh new file mode 100755 index 00000000000..6f05f838982 --- /dev/null +++ b/third_party/mold/test/elf/arm_range-extension-thunk.sh @@ -0,0 +1,60 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +[ $MACHINE = arm ] || skip + +echo 'int main() {}' | $CC -c -o /dev/null -xc - -O0 -mthumb >& /dev/null \ + || skip + +cat < $t/a.c +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +void fn3(); +void fn4(); + +__attribute__((section(".low"))) void fn1() { printf(" fn1"); fn3(); } +__attribute__((section(".low"))) void fn2() { printf(" fn2"); fn4(); } + +int main() { + printf(" main"); + fn1(); + printf("\n"); +} +EOF + +cat < $t/b.c +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +void fn1(); +void fn2(); + +__attribute__((section(".high"))) void fn3() { printf(" fn3"); fn2(); } +__attribute__((section(".high"))) void fn4() { printf(" fn4"); } +EOF + +$CC -c -o $t/c.o $t/a.c -O0 -mthumb +$CC -c -o $t/d.o $t/b.c -O0 -marm + +$CC -B. -o $t/exe $t/c.o $t/d.o \ + -Wl,--section-start=.low=0x10000000,--section-start=.high=0x20000000 +$QEMU $t/exe | grep -q 'main fn1 fn3 fn2 fn4' + +$CC -c -o $t/e.o $t/a.c -O2 -mthumb +$CC -c -o $t/f.o $t/b.c -O2 -marm + +$CC -B. -o $t/exe $t/e.o $t/f.o \ + -Wl,--section-start=.low=0x10000000,--section-start=.high=0x20000000 +$QEMU $t/exe | grep -q 'main fn1 fn3 fn2 fn4' diff --git a/third_party/mold/test/elf/arm_thumb-interwork.sh b/third_party/mold/test/elf/arm_thumb-interwork.sh new file mode 100755 index 00000000000..a04f0f373c6 --- /dev/null +++ b/third_party/mold/test/elf/arm_thumb-interwork.sh @@ -0,0 +1,45 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[[ $MACHINE == arm* ]] || skip + +echo 'int foo() { return 0; }' | $CC -o /dev/null -c -xc - -mthumb 2> /dev/null || skip + +cat <& /dev/null \ + || skip + +cat < $t/a.c +extern _Thread_local int foo; + +__attribute__((section(".low"))) +int get_foo() { + int y = foo; + return y; +} + +static _Thread_local int bar = 5; + +__attribute__((section(".high"))) +int get_bar() { + return bar; +} +EOF + +cat < $t/b.c +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +_Thread_local int foo; + +int get_foo(); +int get_bar(); + +int main() { + foo = 42; + printf("%d %d\n", get_foo(), get_bar()); + return 0; +} +EOF + +$GCC -fPIC -mtls-dialect=gnu2 -c -o $t/c.o $t/a.c -marm +$GCC -fPIC -mtls-dialect=gnu2 -c -o $t/d.o $t/b.c -marm + +$CC -B. -o $t/exe1 $t/c.o $t/d.o +$QEMU $t/exe1 | grep -q '42 5' + +$CC -B. -o $t/exe2 $t/c.o $t/d.o -Wl,-no-relax +$QEMU $t/exe2 | grep -q '42 5' + +$CC -B. -o $t/exe3 $t/c.o $t/d.o -Wl,-no-relax \ + -Wl,--section-start=.low=0x10000000,--section-start=.high=0x20000000 +$QEMU $t/exe3 | grep -q '42 5' + +$GCC -fPIC -mtls-dialect=gnu2 -c -o $t/e.o $t/a.c -mthumb +$GCC -fPIC -mtls-dialect=gnu2 -c -o $t/f.o $t/b.c -mthumb + +$CC -B. -o $t/exe4 $t/e.o $t/f.o +$QEMU $t/exe4 | grep -q '42 5' + +$CC -B. -o $t/exe5 $t/e.o $t/f.o -Wl,-no-relax +$QEMU $t/exe5 | grep -q '42 5' + +$CC -B. 
-o $t/exe6 $t/e.o $t/f.o -Wl,-no-relax \ + -Wl,--section-start=.low=0x10000000,--section-start=.high=0x20000000 +$QEMU $t/exe6 | grep -q '42 5' diff --git a/third_party/mold/test/elf/as-needed-dso.sh b/third_party/mold/test/elf/as-needed-dso.sh new file mode 100755 index 00000000000..6a3134b47fe --- /dev/null +++ b/third_party/mold/test/elf/as-needed-dso.sh @@ -0,0 +1,25 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log1 +! grep -q libfoo $t/log1 || false + +$CC -B. -o $t/exe2 $t/a.o -L$t -Wl,--as-needed -lbar -lfoo +readelf -W --dynamic $t/exe2 > $t/log2 +grep -q libfoo $t/log2 diff --git a/third_party/mold/test/elf/as-needed-weak.sh b/third_party/mold/test/elf/as-needed-weak.sh new file mode 100755 index 00000000000..b6aa11320df --- /dev/null +++ b/third_party/mold/test/elf/as-needed-weak.sh @@ -0,0 +1,32 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log1 +grep -Fq 'Shared library: [libfoo.so]' $t/log1 +grep -Fq 'Shared library: [libbar.so]' $t/log1 + +$CC -o $t/exe2 $t/a.o -Wl,-as-needed -L$t -lbar -lfoo + +readelf --dynamic $t/exe2 > $t/log2 +! grep -Fq 'Shared library: [libfoo.so]' $t/log2 || false +! grep -Fq 'Shared library: [libbar.so]' $t/log2 || false diff --git a/third_party/mold/test/elf/as-needed.sh b/third_party/mold/test/elf/as-needed.sh new file mode 100755 index 00000000000..3d9e8911b58 --- /dev/null +++ b/third_party/mold/test/elf/as-needed.sh @@ -0,0 +1,30 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/readelf +grep -Fq 'Shared library: [libfoo.so]' $t/readelf +grep -Fq 'Shared library: [libbar.so]' $t/readelf + +$CC -B. -o $t/exe $t/a.o -Wl,--as-needed $t/b.so $t/c.so + +readelf --dynamic $t/exe > $t/readelf +grep -Fq 'Shared library: [libfoo.so]' $t/readelf +! grep -Fq 'Shared library: [libbar.so]' $t/readelf || false diff --git a/third_party/mold/test/elf/as-needed2.sh b/third_party/mold/test/elf/as-needed2.sh new file mode 100755 index 00000000000..9e046927781 --- /dev/null +++ b/third_party/mold/test/elf/as-needed2.sh @@ -0,0 +1,39 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -q libbaz $t/log || false +! grep -q libbar $t/log || false +grep -q libfoo $t/log || false diff --git a/third_party/mold/test/elf/auxiliary.sh b/third_party/mold/test/elf/auxiliary.sh new file mode 100755 index 00000000000..366e5a82251 --- /dev/null +++ b/third_party/mold/test/elf/auxiliary.sh @@ -0,0 +1,16 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -Fq 'Auxiliary library: [foo]' $t/log +grep -Fq 'Auxiliary library: [bar]' $t/log diff --git a/third_party/mold/test/elf/bno-symbolic.sh b/third_party/mold/test/elf/bno-symbolic.sh new file mode 100755 index 00000000000..5daae2ca8d2 --- /dev/null +++ b/third_party/mold/test/elf/bno-symbolic.sh @@ -0,0 +1,43 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# GCC produces buggy code for this test case on s390x. +# https://sourceware.org/bugzilla/show_bug.cgi?id=29655 +[ $MACHINE = s390x ] && $CC -v 2>&1 | grep -E '^gcc version 1[0-3]\.' && skip + +cat < $t/a.c + +$CC -B. -o $t/exe $t/a.c -Wl,-build-id +readelf -n $t/exe | grep -qv 'GNU.*0x00000010.*NT_GNU_BUILD_ID' + +$CC -B. -o $t/exe $t/a.c -Wl,-build-id=uuid +readelf -nW $t/exe | + grep -Eq 'GNU.*0x00000010.*NT_GNU_BUILD_ID.*Build ID: ............4...[89abcdef]' + +$CC -B. -o $t/exe $t/a.c -Wl,-build-id=md5 +readelf -n $t/exe | grep -q 'GNU.*0x00000010.*NT_GNU_BUILD_ID' + +$CC -B. 
-o $t/exe $t/a.c -Wl,-build-id=sha1 +readelf -n $t/exe | grep -q 'GNU.*0x00000014.*NT_GNU_BUILD_ID' + +$CC -B. -o $t/exe $t/a.c -Wl,-build-id=sha256 +readelf -n $t/exe | grep -q 'GNU.*0x00000020.*NT_GNU_BUILD_ID' + +$CC -B. -o $t/exe $t/a.c -Wl,-build-id=0xdeadbeefdeadbeef +readelf -n $t/exe | grep -q 'Build ID: deadbeefdeadbeef' diff --git a/third_party/mold/test/elf/canonical-plt.sh b/third_party/mold/test/elf/canonical-plt.sh new file mode 100755 index 00000000000..e4b08ee08ae --- /dev/null +++ b/third_party/mold/test/elf/canonical-plt.sh @@ -0,0 +1,46 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# GCC produces buggy code for this test case on s390x. +# https://sourceware.org/bugzilla/show_bug.cgi?id=29655 +[ $MACHINE = s390x ] && $CC -v 2>&1 | grep -E '^gcc version 1[0-3]\.' && skip + +cat <&1 | grep -q 'unknown command line option: -zfoo' +{ ./mold -z foo || true; } 2>&1 | grep -q 'unknown command line option: -z foo' +{ ./mold -abcdefg || true; } 2>&1 | grep -q 'unknown command line option: -abcdefg' +{ ./mold --abcdefg || true; } 2>&1 | grep -q 'unknown command line option: --abcdefg' diff --git a/third_party/mold/test/elf/color-diagnostics.sh b/third_party/mold/test/elf/color-diagnostics.sh new file mode 100755 index 00000000000..5c7d03b93b0 --- /dev/null +++ b/third_party/mold/test/elf/color-diagnostics.sh @@ -0,0 +1,20 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! grep -q $'\033' $t/log || false + +! ./mold -o $t/exe $t/a.o --color-diagnostics=always 2> $t/log +grep -q $'\033' $t/log + +! ./mold -o $t/exe $t/a.o --color-diagnostics=never 2> $t/log +! grep -q $'\033' $t/log || false + +! ./mold -o $t/exe $t/a.o --color-diagnostics=auto 2> $t/log +! grep -q $'\033' $t/log || false diff --git a/third_party/mold/test/elf/comment.sh b/third_party/mold/test/elf/comment.sh new file mode 100755 index 00000000000..f244182bf42 --- /dev/null +++ b/third_party/mold/test/elf/comment.sh @@ -0,0 +1,11 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& /dev/null +} + +supports_ifunc() { + echo 'void x() __attribute__((ifunc("y"))); void *y() { return 0; }' | \ + $CC -c -o /dev/null -xc - >& /dev/null +} + +skip() { + echo skipped + trap - EXIT + exit 0 +} + +on_error() { + code=$? + echo "command failed: $1: $BASH_COMMAND" + trap - EXIT + exit $code +} + +on_exit() { + echo OK + exit 0 +} + +trap 'on_error $LINENO' ERR +trap on_exit EXIT + +# Print out the startup message +testname=$(basename "$0" .sh) +echo -n "Testing $testname ... " +t=$TESTDIR/$testname +mkdir -p $t diff --git a/third_party/mold/test/elf/common.sh b/third_party/mold/test/elf/common.sh new file mode 100755 index 00000000000..9490afcb0c3 --- /dev/null +++ b/third_party/mold/test/elf/common.sh @@ -0,0 +1,33 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -q '.common .*NOBITS' $t/log diff --git a/third_party/mold/test/elf/compress-debug-sections-zstd.sh b/third_party/mold/test/elf/compress-debug-sections-zstd.sh new file mode 100755 index 00000000000..2e89d778948 --- /dev/null +++ b/third_party/mold/test/elf/compress-debug-sections-zstd.sh @@ -0,0 +1,29 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +# arm-linux-gnueabihf-objcopy crashes on x86-64 +[ $MACHINE = arm ] && skip +[ $MACHINE = riscv32 ] && skip + +command -v zstdcat >& /dev/null || skip + +cat < /dev/null diff --git a/third_party/mold/test/elf/compress-debug-sections.sh b/third_party/mold/test/elf/compress-debug-sections.sh new file mode 100755 index 00000000000..88c081790fa --- /dev/null +++ b/third_party/mold/test/elf/compress-debug-sections.sh @@ -0,0 +1,25 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +command -v dwarfdump >& /dev/null || skip + +cat < $t/log +grep -Fq '.debug_info SHF_COMPRESSED' $t/log +grep -Fq '.debug_str SHF_COMPRESSED' $t/log diff --git a/third_party/mold/test/elf/compressed-debug-info.sh b/third_party/mold/test/elf/compressed-debug-info.sh new file mode 100755 index 00000000000..058d2244409 --- /dev/null +++ b/third_party/mold/test/elf/compressed-debug-info.sh @@ -0,0 +1,21 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +command -v dwarfdump >& /dev/null || skip + +cat < /dev/null +readelf --sections $t/exe | grep -Fq .debug_info diff --git a/third_party/mold/test/elf/copyrel-alignment.sh b/third_party/mold/test/elf/copyrel-alignment.sh new file mode 100755 index 00000000000..ad367400088 --- /dev/null +++ b/third_party/mold/test/elf/copyrel-alignment.sh @@ -0,0 +1,43 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = ppc64 ] && skip +[ $MACHINE = ppc64le ] && skip +[ $MACHINE = alpha ] && skip + +cat < /dev/null +readelf -W --sections $t/exe1 | grep -q '\.copyrel.* 32$' + +$CC -B. -o $t/exe2 $t/d.o $t/b.so -no-pie +$QEMU $t/exe2 > /dev/null +readelf -W --sections $t/exe2 | grep -q '\.copyrel.* 8$' + +$CC -B. -o $t/exe3 $t/d.o $t/c.so -no-pie +$QEMU $t/exe3 > /dev/null +readelf -W --sections $t/exe3 | grep -q '\.copyrel.* 256$' diff --git a/third_party/mold/test/elf/copyrel-protected.sh b/third_party/mold/test/elf/copyrel-protected.sh new file mode 100755 index 00000000000..c3961d02bf8 --- /dev/null +++ b/third_party/mold/test/elf/copyrel-protected.sh @@ -0,0 +1,22 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = ppc64 ] && skip +[ $MACHINE = ppc64le ] && skip +[ $MACHINE = alpha ] && skip + +cat <& $t/log -no-pie || false +grep -Fq 'cannot make copy relocation for protected symbol' $t/log diff --git a/third_party/mold/test/elf/copyrel-relro.sh b/third_party/mold/test/elf/copyrel-relro.sh new file mode 100755 index 00000000000..9ba5788e5d1 --- /dev/null +++ b/third_party/mold/test/elf/copyrel-relro.sh @@ -0,0 +1,53 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& /dev/null || skip + +cat < /dev/null diff --git a/third_party/mold/test/elf/debug-macro-section.sh b/third_party/mold/test/elf/debug-macro-section.sh new file mode 100755 index 00000000000..05ab50a3719 --- /dev/null +++ b/third_party/mold/test/elf/debug-macro-section.sh @@ -0,0 +1,23 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/a.h +#define A 23 +#define B 99 +EOF + +cat < $t/log +! grep 'DW_MACRO_import -.* 0x0$' $t/log || false diff --git a/third_party/mold/test/elf/default-symver.sh b/third_party/mold/test/elf/default-symver.sh new file mode 100755 index 00000000000..7e644ce2c3e --- /dev/null +++ b/third_party/mold/test/elf/default-symver.sh @@ -0,0 +1,14 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat <& /dev/null \ + || skip + +cat < $t/log || false + +grep -Fq ' as core::iter::iterator::Iterator>::rposition::::{closure#0}' $t/log diff --git a/third_party/mold/test/elf/demangle.sh b/third_party/mold/test/elf/demangle.sh new file mode 100755 index 00000000000..69d48c3ade0 --- /dev/null +++ b/third_party/mold/test/elf/demangle.sh @@ -0,0 +1,29 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log || false +grep -q 'undefined symbol: _Z3fooii$' $t/log + +! $CC -B. -o $t/exe $t/a.o -Wl,-demangle 2> $t/log || false +grep -Eq 'undefined symbol: foo\(int, int\)$' $t/log + +! $CC -B. -o $t/exe $t/a.o 2> $t/log || false +grep -Eq 'undefined symbol: foo\(int, int\)$' $t/log + +cat < $t/log || false +grep -q 'undefined symbol: Pi$' $t/log diff --git a/third_party/mold/test/elf/dependency-file.sh b/third_party/mold/test/elf/dependency-file.sh new file mode 100755 index 00000000000..742104fefa2 --- /dev/null +++ b/third_party/mold/test/elf/dependency-file.sh @@ -0,0 +1,21 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -Fq _start $t/log +grep -Fq foo $t/log +grep -Fq .Lbar $t/log + +./mold -o $t/exe $t/a.o --discard-locals +readelf --symbols $t/exe > $t/log +grep -Fq _start $t/log +grep -Fq foo $t/log +! grep -Fq .Lbar $t/log || false + +./mold -o $t/exe $t/a.o --discard-all +readelf --symbols $t/exe > $t/log +grep -Fq _start $t/log +! grep -Fq foo $t/log || false +! grep -Fq .Lbar $t/log || false + +./mold -o $t/exe $t/a.o --strip-all +readelf --symbols $t/exe > $t/log +! grep -Fq _start $t/log || false +! grep -Fq foo $t/log || false +! grep -Fq .Lbar $t/log || false diff --git a/third_party/mold/test/elf/dso-undef.sh b/third_party/mold/test/elf/dso-undef.sh new file mode 100755 index 00000000000..73aa9ef918f --- /dev/null +++ b/third_party/mold/test/elf/dso-undef.sh @@ -0,0 +1,28 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <&1 | grep -q musl && skip + +cat < $t/log1 +$QEMU $t/exe2 > $t/log2 + +! grep -q init $t/log1 || false +! grep -q fini $t/log1 || false + +grep -q init $t/log2 +grep -q fini $t/log2 diff --git a/third_party/mold/test/elf/dt-needed.sh b/third_party/mold/test/elf/dt-needed.sh new file mode 100755 index 00000000000..ad46eb71fbc --- /dev/null +++ b/third_party/mold/test/elf/dt-needed.sh @@ -0,0 +1,27 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log || false +grep -q 'duplicate symbol: .*\.o: .*\.o: main' $t/log diff --git a/third_party/mold/test/elf/dynamic-dt-debug.sh b/third_party/mold/test/elf/dynamic-dt-debug.sh new file mode 100755 index 00000000000..586d6524949 --- /dev/null +++ b/third_party/mold/test/elf/dynamic-dt-debug.sh @@ -0,0 +1,19 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -Fq '(DEBUG)' $t/log + +cat < $t/log +! grep -Fq '(DEBUG)' $t/log || false diff --git a/third_party/mold/test/elf/dynamic-linker.sh b/third_party/mold/test/elf/dynamic-linker.sh new file mode 100755 index 00000000000..058981a39ce --- /dev/null +++ b/third_party/mold/test/elf/dynamic-linker.sh @@ -0,0 +1,20 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! 
grep -Fq .interp $t/log || false + +readelf --dynamic $t/exe > $t/log + +./mold -o $t/exe $t/a.o --dynamic-linker=/foo/bar + +readelf --sections $t/exe > $t/log +grep -Fq .interp $t/log diff --git a/third_party/mold/test/elf/dynamic-list.sh b/third_party/mold/test/elf/dynamic-list.sh new file mode 100755 index 00000000000..0921000bbdc --- /dev/null +++ b/third_party/mold/test/elf/dynamic-list.sh @@ -0,0 +1,37 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! grep -q ' foo$' $t/log || false +! grep -q ' bar$' $t/log || false + +cat < $t/dyn +{ foo; bar; }; +EOF + +$CC -B. -o $t/exe1 $t/a.o -Wl,-dynamic-list=$t/dyn + +readelf --dyn-syms $t/exe1 > $t/log1 +grep -q ' foo$' $t/log1 +grep -q ' bar$' $t/log1 + +$CC -B. -o $t/exe2 $t/a.o -Wl,--export-dynamic-symbol-list=$t/dyn + +readelf --dyn-syms $t/exe2 > $t/log2 +grep -q ' foo$' $t/log2 +grep -q ' bar$' $t/log2 + +$CC -B. -o $t/exe3 $t/a.o -Wl,--export-dynamic-symbol=foo,--export-dynamic-symbol=bar + +readelf --dyn-syms $t/exe3 > $t/log3 +grep -q ' foo$' $t/log3 +grep -q ' bar$' $t/log3 diff --git a/third_party/mold/test/elf/dynamic-list2.sh b/third_party/mold/test/elf/dynamic-list2.sh new file mode 100755 index 00000000000..ed0d8275d60 --- /dev/null +++ b/third_party/mold/test/elf/dynamic-list2.sh @@ -0,0 +1,37 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! grep -q ' foo$' $t/log || false +! grep -q ' bar$' $t/log || false + +cat < $t/dyn +{ foo; extern "C++" { "baz(int)"; }; }; +EOF + +$CC -B. -o $t/exe1 $t/a.o $t/b.o -Wl,-dynamic-list=$t/dyn + +readelf --dyn-syms $t/exe1 > $t/log1 +grep -q ' foo$' $t/log1 +! grep -q ' bar$' $t/log1 || false +grep -q ' _Z3bazi$' $t/log1 + +$CC -B. -o $t/exe2 $t/a.o $t/b.o -Wl,--export-dynamic-symbol-list=$t/dyn + +readelf --dyn-syms $t/exe2 > $t/log2 +grep -q ' foo$' $t/log2 +! grep -q ' bar$' $t/log2 || false +grep -q ' _Z3bazi$' $t/log2 diff --git a/third_party/mold/test/elf/dynamic-list3.sh b/third_party/mold/test/elf/dynamic-list3.sh new file mode 100755 index 00000000000..ced0a7b79bc --- /dev/null +++ b/third_party/mold/test/elf/dynamic-list3.sh @@ -0,0 +1,40 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' > $t/dyn +{ + xyz; + foo*bar*[abc]x; +}; +EOF + +cat < $t/log1 +grep -q ' xyz$' $t/log1 +! grep -q ' foobarzx$' $t/log1 || false +grep -q ' foobarcx$' $t/log1 +grep -q ' foo123bar456bx$' $t/log1 +! grep -q ' foo123bar456c$' $t/log1 || false +! grep -q ' foo123bar456x$' $t/log1 || false + +$CC -B. -Wl,--export-dynamic-symbol-list=$t/dyn -o $t/exe2 $t/b.o + +readelf --dyn-syms $t/exe2 > $t/log2 +grep -q ' xyz$' $t/log2 +! grep -q ' foobarzx$' $t/log2 || false +grep -q ' foobarcx$' $t/log2 +grep -q ' foo123bar456bx$' $t/log2 +! grep -q ' foo123bar456c$' $t/log2 || false +! grep -q ' foo123bar456x$' $t/log2 || false diff --git a/third_party/mold/test/elf/dynamic.sh b/third_party/mold/test/elf/dynamic.sh new file mode 100755 index 00000000000..7a79d230495 --- /dev/null +++ b/third_party/mold/test/elf/dynamic.sh @@ -0,0 +1,27 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +echo '.globl main; main:' | $CC -o $t/a.o -c -x assembler - + +$CC -B. 
-o $t/exe $t/a.o + +readelf --dynamic $t/exe > $t/log +grep -Eq 'Shared library:.*\blibc\b' $t/log + +readelf -W --dyn-syms --use-dynamic $t/exe > $t/log2 +grep -Eq 'FUNC\s+GLOBAL\s+DEFAULT.*UND\s+__libc_start_main' $t/log2 + +cat < $t/log || false +grep -q 'unknown file type' $t/log diff --git a/third_party/mold/test/elf/empty-version.sh b/third_party/mold/test/elf/empty-version.sh new file mode 100755 index 00000000000..1bc40a9e8e7 --- /dev/null +++ b/third_party/mold/test/elf/empty-version.sh @@ -0,0 +1,16 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -q "Entry point address:.*0x1000$" $t/log + +$CC -B. -o $t/exe2 -Wl,-e,bar $t/a.o $t/b.o +readelf -e $t/exe2 > $t/log +grep -q "Entry point address:.*0x2000$" $t/log diff --git a/third_party/mold/test/elf/exception.sh b/third_party/mold/test/elf/exception.sh new file mode 100755 index 00000000000..536f15a6362 --- /dev/null +++ b/third_party/mold/test/elf/exception.sh @@ -0,0 +1,74 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = sh4 ] && skip + +static= +test_cflags -static && static=-static + +# I don't know why, but we need -pthread on m68k +static="$static -pthread" + +cat < $t/a.cc +int main() { + try { + throw 0; + } catch (int x) { + return x; + } + return 1; +} +EOF + +$CXX -c -o $t/b.o $t/a.cc -fPIC +$CXX -c -o $t/c.o $t/a.cc -fno-PIC + +$CXX -B. -o $t/exe1 $t/b.o $static +$QEMU $t/exe1 + +$CXX -B. -o $t/exe2 $t/c.o $static +$QEMU $t/exe2 + +$CXX -B. -o $t/exe3 $t/b.o -pie +$QEMU $t/exe3 + +$CXX -B. -o $t/exe4 $t/c.o -no-pie +$QEMU $t/exe4 + +$CXX -B. -o $t/exe5 $t/b.o -pie -Wl,--gc-sections +$QEMU $t/exe5 + +$CXX -B. -o $t/exe6 $t/c.o $static -Wl,--gc-sections +$QEMU $t/exe6 + +if [ $MACHINE = x86_64 ]; then + $CXX -c -o $t/d.o $t/a.cc -mcmodel=large -fPIC + + $CXX -B. -o $t/exe7 $t/d.o $static + $QEMU $t/exe7 + + $CXX -B. -o $t/exe8 $t/d.o -pie + $QEMU $t/exe8 +fi + +if [ $MACHINE = x86_64 -o $MACHINE = aarch64 ]; then + $CXX -c -o $t/e.o $t/a.cc -mcmodel=large -fno-PIC + + $CXX -B. -o $t/exe9 $t/e.o $static + $QEMU $t/exe9 + + $CXX -B. -o $t/exe10 $t/e.o -no-pie + $QEMU $t/exe10 +fi + +# riscv64-linux-gnu-strip crashes for some reason +if [ $MACHINE != riscv32 ]; then + $CXX -B. -o $t/exe11 $t/b.o -pie + $STRIP $t/exe11 + $QEMU $t/exe11 + + $CXX -B. -o $t/exe12 $t/c.o -no-pie + $STRIP $t/exe12 + $QEMU $t/exe12 +fi diff --git a/third_party/mold/test/elf/exclude-libs.sh b/third_party/mold/test/elf/exclude-libs.sh new file mode 100755 index 00000000000..2396006f15f --- /dev/null +++ b/third_party/mold/test/elf/exclude-libs.sh @@ -0,0 +1,56 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -Fq foo $t/log +grep -Fq bar $t/log +grep -Fq baz $t/log + +$CC -B. -shared -o $t/f.so $t/e.o $t/c.a $t/d.a -Wl,-exclude-libs=c.a +readelf --dyn-syms $t/f.so > $t/log +! grep -Fq foo $t/log || false +grep -Fq bar $t/log +grep -Fq baz $t/log + +$CC -B. -shared -o $t/f.so $t/e.o $t/c.a $t/d.a -Wl,-exclude-libs=c.a -Wl,-exclude-libs=d.a +readelf --dyn-syms $t/f.so > $t/log +! grep -Fq foo $t/log || false +! grep -Fq bar $t/log || false +grep -Fq baz $t/log + +$CC -B. -shared -o $t/f.so $t/e.o $t/c.a $t/d.a -Wl,-exclude-libs=ALL +readelf --dyn-syms $t/f.so > $t/log +! grep -Fq foo $t/log || false +! 
grep -Fq bar $t/log || false +grep -Fq baz $t/log diff --git a/third_party/mold/test/elf/exclude-libs2.sh b/third_party/mold/test/elf/exclude-libs2.sh new file mode 100755 index 00000000000..8fbec93bd2e --- /dev/null +++ b/third_party/mold/test/elf/exclude-libs2.sh @@ -0,0 +1,21 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -Fq foo $t/log diff --git a/third_party/mold/test/elf/exclude-libs3.sh b/third_party/mold/test/elf/exclude-libs3.sh new file mode 100755 index 00000000000..122fbc090d2 --- /dev/null +++ b/third_party/mold/test/elf/exclude-libs3.sh @@ -0,0 +1,20 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -Fq foo $t/log diff --git a/third_party/mold/test/elf/execstack.sh b/third_party/mold/test/elf/execstack.sh new file mode 100755 index 00000000000..545a3430f82 --- /dev/null +++ b/third_party/mold/test/elf/execstack.sh @@ -0,0 +1,16 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -Eq 'NOTYPE\s+GLOBAL DEFAULT\s+[0-9]+ bar' $t/log +grep -Eq 'NOTYPE\s+GLOBAL DEFAULT\s+[0-9]+ _start' $t/log diff --git a/third_party/mold/test/elf/export-from-exe.sh b/third_party/mold/test/elf/export-from-exe.sh new file mode 100755 index 00000000000..103f07f32a3 --- /dev/null +++ b/third_party/mold/test/elf/export-from-exe.sh @@ -0,0 +1,28 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < /dev/null + +! $CC -B. -o $t/exe $t/a.o $t/b.o \ + -Wl,-warn-common -Wl,-fatal-warnings 2> /dev/null || false diff --git a/third_party/mold/test/elf/filler.sh b/third_party/mold/test/elf/filler.sh new file mode 100755 index 00000000000..ea3f9056cb9 --- /dev/null +++ b/third_party/mold/test/elf/filler.sh @@ -0,0 +1,34 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +test_cflags -static || skip + +cat < $t/txt1 + +$CC -B. -static -Wl,--filler,0x00 -o $t/exe2 $t/a.o +od -x $t/exe2 > $t/txt2 + +diff -q $t/txt1 $t/txt2 diff --git a/third_party/mold/test/elf/filter.sh b/third_party/mold/test/elf/filter.sh new file mode 100755 index 00000000000..33bfbdef05a --- /dev/null +++ b/third_party/mold/test/elf/filter.sh @@ -0,0 +1,16 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -Fq 'Filter library: [foo]' $t/log +grep -Fq 'Filter library: [bar]' $t/log diff --git a/third_party/mold/test/elf/func-addr.sh b/third_party/mold/test/elf/func-addr.sh new file mode 100755 index 00000000000..b92eb89dc49 --- /dev/null +++ b/third_party/mold/test/elf/func-addr.sh @@ -0,0 +1,29 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/a.cc +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +int two() { return 2; } + +int live_var1 = 1; +int live_var2 = two(); +int dead_var1 = 3; +int dead_var2 = 4; + +void live_fn1() {} +void live_fn2() { live_fn1(); } +void dead_fn1() {} +void dead_fn2() { dead_fn1(); } + +int main() { + printf("%d %d\n", live_var1, live_var2); + live_fn2(); +} +EOF + +$CXX -B. 
-o $t/exe1 $t/a.cc -ffunction-sections -fdata-sections + +readelf --symbols $t/exe1 > $t/log.1 +grep -qv live_fn1 $t/log.1 +grep -qv live_fn2 $t/log.1 +grep -qv dead_fn1 $t/log.1 +grep -qv dead_fn2 $t/log.1 +grep -qv live_var1 $t/log.1 +grep -qv live_var2 $t/log.1 +grep -qv dead_var1 $t/log.1 +grep -qv dead_var2 $t/log.1 +$QEMU $t/exe1 | grep -q '1 2' + +$CXX -B. -o $t/exe2 $t/a.cc -ffunction-sections -fdata-sections -Wl,-gc-sections + +readelf --symbols $t/exe2 > $t/log.2 +grep -q live_fn1 $t/log.2 +grep -q live_fn2 $t/log.2 +grep -qv dead_fn1 $t/log.2 +grep -qv dead_fn2 $t/log.2 +grep -q live_var1 $t/log.2 +grep -q live_var2 $t/log.2 +grep -qv dead_var1 $t/log.2 +grep -qv dead_var2 $t/log.2 +$QEMU $t/exe2 | grep -q '1 2' diff --git a/third_party/mold/test/elf/gdb-index-compress-output.sh b/third_party/mold/test/elf/gdb-index-compress-output.sh new file mode 100755 index 00000000000..e6f8736f84f --- /dev/null +++ b/third_party/mold/test/elf/gdb-index-compress-output.sh @@ -0,0 +1,52 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = $HOST ] || skip +[ $MACHINE = riscv64 -o $MACHINE = riscv32 -o $MACHINE = sparc64 ] && skip + +command -v gdb >& /dev/null || skip + +cat < /dev/null | grep -Fq .gdb_index + +cat < /dev/null | grep -Fq .gdb_index + +$QEMU $t/exe | grep -q 'Hello world' + +DEBUGINFOD_URLS= gdb $t/exe -nx -batch -ex 'b main' -ex r -ex 'b trap' \ + -ex c -ex bt -ex quit >& $t/log + +grep -q 'hello () at .*:7' $t/log +grep -q 'greet () at .*:11' $t/log +grep -q 'main () at .*:4' $t/log diff --git a/third_party/mold/test/elf/gdb-index-dwarf2.sh b/third_party/mold/test/elf/gdb-index-dwarf2.sh new file mode 100755 index 00000000000..5a7950d83d6 --- /dev/null +++ b/third_party/mold/test/elf/gdb-index-dwarf2.sh @@ -0,0 +1,63 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = $HOST ] || skip +[ $MACHINE = riscv64 -o $MACHINE = riscv32 -o $MACHINE = sparc64 ] && skip + +command -v gdb >& /dev/null || skip + +echo 'int main() {}' | $CC -o /dev/null -xc -gdwarf-2 -g - >& /dev/null || skip + +cat < /dev/null | grep -Fq .gdb_index + +cat < /dev/null | grep -Fq .gdb_index + +$QEMU $t/exe | grep -q 'Hello world' + +DEBUGINFOD_URLS= gdb $t/exe -nx -batch -ex 'b main' -ex r -ex 'b trap' \ + -ex c -ex bt -ex quit >& $t/log + +grep -q 'hello2 () at .*:7' $t/log +grep -q 'hello () at .*:4' $t/log +grep -q 'greet () at .*:8' $t/log +grep -q 'main () at .*:4' $t/log diff --git a/third_party/mold/test/elf/gdb-index-dwarf3.sh b/third_party/mold/test/elf/gdb-index-dwarf3.sh new file mode 100755 index 00000000000..8ecd5657ddd --- /dev/null +++ b/third_party/mold/test/elf/gdb-index-dwarf3.sh @@ -0,0 +1,63 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +[ $MACHINE = $HOST ] || skip +[ $MACHINE = riscv64 -o $MACHINE = riscv32 -o $MACHINE = sparc64 ] && skip + +command -v gdb >& /dev/null || skip + +test_cflags -gdwarf-3 || skip + +cat < /dev/null | grep -Fq .gdb_index + +cat < /dev/null | grep -Fq .gdb_index + +$QEMU $t/exe | grep -q 'Hello world' + +DEBUGINFOD_URLS= gdb $t/exe -nx -batch -ex 'b main' -ex r -ex 'b trap' \ + -ex c -ex bt -ex quit >& $t/log + +grep -q 'hello2 () at .*:7' $t/log +grep -q 'hello () at .*:4' $t/log +grep -q 'greet () at .*:8' $t/log +grep -q 'main () at .*:4' $t/log diff --git a/third_party/mold/test/elf/gdb-index-dwarf4.sh b/third_party/mold/test/elf/gdb-index-dwarf4.sh new file mode 100755 index 00000000000..838918fedde --- /dev/null +++ b/third_party/mold/test/elf/gdb-index-dwarf4.sh @@ -0,0 +1,63 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = $HOST ] || skip +[ $MACHINE = riscv64 -o $MACHINE = riscv32 -o $MACHINE = sparc64 ] && skip + +command -v gdb >& /dev/null || skip + +test_cflags -gdwarf-4 -g || skip + +cat < /dev/null | grep -Fq .gdb_index + +cat < /dev/null | grep -Fq .gdb_index + +$QEMU $t/exe | grep -q 'Hello world' + +DEBUGINFOD_URLS= gdb $t/exe -nx -batch -ex 'b main' -ex r -ex 'b trap' \ + -ex c -ex bt -ex quit >& $t/log + +grep -q 'hello2 () at .*:7' $t/log +grep -q 'hello () at .*:4' $t/log +grep -q 'greet () at .*:8' $t/log +grep -q 'main () at .*:4' $t/log diff --git a/third_party/mold/test/elf/gdb-index-dwarf5.sh b/third_party/mold/test/elf/gdb-index-dwarf5.sh new file mode 100755 index 00000000000..3f965fc6e08 --- /dev/null +++ b/third_party/mold/test/elf/gdb-index-dwarf5.sh @@ -0,0 +1,99 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = $HOST ] || skip +[ $MACHINE = riscv64 -o $MACHINE = riscv32 -o $MACHINE = sparc64 ] && skip + +command -v gdb >& /dev/null || skip + +test_cflags -gdwarf-5 -g || skip + +cat < $t/a.c +void fn3(); + +static void fn2() { + fn3(); +} + +void fn1() { + fn2(); +} +EOF + +cat < $t/b.c +void fn5(); + +static void fn4() { + fn5(); +} + +void fn3() { + fn4(); +} +EOF + +cat < $t/c.c +void fn7(); + +static void fn6() { + fn7(); +} + +void fn5() { + fn6(); +} +EOF + +cat < $t/d.c +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +void trap() {} + +static void fn8() { + printf("Hello world\n"); + trap(); +} + +void fn7() { + fn8(); +} +EOF + +$CC -c -o $t/a.o $t/a.c -fPIC -g -ggnu-pubnames -gdwarf-5 -ffunction-sections +$CC -c -o $t/b.o $t/b.c -fPIC -g -ggnu-pubnames -gdwarf-4 -ffunction-sections +$CC -c -o $t/c.o $t/c.c -fPIC -g -ggnu-pubnames -gdwarf-5 +$CC -c -o $t/d.o $t/d.c -fPIC -g -ggnu-pubnames -gdwarf-5 -ffunction-sections + +$CC -B. 
-shared -o $t/e.so $t/a.o $t/b.o $t/c.o $t/d.o -Wl,--gdb-index +readelf -WS $t/e.so 2> /dev/null | grep -Fq .gdb_index + +cat < /dev/null | grep -Fq .gdb_index + +$QEMU $t/exe | grep -q 'Hello world' + +DEBUGINFOD_URLS= gdb $t/exe -nx -batch -ex 'b main' -ex r -ex 'b trap' \ + -ex c -ex bt -ex quit >& $t/log + +grep -q 'fn8 () at .*/d.c:6' $t/log +grep -q 'fn7 () at .*/d.c:10' $t/log +grep -q 'fn6 () at .*/c.c:4' $t/log +grep -q 'fn5 () at .*/c.c:8' $t/log +grep -q 'fn4 () at .*/b.c:4' $t/log +grep -q 'fn3 () at .*/b.c:8' $t/log +grep -q 'fn2 () at .*/a.c:4' $t/log +grep -q 'fn1 () at .*/a.c:8' $t/log diff --git a/third_party/mold/test/elf/gdb-index-empty.sh b/third_party/mold/test/elf/gdb-index-empty.sh new file mode 100755 index 00000000000..54aef65330a --- /dev/null +++ b/third_party/mold/test/elf/gdb-index-empty.sh @@ -0,0 +1,8 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +echo 'void _start() {}' | $CC -c -o $t/a.o -xc - +./mold -o $t/exe $t/a.o -gdb-index +readelf -WS $t/exe > $t/log +! grep -Fq .gdb_index $t/log || false diff --git a/third_party/mold/test/elf/gdb-index-split-dwarf.sh b/third_party/mold/test/elf/gdb-index-split-dwarf.sh new file mode 100755 index 00000000000..de0bb200a51 --- /dev/null +++ b/third_party/mold/test/elf/gdb-index-split-dwarf.sh @@ -0,0 +1,87 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = $HOST ] || skip +[ $MACHINE = riscv64 -o $MACHINE = riscv32 -o $MACHINE = sparc64 ] && skip + +command -v gdb >& /dev/null || skip + +test_cflags -gdwarf-5 -g || skip + +cat < $t/a.c +void fn3(); + +static void fn2() { + fn3(); +} + +void fn1() { + fn2(); +} +EOF + +cat < $t/b.c +void fn5(); + +static void fn4() { + fn5(); +} + +void fn3() { + fn4(); +} +EOF + +cat < $t/c.c +void fn7(); + +static void fn6() { + fn7(); +} + +void fn5() { + fn6(); +} +EOF + +cat < $t/d.c +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +void trap() {} + +static void fn8() { + printf("Hello world\n"); + trap(); +} + +void fn7() { + fn8(); +} +EOF + +$CC -c -o $t/a.o $t/a.c -fPIC -g -ggnu-pubnames -gdwarf-5 -gsplit-dwarf +$CC -c -o $t/b.o $t/b.c -fPIC -g -ggnu-pubnames -gdwarf-4 -gsplit-dwarf +$CC -c -o $t/c.o $t/c.c -fPIC -g -ggnu-pubnames -gdwarf-5 +$CC -c -o $t/d.o $t/d.c -fPIC -g -ggnu-pubnames -gdwarf-5 -gsplit-dwarf + +$CC -B. -shared -o $t/e.so $t/a.o $t/b.o $t/c.o $t/d.o -Wl,--gdb-index +readelf -WS $t/e.so 2> /dev/null | grep -Fq .gdb_index + +cat < /dev/null | grep -Fq .gdb_index + +$QEMU $t/exe | grep -q 'Hello world' diff --git a/third_party/mold/test/elf/glibc-2.22-bug.sh b/third_party/mold/test/elf/glibc-2.22-bug.sh new file mode 100755 index 00000000000..82317dc1d0b --- /dev/null +++ b/third_party/mold/test/elf/glibc-2.22-bug.sh @@ -0,0 +1,26 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = alpha ] && skip + +# glibc 2.22 or prior have a bug that ld-linux.so.2 crashes on dlopen() +# if .rela.dyn and .rela.plt are not contiguous in a given DSO. +# This test verifies that these sections are contiguous in mold's output. + +cat < /dev/null +GOT_ADDR=$($QEMU $t/exe) + +# _GLOBAL_OFFSET_TABLE_ refers the end of .got only on x86. +# We assume .got is followed by .gotplt. 
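+# That is, on x86 the symbol's value should match the start address of
+# .got.plt (the end of .got), while on other targets it should match the
+# start address of .got itself.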
+if [ $MACHINE = x86_64 -o $MACHINE = i386 ]; then + readelf -WS $t/exe | grep -q "\.got\.plt .*$GOT_ADDR " +else + readelf -WS $t/exe | grep -q "\.got .*$GOT_ADDR " +fi diff --git a/third_party/mold/test/elf/gnu-hash.sh b/third_party/mold/test/elf/gnu-hash.sh new file mode 100755 index 00000000000..d9348097c13 --- /dev/null +++ b/third_party/mold/test/elf/gnu-hash.sh @@ -0,0 +1,11 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& /dev/null || skip + +cat < $t/log +! grep -Fq ' .hash' $t/log || false +! grep -Fq ' .gnu.hash' $t/log || false diff --git a/third_party/mold/test/elf/hello-dynamic.sh b/third_party/mold/test/elf/hello-dynamic.sh new file mode 100755 index 00000000000..c366408a392 --- /dev/null +++ b/third_party/mold/test/elf/hello-dynamic.sh @@ -0,0 +1,19 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log +grep -q 'undefined symbol: foo' $t/log diff --git a/third_party/mold/test/elf/hidden-weak-undef.sh b/third_party/mold/test/elf/hidden-weak-undef.sh new file mode 100755 index 00000000000..82d7df532d7 --- /dev/null +++ b/third_party/mold/test/elf/hidden-weak-undef.sh @@ -0,0 +1,14 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! grep -qw foo $t/log || false +grep -qw bar $t/log diff --git a/third_party/mold/test/elf/i386_tls-module-base.sh b/third_party/mold/test/elf/i386_tls-module-base.sh new file mode 100755 index 00000000000..4e70bd3bbba --- /dev/null +++ b/third_party/mold/test/elf/i386_tls-module-base.sh @@ -0,0 +1,56 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = i386 ] || skip + +cat <<'EOF' | $CC -fPIC -o $t/a.o -c -xassembler - +.globl get_foo +.type get_foo, @function +get_foo: + push %ebx + call __x86.get_pc_thunk.bx +1: + addl $_GLOBAL_OFFSET_TABLE_ - 1b, %ebx + lea _TLS_MODULE_BASE_@TLSDESC(%ebx), %eax + call *_TLS_MODULE_BASE_@TLSCALL(%eax) + lea foo@dtpoff(%eax), %eax + mov %gs:(%eax), %eax + pop %ebx + ret +.section .tdata, "awT", @progbits +foo: +.long 20 +.section .note.GNU-stack, "", @progbits +EOF + +cat <: 10).*foobar' + +$CC -B. -o $t/exe $t/a.o $t/c.so +$QEMU $t/exe | grep -q 'Hello world' diff --git a/third_party/mold/test/elf/ifunc-dynamic.sh b/third_party/mold/test/elf/ifunc-dynamic.sh new file mode 100755 index 00000000000..b6f98d9d41d --- /dev/null +++ b/third_party/mold/test/elf/ifunc-dynamic.sh @@ -0,0 +1,38 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +supports_ifunc || skip + +cat <: 10)\s+GLOBAL DEFAULT.* foobar' diff --git a/third_party/mold/test/elf/ifunc-funcptr.sh b/third_party/mold/test/elf/ifunc-funcptr.sh new file mode 100755 index 00000000000..cc312035014 --- /dev/null +++ b/third_party/mold/test/elf/ifunc-funcptr.sh @@ -0,0 +1,44 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +supports_ifunc || skip + +cat <&1 | grep -q musl && skip + +cat <<'EOF' | $CC -c -o $t/a.o -xc - +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +__attribute__((constructor(10000))) void init4() { printf("1"); } +EOF + +cat <<'EOF' | $CC -c -o $t/b.o -xc - +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +__attribute__((constructor(1000))) void init3() { printf("2"); } +EOF + +cat <<'EOF' | $CC -c -o $t/c.o -xc - +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +__attribute__((constructor)) void init1() { printf("3"); } +EOF + +cat <<'EOF' | $CC -c -o $t/d.o -xc - +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +__attribute__((constructor)) void init2() { printf("4"); } +EOF + +cat <<'EOF' | $CC -c -o $t/e.o -xc - +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +__attribute__((destructor(10000))) void fini4() { printf("5"); } +EOF + +cat <<'EOF' | $CC -c -o $t/f.o -xc - +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +__attribute__((destructor(1000))) void fini3() { printf("6"); } +EOF + +cat <<'EOF' | $CC -c -o $t/g.o -xc - +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +__attribute__((destructor)) void fini1() { printf("7"); } +EOF + +cat <<'EOF' | $CC -c -o $t/h.o -xc - +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +__attribute__((destructor)) void fini2() { printf("8"); } +EOF + +cat < $t/log +! grep -Fq '(INIT)' $t/log || false diff --git a/third_party/mold/test/elf/init.sh b/third_party/mold/test/elf/init.sh new file mode 100755 index 00000000000..14327b69877 --- /dev/null +++ b/third_party/mold/test/elf/init.sh @@ -0,0 +1,15 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! grep -Fq '(INIT)' $t/log || false diff --git a/third_party/mold/test/elf/initfirst.sh b/third_party/mold/test/elf/initfirst.sh new file mode 100755 index 00000000000..1bc653ece4f --- /dev/null +++ b/third_party/mold/test/elf/initfirst.sh @@ -0,0 +1,20 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/b.ver + +! $CC -B. 
-shared -o $t/c.so -Wl,-version-script,$t/b.ver \ + $t/a.o >& $t/log || false +grep -q 'invalid version pattern' $t/log diff --git a/third_party/mold/test/elf/issue646.sh b/third_party/mold/test/elf/issue646.sh new file mode 100755 index 00000000000..b5ce545426e --- /dev/null +++ b/third_party/mold/test/elf/issue646.sh @@ -0,0 +1,30 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = sh4 ] && skip + +cat <& /dev/null || skip + +cat < $t/script +bar = foo; +EOF + +$CC -B. -o $t/b.so -shared $t/script $t/a.o +readelf -sW $t/b.so | grep -q 'FUNC .* bar' + +cat < $t/c.script + +./mold --relocatable -o $t/d.o $t/c.script + +$CC -B. -o $t/exe $t/d.o +$QEMU $t/exe | grep -q Hello diff --git a/third_party/mold/test/elf/linker-script.sh b/third_party/mold/test/elf/linker-script.sh new file mode 100755 index 00000000000..7090b0830a1 --- /dev/null +++ b/third_party/mold/test/elf/linker-script.sh @@ -0,0 +1,29 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/script +GROUP("$t/a.o") +EOF + +$CC -B. -o $t/exe $t/script +$QEMU $t/exe | grep -q 'Hello world' + +$CC -B. -o $t/exe -Wl,-T,$t/script +$QEMU $t/exe | grep -q 'Hello world' + +$CC -B. -o $t/exe -Wl,--script,$t/script +$QEMU $t/exe | grep -q 'Hello world' diff --git a/third_party/mold/test/elf/linker-script2.sh b/third_party/mold/test/elf/linker-script2.sh new file mode 100755 index 00000000000..14bea38604c --- /dev/null +++ b/third_party/mold/test/elf/linker-script2.sh @@ -0,0 +1,17 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/b.script +INPUT(-lfoo) +EOF + +$CC -B. -o $t/exe -L$t/foo/bar $t/b.script diff --git a/third_party/mold/test/elf/linker-script3.sh b/third_party/mold/test/elf/linker-script3.sh new file mode 100755 index 00000000000..0eaaecc26a1 --- /dev/null +++ b/third_party/mold/test/elf/linker-script3.sh @@ -0,0 +1,15 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +mkdir -p $t/foo + +cat < $t/b.script +INPUT(a.o) +EOF + +$CC -B. -o $t/exe -L$t/foo $t/b.script diff --git a/third_party/mold/test/elf/linker-script4.sh b/third_party/mold/test/elf/linker-script4.sh new file mode 100755 index 00000000000..c9d24dd109c --- /dev/null +++ b/third_party/mold/test/elf/linker-script4.sh @@ -0,0 +1,20 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +echo 'VERSION { ver_x { global: *; }; };' > $t/a.script + +cat < $t/b.s +.globl foo, bar, baz +foo: + nop +bar: + nop +baz: + nop +EOF + +$CC -B. -shared -o $t/c.so $t/a.script $t/b.s +readelf --version-info $t/c.so > $t/log + +grep -Fq 'Rev: 1 Flags: none Index: 2 Cnt: 1 Name: ver_x' $t/log diff --git a/third_party/mold/test/elf/lto-archive.sh b/third_party/mold/test/elf/lto-archive.sh new file mode 100755 index 00000000000..01945e53e81 --- /dev/null +++ b/third_party/mold/test/elf/lto-archive.sh @@ -0,0 +1,51 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ "$CC" = cc ] || skip + +echo 'int main() {}' | $CC -flto -o /dev/null -xc - >& /dev/null \ + || skip + +cat < $t/log +grep -q hello $t/log +! grep -q howdy $t/log || false diff --git a/third_party/mold/test/elf/lto-dso.sh b/third_party/mold/test/elf/lto-dso.sh new file mode 100755 index 00000000000..dcb06794ed5 --- /dev/null +++ b/third_party/mold/test/elf/lto-dso.sh @@ -0,0 +1,19 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +echo 'int main() {}' | $CC -flto -o /dev/null -xc - >& /dev/null \ + || skip + +cat <& /dev/null \ + || skip + +cat <&1 | grep -q -- -fwpa + +# Test FAT objects if -fno-use-linker-plugin is used + +cat <& /dev/null \ + || skip + +cat < $t/b.script +{ + global: foo; + local: *; +}; +EOF + +$CC -B. -shared -o $t/c.so -flto $t/a.o -Wl,-version-script=$t/b.script + +if [ $MACHINE = ppc64 ]; then + # On PPC64V1, function symbol refers a function descriptor in .opd + nm -D $t/c.so | grep -q 'D foo' + ! nm -D $t/c.so | grep -q 'D bar' || false +else + nm -D $t/c.so | grep -q 'T foo' + ! nm -D $t/c.so | grep -q 'T bar' || false +fi diff --git a/third_party/mold/test/elf/many-sections.sh b/third_party/mold/test/elf/many-sections.sh new file mode 100755 index 00000000000..52d5462d4fa --- /dev/null +++ b/third_party/mold/test/elf/many-sections.sh @@ -0,0 +1,24 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +seq 1 100000 | sed 's/.*/.section .data.\0,"aw"\n.word 0\n/g' | \ + $CC -c -xassembler -o $t/a.o - + +cat <<'EOF' | $CC -c -xc -o $t/b.o - +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +int main() { + printf("Hello\n"); + return 0; +} +EOF + +$CC -B. -o $t/exe $t/a.o $t/b.o +$QEMU $t/exe | grep -q Hello diff --git a/third_party/mold/test/elf/many-sections2.sh b/third_party/mold/test/elf/many-sections2.sh new file mode 100755 index 00000000000..df575e65dc2 --- /dev/null +++ b/third_party/mold/test/elf/many-sections2.sh @@ -0,0 +1,13 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# OneTBB isn't tsan-clean +nm mold | grep -q '__tsan_init' && skip + +seq 1 100000 | sed 's/.*/.section .data.\0,"aw"\n.globl x\0\nx\0: .word 0\n/g' | \ + $CC -c -xassembler -o $t/a.o - + +./mold --relocatable -o $t/b.o $t/a.o +readelf -WS $t/b.o | grep -Fq .data.100000 +readelf -Ws $t/b.o | grep -Fq 'GLOBAL DEFAULT 100000' diff --git a/third_party/mold/test/elf/mergeable-strings.sh b/third_party/mold/test/elf/mergeable-strings.sh new file mode 100755 index 00000000000..ccb20b4143c --- /dev/null +++ b/third_party/mold/test/elf/mergeable-strings.sh @@ -0,0 +1,57 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log || false +grep -q 'undefined symbol: foo' $t/log +grep -q '>>> .*a\.o' $t/log diff --git a/third_party/mold/test/elf/mold-wrapper.sh b/third_party/mold/test/elf/mold-wrapper.sh new file mode 100755 index 00000000000..c1cbd65fe00 --- /dev/null +++ b/third_party/mold/test/elf/mold-wrapper.sh @@ -0,0 +1,96 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +[ "$CC" = cc ] || skip + +ldd mold-wrapper.so | grep -q libasan && skip + +nm mold | grep -q '__[at]san_init' && skip + +cat <<'EOF' > $t/a.sh +#!/bin/bash +echo "$0" "$@" $FOO +EOF + +chmod 755 $t/a.sh + +cat <<'EOF' | $CC -xc -o $t/exe - +#define _GNU_SOURCE 1 + +#include "libc/assert.h" +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +#include "libc/mem/alg.h" +#include "libc/mem/mem.h" +#include "libc/str/str.h" +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/runtime/pathconf.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/sysv/consts/f.h" +#include "libc/sysv/consts/fileno.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/ok.h" +#include "libc/time/time.h" +#include "third_party/getopt/getopt.h" +#include "third_party/musl/crypt.h" +#include "third_party/musl/lockf.h" + +extern char **environ; + +int main(int argc, char **argv) { + if (!strcmp(argv[1], "execl")) { + execl("/usr/bin/ld", "/usr/bin/ld", "execl", (char *)0); + perror("execl"); + return 1; + } + + if (!strcmp(argv[1], "execlp")) { + execlp("/usr/bin/ld", "/usr/bin/ld", "execlp", (char *)0); + perror("execl"); + return 1; + } + + if (!strcmp(argv[1], "execle")) { + execle("/usr/bin/ld", "/usr/bin/ld", "execle", (char *)0, environ); + perror("execl"); + return 1; + } + + if (!strcmp(argv[1], "execv")) { + execv("/usr/bin/ld", (char *[]){"/usr/bin/ld", "execv", (char *)0}); + perror("execl"); + return 1; + } + + if (!strcmp(argv[1], "execvp")) { + execvp("/usr/bin/ld", (char *[]){"/usr/bin/ld", "execvp", (char *)0}); + perror("execl"); + return 1; + } + + if (!strcmp(argv[1], "execvpe")) { + char *env[] = {"FOO=bar", NULL}; + execvpe("/usr/bin/ld", (char *[]){"/usr/bin/ld", "execvpe", (char *)0}, env); + perror("execl"); + return 1; + } + + fprintf(stderr, "unreachable: %s\n", argv[1]); + return 1; +} +EOF + +LD_PRELOAD=`pwd`/mold-wrapper.so MOLD_PATH=$t/a.sh $t/exe execl | grep -q 'a.sh execl' +LD_PRELOAD=`pwd`/mold-wrapper.so MOLD_PATH=$t/a.sh $t/exe execlp | grep -q 'a.sh execlp' +LD_PRELOAD=`pwd`/mold-wrapper.so MOLD_PATH=$t/a.sh $t/exe execle | grep -q 'a.sh execle' +LD_PRELOAD=`pwd`/mold-wrapper.so MOLD_PATH=$t/a.sh $t/exe execv | grep -q 'a.sh execv' +LD_PRELOAD=`pwd`/mold-wrapper.so MOLD_PATH=$t/a.sh $t/exe execvp | grep -q 'a.sh execvp' +LD_PRELOAD=`pwd`/mold-wrapper.so MOLD_PATH=$t/a.sh $t/exe execvpe | grep -q 'a.sh execvpe bar' diff --git a/third_party/mold/test/elf/mold-wrapper2.sh b/third_party/mold/test/elf/mold-wrapper2.sh new file mode 100755 index 00000000000..35f0f30f427 --- /dev/null +++ b/third_party/mold/test/elf/mold-wrapper2.sh @@ -0,0 +1,14 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +ldd mold-wrapper.so | grep -q libasan && skip + +nm mold | grep -q '__[at]san_init' && skip + +rm -rf $t +mkdir -p $t/bin $t/lib/mold +cp mold $t/bin +cp mold-wrapper.so $t/bin + +$t/bin/mold -run bash -c 'echo $LD_PRELOAD' | grep -q '/bin/mold-wrapper.so' diff --git a/third_party/mold/test/elf/no-eh-frame-header.sh b/third_party/mold/test/elf/no-eh-frame-header.sh new file mode 100755 index 00000000000..776bee71380 --- /dev/null +++ b/third_party/mold/test/elf/no-eh-frame-header.sh @@ -0,0 +1,16 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! 
grep -F .eh_frame_hdr $t/log || false + +$QEMU $t/exe diff --git a/third_party/mold/test/elf/no-quick-exit.sh b/third_party/mold/test/elf/no-quick-exit.sh new file mode 100755 index 00000000000..263ecbea500 --- /dev/null +++ b/third_party/mold/test/elf/no-quick-exit.sh @@ -0,0 +1,21 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/a.ver + +cat < $t/log +grep -Fq 'a.ver: cannot assign version `ver_x` to symbol `foo`: symbol not found' $t/log diff --git a/third_party/mold/test/elf/nocopyreloc.sh b/third_party/mold/test/elf/nocopyreloc.sh new file mode 100755 index 00000000000..de5b9224947 --- /dev/null +++ b/third_party/mold/test/elf/nocopyreloc.sh @@ -0,0 +1,41 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = i386 ] && skip +[ $MACHINE = m68k ] && skip +[ $MACHINE = arm ] && skip +[ $MACHINE = ppc64 ] && skip +[ $MACHINE = ppc64le ] && skip +[ $MACHINE = sh4 ] && skip +[ $MACHINE = alpha ] && skip + +cat < $t/log || false + +grep -q 'recompile with -fPIC' $t/log diff --git a/third_party/mold/test/elf/noinhibit-exec.sh b/third_party/mold/test/elf/noinhibit-exec.sh new file mode 100755 index 00000000000..bf9831fb986 --- /dev/null +++ b/third_party/mold/test/elf/noinhibit-exec.sh @@ -0,0 +1,15 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log1 || false +grep -q 'undefined symbol: no-such-sym' $t/log1 + +$CC -B. -shared -o $t/b.o $t/a.o -Wl,-require-defined=no-such-sym -Wl,-noinhibit-exec >& $t/log2 +grep -q 'undefined symbol: no-such-sym' $t/log2 diff --git a/third_party/mold/test/elf/non-canonical-plt.sh b/third_party/mold/test/elf/non-canonical-plt.sh new file mode 100755 index 00000000000..55e2bb6797d --- /dev/null +++ b/third_party/mold/test/elf/non-canonical-plt.sh @@ -0,0 +1,45 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! grep -Fq ' .dynsym ' $t/log || false +! grep -Fq ' .dynstr ' $t/log || false diff --git a/third_party/mold/test/elf/now.sh b/third_party/mold/test/elf/now.sh new file mode 100755 index 00000000000..09d4e3c50ac --- /dev/null +++ b/third_party/mold/test/elf/now.sh @@ -0,0 +1,24 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! grep -q 'Flags: NOW' $t/log || false diff --git a/third_party/mold/test/elf/oformat-binary.sh b/third_party/mold/test/elf/oformat-binary.sh new file mode 100755 index 00000000000..6a496d35699 --- /dev/null +++ b/third_party/mold/test/elf/oformat-binary.sh @@ -0,0 +1,15 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& /dev/null || skip + +cat < $t/log1 + +$CC -B. -o $t/exe2 $t/a.o -pie -Wl,-pack-dyn-relocs=relr +llvm-readelf -r $t/exe2 | grep RELATIVE | wc -l > $t/log2 + +diff $t/log1 $t/log2 + +llvm-readelf --dynamic $t/exe2 > $t/log3 +grep -wq RELR $t/log3 +grep -wq RELRSZ $t/log3 +grep -wq RELRENT $t/log3 diff --git a/third_party/mold/test/elf/package-metadata.sh b/third_party/mold/test/elf/package-metadata.sh new file mode 100755 index 00000000000..3665a04dfd5 --- /dev/null +++ b/third_party/mold/test/elf/package-metadata.sh @@ -0,0 +1,19 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log1 +! 
grep -q PREINIT_ARRAY $t/log1 || false + +cat < $t/log 2> /dev/null +grep -q 'b\.o.*a\.o.*foo$' $t/log diff --git a/third_party/mold/test/elf/protected-dynsym.sh b/third_party/mold/test/elf/protected-dynsym.sh new file mode 100755 index 00000000000..3206badabc6 --- /dev/null +++ b/third_party/mold/test/elf/protected-dynsym.sh @@ -0,0 +1,21 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < /dev/null | grep -q '3 4 0' diff --git a/third_party/mold/test/elf/push-pop-state.sh b/third_party/mold/test/elf/push-pop-state.sh new file mode 100755 index 00000000000..03c81867d2c --- /dev/null +++ b/third_party/mold/test/elf/push-pop-state.sh @@ -0,0 +1,22 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -Fq a.so $t/log +! grep -Fq b.so $t/log || false diff --git a/third_party/mold/test/elf/range-extension-thunk.sh b/third_party/mold/test/elf/range-extension-thunk.sh new file mode 100755 index 00000000000..0a35a1728fb --- /dev/null +++ b/third_party/mold/test/elf/range-extension-thunk.sh @@ -0,0 +1,62 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# Skip if 32 bits as we use very large addresses in this test. +[ $MACHINE = i386 ] && skip +[ $MACHINE = riscv32 ] && skip + +# It looks like SPARC's runtime can't handle PLT if it's too far from GOT. +[ $MACHINE = sparc64 ] && skip + +cat < $t/a.c +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +void fn3(); +void fn4(); + +__attribute__((section(".low"))) void fn1() { printf(" fn1"); fn3(); } +__attribute__((section(".low"))) void fn2() { printf(" fn2"); fn4(); } + +int main() { + printf(" main"); + fn1(); + printf("\n"); +} +EOF + +cat < $t/b.c +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +void fn1(); +void fn2(); + +__attribute__((section(".high"))) void fn3() { printf(" fn3"); fn2(); } +__attribute__((section(".high"))) void fn4() { printf(" fn4"); } +EOF + +$CC -c -o $t/c.o $t/a.c -O0 +$CC -c -o $t/d.o $t/b.c -O0 + +$CC -B. -o $t/exe1 $t/c.o $t/d.o \ + -Wl,--section-start=.low=0x10000000,--section-start=.high=0x20000000 +$QEMU $t/exe1 | grep -q 'main fn1 fn3 fn2 fn4' + +$CC -c -o $t/e.o $t/a.c -O2 +$CC -c -o $t/f.o $t/b.c -O2 + +$CC -B. -o $t/exe2 $t/e.o $t/f.o \ + -Wl,--section-start=.low=0x10000000,--section-start=.high=0x20000000 +$QEMU $t/exe2 | grep -q 'main fn1 fn3 fn2 fn4' diff --git a/third_party/mold/test/elf/relax-got-load.sh b/third_party/mold/test/elf/relax-got-load.sh new file mode 100755 index 00000000000..b5674d2e944 --- /dev/null +++ b/third_party/mold/test/elf/relax-got-load.sh @@ -0,0 +1,24 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log +grep -Eq 'relocation against symbol .+ can not be used; recompile with -fPIC' $t/log diff --git a/third_party/mold/test/elf/relocatable-archive.sh b/third_party/mold/test/elf/relocatable-archive.sh new file mode 100755 index 00000000000..83b775b8874 --- /dev/null +++ b/third_party/mold/test/elf/relocatable-archive.sh @@ -0,0 +1,36 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +# OneTBB isn't tsan-clean +nm mold | grep -q '__tsan_init' && skip + +cat < $t/log +grep -q 'foo\b' $t/log +grep -q 'bar\b' $t/log +! grep -q 'baz\b' $t/log || false diff --git a/third_party/mold/test/elf/relocatable-debug-info.sh b/third_party/mold/test/elf/relocatable-debug-info.sh new file mode 100755 index 00000000000..36a1acfcd72 --- /dev/null +++ b/third_party/mold/test/elf/relocatable-debug-info.sh @@ -0,0 +1,27 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# OneTBB isn't tsan-clean +nm mold | grep -q '__tsan_init' && skip + +cat < $t/log1 +grep -Fq .text.foo $t/log1 +grep -Fq .text.bar $t/log1 + +./mold --relocatable -o $t/c.o $t/a.o --relocatable-merge-sections +readelf -WS $t/c.o > $t/log2 +! grep -Fq .text.foo $t/log2 || false +! grep -Fq .text.bar $t/log2 || false diff --git a/third_party/mold/test/elf/relocatable-no-ehframe.sh b/third_party/mold/test/elf/relocatable-no-ehframe.sh new file mode 100755 index 00000000000..2e4485ff379 --- /dev/null +++ b/third_party/mold/test/elf/relocatable-no-ehframe.sh @@ -0,0 +1,19 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = alpha ] && skip + +# OneTBB isn't tsan-clean +nm mold | grep -q '__tsan_init' && skip + +cat < $t/log1 +! grep -Fq .eh_frame $t/log1 || false + +./mold --relocatable -o $t/b.o $t/a.o +readelf -WS $t/b.o > $t/log2 +! grep -Fq .eh_frame $t/log2 || false diff --git a/third_party/mold/test/elf/relocatable.sh b/third_party/mold/test/elf/relocatable.sh new file mode 100755 index 00000000000..cd0365f8448 --- /dev/null +++ b/third_party/mold/test/elf/relocatable.sh @@ -0,0 +1,27 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# OneTBB isn't tsan-clean +nm mold | grep -q '__tsan_init' && skip + +cat < $t/log1 +grep -q 'GNU_RELRO ' $t/log1 + +$CC -B. -o $t/exe2 $t/a.o -Wl,-z,relro,-z,now +$QEMU $t/exe2 | grep -q 'Hello world' +readelf --segments -W $t/exe2 > $t/log2 +grep -q 'GNU_RELRO ' $t/log2 + +$CC -B. -o $t/exe3 $t/a.o -Wl,-z,norelro +$QEMU $t/exe3 | grep -q 'Hello world' +readelf --segments -W $t/exe3 > $t/log3 +! grep -q 'GNU_RELRO ' $t/log3 || false diff --git a/third_party/mold/test/elf/repro.sh b/third_party/mold/test/elf/repro.sh new file mode 100755 index 00000000000..8124e988e51 --- /dev/null +++ b/third_party/mold/test/elf/repro.sh @@ -0,0 +1,36 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log +grep -q 'undefined symbol: xyz' $t/log diff --git a/third_party/mold/test/elf/response-file.sh b/third_party/mold/test/elf/response-file.sh new file mode 100755 index 00000000000..b13710e3e7e --- /dev/null +++ b/third_party/mold/test/elf/response-file.sh @@ -0,0 +1,21 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/rsp + +$CC -o $t/exe $t/a.o -Wl,@$t/rsp diff --git a/third_party/mold/test/elf/retain-symbols-file.sh b/third_party/mold/test/elf/retain-symbols-file.sh new file mode 100755 index 00000000000..b18eedf6511 --- /dev/null +++ b/third_party/mold/test/elf/retain-symbols-file.sh @@ -0,0 +1,24 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/symbols +foo +baz +EOF + +$CC -B. -o $t/exe $t/a.o -Wl,--retain-symbols-file=$t/symbols +readelf -W --symbols $t/exe > $t/log + +! grep -qw foo $t/log || false +! grep -qw bar $t/log || false +! 
grep -qw main $t/log || false + +grep -qw baz $t/log diff --git a/third_party/mold/test/elf/reverse-sections.sh b/third_party/mold/test/elf/reverse-sections.sh new file mode 100755 index 00000000000..d5aae001567 --- /dev/null +++ b/third_party/mold/test/elf/reverse-sections.sh @@ -0,0 +1,44 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log || false +grep -q 'cannot link object files with different floating-point ABI' $t/log diff --git a/third_party/mold/test/elf/riscv64_weak-undef.sh b/third_party/mold/test/elf/riscv64_weak-undef.sh new file mode 100755 index 00000000000..7afbf933d52 --- /dev/null +++ b/third_party/mold/test/elf/riscv64_weak-undef.sh @@ -0,0 +1,26 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log1 +! grep -q '\.interp .* \.text' $t/log1 || false + +$CC -B. -o $t/exe2 $t/a.o -Wl,--rosegment +readelf -W --segments $t/exe2 > $t/log2 +! grep -q '\.interp .* \.text' $t/log2 || false + +$CC -B. -o $t/exe3 $t/a.o -Wl,--no-rosegment +readelf -W --segments $t/exe3 > $t/log3 +grep -q '\.interp .* \.text' $t/log3 diff --git a/third_party/mold/test/elf/rpath.sh b/third_party/mold/test/elf/rpath.sh new file mode 100755 index 00000000000..c1ab68e0f23 --- /dev/null +++ b/third_party/mold/test/elf/rpath.sh @@ -0,0 +1,16 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& /dev/null || skip + +cat <<'EOF' | $CC -xc -c -o $t/a.o - +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +int main() { + printf("Hello\n"); + return 0; +} +EOF + +LD_PRELOAD=`pwd`/mold-wrapper.so MOLD_PATH=`pwd`/mold \ + clang -no-pie -o $t/exe $t/a.o -fuse-ld=/usr/bin/ld +readelf -p .comment $t/exe > $t/log +grep -q '[ms]old' $t/log diff --git a/third_party/mold/test/elf/run.sh b/third_party/mold/test/elf/run.sh new file mode 100755 index 00000000000..8adf98b3909 --- /dev/null +++ b/third_party/mold/test/elf/run.sh @@ -0,0 +1,59 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +[ "$CC" = cc ] || skip + +# ASAN doesn't work with LD_PRELOAD +nm mold-wrapper.so | grep -q '__[at]san_init' && skip + +cat <<'EOF' | $CC -xc -c -o $t/a.o - +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +int main() { + printf("Hello\n"); + return 0; +} +EOF + +LD_PRELOAD=`pwd`/mold-wrapper.so MOLD_PATH=`pwd`/mold \ + $GCC -o $t/exe $t/a.o -B/usr/bin +readelf -p .comment $t/exe > $t/log +grep -q '[ms]old' $t/log + +./mold -run env | grep -q '^MOLD_PATH=.*/mold$' + +./mold -run /usr/bin/ld --version | grep -q '[ms]old' +./mold -run /usr/bin/ld.lld --version | grep -q '[ms]old' +./mold -run /usr/bin/ld.gold --version | grep -q '[ms]old' + +rm -f $t/ld $t/ld.lld $t/ld.gold $t/foo.ld +touch $t/ld $t/ld.lld $t/ld.gold +echo "#!/bin/sh" >$t/foo.ld +chmod 755 $t/ld $t/ld.lld $t/ld.gold $t/foo.ld + +./mold -run $t/ld --version | grep -q '[ms]old' +./mold -run $t/ld.lld --version | grep -q '[ms]old' +./mold -run $t/ld.gold --version | grep -q '[ms]old' +./mold -run $t/foo.ld --version | grep -q '[ms]old' && false + +cat <<'EOF' > $t/sh +#!/bin/sh +$1 --version +EOF + +chmod 755 $t/sh + +./mold -run $t/sh ld --version | grep -q '[ms]old' +./mold -run $t/sh foo.ld --version >& /dev/null | grep -q '[ms]old' && false + +./mold -run $t/sh $t/ld --version | grep -q '[ms]old' +./mold -run $t/sh $t/ld.lld --version | grep -q '[ms]old' +./mold -run $t/sh $t/ld.gold --version | grep -q '[ms]old' +./mold -run $t/sh $t/foo.ld --version | grep -q '[ms]old' && false diff --git a/third_party/mold/test/elf/s390x_got.sh b/third_party/mold/test/elf/s390x_got.sh new file mode 100755 index 00000000000..b7ddf28e9cd --- /dev/null +++ b/third_party/mold/test/elf/s390x_got.sh @@ -0,0 +1,27 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = s390x ] || skip + +# GOT[0] must be set to the link-time address of .dynamic on s390x. + +cat <&1 | \ + grep -q 'must be a power of 2' diff --git a/third_party/mold/test/elf/section-order.sh b/third_party/mold/test/elf/section-order.sh new file mode 100755 index 00000000000..369e88a3311 --- /dev/null +++ b/third_party/mold/test/elf/section-order.sh @@ -0,0 +1,50 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# qemu crashes if the ELF header is not mapped to memory +[ -z "$QEMU" ] || skip + +cat < $t/log3 +grep -Eq '\b0+200000 .* ehdr_start$' $t/log3 +grep -Eq '\b0+200040 .* rodata_start$' $t/log3 +grep -Eq '\b0+300000 .* phdr_start$' $t/log3 +grep -Eq '\b0+301000 .* phdr_end$' $t/log3 +grep -Eq '\b0+400000 .* text_start$' $t/log3 diff --git a/third_party/mold/test/elf/section-start.sh b/third_party/mold/test/elf/section-start.sh new file mode 100755 index 00000000000..51da71561a6 --- /dev/null +++ b/third_party/mold/test/elf/section-start.sh @@ -0,0 +1,44 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# On PPC64V1, function pointers do not refer function entry addresses +# but instead refers "function descriptors" in .opd. +[ $MACHINE = ppc64 ] && skip + +[ $MACHINE = arm ] && flags=-marm + +cat < $t/log1 +diff $t/log1 <(sort $t/log1) + +$CC -B. 
-o $t/exe2 $t/a.o -no-pie \ + -Wl,--section-start=.fn1=0x20000000,--section-start=.fn2=0x10000000 +$QEMU $t/exe2 | grep -q 'main fn1 fn2 0x20000000 0x10000000' + +readelf -W --segments $t/exe2 | grep ' LOAD ' | sed 's/0x[0-9a-f]*//' > $t/log2 +diff $t/log2 <(sort $t/log2) diff --git a/third_party/mold/test/elf/shared-abs-sym.sh b/third_party/mold/test/elf/shared-abs-sym.sh new file mode 100755 index 00000000000..28cc2f502bb --- /dev/null +++ b/third_party/mold/test/elf/shared-abs-sym.sh @@ -0,0 +1,36 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/c.c +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" +extern char foo; +int main() { printf("foo=%p\n", &foo); } +EOF + +$CC -fPIC -c -o $t/d.o $t/c.c +$CC -B. -o $t/exe1 -pie $t/d.o $t/b.so +$QEMU $t/exe1 | grep -q 'foo=0x3' + +nm -D $t/exe1 > $t/log1 +! grep -q foo $t/log1 || false + +$CC -fPIC -c -o $t/e.o $t/c.c +$CC -B. -o $t/exe2 -no-pie $t/e.o $t/b.so +$QEMU $t/exe2 | grep -q 'foo=0x3' + +nm -D $t/exe2 > $t/log2 +! grep -q foo $t/log2 || false diff --git a/third_party/mold/test/elf/shared.sh b/third_party/mold/test/elf/shared.sh new file mode 100755 index 00000000000..3b8876737cb --- /dev/null +++ b/third_party/mold/test/elf/shared.sh @@ -0,0 +1,41 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' | $CC -fPIC -c -o $t/a.o -xc - +void fn2(); +void fn1() { fn2(); } +void fn3() {} +EOF + +$CC -B. -shared -o $t/b.so $t/a.o + +readelf --dyn-syms $t/b.so > $t/log + +grep -q '00000000 0 NOTYPE GLOBAL DEFAULT UND fn2' $t/log +grep -Eq 'FUNC GLOBAL DEFAULT .* fn1' $t/log + +cat <& /dev/null || false +diff $t/exe2 $t/exe3 +! diff $t/exe3 $t/exe4 >& /dev/null || false diff --git a/third_party/mold/test/elf/shuffle-sections.sh b/third_party/mold/test/elf/shuffle-sections.sh new file mode 100755 index 00000000000..0e11da24cb7 --- /dev/null +++ b/third_party/mold/test/elf/shuffle-sections.sh @@ -0,0 +1,29 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& /dev/null || false diff --git a/third_party/mold/test/elf/soname.sh b/third_party/mold/test/elf/soname.sh new file mode 100755 index 00000000000..ac084ac1db7 --- /dev/null +++ b/third_party/mold/test/elf/soname.sh @@ -0,0 +1,14 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! grep -Fq 'Library soname' $t/log || false + +$CC -B. -o $t/b.so -shared $t/a.o -Wl,-soname,foo +readelf --dynamic $t/b.so | grep -Fq 'Library soname: [foo]' diff --git a/third_party/mold/test/elf/start-lib.sh b/third_party/mold/test/elf/start-lib.sh new file mode 100755 index 00000000000..220a0634f58 --- /dev/null +++ b/third_party/mold/test/elf/start-lib.sh @@ -0,0 +1,20 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! grep -q ' foo$' $t/log || false +grep -q ' bar$' $t/log diff --git a/third_party/mold/test/elf/start-stop-symbol.sh b/third_party/mold/test/elf/start-stop-symbol.sh new file mode 100755 index 00000000000..cabd516007a --- /dev/null +++ b/third_party/mold/test/elf/start-stop-symbol.sh @@ -0,0 +1,34 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat <<'EOF' | $CC -c -o $t/a.o -xc - +__attribute__((section("foo"))) +char data[] = "section foo"; +EOF + +ar rcs $t/b.a $t/a.o + +cat < $t/log + +grep -Fq 'static-archive/d.a(long-long-long-filename.o)' $t/log +grep -Fq 'static-archive/d.a(b.o)' $t/log +grep -Fq static-archive/c.o $t/log + +$QEMU $t/exe | grep -q '8' diff --git a/third_party/mold/test/elf/static-pie.sh b/third_party/mold/test/elf/static-pie.sh new file mode 100755 index 00000000000..31f5a13fa55 --- /dev/null +++ b/third_party/mold/test/elf/static-pie.sh @@ -0,0 +1,24 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +test_cflags -static-pie || skip + +cat < $t/exe +chmod 755 $t/exe +$QEMU $t/exe | grep -q 'Hello world' diff --git a/third_party/mold/test/elf/strip.sh b/third_party/mold/test/elf/strip.sh new file mode 100755 index 00000000000..32979bf6eab --- /dev/null +++ b/third_party/mold/test/elf/strip.sh @@ -0,0 +1,31 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' | $CC -x assembler -c -o $t/a.o -Wa,-L - +.globl _start, foo +_start: +foo: +bar: +.L.baz: +EOF + +./mold -o $t/exe $t/a.o +readelf --symbols $t/exe > $t/log +grep -Fq _start $t/log +grep -Fq foo $t/log +grep -Fq bar $t/log + +if [ $MACHINE '!=' riscv32 ] && [ $MACHINE '!=' riscv64 ]; then + grep -Fq .L.baz $t/log +fi + +./mold -o $t/exe $t/a.o -strip-all +readelf --symbols $t/exe > $t/log +! grep -Fq _start $t/log || false +! grep -Fq foo $t/log || false +! grep -Fq bar $t/log || false + +if [ $MACHINE '!=' riscv32 ] && [ $MACHINE '!=' riscv64 ]; then + ! grep -Fq .L.baz $t/log || false +fi diff --git a/third_party/mold/test/elf/symbol-rank.sh b/third_party/mold/test/elf/symbol-rank.sh new file mode 100755 index 00000000000..d784670dd9c --- /dev/null +++ b/third_party/mold/test/elf/symbol-rank.sh @@ -0,0 +1,49 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/b.ver +$CC -B. -shared -o $t/c.so $t/a.o -Wl,--version-script=$t/b.ver +readelf --symbols $t/c.so > $t/log + +grep -Fq 'foo@VER1' $t/log +grep -Fq 'foo@VER2' $t/log +grep -Fq 'foo@@VER3' $t/log diff --git a/third_party/mold/test/elf/symbol-version2.sh b/third_party/mold/test/elf/symbol-version2.sh new file mode 100755 index 00000000000..ae81123b115 --- /dev/null +++ b/third_party/mold/test/elf/symbol-version2.sh @@ -0,0 +1,23 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/b.version +TEST { global: foo; }; +EOF + +$CC -B. -o $t/c.so -shared $t/a.o -Wl,--version-script=$t/b.version +readelf -W --dyn-syms $t/c.so > $t/log + +grep -q ' foo@TEST$' $t/log +grep -q ' bar@TEST$' $t/log +grep -q ' bar1$' $t/log +! grep -q ' foo@@TEST$' $t/log || false diff --git a/third_party/mold/test/elf/symbol-version3.sh b/third_party/mold/test/elf/symbol-version3.sh new file mode 100755 index 00000000000..bb3508d73e8 --- /dev/null +++ b/third_party/mold/test/elf/symbol-version3.sh @@ -0,0 +1,26 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/b.version +TEST1 { global: foo; }; +TEST2 {}; +TEST3 {}; +EOF + +$CC -B. -o $t/c.so -shared $t/a.o -Wl,--version-script=$t/b.version +readelf -W --dyn-syms $t/c.so > $t/log + +grep -q ' foo@@TEST1$' $t/log +grep -q ' foo@TEST2$' $t/log +grep -q ' foo@TEST3$' $t/log +! 
grep -q ' foo$' $t/log || false diff --git a/third_party/mold/test/elf/symtab-dso.sh b/third_party/mold/test/elf/symtab-dso.sh new file mode 100755 index 00000000000..d967421dc9c --- /dev/null +++ b/third_party/mold/test/elf/symtab-dso.sh @@ -0,0 +1,19 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/c.map + +./mold -o $t/exe $t/a.o $t/b.o --version-script=$t/c.map + +readelf --symbols $t/exe > $t/log + +grep -Eq '0 NOTYPE LOCAL DEFAULT .* local1' $t/log +grep -Eq '0 NOTYPE LOCAL DEFAULT .* local2' $t/log +grep -Eq '0 NOTYPE LOCAL DEFAULT .* module_local' $t/log +grep -Eq '0 NOTYPE GLOBAL DEFAULT .* foo' $t/log +grep -Eq '0 NOTYPE GLOBAL DEFAULT .* bar' $t/log +grep -Eq '0 NOTYPE GLOBAL DEFAULT .* this_is_global' $t/log diff --git a/third_party/mold/test/elf/synthetic-symbols.sh b/third_party/mold/test/elf/synthetic-symbols.sh new file mode 100755 index 00000000000..0ec94edac74 --- /dev/null +++ b/third_party/mold/test/elf/synthetic-symbols.sh @@ -0,0 +1,111 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log + +grep -q '^__ehdr_start=0x40000$' $t/log +grep -q '^__executable_start=0x40000$' $t/log +grep -q '^__dso_handle=' $t/log +grep -q '^section foo$' $t/log + +# Make sure that synthetic symbols overwrite existing ones + +cat < $t/log + +grep -q '^end=foo$' $t/log +grep -q '^etext=foo$' $t/log +grep -q '^edata=foo$' $t/log +grep -q '^__ehdr_start=0x40000$' $t/log +grep -q '^__executable_start=0x40000$' $t/log +grep -q '^section foo$' $t/log diff --git a/third_party/mold/test/elf/sysroot-linker-script.sh b/third_party/mold/test/elf/sysroot-linker-script.sh new file mode 100755 index 00000000000..64b030b29a3 --- /dev/null +++ b/third_party/mold/test/elf/sysroot-linker-script.sh @@ -0,0 +1,22 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/foo/bar/b.script +INPUT(/foo/bar/libfoo.a) +EOF + +cat <& /dev/null + +! $CC -B. -o $t/exe $t/c.o -Wl,--sysroot=$t \ + -Wl,-Lfoo/bar -lfoo >& /dev/null diff --git a/third_party/mold/test/elf/sysroot2.sh b/third_party/mold/test/elf/sysroot2.sh new file mode 100755 index 00000000000..5b5494c9288 --- /dev/null +++ b/third_party/mold/test/elf/sysroot2.sh @@ -0,0 +1,55 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +mkdir -p $t/bin $t/sysroot/foo + +cat < $t/a.script +INPUT(=/foo/x.o) +EOF + +cat < $t/sysroot/b.script +INPUT(/foo/y.o) +EOF + +cat < $t/log + +grep -Eq 'thin-archive/d.a\(.*long-long-long-filename.o\)' $t/log +grep -Eq 'thin-archive/d.a\(.*/b.o\)' $t/log +grep -Fq thin-archive/d.o $t/log + +$QEMU $t/exe | grep -q 15 diff --git a/third_party/mold/test/elf/thread-count.sh b/third_party/mold/test/elf/thread-count.sh new file mode 100755 index 00000000000..e96b4c07923 --- /dev/null +++ b/third_party/mold/test/elf/thread-count.sh @@ -0,0 +1,22 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log +grep -q 'relocation .* against `foo` can not be used when making a shared object; recompile with -fPIC' $t/log diff --git a/third_party/mold/test/elf/tls-le.sh b/third_party/mold/test/elf/tls-le.sh new file mode 100755 index 00000000000..883c0aba280 --- /dev/null +++ b/third_party/mold/test/elf/tls-le.sh @@ -0,0 +1,36 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < $t/log + +grep -q 'trace-symbol: .*/a.o: reference to foo' $t/log +grep -q 'trace-symbol: .*/b.o: definition of foo' $t/log +grep -q 'trace-symbol: .*/c.so: definition of baz' $t/log diff --git a/third_party/mold/test/elf/trace.sh b/third_party/mold/test/elf/trace.sh new file mode 100755 index 00000000000..89dd1107eb9 --- /dev/null +++ b/third_party/mold/test/elf/trace.sh @@ -0,0 +1,21 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -q '/a\.o$' $t/log diff --git a/third_party/mold/test/elf/undefined.sh b/third_party/mold/test/elf/undefined.sh new file mode 100755 index 00000000000..fb3a9ac75da --- /dev/null +++ b/third_party/mold/test/elf/undefined.sh @@ -0,0 +1,38 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +test_cflags -static || skip + +cat < $t/log +! grep -q foo $t/log || false +! grep -q bar $t/log || false + +./mold -static -o $t/exe $t/a.o $t/d.a -u foo +readelf --symbols $t/exe > $t/log +grep -q foo $t/log +! grep -q bar $t/log || false + +./mold -static -o $t/exe $t/a.o $t/d.a -u foo --undefined=bar +readelf --symbols $t/exe > $t/log +grep -q foo $t/log +grep -q bar $t/log diff --git a/third_party/mold/test/elf/unresolved-symbols.sh b/third_party/mold/test/elf/unresolved-symbols.sh new file mode 100755 index 00000000000..4a9394e9c60 --- /dev/null +++ b/third_party/mold/test/elf/unresolved-symbols.sh @@ -0,0 +1,26 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <&1 | grep -q 'undefined.*foo' + +! $CC -B. -o $t/exe $t/a.o -Wl,-unresolved-symbols=report-all 2>&1 \ + | grep -q 'undefined.*foo' + +$CC -B. -o $t/exe $t/a.o -Wl,-unresolved-symbols=ignore-all + +! readelf --dyn-syms $t/exe | grep -w foo || false + +$CC -B. -o $t/exe $t/a.o -Wl,-unresolved-symbols=report-all \ + -Wl,--warn-unresolved-symbols 2>&1 | grep -q 'undefined.*foo' + +! $CC -B. -o $t/exe $t/a.o -Wl,-unresolved-symbols=ignore-in-object-files 2>&1 \ + | grep -q 'undefined.*foo' + +! $CC -B. -o $t/exe $t/a.o -Wl,-unresolved-symbols=ignore-in-shared-libs 2>&1 \ + | grep -q 'undefined.*foo' diff --git a/third_party/mold/test/elf/verbose.sh b/third_party/mold/test/elf/verbose.sh new file mode 100755 index 00000000000..1d0d55e6527 --- /dev/null +++ b/third_party/mold/test/elf/verbose.sh @@ -0,0 +1,19 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < /dev/null diff --git a/third_party/mold/test/elf/version-script-search-paths.sh b/third_party/mold/test/elf/version-script-search-paths.sh new file mode 100755 index 00000000000..569409c6a21 --- /dev/null +++ b/third_party/mold/test/elf/version-script-search-paths.sh @@ -0,0 +1,21 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +mkdir -p $t/foo/bar +echo 'ver_x { global: *; };' > $t/foo/bar/a.ver + +cat < $t/b.s +.globl foo, bar, baz +foo: + nop +bar: + nop +baz: + nop +EOF + +$CC -B. -shared -o $t/c.so -Wl,-L$t/foo/bar -Wl,-version-script,a.ver $t/b.s +readelf --version-info $t/c.so > $t/log + +grep -Fq 'Rev: 1 Flags: none Index: 2 Cnt: 1 Name: ver_x' $t/log diff --git a/third_party/mold/test/elf/version-script.sh b/third_party/mold/test/elf/version-script.sh new file mode 100755 index 00000000000..ea4783c5490 --- /dev/null +++ b/third_party/mold/test/elf/version-script.sh @@ -0,0 +1,20 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +echo 'ver_x { global: *; };' > $t/a.ver + +cat < $t/b.s +.globl foo, bar, baz +foo: + nop +bar: + nop +baz: + nop +EOF + +$CC -B. 
-shared -o $t/c.so -Wl,-version-script,$t/a.ver $t/b.s +readelf --version-info $t/c.so > $t/log + +grep -Fq 'Rev: 1 Flags: none Index: 2 Cnt: 1 Name: ver_x' $t/log diff --git a/third_party/mold/test/elf/version-script10.sh b/third_party/mold/test/elf/version-script10.sh new file mode 100755 index 00000000000..b91d8a4acd1 --- /dev/null +++ b/third_party/mold/test/elf/version-script10.sh @@ -0,0 +1,21 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +echo 'VER1 { foo[12]; }; VER2 {};' > $t/a.ver + +cat < $t/b.s +.globl foo1, foo2, foo3 +foo1: + nop +foo2: + nop +foo3: + nop +EOF + +$CC -B. -shared -o $t/c.so -Wl,-version-script,$t/a.ver $t/b.s +readelf --dyn-syms $t/c.so > $t/log +grep -q ' foo1@@VER1$' $t/log +grep -q ' foo2@@VER1$' $t/log +! grep -q ' foo3@@VER1$' $t/log || false diff --git a/third_party/mold/test/elf/version-script11.sh b/third_party/mold/test/elf/version-script11.sh new file mode 100755 index 00000000000..6b415fbe76c --- /dev/null +++ b/third_party/mold/test/elf/version-script11.sh @@ -0,0 +1,20 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' > $t/a.ver +VER_X1 { global: *; local: b*; }; +EOF + +cat < $t/log +grep -q 'foo@@VER_X1$' $t/log +! grep -q ' bar$' $t/log || false +! grep -q ' baz$' $t/log || false diff --git a/third_party/mold/test/elf/version-script12.sh b/third_party/mold/test/elf/version-script12.sh new file mode 100755 index 00000000000..08866455c0a --- /dev/null +++ b/third_party/mold/test/elf/version-script12.sh @@ -0,0 +1,26 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' > $t/a.ver +{ +global: + *; + *foo_*; +local: + *foo*; +}; +EOF + +cat < $t/log +grep -q ' xyz$' $t/log +grep -q ' foo_bar$' $t/log +! grep -q ' foo$' $t/log || false diff --git a/third_party/mold/test/elf/version-script13.sh b/third_party/mold/test/elf/version-script13.sh new file mode 100755 index 00000000000..0de64420afe --- /dev/null +++ b/third_party/mold/test/elf/version-script13.sh @@ -0,0 +1,21 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' > $t/a.ver +{ + global: *; + local: foo; +}; +EOF + +cat < $t/log +grep -q ' foobar$' $t/log +! grep -q ' foo$' $t/log || false diff --git a/third_party/mold/test/elf/version-script14.sh b/third_party/mold/test/elf/version-script14.sh new file mode 100755 index 00000000000..d09b60df39e --- /dev/null +++ b/third_party/mold/test/elf/version-script14.sh @@ -0,0 +1,32 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' > $t/a.ver +{ +local: + *; +global: + xyz; + foo*bar*[abc]x; +}; +EOF + +cat < $t/log +grep -q ' xyz$' $t/log +! grep -q ' foobarzx$' $t/log || false +grep -q ' foobarcx$' $t/log +grep -q ' foo123bar456bx$' $t/log +! grep -q ' foo123bar456c$' $t/log || false +! grep -q ' foo123bar456x$' $t/log || false diff --git a/third_party/mold/test/elf/version-script15.sh b/third_party/mold/test/elf/version-script15.sh new file mode 100755 index 00000000000..ee1b347f557 --- /dev/null +++ b/third_party/mold/test/elf/version-script15.sh @@ -0,0 +1,27 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' > $t/a.ver +{ +local: + *; +global: + [abc][^abc][^\]a-zABC]; +}; +EOF + +cat < $t/log +grep -q ' azZ$' $t/log +grep -q ' czZ$' $t/log +! grep -q ' azC$' $t/log || false +! 
grep -q ' aaZ$' $t/log || false diff --git a/third_party/mold/test/elf/version-script16.sh b/third_party/mold/test/elf/version-script16.sh new file mode 100755 index 00000000000..2bfbf13df32 --- /dev/null +++ b/third_party/mold/test/elf/version-script16.sh @@ -0,0 +1,14 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' > $t/a.ver +{ local: *; global: extern "C++" { *foo*; }; }; +EOF + +cat < $t/c.ver +{ local: *; global: xyz; }; +EOF + +$CC -B. -o $t/exe2 $t/a.o $t/b.so -Wl,--version-script=$t/c.ver -Wl,--undefined-version +nm -g $t/exe2 > $t/log2 +! grep -q foo $t/log2 || false + +cat <<'EOF' > $t/d.ver +{ local: *; }; +EOF + +$CC -B. -o $t/exe3 $t/a.o $t/b.so -Wl,--version-script=$t/d.ver +nm -g $t/exe3 > $t/log3 +! grep -q foo $t/log3 || false diff --git a/third_party/mold/test/elf/version-script18.sh b/third_party/mold/test/elf/version-script18.sh new file mode 100755 index 00000000000..088b5240b46 --- /dev/null +++ b/third_party/mold/test/elf/version-script18.sh @@ -0,0 +1,24 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# Test version script precedence. + +cat <<'EOF' > $t/a.ver +{ global: extern "C++" { *libalpha::*; }; local: *libbeta*; }; +EOF + +cat < + void foo() {} + template void foo(); +} +EOF + +$CC -B. -shared -Wl,--version-script=$t/a.ver -o $t/c.so $t/b.o +readelf --wide --dyn-syms $t/c.so | grep libalpha | grep -q Bar + diff --git a/third_party/mold/test/elf/version-script2.sh b/third_party/mold/test/elf/version-script2.sh new file mode 100755 index 00000000000..154ed2deb2d --- /dev/null +++ b/third_party/mold/test/elf/version-script2.sh @@ -0,0 +1,45 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/a.ver +ver1 { + global: foo; + local: *; +}; + +ver2 { + global: bar; +}; + +ver3 { + global: baz; +}; +EOF + +cat < $t/log +grep -Fq 'foo@ver1' $t/log +grep -Fq 'bar@ver2' $t/log +grep -Fq 'baz@ver3' $t/log diff --git a/third_party/mold/test/elf/version-script3.sh b/third_party/mold/test/elf/version-script3.sh new file mode 100755 index 00000000000..b79a4b91064 --- /dev/null +++ b/third_party/mold/test/elf/version-script3.sh @@ -0,0 +1,41 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/a.ver +ver1 { + global: f*o; + local: *; +}; + +ver2 { + global: b*; +}; +EOF + +cat < $t/log +grep -Fq 'foo@ver1' $t/log +grep -Fq 'bar@ver2' $t/log +grep -Fq 'baz@ver2' $t/log diff --git a/third_party/mold/test/elf/version-script4.sh b/third_party/mold/test/elf/version-script4.sh new file mode 100755 index 00000000000..4c2e4a93649 --- /dev/null +++ b/third_party/mold/test/elf/version-script4.sh @@ -0,0 +1,31 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/a.ver +{ + global: + extern "C++" { + foo::bar; + }; + + local: *; +}; +EOF + +cat < $t/log +grep -Fq _ZN3foo3barE $t/log +! grep -Fq ' bar' $t/log || false diff --git a/third_party/mold/test/elf/version-script5.sh b/third_party/mold/test/elf/version-script5.sh new file mode 100755 index 00000000000..196fdbf9ee1 --- /dev/null +++ b/third_party/mold/test/elf/version-script5.sh @@ -0,0 +1,21 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/a.ver +{ + extern "C" { foo }; + local: *; +}; +EOF + +cat < $t/log +grep -Fq foo $t/log +! 
grep -Fq ' main' $t/log || false diff --git a/third_party/mold/test/elf/version-script6.sh b/third_party/mold/test/elf/version-script6.sh new file mode 100755 index 00000000000..bfd8b7a03f3 --- /dev/null +++ b/third_party/mold/test/elf/version-script6.sh @@ -0,0 +1,35 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' > $t/a.ver +VER_X1 { foo; }; +VER_X2 { bar; }; +EOF + +cat < $t/d.ver +VER_Y1 { local; *; }; +VER_Y2 { baz; }; +VER_Y3 { foo; }; +EOF + +cat < $t/log +grep -q 'foo@VER_X1' $t/log +grep -q 'bar@VER_X2' $t/log +grep -q 'baz@@VER_Y2' $t/log diff --git a/third_party/mold/test/elf/version-script7.sh b/third_party/mold/test/elf/version-script7.sh new file mode 100755 index 00000000000..3bb4913ae7b --- /dev/null +++ b/third_party/mold/test/elf/version-script7.sh @@ -0,0 +1,18 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' > $t/a.ver +VER_X1 { *; }; +EOF + +cat < $t/log +grep -q 'foo$' $t/log +grep -q 'bar@@VER_X1' $t/log diff --git a/third_party/mold/test/elf/version-script8.sh b/third_party/mold/test/elf/version-script8.sh new file mode 100755 index 00000000000..da70acbc724 --- /dev/null +++ b/third_party/mold/test/elf/version-script8.sh @@ -0,0 +1,39 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/a.ver +ver1 { + global: ?oo; + local: *; +}; + +ver2 { + global: b?r; +}; +EOF + +cat < $t/log +grep -Fq 'foo@@ver1' $t/log +grep -Fq 'bar@@ver2' $t/log +! grep -Fq 'baz' $t/log || false diff --git a/third_party/mold/test/elf/version-script9.sh b/third_party/mold/test/elf/version-script9.sh new file mode 100755 index 00000000000..eef82e47e01 --- /dev/null +++ b/third_party/mold/test/elf/version-script9.sh @@ -0,0 +1,19 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +echo 'VER1 { extern "C++" {}; foo; }; VER2 {};' > $t/a.ver + +cat < $t/b.s +.globl foo, bar, baz +foo: + nop +bar: + nop +baz: + nop +EOF + +$CC -B. -shared -o $t/c.so -Wl,-version-script,$t/a.ver $t/b.s +readelf --dyn-syms $t/c.so > $t/log +grep -q ' foo@@VER1$' $t/log diff --git a/third_party/mold/test/elf/version.sh b/third_party/mold/test/elf/version.sh new file mode 100755 index 00000000000..b027a13addb --- /dev/null +++ b/third_party/mold/test/elf/version.sh @@ -0,0 +1,37 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# OneTBB isn't tsan-clean +nm mold | grep -q '__tsan_init' && skip + +./mold -v | grep -q '[ms]old .*compatible with GNU ld' +./mold --version | grep -q '[ms]old .*compatible with GNU ld' + +./mold -V | grep -q '[ms]old .*compatible with GNU ld' +./mold -V | grep -q elf_x86_64 +./mold -V | grep -q elf_i386 + +cat <&1 | grep -q '[ms]old' +! [ -f $t/exe1 ] || false + +$CC -B. -Wl,-v -o $t/exe2 $t/a.o 2>&1 | grep -q '[ms]old' +$QEMU $t/exe2 | grep -q 'Hello world' + +! ./mold --v >& $t/log +grep -q 'unknown command line option:' $t/log diff --git a/third_party/mold/test/elf/versioned-undef.sh b/third_party/mold/test/elf/versioned-undef.sh new file mode 100755 index 00000000000..bfb723b9a87 --- /dev/null +++ b/third_party/mold/test/elf/versioned-undef.sh @@ -0,0 +1,46 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# Skip if libc is musl because musl does not fully support GNU-style +# symbol versioning. +ldd --help 2>&1 | grep -q musl && skip + +cat < $t/b.ver +$CC -B. -shared -o $t/c.so $t/a.o -Wl,--version-script=$t/b.ver + +cat < $t/log +! 
grep -Fq foo $t/log || false diff --git a/third_party/mold/test/elf/warn-common.sh b/third_party/mold/test/elf/warn-common.sh new file mode 100755 index 00000000000..d57024ddafa --- /dev/null +++ b/third_party/mold/test/elf/warn-common.sh @@ -0,0 +1,21 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! grep -Fq 'multiple common symbols' $t/log || false + +$CC -B. -o $t/exe $t/a.o $t/b.o -Wl,-warn-common 2> $t/log +grep -Fq 'multiple common symbols' $t/log diff --git a/third_party/mold/test/elf/warn-once.sh b/third_party/mold/test/elf/warn-once.sh new file mode 100755 index 00000000000..49ad32f0ddf --- /dev/null +++ b/third_party/mold/test/elf/warn-once.sh @@ -0,0 +1,18 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log + +[ "$(grep 'undefined symbol:.* foo$' $t/log | wc -l)" = 1 ] diff --git a/third_party/mold/test/elf/warn-symbol-type.sh b/third_party/mold/test/elf/warn-symbol-type.sh new file mode 100755 index 00000000000..539fc908a8f --- /dev/null +++ b/third_party/mold/test/elf/warn-symbol-type.sh @@ -0,0 +1,25 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log + +grep -q "warning: symbol type mismatch: times" $t/log diff --git a/third_party/mold/test/elf/warn-unresolved-symbols.sh b/third_party/mold/test/elf/warn-unresolved-symbols.sh new file mode 100755 index 00000000000..4ae37aea149 --- /dev/null +++ b/third_party/mold/test/elf/warn-unresolved-symbols.sh @@ -0,0 +1,20 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <&1 \ + | grep -q 'undefined symbol:.*foo' + +$CC -B. -o $t/exe $t/a.o -Wl,-warn-unresolved-symbols 2>&1 \ + | grep -q 'undefined symbol:.*foo' + +! $CC -B. -o $t/exe $t/a.o -Wl,-warn-unresolved-symbols \ + --error-unresolved-symbols 2>&1 \ + | grep -q 'undefined symbol:.*foo' diff --git a/third_party/mold/test/elf/weak-export-dso.sh b/third_party/mold/test/elf/weak-export-dso.sh new file mode 100755 index 00000000000..d2dfccade5a --- /dev/null +++ b/third_party/mold/test/elf/weak-export-dso.sh @@ -0,0 +1,25 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log +grep -q 'undefined symbol: foo' $t/log diff --git a/third_party/mold/test/elf/weak-undef4.sh b/third_party/mold/test/elf/weak-undef4.sh new file mode 100755 index 00000000000..dfe371c90f1 --- /dev/null +++ b/third_party/mold/test/elf/weak-undef4.sh @@ -0,0 +1,50 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/readelf +! grep -q fn1 $t/readelf || false +! grep -q fn2 $t/readelf || false + +$CC -B. -nostdlib -o $t/exe $t/a.o -Wl,--whole-archive $t/d.a + +readelf --symbols $t/exe > $t/readelf +grep -q fn1 $t/readelf +grep -q fn2 $t/readelf + +$CC -B. -nostdlib -o $t/exe $t/a.o -Wl,--whole-archive \ + -Wl,--no-whole-archive $t/d.a + +readelf --symbols $t/exe > $t/readelf +! grep -q fn1 $t/readelf || false +! grep -q fn2 $t/readelf || false diff --git a/third_party/mold/test/elf/wrap-lto.sh b/third_party/mold/test/elf/wrap-lto.sh new file mode 100755 index 00000000000..b172fa7a1fa --- /dev/null +++ b/third_party/mold/test/elf/wrap-lto.sh @@ -0,0 +1,62 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +! 
grep -Fq .rodata.str1.1 $t/log || false diff --git a/third_party/mold/test/elf/x86_64_emulation-deduction.sh b/third_party/mold/test/elf/x86_64_emulation-deduction.sh new file mode 100755 index 00000000000..662ed7a0a7e --- /dev/null +++ b/third_party/mold/test/elf/x86_64_emulation-deduction.sh @@ -0,0 +1,13 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# Skip if target is not x86-64 +[ $MACHINE = x86_64 ] || skip + +cat <& /dev/null; then + $CXX -B. -o $t/exe $t/a.o -static -mcmodel=large + $QEMU $t/exe +fi diff --git a/third_party/mold/test/elf/x86_64_execstack-if-needed.sh b/third_party/mold/test/elf/x86_64_execstack-if-needed.sh new file mode 100755 index 00000000000..e3e7f188eaa --- /dev/null +++ b/third_party/mold/test/elf/x86_64_execstack-if-needed.sh @@ -0,0 +1,18 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +cat <& /dev/null +readelf --segments -W $t/exe | grep -q 'GNU_STACK.* RW ' + +$CC -B. -o $t/exe $t/a.o -Wl,-z,execstack-if-needed +readelf --segments -W $t/exe | grep -q 'GNU_STACK.* RWE ' diff --git a/third_party/mold/test/elf/x86_64_gnu-linkonce.sh b/third_party/mold/test/elf/x86_64_gnu-linkonce.sh new file mode 100755 index 00000000000..913e4a600de --- /dev/null +++ b/third_party/mold/test/elf/x86_64_gnu-linkonce.sh @@ -0,0 +1,27 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +cat <:' | \ + grep -Fq 'puts$plt' diff --git a/third_party/mold/test/elf/x86_64_gnu-retain.sh b/third_party/mold/test/elf/x86_64_gnu-retain.sh new file mode 100755 index 00000000000..81511eacc83 --- /dev/null +++ b/third_party/mold/test/elf/x86_64_gnu-retain.sh @@ -0,0 +1,32 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +echo '.section foo,"R"' | $CC -o /dev/null -c -xassembler - 2> /dev/null || skip + +cat < $t/log +! grep -q foo $t/log || false diff --git a/third_party/mold/test/elf/x86_64_gotpcrelx.sh b/third_party/mold/test/elf/x86_64_gotpcrelx.sh new file mode 100755 index 00000000000..3282aec3b90 --- /dev/null +++ b/third_party/mold/test/elf/x86_64_gotpcrelx.sh @@ -0,0 +1,29 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& /dev/null \ + || skip + +cat < $t/script/libfoo.so + +$CC -B. -o $t/exe -L$t/script -L$t/lib32 -L$t/lib64 \ + $t/e.o -lfoo -Wl,-rpath $t/lib64 >& $t/log + +grep -q 'script/libfoo.so: skipping incompatible file' $t/log +grep -q 'lib32/libfoo.so: skipping incompatible file' $t/log +grep -q 'lib32/libfoo.a: skipping incompatible file' $t/log +$QEMU $t/exe | grep -q 'Hello world' diff --git a/third_party/mold/test/elf/x86_64_incompatible-libs2.sh b/third_party/mold/test/elf/x86_64_incompatible-libs2.sh new file mode 100755 index 00000000000..6717a2e5ff5 --- /dev/null +++ b/third_party/mold/test/elf/x86_64_incompatible-libs2.sh @@ -0,0 +1,47 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +echo 'int main() {}' | $CC -m32 -o $t/exe -xc - >& /dev/null \ + || skip + +cat < $t/script/libfoo.so + +$CC -B. 
-o $t/exe -L$t/lib32 -L$t/lib64 -lfoo $t/e.o -Wl,-rpath $t/lib64 \ + >& $t/log + +grep -q 'lib32/libfoo.so: skipping incompatible file' $t/log +$QEMU $t/exe | grep -q 'Hello world' diff --git a/third_party/mold/test/elf/x86_64_incompatible-obj.sh b/third_party/mold/test/elf/x86_64_incompatible-obj.sh new file mode 100755 index 00000000000..a95b4e63f3e --- /dev/null +++ b/third_party/mold/test/elf/x86_64_incompatible-obj.sh @@ -0,0 +1,18 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +echo 'int main() {}' | $CC -m32 -o $t/exe -xc - >& /dev/null \ + || skip + +cat <& $t/log +grep -q "$t/b.o: incompatible file type: x86_64 is expected but got i386" $t/log diff --git a/third_party/mold/test/elf/x86_64_init-array-readonly.sh b/third_party/mold/test/elf/x86_64_init-array-readonly.sh new file mode 100755 index 00000000000..9205fc42e49 --- /dev/null +++ b/third_party/mold/test/elf/x86_64_init-array-readonly.sh @@ -0,0 +1,43 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +cat < /dev/null || skip +volatile char arr[0x100000000]; +int main() { + return arr[2000]; +} +EOF + +$CC -B. -o $t/exe $t/a.o +$QEMU $t/exe diff --git a/third_party/mold/test/elf/x86_64_mergeable-records.sh b/third_party/mold/test/elf/x86_64_mergeable-records.sh new file mode 100755 index 00000000000..390959d342b --- /dev/null +++ b/third_party/mold/test/elf/x86_64_mergeable-records.sh @@ -0,0 +1,57 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# Skip if target is not x86-64 +[ $MACHINE = x86_64 ] || skip + +test_cflags -static || skip + +cat <<'EOF' | $CC -o $t/a.o -c -x assembler - + .text + .globl main +main: + sub $8, %rsp + + mov $.L.str1, %rdi + xor %rax, %rax + call printf + + mov $.L.str1+1, %rdi + xor %rax, %rax + call printf + + mov $str2+2, %rdi + xor %rax, %rax + call printf + + mov $.L.str3+3, %rdi + xor %rax, %rax + call printf + + mov $.rodata.cst8+16, %rdi + xor %rax, %rax + call printf + + xor %rax, %rax + add $8, %rsp + ret + + .section .rodata.cst8, "aM", @progbits, 8 + .align 8 +.L.str1: + .ascii "abcdef\n\0" +.globl str2 +str2: + .ascii "ghijkl\n\0" +.L.str3: + .ascii "mnopqr\n\0" +EOF + +$CC -B. -static -o $t/exe $t/a.o + +$QEMU $t/exe | grep -q '^abcdef$' +$QEMU $t/exe | grep -q '^bcdef$' +$QEMU $t/exe | grep -q '^ijkl$' +$QEMU $t/exe | grep -q '^pqr$' +$QEMU $t/exe | grep -q '^mnopqr$' diff --git a/third_party/mold/test/elf/x86_64_mergeable-strings.sh b/third_party/mold/test/elf/x86_64_mergeable-strings.sh new file mode 100755 index 00000000000..fd22b551c14 --- /dev/null +++ b/third_party/mold/test/elf/x86_64_mergeable-strings.sh @@ -0,0 +1,36 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +test_cflags -static || skip + +# Skip if target is not x86-64 +[ $MACHINE = x86_64 ] || skip + +cat <<'EOF' | $CC -o $t/a.o -c -x assembler - + .text + .globl main +main: + sub $8, %rsp + mov $.L.str+3, %rdi + xor %rax, %rax + call printf + mov $.rodata.str1.1+16, %rdi + xor %rax, %rax + call printf + xor %rax, %rax + add $8, %rsp + ret + + .section .rodata.str1.1, "aMS", @progbits, 1 + .string "bar" +.L.str: +foo: + .string "xyzHello" + .string "foo world\n" +EOF + +$CC -B. 
-static -o $t/exe $t/a.o +$QEMU $t/exe | grep -q 'Hello world' + +readelf -sW $t/exe | grep -Eq '[0-9] foo$' diff --git a/third_party/mold/test/elf/x86_64_note-property.sh b/third_party/mold/test/elf/x86_64_note-property.sh new file mode 100755 index 00000000000..f4505dd1fc4 --- /dev/null +++ b/third_party/mold/test/elf/x86_64_note-property.sh @@ -0,0 +1,22 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# Skip if target is not x86-64 +[ $MACHINE = x86_64 ] || skip + +$CC -fcf-protection=branch -c /dev/null -o /dev/null -xc 2> /dev/null || skip + +cat < $t/log + +grep -Eq '.note.bar\s+NOTE.+000008 00 A 0 0 4' $t/log +grep -Eq '.note.baz\s+NOTE.+000008 00 A 0 0 8' $t/log +grep -Eq '.note.nonalloc\s+NOTE.+000008 00 0 0 1' $t/log + +readelf --segments $t/exe > $t/log +grep -Fq '01 .note.baz .note.foo .note.bar' $t/log +! grep -q 'NOTE.*0x0000000000000000 0x0000000000000000' $t/log || false diff --git a/third_party/mold/test/elf/x86_64_note2.sh b/third_party/mold/test/elf/x86_64_note2.sh new file mode 100755 index 00000000000..b0320759d26 --- /dev/null +++ b/third_party/mold/test/elf/x86_64_note2.sh @@ -0,0 +1,35 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +# Binutils 2.32 injects their own .note.gnu.property section interfering with the tests +test_cflags -Xassembler -mx86-used-note=no && CFLAGS="-Xassembler -mx86-used-note=no" || CFLAGS="" + +cat < $t/log +grep -Fq '01 .note.a .note.c .note.b' $t/log diff --git a/third_party/mold/test/elf/x86_64_plt.sh b/third_party/mold/test/elf/x86_64_plt.sh new file mode 100755 index 00000000000..505aee54520 --- /dev/null +++ b/third_party/mold/test/elf/x86_64_plt.sh @@ -0,0 +1,29 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +cat <<'EOF' | $CC -o $t/a.o -c -x assembler - + .text + .globl main +main: + sub $8, %rsp + lea msg(%rip), %rdi + xor %rax, %rax + call printf@PLT + xor %rax, %rax + add $8, %rsp + ret + + .data +msg: + .string "Hello world\n" +EOF + +$CC -B. -o $t/exe $t/a.o + +readelf --sections $t/exe | grep -Fq '.got' +readelf --sections $t/exe | grep -Fq '.got.plt' + +$QEMU $t/exe | grep -q 'Hello world' diff --git a/third_party/mold/test/elf/x86_64_pltgot.sh b/third_party/mold/test/elf/x86_64_pltgot.sh new file mode 100755 index 00000000000..10c78557930 --- /dev/null +++ b/third_party/mold/test/elf/x86_64_pltgot.sh @@ -0,0 +1,28 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +cat < $t/log + +grep -Eq '1034:.*jmp.* ' $t/log diff --git a/third_party/mold/test/elf/x86_64_preinit-array.sh b/third_party/mold/test/elf/x86_64_preinit-array.sh new file mode 100755 index 00000000000..f54596d008e --- /dev/null +++ b/third_party/mold/test/elf/x86_64_preinit-array.sh @@ -0,0 +1,54 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +ldd --help 2>&1 | grep -q musl && skip + +[ $MACHINE = x86_64 ] || skip + +cat <:' > $t/log + +grep -Eq 'lea \s*0x.+\(%rip\),%rax .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%rcx .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%rdx .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%rbx .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%rbp .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%rsi .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%rdi .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%r8 .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%r9 .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%r10 .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%r11 .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%r12 .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%r13 .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%r14 .*' $t/log +grep -Eq 'lea \s*0x.+\(%rip\),%r15 .*' $t/log +grep -Eq 'call.*' $t/log +grep -Eq 'jmp.*' $t/log diff --git a/third_party/mold/test/elf/x86_64_reloc-overflow.sh b/third_party/mold/test/elf/x86_64_reloc-overflow.sh new file mode 100755 index 00000000000..f0002e7143e --- /dev/null +++ b/third_party/mold/test/elf/x86_64_reloc-overflow.sh @@ -0,0 +1,15 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +cat < $t/log || false +grep -Fq 'relocation R_X86_64_16 against foo out of range' $t/log diff --git a/third_party/mold/test/elf/x86_64_reloc-zero.sh b/third_party/mold/test/elf/x86_64_reloc-zero.sh new file mode 100755 index 00000000000..be356ef10ca --- /dev/null +++ b/third_party/mold/test/elf/x86_64_reloc-zero.sh @@ -0,0 +1,15 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +cat < $t/d.s +.globl abs_sym +.set abs_sym, 42 + +.globl main +main: + sub $8, %rsp + lea abs_sym, %edi + call print + add $8, %rsp + ret +EOF + +$CC -B. -o $t/exe $t/c.so $t/d.s -no-pie +$QEMU $t/exe | grep -q '^42$' +$CC -B. -o $t/exe $t/c.so $t/d.s -pie +$QEMU $t/exe | grep -q '^42$' + +# GOT +cat <<'EOF' > $t/d.s +.globl main +main: + sub $8, %rsp + mov ext_var@GOTPCREL(%rip), %rdi + mov (%rdi), %edi + call print + add $8, %rsp + ret +EOF + +$CC -B. -o $t/exe $t/c.so $t/d.s -no-pie +$QEMU $t/exe | grep -q '^56$' +$CC -B. -o $t/exe $t/c.so $t/d.s -pie +$QEMU $t/exe | grep -q '^56$' + +# Copyrel +cat <<'EOF' > $t/d.s +.globl main +main: + sub $8, %rsp + mov ext_var(%rip), %edi + call print + add $8, %rsp + ret +EOF + +$CC -c -o $t/d.o $t/d.s +$CC -B. -o $t/exe $t/c.so $t/d.o -no-pie +$QEMU $t/exe | grep -q '^56$' +$CC -B. -o $t/exe $t/c.so $t/d.s -pie +$QEMU $t/exe | grep -q '^56$' + +# Copyrel +cat <<'EOF' > $t/d.s +.globl main +main: + sub $8, %rsp + mov foo(%rip), %rdi + mov (%rdi), %edi + call print + add $8, %rsp + ret + +.data +foo: + .quad ext_var +EOF + +$CC -B. -o $t/exe $t/c.so $t/d.s -no-pie +$QEMU $t/exe | grep -q '^56$' +$CC -B. -o $t/exe $t/c.so $t/d.s -pie +$QEMU $t/exe | grep -q '^56$' + +# PLT +cat <<'EOF' > $t/d.s +.globl main +main: + sub $8, %rsp + mov $76, %edi + call print@PLT + add $8, %rsp + ret +EOF + +$CC -B. -o $t/exe $t/c.so $t/d.s -no-pie +$QEMU $t/exe | grep -q '^76$' +$CC -B. -o $t/exe $t/c.so $t/d.s -pie +$QEMU $t/exe | grep -q '^76$' + +# PLT +cat <<'EOF' > $t/d.s +.globl main +main: + sub $8, %rsp + mov $76, %edi + lea print(%rip), %rax + call *%rax + add $8, %rsp + ret +EOF + +$CC -B. -o $t/exe $t/c.so $t/d.s -no-pie +$QEMU $t/exe | grep -q '^76$' +$CC -B. 
-o $t/exe $t/c.so $t/d.s -pie +$QEMU $t/exe | grep -q '^76$' + +# SIZE32 +cat <<'EOF' > $t/d.s +.globl main +main: + sub $8, %rsp + mov $foo+2@SIZE, %edi + call print@PLT + add $8, %rsp + ret + +.data +.globl foo +.type foo, %object +.size foo, 24 +foo: +EOF + +$CC -B. -o $t/exe $t/c.so $t/d.s +$QEMU $t/exe | grep -q '^26$' + +# SIZE64 +cat <<'EOF' > $t/d.s +.globl main +main: + sub $8, %rsp + movabs $foo+5@SIZE, %rdi + call print64@PLT + add $8, %rsp + ret + +.data +.globl foo +.type foo, %object +.size foo, 56 +foo: +EOF + +$CC -B. -o $t/exe $t/c.so $t/d.s +$QEMU $t/exe | grep -q '^61$' + +# GOTPCREL64 +cat <<'EOF' > $t/e.c +extern long ext_var; +static long arr[50000] = {1, 2, 3}; +void print64(long); + +int main() { + print64(ext_var * 1000000 + arr[2]); +} +EOF + +$CC -c -o $t/e.o $t/e.c -mcmodel=large -fPIC +$CC -B. -o $t/exe $t/c.so $t/e.o +$QEMU $t/exe | grep -q '^56000003$' + +# R_X86_64_32 against non-alloc section +cat <<'EOF' > $t/f.s +.globl main +main: + sub $8, %rsp + add $8, %rsp + ret + +.section .foo, "", @progbits +.zero 16 +foo: +.quad bar + +.section .bar, "", @progbits +.zero 24 +bar: +.quad foo +EOF + +$CC -c -o $t/f.o $t/f.s +$CC -B. -o $t/exe $t/f.o +readelf -x .foo -x .bar $t/exe > $t/log + +grep -Fq '0x00000010 00000000 00000000 10000000 00000000' $t/log +grep -Fq '0x00000010 18000000 00000000' $t/log diff --git a/third_party/mold/test/elf/x86_64_section-alignment.sh b/third_party/mold/test/elf/x86_64_section-alignment.sh new file mode 100755 index 00000000000..d1940029cae --- /dev/null +++ b/third_party/mold/test/elf/x86_64_section-alignment.sh @@ -0,0 +1,47 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +cat <<'EOF' | $CC -c -o $t/a.o -xc - +#include "libc/inttypes.h" +#include "libc/limits.h" +#include "libc/literal.h" +#include "libc/calls/calls.h" +#include "libc/calls/weirdtypes.h" +#include "libc/fmt/fmt.h" +#include "libc/stdio/dprintf.h" +#include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" +#include "third_party/musl/tempnam.h" + +__attribute__((aligned(8192))) int foo = 1; + +typedef struct { + uint8_t e_ident[16]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint64_t e_entry; + uint64_t e_phoff; + uint64_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} Ehdr; + +char __ehdr_start; + +int main() { + Ehdr *e = (Ehdr *)&__ehdr_start; + printf("%lu %lu %lu\n", e->e_phoff % 8, e->e_shoff % 8, (uint64_t)&foo % 8192); +} +EOF + +$CC -B. -o $t/exe $t/a.o +$QEMU $t/exe | grep -q '^0 0 0$' diff --git a/third_party/mold/test/elf/x86_64_section-name.sh b/third_party/mold/test/elf/x86_64_section-name.sh new file mode 100755 index 00000000000..74f4eb7a82a --- /dev/null +++ b/third_party/mold/test/elf/x86_64_section-name.sh @@ -0,0 +1,75 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +cat <<'EOF' | $CC -o $t/a.o -c -x assembler - +.globl _start +.text +_start: + ret + +.section .text.hot +.ascii ".text.hot " +.section .text.hot.foo +.ascii ".text.hot.foo " + +.section .text.unknown +.ascii ".text.unknown " +.section .text.unknown.foo +.ascii ".text.unknown.foo " + +.section .text.unlikely +.ascii ".text.unlikely " +.section .text.unlikely.foo +.ascii ".text.unlikely.foo " + +.section .text.startup +.ascii ".text.startup " +.section .text.startup.foo +.ascii ".text.startup.foo " + +.section .text.exit +.ascii ".text.exit " +.section .text.exit.foo +.ascii ".text.exit.foo " + +.section .text +.ascii ".text " +.section .text.foo +.ascii ".text.foo " + +.section .data.rel.ro +.ascii ".data.rel.ro " +.section .data.rel.ro.foo +.ascii ".data.rel.ro.foo " + +.section .data +.ascii ".data " +.section .data.foo +.ascii ".data.foo " + +.section .rodata +.ascii ".rodata " +.section .rodata.foo +.ascii ".rodata.foo " +EOF + +./mold -o $t/exe $t/a.o -z keep-text-section-prefix + +readelf -p .text.hot $t/exe | grep -Fq '.text.hot .text.hot.foo' +readelf -p .text.unknown $t/exe | grep -Fq '.text.unknown .text.unknown.foo' +readelf -p .text.unlikely $t/exe | grep -Fq '.text.unlikely .text.unlikely.foo' +readelf -p .text.startup $t/exe | grep -Fq '.text.startup .text.startup.foo' +readelf -p .text.exit $t/exe | grep -Fq '.text.exit .text.exit.foo' +readelf -p .text $t/exe | grep -Fq '.text .text.foo' +readelf -p .data.rel.ro $t/exe | grep -Fq '.data.rel.ro .data.rel.ro.foo' +readelf -p .data $t/exe | grep -Fq '.data .data.foo' +readelf -p .rodata $t/exe | grep -Fq '.rodata .rodata.foo' + +./mold -o $t/exe $t/a.o +! readelf --sections $t/exe | grep -Fq .text.hot || false + +./mold -o $t/exe $t/a.o -z nokeep-text-section-prefix +! readelf --sections $t/exe | grep -Fq .text.hot || false diff --git a/third_party/mold/test/elf/x86_64_tls-gd-mcmodel-large.sh b/third_party/mold/test/elf/x86_64_tls-gd-mcmodel-large.sh new file mode 100755 index 00000000000..5658269b55c --- /dev/null +++ b/third_party/mold/test/elf/x86_64_tls-gd-mcmodel-large.sh @@ -0,0 +1,51 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +cat <&1 | grep -q 'may cause a segmentation fault' diff --git a/third_party/mold/test/elf/x86_64_warn-shared-textrel.sh b/third_party/mold/test/elf/x86_64_warn-shared-textrel.sh new file mode 100755 index 00000000000..b7a9dc47869 --- /dev/null +++ b/third_party/mold/test/elf/x86_64_warn-shared-textrel.sh @@ -0,0 +1,25 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# Skip if libc is musl +ldd --help 2>&1 | grep -q musl && skip + +# Skip if target is not x86-64 +[ $MACHINE = x86_64 ] || skip + +cat <<'EOF' | $CC -c -o $t/a.o -x assembler - +.globl fn +fn: + movabs main, %rax + ret +EOF + +cat <& $t/log +grep -q 'relocation against symbol `main'\'' in read-only section' $t/log +grep -q 'creating a DT_TEXTREL in an output file' $t/log diff --git a/third_party/mold/test/elf/x86_64_warn-textrel.sh b/third_party/mold/test/elf/x86_64_warn-textrel.sh new file mode 100755 index 00000000000..25b4e0351a5 --- /dev/null +++ b/third_party/mold/test/elf/x86_64_warn-textrel.sh @@ -0,0 +1,25 @@ +// clang-format off +#!/bin/bash +. 
$(dirname $0)/common.inc + +# Skip if libc is musl +ldd --help 2>&1 | grep -q musl && skip + +# Skip if target is not x86-64 +[ $MACHINE = x86_64 ] || skip + +cat <<'EOF' | $CC -c -o $t/a.o -x assembler - +.globl fn +fn: + movabs main, %rax + ret +EOF + +cat <& $t/log +grep -q 'relocation against symbol `main'\'' in read-only section' $t/log +grep -q 'creating a DT_TEXTREL in an output file' $t/log diff --git a/third_party/mold/test/elf/x86_64_z-ibt.sh b/third_party/mold/test/elf/x86_64_z-ibt.sh new file mode 100755 index 00000000000..8ce8ad7aa80 --- /dev/null +++ b/third_party/mold/test/elf/x86_64_z-ibt.sh @@ -0,0 +1,18 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +cat < $t/log +! grep -qw SHSTK $t/log + +$CC -B. -o $t/exe $t/a.o -Wl,-z,ibt +readelf --notes $t/exe | grep -qw IBT diff --git a/third_party/mold/test/elf/x86_64_z-ibtplt.sh b/third_party/mold/test/elf/x86_64_z-ibtplt.sh new file mode 100755 index 00000000000..29cdfe2fc3c --- /dev/null +++ b/third_party/mold/test/elf/x86_64_z-ibtplt.sh @@ -0,0 +1,42 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +[ $MACHINE = x86_64 ] || skip + +cat < $t/log +! grep -qw SHSTK $t/log + +$CC -B. -o $t/exe $t/a.o -Wl,-z,shstk +readelf --notes $t/exe | grep -qw SHSTK diff --git a/third_party/mold/test/elf/x86_64_z-text.sh b/third_party/mold/test/elf/x86_64_z-text.sh new file mode 100755 index 00000000000..5f44a4a6f5d --- /dev/null +++ b/third_party/mold/test/elf/x86_64_z-text.sh @@ -0,0 +1,47 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +# Skip if libc is musl +ldd --help 2>&1 | grep -q musl && skip + +# Skip if target is not x86-64 +[ $MACHINE = x86_64 ] || skip + +cat <<'EOF' | $CC -c -o $t/a.o -x assembler - +.globl fn1 +fn1: + sub $8, %rsp + movabs ptr, %rax + call *%rax + add $8, %rsp + ret +EOF + +cat <& $t/log +grep -q 'a.o: -cet-report=warning: missing GNU_PROPERTY_X86_FEATURE_1_IBT' $t/log +grep -q 'a.o: -cet-report=warning: missing GNU_PROPERTY_X86_FEATURE_1_SHSTK' $t/log + +! $CC -B. -o $t/exe $t/a.o -Wl,-z,cet-report=error >& $t/log +grep -q 'a.o: -cet-report=error: missing GNU_PROPERTY_X86_FEATURE_1_IBT' $t/log +grep -q 'a.o: -cet-report=error: missing GNU_PROPERTY_X86_FEATURE_1_SHSTK' $t/log diff --git a/third_party/mold/test/elf/z-defs.sh b/third_party/mold/test/elf/z-defs.sh new file mode 100755 index 00000000000..ff17d928b77 --- /dev/null +++ b/third_party/mold/test/elf/z-defs.sh @@ -0,0 +1,20 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log || false +grep -q 'undefined symbol:.* foo' $t/log + +! $CC -B. -shared -o $t/b.so $t/a.o -Wl,-no-undefined 2> $t/log || false +grep -q 'undefined symbol:.* foo' $t/log + +$CC -B. -shared -o $t/c.so $t/a.o -Wl,-z,defs -Wl,--warn-unresolved-symbols 2> $t/log +grep -q 'undefined symbol:.* foo$' $t/log diff --git a/third_party/mold/test/elf/z-dynamic-undefined-weak.sh b/third_party/mold/test/elf/z-dynamic-undefined-weak.sh new file mode 100755 index 00000000000..decc7871182 --- /dev/null +++ b/third_party/mold/test/elf/z-dynamic-undefined-weak.sh @@ -0,0 +1,34 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +cat <&1 | grep -q musl && skip + +cat < /dev/null +! 
./mold -z start-stop-visibility=protected --version 2> $t/log +grep -q 'unsupported visibility: protected' $t/log diff --git a/third_party/mold/test/elf/z-unknown.sh b/third_party/mold/test/elf/z-unknown.sh new file mode 100755 index 00000000000..4a68ac584cd --- /dev/null +++ b/third_party/mold/test/elf/z-unknown.sh @@ -0,0 +1,6 @@ +// clang-format off +#!/bin/bash +. $(dirname $0)/common.inc + +./mold -z no-such-opt 2>&1 | grep -q 'unknown command line option: -z no-such-opt' +./mold -zno-such-opt 2>&1 | grep -q 'unknown command line option: -zno-such-opt' diff --git a/third_party/mold/test/gentoo-test.sh b/third_party/mold/test/gentoo-test.sh new file mode 100755 index 00000000000..b73b6c17c75 --- /dev/null +++ b/third_party/mold/test/gentoo-test.sh @@ -0,0 +1,69 @@ +// clang-format off +#!/bin/bash +# +# This test script takes a Gentoo package name and tries to build it +# using mold in a Docker environment. We chose Gentoo Linux as a test +# target, because its source-based package allows us to build programs +# locally and run their test suites without any hassle. +# +# You can get a complete list of Gentoo packages availalbe for testing +# with the following command: +# +# docker run --rm mold-gentoo emerge --color n -s '' | \ +# perl -ne 'next unless m!^\*\s+(\S+/\S+)!; print "$1\n"' + +package="$1" + +if [ "$package" = "" ]; then + echo "Usage: $0 gentoo-package-name" + exit 1 +fi + +set -x + +# Create a Docker image +if ! docker image ls mold-gentoo | grep -q mold-gentoo; then + set -e + cat <> /etc/portage/make.conf && \ + echo 'ACCEPT_KEYWORDS="~amd64"' >> /etc/portage/make.conf && \ + echo 'ACCEPT_LICENSE="* -@EULA"' >> /etc/portage/make.conf && \ + echo 'FEATURES="\${FEATURE} noclean nostrip ccache -ipc-sandbox -network-sandbox -pid-sandbox -sandbox"' >> /etc/portage/make.conf && \ + echo 'CCACHE_DIR="/ccache"' >> /etc/portage/make.conf +RUN emerge gdb lld clang vim emacs strace ccache xeyes dev-util/cmake dev-vcs/git && rm -rf /var/tmp/portage +EOF + set +e +fi + +git_hash=$(./mold --version | perl -ne '/\((\w+)/; print $1;') + +if [ "$package" = dev-libs/concurrencykit ]; then + echo "Skipping known broken package: $package" + exit 0 +fi + +# Build a given package in Docker +cmd1='(cd /usr/bin; ln -sf /mold/mold $(realpath ld))' +cmd2="MAKEOPTS=-'j$(nproc) --load-average=100' emerge --onlydeps $package" +cmd3="MAKEOPTS='-j$(nproc) --load-average=100' FEATURES=test emerge $package" +filename=`echo "$package" | sed 's!/!_!g'` +docker="docker run --rm --cap-add=SYS_PTRACE -v `pwd`:/mold -v /var/cache/ccache-gentoo:/ccache mold-gentoo timeout -v -k 15s 1h" +dir=gentoo/$git_hash + +mkdir -p "$dir"/success "$dir"/failure + +$docker nice -n 19 bash -c "$cmd1 && $cmd2 && $cmd3" >& "$dir"/"$filename".mold +if [ $? = 0 ]; then + mv "$dir"/"$filename".mold "$dir"/success +else + mv "$dir"/"$filename".mold "$dir"/failure +fi + +$docker nice -n 19 bash -c "$cmd2 && $cmd3" >& "$dir"/"$filename".ld +if [ $? = 0 ]; then + mv "$dir"/"$filename".ld "$dir"/success +else + mv "$dir"/"$filename".ld "$dir"/failure +fi diff --git a/third_party/mold/update-git-hash.cmake b/third_party/mold/update-git-hash.cmake new file mode 100644 index 00000000000..ede3ab67d2c --- /dev/null +++ b/third_party/mold/update-git-hash.cmake @@ -0,0 +1,34 @@ +// clang-format off +# Get a git hash value. We do not want to use git command here +# because we don't want to make git a build-time dependency. 
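#
# Hedged illustration (not part of the patch itself): the NEW_CONTENTS template
# below emits a tiny C++ translation unit that exposes the hash to the rest of
# mold. With a placeholder value, the generated file would look roughly like
# this; the checked-in third_party/mold/git-hash.cc added later in this series
# presumably serves the same purpose without running CMake.
#
#   #include "third_party/libcxx/string"
#   namespace mold {
#   std::string mold_git_hash = "0123456789abcdef0123456789abcdef01234567";  // placeholder hash
#   }
#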
+if(EXISTS "${SOURCE_DIR}/.git/HEAD") + file(READ "${SOURCE_DIR}/.git/HEAD" HASH) + string(STRIP "${HASH}" HASH) + + if(HASH MATCHES "^ref: (.*)") + set(HEAD "${CMAKE_MATCH_1}") + if(EXISTS "${SOURCE_DIR}/.git/${HEAD}") + file(READ "${SOURCE_DIR}/.git/${HEAD}" HASH) + string(STRIP "${HASH}" HASH) + else() + file(READ "${SOURCE_DIR}/.git/packed-refs" PACKED_REFS) + string(REGEX REPLACE ".*\n([0-9a-f]+) ${HEAD}\n.*" "\\1" HASH "\n${PACKED_REFS}") + endif() + endif() +endif() + +# Create new file contents and update a given file if necessary. +set(NEW_CONTENTS "#include "third_party/libcxx/string" +namespace mold { +std::string mold_git_hash = \"${HASH}\"; +} +") + +if(EXISTS "${OUTPUT_FILE}") + file(READ "${OUTPUT_FILE}" OLD_CONTENTS) + if(NOT "${NEW_CONTENTS}" STREQUAL "${OLD_CONTENTS}") + file(WRITE "${OUTPUT_FILE}" "${NEW_CONTENTS}") + endif() +else() + file(WRITE "${OUTPUT_FILE}" "${NEW_CONTENTS}") +endif() diff --git a/third_party/mold/uuid.cc b/third_party/mold/uuid.cc new file mode 100644 index 00000000000..9cc782c5f80 --- /dev/null +++ b/third_party/mold/uuid.cc @@ -0,0 +1,21 @@ +// clang-format off +#include "third_party/mold/common.h" + +#include "third_party/libcxx/random" + +namespace mold { + +std::array get_uuid_v4() { + std::array bytes; + + std::random_device rand; + u32 buf[4] = { rand(), rand(), rand(), rand() }; + memcpy(bytes.data(), buf, 16); + + // Indicate that this is UUIDv4 as defined by RFC4122. + bytes[6] = (bytes[6] & 0b00001111) | 0b01000000; + bytes[8] = (bytes[8] & 0b00111111) | 0b10000000; + return bytes; +} + +} // namespace mold From 474350c602c8e5fdab2a8c839b5e17c29c5011d5 Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Mon, 26 Jun 2023 16:32:07 +0000 Subject: [PATCH 2/6] wip --- Makefile | 1 + third_party/libcxx/atomic | 2 +- third_party/third_party.mk | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d798994896e..6f892e44d24 100644 --- a/Makefile +++ b/Makefile @@ -199,6 +199,7 @@ include third_party/zip/zip.mk include third_party/xxhash/xxhash.mk include third_party/unzip/unzip.mk include tool/build/lib/buildlib.mk +include third_party/mold/mold.mk include third_party/chibicc/chibicc.mk include third_party/chibicc/test/test.mk include third_party/python/python.mk diff --git a/third_party/libcxx/atomic b/third_party/libcxx/atomic index 07ee818ef23..1fed091a967 100644 --- a/third_party/libcxx/atomic +++ b/third_party/libcxx/atomic @@ -803,7 +803,7 @@ bool __cxx_atomic_compare_exchange_weak( } template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY inline bool __cxx_atomic_compare_exchange_weak( __cxx_atomic_base_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) { diff --git a/third_party/third_party.mk b/third_party/third_party.mk index 5be7aa6f4d2..5cf31323e2a 100644 --- a/third_party/third_party.mk +++ b/third_party/third_party.mk @@ -6,6 +6,7 @@ o/$(MODE)/third_party: \ o/$(MODE)/third_party/argon2 \ o/$(MODE)/third_party/awk \ o/$(MODE)/third_party/bzip2 \ + o/$(MODE)/third_party/mold \ o/$(MODE)/third_party/chibicc \ o/$(MODE)/third_party/compiler_rt \ o/$(MODE)/third_party/ctags \ From a5ab4fa2cb45e186149d8edd789172ef716edf61 Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Tue, 27 Jun 2023 18:12:06 +0000 Subject: [PATCH 3/6] wip: removed tbb use --- third_party/mold/README.cosmo | 6 ++++++ third_party/mold/common.h | 16 ++++++++-------- third_party/mold/compress.cc | 6 +++--- third_party/mold/demangle.cc | 6 +++--- 
third_party/mold/elf/input-sections.cc | 4 ++-- third_party/mold/fake_tbb.h | 8 ++++++++ third_party/mold/mold.mk | 10 +++++++--- 7 files changed, 37 insertions(+), 19 deletions(-) diff --git a/third_party/mold/README.cosmo b/third_party/mold/README.cosmo index 7694651d6be..605a7e10d99 100644 --- a/third_party/mold/README.cosmo +++ b/third_party/mold/README.cosmo @@ -16,3 +16,9 @@ SOURCE Date: Mon Jun 19 12:35:20 2023 +0900 Format + +CHANGES + * removed tbb by including a fake implementation + * made the parallel_for effectively single-threaded + * changed tbb:enumerable_thread_specific to thread_local + * removed rust demangle support diff --git a/third_party/mold/common.h b/third_party/mold/common.h index f8ef1300e37..2c14b14bbc4 100644 --- a/third_party/mold/common.h +++ b/third_party/mold/common.h @@ -10,6 +10,7 @@ #include "third_party/libcxx/cassert" #include "third_party/libcxx/cstdio" #include "third_party/libcxx/cstring" +#include "third_party/libcxx/unordered_map" #include "libc/calls/calls.h" #include "libc/calls/struct/flock.h" #include "libc/calls/weirdtypes.h" @@ -44,8 +45,6 @@ #include "libc/intrin/newbie.h" #include "libc/sock/select.h" #include "libc/sysv/consts/endian.h" -// MISSING #include -// MISSING #include #include "third_party/libcxx/vector" #ifdef _WIN32 @@ -758,21 +757,22 @@ class ZstdCompressor : public Compressor { // Counter is used to collect statistics numbers. class Counter { public: - Counter(std::string_view name, i64 value = 0) : name(name), values(value) { + Counter(std::string_view name, i64 value = 0) : name(name) { static std::mutex mu; std::scoped_lock lock(mu); instances.push_back(this); + values[this] = value; } Counter &operator++(int) { if (enabled) [[unlikely]] - values.local()++; + values[this]++; return *this; } Counter &operator+=(int delta) { if (enabled) [[unlikely]] - values.local() += delta; + values[this] += delta; return *this; } @@ -784,7 +784,7 @@ class Counter { i64 get_value(); std::string_view name; - tbb::enumerable_thread_specific values; + static thread_local std::unordered_map values; static inline std::vector instances; }; @@ -797,7 +797,7 @@ struct TimerRecord { std::string name; TimerRecord *parent; - tbb::concurrent_vector children; + std::vector children; i64 start; i64 end; i64 user; @@ -806,7 +806,7 @@ struct TimerRecord { }; void -print_timer_records(tbb::concurrent_vector> &); +print_timer_records(std::vector> &); template class Timer { diff --git a/third_party/mold/compress.cc b/third_party/mold/compress.cc index 32cc7ba3986..b2d8f07aa2a 100644 --- a/third_party/mold/compress.cc +++ b/third_party/mold/compress.cc @@ -17,9 +17,9 @@ #include "third_party/mold/common.h" -// MISSING #include -// MISSING #include -// MISSING #include +#include "third_party/mold/fake_tbb.h" +#include "third_party/zlib/zlib.h" +#include "third_party/zstd/zstd.h" #define CHECK(fn) \ do { \ diff --git a/third_party/mold/demangle.cc b/third_party/mold/demangle.cc index e4e478b3699..c9983548412 100644 --- a/third_party/mold/demangle.cc +++ b/third_party/mold/demangle.cc @@ -19,9 +19,9 @@ std::string_view demangle(std::string_view name) { // Try to demangle as a Rust symbol. Since legacy-style Rust symbols // are also valid as a C++ mangled name, we need to call this before // cpp_demangle. - p = rust_demangle(std::string(name).c_str(), 0); - if (p) - return p; + // p = rust_demangle(std::string(name).c_str(), 0); + // if (p) + // return p; // Try to demangle as a C++ symbol. 
if (std::optional s = cpp_demangle(name)) diff --git a/third_party/mold/elf/input-sections.cc b/third_party/mold/elf/input-sections.cc index 2446c2d6100..55f3ab35a54 100644 --- a/third_party/mold/elf/input-sections.cc +++ b/third_party/mold/elf/input-sections.cc @@ -2,8 +2,8 @@ #include "third_party/mold/elf/mold.h" #include "third_party/libcxx/limits" -// MISSING #include -// MISSING #include +#include "third_party/zlib/zlib.h" +#include "third_party/zstd/zstd.h" namespace mold::elf { diff --git a/third_party/mold/fake_tbb.h b/third_party/mold/fake_tbb.h index 63bfc2b2b6c..008966707c2 100644 --- a/third_party/mold/fake_tbb.h +++ b/third_party/mold/fake_tbb.h @@ -11,5 +11,13 @@ namespace tbb { void parallel_for_each(Range& rng, const Body& body) { } + template + void parallel_for( const Range& range, const Body& body ) { + } + + template + void parallel_for(Index first, Index last, const Function& f) { + } + } #endif diff --git a/third_party/mold/mold.mk b/third_party/mold/mold.mk index e109116a722..68eaa432097 100644 --- a/third_party/mold/mold.mk +++ b/third_party/mold/mold.mk @@ -3,8 +3,6 @@ PKGS += THIRD_PARTY_MOLD -private CPPFLAGS += -std=c++20 - THIRD_PARTY_MOLD_ARTIFACTS += THIRD_PARTY_MOLD_A THIRD_PARTY_MOLD = $(THIRD_PARTY_MOLD_A_DEPS) $(THIRD_PARTY_MOLD_A) THIRD_PARTY_MOLD_A = o/$(MODE)/third_party/mold/mold.a @@ -15,7 +13,9 @@ THIRD_PARTY_MOLD_OBJS = $(THIRD_PARTY_MOLD_SRCS:%.cc=o/$(MODE)/%.o) THIRD_PARTY_MOLD_A_DIRECTDEPS = \ THIRD_PARTY_LIBCXX \ - THIRD_PARTY_XXHASH + THIRD_PARTY_ZSTD \ + THIRD_PARTY_XXHASH \ + THIRD_PARTY_ZLIB THIRD_PARTY_MOLD_A_DEPS := \ $(call uniq,$(foreach x,$(THIRD_PARTY_MOLD_A_DIRECTDEPS),$($(x)))) @@ -43,6 +43,10 @@ $(THIRD_PARTY_MOLD_A).pkg: \ $(THIRD_PARTY_MOLD_OBJS) \ $(foreach x,$(THIRD_PARTY_MOLD_A_DIRECTDEPS),$($(x)_A).pkg) +$(THIRD_PARTY_MOLD_A_OBJS): private \ + CPPFLAGS += \ + -std=c++20 + o/$(MODE)/third_party/mold/mold.com.dbg: \ $(THIRD_PARTY_MOLD) \ o/$(MODE)/third_party/awk/main.o \ From 640d1aec04ce1e9aa20be643a777856d03260eb6 Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Sat, 8 Jul 2023 02:15:36 +0000 Subject: [PATCH 4/6] various fixes * getopt changes * removed some tbb call * used the new demangle callsite --- libc/sysv/consts/sig.h | 2 +- third_party/mold/common.h | 2 +- third_party/mold/config.h | 3 +++ third_party/mold/demangle.cc | 3 ++- third_party/mold/elf/cmdline.cc | 2 +- third_party/mold/elf/input-files.cc | 2 +- third_party/mold/elf/jobs.cc | 2 +- third_party/mold/elf/lto-unix.cc | 2 +- third_party/mold/elf/main.cc | 2 +- third_party/mold/elf/mold-wrapper.c | 4 ++-- third_party/mold/elf/mold.h | 2 +- third_party/mold/elf/subprocess.cc | 4 ++-- third_party/mold/main.cc | 11 ++++++----- third_party/mold/mold.mk | 2 ++ third_party/mold/test/elf/absolute-symbols.sh | 2 +- third_party/mold/test/elf/mold-wrapper.sh | 2 +- third_party/mold/test/elf/tls-alignment-multi.sh | 2 +- third_party/mold/test/elf/x86_64_preinit-array.sh | 2 +- third_party/xxhash/xxhash.h | 2 +- 19 files changed, 30 insertions(+), 23 deletions(-) create mode 100644 third_party/mold/config.h diff --git a/libc/sysv/consts/sig.h b/libc/sysv/consts/sig.h index 4ee3146b585..451588ccf81 100644 --- a/libc/sysv/consts/sig.h +++ b/libc/sysv/consts/sig.h @@ -70,8 +70,8 @@ COSMOPOLITAN_C_END_ #define SIGWINCH 28 #define SIGXCPU 24 #define SIGXFSZ 25 +#define SIGBUS 7 -#define SIGBUS SIGBUS #define SIGTHR SIGTHR #define SIGCHLD SIGCHLD #define SIGCONT SIGCONT diff --git a/third_party/mold/common.h b/third_party/mold/common.h index 2c14b14bbc4..09fd5da7bdd 100644 
--- a/third_party/mold/common.h +++ b/third_party/mold/common.h @@ -71,7 +71,7 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.h" +#include "libc/isystem/getopt.h" #include "third_party/musl/crypt.h" #include "third_party/musl/lockf.h" #endif diff --git a/third_party/mold/config.h b/third_party/mold/config.h new file mode 100644 index 00000000000..8f99876c9f8 --- /dev/null +++ b/third_party/mold/config.h @@ -0,0 +1,3 @@ +#define MOLD_VERSION "1.11.0" +#define MOLD_LIBDIR "/usr/local/lib" +#define MOLD_IS_SOLD 0 \ No newline at end of file diff --git a/third_party/mold/demangle.cc b/third_party/mold/demangle.cc index c9983548412..65aa37e6ffc 100644 --- a/third_party/mold/demangle.cc +++ b/third_party/mold/demangle.cc @@ -2,6 +2,7 @@ #include "third_party/mold/common.h" #include "third_party/libcxx/cstdlib" +#include "libc/stdio/stdio.h" #ifndef _WIN32 // MISSING #include @@ -38,7 +39,7 @@ std::optional cpp_demangle(std::string_view name) { #ifndef _WIN32 if (name.starts_with("_Z")) { int status; - char *p = abi::__cxa_demangle(std::string(name).c_str(), buf, &buflen, &status); + char *p = __cxa_demangle(std::string(name).c_str(), buf, &buflen, &status); if (status == 0) { buf = p; return p; diff --git a/third_party/mold/elf/cmdline.cc b/third_party/mold/elf/cmdline.cc index 685f41469e2..af1bc12dcba 100644 --- a/third_party/mold/elf/cmdline.cc +++ b/third_party/mold/elf/cmdline.cc @@ -36,7 +36,7 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.h" +#include "third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include "third_party/musl/lockf.h" #endif diff --git a/third_party/mold/elf/input-files.cc b/third_party/mold/elf/input-files.cc index 5638cee4d52..61906ebce1d 100644 --- a/third_party/mold/elf/input-files.cc +++ b/third_party/mold/elf/input-files.cc @@ -16,7 +16,7 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.h" +#include "third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include "third_party/musl/lockf.h" #endif diff --git a/third_party/mold/elf/jobs.cc b/third_party/mold/elf/jobs.cc index 451419c8ff5..1e8757deb1a 100644 --- a/third_party/mold/elf/jobs.cc +++ b/third_party/mold/elf/jobs.cc @@ -39,7 +39,7 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.h" +#include "third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include "third_party/musl/lockf.h" #endif diff --git a/third_party/mold/elf/lto-unix.cc b/third_party/mold/elf/lto-unix.cc index eec570251b6..7b68a397abb 100644 --- a/third_party/mold/elf/lto-unix.cc +++ b/third_party/mold/elf/lto-unix.cc @@ -107,7 +107,7 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.h" +#include "third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include "third_party/musl/lockf.h" diff --git a/third_party/mold/elf/main.cc b/third_party/mold/elf/main.cc index d8d512ab53c..645be0d50b6 100644 --- a/third_party/mold/elf/main.cc +++ b/third_party/mold/elf/main.cc @@ -50,7 +50,7 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.h" +#include 
"third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include "third_party/musl/lockf.h" #endif diff --git a/third_party/mold/elf/mold-wrapper.c b/third_party/mold/elf/mold-wrapper.c index db3df53063b..6d3ebd8e372 100644 --- a/third_party/mold/elf/mold-wrapper.c +++ b/third_party/mold/elf/mold-wrapper.c @@ -29,7 +29,7 @@ #include "libc/stdio/temp.h" #include "libc/str/str.h" #include "libc/sysv/consts/exit.h" -#include "third_party/getopt/getopt.h" +#include "third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include "third_party/musl/rand48.h" #include "libc/mem/alg.h" @@ -45,7 +45,7 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.h" +#include "third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include "third_party/musl/lockf.h" diff --git a/third_party/mold/elf/mold.h b/third_party/mold/elf/mold.h index 07ee9dc315d..a67c239ac53 100644 --- a/third_party/mold/elf/mold.h +++ b/third_party/mold/elf/mold.h @@ -42,7 +42,7 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.h" +#include "third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include "third_party/musl/lockf.h" #endif diff --git a/third_party/mold/elf/subprocess.cc b/third_party/mold/elf/subprocess.cc index 3892bea5cde..fedc9c45ca4 100644 --- a/third_party/mold/elf/subprocess.cc +++ b/third_party/mold/elf/subprocess.cc @@ -2,7 +2,7 @@ #if !defined(_WIN32) && !defined(__APPLE__) #include "third_party/mold/elf/mold.h" -// MISSING #include "config.h" +#include "third_party/mold/config.h" #include "third_party/libcxx/filesystem" #include "libc/calls/calls.h" @@ -51,7 +51,7 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.h" +#include "third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include "third_party/musl/lockf.h" diff --git a/third_party/mold/main.cc b/third_party/mold/main.cc index 0012c58b5e0..73c4aa5a881 100644 --- a/third_party/mold/main.cc +++ b/third_party/mold/main.cc @@ -1,6 +1,6 @@ // clang-format off #include "third_party/mold/common.h" -// MISSING #include "config.h" +#include "third_party/mold/config.h" #include "third_party/libcxx/cstring" #include "third_party/libcxx/filesystem" @@ -29,7 +29,7 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.h" +#include "third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include "third_party/musl/lockf.h" #endif @@ -167,10 +167,11 @@ void install_signal_handler() { #endif i64 get_default_thread_count() { + // TODO(fzakaria): disable tbb callsites // mold doesn't scale well above 32 threads. 
- int n = tbb::global_control::active_value( - tbb::global_control::max_allowed_parallelism); - return std::min(n, 32); + // int n = tbb::global_control::active_value( + // tbb::global_control::max_allowed_parallelism); + return 1; } } // namespace mold diff --git a/third_party/mold/mold.mk b/third_party/mold/mold.mk index 68eaa432097..a29cadb58db 100644 --- a/third_party/mold/mold.mk +++ b/third_party/mold/mold.mk @@ -15,12 +15,14 @@ THIRD_PARTY_MOLD_A_DIRECTDEPS = \ THIRD_PARTY_LIBCXX \ THIRD_PARTY_ZSTD \ THIRD_PARTY_XXHASH \ + THIRD_PARTY_GETOPT \ THIRD_PARTY_ZLIB THIRD_PARTY_MOLD_A_DEPS := \ $(call uniq,$(foreach x,$(THIRD_PARTY_MOLD_A_DIRECTDEPS),$($(x)))) # https://github.com/rui314/mold/blob/d4d93d7fb72dd19c44aafa4dd5397e35787d33ad/CMakeLists.txt#L62 +# TODO(fzakaria): figure out solution for -Wno-error=class-memaccess $(THIRD_PARTY_MOLD_OBJS): private \ CPPFLAGS += \ -std=gnu++20 \ diff --git a/third_party/mold/test/elf/absolute-symbols.sh b/third_party/mold/test/elf/absolute-symbols.sh index 60ada3ec697..a2ee30a90a3 100755 --- a/third_party/mold/test/elf/absolute-symbols.sh +++ b/third_party/mold/test/elf/absolute-symbols.sh @@ -41,7 +41,7 @@ cat < diff --git a/third_party/mold/test/elf/mold-wrapper.sh b/third_party/mold/test/elf/mold-wrapper.sh index c1cbd65fe00..788097580af 100755 --- a/third_party/mold/test/elf/mold-wrapper.sh +++ b/third_party/mold/test/elf/mold-wrapper.sh @@ -39,7 +39,7 @@ cat <<'EOF' | $CC -xc -o $t/exe - #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.h" +#include "third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include "third_party/musl/lockf.h" diff --git a/third_party/mold/test/elf/tls-alignment-multi.sh b/third_party/mold/test/elf/tls-alignment-multi.sh index 58cf8bd7b38..8a280f5046e 100755 --- a/third_party/mold/test/elf/tls-alignment-multi.sh +++ b/third_party/mold/test/elf/tls-alignment-multi.sh @@ -24,7 +24,7 @@ cat < Date: Sat, 8 Jul 2023 20:02:40 +0000 Subject: [PATCH 5/6] minor fixes --- third_party/libcxx/span | 14 -------------- third_party/mold/fake_tbb.h | 3 +++ third_party/mold/mold.mk | 4 ++++ third_party/mold/perf.cc | 6 +++++- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/third_party/libcxx/span b/third_party/libcxx/span index 66cef1de3be..c56bed57ee1 100644 --- a/third_party/libcxx/span +++ b/third_party/libcxx/span @@ -1,8 +1,5 @@ // -*- C++ -*- -<<<<<<< HEAD // clang-format off -======= ->>>>>>> 80151924e (wip) //===------------------------------ span ---------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
@@ -133,17 +130,10 @@ template */ #include "third_party/libcxx/__config" -<<<<<<< HEAD #include "third_party/libcxx/iterator" // for iterators #include "third_party/libcxx/array" // for array #include "third_party/libcxx/type_traits" // for remove_cv, etc #include "third_party/libcxx/cstddef" // for byte -======= -#include "third_party/libcxx/cstddef" // for ptrdiff_t -#include "third_party/libcxx/iterator" // for iterators -#include "third_party/libcxx/array" // for array -#include "third_party/libcxx/type_traits" // for remove_cv, etc ->>>>>>> 80151924e (wip) #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -598,8 +588,4 @@ template _LIBCPP_END_NAMESPACE_STD -<<<<<<< HEAD -#endif // _LIBCPP_SPAN -======= #endif // _LIBCPP_SPAN ->>>>>>> 80151924e (wip) diff --git a/third_party/mold/fake_tbb.h b/third_party/mold/fake_tbb.h index 008966707c2..072fa42e5bf 100644 --- a/third_party/mold/fake_tbb.h +++ b/third_party/mold/fake_tbb.h @@ -3,6 +3,9 @@ namespace tbb { + template + using concurrent_vector = std::vector; + template void parallel_for_each(InputIterator first, InputIterator last, const Function& f) { } diff --git a/third_party/mold/mold.mk b/third_party/mold/mold.mk index a29cadb58db..f0e76c19461 100644 --- a/third_party/mold/mold.mk +++ b/third_party/mold/mold.mk @@ -13,6 +13,10 @@ THIRD_PARTY_MOLD_OBJS = $(THIRD_PARTY_MOLD_SRCS:%.cc=o/$(MODE)/%.o) THIRD_PARTY_MOLD_A_DIRECTDEPS = \ THIRD_PARTY_LIBCXX \ + LIBC_STR \ + LIBC_INTRIN \ + LIBC_STDIO \ + LIBC_RUNTIME \ THIRD_PARTY_ZSTD \ THIRD_PARTY_XXHASH \ THIRD_PARTY_GETOPT \ diff --git a/third_party/mold/perf.cc b/third_party/mold/perf.cc index 7da0df840f6..90ab1bcffee 100644 --- a/third_party/mold/perf.cc +++ b/third_party/mold/perf.cc @@ -1,9 +1,11 @@ // clang-format off #include "third_party/mold/common.h" +#include "third_party/mold/fake_tbb.h" #include "third_party/libcxx/functional" #include "third_party/libcxx/iomanip" #include "third_party/libcxx/ios" +#include "third_party/libcxx/numeric" #ifndef _WIN32 #include "libc/calls/calls.h" @@ -27,7 +29,9 @@ namespace mold { i64 Counter::get_value() { - return values.combine(std::plus()); + return std::accumulate(values.begin(), values.end(), 0, [](i64 a, const std::pair& b) { + return a + b.second; + }); } void Counter::print() { From 2b4d6124d939116c0fffbb17b7282256cc51c579 Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Sat, 8 Jul 2023 20:57:13 +0000 Subject: [PATCH 6/6] WIP --- third_party/mold/elf/arch-alpha.cc | 331 ---------- third_party/mold/elf/arch-arm32.cc | 737 --------------------- third_party/mold/elf/arch-arm64.cc | 595 ----------------- third_party/mold/elf/arch-i386.cc | 565 ---------------- third_party/mold/elf/arch-m68k.cc | 326 ---------- third_party/mold/elf/arch-ppc32.cc | 452 ------------- third_party/mold/elf/arch-ppc64v1.cc | 687 -------------------- third_party/mold/elf/arch-ppc64v2.cc | 555 ---------------- third_party/mold/elf/arch-riscv.cc | 938 --------------------------- third_party/mold/elf/arch-s390x.cc | 491 -------------- third_party/mold/elf/arch-sh4.cc | 355 ---------- third_party/mold/elf/arch-sparc64.cc | 622 ------------------ third_party/mold/elf/cmdline.cc | 3 +- third_party/mold/elf/elf.h | 2 +- third_party/mold/elf/main.cc | 7 +- third_party/mold/elf/mold.h | 8 +- third_party/mold/fake_tbb.h | 40 ++ third_party/mold/git-hash.cc | 5 + third_party/mold/hyperloglog.cc | 3 +- third_party/mold/mold.mk | 7 +- 20 files changed, 63 insertions(+), 6666 deletions(-) delete mode 100644 third_party/mold/elf/arch-alpha.cc 
delete mode 100644 third_party/mold/elf/arch-arm32.cc delete mode 100644 third_party/mold/elf/arch-arm64.cc delete mode 100644 third_party/mold/elf/arch-i386.cc delete mode 100644 third_party/mold/elf/arch-m68k.cc delete mode 100644 third_party/mold/elf/arch-ppc32.cc delete mode 100644 third_party/mold/elf/arch-ppc64v1.cc delete mode 100644 third_party/mold/elf/arch-ppc64v2.cc delete mode 100644 third_party/mold/elf/arch-riscv.cc delete mode 100644 third_party/mold/elf/arch-s390x.cc delete mode 100644 third_party/mold/elf/arch-sh4.cc delete mode 100644 third_party/mold/elf/arch-sparc64.cc create mode 100644 third_party/mold/git-hash.cc diff --git a/third_party/mold/elf/arch-alpha.cc b/third_party/mold/elf/arch-alpha.cc deleted file mode 100644 index e0b332d7c8f..00000000000 --- a/third_party/mold/elf/arch-alpha.cc +++ /dev/null @@ -1,331 +0,0 @@ -// clang-format off -// Alpha is a 64-bit RISC ISA developed by DEC (Digital Equipment -// Corporation) in the early '90s. It aimed to be an ISA that would last -// 25 years. DEC expected Alpha would become 1000x faster during that time -// span. Since the ISA was developed from scratch for future machines, -// it's 64-bit from the beginning. There's no 32-bit variant. -// -// DEC ported its own Unix (Tru64) to Alpha. Microsoft also ported Windows -// NT to it. But it wasn't a huge commercial success. -// -// DEC was acquired by Compaq in 1997. In the late '90s, Intel and -// Hewlett-Packard were advertising that their upcoming Itanium processor -// would achieve significantly better performance than RISC processors, so -// Compaq decided to discontinue the Alpha processor line to switch to -// Itanium. Itanium resulted in a miserable failure, but it still suceeded -// to wipe out several RISC processors just by promising overly optimistic -// perf numbers. Alpha as an ISA would probably have been fine after 25 -// years since its introduction (which is 1992 + 25 = 2017), but the -// company and its market didn't last that long. -// -// From the linker's point of view, there are a few peculiarities in its -// psABI as shown below: -// -// - Alpha lacks PC-relative memory load/store instructions, so it uses -// register-relative load/store instructions in position-independent -// code. Specifically, GP (which is an alias for $r29) is always -// maintained to refer to .got+0x8000, and global variables' addresses -// are loaded in a GP-relative manner. -// -// - It looks like even function addresses are first loaded to register -// in a GP-relative manner before calling it. We can relax it to -// convert the instruction sequence with a direct branch instruction, -// but by default, object files don't use a direct branch to call a -// function. Therefore, by default, we don't need to create a PLT. -// Any function call is made by first reading its address from GOT and -// jump to the address. - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = ALPHA; - -// A 32-bit immediate can be materialized in a register with a "load high" -// and a "load low" instruction sequence. The first instruction sets the -// upper 16 bits in a register, and the second one set the lower 16 -// bits. When doing so, they sign-extend an immediate. Therefore, if the -// 15th bit of an immediate happens to be 1, setting a "low half" value -// negates the upper 16 bit values that has already been set in a -// register. To compensate that, we need to add 0x8000 when setting the -// upper 16 bits. 
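// Hedged aside (illustration only, not part of the file being deleted): the
// 0x8000 bias described above can be checked in isolation. This sketch assumes
// 16-bit halves where the low half is sign-extended when the two are combined.
#include <cassert>
#include <cstdint>
int main() {
  uint32_t val = 0x00018000;               // bit 15 is set, so the low half reads as negative
  uint32_t hi  = (val + 0x8000) >> 16;     // "load high" half with the bias applied
  int32_t  lo  = (int16_t)(val & 0xffff);  // "load low" half, sign-extends to -0x8000
  assert((hi << 16) + lo == val);          // the bias exactly cancels the sign-extension
  return 0;
}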
-static u32 hi(u32 val) { - return bits(val + 0x8000, 31, 16); -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) {} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) {} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) {} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_ALPHA_SREL32: - *(ul32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - u64 GP = ctx.got->shdr.sh_addr + 0x8000; - - switch (rel.r_type) { - case R_ALPHA_REFQUAD: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_ALPHA_GPREL32: - *(ul32 *)loc = S + A - GP; - break; - case R_ALPHA_LITERAL: - if (A) - *(ul16 *)loc = ctx.extra.got->get_addr(sym, A) - GP; - else - *(ul16 *)loc = GOT + G - GP; - break; - case R_ALPHA_BRSGP: - *(ul32 *)loc |= bits(S + A - P - 4, 22, 0); - break; - case R_ALPHA_GPDISP: - *(ul16 *)loc = hi(GP - P); - *(ul16 *)(loc + A) = GP - P; - break; - case R_ALPHA_SREL32: - *(ul32 *)loc = S + A - P; - break; - case R_ALPHA_GPRELHIGH: - *(ul16 *)loc = hi(S + A - GP); - break; - case R_ALPHA_GPRELLOW: - *(ul16 *)loc = S + A - GP; - break; - case R_ALPHA_TLSGD: - *(ul16 *)loc = sym.get_tlsgd_addr(ctx) - GP; - break; - case R_ALPHA_TLSLDM: - *(ul16 *)loc = ctx.got->get_tlsld_addr(ctx) - GP; - break; - case R_ALPHA_DTPRELHI: - *(ul16 *)loc = hi(S + A - ctx.dtp_addr); - break; - case R_ALPHA_DTPRELLO: - *(ul16 *)loc = S + A - ctx.dtp_addr; - break; - case R_ALPHA_GOTTPREL: - *(ul16 *)loc = sym.get_gottp_addr(ctx) + A - GP; - break; - case R_ALPHA_TPRELHI: - *(ul16 *)loc = hi(S + A - ctx.tp_addr); - break; - case R_ALPHA_TPRELLO: - *(ul16 *)loc = S + A - ctx.tp_addr; - break; - case R_ALPHA_LITUSE: - case R_ALPHA_HINT: - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_ALPHA_REFLONG: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A; - break; - case R_ALPHA_REFQUAD: - if (std::optional val = get_tombstone(sym, frag)) - *(ul64 *)loc = *val; - else - *(ul64 *)loc = S + A; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - Error(ctx) << sym << ": GNU ifunc symbol is not supported on Alpha"; - - switch (rel.r_type) { - case R_ALPHA_REFQUAD: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_ALPHA_LITERAL: - if (rel.r_addend) - ctx.extra.got->add_symbol(sym, rel.r_addend); - else - sym.flags |= NEEDS_GOT; - break; - case R_ALPHA_SREL32: - scan_pcrel(ctx, sym, rel); - break; - case R_ALPHA_BRSGP: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_ALPHA_TLSGD: - sym.flags |= NEEDS_TLSGD; - break; - case R_ALPHA_TLSLDM: - ctx.needs_tlsld = true; - break; - case R_ALPHA_GOTTPREL: - sym.flags |= NEEDS_GOTTP; - break; - case R_ALPHA_TPRELHI: - case R_ALPHA_TPRELLO: - check_tlsle(ctx, sym, rel); - break; - case R_ALPHA_GPREL32: - case R_ALPHA_LITUSE: - case R_ALPHA_GPDISP: - case R_ALPHA_HINT: - case R_ALPHA_GPRELHIGH: - case R_ALPHA_GPRELLOW: - case R_ALPHA_DTPRELHI: - case R_ALPHA_DTPRELLO: - break; - default: - Fatal(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -// An R_ALPHA_LITERAL relocation may request the linker to create a GOT -// entry for an external symbol with a non-zero addend. This is an unusual -// request which is not found in any other targets. -// -// Referring an external symbol with a non-zero addend is a bad practice -// because we need to create as many dynamic relocations as the number of -// distinctive addends for the same symbol. -// -// We don't want to mess up the implementation of the common GOT section -// for Alpha. So we create another GOT-like section, .alpha_got. Any GOT -// entry for an R_ALPHA_LITERAL reloc with a non-zero addend is created -// not in .got but in .alpha_got. -// -// Since .alpha_got entries are accessed relative to GP, .alpha_got -// needs to be close enough to .got. It's actually placed next to .got. 
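// Hedged aside (illustration only, not part of the file being deleted): the
// bookkeeping described above amounts to a deduplicated set of (symbol, addend)
// pairs, each of which gets its own GOT-like slot and, under PIC, its own
// dynamic relocation. A stand-alone model of that mapping:
#include <cassert>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
int main() {
  std::map<std::pair<std::string, int64_t>, size_t> slots;  // (symbol, addend) -> slot index
  auto add = [&](const std::string &sym, int64_t addend) {
    slots.emplace(std::make_pair(sym, addend), slots.size());
  };
  add("foo", 8);
  add("foo", 16);
  add("foo", 8);              // duplicate (symbol, addend) pair collapses into one slot
  assert(slots.size() == 2);  // two distinct addends for "foo" still need two slots
  return 0;
}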
-void AlphaGotSection::add_symbol(Symbol &sym, i64 addend) { - assert(addend); - std::scoped_lock lock(mu); - entries.push_back({&sym, addend}); -} - -bool operator<(const AlphaGotSection::Entry &a, const AlphaGotSection::Entry &b) { - return std::tuple(a.sym->file->priority, a.sym->sym_idx, a.addend) < - std::tuple(b.sym->file->priority, b.sym->sym_idx, b.addend); -}; - -u64 AlphaGotSection::get_addr(Symbol &sym, i64 addend) { - auto it = std::lower_bound(entries.begin(), entries.end(), Entry{&sym, addend}); - assert(it != entries.end()); - return this->shdr.sh_addr + (it - entries.begin()) * sizeof(Word); -} - -i64 AlphaGotSection::get_reldyn_size(Context &ctx) const { - i64 n = 0; - for (const Entry &e : entries) - if (e.sym->is_imported || (ctx.arg.pic && !e.sym->is_absolute())) - n++; - return n; -} - -void AlphaGotSection::finalize() { - sort(entries); - remove_duplicates(entries); - shdr.sh_size = entries.size() * sizeof(Word); -} - -void AlphaGotSection::copy_buf(Context &ctx) { - ElfRel *dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - reldyn_offset); - - for (i64 i = 0; i < entries.size(); i++) { - Entry &e = entries[i]; - u64 P = this->shdr.sh_addr + sizeof(Word) * i; - ul64 *buf = (ul64 *)(ctx.buf + this->shdr.sh_offset + sizeof(Word) * i); - - if (e.sym->is_imported) { - *buf = ctx.arg.apply_dynamic_relocs ? e.addend : 0; - *dynrel++ = ElfRel(P, E::R_ABS, e.sym->get_dynsym_idx(ctx), e.addend); - } else { - *buf = e.sym->get_addr(ctx) + e.addend; - if (ctx.arg.pic && !e.sym->is_absolute()) - *dynrel++ = ElfRel(P, E::R_RELATIVE, 0, *buf); - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-arm32.cc b/third_party/mold/elf/arch-arm32.cc deleted file mode 100644 index 5ac207b0aec..00000000000 --- a/third_party/mold/elf/arch-arm32.cc +++ /dev/null @@ -1,737 +0,0 @@ -// clang-format off -// ARM32 is a bit special from the linker's viewpoint because ARM -// processors support two different instruction encodings: Thumb and -// ARM (in a narrower sense). Thumb instructions are either 16 bits or -// 32 bits, while ARM instructions are all 32 bits. Feature-wise, -// thumb is a subset of ARM, so not all ARM instructions are -// representable in Thumb. -// -// ARM processors originally supported only ARM instructions. Thumb -// instructions were later added to increase code density. -// -// ARM processors runs in either ARM mode or Thumb mode. The mode can -// be switched using BX (branch and mode exchange)-family instructions. -// We need to use that instructions to, for example, call a function -// encoded in Thumb from a function encoded in ARM. Sometimes, the -// linker even has to emit an interworking thunk code to switch mode. -// -// ARM instructions are aligned to 4 byte boundaries. Thumb are to 2 -// byte boundaries. -// -// You can distinguish Thumb functions from ARM functions by looking -// at the least significant bit (LSB) of its "address". If LSB is 0, -// it's ARM; otherwise, Thumb. -// -// For example, if a symbol `foo` is of type STT_FUNC and has value -// 0x2001, `foo` is a function using Thumb instructions whose address -// is 0x2000 (not 0x2001, as Thumb instructions are always 2-byte -// aligned). Likewise, if a function pointer has value 0x2001, it -// refers a Thumb function at 0x2000. 
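// (Hedged illustration, not part of the file being deleted: the LSB convention
// in the example above, written out directly.)
//
//   uint64_t sym_value = 0x2001;             // STT_FUNC symbol value from the example
//   bool     is_thumb  = sym_value & 1;      // LSB set means Thumb encoding
//   uint64_t entry     = sym_value & ~1ull;  // actual entry point, 0x2000
//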
-// -// https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = ARM32; - -template <> -i64 get_addend(u8 *loc, const ElfRel &rel) { - switch (rel.r_type) { - case R_ARM_ABS32: - case R_ARM_REL32: - case R_ARM_TARGET1: - case R_ARM_BASE_PREL: - case R_ARM_GOTOFF32: - case R_ARM_GOT_PREL: - case R_ARM_GOT_BREL: - case R_ARM_TLS_GD32: - case R_ARM_TLS_LDM32: - case R_ARM_TLS_LDO32: - case R_ARM_TLS_IE32: - case R_ARM_TLS_LE32: - case R_ARM_TLS_GOTDESC: - case R_ARM_TARGET2: - return *(il32 *)loc; - case R_ARM_THM_JUMP11: - return sign_extend(*(ul16 *)loc, 10) << 1; - case R_ARM_THM_CALL: - case R_ARM_THM_JUMP24: - case R_ARM_THM_TLS_CALL: { - u32 S = bit(*(ul16 *)loc, 10); - u32 J1 = bit(*(ul16 *)(loc + 2), 13); - u32 J2 = bit(*(ul16 *)(loc + 2), 11); - u32 I1 = !(J1 ^ S); - u32 I2 = !(J2 ^ S); - u32 imm10 = bits(*(ul16 *)loc, 9, 0); - u32 imm11 = bits(*(ul16 *)(loc + 2), 10, 0); - u32 val = (S << 24) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1); - return sign_extend(val, 24); - } - case R_ARM_CALL: - case R_ARM_JUMP24: - case R_ARM_PLT32: - case R_ARM_TLS_CALL: - return sign_extend(*(ul32 *)loc, 23) << 2; - case R_ARM_MOVW_PREL_NC: - case R_ARM_MOVW_ABS_NC: - case R_ARM_MOVT_PREL: - case R_ARM_MOVT_ABS: { - u32 imm12 = bits(*(ul32 *)loc, 11, 0); - u32 imm4 = bits(*(ul32 *)loc, 19, 16); - return sign_extend((imm4 << 12) | imm12, 15); - } - case R_ARM_PREL31: - return sign_extend(*(ul32 *)loc, 30); - case R_ARM_THM_MOVW_PREL_NC: - case R_ARM_THM_MOVW_ABS_NC: - case R_ARM_THM_MOVT_PREL: - case R_ARM_THM_MOVT_ABS: { - u32 imm4 = bits(*(ul16 *)loc, 3, 0); - u32 i = bit(*(ul16 *)loc, 10); - u32 imm3 = bits(*(ul16 *)(loc + 2), 14, 12); - u32 imm8 = bits(*(ul16 *)(loc + 2), 7, 0); - u32 val = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8; - return sign_extend(val, 15); - } - default: - return 0; - } -} - -static void write_mov_imm(u8 *loc, u32 val) { - u32 imm12 = bits(val, 11, 0); - u32 imm4 = bits(val, 15, 12); - *(ul32 *)loc = (*(ul32 *)loc & 0xfff0f000) | (imm4 << 16) | imm12; -} - -static void write_thm_b_imm(u8 *loc, u32 val) { - // https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/BL--BLX--immediate- - u32 sign = bit(val, 24); - u32 I1 = bit(val, 23); - u32 I2 = bit(val, 22); - u32 J1 = !I1 ^ sign; - u32 J2 = !I2 ^ sign; - u32 imm10 = bits(val, 21, 12); - u32 imm11 = bits(val, 11, 1); - - ul16 *buf = (ul16 *)loc; - buf[0] = (buf[0] & 0b1111'1000'0000'0000) | (sign << 10) | imm10; - buf[1] = (buf[1] & 0b1101'0000'0000'0000) | (J1 << 13) | (J2 << 11) | imm11; -} - -static void write_thm_mov_imm(u8 *loc, u32 val) { - // https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/MOVT - u32 imm4 = bits(val, 15, 12); - u32 i = bit(val, 11); - u32 imm3 = bits(val, 10, 8); - u32 imm8 = bits(val, 7, 0); - - ul16 *buf = (ul16 *)loc; - buf[0] = (buf[0] & 0b1111'1011'1111'0000) | (i << 10) | imm4; - buf[1] = (buf[1] & 0b1000'1111'0000'0000) | (imm3 << 12) | imm8; -} - -template <> -void write_addend(u8 *loc, i64 val, const ElfRel &rel) { - switch (rel.r_type) { - case R_ARM_NONE: - break; - case R_ARM_ABS32: - case R_ARM_REL32: - case R_ARM_TARGET1: - case R_ARM_BASE_PREL: - case R_ARM_GOTOFF32: - case R_ARM_GOT_PREL: - case R_ARM_GOT_BREL: - case R_ARM_TLS_GD32: - case R_ARM_TLS_LDM32: - case R_ARM_TLS_LDO32: - case R_ARM_TLS_IE32: - case 
R_ARM_TLS_LE32: - case R_ARM_TLS_GOTDESC: - case R_ARM_TARGET2: - *(ul32 *)loc = val; - break; - case R_ARM_THM_JUMP11: - *(ul16 *)loc = (*(ul16 *)loc & 0xf800) | bits(val, 11, 1); - break; - case R_ARM_THM_CALL: - case R_ARM_THM_JUMP24: - case R_ARM_THM_TLS_CALL: - write_thm_b_imm(loc, val); - break; - case R_ARM_CALL: - case R_ARM_JUMP24: - case R_ARM_PLT32: - *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2); - break; - case R_ARM_MOVW_PREL_NC: - case R_ARM_MOVW_ABS_NC: - case R_ARM_MOVT_PREL: - case R_ARM_MOVT_ABS: - write_mov_imm(loc, val); - break; - case R_ARM_PREL31: - *(ul32 *)loc = (*(ul32 *)loc & 0x8000'0000) | (val & 0x7fff'ffff); - break; - case R_ARM_THM_MOVW_PREL_NC: - case R_ARM_THM_MOVW_ABS_NC: - case R_ARM_THM_MOVT_PREL: - case R_ARM_THM_MOVT_ABS: - write_thm_mov_imm(loc, val); - break; - default: - unreachable(); - } -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn[] = { - 0xe52d'e004, // push {lr} - 0xe59f'e004, // ldr lr, 2f - 0xe08f'e00e, // 1: add lr, pc, lr - 0xe5be'f008, // ldr pc, [lr, #8]! - 0x0000'0000, // 2: .word .got.plt - 1b - 8 - 0xe320'f000, // nop - 0xe320'f000, // nop - 0xe320'f000, // nop - }; - - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 16) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 16; -} - -static const ul32 plt_entry[] = { - 0xe59f'c004, // 1: ldr ip, 2f - 0xe08c'c00f, // add ip, ip, pc - 0xe59c'f000, // ldr pc, [ip] - 0x0000'0000, // 2: .word sym@GOT - 1b -}; - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - memcpy(buf, plt_entry, sizeof(plt_entry)); - *(ul32 *)(buf + 12) = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 12; -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - memcpy(buf, plt_entry, sizeof(plt_entry)); - *(ul32 *)(buf + 12) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 12; -} - -// ARM does not use .eh_frame for exception handling. Instead, it uses -// .ARM.exidx and .ARM.extab. So this function is empty. -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) {} - -// ARM and Thumb branch instructions can jump within ±16 MiB. 
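The is_jump_reachable() helper that follows encodes this limit as "the displacement fits in 25 signed bits", which is the same thing as the ±16 MiB window (2^24 bytes either way). A rough, self-contained restatement of the check with a worked example (the helper name here is illustrative):

    #include <cstdint>

    // A displacement is encodable iff it fits in a signed `bits`-wide field.
    inline bool fits_in_signed_bits(int64_t val, int bits) {
      int64_t limit = int64_t(1) << (bits - 1);
      return -limit <= val && val < limit;
    }

    // ARM/Thumb call displacement: fits_in_signed_bits(disp, 25), i.e. ±16 MiB.
    // Example: a call at P = 0x0800'0000 to S = 0x0900'0000 has
    // disp = +0x0100'0000 (exactly 16 MiB), which is just out of range, so the
    // linker must route the call through a range extension thunk instead.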
-static bool is_jump_reachable(i64 val) { - return sign_extend(val, 24) == val; -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - auto get_tls_trampoline_addr = [&, i = 0](u64 addr) mutable { - for (; i < output_section->thunks.size(); i++) { - i64 disp = output_section->shdr.sh_addr + output_section->thunks[i]->offset - - addr; - if (is_jump_reachable(disp)) - return disp; - } - unreachable(); - }; - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || rel.r_type == R_ARM_V4BX) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - u64 S = sym.get_addr(ctx); - u64 A = get_addend(*this, rel); - u64 P = get_addr() + rel.r_offset; - u64 T = S & 1; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - auto get_thumb_thunk_addr = [&] { return get_thunk_addr(i); }; - auto get_arm_thunk_addr = [&] { return get_thunk_addr(i) + 4; }; - - switch (rel.r_type) { - case R_ARM_ABS32: - case R_ARM_TARGET1: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_ARM_REL32: - *(ul32 *)loc = S + A - P; - break; - case R_ARM_THM_CALL: { - if (sym.is_remaining_undef_weak()) { - // On ARM, calling an weak undefined symbol jumps to the - // next instruction. - *(ul32 *)loc = 0x8000'f3af; // NOP.W - break; - } - - // THM_CALL relocation refers either BL or BLX instruction. - // They are different in only one bit. We need to use BL if - // the jump target is Thumb. Otherwise, use BLX. - i64 val = S + A - P; - if (is_jump_reachable(val)) { - if (T) { - write_thm_b_imm(loc, val); - *(ul16 *)(loc + 2) |= 0x1000; // rewrite to BL - } else { - write_thm_b_imm(loc, align_to(val, 4)); - *(ul16 *)(loc + 2) &= ~0x1000; // rewrite to BLX - } - } else { - write_thm_b_imm(loc, align_to(get_arm_thunk_addr() + A - P, 4)); - *(ul16 *)(loc + 2) &= ~0x1000; // rewrite to BLX - } - break; - } - case R_ARM_BASE_PREL: - *(ul32 *)loc = GOT + A - P; - break; - case R_ARM_GOTOFF32: - *(ul32 *)loc = ((S + A) | T) - GOT; - break; - case R_ARM_GOT_PREL: - case R_ARM_TARGET2: - *(ul32 *)loc = GOT + G + A - P; - break; - case R_ARM_GOT_BREL: - *(ul32 *)loc = G + A; - break; - case R_ARM_CALL: { - if (sym.is_remaining_undef_weak()) { - *(ul32 *)loc = 0xe320'f000; // NOP - break; - } - - // Just like THM_CALL, ARM_CALL relocation refers either BL or - // BLX instruction. We may need to rewrite BL → BLX or BLX → BL. 
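For reference, the two call encodings differ only slightly, which is what makes the in-place rewrites above (Thumb side) and below (ARM side) possible: in Thumb state, BL and BLX immediate share an encoding except for bit 12 of the second halfword (set for BL, clear for BLX, whose target must also be 4-byte aligned); in ARM state, BL is the 0xEBxxxxxx form and BLX immediate the 0xFAxxxxxx form. A condensed sketch of the selection rule, where the call site's state is implied by the relocation type (THM_CALL vs CALL) and the callee's state by the symbol's low bit:

    // Illustrative only; mold performs this choice inline in the relocation code.
    enum class CallInsn { ThumbBL, ThumbBLX, ArmBL, ArmBLX };

    inline CallInsn select_call(bool caller_is_thumb, bool callee_is_thumb) {
      // BL keeps the current instruction set; BLX switches to the other one.
      if (caller_is_thumb)
        return callee_is_thumb ? CallInsn::ThumbBL : CallInsn::ThumbBLX;
      return callee_is_thumb ? CallInsn::ArmBLX : CallInsn::ArmBL;
    }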
- bool is_bl = ((*(ul32 *)loc & 0xff00'0000) == 0xeb00'0000); - bool is_blx = ((*(ul32 *)loc & 0xfe00'0000) == 0xfa00'0000); - if (!is_bl && !is_blx) - Fatal(ctx) << *this << ": R_ARM_CALL refers neither BL nor BLX"; - - u64 val = S + A - P; - if (is_jump_reachable(val)) { - if (T) { - *(ul32 *)loc = 0xfa00'0000; // BLX - *(ul32 *)loc |= (bit(val, 1) << 24) | bits(val, 25, 2); - } else { - *(ul32 *)loc = 0xeb00'0000; // BL - *(ul32 *)loc |= bits(val, 25, 2); - } - } else { - *(ul32 *)loc = 0xeb00'0000; // BL - *(ul32 *)loc |= bits(get_arm_thunk_addr() + A - P, 25, 2); - } - break; - } - case R_ARM_JUMP24: { - if (sym.is_remaining_undef_weak()) { - *(ul32 *)loc = 0xe320'f000; // NOP - break; - } - - // These relocs refers a B (unconditional branch) instruction. - // Unlike BL or BLX, we can't rewrite B to BX in place when the - // processor mode switch is required because BX doesn't takes an - // immediate; it takes only a register. So if mode switch is - // required, we jump to a linker-synthesized thunk which does the - // job with a longer code sequence. - u64 val = S + A - P; - if (!is_jump_reachable(val) || T) - val = get_arm_thunk_addr() + A - P; - *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2); - break; - } - case R_ARM_PLT32: - if (sym.is_remaining_undef_weak()) { - *(ul32 *)loc = 0xe320'f000; // NOP - } else { - u64 val = (T ? get_arm_thunk_addr() : S) + A - P; - *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2); - } - break; - case R_ARM_THM_JUMP11: - assert(T); - check(S + A - P, -(1 << 11), 1 << 11); - *(ul16 *)loc &= 0xf800; - *(ul16 *)loc |= bits(S + A - P, 11, 1); - break; - case R_ARM_THM_JUMP19: { - i64 val = S + A - P; - check(val, -(1 << 19), 1 << 19); - - // sign:J2:J1:imm6:imm11:'0' - u32 sign = bit(val, 20); - u32 J2 = bit(val, 19); - u32 J1 = bit(val, 18); - u32 imm6 = bits(val, 17, 12); - u32 imm11 = bits(val, 11, 1); - - *(ul16 *)loc &= 0b1111'1011'1100'0000; - *(ul16 *)loc |= (sign << 10) | imm6; - - *(ul16 *)(loc + 2) &= 0b1101'0000'0000'0000; - *(ul16 *)(loc + 2) |= (J2 << 13) | (J1 << 11) | imm11; - break; - } - case R_ARM_THM_JUMP24: { - if (sym.is_remaining_undef_weak()) { - *(ul32 *)loc = 0x8000'f3af; // NOP - break; - } - - // Just like R_ARM_JUMP24, we need to jump to a thunk if we need to - // switch processor mode. 
- u64 val = S + A - P; - if (!is_jump_reachable(val) || !T) - val = get_thumb_thunk_addr() + A - P; - write_thm_b_imm(loc, val); - break; - } - case R_ARM_MOVW_PREL_NC: - write_mov_imm(loc, ((S + A) | T) - P); - break; - case R_ARM_MOVW_ABS_NC: - write_mov_imm(loc, (S + A) | T); - break; - case R_ARM_THM_MOVW_PREL_NC: - write_thm_mov_imm(loc, ((S + A) | T) - P); - break; - case R_ARM_PREL31: - check(S + A - P, -(1LL << 30), 1LL << 30); - *(ul32 *)loc &= 0x8000'0000; - *(ul32 *)loc |= (S + A - P) & 0x7fff'ffff; - break; - case R_ARM_THM_MOVW_ABS_NC: - write_thm_mov_imm(loc, (S + A) | T); - break; - case R_ARM_MOVT_PREL: - write_mov_imm(loc, (S + A - P) >> 16); - break; - case R_ARM_THM_MOVT_PREL: - write_thm_mov_imm(loc, (S + A - P) >> 16); - break; - case R_ARM_MOVT_ABS: - write_mov_imm(loc, (S + A) >> 16); - break; - case R_ARM_THM_MOVT_ABS: - write_thm_mov_imm(loc, (S + A) >> 16); - break; - case R_ARM_TLS_GD32: - *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - P; - break; - case R_ARM_TLS_LDM32: - *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - P; - break; - case R_ARM_TLS_LDO32: - *(ul32 *)loc = S + A - ctx.dtp_addr; - break; - case R_ARM_TLS_IE32: - *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - P; - break; - case R_ARM_TLS_LE32: - *(ul32 *)loc = S + A - ctx.tp_addr; - break; - case R_ARM_TLS_GOTDESC: - if (sym.has_tlsdesc(ctx)) { - // A is odd if the corresponding TLS_CALL is Thumb. - if (A & 1) - *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) - P + A - 6; - else - *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) - P + A - 4; - } else { - *(ul32 *)loc = S - ctx.tp_addr; - } - break; - case R_ARM_TLS_CALL: - if (sym.has_tlsdesc(ctx)) { - // BL - *(ul32 *)loc = 0xeb00'0000 | bits(get_tls_trampoline_addr(P + 8), 25, 2); - } else { - // BL -> NOP - *(ul32 *)loc = 0xe320'f000; - } - break; - case R_ARM_THM_TLS_CALL: - if (sym.has_tlsdesc(ctx)) { - u64 val = align_to(get_tls_trampoline_addr(P + 4), 4); - write_thm_b_imm(loc, val); - *(ul16 *)(loc + 2) &= ~0x1000; // rewrite BL with BLX - } else { - // BL -> NOP.W - *(ul32 *)loc = 0x8000'f3af; - } - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : get_addend(*this, rel); - - switch (rel.r_type) { - case R_ARM_ABS32: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A; - break; - case R_ARM_TLS_LDO32: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A - ctx.dtp_addr; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - break; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_ARM_ABS32: - case R_ARM_MOVT_ABS: - case R_ARM_THM_MOVT_ABS: - case R_ARM_TARGET1: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_ARM_THM_CALL: - case R_ARM_CALL: - case R_ARM_JUMP24: - case R_ARM_PLT32: - case R_ARM_THM_JUMP24: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_ARM_GOT_PREL: - case R_ARM_GOT_BREL: - case R_ARM_TARGET2: - sym.flags |= NEEDS_GOT; - break; - case R_ARM_MOVT_PREL: - case R_ARM_THM_MOVT_PREL: - case R_ARM_PREL31: - scan_pcrel(ctx, sym, rel); - break; - case R_ARM_TLS_GD32: - sym.flags |= NEEDS_TLSGD; - break; - case R_ARM_TLS_LDM32: - ctx.needs_tlsld = true; - break; - case R_ARM_TLS_IE32: - sym.flags |= NEEDS_GOTTP; - break; - case R_ARM_TLS_GOTDESC: - if (!relax_tlsdesc(ctx, sym)) - sym.flags |= NEEDS_TLSDESC; - break; - case R_ARM_TLS_LE32: - check_tlsle(ctx, sym, rel); - break; - case R_ARM_REL32: - case R_ARM_BASE_PREL: - case R_ARM_GOTOFF32: - case R_ARM_THM_JUMP11: - case R_ARM_THM_JUMP19: - case R_ARM_MOVW_PREL_NC: - case R_ARM_MOVW_ABS_NC: - case R_ARM_THM_MOVW_PREL_NC: - case R_ARM_THM_MOVW_ABS_NC: - case R_ARM_TLS_LDO32: - case R_ARM_TLS_CALL: - case R_ARM_THM_TLS_CALL: - case R_ARM_V4BX: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template <> -void RangeExtensionThunk::copy_buf(Context &ctx) { - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - - // TLS trampoline code. ARM32's TLSDESC is designed so that this - // common piece of code is factored out from object files to reduce - // output size. Since no one provide, the linker has to synthesize it. - static ul32 hdr[] = { - 0xe08e'0000, // add r0, lr, r0 - 0xe590'1004, // ldr r1, [r0, #4] - 0xe12f'ff11, // bx r1 - }; - - // This is a range extension and mode switch thunk. - // It has two entry points: +0 for Thumb and +4 for ARM. 
- const u8 entry[] = { - // .thumb - 0xfc, 0x46, // mov ip, pc - 0x60, 0x47, // bx ip # jumps to the following `ldr` insn - // .arm - 0x04, 0xc0, 0x9f, 0xe5, // ldr ip, 2f - 0x0f, 0xc0, 0x8c, 0xe0, // 1: add ip, ip, pc - 0x1c, 0xff, 0x2f, 0xe1, // bx ip - 0x00, 0x00, 0x00, 0x00, // 2: .word sym - 1b - }; - - static_assert(E::thunk_hdr_size == sizeof(hdr)); - static_assert(E::thunk_size == sizeof(entry)); - - memcpy(buf, hdr, sizeof(hdr)); - - for (i64 i = 0; i < symbols.size(); i++) { - u8 *loc = buf + sizeof(hdr) + i * sizeof(entry); - memcpy(loc, entry, sizeof(entry)); - - u64 S = symbols[i]->get_addr(ctx); - u64 P = output_section.shdr.sh_addr + offset + sizeof(hdr) + i * sizeof(entry); - *(ul32 *)(loc + 16) = S - P - 16; - } -} - -// ARM executables use an .ARM.exidx section to look up an exception -// handling record for the current instruction pointer. The table needs -// to be sorted by their addresses. -// -// Other target uses .eh_frame_hdr instead for the same purpose. -// I don't know why only ARM uses the different mechanism, but it's -// likely that it's due to some historical reason. -// -// This function sorts .ARM.exidx records. -void fixup_arm_exidx_section(Context &ctx) { - Timer t(ctx, "fixup_arm_exidx_section"); - - OutputSection *osec = find_section(ctx, SHT_ARM_EXIDX); - if (!osec) - return; - - // .ARM.exidx records consists of a signed 31-bit relative address - // and a 32-bit value. The relative address indicates the start - // address of a function that the record covers. The value is one of - // the followings: - // - // 1. CANTUNWIND indicating that there's no unwinding info for the function, - // 2. a compact unwinding record encoded into a 32-bit value, or - // 3. a 31-bit relative address which points to a larger record in - // the .ARM.extab section. - // - // CANTUNWIND is value 1. The most significant bit is set in (2) but - // not in (3). So we can distinguished them just by looking at a value. - const u32 EXIDX_CANTUNWIND = 1; - - struct Entry { - ul32 addr; - ul32 val; - }; - - if (osec->shdr.sh_size % sizeof(Entry)) - Fatal(ctx) << "invalid .ARM.exidx section size"; - - Entry *ent = (Entry *)(ctx.buf + osec->shdr.sh_offset); - i64 num_entries = osec->shdr.sh_size / sizeof(Entry); - - // Entry's addresses are relative to themselves. In order to sort - // records by addresses, we first translate them so that the addresses - // are relative to the beginning of the section. - auto is_relative = [](u32 val) { - return val != EXIDX_CANTUNWIND && !(val & 0x8000'0000); - }; - - tbb::parallel_for((i64)0, num_entries, [&](i64 i) { - i64 offset = sizeof(Entry) * i; - ent[i].addr = sign_extend(ent[i].addr, 30) + offset; - if (is_relative(ent[i].val)) - ent[i].val = 0x7fff'ffff & (ent[i].val + offset); - }); - - tbb::parallel_sort(ent, ent + num_entries, [](const Entry &a, const Entry &b) { - return a.addr < b.addr; - }); - - // Make addresses relative to themselves. - tbb::parallel_for((i64)0, num_entries, [&](i64 i) { - i64 offset = sizeof(Entry) * i; - ent[i].addr = 0x7fff'ffff & (ent[i].addr - offset); - if (is_relative(ent[i].val)) - ent[i].val = 0x7fff'ffff & (ent[i].val - offset); - }); - - // .ARM.exidx's sh_link should be set to the .text section index. - // Runtime doesn't care about it, but the binutils's strip command does. 
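The address rewriting done above is the subtle part of this pass: the first word of each .ARM.exidx entry is a self-relative prel31 offset, so the same function is described by different bit patterns depending on where its entry ends up after sorting. A small illustrative sketch of the two conversions (helper names invented here; mold uses its generic sign_extend for the same purpose):

    #include <cstdint>

    // Treat the low 31 bits of `field` as a signed quantity.
    inline int64_t sign_extend31(uint32_t field) {
      int64_t v = field & 0x7fff'ffff;
      return (field & 0x4000'0000) ? v - 0x8000'0000LL : v;
    }

    // Rebase a prel31 field of the entry at byte offset `entry_offset`
    // from "relative to this entry" to "relative to the section start".
    inline uint32_t to_section_relative(uint32_t field, uint32_t entry_offset) {
      return uint32_t(sign_extend31(field) + entry_offset);
    }

    // ... and back, once the entries have been sorted into their final slots.
    inline uint32_t to_self_relative(uint32_t section_rel, uint32_t entry_offset) {
      return (section_rel - entry_offset) & 0x7fff'ffff;
    }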
- if (ctx.shdr) { - if (Chunk *text = find_section(ctx, ".text")) { - osec->shdr.sh_link = text->shndx; - ctx.shdr->copy_buf(ctx); - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-arm64.cc b/third_party/mold/elf/arch-arm64.cc deleted file mode 100644 index ee1f5c09832..00000000000 --- a/third_party/mold/elf/arch-arm64.cc +++ /dev/null @@ -1,595 +0,0 @@ -// clang-format off -// This file contains ARM64-specific code. Being new, the ARM64's ELF -// psABI doesn't have anything peculiar. ARM64 is a clean RISC -// instruction set that supports PC-relative load/store instructions. -// -// Unlike ARM32, instructions length doesn't vary. All ARM64 -// instructions are 4 bytes long. -// -// Branch instructions used for function call can jump within ±128 MiB. -// We need to create range extension thunks to support binaries whose -// .text is larger than that. -// -// Unlike most other targets, the TLSDESC access model is used by default -// for -fPIC to access thread-local variables instead of the less -// efficient GD model. You can still enable GD but it needs the -// -mtls-dialect=trad flag. Since GD is used rarely, we don't need to -// implement GD → LE relaxation. -// -// https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = ARM64; - -static void write_adrp(u8 *buf, u64 val) { - *(ul32 *)buf |= (bits(val, 13, 12) << 29) | (bits(val, 32, 14) << 5); -} - -static void write_adr(u8 *buf, u64 val) { - *(ul32 *)buf |= (bits(val, 1, 0) << 29) | (bits(val, 20, 2) << 5); -} - -static void write_movn_movz(u8 *buf, i64 val) { - *(ul32 *)buf &= 0b0000'0000'0110'0000'0000'0000'0001'1111; - - if (val >= 0) - *(ul32 *)buf |= 0xd280'0000 | (bits(val, 15, 0) << 5); // rewrite to movz - else - *(ul32 *)buf |= 0x9280'0000 | (bits(~val, 15, 0) << 5); // rewrite to movn -} - -static u64 page(u64 val) { - return val & 0xffff'ffff'ffff'f000; -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn[] = { - 0xa9bf'7bf0, // stp x16, x30, [sp,#-16]! 
- 0x9000'0010, // adrp x16, .got.plt[2] - 0xf940'0211, // ldr x17, [x16, .got.plt[2]] - 0x9100'0210, // add x16, x16, .got.plt[2] - 0xd61f'0220, // br x17 - 0xd503'201f, // nop - 0xd503'201f, // nop - 0xd503'201f, // nop - }; - - u64 gotplt = ctx.gotplt->shdr.sh_addr + 16; - u64 plt = ctx.plt->shdr.sh_addr; - - memcpy(buf, insn, sizeof(insn)); - write_adrp(buf + 4, page(gotplt) - page(plt + 4)); - *(ul32 *)(buf + 8) |= bits(gotplt, 11, 3) << 10; - *(ul32 *)(buf + 12) |= (gotplt & 0xfff) << 10; -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - static const ul32 insn[] = { - 0x9000'0010, // adrp x16, .got.plt[n] - 0xf940'0211, // ldr x17, [x16, .got.plt[n]] - 0x9100'0210, // add x16, x16, .got.plt[n] - 0xd61f'0220, // br x17 - }; - - u64 gotplt = sym.get_gotplt_addr(ctx); - u64 plt = sym.get_plt_addr(ctx); - - memcpy(buf, insn, sizeof(insn)); - write_adrp(buf, page(gotplt) - page(plt)); - *(ul32 *)(buf + 4) |= bits(gotplt, 11, 3) << 10; - *(ul32 *)(buf + 8) |= (gotplt & 0xfff) << 10; -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - static const ul32 insn[] = { - 0x9000'0010, // adrp x16, GOT[n] - 0xf940'0211, // ldr x17, [x16, GOT[n]] - 0xd61f'0220, // br x17 - 0xd503'201f, // nop - }; - - u64 got = sym.get_got_addr(ctx); - u64 plt = sym.get_plt_addr(ctx); - - memcpy(buf, insn, sizeof(insn)); - write_adrp(buf, page(got) - page(plt)); - *(ul32 *)(buf + 4) |= bits(got, 11, 3) << 10; -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_AARCH64_ABS64: - *(ul64 *)loc = val; - break; - case R_AARCH64_PREL32: - *(ul32 *)loc = val - this->shdr.sh_addr - offset; - break; - case R_AARCH64_PREL64: - *(ul64 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -static bool is_adrp(u8 *loc) { - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page- - u32 insn = *(ul32 *)loc; - return (bits(insn, 31, 24) & 0b1001'1111) == 0b1001'0000; -} - -static bool is_ldr(u8 *loc) { - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- - u32 insn = *(ul32 *)loc; - return (bits(insn, 31, 20) & 0b1111'1111'1100) == 0b1111'1001'0100; -} - -static bool is_add(u8 *loc) { - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate-- - u32 insn = *(ul32 *)loc; - return (bits(insn, 31, 20) & 0b1111'1111'1100) == 0b1001'0001'0000; -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * 
sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_AARCH64_ABS64: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_AARCH64_LDST8_ABS_LO12_NC: - case R_AARCH64_ADD_ABS_LO12_NC: - *(ul32 *)loc |= bits(S + A, 11, 0) << 10; - break; - case R_AARCH64_LDST16_ABS_LO12_NC: - *(ul32 *)loc |= bits(S + A, 11, 1) << 10; - break; - case R_AARCH64_LDST32_ABS_LO12_NC: - *(ul32 *)loc |= bits(S + A, 11, 2) << 10; - break; - case R_AARCH64_LDST64_ABS_LO12_NC: - *(ul32 *)loc |= bits(S + A, 11, 3) << 10; - break; - case R_AARCH64_LDST128_ABS_LO12_NC: - *(ul32 *)loc |= bits(S + A, 11, 4) << 10; - break; - case R_AARCH64_MOVW_UABS_G0: - check(S + A, 0, 1 << 16); - *(ul32 *)loc |= bits(S + A, 15, 0) << 5; - break; - case R_AARCH64_MOVW_UABS_G0_NC: - *(ul32 *)loc |= bits(S + A, 15, 0) << 5; - break; - case R_AARCH64_MOVW_UABS_G1: - check(S + A, 0, 1LL << 32); - *(ul32 *)loc |= bits(S + A, 31, 16) << 5; - break; - case R_AARCH64_MOVW_UABS_G1_NC: - *(ul32 *)loc |= bits(S + A, 31, 16) << 5; - break; - case R_AARCH64_MOVW_UABS_G2: - check(S + A, 0, 1LL << 48); - *(ul32 *)loc |= bits(S + A, 47, 32) << 5; - break; - case R_AARCH64_MOVW_UABS_G2_NC: - *(ul32 *)loc |= bits(S + A, 47, 32) << 5; - break; - case R_AARCH64_MOVW_UABS_G3: - *(ul32 *)loc |= bits(S + A, 63, 48) << 5; - break; - case R_AARCH64_ADR_GOT_PAGE: - if (sym.has_got(ctx)) { - i64 val = page(G + GOT + A) - page(P); - check(val, -(1LL << 32), 1LL << 32); - write_adrp(loc, val); - } else { - // Relax GOT-loading ADRP+LDR to an immediate ADRP+ADD - i64 val = page(S + A) - page(P); - check(val, -(1LL << 32), 1LL << 32); - write_adrp(loc, val); - - u32 reg = bits(*(ul32 *)loc, 4, 0); - *(ul32 *)(loc + 4) = 0x9100'0000 | (reg << 5) | reg; // ADD - *(ul32 *)(loc + 4) |= bits(S + A, 11, 0) << 10; - i++; - } - break; - case R_AARCH64_ADR_PREL_PG_HI21: { - // The ARM64 psABI defines that an `ADRP x0, foo` and `ADD x0, x0, - // :lo12: foo` instruction pair to materialize a PC-relative address - // in a register can be relaxed to `NOP` followed by `ADR x0, foo` - // if foo is in PC ± 1 MiB. - if (ctx.arg.relax && i + 1 < rels.size() && - sign_extend(S + A - P - 4, 20) == S + A - P - 4) { - const ElfRel &rel2 = rels[i + 1]; - if (rel2.r_type == R_AARCH64_ADD_ABS_LO12_NC && - rel2.r_sym == rel.r_sym && - rel2.r_offset == rel.r_offset + 4 && - rel2.r_addend == rel.r_addend && - is_adrp(loc) && - is_add(loc + 4)) { - u32 reg1 = bits(*(ul32 *)loc, 4, 0); - u32 reg2 = bits(*(ul32 *)(loc + 4), 4, 0); - if (reg1 == reg2) { - *(ul32 *)loc = 0xd503'201f; // nop - *(ul32 *)(loc + 4) = 0x1000'0000 | reg1; // adr - write_adr(loc + 4, S + A - P - 4); - i++; - break; - } - } - } - - i64 val = page(S + A) - page(P); - check(val, -(1LL << 32), 1LL << 32); - write_adrp(loc, val); - break; - } - case R_AARCH64_ADR_PREL_LO21: - check(S + A - P, -(1LL << 20), 1LL << 20); - write_adr(loc, S + A - P); - break; - case R_AARCH64_CALL26: - case R_AARCH64_JUMP26: { - if (sym.is_remaining_undef_weak()) { - // On ARM, calling an weak undefined symbol jumps to the - // next instruction. 
- *(ul32 *)loc = 0xd503'201f; // nop - break; - } - - i64 val = S + A - P; - if (val < -(1 << 27) || (1 << 27) <= val) - val = get_thunk_addr(i) + A - P; - *(ul32 *)loc |= bits(val, 27, 2); - break; - } - case R_AARCH64_PLT32: - check(S + A - P, -(1LL << 31), 1LL << 31); - *(ul32 *)loc = S + A - P; - break; - case R_AARCH64_CONDBR19: - case R_AARCH64_LD_PREL_LO19: - check(S + A - P, -(1LL << 20), 1LL << 20); - *(ul32 *)loc |= bits(S + A - P, 20, 2) << 5; - break; - case R_AARCH64_PREL16: - check(S + A - P, -(1LL << 15), 1LL << 15); - *(ul16 *)loc = S + A - P; - break; - case R_AARCH64_PREL32: - check(S + A - P, -(1LL << 31), 1LL << 32); - *(ul32 *)loc = S + A - P; - break; - case R_AARCH64_PREL64: - *(ul64 *)loc = S + A - P; - break; - case R_AARCH64_LD64_GOT_LO12_NC: - *(ul32 *)loc |= bits(G + GOT + A, 11, 3) << 10; - break; - case R_AARCH64_LD64_GOTPAGE_LO15: { - i64 val = G + GOT + A - page(GOT); - check(val, 0, 1 << 15); - *(ul32 *)loc |= bits(val, 14, 3) << 10; - break; - } - case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: { - i64 val = page(sym.get_gottp_addr(ctx) + A) - page(P); - check(val, -(1LL << 32), 1LL << 32); - write_adrp(loc, val); - break; - } - case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: - *(ul32 *)loc |= bits(sym.get_gottp_addr(ctx) + A, 11, 3) << 10; - break; - case R_AARCH64_TLSLE_MOVW_TPREL_G0: { - i64 val = S + A - ctx.tp_addr; - check(val, -(1 << 15), 1 << 15); - write_movn_movz(loc, val); - break; - } - case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: - *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 15, 0) << 5; - break; - case R_AARCH64_TLSLE_MOVW_TPREL_G1: { - i64 val = S + A - ctx.tp_addr; - check(val, -(1LL << 31), 1LL << 31); - write_movn_movz(loc, val >> 16); - break; - } - case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: - *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 31, 16) << 5; - break; - case R_AARCH64_TLSLE_MOVW_TPREL_G2: { - i64 val = S + A - ctx.tp_addr; - check(val, -(1LL << 47), 1LL << 47); - write_movn_movz(loc, val >> 32); - break; - } - case R_AARCH64_TLSLE_ADD_TPREL_HI12: { - i64 val = S + A - ctx.tp_addr; - check(val, 0, 1LL << 24); - *(ul32 *)loc |= bits(val, 23, 12) << 10; - break; - } - case R_AARCH64_TLSLE_ADD_TPREL_LO12: - check(S + A - ctx.tp_addr, 0, 1 << 12); - *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 11, 0) << 10; - break; - case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: - *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 11, 0) << 10; - break; - case R_AARCH64_TLSGD_ADR_PAGE21: { - i64 val = page(sym.get_tlsgd_addr(ctx) + A) - page(P); - check(val, -(1LL << 32), 1LL << 32); - write_adrp(loc, val); - break; - } - case R_AARCH64_TLSGD_ADD_LO12_NC: - *(ul32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A, 11, 0) << 10; - break; - case R_AARCH64_TLSDESC_ADR_PAGE21: - if (sym.has_tlsdesc(ctx)) { - i64 val = page(sym.get_tlsdesc_addr(ctx) + A) - page(P); - check(val, -(1LL << 32), 1LL << 32); - write_adrp(loc, val); - } else { - // adrp x0, 0 -> movz x0, #tls_ofset_hi, lsl #16 - i64 val = (S + A - ctx.tp_addr); - check(val, -(1LL << 32), 1LL << 32); - *(ul32 *)loc = 0xd2a0'0000 | (bits(val, 32, 16) << 5); - } - break; - case R_AARCH64_TLSDESC_LD64_LO12: - if (sym.has_tlsdesc(ctx)) { - *(ul32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 3) << 10; - } else { - // ldr x2, [x0] -> movk x0, #tls_ofset_lo - u32 offset_lo = (S + A - ctx.tp_addr) & 0xffff; - *(ul32 *)loc = 0xf280'0000 | (offset_lo << 5); - } - break; - case R_AARCH64_TLSDESC_ADD_LO12: - if (sym.has_tlsdesc(ctx)) { - *(ul32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 0) << 10; - } else { - // add x0, x0, #0 -> nop - *(ul32 
*)loc = 0xd503'201f; - } - break; - case R_AARCH64_TLSDESC_CALL: - if (!sym.has_tlsdesc(ctx)) { - // blr x2 -> nop - *(ul32 *)loc = 0xd503'201f; - } - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_AARCH64_ABS64: - if (std::optional val = get_tombstone(sym, frag)) - *(ul64 *)loc = *val; - else - *(ul64 *)loc = S + A; - break; - case R_AARCH64_ABS32: { - i64 val = S + A; - check(val, 0, 1LL << 32); - *(ul32 *)loc = val; - break; - } - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - break; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = (u8 *)(contents.data() + rel.r_offset); - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_AARCH64_ABS64: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_AARCH64_ADR_GOT_PAGE: - // An ADR_GOT_PAGE and GOT_LO12_NC relocation pair is used to load a - // symbol's address from GOT. If the GOT value is a link-time - // constant, we may be able to rewrite the ADRP+LDR instruction pair - // with an ADRP+ADD, eliminating a GOT memory load. - if (ctx.arg.relax && sym.is_relative() && !sym.is_imported && - !sym.is_ifunc() && i + 1 < rels.size()) { - // ADRP+LDR must be consecutive and use the same register to relax. 
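As a reminder of what this relaxation buys, the before and after forms look roughly as follows (illustrative GAS syntax; the actual byte patching happens in apply_reloc_alloc above):

    // Before: load the symbol's address out of the GOT.
    //   adrp x0, :got:sym              // R_AARCH64_ADR_GOT_PAGE
    //   ldr  x0, [x0, :got_lo12:sym]   // R_AARCH64_LD64_GOT_LO12_NC
    //
    // After (only when the symbol's address is a link-time constant):
    //   adrp x0, sym                   // page of the symbol itself
    //   add  x0, x0, :lo12:sym         // low 12 bits of the symbol
    //
    // The register checks just below require that ADRP's destination, LDR's
    // base, and LDR's destination are all the same register and that the two
    // relocations are consecutive with zero addends, so the rewrite cannot
    // clobber an unrelated register or leave a half-patched pair behind.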
- const ElfRel &rel2 = rels[i + 1]; - if (rel2.r_type == R_AARCH64_LD64_GOT_LO12_NC && - rel2.r_offset == rel.r_offset + 4 && - rel2.r_sym == rel.r_sym && - rel.r_addend == 0 && - rel2.r_addend == 0 && - is_adrp(loc) && - is_ldr(loc + 4)) { - u32 rd = bits(*(ul32 *)loc, 4, 0); - u32 rn = bits(*(ul32 *)(loc + 4), 9, 5); - u32 rt = bits(*(ul32 *)(loc + 4), 4, 0); - if (rd == rn && rn == rt) { - i++; - break; - } - } - } - sym.flags |= NEEDS_GOT; - break; - case R_AARCH64_LD64_GOT_LO12_NC: - case R_AARCH64_LD64_GOTPAGE_LO15: - sym.flags |= NEEDS_GOT; - break; - case R_AARCH64_CALL26: - case R_AARCH64_JUMP26: - case R_AARCH64_PLT32: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: - case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: - sym.flags |= NEEDS_GOTTP; - break; - case R_AARCH64_ADR_PREL_PG_HI21: - scan_pcrel(ctx, sym, rel); - break; - case R_AARCH64_TLSGD_ADR_PAGE21: - sym.flags |= NEEDS_TLSGD; - break; - case R_AARCH64_TLSDESC_ADR_PAGE21: - case R_AARCH64_TLSDESC_LD64_LO12: - case R_AARCH64_TLSDESC_ADD_LO12: - if (!relax_tlsdesc(ctx, sym)) - sym.flags |= NEEDS_TLSDESC; - break; - case R_AARCH64_TLSLE_MOVW_TPREL_G0: - case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: - case R_AARCH64_TLSLE_MOVW_TPREL_G1: - case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: - case R_AARCH64_TLSLE_MOVW_TPREL_G2: - case R_AARCH64_TLSLE_ADD_TPREL_HI12: - case R_AARCH64_TLSLE_ADD_TPREL_LO12: - case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: - check_tlsle(ctx, sym, rel); - break; - case R_AARCH64_ADD_ABS_LO12_NC: - case R_AARCH64_ADR_PREL_LO21: - case R_AARCH64_CONDBR19: - case R_AARCH64_LD_PREL_LO19: - case R_AARCH64_LDST16_ABS_LO12_NC: - case R_AARCH64_LDST32_ABS_LO12_NC: - case R_AARCH64_LDST64_ABS_LO12_NC: - case R_AARCH64_LDST128_ABS_LO12_NC: - case R_AARCH64_LDST8_ABS_LO12_NC: - case R_AARCH64_MOVW_UABS_G0: - case R_AARCH64_MOVW_UABS_G0_NC: - case R_AARCH64_MOVW_UABS_G1: - case R_AARCH64_MOVW_UABS_G1_NC: - case R_AARCH64_MOVW_UABS_G2: - case R_AARCH64_MOVW_UABS_G2_NC: - case R_AARCH64_MOVW_UABS_G3: - case R_AARCH64_PREL16: - case R_AARCH64_PREL32: - case R_AARCH64_PREL64: - case R_AARCH64_TLSGD_ADD_LO12_NC: - case R_AARCH64_TLSDESC_CALL: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template <> -void RangeExtensionThunk::copy_buf(Context &ctx) { - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - - static const ul32 data[] = { - 0x9000'0010, // adrp x16, 0 # R_AARCH64_ADR_PREL_PG_HI21 - 0x9100'0210, // add x16, x16 # R_AARCH64_ADD_ABS_LO12_NC - 0xd61f'0200, // br x16 - }; - - static_assert(E::thunk_size == sizeof(data)); - - for (i64 i = 0; i < symbols.size(); i++) { - u64 S = symbols[i]->get_addr(ctx); - u64 P = output_section.shdr.sh_addr + offset + i * E::thunk_size; - - u8 *loc = buf + i * E::thunk_size; - memcpy(loc , data, sizeof(data)); - write_adrp(loc, page(S) - page(P)); - *(ul32 *)(loc + 4) |= bits(S, 11, 0) << 10; - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-i386.cc b/third_party/mold/elf/arch-i386.cc deleted file mode 100644 index 2692cacf1c2..00000000000 --- a/third_party/mold/elf/arch-i386.cc +++ /dev/null @@ -1,565 +0,0 @@ -// clang-format off -// i386 is similar to x86-64 but lacks PC-relative memory access -// instructions. So it's not straightforward to support position- -// independent code (PIC) on that target. 
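For contrast, x86-64 gets the same effect in a single instruction thanks to RIP-relative addressing; i386 has no equivalent addressing mode, which is what the rest of this comment works around (illustrative AT&T syntax):

    // x86-64: position-independent address materialization is one instruction.
    //   lea foo(%rip), %rax
    // i386 has no %eip-relative addressing, hence the get_pc_thunk sequence
    // described below.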
-// -// If an object file is compiled with -fPIC, a function that needs to load -// a value from memory first obtains its own address with the following -// code -// -// call __x86.get_pc_thunk.bx -// -// where __x86.get_pc_thunk.bx is defined as -// -// __x86.get_pc_thunk.bx: -// mov (%esp), %ebx # move the return address to %ebx -// ret -// -// . With the function's own address (or, more precisely, the address -// immediately after the call instruction), the function can compute an -// absolute address of a variable with its address + link-time constant. -// -// Executing call-mov-ret isn't very cheap, and allocating one register to -// store PC isn't cheap too, especially given that i386 has only 8 -// general-purpose registers. But that's the cost of PIC on i386. You need -// to pay it when creating a .so and a position-independent executable. -// -// When a position-independent function calls another function, it sets -// %ebx to the address of .got. Position-independent PLT entries use that -// register to load values from .got.plt/.got. -// -// If we are creating a position-dependent executable (PDE), we can't -// assume that %ebx is set to .got. For PDE, we need to create position- -// dependent PLT entries which don't use %ebx. -// -// https://github.com/rui314/psabi/blob/main/i386.pdf - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = I386; - -template <> -i64 get_addend(u8 *loc, const ElfRel &rel) { - switch (rel.r_type) { - case R_386_8: - case R_386_PC8: - return *loc; - case R_386_16: - case R_386_PC16: - return *(ul16 *)loc; - case R_386_32: - case R_386_PC32: - case R_386_GOT32: - case R_386_GOT32X: - case R_386_PLT32: - case R_386_GOTOFF: - case R_386_GOTPC: - case R_386_TLS_LDM: - case R_386_TLS_GOTIE: - case R_386_TLS_LE: - case R_386_TLS_IE: - case R_386_TLS_GD: - case R_386_TLS_LDO_32: - case R_386_SIZE32: - case R_386_TLS_GOTDESC: - return *(ul32 *)loc; - default: - return 0; - } -} - -template <> -void write_addend(u8 *loc, i64 val, const ElfRel &rel) { - switch (rel.r_type) { - case R_386_NONE: - break; - case R_386_8: - case R_386_PC8: - *loc = val; - break; - case R_386_16: - case R_386_PC16: - *(ul16 *)loc = val; - break; - case R_386_32: - case R_386_PC32: - case R_386_GOT32: - case R_386_GOT32X: - case R_386_PLT32: - case R_386_GOTOFF: - case R_386_GOTPC: - case R_386_TLS_LDM: - case R_386_TLS_GOTIE: - case R_386_TLS_LE: - case R_386_TLS_IE: - case R_386_TLS_GD: - case R_386_TLS_LDO_32: - case R_386_SIZE32: - case R_386_TLS_GOTDESC: - *(ul32 *)loc = val; - break; - default: - unreachable(); - } -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - if (ctx.arg.pic) { - static const u8 insn[] = { - 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 - 0x51, // push %ecx - 0x8d, 0x8b, 0, 0, 0, 0, // lea GOTPLT+4(%ebx), %ecx - 0xff, 0x31, // push (%ecx) - 0xff, 0x61, 0x04, // jmp *0x4(%ecx) - }; - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 7) = ctx.gotplt->shdr.sh_addr - ctx.got->shdr.sh_addr + 4; - } else { - static const u8 insn[] = { - 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 - 0x51, // push %ecx - 0xb9, 0, 0, 0, 0, // mov GOTPLT+4, %ecx - 0xff, 0x31, // push (%ecx) - 0xff, 0x61, 0x04, // jmp *0x4(%ecx) - 0xcc, // (padding) - }; - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 6) = ctx.gotplt->shdr.sh_addr + 4; - } -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - if (ctx.arg.pic) { - static const u8 insn[] = { - 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 - 0xb9, 0, 0, 0, 0, // mov $reloc_offset, %ecx - 0xff, 
0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx) - 0xcc, // (padding) - }; - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 5) = sym.get_plt_idx(ctx) * sizeof(ElfRel); - *(ul32 *)(buf + 11) = sym.get_gotplt_addr(ctx) - ctx.got->shdr.sh_addr; - } else { - static const u8 insn[] = { - 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 - 0xb9, 0, 0, 0, 0, // mov $reloc_offset, %ecx - 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT - 0xcc, // (padding) - }; - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 5) = sym.get_plt_idx(ctx) * sizeof(ElfRel); - *(ul32 *)(buf + 11) = sym.get_gotplt_addr(ctx); - } -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - if (ctx.arg.pic) { - static const u8 insn[] = { - 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 - 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx) - 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding) - }; - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 6) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr; - } else { - static const u8 insn[] = { - 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 - 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT - 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding) - }; - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 6) = sym.get_got_addr(ctx); - } -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_386_32: - *(ul32 *)loc = val; - break; - case R_386_PC32: - *(ul32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -static u32 relax_got32x(u8 *loc) { - // mov imm(%reg1), %reg2 -> lea imm(%reg1), %reg2 - if (loc[0] == 0x8b) - return 0x8d00 | loc[1]; - return 0; -} - -// Relax GD to LE -static void relax_gd_to_le(u8 *loc, ElfRel rel, u64 val) { - static const u8 insn[] = { - 0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax - 0x81, 0xc0, 0, 0, 0, 0, // add $tp_offset, %eax - }; - - switch (rel.r_type) { - case R_386_PLT32: - case R_386_PC32: - memcpy(loc - 3, insn, sizeof(insn)); - *(ul32 *)(loc + 5) = val; - break; - case R_386_GOT32: - case R_386_GOT32X: - memcpy(loc - 2, insn, sizeof(insn)); - *(ul32 *)(loc + 6) = val; - break; - default: - unreachable(); - } -} - -// Relax LD to LE -static void relax_ld_to_le(u8 *loc, ElfRel rel, u64 val) { - switch (rel.r_type) { - case R_386_PLT32: - case R_386_PC32: { - static const u8 insn[] = { - 0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax - 0x2d, 0, 0, 0, 0, // sub $tls_size, %eax - }; - memcpy(loc - 2, insn, sizeof(insn)); - *(ul32 *)(loc + 5) = val; - break; - } - case R_386_GOT32: - case R_386_GOT32X: { - static const u8 insn[] = { - 0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax - 0x2d, 0, 0, 0, 0, // sub $tls_size, %eax - 0x90, // nop - }; - memcpy(loc - 2, insn, sizeof(insn)); - *(ul32 *)(loc + 5) = val; - break; - } - default: - unreachable(); - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " 
out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - u64 S = sym.get_addr(ctx); - u64 A = get_addend(*this, rel); - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_386_8: - check(S + A, 0, 1 << 8); - *loc = S + A; - break; - case R_386_16: - check(S + A, 0, 1 << 16); - *(ul16 *)loc = S + A; - break; - case R_386_32: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_386_PC8: - check(S + A - P, -(1 << 7), 1 << 7); - *loc = S + A - P; - break; - case R_386_PC16: - check(S + A - P, -(1 << 15), 1 << 15); - *(ul16 *)loc = S + A - P; - break; - case R_386_PC32: - case R_386_PLT32: - *(ul32 *)loc = S + A - P; - break; - case R_386_GOT32: - *(ul32 *)loc = G + A; - break; - case R_386_GOT32X: - if (sym.has_got(ctx)) { - *(ul32 *)loc = G + A; - } else { - u32 insn = relax_got32x(loc - 2); - assert(insn); - loc[-2] = insn >> 8; - loc[-1] = insn; - *(ul32 *)loc = S + A - GOT; - } - break; - case R_386_GOTOFF: - *(ul32 *)loc = S + A - GOT; - break; - case R_386_GOTPC: - *(ul32 *)loc = GOT + A - P; - break; - case R_386_TLS_GOTIE: - *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - GOT; - break; - case R_386_TLS_LE: - *(ul32 *)loc = S + A - ctx.tp_addr; - break; - case R_386_TLS_IE: - *(ul32 *)loc = sym.get_gottp_addr(ctx) + A; - break; - case R_386_TLS_GD: - if (sym.has_tlsgd(ctx)) { - *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; - } else { - relax_gd_to_le(loc, rels[i + 1], S - ctx.tp_addr); - i++; - } - break; - case R_386_TLS_LDM: - if (ctx.got->has_tlsld(ctx)) { - *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; - } else { - relax_ld_to_le(loc, rels[i + 1], ctx.tp_addr - ctx.tls_begin); - i++; - } - break; - case R_386_TLS_LDO_32: - *(ul32 *)loc = S + A - ctx.dtp_addr; - break; - case R_386_SIZE32: - *(ul32 *)loc = sym.esym().st_size + A; - break; - case R_386_TLS_GOTDESC: - if (sym.has_tlsdesc(ctx)) { - *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) + A - GOT; - } else { - static const u8 insn[] = { - 0x8d, 0x05, 0, 0, 0, 0, // lea 0, %eax - }; - memcpy(loc - 2, insn, sizeof(insn)); - *(ul32 *)loc = S + A - ctx.tp_addr; - } - break; - case R_386_TLS_DESC_CALL: - if (!sym.has_tlsdesc(ctx)) { - // call *(%eax) -> nop - loc[0] = 0x66; - loc[1] = 0x90; - } - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : get_addend(*this, rel); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_386_8: - check(S + A, 0, 1 << 8); - *loc = S + A; - break; - case R_386_16: - check(S + A, 0, 1 << 16); - *(ul16 *)loc = S + A; - break; - case R_386_32: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A; - break; - case R_386_PC8: - check(S + A, -(1 << 7), 1 << 7); - *loc = S + A; - break; - case R_386_PC16: - check(S + A, -(1 << 15), 1 << 15); - *(ul16 *)loc = S + A; - break; - case R_386_PC32: - *(ul32 *)loc = S + A; - break; - case R_386_GOTPC: - *(ul32 *)loc = GOT + A; - break; - case R_386_GOTOFF: - *(ul32 *)loc = S + A - GOT; - break; - case R_386_TLS_LDO_32: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A - ctx.dtp_addr; - break; - case R_386_SIZE32: - *(ul32 *)loc = sym.esym().st_size + A; - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = (u8 *)(contents.data() + rel.r_offset); - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_386_8: - case R_386_16: - scan_absrel(ctx, sym, rel); - break; - case R_386_32: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_386_PC8: - case R_386_PC16: - case R_386_PC32: - scan_pcrel(ctx, sym, rel); - break; - case R_386_GOT32: - case R_386_GOTPC: - sym.flags |= NEEDS_GOT; - break; - case R_386_GOT32X: { - // We always want to relax GOT32X because static PIE doesn't - // work without it. - bool do_relax = !sym.is_imported && sym.is_relative() && - relax_got32x(loc - 2); - if (!do_relax) - sym.flags |= NEEDS_GOT; - break; - } - case R_386_PLT32: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_386_TLS_GOTIE: - case R_386_TLS_IE: - sym.flags |= NEEDS_GOTTP; - break; - case R_386_TLS_GD: - if (i + 1 == rels.size()) - Fatal(ctx) << *this << ": TLS_GD reloc must be followed by PLT or GOT32"; - - if (u32 ty = rels[i + 1].r_type; - ty != R_386_PLT32 && ty != R_386_PC32 && - ty != R_386_GOT32 && ty != R_386_GOT32X) - Fatal(ctx) << *this << ": TLS_GD reloc must be followed by PLT or GOT32"; - - // We always relax if -static because libc.a doesn't contain - // __tls_get_addr(). - if (ctx.arg.is_static || - (ctx.arg.relax && !ctx.arg.shared && !sym.is_imported)) - i++; - else - sym.flags |= NEEDS_TLSGD; - break; - case R_386_TLS_LDM: - if (i + 1 == rels.size()) - Fatal(ctx) << *this << ": TLS_LDM reloc must be followed by PLT or GOT32"; - - if (u32 ty = rels[i + 1].r_type; - ty != R_386_PLT32 && ty != R_386_PC32 && - ty != R_386_GOT32 && ty != R_386_GOT32X) - Fatal(ctx) << *this << ": TLS_LDM reloc must be followed by PLT or GOT32"; - - // We always relax if -static because libc.a doesn't contain - // __tls_get_addr(). 
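The reason the scanner insists that TLS_GD and TLS_LDM be followed by a PLT32/PC32 or GOT32/GOT32X relocation is that compilers emit general-dynamic accesses as a fixed two-instruction sequence whose second instruction, the call to ___tls_get_addr, carries that second relocation; the GD-to-LE rewrite in relax_gd_to_le() above overwrites both instructions at once. Roughly (illustrative AT&T syntax; the GOT32/GOT32X variant corresponds to an indirect -fno-plt style call through the GOT):

    // General-dynamic access to `x` as emitted by the compiler:
    //   leal x@tlsgd(,%ebx,1), %eax    // R_386_TLS_GD
    //   call ___tls_get_addr@plt       // R_386_PLT32 (or a GOT32/GOT32X call)
    //
    // Local-exec rewrite applied when the offset is a link-time constant,
    // matching the bytes written by relax_gd_to_le() above:
    //   movl %gs:0, %eax               // %eax = thread pointer
    //   addl $tpoff, %eax              // tpoff = address of x minus TP
    //                                  // (a negative constant on x86)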
- if (ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared)) - i++; - else - ctx.needs_tlsld = true; - break; - case R_386_TLS_GOTDESC: - if (!relax_tlsdesc(ctx, sym)) - sym.flags |= NEEDS_TLSDESC; - break; - case R_386_TLS_LE: - check_tlsle(ctx, sym, rel); - break; - case R_386_GOTOFF: - case R_386_TLS_LDO_32: - case R_386_SIZE32: - case R_386_TLS_DESC_CALL: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-m68k.cc b/third_party/mold/elf/arch-m68k.cc deleted file mode 100644 index 1b3b8721241..00000000000 --- a/third_party/mold/elf/arch-m68k.cc +++ /dev/null @@ -1,326 +0,0 @@ -// clang-format off -// This file contains code for the Motorola 68000 series microprocessors, -// which is often abbreviated as m68k. Running a Unix-like system on a -// m68k-based machine today is probably a retro-computing hobby activity, -// but the processor was a popular choice to build Unix computers during -// '80s. Early Sun workstations for example used m68k. Macintosh until -// 1994 were based on m68k as well until they switched to PowerPC (and -// then to x86 and to ARM.) -// -// From the linker's point of view, it is not hard to support m68k. It's -// just a 32-bit big-endian CISC ISA. Compared to comtemporary i386, -// m68k's psABI is actually simpler because m68k has PC-relative memory -// access instructions and therefore can support position-independent -// code without too much hassle. -// -// https://github.com/rui314/psabi/blob/main/m68k.pdf - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = M68K; - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static const u8 insn[] = { - 0x2f, 0x00, // move.l %d0, -(%sp) - 0x2f, 0x3b, 0x01, 0x70, 0, 0, 0, 0, // move.l (GOTPLT+4, %pc), -(%sp) - 0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOTPLT+8, %pc]) - }; - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)(buf + 6) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr; - *(ub32 *)(buf + 14) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 4; -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - static const u8 insn[] = { - 0x20, 0x3c, 0, 0, 0, 0, // move.l PLT_OFFSET, %d0 - 0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOTPLT_ENTRY, %pc]) - }; - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)(buf + 2) = sym.get_plt_idx(ctx) * sizeof(ElfRel); - *(ub32 *)(buf + 10) = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 8; -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - static const u8 insn[] = { - 0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOT_ENTRY, %pc]) - }; - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)(buf + 4) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 2; -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_68K_32: - *(ub32 *)loc = val; - break; - case R_68K_PC32: - *(ub32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const 
ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - auto write16 = [&](u64 val) { - check(val, 0, 1 << 16); - *(ub16 *)loc = val; - }; - - auto write16s = [&](u64 val) { - check(val, -(1 << 15), 1 << 15); - *(ub16 *)loc = val; - }; - - auto write8 = [&](u64 val) { - check(val, 0, 1 << 8); - *loc = val; - }; - - auto write8s = [&](u64 val) { - check(val, -(1 << 7), 1 << 7); - *loc = val; - }; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_68K_32: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_68K_16: - write16(S + A); - break; - case R_68K_8: - write8(S + A); - break; - case R_68K_PC32: - case R_68K_PLT32: - *(ub32 *)loc = S + A - P; - break; - case R_68K_PC16: - case R_68K_PLT16: - write16s(S + A - P); - break; - case R_68K_PC8: - case R_68K_PLT8: - write8s(S + A - P); - break; - case R_68K_GOTPCREL32: - *(ub32 *)loc = GOT + A - P; - break; - case R_68K_GOTPCREL16: - write16s(GOT + A - P); - break; - case R_68K_GOTPCREL8: - write8s(GOT + A - P); - break; - case R_68K_GOTOFF32: - *(ub32 *)loc = G + A; - break; - case R_68K_GOTOFF16: - write16(G + A); - break; - case R_68K_GOTOFF8: - write8(G + A); - break; - case R_68K_TLS_GD32: - *(ub32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; - break; - case R_68K_TLS_GD16: - write16(sym.get_tlsgd_addr(ctx) + A - GOT); - break; - case R_68K_TLS_GD8: - write8(sym.get_tlsgd_addr(ctx) + A - GOT); - break; - case R_68K_TLS_LDM32: - *(ub32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; - break; - case R_68K_TLS_LDM16: - write16(ctx.got->get_tlsld_addr(ctx) + A - GOT); - break; - case R_68K_TLS_LDM8: - write8(ctx.got->get_tlsld_addr(ctx) + A - GOT); - break; - case R_68K_TLS_LDO32: - *(ub32 *)loc = S + A - ctx.dtp_addr; - break; - case R_68K_TLS_LDO16: - write16s(S + A - ctx.dtp_addr); - break; - case R_68K_TLS_LDO8: - write8s(S + A - ctx.dtp_addr); - break; - case R_68K_TLS_IE32: - *(ub32 *)loc = sym.get_gottp_addr(ctx) + A - GOT; - break; - case R_68K_TLS_IE16: - write16(sym.get_gottp_addr(ctx) + A - GOT); - break; - case R_68K_TLS_IE8: - write8(sym.get_gottp_addr(ctx) + A - GOT); - break; - case R_68K_TLS_LE32: - *(ub32 *)loc = S + A - ctx.tp_addr; - break; - case R_68K_TLS_LE16: - write16(S + A - ctx.tp_addr); - break; - case R_68K_TLS_LE8: - write8(S + A - ctx.tp_addr); - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_68K_32: - if (std::optional val = get_tombstone(sym, frag)) - *(ub32 *)loc = *val; - else - *(ub32 *)loc = S + A; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - Error(ctx) << sym << ": GNU ifunc symbol is not supported on m68k"; - - switch (rel.r_type) { - case R_68K_32: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_68K_16: - case R_68K_8: - scan_absrel(ctx, sym, rel); - break; - case R_68K_PC32: - case R_68K_PC16: - case R_68K_PC8: - scan_pcrel(ctx, sym, rel); - break; - case R_68K_GOTPCREL32: - case R_68K_GOTPCREL16: - case R_68K_GOTPCREL8: - case R_68K_GOTOFF32: - case R_68K_GOTOFF16: - case R_68K_GOTOFF8: - sym.flags |= NEEDS_GOT; - break; - case R_68K_PLT32: - case R_68K_PLT16: - case R_68K_PLT8: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_68K_TLS_GD32: - case R_68K_TLS_GD16: - case R_68K_TLS_GD8: - sym.flags |= NEEDS_TLSGD; - break; - case R_68K_TLS_LDM32: - case R_68K_TLS_LDM16: - case R_68K_TLS_LDM8: - ctx.needs_tlsld = true; - break; - case R_68K_TLS_IE32: - case R_68K_TLS_IE16: - case R_68K_TLS_IE8: - sym.flags |= NEEDS_GOTTP; - break; - case R_68K_TLS_LE32: - case R_68K_TLS_LE16: - case R_68K_TLS_LE8: - check_tlsle(ctx, sym, rel); - break; - case R_68K_TLS_LDO32: - case R_68K_TLS_LDO16: - case R_68K_TLS_LDO8: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-ppc32.cc b/third_party/mold/elf/arch-ppc32.cc deleted file mode 100644 index c3a1db4cec6..00000000000 --- a/third_party/mold/elf/arch-ppc32.cc +++ /dev/null @@ -1,452 +0,0 @@ -// clang-format off -// This file implements the PowerPC 32-bit ISA. For 64-bit PowerPC, see -// arch-ppc64v1.cpp and arch-ppc64v2.cpp. -// -// PPC32 is a RISC ISA. It has 32 general-purpose registers (GPRs). -// r0, r11 and r12 are reserved for static linkers, so we can use these -// registers in PLTs and range extension thunks. In addition to that, it -// has a few special registers. Notable ones are LR which holds a return -// address and CTR which we can use to store a branch target address. -// -// It feels that the PPC32 psABI is unnecessarily complicated at first -// glance, but that is mainly stemmed from the fact that the ISA lacks -// PC-relative load/store instructions. Since machine instructions cannot -// load data relative to its own address, it is not straightforward to -// support position-independent code (PIC) on PPC32. -// -// A position-independent function typically contains the following code -// in the prologue to obtain its own address: -// -// mflr r0 // save the current return address to %r0 -// bcl 20, 31, 4 // call the next instruction as if it were a function -// mtlr r12 // save the return address to %r12 -// mtlr r0 // restore the original return address -// -// An object file compiled with -fPIC contains a data section named -// `.got2` to store addresses of locally-defined global variables and -// constants. 
A PIC function usually computes its .got2+0x8000 and set it -// to %r30. This scheme allows the function to access global objects -// defined in the same input file with a single %r30-relative load/store -// instruction with a 16-bit offset, given that .got2 is smaller than -// 0x10000 (or 65536) bytes. -// -// Since each object file has its own .got2, %r30 refers to different -// places in a merged .got2 for two functions that came from different -// input files. Therefore, %r30 makes sense only within a single function. -// -// Technically, we can reuse a %r30 value in our PLT if we create a PLT -// _for each input file_ (that's what GNU ld seems to be doing), but that -// doesn't seems to be worth its complexity. Our PLT simply doesn't rely -// on a %r30 value. -// -// https://github.com/rui314/psabi/blob/main/ppc32.pdf - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = PPC32; - -static u64 lo(u64 x) { return x & 0xffff; } -static u64 hi(u64 x) { return x >> 16; } -static u64 ha(u64 x) { return (x + 0x8000) >> 16; } -static u64 high(u64 x) { return (x >> 16) & 0xffff; } -static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; } - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static const ub32 insn[] = { - // Get the address of this PLT section - 0x7c08'02a6, // mflr r0 - 0x429f'0005, // bcl 20, 31, 4 - 0x7d88'02a6, // 1: mflr r12 - 0x7c08'03a6, // mtlr r0 - - // Compute the runtime address of GOTPLT+12 - 0x3d8c'0000, // addis r12, r12, (GOTPLT - 1b)@higha - 0x398c'0000, // addi r12, r12, (GOTPLT - 1b)@lo - - // Compute the PLT entry offset - 0x7d6c'5850, // sub r11, r11, r12 - 0x1d6b'0003, // mulli r11, r11, 3 - - // Load GOTPLT[2] and branch to GOTPLT[1] - 0x800c'fff8, // lwz r0, -8(r12) - 0x7c09'03a6, // mtctr r0 - 0x818c'fffc, // lwz r12, -4(r12) - 0x4e80'0420, // bctr - 0x6000'0000, // nop - 0x6000'0000, // nop - 0x6000'0000, // nop - 0x6000'0000, // nop - }; - - static_assert(sizeof(insn) == E::plt_hdr_size); - memcpy(buf, insn, sizeof(insn)); - - ub32 *loc = (ub32 *)buf; - loc[4] |= higha(ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4); - loc[5] |= lo(ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4); -} - -static const ub32 plt_entry[] = { - // Get the address of this PLT entry - 0x7c08'02a6, // mflr r0 - 0x429f'0005, // bcl 20, 31, 4 - 0x7d88'02a6, // mflr r12 - 0x7c08'03a6, // mtlr r0 - - // Load an address from the GOT/GOTPLT entry and jump to that address - 0x3d6c'0000, // addis r11, r12, OFFSET@higha - 0x396b'0000, // addi r11, r11, OFFSET@lo - 0x818b'0000, // lwz r12, 0(r11) - 0x7d89'03a6, // mtctr r12 - 0x4e80'0420, // bctr -}; - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - static_assert(E::plt_size == sizeof(plt_entry)); - memcpy(buf, plt_entry, sizeof(plt_entry)); - - ub32 *loc = (ub32 *)buf; - i64 offset = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 8; - loc[4] |= higha(offset); - loc[5] |= lo(offset); -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - static_assert(E::pltgot_size == sizeof(plt_entry)); - memcpy(buf, plt_entry, sizeof(plt_entry)); - - ub32 *loc = (ub32 *)buf; - i64 offset = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 8; - loc[4] |= higha(offset); - loc[5] |= lo(offset); -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_PPC_ADDR32: - *(ub32 *)loc = 
val; - break; - case R_PPC_REL32: - *(ub32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - u64 GOT2 = file.ppc32_got2 ? file.ppc32_got2->get_addr() : 0; - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_PPC_ADDR32: - case R_PPC_UADDR32: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_PPC_ADDR14: - *(ub32 *)loc |= bits(S + A, 15, 2) << 2; - break; - case R_PPC_ADDR16: - case R_PPC_UADDR16: - case R_PPC_ADDR16_LO: - *(ub16 *)loc = lo(S + A); - break; - case R_PPC_ADDR16_HI: - *(ub16 *)loc = hi(S + A); - break; - case R_PPC_ADDR16_HA: - *(ub16 *)loc = ha(S + A); - break; - case R_PPC_ADDR24: - *(ub32 *)loc |= bits(S + A, 25, 2) << 2; - break; - case R_PPC_ADDR30: - *(ub32 *)loc |= bits(S + A, 31, 2) << 2; - break; - case R_PPC_PLT16_LO: - *(ub16 *)loc = lo(G + GOT - A - GOT2); - break; - case R_PPC_PLT16_HI: - *(ub16 *)loc = hi(G + GOT - A - GOT2); - break; - case R_PPC_PLT16_HA: - *(ub16 *)loc = ha(G + GOT - A - GOT2); - break; - case R_PPC_PLT32: - *(ub32 *)loc = G + GOT - A - GOT2; - break; - case R_PPC_REL14: - *(ub32 *)loc |= bits(S + A - P, 15, 2) << 2; - break; - case R_PPC_REL16: - case R_PPC_REL16_LO: - *(ub16 *)loc = lo(S + A - P); - break; - case R_PPC_REL16_HI: - *(ub16 *)loc = hi(S + A - P); - break; - case R_PPC_REL16_HA: - *(ub16 *)loc = ha(S + A - P); - break; - case R_PPC_REL24: - case R_PPC_LOCAL24PC: { - i64 val = S + A - P; - if (sign_extend(val, 25) != val) - val = get_thunk_addr(i) - P; - *(ub32 *)loc |= bits(val, 25, 2) << 2; - break; - } - case R_PPC_PLTREL24: { - i64 val = S - P; - if (sym.has_plt(ctx) || sign_extend(val, 25) != val) - val = get_thunk_addr(i) - P; - *(ub32 *)loc |= bits(val, 25, 2) << 2; - break; - } - case R_PPC_REL32: - case R_PPC_PLTREL32: - *(ub32 *)loc = S + A - P; - break; - case R_PPC_GOT16: - case R_PPC_GOT16_LO: - *(ub16 *)loc = lo(G + A); - break; - case R_PPC_GOT16_HI: - *(ub16 *)loc = hi(G + A); - break; - case R_PPC_GOT16_HA: - *(ub16 *)loc = ha(G + A); - break; - case R_PPC_TPREL16_LO: - *(ub16 *)loc = lo(S + A - ctx.tp_addr); - break; - case R_PPC_TPREL16_HI: - *(ub16 *)loc = hi(S + A - ctx.tp_addr); - break; - case R_PPC_TPREL16_HA: - *(ub16 *)loc = ha(S + A - ctx.tp_addr); - break; - case R_PPC_DTPREL16_LO: - *(ub16 *)loc = lo(S + A - ctx.dtp_addr); - break; - case R_PPC_DTPREL16_HI: - *(ub16 *)loc = hi(S + A - ctx.dtp_addr); - break; - case R_PPC_DTPREL16_HA: - *(ub16 *)loc = ha(S + A - ctx.dtp_addr); - break; - case R_PPC_GOT_TLSGD16: - *(ub16 *)loc = sym.get_tlsgd_addr(ctx) - GOT; - break; - case R_PPC_GOT_TLSLD16: - *(ub16 *)loc = ctx.got->get_tlsld_addr(ctx) - GOT; - break; - case R_PPC_GOT_TPREL16: - *(ub16 *)loc = sym.get_gottp_addr(ctx) - GOT; - break; - case R_PPC_TLS: - case R_PPC_TLSGD: - case R_PPC_TLSLD: - case R_PPC_PLTSEQ: - case R_PPC_PLTCALL: - break; - default: - unreachable(); - } - } -} - 
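// Illustrative sketch (not from the original file) of the @ha/@lo split used
// by the R_PPC_*_HA/_LO relocations above: the low half is later sign-extended
// by addi/lwz, so ha()/higha() pre-bias the high half by 0x8000 to cancel that
// extension. Assumes mold's u32/i32/i16 integer aliases from integers.h.
namespace {
constexpr u32 reconstruct_hi_lo(u32 x) {
  u32 low = x & 0xffff;                      // what the @lo instruction encodes
  u32 high = ((x + 0x8000) >> 16) & 0xffff;  // what the @ha instruction encodes
  return (high << 16) + (u32)(i32)(i16)low;  // the i16 cast models sign extension
}
static_assert(reconstruct_hi_lo(0x1234'5678) == 0x1234'5678);
static_assert(reconstruct_hi_lo(0x1234'8000) == 0x1234'8000);
static_assert(reconstruct_hi_lo(0xdead'beef) == 0xdead'beef);
}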
-template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_PPC_ADDR32: - if (std::optional val = get_tombstone(sym, frag)) - *(ub32 *)loc = *val; - else - *(ub32 *)loc = S + A; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_PPC_ADDR32: - case R_PPC_UADDR32: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_PPC_ADDR14: - case R_PPC_ADDR16: - case R_PPC_UADDR16: - case R_PPC_ADDR16_LO: - case R_PPC_ADDR16_HI: - case R_PPC_ADDR16_HA: - case R_PPC_ADDR24: - case R_PPC_ADDR30: - scan_absrel(ctx, sym, rel); - break; - case R_PPC_REL14: - case R_PPC_REL16: - case R_PPC_REL16_LO: - case R_PPC_REL16_HI: - case R_PPC_REL16_HA: - case R_PPC_REL32: - scan_pcrel(ctx, sym, rel); - break; - case R_PPC_GOT16: - case R_PPC_GOT16_LO: - case R_PPC_GOT16_HI: - case R_PPC_GOT16_HA: - case R_PPC_PLT16_LO: - case R_PPC_PLT16_HI: - case R_PPC_PLT16_HA: - case R_PPC_PLT32: - sym.flags |= NEEDS_GOT; - break; - case R_PPC_REL24: - case R_PPC_PLTREL24: - case R_PPC_PLTREL32: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_PPC_GOT_TLSGD16: - sym.flags |= NEEDS_TLSGD; - break; - case R_PPC_GOT_TLSLD16: - ctx.needs_tlsld = true; - break; - case R_PPC_GOT_TPREL16: - sym.flags |= NEEDS_GOTTP; - break; - case R_PPC_TPREL16_LO: - case R_PPC_TPREL16_HI: - case R_PPC_TPREL16_HA: - check_tlsle(ctx, sym, rel); - break; - case R_PPC_LOCAL24PC: - case R_PPC_TLS: - case R_PPC_TLSGD: - case R_PPC_TLSLD: - case R_PPC_DTPREL16_LO: - case R_PPC_DTPREL16_HI: - case R_PPC_DTPREL16_HA: - case R_PPC_PLTSEQ: - case R_PPC_PLTCALL: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template <> -void RangeExtensionThunk::copy_buf(Context &ctx) { - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - - static const ub32 local_thunk[] = { - // Get this thunk's address - 0x7c08'02a6, // mflr r0 - 0x429f'0005, // bcl 20, 31, 4 - 0x7d88'02a6, // mflr r12 - 0x7c08'03a6, // mtlr r0 - - // Materialize the destination's address in %r11 and jump to that address - 0x3d6c'0000, // addis r11, r12, OFFSET@higha - 0x396b'0000, // addi r11, r11, OFFSET@lo - 0x7d69'03a6, // mtctr r11 - 0x4e80'0420, // bctr - 0x6000'0000, // nop - }; - - static_assert(E::thunk_size == sizeof(plt_entry)); - static_assert(E::thunk_size == sizeof(local_thunk)); - - for (i64 i = 0; i < symbols.size(); i++) { - ub32 *loc = (ub32 *)(buf + i * E::thunk_size); - Symbol &sym = *symbols[i]; - - if (sym.has_plt(ctx)) { - 
memcpy(loc, plt_entry, sizeof(plt_entry)); - u64 got = sym.has_got(ctx) ? sym.get_got_addr(ctx) : sym.get_gotplt_addr(ctx); - i64 val = got - get_addr(i) - 8; - loc[4] |= higha(val); - loc[5] |= lo(val); - } else { - memcpy(loc, local_thunk, sizeof(local_thunk)); - i64 val = sym.get_addr(ctx) - get_addr(i) - 8; - loc[4] |= higha(val); - loc[5] |= lo(val); - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-ppc64v1.cc b/third_party/mold/elf/arch-ppc64v1.cc deleted file mode 100644 index c45581269dc..00000000000 --- a/third_party/mold/elf/arch-ppc64v1.cc +++ /dev/null @@ -1,687 +0,0 @@ -// clang-format off -// This file contains code for the 64-bit PowerPC ELFv1 ABI that is -// commonly used for big-endian PPC systems. Modern PPC systems that use -// the processor in the little-endian mode use the ELFv2 ABI instead. For -// ELFv2, see arch-ppc64v2.cc. -// -// Even though they are similar, ELFv1 isn't only different from ELFv2 in -// endianness. The most notable difference is that, in ELFv1, a function -// pointer doesn't directly refer to the entry point of a function but -// instead refers to a data structure called a "function descriptor". -// -// The function descriptor is essentially a pair of a function entry point -// address and a value that should be set to %r2 before calling that -// function. There is also a third member for "the environment pointer for -// languages such as Pascal and PL/1" according to the psABI, but it looks -// like no one actually uses it. In total, the function descriptor is 24 -// bytes long. Here is why we need it. -// -// PPC generally lacks PC-relative data access instructions. Position- -// independent code sets GOT + 0x8000 to %r2 and accesses global variables -// relative to %r2. -// -// Each ELF file has its own GOT. If a function calls another function in -// the same ELF file, it doesn't have to reset %r2. However, if the callee is -// in another file (e.g. another .so), it has to set a new value to %r2 so that -// the register contains the callee's GOT + 0x8000. -// -// In this way, you can't call a function just by knowing the function's -// entry point address. You also need to know a proper %r2 value for the -// function. This is why a function pointer refers to a tuple of an -// address and a %r2 value. -// -// If a function call is made through the PLT, the PLT takes care of restoring -// %r2. Therefore, the caller has to restore %r2 only for function calls -// through function pointers. -// -// .opd (short for "official procedure descriptors") contains function -// descriptors. -// -// You can think of OPD like this: even on other targets, a function can have -// a few different addresses for different purposes. It may not only have an -// entry point address but may also have PLT and/or GOT addresses. -// In PPC64 ELFv1, it may have an OPD address in addition to these. The OPD -// address is used for relocations that refer to the address of a function as -// a function pointer. -// -// https://github.com/rui314/psabi/blob/main/ppc64v1.pdf - -#include "third_party/mold/elf/mold.h" - -#include "third_party/libcxx/algorithm" -// MISSING #include - -namespace mold::elf { - -using E = PPC64V1; - -static u64 lo(u64 x) { return x & 0xffff; } -static u64 hi(u64 x) { return x >> 16; } -static u64 ha(u64 x) { return (x + 0x8000) >> 16; } -static u64 high(u64 x) { return (x >> 16) & 0xffff; } -static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; } - -// .plt is used only for lazy symbol resolution on PPC64. All PLT -// calls are made via range extension thunks even if they are within -// reach. Thunks read addresses from .got.plt and jump there. -// Therefore, once PLT symbols are resolved and final addresses are -// written to .got.plt, thunks just skip .plt and directly jump to the -// resolved addresses. -template <> -void write_plt_header(Context<E> &ctx, u8 *buf) { - static const ub32 insn[] = { - 0x7d88'02a6, // mflr r12 - 0x429f'0005, // bcl 20, 31, 4 // obtain PC - 0x7d68'02a6, // mflr r11 - 0xe84b'0024, // ld r2,36(r11) - 0x7d88'03a6, // mtlr r12 - 0x7d62'5a14, // add r11,r2,r11 - 0xe98b'0000, // ld r12,0(r11) - 0xe84b'0008, // ld r2,8(r11) - 0x7d89'03a6, // mtctr r12 - 0xe96b'0010, // ld r11,16(r11) - 0x4e80'0420, // bctr - // .quad .got.plt - .plt - 8 - 0x0000'0000, - 0x0000'0000, - }; - - static_assert(sizeof(insn) == E::plt_hdr_size); - memcpy(buf, insn, sizeof(insn)); - *(ub64 *)(buf + 44) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 8; -} - -template <> -void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) { - ub32 *loc = (ub32 *)buf; - i64 idx = sym.get_plt_idx(ctx); - - // The PPC64 ELFv1 ABI requires PLT entries to vary in size depending - // on their indices. Unlike other targets, .got.plt is filled not by us - // but by the loader, so we don't have control over where the initial - // call to the PLT entry jumps to. So we need to strictly follow the PLT - // section layout as the loader expects it to be. - if (idx < 0x8000) { - static const ub32 insn[] = { - 0x3800'0000, // li r0, PLT_INDEX - 0x4b00'0000, // b plt0 - }; - - memcpy(loc, insn, sizeof(insn)); - loc[0] |= idx; - loc[1] |= (ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx) - 4) & 0x00ff'ffff; - } else { - static const ub32 insn[] = { - 0x3c00'0000, // lis r0, PLT_INDEX@high - 0x6000'0000, // ori r0, r0, PLT_INDEX@lo - 0x4b00'0000, // b plt0 - }; - - memcpy(loc, insn, sizeof(insn)); - loc[0] |= high(idx); - loc[1] |= lo(idx); - loc[2] |= (ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx) - 8) & 0x00ff'ffff; - } -} - -// .plt.got is not necessary on PPC64 because range extension thunks -// directly read GOT entries and jump there.
-template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) {} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_PPC64_ADDR64: - *(ub64 *)loc = val; - break; - case R_PPC64_REL32: - *(ub32 *)loc = val - this->shdr.sh_addr - offset; - break; - case R_PPC64_REL64: - *(ub64 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - u64 TOC = ctx.extra.TOC->value; - - switch (rel.r_type) { - case R_PPC64_ADDR64: - apply_toc_rel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_PPC64_TOC: - apply_toc_rel(ctx, *ctx.extra.TOC, rel, loc, TOC, A, P, dynrel); - break; - case R_PPC64_TOC16_HA: - *(ub16 *)loc = ha(S + A - TOC); - break; - case R_PPC64_TOC16_LO: - *(ub16 *)loc = lo(S + A - TOC); - break; - case R_PPC64_TOC16_DS: - check(S + A - TOC, -(1 << 15), 1 << 15); - *(ub16 *)loc |= (S + A - TOC) & 0xfffc; - break; - case R_PPC64_TOC16_LO_DS: - *(ub16 *)loc |= (S + A - TOC) & 0xfffc; - break; - case R_PPC64_REL24: { - i64 val = sym.get_addr(ctx, NO_OPD) + A - P; - if (sym.has_plt(ctx) || sign_extend(val, 25) != val) - val = get_thunk_addr(i) + A - P; - - check(val, -(1 << 25), 1 << 25); - *(ub32 *)loc |= bits(val, 25, 2) << 2; - - // If a callee is an external function, PLT saves %r2 to the - // caller's r2 save slot. We need to restore it after function - // return. To do so, there's usually a NOP as a placeholder - // after a BL. 0x6000'0000 is a NOP. 
- if (sym.has_plt(ctx) && *(ub32 *)(loc + 4) == 0x6000'0000) - *(ub32 *)(loc + 4) = 0xe841'0028; // ld r2, 40(r1) - break; - } - case R_PPC64_REL32: - *(ub32 *)loc = S + A - P; - break; - case R_PPC64_REL64: - *(ub64 *)loc = S + A - P; - break; - case R_PPC64_REL16_HA: - *(ub16 *)loc = ha(S + A - P); - break; - case R_PPC64_REL16_LO: - *(ub16 *)loc = lo(S + A - P); - break; - case R_PPC64_PLT16_HA: - *(ub16 *)loc = ha(G + GOT - TOC); - break; - case R_PPC64_PLT16_HI: - *(ub16 *)loc = hi(G + GOT - TOC); - break; - case R_PPC64_PLT16_LO: - *(ub16 *)loc = lo(G + GOT - TOC); - break; - case R_PPC64_PLT16_LO_DS: - *(ub16 *)loc |= (G + GOT - TOC) & 0xfffc; - break; - case R_PPC64_GOT_TPREL16_HA: - *(ub16 *)loc = ha(sym.get_gottp_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSGD16_HA: - *(ub16 *)loc = ha(sym.get_tlsgd_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSGD16_LO: - *(ub16 *)loc = lo(sym.get_tlsgd_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSLD16_HA: - *(ub16 *)loc = ha(ctx.got->get_tlsld_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSLD16_LO: - *(ub16 *)loc = lo(ctx.got->get_tlsld_addr(ctx) - TOC); - break; - case R_PPC64_DTPREL16_HA: - *(ub16 *)loc = ha(S + A - ctx.dtp_addr); - break; - case R_PPC64_DTPREL16_LO: - *(ub16 *)loc = lo(S + A - ctx.dtp_addr); - break; - case R_PPC64_TPREL16_HA: - *(ub16 *)loc = ha(S + A - ctx.tp_addr); - break; - case R_PPC64_TPREL16_LO: - *(ub16 *)loc = lo(S + A - ctx.tp_addr); - break; - case R_PPC64_GOT_TPREL16_LO_DS: - *(ub16 *)loc |= (sym.get_gottp_addr(ctx) - TOC) & 0xfffc; - break; - case R_PPC64_PLTSEQ: - case R_PPC64_PLTCALL: - case R_PPC64_TLS: - case R_PPC64_TLSGD: - case R_PPC64_TLSLD: - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_PPC64_ADDR64: - if (std::optional val = get_tombstone(sym, frag)) - *(ub64 *)loc = *val; - else - *(ub64 *)loc = S + A; - break; - case R_PPC64_ADDR32: { - i64 val = S + A; - check(val, 0, 1LL << 32); - *(ub32 *)loc = val; - break; - } - case R_PPC64_DTPREL64: - *(ub64 *)loc = S + A - ctx.dtp_addr; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT | NEEDS_PPC_OPD; - - // Any relocation except R_PPC64_REL24 is considered as an - // address-taking relocation. 
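// (For example, an R_PPC64_ADDR64 that stores a function's address as a
// function pointer must, under ELFv1, resolve to the function's .opd
// descriptor rather than to its entry point, which is what NEEDS_PPC_OPD
// arranges.)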
- if (rel.r_type != R_PPC64_REL24 && sym.get_type() == STT_FUNC) - sym.flags |= NEEDS_PPC_OPD; - - switch (rel.r_type) { - case R_PPC64_ADDR64: - case R_PPC64_TOC: - scan_toc_rel(ctx, sym, rel); - break; - case R_PPC64_GOT_TPREL16_HA: - sym.flags |= NEEDS_GOTTP; - break; - case R_PPC64_REL24: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_PPC64_PLT16_HA: - sym.flags |= NEEDS_GOT; - break; - case R_PPC64_GOT_TLSGD16_HA: - sym.flags |= NEEDS_TLSGD; - break; - case R_PPC64_GOT_TLSLD16_HA: - ctx.needs_tlsld = true; - break; - case R_PPC64_TPREL16_HA: - case R_PPC64_TPREL16_LO: - check_tlsle(ctx, sym, rel); - break; - case R_PPC64_REL32: - case R_PPC64_REL64: - case R_PPC64_TOC16_HA: - case R_PPC64_TOC16_LO: - case R_PPC64_TOC16_LO_DS: - case R_PPC64_TOC16_DS: - case R_PPC64_REL16_HA: - case R_PPC64_REL16_LO: - case R_PPC64_PLT16_HI: - case R_PPC64_PLT16_LO: - case R_PPC64_PLT16_LO_DS: - case R_PPC64_PLTSEQ: - case R_PPC64_PLTCALL: - case R_PPC64_GOT_TPREL16_LO_DS: - case R_PPC64_GOT_TLSGD16_LO: - case R_PPC64_GOT_TLSLD16_LO: - case R_PPC64_TLS: - case R_PPC64_TLSGD: - case R_PPC64_TLSLD: - case R_PPC64_DTPREL16_HA: - case R_PPC64_DTPREL16_LO: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template <> -void RangeExtensionThunk::copy_buf(Context &ctx) { - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - - // If the destination is .plt.got, we save the current r2, read an - // address of a function descriptor from .got, restore %r2 and jump - // to the function. - static const ub32 pltgot_thunk[] = { - // Store the caller's %r2 - 0xf841'0028, // std %r2, 40(%r1) - - // Load an address of a function descriptor - 0x3d82'0000, // addis %r12, %r2, foo@got@toc@ha - 0xe98c'0000, // ld %r12, foo@got@toc@lo(%r12) - - // Restore the callee's %r2 - 0xe84c'0008, // ld %r2, 8(%r12) - - // Jump to the function - 0xe98c'0000, // ld %r12, 0(%r12) - 0x7d89'03a6, // mtctr %r12 - 0x4e80'0420, // bctr - }; - - // If the destination is .plt, read a function descriptor from .got.plt. - static const ub32 plt_thunk[] = { - // Store the caller's %r2 - 0xf841'0028, // std %r2, 40(%r1) - - // Materialize an address of a function descriptor - 0x3d82'0000, // addis %r12, %r2, foo@gotplt@toc@ha - 0x398c'0000, // addi %r12, %r12, foo@gotplt@toc@lo - - // Restore the callee's %r2 - 0xe84c'0008, // ld %r2, 8(%r12) - - // Jump to the function - 0xe98c'0000, // ld %r12, 0(%r12) - 0x7d89'03a6, // mtctr %r12 - 0x4e80'0420, // bctr - }; - - // If the destination is a non-imported function, we directly jump - // to the function entry address. 
- static const ub32 local_thunk[] = { - 0x3d82'0000, // addis r12, r2, foo@toc@ha - 0x398c'0000, // addi r12, r12, foo@toc@lo - 0x7d89'03a6, // mtctr r12 - 0x4e80'0420, // bctr - 0x6000'0000, // nop - 0x6000'0000, // nop - 0x6000'0000, // nop - }; - - static_assert(E::thunk_size == sizeof(pltgot_thunk)); - static_assert(E::thunk_size == sizeof(plt_thunk)); - static_assert(E::thunk_size == sizeof(local_thunk)); - - for (i64 i = 0; i < symbols.size(); i++) { - Symbol &sym = *symbols[i]; - ub32 *loc = (ub32 *)(buf + i * E::thunk_size); - - if (sym.has_got(ctx)) { - memcpy(loc, pltgot_thunk, sizeof(pltgot_thunk)); - i64 val = sym.get_got_addr(ctx) - ctx.extra.TOC->value; - loc[1] |= higha(val); - loc[2] |= lo(val); - } else if(sym.has_plt(ctx)) { - memcpy(loc, plt_thunk, sizeof(plt_thunk)); - i64 val = sym.get_gotplt_addr(ctx) - ctx.extra.TOC->value; - loc[1] |= higha(val); - loc[2] |= lo(val); - } else { - memcpy(loc, local_thunk, sizeof(local_thunk)); - i64 val = sym.get_addr(ctx, NO_OPD) - ctx.extra.TOC->value; - loc[0] |= higha(val); - loc[1] |= lo(val); - } - } -} - -static InputSection *get_opd_section(ObjectFile &file) { - for (std::unique_ptr> &isec : file.sections) - if (isec && isec->name() == ".opd") - return isec.get(); - return nullptr; -} - -static ElfRel * -get_relocation_at(Context &ctx, InputSection &isec, i64 offset) { - std::span> rels = isec.get_rels(ctx); - - auto it = std::lower_bound(rels.begin(), rels.end(), offset, - [](const ElfRel &r, i64 offset) { - return r.r_offset < offset; - }); - - if (it == rels.end()) - return nullptr; - if (it->r_offset != offset) - return nullptr; - return &*it; -} - -struct OpdSymbol { - bool operator<(const OpdSymbol &x) const { return r_offset < x.r_offset; } - - u64 r_offset = 0; - Symbol *sym = nullptr; -}; - -static Symbol * -get_opd_sym_at(Context &ctx, std::span syms, u64 offset) { - auto it = std::lower_bound(syms.begin(), syms.end(), OpdSymbol{offset}); - if (it == syms.end()) - return nullptr; - if (it->r_offset != offset) - return nullptr; - return it->sym; -} - -// Compiler creates an .opd entry for each function symbol. The intention -// is to make it possible to create an output .opd section just by linking -// input .opd sections in the same manner as we do to other normal input -// sections. -// -// However, in reality, .opd isn't a normal input section. It needs many -// special treatments as follows: -// -// 1. A function symbol refers to not a .text but an .opd. Its address -// works fine for address-taking relocations such as R_PPC64_ADDR64. -// However, R_PPC64_REL24 (which is used for branch instruction) needs -// a function's real address instead of the function's .opd address. -// We need to read .opd contents to find out a function entry point -// address to apply R_PPC64_REL24. -// -// 2. Output .opd entries are needed only for functions whose addresses -// are taken. Just copying input .opd sections to an output would -// produces lots of dead .opd entries. -// -// 3. In this design, all function symbols refer to an .opd section, and -// that doesn't work well with graph traversal optimizations such as -// garbage collection or identical comdat folding. For example, garbage -// collector would mark an .opd alive which in turn mark all functions -// thatare referenced by .opd as alive, effectively keeping all -// functions as alive. -// -// The problem is that the compiler creates a half-baked .opd section, and -// the linker has to figure out what all these .opd entries and -// relocations are trying to achieve. 
It's like the compiler would emit a -// half-baked .plt section in an object file and the linker has to deal -// with that. That's not a good design. -// -// So, in this function, we undo what the compiler did to .opd. We remove -// function symbols from .opd and reattach them to their function entry -// points. We also rewrite relocations that directly refer to an input -// .opd section so that they refer to function symbols instead. We then -// mark input .opd sections as dead. -// -// After this function, we mark symbols with the NEEDS_PPC_OPD flag if the -// symbol needs an .opd entry. We then create an output .opd just like we -// do for .plt or .got. -void ppc64v1_rewrite_opd(Context &ctx) { - tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - InputSection *opd = get_opd_section(*file); - if (!opd) - return; - opd->is_alive = false; - - // Move symbols from .opd to .text. - std::vector opd_syms; - - for (Symbol *sym : file->symbols) { - if (sym->file != file || sym->get_input_section() != opd) - continue; - - if (u32 ty = sym->get_type(); ty != STT_FUNC && ty != STT_GNU_IFUNC) - continue; - - ElfRel *rel = get_relocation_at(ctx, *opd, sym->value); - if (!rel) - Fatal(ctx) << *file << ": cannot find a relocation in .opd for " - << *sym << " at offset 0x" << std::hex << (u64)sym->value; - - Symbol *sym2 = file->symbols[rel->r_sym]; - if (sym2->get_type() != STT_SECTION) - Fatal(ctx) << *file << ": bad relocation in .opd referring " << *sym2; - - opd_syms.push_back({sym->value, sym}); - - sym->set_input_section(sym2->get_input_section()); - sym->value = rel->r_addend; - } - - // Sort symbols so that get_opd_sym_at() can do binary search. - sort(opd_syms); - - // Rewrite relocations so that they directly refer to .opd. - for (std::unique_ptr> &isec : file->sections) { - if (!isec || !isec->is_alive || isec.get() == opd) - continue; - - for (ElfRel &r : isec->get_rels(ctx)) { - Symbol &sym = *file->symbols[r.r_sym]; - if (sym.get_input_section() != opd) - continue; - - Symbol *real_sym = get_opd_sym_at(ctx, opd_syms, r.r_addend); - if (!real_sym) - Fatal(ctx) << *isec << ": cannot find a symbol in .opd for " << r - << " at offset 0x" << std::hex << (u64)r.r_addend; - - r.r_sym = real_sym->sym_idx; - r.r_addend = 0; - } - } - }); -} - -// When a function is exported, the dynamic symbol for the function should -// refers to the function's .opd entry. This function marks such symbols -// with NEEDS_PPC_OPD. -void ppc64v1_scan_symbols(Context &ctx) { - tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - for (Symbol *sym : file->symbols) - if (sym->file == file && sym->is_exported) - if (u32 ty = sym->get_type(); ty == STT_FUNC || ty == STT_GNU_IFUNC) - sym->flags |= NEEDS_PPC_OPD; - }); - - // Functions referenced by the ELF header also have to have .opd entries. 
- auto mark = [&](std::string_view name) { - if (!name.empty()) - if (Symbol &sym = *get_symbol(ctx, name); !sym.is_imported) - sym.flags |= NEEDS_PPC_OPD; - }; - - mark(ctx.arg.entry); - mark(ctx.arg.init); - mark(ctx.arg.fini); -} - -void PPC64OpdSection::add_symbol(Context &ctx, Symbol *sym) { - sym->set_opd_idx(ctx, symbols.size()); - symbols.push_back(sym); - this->shdr.sh_size += ENTRY_SIZE; -} - -i64 PPC64OpdSection::get_reldyn_size(Context &ctx) const { - if (ctx.arg.pic) - return symbols.size() * 2; - return 0; -} - -void PPC64OpdSection::copy_buf(Context &ctx) { - ub64 *buf = (ub64 *)(ctx.buf + this->shdr.sh_offset); - - ElfRel *rel = nullptr; - if (ctx.arg.pic) - rel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + reldyn_offset); - - for (Symbol *sym : symbols) { - u64 addr = sym->get_addr(ctx, NO_PLT | NO_OPD); - *buf++ = addr; - *buf++ = ctx.extra.TOC->value; - *buf++ = 0; - - if (ctx.arg.pic) { - u64 loc = sym->get_opd_addr(ctx); - *rel++ = ElfRel(loc, E::R_RELATIVE, 0, addr); - *rel++ = ElfRel(loc + 8, E::R_RELATIVE, 0, ctx.extra.TOC->value); - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-ppc64v2.cc b/third_party/mold/elf/arch-ppc64v2.cc deleted file mode 100644 index 3582bb57f58..00000000000 --- a/third_party/mold/elf/arch-ppc64v2.cc +++ /dev/null @@ -1,555 +0,0 @@ -// clang-format off -// This file implements the PowerPC ELFv2 ABI which was standardized in -// 2014. Modern little-endian PowerPC systems are based on this ABI. -// The ABI is often referred to as "ppc64le". This shouldn't be confused -// with "ppc64" which refers to the original, big-endian PowerPC systems. -// -// PPC64 is a bit tricky to support because PC-relative load/store -// instructions hadn't been available until Power10 which debuted in 2021. -// Prior to Power10, it wasn't trivial for position-independent code (PIC) -// to load a value from, for example, .got, as we can't do that with [PC + -// the offset to the .got entry]. -// -// In the following, I'll explain how PIC is supported on pre-Power10 -// systems first and then explain what has changed with Power10. -// -// -// Position-independent code on Power9 or earlier: -// -// We can get the program counter on older PPC64 systems with the -// following four instructions -// -// mflr r1 // save the current link register to r1 -// bl .+4 // branch to the next instruction as if it were a function -// mflr r0 // copy the return address to r0 -// mtlr r1 // restore the original link register value -// -// , but it's too expensive to do if we do this for each load/store. -// -// As a workaround, most functions are compiled in such a way that r2 is -// assumed to always contain the address of .got + 0x8000. With this, we -// can for example load the first entry of .got with a single instruction -// `lw r0, -0x8000(r2)`. r2 is called the TOC pointer. -// -// There's only one .got for each ELF module. Therefore, if a callee is in -// the same ELF module, r2 doesn't have to be recomputed. Most function -// calls are usually within the same ELF module, so this mechanism is -// efficient. -// -// A function compiled for pre-Power10 usually has two entry points, -// global and local. The global entry point usually 8 bytes precedes -// the local entry point. In between is the following instructions: -// -// addis r2, r12, .TOC.@ha -// addi r2, r2, .TOC.@lo + 4; -// -// The global entry point assumes that the address of itself is in r12, -// and it computes its own TOC pointer from r12. 
It's easy to do so for -// the callee because the offset between its .got + 0x8000 and the -// function is known at link-time. The above code sequence then falls -// through to the local entry point that assumes r2 is .got + 0x8000. -// -// So, if a callee's TOC pointer is different from the current one -// (e.g. calling a function in another .so), we first load the callee's -// address to r12 (e.g. from .got.plt with a r2-relative load) and branch -// to that address. Then the callee computes its own TOC pointer using -// r12. -// -// -// Position-independent code on Power10: -// -// Power10 added 8-bytes-long instructions to the ISA. Some of them are -// PC-relative load/store instructions that take 34 bits offsets. -// Functions compiled with `-mcpu=power10` use these instructions for PIC. -// r2 does not have a special meaning in such fucntions. -// -// When a fucntion compiled for Power10 calls a function that uses the TOC -// pointer, we need to compute a correct value for TOC and set it to r2 -// before transferring the control to the callee. Thunks are responsible -// for doing it. -// -// `_NOTOC` relocations such as `R_PPC64_REL24_NOTOC` indicate that the -// callee does not use TOC (i.e. compiled with `-mcpu=power10`). If a -// function using TOC is referenced via a `_NOTOC` relocation, that call -// is made through a range extension thunk. -// -// -// Note on section names: the PPC64 psABI uses a weird naming convention -// which calls .got.plt .plt. We ignored that part because it's just -// confusing. Since the runtime only cares about segments, we should be -// able to name sections whatever we want. -// -// https://github.com/rui314/psabi/blob/main/ppc64v2.pdf - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = PPC64V2; - -static u64 lo(u64 x) { return x & 0xffff; } -static u64 hi(u64 x) { return x >> 16; } -static u64 ha(u64 x) { return (x + 0x8000) >> 16; } -static u64 high(u64 x) { return (x >> 16) & 0xffff; } -static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; } - -static u64 prefix34(u64 x) { - return bits(x, 33, 16) | (bits(x, 15, 0) << 32); -} - -// .plt is used only for lazy symbol resolution on PPC64. All PLT -// calls are made via range extension thunks even if they are within -// reach. Thunks read addresses from .got.plt and jump there. -// Therefore, once PLT symbols are resolved and final addresses are -// written to .got.plt, thunks just skip .plt and directly jump to the -// resolved addresses. -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn[] = { - // Get PC - 0x7c08'02a6, // mflr r0 - 0x429f'0005, // bcl 20, 31, 4 // obtain PC - 0x7d68'02a6, // mflr r11 - 0x7c08'03a6, // mtlr r0 - - // Compute the PLT entry index - 0xe80b'002c, // ld r0, 44(r11) - 0x7d8b'6050, // subf r12, r11, r12 - 0x7d60'5a14, // add r11, r0, r11 - 0x380c'ffcc, // addi r0, r12, -52 - 0x7800'f082, // rldicl r0, r0, 62, 2 - - // Load .got.plt[0] and .got.plt[1] and branch to .got.plt[0] - 0xe98b'0000, // ld r12, 0(r11) - 0x7d89'03a6, // mtctr r12 - 0xe96b'0008, // ld r11, 8(r11) - 0x4e80'0420, // bctr - - // .quad .got.plt - .plt - 8 - 0x0000'0000, - 0x0000'0000, - }; - - memcpy(buf, insn, sizeof(insn)); - *(ul64 *)(buf + 52) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 8; -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - // When the control is transferred to a PLT entry, the PLT entry's - // address is already set to %r12 by the caller. 
- i64 offset = ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx); - *(ul32 *)buf = 0x4b00'0000 | (offset & 0x00ff'ffff); // b plt0 -} - -// .plt.got is not necessary on PPC64 because range extension thunks -// directly read GOT entries and jump there. -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) {} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_PPC64_ADDR64: - *(ul64 *)loc = val; - break; - case R_PPC64_REL32: - *(ul32 *)loc = val - this->shdr.sh_addr - offset; - break; - case R_PPC64_REL64: - *(ul64 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -static u64 get_local_entry_offset(Context &ctx, Symbol &sym) { - i64 val = sym.esym().ppc_local_entry; - assert(val <= 7); - if (val == 7) - Fatal(ctx) << sym << ": local entry offset 7 is reserved"; - - if (val == 0 || val == 1) - return 0; - return 1 << val; -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - u64 TOC = ctx.extra.TOC->value; - - auto r2save_thunk_addr = [&] { return get_thunk_addr(i); }; - auto no_r2save_thunk_addr = [&] { return get_thunk_addr(i) + 4; }; - - switch (rel.r_type) { - case R_PPC64_ADDR64: - if (name() == ".toc") - apply_toc_rel(ctx, sym, rel, loc, S, A, P, dynrel); - else - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_PPC64_TOC16_HA: - *(ul16 *)loc = ha(S + A - TOC); - break; - case R_PPC64_TOC16_LO: - *(ul16 *)loc = lo(S + A - TOC); - break; - case R_PPC64_TOC16_DS: - case R_PPC64_TOC16_LO_DS: - *(ul16 *)loc |= (S + A - TOC) & 0xfffc; - break; - case R_PPC64_REL24: - if (sym.has_plt(ctx) || !sym.esym().preserves_r2()) { - i64 val = r2save_thunk_addr() + A - P; - *(ul32 *)loc |= bits(val, 25, 2) << 2; - - // The thunk saves %r2 to the caller's r2 save slot. We need to - // restore it after function return. To do so, there's usually a - // NOP as a placeholder after a BL. 0x6000'0000 is a NOP. 
- if (*(ul32 *)(loc + 4) == 0x6000'0000) - *(ul32 *)(loc + 4) = 0xe841'0018; // ld r2, 24(r1) - } else { - i64 val = S + get_local_entry_offset(ctx, sym) + A - P; - if (sign_extend(val, 25) != val) - val = no_r2save_thunk_addr() + A - P; - *(ul32 *)loc |= bits(val, 25, 2) << 2; - } - break; - case R_PPC64_REL24_NOTOC: - if (sym.has_plt(ctx) || sym.esym().uses_toc()) { - i64 val = no_r2save_thunk_addr() + A - P; - *(ul32 *)loc |= bits(val, 25, 2) << 2; - } else { - i64 val = S + A - P; - if (sign_extend(val, 25) != val) - val = no_r2save_thunk_addr() + A - P; - *(ul32 *)loc |= bits(val, 25, 2) << 2; - } - break; - case R_PPC64_REL32: - *(ul32 *)loc = S + A - P; - break; - case R_PPC64_REL64: - *(ul64 *)loc = S + A - P; - break; - case R_PPC64_REL16_HA: - *(ul16 *)loc = ha(S + A - P); - break; - case R_PPC64_REL16_LO: - *(ul16 *)loc = lo(S + A - P); - break; - case R_PPC64_PLT16_HA: - *(ul16 *)loc = ha(G + GOT - TOC); - break; - case R_PPC64_PLT16_HI: - *(ul16 *)loc = hi(G + GOT - TOC); - break; - case R_PPC64_PLT16_LO: - *(ul16 *)loc = lo(G + GOT - TOC); - break; - case R_PPC64_PLT16_LO_DS: - *(ul16 *)loc |= (G + GOT - TOC) & 0xfffc; - break; - case R_PPC64_PLT_PCREL34: - case R_PPC64_PLT_PCREL34_NOTOC: - case R_PPC64_GOT_PCREL34: - *(ul64 *)loc |= prefix34(G + GOT - P); - break; - case R_PPC64_PCREL34: - *(ul64 *)loc |= prefix34(S + A - P); - break; - case R_PPC64_GOT_TPREL16_HA: - *(ul16 *)loc = ha(sym.get_gottp_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TPREL16_LO_DS: - *(ul16 *)loc |= (sym.get_gottp_addr(ctx) - TOC) & 0xfffc; - break; - case R_PPC64_GOT_TPREL_PCREL34: - *(ul64 *)loc |= prefix34(sym.get_gottp_addr(ctx) - P); - break; - case R_PPC64_GOT_TLSGD16_HA: - *(ul16 *)loc = ha(sym.get_tlsgd_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSGD16_LO: - *(ul16 *)loc = lo(sym.get_tlsgd_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSGD_PCREL34: - *(ul64 *)loc |= prefix34(sym.get_tlsgd_addr(ctx) - P); - break; - case R_PPC64_GOT_TLSLD16_HA: - *(ul16 *)loc = ha(ctx.got->get_tlsld_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSLD16_LO: - *(ul16 *)loc = lo(ctx.got->get_tlsld_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSLD_PCREL34: - *(ul64 *)loc |= prefix34(ctx.got->get_tlsld_addr(ctx) - P); - break; - case R_PPC64_DTPREL16_HA: - *(ul16 *)loc = ha(S + A - ctx.dtp_addr); - break; - case R_PPC64_DTPREL16_LO: - *(ul16 *)loc = lo(S + A - ctx.dtp_addr); - break; - case R_PPC64_DTPREL34: - *(ul64 *)loc |= prefix34(S + A - ctx.dtp_addr); - break; - case R_PPC64_TPREL16_HA: - *(ul16 *)loc = ha(S + A - ctx.tp_addr); - break; - case R_PPC64_TPREL16_LO: - *(ul16 *)loc = lo(S + A - ctx.tp_addr); - break; - case R_PPC64_PLTSEQ: - case R_PPC64_PLTSEQ_NOTOC: - case R_PPC64_PLTCALL: - case R_PPC64_PLTCALL_NOTOC: - case R_PPC64_TLS: - case R_PPC64_TLSGD: - case R_PPC64_TLSLD: - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = 
frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_PPC64_ADDR64: - if (std::optional val = get_tombstone(sym, frag)) - *(ul64 *)loc = *val; - else - *(ul64 *)loc = S + A; - break; - case R_PPC64_ADDR32: { - i64 val = S + A; - check(val, 0, 1LL << 32); - *(ul32 *)loc = val; - break; - } - case R_PPC64_DTPREL64: - *(ul64 *)loc = S + A - ctx.dtp_addr; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_PPC64_ADDR64: - if (name() == ".toc") - scan_toc_rel(ctx, sym, rel); - else - scan_dyn_absrel(ctx, sym, rel); - break; - case R_PPC64_GOT_TPREL16_HA: - case R_PPC64_GOT_TPREL_PCREL34: - sym.flags |= NEEDS_GOTTP; - break; - case R_PPC64_REL24: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_PPC64_REL24_NOTOC: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - ctx.extra.is_power10 = true; - break; - case R_PPC64_PLT16_HA: - case R_PPC64_PLT_PCREL34: - case R_PPC64_PLT_PCREL34_NOTOC: - case R_PPC64_GOT_PCREL34: - sym.flags |= NEEDS_GOT; - break; - case R_PPC64_GOT_TLSGD16_HA: - case R_PPC64_GOT_TLSGD_PCREL34: - sym.flags |= NEEDS_TLSGD; - break; - case R_PPC64_GOT_TLSLD16_HA: - case R_PPC64_GOT_TLSLD_PCREL34: - ctx.needs_tlsld = true; - break; - case R_PPC64_TPREL16_HA: - case R_PPC64_TPREL16_LO: - check_tlsle(ctx, sym, rel); - break; - case R_PPC64_REL32: - case R_PPC64_REL64: - case R_PPC64_TOC16_HA: - case R_PPC64_TOC16_LO: - case R_PPC64_TOC16_LO_DS: - case R_PPC64_TOC16_DS: - case R_PPC64_REL16_HA: - case R_PPC64_REL16_LO: - case R_PPC64_PLT16_HI: - case R_PPC64_PLT16_LO: - case R_PPC64_PLT16_LO_DS: - case R_PPC64_PCREL34: - case R_PPC64_PLTSEQ: - case R_PPC64_PLTSEQ_NOTOC: - case R_PPC64_PLTCALL: - case R_PPC64_PLTCALL_NOTOC: - case R_PPC64_GOT_TPREL16_LO_DS: - case R_PPC64_GOT_TLSGD16_LO: - case R_PPC64_GOT_TLSLD16_LO: - case R_PPC64_TLS: - case R_PPC64_TLSGD: - case R_PPC64_TLSLD: - case R_PPC64_DTPREL16_HA: - case R_PPC64_DTPREL16_LO: - case R_PPC64_DTPREL34: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template <> -void RangeExtensionThunk::copy_buf(Context &ctx) { - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - - // If the destination is PLT, we read an address from .got.plt or .got - // and jump there. - static const ul32 plt_thunk[] = { - 0xf841'0018, // std r2, 24(r1) - 0x3d82'0000, // addis r12, r2, foo@gotplt@toc@ha - 0xe98c'0000, // ld r12, foo@gotplt@toc@lo(r12) - 0x7d89'03a6, // mtctr r12 - 0x4e80'0420, // bctr - }; - - static const ul32 plt_thunk_power10[] = { - 0xf841'0018, // std r2, 24(r1) - 0x0410'0000, // pld r12, foo@gotplt@pcrel - 0xe580'0000, - 0x7d89'03a6, // mtctr r12 - 0x4e80'0420, // bctr - }; - - // If the destination is a non-imported function, we directly jump - // to its local entry point. 
- static const ul32 local_thunk[] = { - 0xf841'0018, // std r2, 24(r1) - 0x3d82'0000, // addis r12, r2, foo@toc@ha - 0x398c'0000, // addi r12, r12, foo@toc@lo - 0x7d89'03a6, // mtctr r12 - 0x4e80'0420, // bctr - }; - - static const ul32 local_thunk_power10[] = { - 0xf841'0018, // std r2, 24(r1) - 0x0610'0000, // pla r12, foo@pcrel - 0x3980'0000, - 0x7d89'03a6, // mtctr r12 - 0x4e80'0420, // bctr - }; - - static_assert(E::thunk_size == sizeof(plt_thunk)); - static_assert(E::thunk_size == sizeof(plt_thunk_power10)); - static_assert(E::thunk_size == sizeof(local_thunk)); - static_assert(E::thunk_size == sizeof(local_thunk_power10)); - - for (i64 i = 0; i < symbols.size(); i++) { - Symbol &sym = *symbols[i]; - ul32 *loc = (ul32 *)(buf + i * E::thunk_size); - - if (sym.has_plt(ctx)) { - u64 got = sym.has_got(ctx) ? sym.get_got_addr(ctx) : sym.get_gotplt_addr(ctx); - - if (ctx.extra.is_power10) { - memcpy(loc, plt_thunk_power10, E::thunk_size); - *(ul64 *)(loc + 1) |= prefix34(got - get_addr(i) - 4); - } else { - i64 val = got - ctx.extra.TOC->value; - memcpy(loc, plt_thunk, E::thunk_size); - loc[1] |= higha(val); - loc[2] |= lo(val); - } - } else { - if (ctx.extra.is_power10) { - memcpy(loc, local_thunk_power10, E::thunk_size); - *(ul64 *)(loc + 1) |= prefix34(sym.get_addr(ctx) - get_addr(i) - 4); - } else { - i64 val = sym.get_addr(ctx) - ctx.extra.TOC->value; - memcpy(loc, local_thunk, E::thunk_size); - loc[1] |= higha(val); - loc[2] |= lo(val); - } - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-riscv.cc b/third_party/mold/elf/arch-riscv.cc deleted file mode 100644 index ddef6419b61..00000000000 --- a/third_party/mold/elf/arch-riscv.cc +++ /dev/null @@ -1,938 +0,0 @@ -// clang-format off -// RISC-V is a clean RISC ISA. It supports PC-relative load/store for -// position-independent code. Its 32-bit and 64-bit ISAs are almost -// identical. That is, you can think RV32 as a RV64 without 64-bit -// operations. In this file, we support both RV64 and RV32. -// -// RISC-V is essentially little-endian, but the big-endian version is -// available as an extension. GCC supports `-mbig-endian` to generate -// big-endian code. Even in big-endian mode, machine instructions are -// defined to be encoded in little-endian, though. Only the behavior of -// load/store instructions are different between LE RISC-V and BE RISC-V. -// -// From the linker's point of view, the RISC-V's psABI is unique because -// sections in input object files can be shrunk while being copied to the -// output file. That is contrary to other psABIs in which sections are an -// atomic unit of copying. Let me explain it in more details. -// -// Since RISC-V instructions are 16-bit or 32-bit long, there's no way to -// embed a very large immediate into a branch instruction. In fact, JAL -// (jump and link) instruction can jump to only within PC ± 1 MiB because -// its immediate is only 21 bits long. If the destination is out of its -// reach, we need to use two instructions instead; the first instruction -// being AUIPC which sets upper 20 bits to a register and the second being -// JALR with a 12-bit immediate and the register. Combined, they specify a -// 32 bits displacement. -// -// Other RISC ISAs have the same limitation, and they solved the problem by -// letting the linker create so-called "range extension thunks". It works as -// follows: the compiler optimistically emits single jump instructions for -// function calls. 
If the linker finds that a branch target is out of reach, -// it emits a small piece of machine code near the branch instruction and -// redirect the branch to the linker-synthesized code. The code constructs a -// full 32-bit address in a register and jump to the destination. That -// linker-synthesized code is called "range extension thunks" or just -// "thunks". -// -// The RISC-V psABI is unique that it works the other way around. That is, -// for RISC-V, the compiler always emits two instructions (AUIPC + JAL) for -// function calls. If the linker finds the destination is reachable with a -// single instruction, it replaces the two instructions with the one and -// shrink the section size by one instruction length, instead of filling the -// gap with a nop. -// -// With the presence of this relaxation, sections can no longer be -// considered as an atomic unit. If we delete 4 bytes from the middle of a -// section, all contents after that point needs to be shifted by 4. Symbol -// values and relocation offsets have to be adjusted accordingly if they -// refer to past the deleted bytes. -// -// In mold, we use `r_deltas` to memorize how many bytes have be adjusted -// for relocations. For symbols, we directly mutate their `value` member. -// -// RISC-V object files tend to have way more relocations than those for -// other targets. This is because all branches, including ones that jump -// within the same section, are explicitly expressed with relocations. -// Here is why we need them: all control-flow statements such as `if` or -// `for` are implemented using branch instructions. For other targets, the -// compiler doesn't emit relocations for such branches because they know -// at compile-time exactly how many bytes has to be skipped. That's not -// true to RISC-V because the linker may delete bytes between a branch and -// its destination. Therefore, all branches including in-section ones have -// to be explicitly expressed with relocations. -// -// Note that this mechanism only shrink sections and never enlarge, as -// the compiler always emits the longest instruction sequence. This -// makes the linker implementation a bit simpler because we don't need -// to worry about oscillation. -// -// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc - -#include "third_party/mold/elf/mold.h" - -// MISSING #include -// MISSING #include - -namespace mold::elf { - -static void write_itype(u8 *loc, u32 val) { - *(ul32 *)loc &= 0b000000'00000'11111'111'11111'1111111; - *(ul32 *)loc |= bits(val, 11, 0) << 20; -} - -static void write_stype(u8 *loc, u32 val) { - *(ul32 *)loc &= 0b000000'11111'11111'111'00000'1111111; - *(ul32 *)loc |= bits(val, 11, 5) << 25 | bits(val, 4, 0) << 7; -} - -static void write_btype(u8 *loc, u32 val) { - *(ul32 *)loc &= 0b000000'11111'11111'111'00000'1111111; - *(ul32 *)loc |= bit(val, 12) << 31 | bits(val, 10, 5) << 25 | - bits(val, 4, 1) << 8 | bit(val, 11) << 7; -} - -static void write_utype(u8 *loc, u32 val) { - *(ul32 *)loc &= 0b000000'00000'00000'000'11111'1111111; - - // U-type instructions are used in combination with I-type - // instructions. U-type insn sets an immediate to the upper 20-bits - // of a register. I-type insn sign-extends a 12-bits immediate and - // adds it to a register value to construct a complete value. 0x800 - // is added here to compensate for the sign-extension. 
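// For example, if val is 0x1800, the paired I-type immediate is 0x800, which
// the CPU sign-extends to -0x800; storing (0x1800 + 0x800) & 0xffff'f000 =
// 0x2000 here makes 0x2000 + (-0x800) reconstruct the original 0x1800.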
- *(ul32 *)loc |= (val + 0x800) & 0xffff'f000; -} - -static void write_jtype(u8 *loc, u32 val) { - *(ul32 *)loc &= 0b000000'00000'00000'000'11111'1111111; - *(ul32 *)loc |= bit(val, 20) << 31 | bits(val, 10, 1) << 21 | - bit(val, 11) << 20 | bits(val, 19, 12) << 12; -} - -static void write_cbtype(u8 *loc, u32 val) { - *(ul16 *)loc &= 0b111'000'111'00000'11; - *(ul16 *)loc |= bit(val, 8) << 12 | bit(val, 4) << 11 | bit(val, 3) << 10 | - bit(val, 7) << 6 | bit(val, 6) << 5 | bit(val, 2) << 4 | - bit(val, 1) << 3 | bit(val, 5) << 2; -} - -static void write_cjtype(u8 *loc, u32 val) { - *(ul16 *)loc &= 0b111'00000000000'11; - *(ul16 *)loc |= bit(val, 11) << 12 | bit(val, 4) << 11 | bit(val, 9) << 10 | - bit(val, 8) << 9 | bit(val, 10) << 8 | bit(val, 6) << 7 | - bit(val, 7) << 6 | bit(val, 3) << 5 | bit(val, 2) << 4 | - bit(val, 1) << 3 | bit(val, 5) << 2; -} - -static void overwrite_uleb(u8 *loc, u64 val) { - while (*loc & 0b1000'0000) { - *loc++ = 0b1000'0000 | (val & 0b0111'1111); - val >>= 7; - } -} - -// Returns the rd register of an R/I/U/J-type instruction. -static u32 get_rd(u32 val) { - return bits(val, 11, 7); -} - -static void set_rs1(u8 *loc, u32 rs1) { - assert(rs1 < 32); - *(ul32 *)loc &= 0b111111'11111'00000'111'11111'1111111; - *(ul32 *)loc |= rs1 << 15; -} - -template -void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn_64[] = { - 0x0000'0397, // auipc t2, %pcrel_hi(.got.plt) - 0x41c3'0333, // sub t1, t1, t3 # .plt entry + hdr + 12 - 0x0003'be03, // ld t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve - 0xfd43'0313, // addi t1, t1, -44 # .plt entry - 0x0003'8293, // addi t0, t2, %pcrel_lo(1b) # &.got.plt - 0x0013'5313, // srli t1, t1, 1 # .plt entry offset - 0x0082'b283, // ld t0, 8(t0) # link map - 0x000e'0067, // jr t3 - }; - - static const ul32 insn_32[] = { - 0x0000'0397, // auipc t2, %pcrel_hi(.got.plt) - 0x41c3'0333, // sub t1, t1, t3 # .plt entry + hdr + 12 - 0x0003'ae03, // lw t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve - 0xfd43'0313, // addi t1, t1, -44 # .plt entry - 0x0003'8293, // addi t0, t2, %pcrel_lo(1b) # &.got.plt - 0x0023'5313, // srli t1, t1, 2 # .plt entry offset - 0x0042'a283, // lw t0, 4(t0) # link map - 0x000e'0067, // jr t3 - }; - - if constexpr (E::is_64) - memcpy(buf, insn_64, sizeof(insn_64)); - else - memcpy(buf, insn_32, sizeof(insn_32)); - - u64 gotplt = ctx.gotplt->shdr.sh_addr; - u64 plt = ctx.plt->shdr.sh_addr; - write_utype(buf, gotplt - plt); - write_itype(buf + 8, gotplt - plt); - write_itype(buf + 16, gotplt - plt); -} - -static const ul32 plt_entry_64[] = { - 0x0000'0e17, // auipc t3, %pcrel_hi(function@.got.plt) - 0x000e'3e03, // ld t3, %pcrel_lo(1b)(t3) - 0x000e'0367, // jalr t1, t3 - 0x0000'0013, // nop -}; - -static const ul32 plt_entry_32[] = { - 0x0000'0e17, // auipc t3, %pcrel_hi(function@.got.plt) - 0x000e'2e03, // lw t3, %pcrel_lo(1b)(t3) - 0x000e'0367, // jalr t1, t3 - 0x0000'0013, // nop -}; - -template -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - if constexpr (E::is_64) - memcpy(buf, plt_entry_64, sizeof(plt_entry_64)); - else - memcpy(buf, plt_entry_32, sizeof(plt_entry_32)); - - u64 gotplt = sym.get_gotplt_addr(ctx); - u64 plt = sym.get_plt_addr(ctx); - write_utype(buf, gotplt - plt); - write_itype(buf + 4, gotplt - plt); -} - -template -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - if constexpr (E::is_64) - memcpy(buf, plt_entry_64, sizeof(plt_entry_64)); - else - memcpy(buf, plt_entry_32, sizeof(plt_entry_32)); - - u64 got = sym.get_got_addr(ctx); - u64 plt = 
sym.get_plt_addr(ctx); - write_utype(buf, got - plt); - write_itype(buf + 4, got - plt); -} - -template -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_RISCV_ADD32: - *(U32 *)loc += val; - break; - case R_RISCV_SUB8: - *loc -= val; - break; - case R_RISCV_SUB16: - *(U16 *)loc -= val; - break; - case R_RISCV_SUB32: - *(U32 *)loc -= val; - break; - case R_RISCV_SUB6: - *loc = (*loc & 0b1100'0000) | ((*loc - val) & 0b0011'1111); - break; - case R_RISCV_SET6: - *loc = (*loc & 0b1100'0000) | (val & 0b0011'1111); - break; - case R_RISCV_SET8: - *loc = val; - break; - case R_RISCV_SET16: - *(U16 *)loc = val; - break; - case R_RISCV_SET32: - *(U32 *)loc = val; - break; - case R_RISCV_32_PCREL: - *(U32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - auto get_r_delta = [&](i64 idx) { - return extra.r_deltas.empty() ? 0 : extra.r_deltas[idx]; - }; - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || rel.r_type == R_RISCV_RELAX) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - i64 r_offset = rel.r_offset - get_r_delta(i); - i64 removed_bytes = get_r_delta(i + 1) - get_r_delta(i); - u8 *loc = base + r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - auto find_paired_reloc = [&] { - Symbol &sym = *file.symbols[rels[i].r_sym]; - assert(sym.get_input_section() == this); - - if (sym.value < r_offset) { - for (i64 j = i - 1; j >= 0; j--) - if (u32 ty = rels[j].r_type; - ty == R_RISCV_GOT_HI20 || ty == R_RISCV_TLS_GOT_HI20 || - ty == R_RISCV_TLS_GD_HI20 || ty == R_RISCV_PCREL_HI20) - if (sym.value == rels[j].r_offset - get_r_delta(j)) - return j; - } else { - for (i64 j = i + 1; j < rels.size(); j++) - if (u32 ty = rels[j].r_type; - ty == R_RISCV_GOT_HI20 || ty == R_RISCV_TLS_GOT_HI20 || - ty == R_RISCV_TLS_GD_HI20 || ty == R_RISCV_PCREL_HI20) - if (sym.value == rels[j].r_offset - get_r_delta(j)) - return j; - } - - Fatal(ctx) << *this << ": paired relocation is missing: " << i; - }; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_RISCV_32: - if constexpr (E::is_64) - *(U32 *)loc = S + A; - else - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_RISCV_64: - assert(E::is_64); - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_RISCV_BRANCH: - check(S + A - P, -(1 << 12), 1 << 12); - write_btype(loc, S + A - P); - break; - case R_RISCV_JAL: - check(S + A - P, -(1 << 20), 1 << 20); - write_jtype(loc, S + A - P); - break; - case R_RISCV_CALL: - case R_RISCV_CALL_PLT: { - u32 rd = get_rd(*(ul32 *)(contents.data() + rel.r_offset + 4)); - - if (removed_bytes == 4) { - // auipc + jalr -> jal - *(ul32 *)loc = (rd << 7) | 0b1101111; - write_jtype(loc, S + A - P); - } 
else if (removed_bytes == 6 && rd == 0) { - // auipc + jalr -> c.j - *(ul16 *)loc = 0b101'00000000000'01; - write_cjtype(loc, S + A - P); - } else if (removed_bytes == 6 && rd == 1) { - // auipc + jalr -> c.jal - assert(!E::is_64); - *(ul16 *)loc = 0b001'00000000000'01; - write_cjtype(loc, S + A - P); - } else { - assert(removed_bytes == 0); - // Calling an undefined weak symbol does not make sense. - // We make such call into an infinite loop. This should - // help debugging of a faulty program. - u64 val = sym.esym().is_undef_weak() ? 0 : S + A - P; - check(val, -(1LL << 31), 1LL << 31); - write_utype(loc, val); - write_itype(loc + 4, val); - } - break; - } - case R_RISCV_GOT_HI20: - write_utype(loc, G + GOT + A - P); - break; - case R_RISCV_TLS_GOT_HI20: - write_utype(loc, sym.get_gottp_addr(ctx) + A - P); - break; - case R_RISCV_TLS_GD_HI20: - write_utype(loc, sym.get_tlsgd_addr(ctx) + A - P); - break; - case R_RISCV_PCREL_HI20: - write_utype(loc, S + A - P); - break; - case R_RISCV_PCREL_LO12_I: - case R_RISCV_PCREL_LO12_S: { - i64 idx2 = find_paired_reloc(); - const ElfRel &rel2 = rels[idx2]; - Symbol &sym2 = *file.symbols[rel2.r_sym]; - - u64 S = sym2.get_addr(ctx); - u64 A = rel2.r_addend; - u64 P = get_addr() + rel2.r_offset - get_r_delta(idx2); - u64 G = sym2.get_got_idx(ctx) * sizeof(Word); - u64 val; - - switch (rel2.r_type) { - case R_RISCV_GOT_HI20: - val = G + GOT + A - P; - break; - case R_RISCV_TLS_GOT_HI20: - val = sym2.get_gottp_addr(ctx) + A - P; - break; - case R_RISCV_TLS_GD_HI20: - val = sym2.get_tlsgd_addr(ctx) + A - P; - break; - case R_RISCV_PCREL_HI20: - val = S + A - P; - break; - default: - unreachable(); - } - - if (rel.r_type == R_RISCV_PCREL_LO12_I) - write_itype(loc, val); - else - write_stype(loc, val); - break; - } - case R_RISCV_HI20: - assert(removed_bytes == 0 || removed_bytes == 4); - if (removed_bytes == 0) { - check(S + A, -(1LL << 31), 1LL << 31); - write_utype(loc, S + A); - } - break; - case R_RISCV_LO12_I: - case R_RISCV_LO12_S: - if (rel.r_type == R_RISCV_LO12_I) - write_itype(loc, S + A); - else - write_stype(loc, S + A); - - // Rewrite `lw t1, 0(t0)` with `lw t1, 0(x0)` if the address is - // accessible relative to the zero register. If the upper 20 bits - // are all zero, the corresponding LUI might have been removed. - if (bits(S + A, 31, 12) == 0) - set_rs1(loc, 0); - break; - case R_RISCV_TPREL_HI20: - assert(removed_bytes == 0 || removed_bytes == 4); - if (removed_bytes == 0) - write_utype(loc, S + A - ctx.tp_addr); - break; - case R_RISCV_TPREL_ADD: - // This relocation just annotates an ADD instruction that can be - // removed when a TPREL is relaxed. No value is needed to be - // written. - assert(removed_bytes == 0 || removed_bytes == 4); - break; - case R_RISCV_TPREL_LO12_I: - case R_RISCV_TPREL_LO12_S: { - i64 val = S + A - ctx.tp_addr; - if (rel.r_type == R_RISCV_TPREL_LO12_I) - write_itype(loc, val); - else - write_stype(loc, val); - - // Rewrite `lw t1, 0(t0)` with `lw t1, 0(tp)` if the address is - // directly accessible using tp. tp is x4. 
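The `sign_extend(val, 11) == val` tests in this function simply ask "does the value fit in a signed 12-bit immediate?". A minimal sketch of the helper, under the assumption that bit n is treated as the sign bit (not a verbatim copy of mold's helper):

    #include <cstdint>

    // Assumed semantics: treat bit n of val as the sign bit and propagate it
    // through all higher bits.
    inline int64_t sign_extend(uint64_t val, int n) {
      int shift = 63 - n;
      return (int64_t)(val << shift) >> shift;
    }

    // sign_extend(val, 11) == val  <=>  val fits in a signed 12-bit immediate.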
- if (sign_extend(val, 11) == val) - set_rs1(loc, 4); - break; - } - case R_RISCV_ADD8: - loc += S + A; - break; - case R_RISCV_ADD16: - *(U16 *)loc += S + A; - break; - case R_RISCV_ADD32: - *(U32 *)loc += S + A; - break; - case R_RISCV_ADD64: - *(U64 *)loc += S + A; - break; - case R_RISCV_SUB8: - loc -= S + A; - break; - case R_RISCV_SUB16: - *(U16 *)loc -= S + A; - break; - case R_RISCV_SUB32: - *(U32 *)loc -= S + A; - break; - case R_RISCV_SUB64: - *(U64 *)loc -= S + A; - break; - case R_RISCV_ALIGN: { - // A R_RISCV_ALIGN is followed by a NOP sequence. We need to remove - // zero or more bytes so that the instruction after R_RISCV_ALIGN is - // aligned to a given alignment boundary. - // - // We need to guarantee that the NOP sequence is valid after byte - // removal (e.g. we can't remove the first 2 bytes of a 4-byte NOP). - // For the sake of simplicity, we always rewrite the entire NOP sequence. - i64 padding_bytes = rel.r_addend - removed_bytes; - assert((padding_bytes & 1) == 0); - - i64 i = 0; - for (; i <= padding_bytes - 4; i += 4) - *(ul32 *)(loc + i) = 0x0000'0013; // nop - if (i < padding_bytes) - *(ul16 *)(loc + i) = 0x0001; // c.nop - break; - } - case R_RISCV_RVC_BRANCH: - check(S + A - P, -(1 << 8), 1 << 8); - write_cbtype(loc, S + A - P); - break; - case R_RISCV_RVC_JUMP: - check(S + A - P, -(1 << 11), 1 << 11); - write_cjtype(loc, S + A - P); - break; - case R_RISCV_SUB6: - *loc = (*loc & 0b1100'0000) | ((*loc - (S + A)) & 0b0011'1111); - break; - case R_RISCV_SET6: - *loc = (*loc & 0b1100'0000) | ((S + A) & 0b0011'1111); - break; - case R_RISCV_SET8: - *loc = S + A; - break; - case R_RISCV_SET16: - *(U16 *)loc = S + A; - break; - case R_RISCV_SET32: - *(U32 *)loc = S + A; - break; - case R_RISCV_PLT32: - case R_RISCV_32_PCREL: - *(U32 *)loc = S + A - P; - break; - default: - unreachable(); - } - } -} - -template -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_RISCV_32: - *(U32 *)loc = S + A; - break; - case R_RISCV_64: - if (std::optional val = get_tombstone(sym, frag)) - *(U64 *)loc = *val; - else - *(U64 *)loc = S + A; - break; - case R_RISCV_ADD8: - *loc += S + A; - break; - case R_RISCV_ADD16: - *(U16 *)loc += S + A; - break; - case R_RISCV_ADD32: - *(U32 *)loc += S + A; - break; - case R_RISCV_ADD64: - *(U64 *)loc += S + A; - break; - case R_RISCV_SUB8: - *loc -= S + A; - break; - case R_RISCV_SUB16: - *(U16 *)loc -= S + A; - break; - case R_RISCV_SUB32: - *(U32 *)loc -= S + A; - break; - case R_RISCV_SUB64: - *(U64 *)loc -= S + A; - break; - case R_RISCV_SUB6: - *loc = (*loc & 0b1100'0000) | ((*loc - (S + A)) & 0b0011'1111); - break; - case R_RISCV_SET6: - *loc = (*loc & 0b1100'0000) | ((S + A) & 0b0011'1111); - break; - case R_RISCV_SET8: - *loc = S + A; - break; - case R_RISCV_SET16: - *(U16 *)loc = S + A; - break; - case R_RISCV_SET32: - *(U32 *)loc = S + A; - break; - case R_RISCV_SET_ULEB128: - overwrite_uleb(loc, S + A); - break; - case R_RISCV_SUB_ULEB128: { - u8 *p = loc; - u64 val = read_uleb(p); - overwrite_uleb(loc, val - S - A); - break; - } - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - break; - } - } -} - -template -void InputSection::copy_contents_riscv(Context &ctx, u8 *buf) { - // If a section is not relaxed, we can copy it as a one big chunk. - if (extra.r_deltas.empty()) { - uncompress_to(ctx, buf); - return; - } - - // A relaxed section is copied piece-wise. - std::span> rels = get_rels(ctx); - i64 pos = 0; - - for (i64 i = 0; i < rels.size(); i++) { - i64 delta = extra.r_deltas[i + 1] - extra.r_deltas[i]; - if (delta == 0) - continue; - assert(delta > 0); - - const ElfRel &r = rels[i]; - memcpy(buf, contents.data() + pos, r.r_offset - pos); - buf += r.r_offset - pos; - pos = r.r_offset + delta; - } - - memcpy(buf, contents.data() + pos, contents.size() - pos); -} - -template -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_RISCV_32: - if constexpr (E::is_64) - scan_absrel(ctx, sym, rel); - else - scan_dyn_absrel(ctx, sym, rel); - break; - case R_RISCV_HI20: - scan_absrel(ctx, sym, rel); - break; - case R_RISCV_64: - if constexpr (!E::is_64) - Fatal(ctx) << *this << ": R_RISCV_64 cannot be used on RV32"; - scan_dyn_absrel(ctx, sym, rel); - break; - case R_RISCV_CALL: - case R_RISCV_CALL_PLT: - case R_RISCV_PLT32: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_RISCV_GOT_HI20: - sym.flags |= NEEDS_GOT; - break; - case R_RISCV_TLS_GOT_HI20: - sym.flags |= NEEDS_GOTTP; - break; - case R_RISCV_TLS_GD_HI20: - sym.flags |= NEEDS_TLSGD; - break; - case R_RISCV_32_PCREL: - scan_pcrel(ctx, sym, rel); - break; - case R_RISCV_TPREL_HI20: - case R_RISCV_TPREL_LO12_I: - case R_RISCV_TPREL_LO12_S: - case R_RISCV_TPREL_ADD: - check_tlsle(ctx, sym, rel); - break; - case R_RISCV_BRANCH: - case R_RISCV_JAL: - case R_RISCV_PCREL_HI20: - case R_RISCV_PCREL_LO12_I: - case R_RISCV_PCREL_LO12_S: - case R_RISCV_LO12_I: - case R_RISCV_LO12_S: - case R_RISCV_ADD8: - 
case R_RISCV_ADD16: - case R_RISCV_ADD32: - case R_RISCV_ADD64: - case R_RISCV_SUB8: - case R_RISCV_SUB16: - case R_RISCV_SUB32: - case R_RISCV_SUB64: - case R_RISCV_ALIGN: - case R_RISCV_RVC_BRANCH: - case R_RISCV_RVC_JUMP: - case R_RISCV_RELAX: - case R_RISCV_SUB6: - case R_RISCV_SET6: - case R_RISCV_SET8: - case R_RISCV_SET16: - case R_RISCV_SET32: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template -static bool is_resizable(Context &ctx, InputSection *isec) { - return isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC) && - (isec->shdr().sh_flags & SHF_EXECINSTR); -} - -// Returns the distance between a relocated place and a symbol. -template -static i64 compute_distance(Context &ctx, Symbol &sym, - InputSection &isec, const ElfRel &rel) { - // We handle absolute symbols as if they were infinitely far away - // because `shrink_section` may increase a distance between a branch - // instruction and an absolute symbol. Branching to an absolute - // location is extremely rare in real code, though. - if (sym.is_absolute()) - return INT32_MAX; - - // Likewise, relocations against weak undefined symbols won't be relaxed. - if (sym.esym().is_undef_weak()) - return INT32_MAX; - - // Compute a distance between the relocated place and the symbol. - i64 S = sym.get_addr(ctx); - i64 A = rel.r_addend; - i64 P = isec.get_addr() + rel.r_offset; - return S + A - P; -} - -// Scan relocations to shrink sections. -template -static void shrink_section(Context &ctx, InputSection &isec, bool use_rvc) { - std::span> rels = isec.get_rels(ctx); - isec.extra.r_deltas.resize(rels.size() + 1); - - i64 delta = 0; - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &r = rels[i]; - Symbol &sym = *isec.file.symbols[r.r_sym]; - isec.extra.r_deltas[i] = delta; - - // Handling R_RISCV_ALIGN is mandatory. - // - // R_RISCV_ALIGN refers to NOP instructions. We need to eliminate some - // or all of the instructions so that the instruction that immediately - // follows the NOPs is aligned to a specified alignment boundary. - if (r.r_type == R_RISCV_ALIGN) { - // The total bytes of NOPs is stored to r_addend, so the next - // instruction is r_addend away. - u64 loc = isec.get_addr() + r.r_offset - delta; - u64 next_loc = loc + r.r_addend; - u64 alignment = bit_ceil(r.r_addend + 1); - assert(alignment <= (1 << isec.p2align)); - delta += next_loc - align_to(loc, alignment); - continue; - } - - // Handling other relocations is optional. - if (!ctx.arg.relax || i == rels.size() - 1 || - rels[i + 1].r_type != R_RISCV_RELAX) - continue; - - // Linker-synthesized symbols haven't been assigned their final - // values when we are shrinking sections because actual values can - // be computed only after we fix the file layout. Therefore, we - // assume that relocations against such symbols are always - // non-relaxable. - if (sym.file == ctx.internal_obj) - continue; - - switch (r.r_type) { - case R_RISCV_CALL: - case R_RISCV_CALL_PLT: { - // These relocations refer to an AUIPC + JALR instruction pair to - // allow to jump to anywhere in PC ± 2 GiB. If the jump target is - // close enough to PC, we can use C.J, C.JAL or JAL instead. - i64 dist = compute_distance(ctx, sym, isec, r); - if (dist & 1) - break; - - i64 rd = get_rd(*(ul32 *)(isec.contents.data() + r.r_offset + 4)); - - if (rd == 0 && sign_extend(dist, 11) == dist && use_rvc) { - // If rd is x0 and the jump target is within ±2 KiB, we can use - // C.J, saving 6 bytes. 
- delta += 6; - } else if (rd == 1 && sign_extend(dist, 11) == dist && use_rvc && !E::is_64) { - // If rd is x1 and the jump target is within ±2 KiB, we can use - // C.JAL. This is RV32 only because C.JAL is RV32-only instruction. - delta += 6; - } else if (sign_extend(dist, 20) == dist) { - // If the jump target is within ±1 MiB, we can use JAL. - delta += 4; - } - break; - } - case R_RISCV_HI20: - // If the upper 20 bits are all zero, we can remove LUI. - // The corresponding instructions referred to by LO12_I/LO12_S - // relocations will use the zero register instead. - if (bits(sym.get_addr(ctx), 31, 12) == 0) - delta += 4; - break; - case R_RISCV_TPREL_HI20: - case R_RISCV_TPREL_ADD: - // These relocations are used to add a high 20-bit value to the - // thread pointer. The following two instructions materializes - // TP + HI20(foo) in %r5, for example. - // - // lui a5,%tprel_hi(foo) # R_RISCV_TPREL_HI20 (symbol) - // add a5,a5,tp,%tprel_add(foo) # R_RISCV_TPREL_ADD (symbol) - // - // Then thread-local variable `foo` is accessed with a low 12-bit - // offset like this: - // - // sw t0,%tprel_lo(foo)(a5) # R_RISCV_TPREL_LO12_S (symbol) - // - // However, if the variable is at TP ±2 KiB, TP + HI20(foo) is the - // same as TP, so we can instead access the thread-local variable - // directly using TP like this: - // - // sw t0,%tprel_lo(foo)(tp) - // - // Here, we remove `lui` and `add` if the offset is within ±2 KiB. - if (i64 val = sym.get_addr(ctx) + r.r_addend - ctx.tp_addr; - sign_extend(val, 11) == val) - delta += 4; - break; - } - } - - isec.extra.r_deltas[rels.size()] = delta; - isec.sh_size -= delta; -} - -// Shrink sections by interpreting relocations. -// -// This operation seems to be optional, because by default longest -// instructions are being used. However, calling this function is actually -// mandatory because of R_RISCV_ALIGN. R_RISCV_ALIGN is a directive to the -// linker to align the location referred to by the relocation to a -// specified byte boundary. We at least have to interpret them to satisfy -// the alignment constraints. -template -i64 riscv_resize_sections(Context &ctx) { - Timer t(ctx, "riscv_resize_sections"); - - // True if we can use the 2-byte instructions. This is usually true on - // Unix because RV64GC is generally considered the baseline hardware. - bool use_rvc = get_eflags(ctx) & EF_RISCV_RVC; - - // Find all the relocations that can be relaxed. - // This step should only shrink sections. - tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - for (std::unique_ptr> &isec : file->sections) - if (is_resizable(ctx, isec.get())) - shrink_section(ctx, *isec, use_rvc); - }); - - // Fix symbol values. - tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - for (Symbol *sym : file->symbols) { - if (sym->file != file) - continue; - - InputSection *isec = sym->get_input_section(); - if (!isec || isec->extra.r_deltas.empty()) - continue; - - std::span> rels = isec->get_rels(ctx); - auto it = std::lower_bound(rels.begin(), rels.end(), sym->value, - [&](const ElfRel &r, u64 val) { - return r.r_offset < val; - }); - - sym->value -= isec->extra.r_deltas[it - rels.begin()]; - } - }); - - // Re-compute section offset again to finalize them. 
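The symbol-value fixup above can be pictured with a minimal sketch (assumed helper, not from the patch): the new position of a point originally at `offset` is the old position minus the delta accumulated by the first relocation at or after it, which is exactly what the `lower_bound` lookup computes.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // r_offsets: relocation offsets in ascending order.
    // r_deltas:  bytes removed before each relocation (size == r_offsets.size() + 1).
    uint64_t adjusted_offset(uint64_t offset,
                             const std::vector<uint64_t> &r_offsets,
                             const std::vector<uint64_t> &r_deltas) {
      auto it = std::lower_bound(r_offsets.begin(), r_offsets.end(), offset);
      return offset - r_deltas[it - r_offsets.begin()];
    }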
-  compute_section_sizes(ctx);
-  return set_osec_offsets(ctx);
-}
-
-#define INSTANTIATE(E)                                                       \
-  template void write_plt_header<E>(Context<E> &, u8 *);                     \
-  template void write_plt_entry<E>(Context<E> &, u8 *, Symbol<E> &);         \
-  template void write_pltgot_entry<E>(Context<E> &, u8 *, Symbol<E> &);      \
-  template void                                                              \
-  EhFrameSection<E>::apply_reloc(Context<E> &, const ElfRel<E> &, u64, u64); \
-  template void InputSection<E>::apply_reloc_alloc(Context<E> &, u8 *);      \
-  template void InputSection<E>::apply_reloc_nonalloc(Context<E> &, u8 *);   \
-  template void InputSection<E>::copy_contents_riscv(Context<E> &, u8 *);    \
-  template void InputSection<E>::scan_relocations(Context<E> &);             \
-  template i64 riscv_resize_sections<E>(Context<E> &);
-
-INSTANTIATE(RV64LE);
-INSTANTIATE(RV64BE);
-INSTANTIATE(RV32LE);
-INSTANTIATE(RV32BE);
-
-} // namespace mold::elf
diff --git a/third_party/mold/elf/arch-s390x.cc b/third_party/mold/elf/arch-s390x.cc
deleted file mode 100644
index 72c2965ed3b..00000000000
--- a/third_party/mold/elf/arch-s390x.cc
+++ /dev/null
@@ -1,491 +0,0 @@
-// clang-format off
-// This file contains code for the IBM z/Architecture 64-bit ISA, which is
-// commonly referred to as "s390x" on Linux.
-//
-// z/Architecture is a 64-bit CISC ISA developed by IBM around 2000 for
-// IBM's "big iron" mainframe computers. The computers are direct
-// descendants of IBM System/360 all the way back in 1966. I've never
-// actually seen a mainframe, and you probably haven't either, but it looks
-// like the mainframe market is still large enough to sustain its ecosystem.
-// Ubuntu, for example, provides official support for s390x as of 2022.
-// Since these systems are being actively maintained, we need to support them.
-//
-// As an instruction set, s390x isn't particularly odd. It has 16 general-
-// purpose registers. Instructions are 2, 4 or 6 bytes long and always
-// aligned to 2-byte boundaries. Despite its unfamiliarity, I found that it
-// just feels like an x86-64 in a parallel universe.
-//
-// Here is the register usage in this ABI:
-//
-//   r0-r1: reserved as scratch registers so we can use them in our PLT
-//   r2:    parameter passing and return values
-//   r3-r6: parameter passing
-//   r12:   address of GOT if position-independent code
-//   r14:   return address
-//   r15:   stack pointer
-//   a1:    upper 32 bits of TP (thread pointer)
-//   a2:    lower 32 bits of TP (thread pointer)
-//
-// Thread-local storage (TLS) is supported on s390x in the same way as it
-// is on other targets, with one exception. On other targets, __tls_get_addr
-// is used to get the address of a thread-local variable. On s390x,
-// __tls_get_offset is used instead. The difference is that __tls_get_offset
-// returns the address of a thread-local variable as an offset from TP. So
-// we need to add TP to the return value before use. I don't know why it is
-// different, but that is the way it is.
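That difference can be summarized with a small sketch (hypothetical declarations for illustration only; the real ABI passes a GOT offset in a register and reads TP from the access registers):

    #include <cstdint>

    // Hypothetical signatures, not the actual libc/ABI declarations.
    extern "C" uint64_t __tls_get_offset(uint64_t got_offset);
    uint64_t read_thread_pointer();  // assumed: combines the a1/a2 halves of TP

    void *tls_address(uint64_t got_offset) {
      // On most targets __tls_get_addr() already returns the final address;
      // on s390x the returned value is TP-relative, so TP must be added.
      return (void *)(read_thread_pointer() + __tls_get_offset(got_offset));
    }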
-// -// https://github.com/rui314/psabi/blob/main/s390x.pdf - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = S390X; - -static void write_mid20(u8 *loc, u64 val) { - *(ub32 *)loc |= (bits(val, 11, 0) << 16) | (bits(val, 19, 12) << 8); -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static u8 insn[] = { - 0xe3, 0x00, 0xf0, 0x38, 0x00, 0x24, // stg %r0, 56(%r15) - 0xc0, 0x10, 0, 0, 0, 0, // larl %r1, GOTPLT_OFFSET - 0xd2, 0x07, 0xf0, 0x30, 0x10, 0x08, // mvc 48(8, %r15), 8(%r1) - 0xe3, 0x10, 0x10, 0x10, 0x00, 0x04, // lg %r1, 16(%r1) - 0x07, 0xf1, // br %r1 - 0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr - }; - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)(buf + 8) = (ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 6) >> 1; -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - static u8 insn[] = { - 0xc0, 0x10, 0, 0, 0, 0, // larl %r1, GOTPLT_ENTRY_OFFSET - 0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg %r1, (%r1) - 0xc0, 0x01, 0, 0, 0, 0, // lgfi %r0, PLT_INDEX - 0x07, 0xf1, // br %r1 - 0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr - 0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr - }; - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)(buf + 2) = (sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx)) >> 1; - *(ub32 *)(buf + 14) = sym.get_plt_idx(ctx) * sizeof(ElfRel); -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - static u8 insn[] = { - 0xc0, 0x10, 0, 0, 0, 0, // larl %r1, GOT_ENTRY_OFFSET - 0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg %r1, (%r1) - 0x07, 0xf1, // br %r1 - 0x07, 0x00, // nopr - }; - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)(buf + 2) = (sym.get_got_addr(ctx) - sym.get_plt_addr(ctx)) >> 1; -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_390_PC32: - *(ub32 *)loc = val - this->shdr.sh_addr - offset; - break; - case R_390_64: - *(ub64 *)loc = val; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - auto check_dbl = [&](i64 val, i64 lo, i64 hi) { - check(val, lo, hi); - - // R_390_*DBL relocs should never refer a symbol at an odd address - if (val & 1) - Error(ctx) << *this << ": misaligned symbol " << sym - << " for relocation " << rel; - }; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_390_64: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_390_8: - check(S + A, 0, 1 << 8); - *loc = S + A; - break; - case R_390_12: - check(S + A, 0, 1 << 12); - *(ul16 *)loc |= bits(S + 
A, 11, 0); - break; - case R_390_16: - check(S + A, 0, 1 << 16); - *(ub16 *)loc = S + A; - break; - case R_390_20: - check(S + A, 0, 1 << 20); - write_mid20(loc, S + A); - break; - case R_390_32: - case R_390_PLT32: - check(S + A, 0, 1LL << 32); - *(ub32 *)loc = S + A; - break; - case R_390_PLT64: - *(ub64 *)loc = S + A; - break; - case R_390_PC12DBL: - case R_390_PLT12DBL: - check_dbl(S + A - P, -(1 << 12), 1 << 12); - *(ul16 *)loc |= bits(S + A - P, 12, 1); - break; - case R_390_PC16: - check(S + A - P, -(1 << 15), 1 << 15); - *(ub16 *)loc = S + A - P; - break; - case R_390_PC32: - check(S + A - P, -(1LL << 31), 1LL << 31); - *(ub32 *)loc = S + A - P; - break; - case R_390_PC64: - *(ub64 *)loc = S + A - P; - break; - case R_390_PC16DBL: - case R_390_PLT16DBL: - check_dbl(S + A - P, -(1 << 16), 1 << 16); - *(ub16 *)loc = (S + A - P) >> 1; - break; - case R_390_PC24DBL: - case R_390_PLT24DBL: - check_dbl(S + A - P, -(1 << 24), 1 << 24); - *(ub32 *)loc |= bits(S + A - P, 24, 1); - break; - case R_390_PC32DBL: - case R_390_PLT32DBL: - check_dbl(S + A - P, -(1LL << 32), 1LL << 32); - *(ub32 *)loc = (S + A - P) >> 1; - break; - case R_390_GOT12: - case R_390_GOTPLT12: - check(G + A, 0, 1 << 12); - *(ul16 *)loc |= bits(G + A, 11, 0); - break; - case R_390_GOT16: - case R_390_GOTPLT16: - check(G + A, 0, 1 << 16); - *(ub16 *)loc = G + A; - break; - case R_390_GOT20: - case R_390_GOTPLT20: - check(G + A, 0, 1 << 20); - write_mid20(loc, G + A); - break; - case R_390_GOT32: - case R_390_GOTPLT32: - check(G + A, 0, 1LL << 32); - *(ub32 *)loc = G + A; - break; - case R_390_GOT64: - case R_390_GOTPLT64: - *(ub64 *)loc = G + A; - break; - case R_390_GOTOFF16: - case R_390_PLTOFF16: - check(S + A - GOT, -(1 << 15), 1 << 15); - *(ub16 *)loc = S + A - GOT; - break; - case R_390_GOTOFF32: - case R_390_PLTOFF32: - check(S + A - GOT, -(1LL << 31), 1LL << 31); - *(ub32 *)loc = S + A - GOT; - break; - case R_390_GOTOFF64: - case R_390_PLTOFF64: - *(ub64 *)loc = S + A - GOT; - break; - case R_390_GOTPC: - *(ub64 *)loc = GOT + A - P; - break; - case R_390_GOTPCDBL: - check_dbl(GOT + A - P, -(1LL << 32), 1LL << 32); - *(ub32 *)loc = (GOT + A - P) >> 1; - break; - case R_390_GOTENT: - check(GOT + G + A - P, -(1LL << 32), 1LL << 32); - *(ub32 *)loc = (GOT + G + A - P) >> 1; - break; - case R_390_TLS_LE32: - *(ub32 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_LE64: - *(ub64 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_GOTIE20: - write_mid20(loc, sym.get_gottp_addr(ctx) + A - GOT); - break; - case R_390_TLS_IEENT: - *(ub32 *)loc = (sym.get_gottp_addr(ctx) + A - P) >> 1; - break; - case R_390_TLS_GD32: - if (sym.has_tlsgd(ctx)) - *(ub32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; - else if (sym.has_gottp(ctx)) - *(ub32 *)loc = sym.get_gottp_addr(ctx) + A - GOT; - else - *(ub32 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_GD64: - if (sym.has_tlsgd(ctx)) - *(ub64 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; - else if (sym.has_gottp(ctx)) - *(ub64 *)loc = sym.get_gottp_addr(ctx) + A - GOT; - else - *(ub64 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_GDCALL: - if (sym.has_tlsgd(ctx)) { - // do nothing - } else if (sym.has_gottp(ctx)) { - // lg %r2, 0(%r2, %r12) - static u8 insn[] = { 0xe3, 0x22, 0xc0, 0x00, 0x00, 0x04 }; - memcpy(loc, insn, sizeof(insn)); - } else { - // nop - static u8 insn[] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 }; - memcpy(loc, insn, sizeof(insn)); - } - break; - case R_390_TLS_LDM32: - if (ctx.got->has_tlsld(ctx)) - *(ub32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - 
GOT; - break; - case R_390_TLS_LDM64: - if (ctx.got->has_tlsld(ctx)) - *(ub64 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; - break; - case R_390_TLS_LDO32: - if (ctx.got->has_tlsld(ctx)) - *(ub32 *)loc = S + A - ctx.dtp_addr; - else - *(ub32 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_LDO64: - if (ctx.got->has_tlsld(ctx)) - *(ub64 *)loc = S + A - ctx.dtp_addr; - else - *(ub64 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_LDCALL: - if (!ctx.got->has_tlsld(ctx)) { - // nop - static u8 insn[] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 }; - memcpy(loc, insn, sizeof(insn)); - } - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_390_32: { - i64 val = S + A; - check(val, 0, 1LL << 32); - *(ub32 *)loc = val; - break; - } - case R_390_64: - if (std::optional val = get_tombstone(sym, frag)) - *(ub64 *)loc = *val; - else - *(ub64 *)loc = S + A; - break; - case R_390_TLS_LDO64: - if (std::optional val = get_tombstone(sym, frag)) - *(ub64 *)loc = *val; - else - *(ub64 *)loc = S + A - ctx.dtp_addr; - break; - default: - Fatal(ctx) << *this << ": apply_reloc_nonalloc: " << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_390_64: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_390_8: - case R_390_12: - case R_390_16: - case R_390_20: - case R_390_32: - scan_absrel(ctx, sym, rel); - break; - case R_390_PC16: - case R_390_PC16DBL: - case R_390_PC32: - case R_390_PC32DBL: - case R_390_PC64: - scan_pcrel(ctx, sym, rel); - break; - case R_390_GOT12: - case R_390_GOT16: - case R_390_GOT20: - case R_390_GOT32: - case R_390_GOT64: - case R_390_GOTOFF16: - case R_390_GOTOFF32: - case R_390_GOTOFF64: - case R_390_GOTPLT12: - case R_390_GOTPLT16: - case R_390_GOTPLT20: - case R_390_GOTPLT32: - case R_390_GOTPLT64: - case R_390_GOTPC: - case R_390_GOTPCDBL: - case R_390_GOTENT: - sym.flags |= NEEDS_GOT; - break; - case R_390_PLT12DBL: - case R_390_PLT16DBL: - case R_390_PLT24DBL: - case R_390_PLT32: - case R_390_PLT32DBL: - case R_390_PLT64: - case R_390_PLTOFF16: - case R_390_PLTOFF32: - case R_390_PLTOFF64: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_390_TLS_GOTIE20: - case R_390_TLS_IEENT: - sym.flags |= NEEDS_GOTTP; - break; - case R_390_TLS_GD32: - case R_390_TLS_GD64: - // We always want to relax 
calls to __tls_get_offset() in statically- - // linked executables because __tls_get_offset() in libc.a just calls - // abort(). - if (ctx.arg.is_static || - (ctx.arg.relax && !sym.is_imported && !ctx.arg.shared)) { - // do nothing - } else if (ctx.arg.relax && !sym.is_imported && ctx.arg.shared && - !ctx.arg.z_dlopen) { - sym.flags |= NEEDS_GOTTP; - } else { - sym.flags |= NEEDS_TLSGD; - } - break; - case R_390_TLS_LDM32: - case R_390_TLS_LDM64: { - bool do_relax = ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared); - if (!do_relax) - ctx.needs_tlsld = true; - break; - } - case R_390_TLS_LE32: - case R_390_TLS_LE64: - check_tlsle(ctx, sym, rel); - break; - case R_390_TLS_LDO32: - case R_390_TLS_LDO64: - case R_390_TLS_GDCALL: - case R_390_TLS_LDCALL: - break; - default: - Fatal(ctx) << *this << ": scan_relocations: " << rel; - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-sh4.cc b/third_party/mold/elf/arch-sh4.cc deleted file mode 100644 index dcce34df95b..00000000000 --- a/third_party/mold/elf/arch-sh4.cc +++ /dev/null @@ -1,355 +0,0 @@ -// clang-format off -// SH-4 (SuperH 4) is a 32-bit RISC ISA developed by Hitachi in the early -// '90s. Some relatively powerful systems were developed with SH-4. -// A notable example is Sega's Dreamcast game console which debuted in 1998. -// Hitachi later spun off its semiconductor division as an independent -// company, Renesas, and Renesas is still selling SH-4 processors for the -// embedded market. It has never been as popular as ARM is, and its -// popularity continues to decline though. -// -// SH-4's most distinctive feature compared to other RISC ISAs is that its -// instructions are 16 bits in length instead of more common 32 bits for -// better code density. This difference affects various aspects of its -// instruction set as shown below: -// -// - SH-4 has 16 general-purpose registers (GPRs) instead of the most -// commmon 32 GPR configuration to save one bit to specify a register. -// -// - Binary instructions such as ADD normally take three register in -// RISC ISAs (e.g. x ← y ⊕ z where x, y and z are registers), but -// SH-4's instructions take only two registers. The result of an -// operation is written to one of the source registers (e.g. x ← x ⊕ y). -// -// - Usual RISC ISAs have "load high" and "load low" instructions to set -// an immediate to most significant and least significant bits in a -// register to construct a full 32-bit value in a register. This -// technique is hard to use in SH-4, as 16 bit instructions are too -// small to contain large immediates. On SH-4, large immediates are -// loaded from memory using `mov.l` PC-relative load instruction. -// -// - Many RISC ISAs are, despite their name, actually fairly complex. -// They tend to have hundreds if not thousands of different instructions. -// SH-4 doesn't really have that many instructions because its 16-bit -// machine code simply can't encode many different opcodes. As a -// result, the number of relocations the linker has to support is also -// small. -// -// Beside these, SH-4 has a delay branch slot just like contemporary MIPS -// and SPARC. That is, one instruction after a branch instruction will -// always be executed even if the branch is taken. Delay branch slot allows -// a pipelined CPU to start and finish executing an instruction after a -// branch regardless of the branch's condition, simplifying the processor's -// implementation. It's considered a bad premature optimization nowadays, -// though. 
Modern RISC processors don't have it. -// -// Here are notes about the SH-4 psABI: -// -// - If a source file is compiled with -fPIC, each function starts -// with a piece of code to store the address of .got to %r12. -// We can use the register in our PLT for position-independent output. -// -// - Even though it uses the RELA-type relocations, relocation addends -// are stored not to the r_addend field but to the relocated section -// contents for some reason. Therefore, it's effectively REL. -// -// - It looks like the ecosystem has bit-rotted. Some tests, especially -// one using C++ exceptions, don't pass even with GNU ld. -// -// - GCC/SH4 tends to write dynamically-relocated data into .text, so the -// output from the linker contains lots of text relocations. That's not -// a problem with embedded programming, I guess. - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = SH4; - -// Even though SH-4 uses RELA-type relocations, addends are stored to -// relocated places for some reason. -template <> -i64 get_addend(u8 *loc, const ElfRel &rel) { - switch (rel.r_type) { - case R_SH_DIR32: - case R_SH_REL32: - case R_SH_TLS_GD_32: - case R_SH_TLS_LD_32: - case R_SH_TLS_LDO_32: - case R_SH_TLS_IE_32: - case R_SH_TLS_LE_32: - case R_SH_TLS_DTPMOD32: - case R_SH_TLS_DTPOFF32: - case R_SH_TLS_TPOFF32: - case R_SH_GOT32: - case R_SH_PLT32: - case R_SH_GOTOFF: - case R_SH_GOTPC: - case R_SH_GOTPLT32: - return *(ul32 *)loc; - default: - return 0; - } -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - if (ctx.arg.pic) { - static const u8 insn[] = { - 0x02, 0xd2, // mov.l 1f, r2 - 0xcc, 0x32, // add r12, r2 - 0x22, 0x50, // mov.l @(8, r2), r0 - 0x21, 0x52, // mov.l @(4, r2), r2 - 0x2b, 0x40, // jmp @r0 - 0x00, 0xe0, // mov #0, r0 - 0, 0, 0, 0, // 1: .long GOTPLT - }; - - static_assert(sizeof(insn) == E::plt_hdr_size); - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 12) = ctx.gotplt->shdr.sh_addr - ctx.got->shdr.sh_addr; - } else { - static const u8 insn[] = { - 0x02, 0xd2, // mov.l 1f, r2 - 0x22, 0x50, // mov.l @(8, r2), r0 - 0x21, 0x52, // mov.l @(4, r2), r2 - 0x2b, 0x40, // jmp @r0 - 0x00, 0xe0, // mov #0, r0 - 0x09, 0x00, // nop - 0, 0, 0, 0, // 1: .long GOTPLT - }; - - static_assert(sizeof(insn) == E::plt_hdr_size); - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 12) = ctx.gotplt->shdr.sh_addr; - } -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - if (ctx.arg.pic) { - static const u8 insn[] = { - 0x01, 0xd0, // mov.l 1f, r0 - 0xce, 0x00, // mov.l @(r0, r12), r0 - 0x2b, 0x40, // jmp @r0 - 0x01, 0xd1, // mov.l 2f, r1 - 0, 0, 0, 0, // 1: .long GOTPLT_ENTRY - 0, 0, 0, 0, // 2: .long INDEX_IN_RELPLT - }; - - static_assert(sizeof(insn) == E::plt_size); - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 8) = sym.get_gotplt_addr(ctx) - ctx.got->shdr.sh_addr; - *(ul32 *)(buf + 12) = sym.get_plt_idx(ctx) * sizeof(ElfRel); - } else { - static const u8 insn[] = { - 0x01, 0xd0, // mov.l 1f, r0 - 0x02, 0x60, // mov.l @r0, r0 - 0x2b, 0x40, // jmp @r0 - 0x01, 0xd1, // mov.l 2f, r1 - 0, 0, 0, 0, // 1: .long GOTPLT_ENTRY - 0, 0, 0, 0, // 2: .long INDEX_IN_RELPLT - }; - - static_assert(sizeof(insn) == E::plt_size); - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 8) = sym.get_gotplt_addr(ctx); - *(ul32 *)(buf + 12) = sym.get_plt_idx(ctx) * sizeof(ElfRel); - } -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - if (ctx.arg.pic) { - static const u8 insn[] = { - 0x01, 0xd0, // mov.l 1f, r0 - 
0xce, 0x00, // mov.l @(r0, r12), r0 - 0x2b, 0x40, // jmp @r0 - 0x09, 0x00, // nop - 0, 0, 0, 0, // 1: .long GOT_ENTRY - }; - - static_assert(sizeof(insn) == E::pltgot_size); - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 8) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr; - } else { - static const u8 insn[] = { - 0x01, 0xd0, // mov.l 1f, r0 - 0x02, 0x60, // mov.l @r0, r0 - 0x2b, 0x40, // jmp @r0 - 0x09, 0x00, // nop - 0, 0, 0, 0, // 1: .long GOT_ENTRY - }; - - static_assert(sizeof(insn) == E::pltgot_size); - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 8) = sym.get_got_addr(ctx); - } -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_SH_DIR32: - *(ul32 *)loc = val; - break; - case R_SH_REL32: - *(ul32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - u64 S = sym.get_addr(ctx); - u64 A = get_addend(loc, rel); - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_SH_DIR32: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_SH_REL32: - case R_SH_PLT32: - *(ul32 *)loc = S + A - P; - break; - case R_SH_GOT32: - *(ul32 *)loc = G; - break; - case R_SH_GOTPC: - *(ul32 *)loc = GOT + A - P; - break; - case R_SH_GOTOFF: - *(ul32 *)loc = S + A - GOT; - break; - case R_SH_TLS_GD_32: - *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; - break; - case R_SH_TLS_LD_32: - *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; - break; - case R_SH_TLS_LDO_32: - *(ul32 *)loc = S + A - ctx.dtp_addr; - break; - case R_SH_TLS_IE_32: - *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - GOT; - break; - case R_SH_TLS_LE_32: - *(ul32 *)loc = S + A - ctx.tp_addr; - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : get_addend(loc, rel); - - switch (rel.r_type) { - case R_SH_DIR32: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - Error(ctx) << sym << ": GNU ifunc symbol is not supported on sh4"; - - switch (rel.r_type) { - case R_SH_DIR32: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_SH_REL32: - scan_pcrel(ctx, sym, rel); - break; - case R_SH_GOT32: - sym.flags |= NEEDS_GOT; - break; - case R_SH_PLT32: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_SH_TLS_GD_32: - sym.flags |= NEEDS_TLSGD; - break; - case R_SH_TLS_LD_32: - ctx.needs_tlsld = true; - break; - case R_SH_TLS_IE_32: - sym.flags |= NEEDS_GOTTP; - break; - case R_SH_TLS_LE_32: - check_tlsle(ctx, sym, rel); - break; - case R_SH_GOTPC: - case R_SH_GOTOFF: - case R_SH_TLS_LDO_32: - break; - default: - Fatal(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-sparc64.cc b/third_party/mold/elf/arch-sparc64.cc deleted file mode 100644 index 35ac760b481..00000000000 --- a/third_party/mold/elf/arch-sparc64.cc +++ /dev/null @@ -1,622 +0,0 @@ -// clang-format off -// SPARC is a RISC ISA developed by Sun Microsystems. -// -// The byte order of the processor is big-endian. Anything larger than a -// byte is stored in the "reverse" order compared to little-endian -// processors such as x86-64. -// -// All instructions are 4 bytes long and aligned to 4 bytes boundaries. -// -// A notable feature of SPARC is that, unlike other RISC ISAs, it doesn't -// need range extension thunks. It is because the SPARC's CALL instruction -// contains a whopping 30 bits immediate. The processor scales it by 4 to -// extend it to 32 bits (this is doable because all instructions are -// aligned to 4 bytes boundaries, so the least significant two bits are -// always zero). That means CALL's reach is PC ± 2 GiB, elinating the -// need of range extension thunks. It comes with the cost that the CALL -// instruction alone takes 1/4th of the instruction encoding space, -// though. -// -// SPARC has 32 general purpose registers. CALL instruction saves a return -// address to %o7, which is an alias for %r15. Thread pointer is stored to -// %g7 which is %r7. -// -// SPARC does not have PC-relative load/store instructions. To access data -// in the position-independent manner, we usually first set the address of -// .got to, for example, %l7, with the following piece of code -// -// sethi %hi(. - _GLOBAL_OFFSET_TABLE_), %l7 -// add %l7, %lo(. - _GLOBAL_OFFSET_TABLE_), %l7 -// call __sparc_get_pc_thunk.l7 -// nop -// -// where __sparc_get_pc_thunk.l7 is defined as -// -// retl -// add %o7, %l7, %l7 -// -// . SETHI and the following ADD materialize a 32 bits offset to .got. -// CALL instruction sets a return address to $o7, and the subsequent ADD -// adds it to the GOT offset to materialize the absolute address of .got. 
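As a quick sanity check of the CALL-reach claim earlier in this comment (illustrative arithmetic only): a signed 30-bit word displacement, scaled by the 4-byte instruction size, covers exactly a signed 32-bit byte range, i.e. PC ± 2 GiB.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t max_words = (1LL << 29) - 1;  // largest positive 30-bit signed value
      int64_t min_words = -(1LL << 29);     // most negative 30-bit signed value
      // Multiplying by 4 converts the word displacement into a byte reach.
      std::printf("reach: [%lld, %lld] bytes\n",
                  (long long)(min_words * 4), (long long)(max_words * 4));
      // Prints [-2147483648, 2147483644], i.e. roughly PC +/- 2 GiB.
      return 0;
    }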
-// -// Note that we have a NOP after CALL and an ADD after RETL because of -// SPARC's delay branch slots. That is, the SPARC processor always -// executes one instruction after a branch even if the branch is taken. -// This may seem like an odd behavior, and indeed it is considered as such -// (that's a premature optimization for the early pipelined SPARC -// processors), but that's been a part of the ISA's spec so that's what it -// is. -// -// Note also that the .got address obtained this way is not shared between -// functions, so functions can use an arbitrary register to hold the .got -// address. That also means each function needs to execute the above piece -// of code to become position-independent. -// -// This scheme is very similar to i386. That may not be a coincidence -// because the i386 ELF psABI is created by Sun Microsystems too. -// -// https://github.com/rui314/psabi/blob/main/sparc.pdf - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = SPARC64; - -// SPARC's PLT section is writable despite containing executable code. -// We don't need to write the PLT header entry because the dynamic loader -// will do that for us. -// -// We also don't need a .got.plt section to store the result of lazy PLT -// symbol resolution because the dynamic symbol resolver directly mutates -// instructions in PLT so that they jump to the right places next time. -// That's why each PLT entry contains lots of NOPs; they are a placeholder -// for the runtime to add more instructions. -// -// Self-modifying code is nowadays considered really bad from the security -// point of view, though. -template <> -void write_plt_header(Context &ctx, u8 *buf) { - memset(buf, 0, E::plt_hdr_size); -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - static ub32 insn[] = { - 0x0300'0000, // sethi (. - .PLT0), %g1 - 0x3068'0000, // ba,a %xcc, .PLT1 - 0x0100'0000, // nop - 0x0100'0000, // nop - 0x0100'0000, // nop - 0x0100'0000, // nop - 0x0100'0000, // nop - 0x0100'0000, // nop - }; - - u64 plt0 = ctx.plt->shdr.sh_addr; - u64 plt1 = ctx.plt->shdr.sh_addr + E::plt_size; - u64 entry = sym.get_plt_addr(ctx); - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)buf |= bits(entry - plt0, 21, 0); - *(ub32 *)(buf + 4) |= bits(plt1 - entry - 4, 20, 2); -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - static ub32 entry[] = { - 0x8a10'000f, // mov %o7, %g5 - 0x4000'0002, // call . 
+ 8 - 0xc25b'e014, // ldx [ %o7 + 20 ], %g1 - 0xc25b'c001, // ldx [ %o7 + %g1 ], %g1 - 0x81c0'4000, // jmp %g1 - 0x9e10'0005, // mov %g5, %o7 - 0x0000'0000, // .quad $plt_entry - $got_entry - 0x0000'0000, - }; - - memcpy(buf, entry, sizeof(entry)); - *(ub64 *)(buf + 24) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 4; -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_SPARC_64: - case R_SPARC_UA64: - *(ub64 *)loc = val; - break; - case R_SPARC_DISP32: - *(ub32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = (get_addr() + rel.r_offset); - u64 G = (sym.get_got_idx(ctx) * sizeof(Word)); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_SPARC_64: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_SPARC_5: - check(S + A, 0, 1 << 5); - *(ub32 *)loc |= bits(S + A, 4, 0); - break; - case R_SPARC_6: - check(S + A, 0, 1 << 6); - *(ub32 *)loc |= bits(S + A, 5, 0); - break; - case R_SPARC_7: - check(S + A, 0, 1 << 7); - *(ub32 *)loc |= bits(S + A, 6, 0); - break; - case R_SPARC_8: - check(S + A, 0, 1 << 8); - *(u8 *)loc = S + A; - break; - case R_SPARC_10: - check(S + A, 0, 1 << 10); - *(ub32 *)loc |= bits(S + A, 9, 0); - break; - case R_SPARC_LO10: - case R_SPARC_LOPLT10: - *(ub32 *)loc |= bits(S + A, 9, 0); - break; - case R_SPARC_11: - check(S + A, 0, 1 << 11); - *(ub32 *)loc |= bits(S + A, 10, 0); - break; - case R_SPARC_13: - check(S + A, 0, 1 << 13); - *(ub32 *)loc |= bits(S + A, 12, 0); - break; - case R_SPARC_16: - case R_SPARC_UA16: - check(S + A, 0, 1 << 16); - *(ub16 *)loc = S + A; - break; - case R_SPARC_22: - check(S + A, 0, 1 << 22); - *(ub32 *)loc |= bits(S + A, 21, 0); - break; - case R_SPARC_32: - case R_SPARC_UA32: - case R_SPARC_PLT32: - check(S + A, 0, 1LL << 32); - *(ub32 *)loc = S + A; - break; - case R_SPARC_PLT64: - case R_SPARC_UA64: - case R_SPARC_REGISTER: - *(ub64 *)loc = S + A; - break; - case R_SPARC_DISP8: - check(S + A - P, -(1 << 7), 1 << 7); - *(u8 *)loc = S + A - P; - break; - case R_SPARC_DISP16: - check(S + A - P, -(1 << 15), 1 << 15); - *(ub16 *)loc = S + A - P; - break; - case R_SPARC_DISP32: - case R_SPARC_PCPLT32: - check(S + A - P, -(1LL << 31), 1LL << 31); - *(ub32 *)loc = S + A - P; - break; - case R_SPARC_DISP64: - *(ub64 *)loc = S + A - P; - break; - case R_SPARC_WDISP16: { - i64 val = S + A - P; - check(val, -(1 << 16), 1 << 16); - *(ub16 *)loc |= (bit(val, 16) << 21) | bits(val, 15, 2); - break; - } - case R_SPARC_WDISP19: - check(S + A - P, -(1 << 20), 1 << 20); - *(ub32 *)loc |= bits(S + A - P, 20, 2); - break; - 
-    case R_SPARC_WDISP22:
-      check(S + A - P, -(1 << 23), 1 << 23);
-      *(ub32 *)loc |= bits(S + A - P, 23, 2);
-      break;
-    case R_SPARC_WDISP30:
-    case R_SPARC_WPLT30:
-      check(S + A - P, -(1LL << 31), 1LL << 31);
-      *(ub32 *)loc |= bits(S + A - P, 31, 2);
-      break;
-    case R_SPARC_HI22:
-    case R_SPARC_HIPLT22:
-    case R_SPARC_LM22:
-      *(ub32 *)loc |= bits(S + A, 31, 10);
-      break;
-    case R_SPARC_GOT10:
-      *(ub32 *)loc |= bits(G, 9, 0);
-      break;
-    case R_SPARC_GOT13:
-      check(G, 0, 1 << 12);
-      *(ub32 *)loc |= bits(G, 12, 0);
-      break;
-    case R_SPARC_GOT22:
-      *(ub32 *)loc |= bits(G, 31, 10);
-      break;
-    case R_SPARC_GOTDATA_HIX22: {
-      i64 val = S + A - GOT;
-      *(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10);
-      break;
-    }
-    case R_SPARC_GOTDATA_LOX10: {
-      i64 val = S + A - GOT;
-      *(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0);
-      break;
-    }
-    case R_SPARC_GOTDATA_OP_HIX22:
-      // We always have to relax a GOT load to a load immediate if a
-      // symbol is local, because R_SPARC_GOTDATA_OP cannot represent
-      // an addend for a local symbol.
-      if (sym.is_imported || sym.is_ifunc()) {
-        *(ub32 *)loc |= bits(G, 31, 10);
-      } else if (sym.is_absolute()) {
-        i64 val = S + A;
-        *(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10);
-      } else {
-        i64 val = S + A - GOT;
-        *(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10);
-      }
-      break;
-    case R_SPARC_GOTDATA_OP_LOX10: {
-      if (sym.is_imported || sym.is_ifunc()) {
-        *(ub32 *)loc |= bits(G, 9, 0);
-      } else if (sym.is_absolute()) {
-        i64 val = S + A;
-        *(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0);
-      } else {
-        i64 val = S + A - GOT;
-        *(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0);
-      }
-      break;
-    }
-    case R_SPARC_GOTDATA_OP:
-      if (sym.is_imported || sym.is_ifunc())
-        break;
-
-      if (sym.is_absolute()) {
-        // ldx [ %g2 + %g1 ], %g1 → nop
-        *(ub32 *)loc = 0x0100'0000;
-      } else {
-        // ldx [ %g2 + %g1 ], %g1 → add %g2, %g1, %g1
-        *(ub32 *)loc &= 0b00'11111'000000'11111'1'11111111'11111;
-        *(ub32 *)loc |= 0b10'00000'000000'00000'0'00000000'00000;
-      }
-      break;
-    case R_SPARC_PC10:
-    case R_SPARC_PCPLT10:
-      *(ub32 *)loc |= bits(S + A - P, 9, 0);
-      break;
-    case R_SPARC_PC22:
-    case R_SPARC_PCPLT22:
-    case R_SPARC_PC_LM22:
-      *(ub32 *)loc |= bits(S + A - P, 31, 10);
-      break;
-    case R_SPARC_OLO10:
-      *(ub32 *)loc |= bits(bits(S + A, 9, 0) + rel.r_type_data, 12, 0);
-      break;
-    case R_SPARC_HH22:
-      *(ub32 *)loc |= bits(S + A, 63, 42);
-      break;
-    case R_SPARC_HM10:
-      *(ub32 *)loc |= bits(S + A, 41, 32);
-      break;
-    case R_SPARC_PC_HH22:
-      *(ub32 *)loc |= bits(S + A - P, 63, 42);
-      break;
-    case R_SPARC_PC_HM10:
-      *(ub32 *)loc |= bits(S + A - P, 41, 32);
-      break;
-    case R_SPARC_HIX22:
-      *(ub32 *)loc |= bits(~(S + A), 31, 10);
-      break;
-    case R_SPARC_LOX10:
-      *(ub32 *)loc |= bits(S + A, 9, 0) | 0b1'1100'0000'0000;
-      break;
-    case R_SPARC_H44:
-      *(ub32 *)loc |= bits(S + A, 43, 22);
-      break;
-    case R_SPARC_M44:
-      *(ub32 *)loc |= bits(S + A, 21, 12);
-      break;
-    case R_SPARC_L44:
-      *(ub32 *)loc |= bits(S + A, 11, 0);
-      break;
-    case R_SPARC_TLS_GD_HI22:
-      *(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 31, 10);
-      break;
-    case R_SPARC_TLS_GD_LO10:
-      *(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 9, 0);
-      break;
-    case R_SPARC_TLS_GD_CALL:
-    case R_SPARC_TLS_LDM_CALL: {
-      u64 addr;
-      if (ctx.arg.is_static)
-        addr = ctx.extra.tls_get_addr_sec->shdr.sh_addr;
-      else
-        addr = ctx.extra.tls_get_addr_sym->get_addr(ctx);
-
-      *(ub32 *)loc |= bits(addr + A - P, 31, 2);
-      break;
-    }
-    case R_SPARC_TLS_LDM_HI22:
-      *(ub32 *)loc |= bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 31, 10);
-      break;
-    case R_SPARC_TLS_LDM_LO10:
-      *(ub32 *)loc |= bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 9, 0);
-      break;
-    case R_SPARC_TLS_LDO_HIX22:
-      *(ub32 *)loc |= bits(S + A - ctx.dtp_addr, 31, 10);
-      break;
-    case R_SPARC_TLS_LDO_LOX10:
-      *(ub32 *)loc |= bits(S + A - ctx.dtp_addr, 9, 0);
-      break;
-    case R_SPARC_TLS_IE_HI22:
-      *(ub32 *)loc |= bits(sym.get_gottp_addr(ctx) + A - GOT, 31, 10);
-      break;
-    case R_SPARC_TLS_IE_LO10:
-      *(ub32 *)loc |= bits(sym.get_gottp_addr(ctx) + A - GOT, 9, 0);
-      break;
-    case R_SPARC_TLS_LE_HIX22:
-      *(ub32 *)loc |= bits(~(S + A - ctx.tp_addr), 31, 10);
-      break;
-    case R_SPARC_TLS_LE_LOX10:
-      *(ub32 *)loc |= bits(S + A - ctx.tp_addr, 9, 0) | 0b1'1100'0000'0000;
-      break;
-    case R_SPARC_SIZE32:
-      *(ub32 *)loc = sym.esym().st_size + A;
-      break;
-    case R_SPARC_TLS_GD_ADD:
-    case R_SPARC_TLS_LDM_ADD:
-    case R_SPARC_TLS_LDO_ADD:
-    case R_SPARC_TLS_IE_LD:
-    case R_SPARC_TLS_IE_LDX:
-    case R_SPARC_TLS_IE_ADD:
-      break;
-    default:
-      unreachable();
-    }
-  }
-}
-
-template <>
-void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
-  std::span<const ElfRel<E>> rels = get_rels(ctx);
-
-  for (i64 i = 0; i < rels.size(); i++) {
-    const ElfRel<E> &rel = rels[i];
-    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
-      continue;
-
-    Symbol<E> &sym = *file.symbols[rel.r_sym];
-    u8 *loc = base + rel.r_offset;
-
-    auto check = [&](i64 val, i64 lo, i64 hi) {
-      if (val < lo || hi <= val)
-        Error(ctx) << *this << ": relocation " << rel << " against "
-                   << sym << " out of range: " << val << " is not in ["
-                   << lo << ", " << hi << ")";
-    };
-
-    SectionFragment<E> *frag;
-    i64 frag_addend;
-    std::tie(frag, frag_addend) = get_fragment(ctx, rel);
-
-    u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx);
-    u64 A = frag ? frag_addend : (i64)rel.r_addend;
-
-    switch (rel.r_type) {
-    case R_SPARC_64:
-    case R_SPARC_UA64:
-      if (std::optional<u64> val = get_tombstone(sym, frag))
-        *(ub64 *)loc = *val;
-      else
-        *(ub64 *)loc = S + A;
-      break;
-    case R_SPARC_32:
-    case R_SPARC_UA32: {
-      i64 val = S + A;
-      check(val, 0, 1LL << 32);
-      *(ub32 *)loc = val;
-      break;
-    }
-    case R_SPARC_TLS_DTPOFF32:
-      *(ub32 *)loc = S + A - ctx.dtp_addr;
-      break;
-    case R_SPARC_TLS_DTPOFF64:
-      *(ub64 *)loc = S + A - ctx.dtp_addr;
-      break;
-    default:
-      Fatal(ctx) << *this << ": apply_reloc_nonalloc: " << rel;
-    }
-  }
-}
-
-template <>
-void InputSection<E>::scan_relocations(Context<E> &ctx) {
-  assert(shdr().sh_flags & SHF_ALLOC);
-
-  this->reldyn_offset = file.num_dynrel * sizeof(ElfRel<E>);
-  std::span<const ElfRel<E>> rels = get_rels(ctx);
-
-  // Scan relocations
-  for (i64 i = 0; i < rels.size(); i++) {
-    const ElfRel<E> &rel = rels[i];
-    if (rel.r_type == R_NONE || record_undef_error(ctx, rel))
-      continue;
-
-    Symbol<E> &sym = *file.symbols[rel.r_sym];
-
-    if (sym.is_ifunc())
-      sym.flags |= NEEDS_GOT | NEEDS_PLT;
-
-    switch (rel.r_type) {
-    case R_SPARC_64:
-      scan_dyn_absrel(ctx, sym, rel);
-      break;
-    case R_SPARC_8:
-    case R_SPARC_5:
-    case R_SPARC_6:
-    case R_SPARC_7:
-    case R_SPARC_10:
-    case R_SPARC_11:
-    case R_SPARC_13:
-    case R_SPARC_16:
-    case R_SPARC_22:
-    case R_SPARC_32:
-    case R_SPARC_REGISTER:
-    case R_SPARC_UA16:
-    case R_SPARC_UA32:
-    case R_SPARC_UA64:
-    case R_SPARC_PC_HM10:
-    case R_SPARC_OLO10:
-    case R_SPARC_LOX10:
-    case R_SPARC_HM10:
-    case R_SPARC_M44:
-    case R_SPARC_HIX22:
-    case R_SPARC_LO10:
-    case R_SPARC_L44:
-    case R_SPARC_LM22:
-    case R_SPARC_HI22:
-    case R_SPARC_H44:
-    case R_SPARC_HH22:
-      scan_absrel(ctx, sym, rel);
-      break;
-    case R_SPARC_PLT32:
-    case R_SPARC_WPLT30:
-    case R_SPARC_WDISP30:
-    case R_SPARC_HIPLT22:
-    case R_SPARC_LOPLT10:
-    case R_SPARC_PCPLT32:
-    case R_SPARC_PCPLT22:
-    case R_SPARC_PCPLT10:
-    case R_SPARC_PLT64:
-      if (sym.is_imported)
-        sym.flags |= NEEDS_PLT;
-      break;
-    case R_SPARC_GOT13:
-    case R_SPARC_GOT10:
-    case R_SPARC_GOT22:
-    case R_SPARC_GOTDATA_HIX22:
-      sym.flags |= NEEDS_GOT;
-      break;
-    case R_SPARC_GOTDATA_OP_HIX22:
-      if (sym.is_imported)
-        sym.flags |= NEEDS_GOT;
-      break;
-    case R_SPARC_DISP16:
-    case R_SPARC_DISP32:
-    case R_SPARC_DISP64:
-    case R_SPARC_DISP8:
-    case R_SPARC_PC10:
-    case R_SPARC_PC22:
-    case R_SPARC_PC_LM22:
-    case R_SPARC_WDISP16:
-    case R_SPARC_WDISP19:
-    case R_SPARC_WDISP22:
-    case R_SPARC_PC_HH22:
-      scan_pcrel(ctx, sym, rel);
-      break;
-    case R_SPARC_TLS_GD_HI22:
-      sym.flags |= NEEDS_TLSGD;
-      break;
-    case R_SPARC_TLS_LDM_HI22:
-      ctx.needs_tlsld = true;
-      break;
-    case R_SPARC_TLS_IE_HI22:
-      sym.flags |= NEEDS_GOTTP;
-      break;
-    case R_SPARC_TLS_GD_CALL:
-    case R_SPARC_TLS_LDM_CALL:
-      if (!ctx.arg.is_static && ctx.extra.tls_get_addr_sym->is_imported)
-        ctx.extra.tls_get_addr_sym->flags |= NEEDS_PLT;
-      break;
-    case R_SPARC_TLS_LE_HIX22:
-    case R_SPARC_TLS_LE_LOX10:
-      check_tlsle(ctx, sym, rel);
-      break;
-    case R_SPARC_GOTDATA_OP_LOX10:
-    case R_SPARC_GOTDATA_OP:
-    case R_SPARC_GOTDATA_LOX10:
-    case R_SPARC_TLS_GD_LO10:
-    case R_SPARC_TLS_GD_ADD:
-    case R_SPARC_TLS_LDM_LO10:
-    case R_SPARC_TLS_LDM_ADD:
-    case R_SPARC_TLS_LDO_HIX22:
-    case R_SPARC_TLS_LDO_LOX10:
-    case R_SPARC_TLS_LDO_ADD:
-    case R_SPARC_TLS_IE_ADD:
-    case R_SPARC_TLS_IE_LD:
-    case R_SPARC_TLS_IE_LDX:
-    case R_SPARC_TLS_IE_LO10:
-    case R_SPARC_SIZE32:
-      break;
-    default:
-      Fatal(ctx) << *this << ": scan_relocations: " << rel;
-    }
-  }
-}
-
-// __tls_get_addr is not defined by libc.a, so we can't use that function
-// in statically-linked executables. This section provides a replacement.
-void SparcTlsGetAddrSection::copy_buf(Context<E> &ctx) {
-  ub32 *buf = (ub32 *)(ctx.buf + this->shdr.sh_offset);
-
-  static const ub32 insn[] = {
-    0x0300'0000, // sethi %hi(TP_SIZE), %g1
-    0x8210'6000, // or %g1, %lo(TP_SIZE), %g1
-    0x8221'c001, // sub %g7, %g1, %g1
-    0xd05a'2008, // ldx [ %o0 + 8 ], %o0
-    0x81c3'e008, // retl
-    0x9000'4008, // add %g1, %o0, %o0
-  };
-
-  assert(this->shdr.sh_size == sizeof(insn));
-  memcpy(buf, insn, sizeof(insn));
-
-  buf[0] |= bits(ctx.tp_addr - ctx.tls_begin, 31, 10);
-  buf[1] |= bits(ctx.tp_addr - ctx.tls_begin, 9, 0);
-}
-
-} // namespace mold::elf
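For reference, the HIX22/LOX10 and TLS_LE_*X10 cases deleted above encode a value as a sethi/xor pair: sethi gets the upper 22 bits of the one's complement, and the xor immediate carries the low 10 bits with bits 12:10 forced to one so that the CPU's 13-bit sign extension restores the high part. The standalone C++ sketch below (not part of the patch; the helper names bits, hix22, lox10, and simm13 are invented here, with bits() mirroring the helper the deleted code calls) checks that arithmetic for a few negative offsets, which is the range TP-relative values fall in.

#include <cassert>
#include <cstdint>

static uint64_t bits(uint64_t val, int hi, int lo) {
  return (val >> lo) & ((1ULL << (hi - lo + 1)) - 1);   // inclusive bit field [hi:lo]
}

static uint32_t hix22(int64_t v) { return bits(~v, 31, 10); }   // %hix22: upper 22 bits of ~v
static uint32_t lox10(int64_t v) {                              // %lox10: low 10 bits, with
  return bits(v, 9, 0) | 0b1'1100'0000'0000;                    // bits 12:10 forced to 0b111
}

static int64_t simm13(uint32_t imm) {                  // how the CPU sign-extends a
  int64_t x = imm & 0x1fff;                            // 13-bit xor immediate
  return (x & 0x1000) ? x - 0x2000 : x;
}

int main() {
  int64_t vals[] = {-8, -0x12345, -0x7fffffff};
  for (int64_t v : vals) {
    int64_t sethi = (int64_t)((uint64_t)hix22(v) << 10);  // register after sethi %hix22(v)
    assert((sethi ^ simm13(lox10(v))) == v);              // xor %lox10(v) restores the value
  }
}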
diff --git a/third_party/mold/elf/cmdline.cc b/third_party/mold/elf/cmdline.cc
index af1bc12dcba..cd2faa96a9b 100644
--- a/third_party/mold/elf/cmdline.cc
+++ b/third_party/mold/elf/cmdline.cc
@@ -1,6 +1,6 @@
 // clang-format off
 #include "third_party/mold/elf/mold.h"
-// MISSING #include "../common/cmdline.h"
+#include "third_party/mold/cmdline.h"
 
 #include "third_party/libcxx/regex"
 #include "third_party/libcxx/sstream"
@@ -36,7 +36,6 @@
 #include "libc/sysv/consts/o.h"
 #include "libc/sysv/consts/ok.h"
 #include "libc/time/time.h"
-#include "third_party/getopt/getopt.internal.h"
 #include "third_party/musl/crypt.h"
 #include "third_party/musl/lockf.h"
 #endif
diff --git a/third_party/mold/elf/elf.h b/third_party/mold/elf/elf.h
index 0f0a2c72063..930b034a564 100644
--- a/third_party/mold/elf/elf.h
+++ b/third_party/mold/elf/elf.h
@@ -1,7 +1,7 @@
 // clang-format off
 #pragma once
 
-// MISSING #include "../common/integers.h"
+#include "third_party/mold/integers.h"
 
 #include "third_party/libcxx/ostream"
 #include "third_party/libcxx/string"
diff --git a/third_party/mold/elf/main.cc b/third_party/mold/elf/main.cc
index 645be0d50b6..d83317a948b 100644
--- a/third_party/mold/elf/main.cc
+++ b/third_party/mold/elf/main.cc
@@ -1,9 +1,8 @@
 // clang-format off
 #include "third_party/mold/elf/mold.h"
-// MISSING #include "../common/archive-file.h"
-// MISSING #include "../common/cmdline.h"
-// MISSING #include "../common/output-file.h"
-
+#include "third_party/mold/archive-file.h"
+#include "third_party/mold/cmdline.h"
+#include "third_party/mold/output-file.h"
 #include "third_party/libcxx/cstring"
 #include "third_party/libcxx/functional"
 #include "third_party/libcxx/iomanip"
diff --git a/third_party/mold/elf/mold.h b/third_party/mold/elf/mold.h
index a67c239ac53..af72cadc60c 100644
--- a/third_party/mold/elf/mold.h
+++ b/third_party/mold/elf/mold.h
@@ -2,7 +2,7 @@
 #pragma once
 
 #include "third_party/mold/elf/elf.h"
-// MISSING #include "../common/common.h"
+#include "third_party/mold/common.h"
 
 #include "third_party/libcxx/atomic"
 #include "third_party/libcxx/bitset"
@@ -15,16 +15,19 @@
 #include "third_party/libcxx/memory"
 #include "third_party/libcxx/mutex"
 #include "third_party/libcxx/optional"
-// MISSING #include <span>
+#include "third_party/libcxx/span"
 #include "third_party/libcxx/sstream"
 #include "third_party/libcxx/string"
 #include "third_party/libcxx/string_view"
+
+#include "third_party/mold/fake_tbb.h"
 // MISSING #include
 // MISSING #include
 // MISSING #include
 // MISSING #include
 // MISSING #include
 // MISSING #include
+
 #include "third_party/libcxx/type_traits"
 #include "third_party/libcxx/unordered_map"
 #include "third_party/libcxx/unordered_set"
@@ -42,7 +45,6 @@
 #include "libc/sysv/consts/o.h"
 #include "libc/sysv/consts/ok.h"
 #include "libc/time/time.h"
-#include "third_party/getopt/getopt.internal.h"
 #include "third_party/musl/crypt.h"
 #include "third_party/musl/lockf.h"
 #endif
diff --git a/third_party/mold/fake_tbb.h b/third_party/mold/fake_tbb.h
index 072fa42e5bf..b0453d89383 100644
--- a/third_party/mold/fake_tbb.h
+++ b/third_party/mold/fake_tbb.h
@@ -6,6 +6,16 @@ namespace tbb {
 template <typename T>
 using concurrent_vector = std::vector<T>;
 
+template <
+    class Key,
+    class T,
+    class Hash = std::hash<Key>,
+    class KeyEqual = std::equal_to<Key>,
+    class Allocator = std::allocator< std::pair<const Key, T> > >
+  using concurrent_hash_map = std::unordered_map<Key, T, Hash, KeyEqual, Allocator>;
+
+  using spin_mutex = std::mutex;
+
 template <typename InputIterator, typename Function>
 void parallel_for_each(InputIterator first, InputIterator last, const Function& f) {
 }
@@ -22,5 +32,35 @@
 void parallel_for(Index first, Index last, const Function& f) {
 }
 
+  enum task_group_status {
+    not_complete,
+    complete,
+    canceled
+  };
+
+  class task_group {
+  public:
+    task_group() {};
+    ~task_group() {};
+
+    template <typename Func>
+    void run( Func&& f ) {
+
+    };
+
+    template <typename Func>
+    task_group_status run_and_wait( const Func& f ) {
+      return task_group_status::complete;
+    };
+
+    task_group_status wait() {
+      return task_group_status::complete;
+    };
+
+    void cancel() {
+
+    };
+  };
+
 }
 #endif
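One note on the fake_tbb.h hunk above: the shim maps the TBB names mold uses onto serial standard-library types (concurrent_hash_map becomes std::unordered_map, spin_mutex becomes std::mutex), but parallel_for, parallel_for_each, task_group::run, and run_and_wait never invoke the work they are given, so anything submitted through them is silently dropped. A minimal serial fallback, sketched below under that assumption (the tbb_serial namespace and everything in it is hypothetical, with return types simplified relative to the patch's stubs, and is not code from this patch), would instead run the submitted work inline on the calling thread:

#include <utility>

// Hypothetical serial stand-ins; they execute the submitted work
// on the calling thread instead of discarding it.
namespace tbb_serial {

template <typename InputIterator, typename Function>
void parallel_for_each(InputIterator first, InputIterator last, const Function& f) {
  for (; first != last; ++first)
    f(*first);                                       // run each element inline
}

template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f) {
  for (Index i = first; i < last; i++)
    f(i);
}

class task_group {
 public:
  template <typename Func>
  void run(Func&& f) { std::forward<Func>(f)(); }    // execute eagerly rather than deferring
  template <typename Func>
  void run_and_wait(const Func& f) { f(); }          // likewise
  void wait() {}                                     // nothing pending: run() already executed
};

} // namespace tbb_serial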
"third_party/musl/lockf.h" #endif diff --git a/third_party/mold/fake_tbb.h b/third_party/mold/fake_tbb.h index 072fa42e5bf..b0453d89383 100644 --- a/third_party/mold/fake_tbb.h +++ b/third_party/mold/fake_tbb.h @@ -6,6 +6,16 @@ namespace tbb { template using concurrent_vector = std::vector; +template < + class Key, + class T, + class Hash = std::hash, + class KeyEqual = std::equal_to, + class Allocator = std::allocator< std::pair > > + using concurrent_hash_map = std::unordered_map; + + using spin_mutex = std::mutex; + template void parallel_for_each(InputIterator first, InputIterator last, const Function& f) { } @@ -22,5 +32,35 @@ namespace tbb { void parallel_for(Index first, Index last, const Function& f) { } + enum task_group_status { + not_complete, + complete, + canceled + }; + + class task_group { + public: + task_group() {}; + ~task_group() {}; + + template + void run( Func&& f ) { + + }; + + template + task_group_status run_and_wait( const Func& f ) { + return task_group_status::complete; + }; + + task_group_status wait() { + return task_group_status::complete; + }; + + void cancel() { + + }; + }; + } #endif diff --git a/third_party/mold/git-hash.cc b/third_party/mold/git-hash.cc new file mode 100644 index 00000000000..2d520bd8760 --- /dev/null +++ b/third_party/mold/git-hash.cc @@ -0,0 +1,5 @@ +#include "third_party/libcxx/string" + +namespace mold { +std::string mold_git_hash = "d4d93d7fb72dd19c44aafa4dd5397e35787d33ad"; +} diff --git a/third_party/mold/hyperloglog.cc b/third_party/mold/hyperloglog.cc index 7c10616a4e4..f7974eef6a4 100644 --- a/third_party/mold/hyperloglog.cc +++ b/third_party/mold/hyperloglog.cc @@ -5,9 +5,10 @@ // For more info, read // https://engineering.fb.com/2018/12/13/data-infrastructure/hyperloglog +// TODO(fzakaria): changed from libcxx because pow symbol wasn't present. #include "third_party/mold/common.h" -#include "third_party/libcxx/cmath" +#include "libc/math.h" namespace mold { diff --git a/third_party/mold/mold.mk b/third_party/mold/mold.mk index f0e76c19461..128061376b6 100644 --- a/third_party/mold/mold.mk +++ b/third_party/mold/mold.mk @@ -6,7 +6,7 @@ PKGS += THIRD_PARTY_MOLD THIRD_PARTY_MOLD_ARTIFACTS += THIRD_PARTY_MOLD_A THIRD_PARTY_MOLD = $(THIRD_PARTY_MOLD_A_DEPS) $(THIRD_PARTY_MOLD_A) THIRD_PARTY_MOLD_A = o/$(MODE)/third_party/mold/mold.a -THIRD_PARTY_MOLD_FILES := $(wildcard third_party/mold/*) +THIRD_PARTY_MOLD_FILES := $(wildcard third_party/mold/*) $(wildcard third_party/mold/elf/*) THIRD_PARTY_MOLD_HDRS = $(filter %.h,$(THIRD_PARTY_MOLD_FILES)) THIRD_PARTY_MOLD_SRCS = $(filter %.cc,$(THIRD_PARTY_MOLD_FILES)) THIRD_PARTY_MOLD_OBJS = $(THIRD_PARTY_MOLD_SRCS:%.cc=o/$(MODE)/%.o) @@ -16,6 +16,9 @@ THIRD_PARTY_MOLD_A_DIRECTDEPS = \ LIBC_STR \ LIBC_INTRIN \ LIBC_STDIO \ + LIBC_CALLS \ + LIBC_TINYMATH \ + LIBC_SYSV \ LIBC_RUNTIME \ THIRD_PARTY_ZSTD \ THIRD_PARTY_XXHASH \ @@ -35,6 +38,8 @@ $(THIRD_PARTY_MOLD_OBJS): private \ -fno-asynchronous-unwind-tables \ -Wno-sign-compare \ -Wno-unused-function \ + -DMOLD_X86_64=1 \ + -DMOLD_TARGET=X86_64 THIRD_PARTY_MOLD_CHECKS = \ $(THIRD_PARTY_MOLD_A).pkg \