From 876ffb66e075a428a1a0da9819f8fdeac3105f5c Mon Sep 17 00:00:00 2001
From: Pavel Kopyl
Date: Wed, 25 Sep 2024 16:19:56 +0200
Subject: [PATCH] [EVM] Add LLVMLinkEVM C-API

Please, note this is a temporary patch. It adds initial support
of dependencies, but it doesn't work in a general case. A full
solution will be more sophisticated and will likely be implemented
on the FE driver side without a need of the LLD usage.
---
 lld/include/lld-c/LLDAsLibraryC.h             |  26 +++
 lld/lld-c/LLDAsLibraryC.cpp                   | 196 ++++++++++++++++++
 llvm/include/llvm/BinaryFormat/ELF.h          |   2 +-
 llvm/lib/Object/ELF.cpp                       |   6 +-
 .../EVM/MCTargetDesc/EVMTargetStreamer.cpp    |  10 +-
 .../EVM/MCTargetDesc/EVMTargetStreamer.h      |   3 +-
 6 files changed, 235 insertions(+), 8 deletions(-)

diff --git a/lld/include/lld-c/LLDAsLibraryC.h b/lld/include/lld-c/LLDAsLibraryC.h
index 5c1567983b0a..f38b8565e0b9 100644
--- a/lld/include/lld-c/LLDAsLibraryC.h
+++ b/lld/include/lld-c/LLDAsLibraryC.h
@@ -123,6 +123,32 @@ char **LLVMGetUndefinedLinkerSymbolsEraVM(LLVMMemoryBufferRef inBuffer,
  *  LLVMGetUndefinedSymbolsEraVM(). */
 void LLVMDisposeUndefinedLinkerSymbolsEraVM(char *linkerSymbolNames[],
                                             uint64_t numLinkerSymbols);
+
+/** Links the deploy and runtime ELF object files using the information about
+ *  dependencies.
+ *  \p inBuffers - array of input memory buffers with the following structure:
+ *
+ *   inBuffers[0] - deploy ELF object code
+ *   inBuffers[1] - deployed (runtime) ELF object code
+ *   --------------------------
+ *   inBuffers[2] - 1-st sub-contract (final EVM bytecode)
+ *   ...
+ *   inBuffers[N] - N-th sub-contract (final EVM bytecode)
+ *
+ *  Sub-contracts are optional. They should have the same ordering as in
+ *  the YUL layout. \p numInBuffers is the total buffer count (at least 2).
+ *
+ *  \p inBuffersIDs - array of string identifiers of the buffers. IDs correspond
+ *  to the object names in the YUL layout.
+ *  On success, outBuffers[0] will contain the deploy bytecode and outBuffers[1]
+ *  the runtime bytecode.
+ *  In case of an error the function returns 'true' and the error message is
+ *  passed in \p errorMessage. The message should be disposed by
+ *  'LLVMDisposeMessage'. */
+LLVMBool LLVMLinkEVM(LLVMMemoryBufferRef *inBuffers, const char *inBuffersIDs[],
+                     uint64_t numInBuffers, LLVMMemoryBufferRef outBuffers[2],
+                     char **errorMessage);
+
 LLVM_C_EXTERN_C_END
 
 #endif // LLD_C_LLDASLIBRARYC_H
diff --git a/lld/lld-c/LLDAsLibraryC.cpp b/lld/lld-c/LLDAsLibraryC.cpp
index 4de4aed67e91..2ecf8332e0cc 100644
--- a/lld/lld-c/LLDAsLibraryC.cpp
+++ b/lld/lld-c/LLDAsLibraryC.cpp
@@ -443,3 +443,199 @@ void LLVMDisposeUndefinedLinkerSymbolsEraVM(char *linkerSymbolNames[],
     std::free(linkerSymbolNames[idx]);
   std::free(linkerSymbolNames);
 }
+
+//----------------------------------------------------------------------------//
+
+/// This function generates a linker script for EVM architecture.
+/// \p memBufs - array of input memory buffers with the following structure:
+///
+///   memBufs[0] - deploy object code
+///   memBufs[1] - deployed object code
+///   --------------------------
+///   memBufs[2] - 1-st sub-contract (final EVM bytecode)
+///   ...
+///   memBufs[N] - N-th sub-contract (final EVM bytecode)
+///
+/// Sub-contracts are optional. They should have the same ordering as in
+/// the YUL layout.
+///
+/// \p bufIDs - array of string identifiers of the buffers. IDs correspond
+/// to the object names in the YUL layout.
+///
+/// For example, the YUL object:
+///
+///   |--D_105_deploy --||--D_105_deployed --||-- B_40 --|
+///
+///   __datasize_B_40 = 1384;
+///   SECTIONS {
+///     . = 0;
+///     .code : SUBALIGN(1) {
+///       D_105(.text);
+///       __dataoffset_D_105_deployed = .;
+///       D_105_deployed(.text);
+///       __datasize_D_105_deployed = . - __dataoffset_D_105_deployed;
+///       __dataoffset_B_40 = .;
+///       __datasize_D_105 = __dataoffset_B_40 + __datasize_B_40;
+///       LONG(__dataoffset_D_105_deployed);
+///     }
+///
+/// The dot '.' denotes current location in the resulting file.
+/// The purpose of the script is to define datasize/dataoffset absolute symbols
+/// that reflect the YUL layout.
+static std::string creteEVMLinkerScript(ArrayRef<LLVMMemoryBufferRef> memBufs,
+                                        ArrayRef<const char *> bufIDs) {
+  assert(memBufs.size() == bufIDs.size());
+  size_t numObjectsToLink = memBufs.size();
+  StringRef dataSizePrefix("__datasize_");
+  StringRef dataOffsetPrefix("__dataoffset_");
+
+  // Define the script part related to the top-level contract.
+  StringRef topName(bufIDs[0]);
+  StringRef deployed(bufIDs[1]);
+
+  // The linker script part for the top-level contract, kept as a std::string
+  // (a stored Twine would dangle). For the example above, this contains:
+  //   D_105(.text);
+  //   __dataoffset_D_105_deployed = .;
+  //   D_105_deployed(.text);
+  //   __datasize_D_105_deployed = . - __dataoffset_D_105_deployed;
+  std::string topLevel =
+      (topName + "(.text);\n" + dataOffsetPrefix + deployed + " = .;\n" +
+       deployed + "(.text);\n" + dataSizePrefix + deployed + " = . - " +
+       dataOffsetPrefix + deployed + ";\n")
+          .str();
+
+  // Contains symbols whose values are the sizes of the dependent contracts.
+  // For the example above, this contains:
+  //   __datasize_B_40 = 1384;
+  std::string symDatasizeDeps;
+
+  // Contains symbols whose values are the offsets of the dependent contracts.
+  // For the example above, this contains:
+  //   __dataoffset_B_40 = .;
+  std::string symDataOffsetDeps;
+  if (numObjectsToLink > 2) {
+    // Define datasize symbols for the dependent contracts. They start after
+    // {deploy, deployed} pair of the top-level contract, i.e. at index 2.
+    for (unsigned idx = 2; idx < numObjectsToLink; ++idx)
+      symDatasizeDeps += (dataSizePrefix + bufIDs[idx] + " = " +
+                          Twine(LLVMGetBufferSize(memBufs[idx])) + ";\n")
+                             .str();
+
+    symDataOffsetDeps = (dataOffsetPrefix + bufIDs[2] + " = .;\n").str();
+    for (unsigned idx = 3; idx < numObjectsToLink; ++idx)
+      symDataOffsetDeps +=
+          (dataOffsetPrefix + bufIDs[idx] + " = " + dataOffsetPrefix +
+           bufIDs[idx - 1] + " + " + dataSizePrefix + bufIDs[idx - 1] + ";\n")
+              .str();
+  }
+
+  // Contains a symbol whose value is the total size of the top-level contract
+  // with all the dependencies.
+  std::string symDatasizeTop = (dataSizePrefix + topName + " = ").str();
+  if (numObjectsToLink > 2)
+    symDatasizeTop += (dataOffsetPrefix + bufIDs.back() + " + " +
+                       dataSizePrefix + bufIDs.back() + ";\n")
+                          .str();
+  else
+    symDatasizeTop += ".;\n";
+
+  // Emit size of the deploy code offset as the 4-byte unsigned integer.
+  // This is needed to determine which offset the deployed code starts at
+  // in the linked binary.
+  std::string deploySize =
+      ("LONG(" + dataOffsetPrefix + deployed + ");\n").str();
+  return formatv("{0}\n\
+ENTRY(0);\n\
+SECTIONS {\n\
+  . = 0;\n\
+  .code : SUBALIGN(1) {\n\
+{1}\
+{2}\
+{3}\
+{4}\
+  }\n\
+}\n\
+",
+                 symDatasizeDeps, topLevel, symDataOffsetDeps, symDatasizeTop,
+                 deploySize);
+}
+
+LLVMBool LLVMLinkEVM(LLVMMemoryBufferRef inBuffers[],
+                     const char *inBuffersIDs[], uint64_t numInBuffers,
+                     LLVMMemoryBufferRef outBuffers[2], char **errorMessage) {
+  assert(numInBuffers > 1);
+  SmallVector<MemoryBufferRef> localInMemBufRefs(3);
+  SmallVector<std::unique_ptr<MemoryBuffer>> localInMemBufs(3);
+  for (unsigned idx = 0; idx < 2; ++idx) {
+    MemoryBufferRef ref = *unwrap(inBuffers[idx]);
+    localInMemBufs[idx] =
+        MemoryBuffer::getMemBuffer(ref.getBuffer(), inBuffersIDs[idx],
+                                   /*RequiresNullTerminator*/ false);
+    localInMemBufRefs[idx] = localInMemBufs[idx]->getMemBufferRef();
+  }
+
+  std::string linkerScript = creteEVMLinkerScript(
+      ArrayRef(inBuffers, numInBuffers), ArrayRef(inBuffersIDs, numInBuffers));
+  std::unique_ptr<MemoryBuffer> scriptBuf =
+      MemoryBuffer::getMemBuffer(linkerScript, "script.x");
+  localInMemBufRefs[2] = scriptBuf->getMemBufferRef();
+
+  SmallVector<const char *, 16> lldArgs;
+  lldArgs.push_back("ld.lld");
+  lldArgs.push_back("-T");
+  lldArgs.push_back("script.x");
+
+  // Use remapping of file names (a linker feature) to replace file names with
+  // indexes in the array of memory buffers.
+  StringRef remapStr("--remap-inputs=");
+  std::string remapDeployStr = (remapStr + inBuffersIDs[0] + "=0").str();
+  lldArgs.push_back(remapDeployStr.c_str());
+
+  std::string remapDeployedStr = (remapStr + inBuffersIDs[1] + "=1").str();
+  lldArgs.push_back(remapDeployedStr.c_str());
+
+  lldArgs.push_back("--remap-inputs=script.x=2");
+
+  // Deploy code
+  lldArgs.push_back(inBuffersIDs[0]);
+  // Deployed code
+  lldArgs.push_back(inBuffersIDs[1]);
+
+  lldArgs.push_back("--oformat=binary");
+
+  SmallString<0> codeString;
+  raw_svector_ostream ostream(codeString);
+  SmallString<0> errorString;
+  raw_svector_ostream errorOstream(errorString);
+
+  // Lld-as-a-library is not thread safe, as it has a global state,
+  // so we need to protect lld from simultaneous access from different threads.
+  std::unique_lock<std::mutex> lock(lldMutex);
+  const lld::Result s =
+      lld::lldMainMemBuf(localInMemBufRefs, &ostream, lldArgs, outs(),
+                         errorOstream, {{lld::Gnu, &lld::elf::linkMemBuf}});
+  lock.unlock();
+
+  bool ret = !s.retCode && s.canRunAgain;
+  if (!ret) {
+    *errorMessage = strdup(errorString.c_str());
+    return true;
+  }
+
+  StringRef data = ostream.str();
+  // Linker script adds size of the deploy code as a 4-byte BE unsigned to the
+  // end of the .code section. Knowing this, we can extract final deploy and
+  // deployed codes.
+  assert(data.size() > 4);
+  size_t deploySize = support::endian::read32be(data.data() + data.size() - 4);
+  assert(deploySize <= data.size() - 4);
+  size_t deployedSize = data.size() - deploySize - 4;
+
+  outBuffers[0] = LLVMCreateMemoryBufferWithMemoryRangeCopy(
+      data.data(), deploySize, "deploy");
+  outBuffers[1] = LLVMCreateMemoryBufferWithMemoryRangeCopy(
+      data.data() + deploySize, deployedSize, "deployed");
+
+  return false;
+}
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 69190fa96028..9159afcda78d 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -431,7 +431,7 @@ enum {
 
 // EVM local begin
 // ELF Relocation types for EVM
-enum {
+enum : uint8_t {
 #include "ELFRelocs/EVM.def"
 };
 // EVM local end
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 2f6fa9d475c4..d878bf84cd22 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -185,7 +185,7 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
   case ELF::EM_ERAVM:
     switch (Type) {
 #include "llvm/BinaryFormat/ELFRelocs/EraVM.def"
-      default:
+    default:
       break;
     }
     break;
@@ -251,10 +251,6 @@ uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) {
     break;
   case ELF::EM_LOONGARCH:
     return ELF::R_LARCH_RELATIVE;
-  // EVM local begin
-  case ELF::EM_EVM:
-    break;
-  // EVM local end
   default:
     break;
   }
diff --git a/llvm/lib/Target/EVM/MCTargetDesc/EVMTargetStreamer.cpp b/llvm/lib/Target/EVM/MCTargetDesc/EVMTargetStreamer.cpp
index c505d06c95bd..b601421607e9 100644
--- a/llvm/lib/Target/EVM/MCTargetDesc/EVMTargetStreamer.cpp
+++ b/llvm/lib/Target/EVM/MCTargetDesc/EVMTargetStreamer.cpp
@@ -11,6 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/EVMTargetStreamer.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/Casting.h"
 
 using namespace llvm;
 
@@ -18,7 +21,12 @@ using namespace llvm;
 
 EVMTargetStreamer::EVMTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
 
-EVMTargetStreamer::~EVMTargetStreamer() = default;
+void EVMTargetStreamer::emitLabel(MCSymbol *Symbol) {
+  // This is mostly a workaround for the current linking scheme.
+  // Mark all the symbols as local to their translation units.
+  auto *ELFSymbol = cast<MCSymbolELF>(Symbol);
+  ELFSymbol->setBinding(ELF::STB_LOCAL);
+}
 
 EVMTargetObjStreamer::EVMTargetObjStreamer(MCStreamer &S)
     : EVMTargetStreamer(S) {}
diff --git a/llvm/lib/Target/EVM/MCTargetDesc/EVMTargetStreamer.h b/llvm/lib/Target/EVM/MCTargetDesc/EVMTargetStreamer.h
index 06afc76d8fc2..a09b7b8c86c8 100644
--- a/llvm/lib/Target/EVM/MCTargetDesc/EVMTargetStreamer.h
+++ b/llvm/lib/Target/EVM/MCTargetDesc/EVMTargetStreamer.h
@@ -24,7 +24,8 @@ class EVMTargetStreamer : public MCTargetStreamer {
   EVMTargetStreamer(EVMTargetStreamer &&) = delete;
   EVMTargetStreamer &operator=(const EVMTargetStreamer &) = delete;
   EVMTargetStreamer &operator=(EVMTargetStreamer &&) = delete;
-  ~EVMTargetStreamer() override;
+
+  void emitLabel(MCSymbol *Symbol) override;
 };
 
 /// This part is for ASCII assembly output