From 4cf8ae3d96d307368f27a4b0a1e84184dd2601e2 Mon Sep 17 00:00:00 2001 From: Pavel Kopyl Date: Fri, 16 Aug 2024 19:38:59 +0200 Subject: [PATCH] [EraVM] Adding support of iterative libraries linkage. This also includes the C-API for requesting the following info: - If the given memory buffer contains an ELF file - List of undefined linker symbols of the given ELF file --- lld/ELF/Arch/EraVM.cpp | 3 + lld/include/lld-c/LLDAsLibraryC.h | 114 +++- lld/lld-c/LLDAsLibraryC.cpp | 354 +++++++++++-- lld/test/ELF/Inputs/eravm.lds | 6 + lld/test/ELF/eravm-data-reloc.s | 29 +- lld/unittests/EraVM/LLDTest.cpp | 492 +++++++++++++++++- llvm/include/llvm/BinaryFormat/ELF.h | 6 + .../llvm/BinaryFormat/ELFRelocs/EraVM.def | 1 + llvm/include/llvm/IR/IntrinsicsEraVM.td | 9 + .../Target/EraVM/AsmParser/EraVMAsmParser.cpp | 23 + llvm/lib/Target/EraVM/EraVMAsmPrinter.cpp | 35 ++ llvm/lib/Target/EraVM/EraVMISD.def | 3 +- llvm/lib/Target/EraVM/EraVMISelLowering.cpp | 10 + llvm/lib/Target/EraVM/EraVMInstrInfo.td | 5 + .../MCTargetDesc/EraVMELFObjectWriter.cpp | 2 + .../EraVM/MCTargetDesc/EraVMELFStreamer.cpp | 44 ++ .../EraVM/MCTargetDesc/EraVMMCTargetDesc.cpp | 15 + .../EraVM/MCTargetDesc/EraVMMCTargetDesc.h | 4 +- .../EraVM/MCTargetDesc/EraVMTargetStreamer.h | 2 + llvm/test/CodeGen/EraVM/intrinsic.ll | 11 + llvm/test/MC/EraVM/asm-parser/data-errors.s | 8 + llvm/test/MC/EraVM/asm-parser/data.s | 6 + llvm/test/MC/EraVM/encoding/data.s | 24 +- llvm/tools/llvm-readobj/ELFDumper.cpp | 16 + 24 files changed, 1167 insertions(+), 55 deletions(-) diff --git a/lld/ELF/Arch/EraVM.cpp b/lld/ELF/Arch/EraVM.cpp index 60cb3c6c0de5..c2ce70291acb 100644 --- a/lld/ELF/Arch/EraVM.cpp +++ b/lld/ELF/Arch/EraVM.cpp @@ -59,6 +59,9 @@ void EraVM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { case R_ERAVM_16_SCALE_8: add16scaled(val, /*scale=*/8); break; + case R_ERAVM_32: + write32be(loc, static_cast(val)); + break; default: error(getErrorLocation(loc) + "unrecognized relocation " + 
toString(rel.type)); diff --git a/lld/include/lld-c/LLDAsLibraryC.h b/lld/include/lld-c/LLDAsLibraryC.h index 3548a89b077b..a8c90a150d85 100644 --- a/lld/include/lld-c/LLDAsLibraryC.h +++ b/lld/include/lld-c/LLDAsLibraryC.h @@ -6,8 +6,58 @@ // //===----------------------------------------------------------------------===// // -// This header declares the C interface to the lld-as-a-library, which can be -// used to invoke LLD functionality. +// This header declares the following EraVM C interfaces: +// - 'LLVMLinkEraVM' +// The interface to the LLD linker functionality (via lld-as-a-library) +// +// - 'LLVMIsELF' +// Checks if the given memory buffer contains an ELF EraVM object file +// +// - 'LLVMGetUndefinedSymbolsEraVM' +// Returns an array of undefined linker symbols +// +// - 'LLVMDisposeUndefinedSymbolsEraVM' +// Disposes an array returned by the 'LLVMGetUndefinedSymbolsEraVM' +// +// These functions use a notion of the 'Linker Symbol' which is generalization +// of a usual ELF global symbol. The main difference is that 'Linker Symbol' +// has a 20-byte value, whereas the maximum value width of a usual ELF symbol +// is just 8 bytes. In order to represent a 20-byte symbol value with its +// relocation, initial 'Linker Symbol' is split into five sub-symbols +// which are usual 32-bit ELF symbols. This split is performed by the LLVM MC +// layer. For example, if the codegen needs to represent a 20-byte relocatable +// value associated with the symbol 'symbol_id', the MC layer sequentially +// (in the binary layout) emits the following undefined symbols: +// +// '__linker_symbol_id_0' +// '__linker_symbol_id_1' +// '__linker_symbol_id_2' +// '__linker_symbol_id_3' +// '__linker_symbol_id_4' +// +// with associated 32-bit relocations. Each sub-symbol name is formed by +// prepending '__linker' and appending '_[0-4]'. 
MC layer also sets the +// ELF::STO_ERAVM_LINKER_SYMBOL flag in the 'st_other' field in the symbol +// table entry to distinguish such symbols from all others. +// In EraVM, only these symbols are allowed to be undefined in an object +// code. All other cases must be treated as unreachable and denote a bug +// in the FE/LLVM codegen/Linker implementation. +// 'Linker Symbols' are resolved, i.e. they receive their final 20-byte +// values, at the linkage stage when calling LLVMLinkEraVM. +// For this, the 'LLVMLinkEraVM' has parameters: +// - \p linkerSymbolNames, array of null-terminated linker symbol names +// - \p linkerSymbolValues, array of symbol values +// +// For example, if linkerSymbolNames[0] points to a string 'symbol_id', +// it takes the linkerSymbolValues[0] value (which is a 20-byte array +// 0xAAAAAAAABB.....EEEEEEEE) and creates five symbol definitions in +// a linker script: +// +// "__linker_symbol_id_0" = 0xAAAAAAAA +// "__linker_symbol_id_1" = 0xBBBBBBBB +// "__linker_symbol_id_2" = 0xCCCCCCCC +// "__linker_symbol_id_3" = 0xDDDDDDDD +// "__linker_symbol_id_4" = 0xEEEEEEEE // //===----------------------------------------------------------------------===// @@ -19,10 +69,62 @@ LLVM_C_EXTERN_C_BEGIN -/** Performs linkage operation of an object code via lld-as-a-library. - Input/output files are transferred via memory buffers. */ -LLVMBool LLVMLinkEraVM(LLVMMemoryBufferRef inBuf, LLVMMemoryBufferRef *outBuf, - char **ErrorMessage); +// Currently, the size of a linker symbol is limited to 20 bytes, as its +// only usage is to represent Ethereum addresses which are of 160 bit width. +#define LINKER_SYMBOL_SIZE 20 + +/** Performs linkage of the ELF object code passed in \p inBuffer. The result + * is returned in \p outBuffer. + * The EraVM platform has no notion of separate compilation units, so the + * whole program is represented by a single ELF object file. In this case, + * the linker has two tasks. 
First, to emit definitions, passed in + * \p linkerSymbolValues, of linker symbols, passed in \p linkerSymbolNames, + * as shown above. The second is to perform symbol relocations. + * This function supports an iterative linkage, i.e. it will return relocatable + * ELF object files until all library symbols are defined. Once all of them + * are defined, it will return a final byte code with stripped ELF format. + * For example, if the initial input object file has two undefined linker + * symbols, 'symbol_id', 'symbol_id2' and at the first call only the + * 'symbol_id' definition was provided, the function will return an ELF + * object file where the symbol 'symbol_id' is defined, whereas the + * 'symbol_id2' is not. If the definition of 'symbol_id2' was provided + * at the second call, then the function returns the final bytecode. + * In case of an error the function returns 'true' and the error message + * is passed in \p errorMessage. The message should be disposed by + * 'LLVMDisposeMessage'. */ +LLVMBool LLVMLinkEraVM(LLVMMemoryBufferRef inBuffer, + LLVMMemoryBufferRef *outBuffer, + const char *const *linkerSymbolNames, + const char linkerSymbolValues[][LINKER_SYMBOL_SIZE], + uint64_t numLinkerSymbols, char **errorMessage); + +/** Returns true if the \p inBuffer contains an EraVM ELF object file. */ +LLVMBool LLVMIsELF(LLVMMemoryBufferRef inBuffer); + +/** Returns an array of undefined linker symbol names of the ELF object file + * passed in \p inBuffer. The names are returned in \p linkerSymbolNames as the + * array of null-terminated strings. The \p numLinkerSymbols receives the + * number of names. + * For example, if an ELF file has an undefined symbol which is represented + * via five sub-symbols: + * '__linker_symbol_id_0' + * '__linker_symbol_id_1' + * '__linker_symbol_id_2' + * '__linker_symbol_id_3' + * '__linker_symbol_id_4' + * + * the 'symbol_id' will be returned (stripping prefix and suffix) as the + * result. 
+ * Caller should dispose the memory allocated for the \p linkerSymbolNames + * using 'LLVMDisposeUndefinedSymbolsEraVM' */ +void LLVMGetUndefinedSymbolsEraVM(LLVMMemoryBufferRef inBuffer, + char **linkerSymbolNames[], + uint64_t *numLinkerSymbols); + +/** Disposes an array with linker symbols returned by the + * LLVMGetUndefinedSymbolsEraVM(). */ +void LLVMDisposeUndefinedSymbolsEraVM(char *linkerSymbolNames[], + uint64_t numLinkerSymbols); LLVM_C_EXTERN_C_END #endif // LLD_C_LLDASLIBRARYC_H diff --git a/lld/lld-c/LLDAsLibraryC.cpp b/lld/lld-c/LLDAsLibraryC.cpp index 40a6f7e2b8e5..87b5fb2d6ee8 100644 --- a/lld/lld-c/LLDAsLibraryC.cpp +++ b/lld/lld-c/LLDAsLibraryC.cpp @@ -4,7 +4,10 @@ #include "llvm-c/Core.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/Binary.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Error.h" @@ -14,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -23,37 +27,188 @@ using namespace object; LLD_HAS_DRIVER_MEM_BUF(elf) +namespace llvm { +namespace EraVM { +// The following two functions are defined in EraVMMCTargetDesc.cpp. +std::string getMangledLinkerSymbol(StringRef Name, unsigned SubId); +std::string getDemangledLinkerSymbol(StringRef Name); +} // namespace EraVM +} // namespace llvm + +constexpr static unsigned linkerSubSymbolRelocSize = sizeof(uint32_t); + +/// Returns size of the section \p SectionName in the object file \p file. +/// Returns zero, if there is no such section. 
static uint64_t getSectionSize(ObjectFile &file, StringRef sectionName) { - for (const object::SectionRef &sec : file.sections()) { - StringRef curSecName = cantFail(sec.getName()); - if (curSecName == sectionName) - return sec.getSize(); + section_iterator si = std::find_if(file.section_begin(), file.section_end(), + [§ionName](const SectionRef &sec) { + StringRef curSecName = + cantFail(sec.getName()); + return curSecName == sectionName; + }); + return si != file.section_end() ? si->getSize() : 0; +} + +/// Returns true if the object file \p file contains any other undefined +/// linker symbols besides those passed in \p linkerSymbolNames. +static bool hasUndefLinkerSymbols(ObjectFile &file, + const char *const *linkerSymbolNames, + uint64_t numLinkerSymbols) { + StringSet<> symbolsToBeDefined; + // Create a set of possible linker symbols from the 'linkerSymbolNames' array. + for (unsigned symIdx = 0; symIdx < numLinkerSymbols; ++symIdx) { + for (unsigned subSymIdx = 0; + subSymIdx < LINKER_SYMBOL_SIZE / linkerSubSymbolRelocSize; + ++subSymIdx) { + std::string symName = + EraVM::getMangledLinkerSymbol(linkerSymbolNames[symIdx], subSymIdx); + if (!symbolsToBeDefined.insert(symName).second) + llvm_unreachable("Duplicating linker symbols"); + } + } + + for (const SymbolRef &sym : file.symbols()) { + uint32_t symFlags = cantFail(sym.getFlags()); + uint8_t other = ELFSymbolRef(sym).getOther(); + if ((other == ELF::STO_ERAVM_LINKER_SYMBOL) && + (symFlags & object::SymbolRef::SF_Undefined)) { + StringRef symName = cantFail(sym.getName()); + if (!symbolsToBeDefined.contains(symName)) + return true; + } + } + return false; +} + +/// Returns a string with the linker symbol definitions passed in +/// \p linkerSymbolValues. For each name from the \p linkerSymbolNames array +/// it creates five symbol definitions. 
For example, if the linkerSymbolNames[0] +/// points to a string 'symbol_id', it takes the linkerSymbolValues[0] value +/// (which is 20 byte array: 0xAAAAAAAABB.....EEEEEEEE) and creates five symbol +/// definitions: +/// +/// "__linker_symbol_id_0" = 0xAAAAAAAA +/// "__linker_symbol_id_1" = 0xBBBBBBBB +/// "__linker_symbol_id_2" = 0xCCCCCCCC +/// "__linker_symbol_id_3" = 0xDDDDDDDD +/// "__linker_symbol_id_4" = 0xEEEEEEEE +/// +static std::string createLinkerSymbolDefinitions( + const char *const *linkerSymbolNames, + const char linkerSymbolValues[][LINKER_SYMBOL_SIZE], + uint64_t numLinkerSymbols) { + std::string symbolsStr; + for (uint64_t symNum = 0; symNum < numLinkerSymbols; ++symNum) { + StringRef symbolStr(linkerSymbolNames[symNum]); + SmallString hexStrSymbolVal; + toHex(ArrayRef( + reinterpret_cast(linkerSymbolValues[symNum]), + LINKER_SYMBOL_SIZE), + /*LowerCase*/ false, hexStrSymbolVal); + for (unsigned idx = 0; idx < LINKER_SYMBOL_SIZE / linkerSubSymbolRelocSize; + ++idx) { + symbolsStr += "\"" + EraVM::getMangledLinkerSymbol(symbolStr, idx) + "\""; + symbolsStr += " = 0x"; + symbolsStr += hexStrSymbolVal + .substr(2 * linkerSubSymbolRelocSize * idx, + 2 * linkerSubSymbolRelocSize) + .str(); + symbolsStr += ";\n"; + } } - return 0; + return symbolsStr; } -static std::string creteEraVMLinkerScript(uint64_t metadataSize) { +/// Creates a linker script used to generate an relocatable ELF file. The +/// script contains only the linker symbol definitions. 
+/// Here is an example of the resulting linker script: +/// +/// "__linker_library_id_0" = 0x01010101; +/// "__linker_library_id_1" = 0x02020202; +/// "__linker_library_id_2" = 0x03030303; +/// "__linker_library_id_3" = 0x04040404; +/// "__linker_library_id_4" = 0x05050505; +/// +static std::string +createEraVMRelLinkerScript(const char *const *linkerSymbolNames, + const char linkerSymbolValues[][LINKER_SYMBOL_SIZE], + uint64_t numLinkerSymbols) { + return createLinkerSymbolDefinitions(linkerSymbolNames, linkerSymbolValues, + numLinkerSymbols); +} + +/// Creates a linker script used to generate an executable byte code. The +/// script describes both the byte code layout according to the +/// 'EraVM Binary Layout' specification and the linker symbol definitions. +/// Here is an exmaple of the resulting linker script: +/// +/// "__linker_library_id2_0" = 0x06060606; +/// "__linker_library_id2_1" = 0x07070707; +/// "__linker_library_id2_2" = 0x08080808; +/// "__linker_library_id2_3" = 0x09090909; +/// "__linker_library_id2_4" = 0x0A0B0C0D; +/// ENTRY(0); +/// SECTIONS { +/// .code : SUBALIGN(1) { +/// *(.text) +/// +/// ASSERT(. % 8 == 0, "size of instructions isn't multiple of 8"); +/// +/// /* Check that the code size isn't more than 2^16 instructions. */ +/// ASSERT(. <= (1 << 16) * 8, "number of instructions > 2^16") +/// +/// /* Align the .rodata to 32 bytes. */ +/// . = ALIGN(32); +/// *(.rodata) +/// +/// ASSERT(. % 32 == 0, "size isn't multiple of 32"); +/// +/// /* Add padding before the metadata. Here metadata size is 32 bytes */ +/// . = ((((. + 32) >> 5) | 1 ) << 5) - 32; +/// *(.eravm-metadata) +/// +/// ASSERT(. % 64 == 32, "size isn't odd number of words"); +/// +/// /* Check the total binary size is not more than (2^16 - 2) words. */ +/// ASSERT(. <= ((1 << 16) - 1) * 32, "Binary size > (2^16 - 2) words") +/// } = 0 +/// +/// /* .data section itself, that contains initializers of global variables, +/// is not needed. 
*/ +/// /DISCARD/ : { +/// *(.data) +/// }} +/// +static std::string +createEraVMExeLinkerScript(uint64_t metadataSize, + const char *const *linkerSymbolNames, + const char linkerSymbolValues[][LINKER_SYMBOL_SIZE], + uint64_t numLinkerSymbols) { + std::string linkerSymbolNamesStr = createLinkerSymbolDefinitions( + linkerSymbolNames, linkerSymbolValues, numLinkerSymbols); + // The final bytecode should be padded such that its size be the // odd number of words, i.e 2 * (N + 1). // Add padding before the metadata section such that the final // bytecode size to be the even number of words. The metadata // may have arbitrary size. - uint64_t alignedMDSize = llvm::alignTo(metadataSize, llvm::Align(32), 0); + uint64_t alignedMDSize = alignTo(metadataSize, Align(32), 0); // Create the resulting padding expression which is // . = ((((. + alignedMDSize) >> 5) | 1 ) << 5) - metadataSize - . std::string padding = - llvm::formatv(". = ((((. + {0}) >> 5) | 1 ) << 5) - {1};\n", - std::to_string(alignedMDSize), std::to_string(metadataSize)) + formatv(". = ((((. + {0}) >> 5) | 1 ) << 5) - {1};\n", + std::to_string(alignedMDSize), std::to_string(metadataSize)) .str(); - llvm::Twine scriptPart1 = llvm::Twine("\ + Twine scriptPart1 = Twine("\ +ENTRY(0); \n\ SECTIONS { \n\ - .code : SUBALIGN(8) { \n\ + .code : SUBALIGN(1) { \n\ *(.text) \n\ \n\ - ASSERT((32 - (31 & .)) % 8 == 0, \"padding isn't multiple of 8\"); \n\ + ASSERT(. % 8 == 0, \"size of instructions isn't multiple of 8\"); \n\ \n\ - /* Check the code size is no more than 2^16 instructions. */ \n\ + /* Check that the code size isn't more than 2^16 instructions. */ \n\ ASSERT(. <= (1 << 16) * 8, \"number of instructions > 2^16\") \n\ \n\ /* Align the .rodata to 32 bytes. */ \n\ @@ -62,44 +217,94 @@ SECTIONS { \n\ \n\ ASSERT(. 
% 32 == 0, \"size isn't multiple of 32\"); \n\ \n\ - /* Add padding */\n"); + /* Add padding before the metadata */\n"); - llvm::Twine scriptPart2 = llvm::Twine("\ + Twine scriptPart2 = Twine("\ *(.eravm-metadata) \n\ \n\ ASSERT(. % 64 == 32, \"size isn't odd number of words\"); \n\ \n\ - /* Check the total binary size is not more than (2^16 - 2) words. */ \n\ + /* Check the total binary size isn't more than (2^16 - 2) words. */ \n\ ASSERT(. <= ((1 << 16) - 1) * 32, \"Binary size > (2^16 - 2) words\") \n\ } = 0 \n\ \n\ - /* .data section itself that contains initializers of global variables, \n\ + /* .data section itself, that contains initializers of global variables, \n\ is not needed. */ \n\ /DISCARD/ : { \n\ *(.data) \n\ }}"); - return (scriptPart1 + padding + scriptPart2).str(); + return (linkerSymbolNamesStr + scriptPart1 + padding + scriptPart2).str(); } +/// Performs linkage of the ELF object file passed in \p inBuffer, as +/// described in the header. It works by creating a linker script +/// depending on the binary type to be produced (ELF relocatable vs byte code +/// with stripped ELF format) and passing it with the input file to the LLD. LLVMBool LLVMLinkEraVM(LLVMMemoryBufferRef inBuffer, - LLVMMemoryBufferRef *outBuffer, char **errorMessage) { - llvm::SmallVector localInMemBufRefs(2); - localInMemBufRefs[0] = *llvm::unwrap(inBuffer); + LLVMMemoryBufferRef *outBuffer, + const char *const *linkerSymbolNames, + const char linkerSymbolValues[][LINKER_SYMBOL_SIZE], + uint64_t numLinkerSymbols, char **errorMessage) { + // The first array element is the input memory buffer. + // The second one is a buffer with the linke script. 
+ SmallVector localInMemBufRefs(2); + localInMemBufRefs[0] = *unwrap(inBuffer); - std::unique_ptr inBinary = - cantFail(createBinary(llvm::unwrap(inBuffer)->getMemBufferRef())); - assert(inBinary->isObject()); - uint64_t mdSize = getSectionSize(*static_cast(inBinary.get()), + *outBuffer = nullptr; + if (!LLVMIsELF(inBuffer)) { + *errorMessage = strdup("Input binary is not an EraVM ELF file"); + return true; + } + + std::unique_ptr InBinary = + cantFail(createBinary(unwrap(inBuffer)->getMemBufferRef())); + assert(InBinary->isObject()); + + bool shouldEmitRelocatable = + hasUndefLinkerSymbols(*static_cast(InBinary.get()), + linkerSymbolNames, numLinkerSymbols); + + // Input ELF has undefined linker symbols, but no definition was + // provided, which means the linker has nothing to do with the ELF file, + // so just copy it to the out buffer. + if (shouldEmitRelocatable && !numLinkerSymbols) { + StringRef inData = localInMemBufRefs[0].getBuffer(); + *outBuffer = LLVMCreateMemoryBufferWithMemoryRangeCopy( + inData.data(), inData.size(), "result"); + return false; + } + + // Get the size of the metadata (if any) contained in the '.eravm-metadata' + // section of the input ELF file. + uint64_t MDSize = getSectionSize(*static_cast(InBinary.get()), ".eravm-metadata"); - std::string linkerScript = creteEraVMLinkerScript(mdSize); - std::unique_ptr linkerScriptBuf = - llvm::MemoryBuffer::getMemBuffer(linkerScript, "1"); + // The input ELF file can have undefined symbols. There can be the following + // cases: + // - there are undefined linker symbols for which the 'linkerSymbolValues' + // arguments do not provide definitions. In this case we emit a trivial + // linker script that provides definitions for the passed linker symbols. + // - there are undefined symbols other than the linker ones. This situation + // is treated as unreachable. If that happened, it means there is a bug + // in the FE/LLVM codegen/Linker. 
Technically, such an error will be + // handled by the LLD itself by returning an 'undefined symbol' error. + // - all the linker symbols are either defined, or 'linkerSymbolValues' + // provides their definitions. In this case we create a linker script + // for the executable byte code generation. + std::string linkerScript = + shouldEmitRelocatable + ? createEraVMRelLinkerScript(linkerSymbolNames, linkerSymbolValues, + numLinkerSymbols) + : createEraVMExeLinkerScript(MDSize, linkerSymbolNames, + linkerSymbolValues, numLinkerSymbols); + + std::unique_ptr linkerScriptBuf = + MemoryBuffer::getMemBuffer(linkerScript, "1"); localInMemBufRefs[1] = linkerScriptBuf->getMemBufferRef(); - llvm::SmallVector lldArgs; + SmallVector lldArgs; lldArgs.push_back("ld.lld"); // Push the name of the linker script - '1'. @@ -109,15 +314,19 @@ LLVMBool LLVMLinkEraVM(LLVMMemoryBufferRef inBuffer, // Push the name of the input object file - '0'. lldArgs.push_back("0"); - // Strip out the ELF format. - lldArgs.push_back("--oformat=binary"); + // If all the symbols are supposed to be resolved, strip out the ELF format + // and emit the final bytecode. Otherwise emit an ELF relocatable file. 
+ if (shouldEmitRelocatable) + lldArgs.push_back("--relocatable"); + else + lldArgs.push_back("--oformat=binary"); - llvm::SmallString<0> codeString; - llvm::raw_svector_ostream ostream(codeString); - llvm::SmallString<0> errorString; - llvm::raw_svector_ostream errorOstream(errorString); + SmallString<0> codeString; + raw_svector_ostream ostream(codeString); + SmallString<0> errorString; + raw_svector_ostream errorOstream(errorString); const lld::Result s = - lld::lldMainMemBuf(localInMemBufRefs, &ostream, lldArgs, llvm::outs(), + lld::lldMainMemBuf(localInMemBufRefs, &ostream, lldArgs, outs(), errorOstream, {{lld::Gnu, &lld::elf::linkMemBuf}}); bool Ret = !s.retCode && s.canRunAgain; @@ -128,9 +337,80 @@ LLVMBool LLVMLinkEraVM(LLVMMemoryBufferRef inBuffer, return true; } - llvm::StringRef data = ostream.str(); + StringRef data = ostream.str(); *outBuffer = LLVMCreateMemoryBufferWithMemoryRangeCopy(data.data(), data.size(), "result"); return false; } + +/// Returns true if the \p inBuffer contains an EraVM ELF object file. +LLVMBool LLVMIsELF(LLVMMemoryBufferRef inBuffer) { + Expected> inBinaryOrErr = + ELFObjectFile::create(unwrap(inBuffer)->getMemBufferRef()); + if (!inBinaryOrErr) { + handleAllErrors(inBinaryOrErr.takeError(), [](const ErrorInfoBase &EI) {}); + return false; + } + return inBinaryOrErr.get().getArch() == Triple::eravm; +} + +/// Returns, via \p linkerSymbolNames and \p numLinkerSymbols, the names of the +/// undefined linker symbols of the ELF object file passed in \p inBuffer. 
+void LLVMGetUndefinedSymbolsEraVM(LLVMMemoryBufferRef inBuffer, + char **linkerSymbolNames[], + uint64_t *numLinkerSymbols) { + if (!LLVMIsELF(inBuffer)) { + *linkerSymbolNames = nullptr; + *numLinkerSymbols = 0; + return; + } + + StringSet<> undefSymbols; + StringSet<> undefSubSymbols; + std::unique_ptr inBinary = + cantFail(createBinary(unwrap(inBuffer)->getMemBufferRef())); + const auto *oFile = static_cast(inBinary.get()); + for (const SymbolRef &sym : oFile->symbols()) { + uint32_t symFlags = cantFail(sym.getFlags()); + uint8_t other = ELFSymbolRef(sym).getOther(); + if ((other == ELF::STO_ERAVM_LINKER_SYMBOL) && + (symFlags & object::SymbolRef::SF_Undefined)) { + StringRef subName = cantFail(sym.getName()); + undefSubSymbols.insert(subName); + std::string symName = EraVM::getDemangledLinkerSymbol(subName); + undefSymbols.insert(symName); + } + } + + *numLinkerSymbols = undefSymbols.size(); + if (!undefSymbols.size()) { + *linkerSymbolNames = nullptr; + return; + } + + *linkerSymbolNames = reinterpret_cast( + std::malloc(undefSymbols.size() * sizeof(char *))); + unsigned idx = 0; + for (const StringSet<>::value_type &entry : undefSymbols) { + StringRef symName = entry.first(); + // Check that 'undefSubSymbols' form a set of groups each consisting of + // five sub-symbols. + for (unsigned idx = 0; idx < LINKER_SYMBOL_SIZE / linkerSubSymbolRelocSize; + idx++) { + std::string subSymName = EraVM::getMangledLinkerSymbol(symName, idx); + if (!undefSubSymbols.contains(subSymName)) + llvm_unreachable("missing a library sub-symbol"); + } + (*linkerSymbolNames)[idx++] = strdup(symName.str().c_str()); + } +} + +/// Disposes an array with linker symbols returned by the +/// LLVMGetUndefinedSymbolsEraVM(). 
+void LLVMDisposeUndefinedSymbolsEraVM(char *linkerSymbolNames[], + uint64_t numLinkerSymbols) { + for (unsigned idx = 0; idx < numLinkerSymbols; ++idx) + std::free(linkerSymbolNames[idx]); + std::free(linkerSymbolNames); +} diff --git a/lld/test/ELF/Inputs/eravm.lds b/lld/test/ELF/Inputs/eravm.lds index ce01ddd9121f..5314afb46714 100644 --- a/lld/test/ELF/Inputs/eravm.lds +++ b/lld/test/ELF/Inputs/eravm.lds @@ -14,3 +14,9 @@ SECTIONS { *(.data) } > stack } + +__linker_library_sybol_0 = 0x01010101; +__linker_library_sybol_1 = 0x02020202; +__linker_library_sybol_2 = 0x03030303; +__linker_library_sybol_3 = 0x04040404; +__linker_library_sybol_4 = 0x05050505; diff --git a/lld/test/ELF/eravm-data-reloc.s b/lld/test/ELF/eravm-data-reloc.s index 51b8b66fbca1..e0e3c6a00f55 100644 --- a/lld/test/ELF/eravm-data-reloc.s +++ b/lld/test/ELF/eravm-data-reloc.s @@ -1,6 +1,7 @@ ; REQUIRES: eravm ; RUN: llvm-mc -filetype=obj -arch=eravm %s -o %t.o ; RUN: llvm-objdump --no-leading-addr --disassemble --reloc %t.o | FileCheck --check-prefix=INPUT %s +; RUN: llvm-objdump --no-leading-addr --reloc %t.o | FileCheck --check-prefix=INPUT-LIBSYM %s ; RUN: ld.lld -T %S/Inputs/eravm.lds %t.o -o %t ; RUN: llvm-objdump --no-leading-addr --disassemble --reloc --syms %t | FileCheck --check-prefix=OUTPUT %s @@ -61,16 +62,21 @@ array_const_local: ; OUTPUT-NEXT: 00000000 l O .code 00000000 dummy_const ; OUTPUT-NEXT: 00000020 l O .code 00000000 scalar_const_local ; OUTPUT-NEXT: 00000040 l O .code 00000000 array_const_local -; OUTPUT-NEXT: 000000a0 l .code 00000000 reloc_src_g -; OUTPUT-NEXT: 000000c8 l .code 00000000 reloc_src_l -; OUTPUT-NEXT: 000000f0 l .code 00000000 reloc_dst_g -; OUTPUT-NEXT: 00000108 l .code 00000000 reloc_dst_l -; OUTPUT-NEXT: 00000120 l .code 00000000 reloc_both_g -; OUTPUT-NEXT: 00000148 l .code 00000000 reloc_both_l +; OUTPUT-NEXT: 000000c0 l .code 00000000 reloc_src_g +; OUTPUT-NEXT: 000000e8 l .code 00000000 reloc_src_l +; OUTPUT-NEXT: 00000110 l .code 00000000 
reloc_dst_g +; OUTPUT-NEXT: 00000128 l .code 00000000 reloc_dst_l +; OUTPUT-NEXT: 00000140 l .code 00000000 reloc_both_g +; OUTPUT-NEXT: 00000168 l .code 00000000 reloc_both_l ; OUTPUT-NEXT: 00001020 g O .stack 00000000 scalar_var ; OUTPUT-NEXT: 00001040 g O .stack 00000000 array_var ; OUTPUT-NEXT: 00000020 g O .code 00000000 scalar_const ; OUTPUT-NEXT: 00000040 g O .code 00000000 array_const +; OUTPUT-NEXT: 01010101 g *ABS* 00000000 __linker_library_sybol_0 +; OUTPUT-NEXT: 02020202 g *ABS* 00000000 __linker_library_sybol_1 +; OUTPUT-NEXT: 03030303 g *ABS* 00000000 __linker_library_sybol_2 +; OUTPUT-NEXT: 04040404 g *ABS* 00000000 __linker_library_sybol_3 +; OUTPUT-NEXT: 05050505 g *ABS* 00000000 __linker_library_sybol_4 .text .p2align 3 @@ -209,3 +215,14 @@ reloc_both_l: ; OUTPUT-NEXT: 00 81 00 03 00 10 00 47 add code[3], r1, stack[129 + r0] ; OUTPUT-NEXT: 00 81 00 83 00 10 00 37 add stack[131 + r0], r1, stack[129 + r0] ; OUTPUT-NEXT: 00 00 00 00 00 01 04 2d ret + + .rodata +.linker_symbol: + .linker_symbol_cell @library_sybol +; INPUT-LIBSYM: RELOCATION RECORDS FOR [.rodata]: +; INPUT-LIBSYM-NEXT: OFFSET TYPE VALUE +; INPUT-LIBSYM-NEXT: 000000ac R_ERAVM_32 __linker_library_sybol_0 +; INPUT-LIBSYM-NEXT: 000000b0 R_ERAVM_32 __linker_library_sybol_1 +; INPUT-LIBSYM-NEXT: 000000b4 R_ERAVM_32 __linker_library_sybol_2 +; INPUT-LIBSYM-NEXT: 000000b8 R_ERAVM_32 __linker_library_sybol_3 +; INPUT-LIBSYM-NEXT: 000000bc R_ERAVM_32 __linker_library_sybol_4 diff --git a/lld/unittests/EraVM/LLDTest.cpp b/lld/unittests/EraVM/LLDTest.cpp index 877338ccd2a8..0b64c4a67678 100644 --- a/lld/unittests/EraVM/LLDTest.cpp +++ b/lld/unittests/EraVM/LLDTest.cpp @@ -106,7 +106,8 @@ define i256 @get_glob() nounwind { \n\ } LLVMMemoryBufferRef BinMemBuffer; - if (LLVMLinkEraVM(MDObjMemBuffer, &BinMemBuffer, &ErrMsg)) { + if (LLVMLinkEraVM(MDObjMemBuffer, &BinMemBuffer, nullptr, nullptr, 0, + &ErrMsg)) { FAIL() << "Failed to link:" << ErrMsg; LLVMDisposeMessage(ErrMsg); return; @@ -116,12 
+117,449 @@ define i256 @get_glob() nounwind { \n\ StringRef Binary(LLVMGetBufferStart(BinMemBuffer), LLVMGetBufferSize(BinMemBuffer)); EXPECT_TRUE(Binary.take_back(MD.size()) == MDVal); + EXPECT_TRUE(Binary.size() % 64 == 32); + LLVMDisposeMemoryBuffer(ObjMemBuffer); + LLVMDisposeMemoryBuffer(MDObjMemBuffer); + LLVMDisposeMemoryBuffer(BinMemBuffer); +} + +TEST_F(LLDCTest, MetadataIsNotMultiple32) { + StringRef LLVMIr = "\ +target datalayout = \"E-p:256:256-i256:256:256-S32-a:256:256\" \n\ +@glob = global i256 113 \n\ + \n\ +define i256 @get_glob() nounwind { \n\ + %res = load i256, i256* @glob \n\ + %res2 = add i256 %res, 3 \n\ + ret i256 %res2 \n\ +}"; + + // Wrap Source in a MemoryBuffer + LLVMMemoryBufferRef IrMemBuffer = LLVMCreateMemoryBufferWithMemoryRange( + LLVMIr.data(), LLVMIr.size(), "test", 1); + char *ErrMsg = nullptr; + LLVMModuleRef M; + if (LLVMParseIRInContext(Context, IrMemBuffer, &M, &ErrMsg)) { + FAIL() << "Failed to parse llvm ir:" << ErrMsg; + LLVMDisposeMessage(ErrMsg); + return; + } + + // Run CodeGen to produce the buffer. 
+ LLVMMemoryBufferRef ObjMemBuffer; + if (LLVMTargetMachineEmitToMemoryBuffer(TM, M, LLVMObjectFile, &ErrMsg, + &ObjMemBuffer)) { + FAIL() << "Failed to compile llvm ir:" << ErrMsg; + LLVMDisposeModule(M); + LLVMDisposeMessage(ErrMsg); + return; + } + LLVMDisposeModule(M); + + std::array MD = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, + 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21}; + + LLVMMemoryBufferRef MDObjMemBuffer; + if (LLVMAddMetadataEraVM(ObjMemBuffer, MD.data(), MD.size(), &MDObjMemBuffer, + &ErrMsg)) { + errs() << "Failed to add metadata:" << ErrMsg; + LLVMDisposeMessage(ErrMsg); + return; + } + + LLVMMemoryBufferRef BinMemBuffer; + if (LLVMLinkEraVM(MDObjMemBuffer, &BinMemBuffer, nullptr, nullptr, 0, + &ErrMsg)) { + FAIL() << "Failed to link:" << ErrMsg; + LLVMDisposeMessage(ErrMsg); + return; + } + StringRef MDVal(MD.data(), MD.size()); + StringRef Binary(LLVMGetBufferStart(BinMemBuffer), + LLVMGetBufferSize(BinMemBuffer)); + EXPECT_TRUE(Binary.take_back(MD.size()) == MDVal); + EXPECT_TRUE(Binary.size() % 64 == 32); LLVMDisposeMemoryBuffer(ObjMemBuffer); LLVMDisposeMemoryBuffer(MDObjMemBuffer); LLVMDisposeMemoryBuffer(BinMemBuffer); } +TEST_F(LLDCTest, LinkerSymbol) { + StringRef LLVMIr = "\ +target datalayout = \"E-p:256:256-i256:256:256-S32-a:256:256\" \n\ +target triple = \"eravm\" \n\ +declare i256 @llvm.eravm.linkersymbol(metadata) \n\ + \n\ +define i256 @test() { \n\ + %sym = call i256 @llvm.eravm.linkersymbol(metadata !1) \n\ + %sym2 = call i256 @llvm.eravm.linkersymbol(metadata !2) \n\ + %sym3 = call i256 @llvm.eravm.linkersymbol(metadata !3) \n\ + %sym4 = call i256 @llvm.eravm.linkersymbol(metadata !4) \n\ + %sym5 = call i256 @llvm.eravm.linkersymbol(metadata !5) \n\ + %sym6 = call i256 @llvm.eravm.linkersymbol(metadata !6) \n\ + %sym7 = call i256 @llvm.eravm.linkersymbol(metadata !7) \n\ + %sym8 = call i256 
@llvm.eravm.linkersymbol(metadata !8) \n\ + %sym9 = call i256 @llvm.eravm.linkersymbol(metadata !9) \n\ + %sym10 = call i256 @llvm.eravm.linkersymbol(metadata !10) \n\ + %sym11 = call i256 @llvm.eravm.linkersymbol(metadata !11) \n\ + %sym12 = call i256 @llvm.eravm.linkersymbol(metadata !12) \n\ + %sym13 = call i256 @llvm.eravm.linkersymbol(metadata !13) \n\ + %sym14 = call i256 @llvm.eravm.linkersymbol(metadata !14) \n\ + %sym15 = call i256 @llvm.eravm.linkersymbol(metadata !15) \n\ + %sym16 = call i256 @llvm.eravm.linkersymbol(metadata !16) \n\ + %sym17 = call i256 @llvm.eravm.linkersymbol(metadata !17) \n\ + %sym18 = call i256 @llvm.eravm.linkersymbol(metadata !18) \n\ + %sym19 = call i256 @llvm.eravm.linkersymbol(metadata !19) \n\ + %sym20 = call i256 @llvm.eravm.linkersymbol(metadata !20) \n\ + %sym21 = call i256 @llvm.eravm.linkersymbol(metadata !21) \n\ + %sym22 = call i256 @llvm.eravm.linkersymbol(metadata !22) \n\ + %sym23 = call i256 @llvm.eravm.linkersymbol(metadata !23) \n\ + %sym24 = call i256 @llvm.eravm.linkersymbol(metadata !24) \n\ + %sym25 = call i256 @llvm.eravm.linkersymbol(metadata !25) \n\ + %sym26 = call i256 @llvm.eravm.linkersymbol(metadata !26) \n\ + %sym27 = call i256 @llvm.eravm.linkersymbol(metadata !27) \n\ + %sym28 = call i256 @llvm.eravm.linkersymbol(metadata !28) \n\ + %sym29 = call i256 @llvm.eravm.linkersymbol(metadata !29) \n\ + %sym30 = call i256 @llvm.eravm.linkersymbol(metadata !30) \n\ + %sym31 = call i256 @llvm.eravm.linkersymbol(metadata !31) \n\ + %sym32 = call i256 @llvm.eravm.linkersymbol(metadata !32) \n\ + %sym33 = call i256 @llvm.eravm.linkersymbol(metadata !33) \n\ + %sym34 = call i256 @llvm.eravm.linkersymbol(metadata !34) \n\ + %sym35 = call i256 @llvm.eravm.linkersymbol(metadata !35) \n\ + %res = add i256 %sym, %sym2 \n\ + %res2 = add i256 %res, %sym3 \n\ + %res3 = add i256 %res2, %sym4 \n\ + %res4 = add i256 %res3, %sym5 \n\ + %res5 = add i256 %res4, %sym6 \n\ + %res6 = add i256 %res5, %sym7 \n\ + %res7 = add i256 
%res6, %sym8 \n\ + %res8 = add i256 %res7, %sym9 \n\ + %res9 = add i256 %res8, %sym10 \n\ + %res10 = add i256 %res9, %sym11 \n\ + %res11 = add i256 %res10, %sym12 \n\ + %res12 = add i256 %res11, %sym13 \n\ + %res13 = add i256 %res12, %sym14 \n\ + %res14 = add i256 %res13, %sym15 \n\ + %res15 = add i256 %res14, %sym16 \n\ + %res16 = add i256 %res15, %sym17 \n\ + %res17 = add i256 %res16, %sym18 \n\ + %res18 = add i256 %res17, %sym19 \n\ + %res19 = add i256 %res18, %sym20 \n\ + %res20 = add i256 %res19, %sym21 \n\ + %res21 = add i256 %res20, %sym22 \n\ + %res22 = add i256 %res21, %sym23 \n\ + %res23 = add i256 %res22, %sym24 \n\ + %res24 = add i256 %res23, %sym25 \n\ + %res25 = add i256 %res24, %sym26 \n\ + %res26 = add i256 %res25, %sym27 \n\ + %res27 = add i256 %res26, %sym28 \n\ + %res28 = add i256 %res27, %sym29 \n\ + %res29 = add i256 %res28, %sym30 \n\ + %res30 = add i256 %res29, %sym31 \n\ + %res31 = add i256 %res30, %sym32 \n\ + %res32 = add i256 %res31, %sym33 \n\ + %res33 = add i256 %res32, %sym34 \n\ + %res34 = add i256 %res33, %sym35 \n\ + ret i256 %res34 \n\ +} \n\ + \n\ +!1 = !{!\"/file/path()`~!@#$%^&*-+=/library:id\"} \n\ +!2 = !{!\"C:\\file\\path()`~!@#$%^&*-+=\\library:id2\"} \n\ +!3 = !{!\"~/file/path()`~!@#$%^&*-+=/library:id3\"} \n\ +!4 = !{!\"/()`~!@#$%^&*-+=|\\{}[ ]:;'<>,?/_library:id4\"} \n\ +!5 = !{!\".()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!6 = !{!\"()`~!@#$%^&*-+=|\\{}[]:;'<>, .?/_\"} \n\ +!7 = !{!\"!()`~!@#$%^&* - +=|\\{}[]:;'<>,.?/_\"} \n\ +!8 = !{!\"`()`~!@#$%^& * -+=|\\{}[]:;'<>,.?/_\"} \n\ +!9 = !{!\"!()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!10 = !{!\"@()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!11 = !{!\"#()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!12 = !{!\"$()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!13 = !{!\"%()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!14 = !{!\"^()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!15 = !{!\"&()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!16 = !{!\"*()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!17 = 
!{!\"-()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!18 = !{!\"+()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!19 = !{!\"=()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!20 = !{!\" =()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!21 = !{!\"|()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!22 = !{!\"\\()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!23 = !{!\"{()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!24 = !{!\"}()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!25 = !{!\"[()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!26 = !{!\"]()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!27 = !{!\":()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!28 = !{!\";()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!29 = !{!\"'()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!30 = !{!\"<()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!31 = !{!\">()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!32 = !{!\",()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!33 = !{!\"?()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!34 = !{!\"/()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"} \n\ +!35 = !{!\"_()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_\"}"; + + // Wrap Source in a MemoryBuffer + LLVMMemoryBufferRef IrMemBuffer = LLVMCreateMemoryBufferWithMemoryRange( + LLVMIr.data(), LLVMIr.size(), "test", 1); + char *ErrMsg = nullptr; + LLVMModuleRef M; + if (LLVMParseIRInContext(Context, IrMemBuffer, &M, &ErrMsg)) { + FAIL() << "Failed to parse llvm ir:" << ErrMsg; + LLVMDisposeMessage(ErrMsg); + return; + } + + // Run CodeGen to produce the buffer. 
+ LLVMMemoryBufferRef ObjMemBuffer; + if (LLVMTargetMachineEmitToMemoryBuffer(TM, M, LLVMObjectFile, &ErrMsg, + &ObjMemBuffer)) { + FAIL() << "Failed to compile llvm ir:" << ErrMsg; + LLVMDisposeModule(M); + LLVMDisposeMessage(ErrMsg); + return; + } + LLVMDisposeModule(M); + + LLVMMemoryBufferRef BinMemBuffer; + const char *LinkerSymbol[35] = { + "/file/path()`~!@#$%^&*-+=/library:id", + "C:\\file\\path()`~!@#$%^&*-+=\\library:id2", + "~/file/path()`~!@#$%^&*-+=/library:id3", + "/()`~!@#$%^&*-+=|\\{}[ ]:;'<>,?/_library:id4", + ".()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "()`~!@#$%^&*-+=|\\{}[]:;'<>, .?/_", + "!()`~!@#$%^&* - +=|\\{}[]:;'<>,.?/_", + "`()`~!@#$%^& * -+=|\\{}[]:;'<>,.?/_", + "!()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "@()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "#()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "$()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "%()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "^()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "&()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "*()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "-()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "+()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "=()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + " =()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "|()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "\\()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "{()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "}()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "[()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "]()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + ":()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + ";()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "'()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "<()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + ">()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + ",()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "?()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "/()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + "_()`~!@#$%^&*-+=|\\{}[]:;'<>,.?/_", + }; + const char LinkerSymbolVal[35][20] = { + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}, + {4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}, + {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}, + {6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6}, + {7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}, + {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}, + {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, + {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}, + {11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11}, + {12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12}, + {13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13}, + {14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14}, + {15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15}, + {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}, + {17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17}, + {18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18}, + {19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19}, + {20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}, + {21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21}, + {22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22}, + {23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23}, + {24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24}, + {25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25}, + {26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26}, + {27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27}, + {28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 
28}, + {29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}, + {30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30}, + {31, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 31, 31}, + {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32}, + {33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33}, + {34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34}, + {35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35}, + }; + if (LLVMLinkEraVM(ObjMemBuffer, &BinMemBuffer, LinkerSymbol, LinkerSymbolVal, + 35, &ErrMsg)) { + FAIL() << "Failed to link:" << ErrMsg; + LLVMDisposeMessage(ErrMsg); + return; + } + + char **UndefLinkerSymbols = nullptr; + uint64_t NumUndefLinkerSymbols = 0; + EXPECT_FALSE(LLVMIsELF(BinMemBuffer)); + LLVMGetUndefinedSymbolsEraVM(BinMemBuffer, &UndefLinkerSymbols, + &NumUndefLinkerSymbols); + EXPECT_TRUE(NumUndefLinkerSymbols == 0); + LLVMDisposeUndefinedSymbolsEraVM(UndefLinkerSymbols, NumUndefLinkerSymbols); + + StringRef Binary(LLVMGetBufferStart(BinMemBuffer), + LLVMGetBufferSize(BinMemBuffer)); + for (unsigned I = 0; I < 35; ++I) { + StringRef Val(LinkerSymbolVal[I], 20); + EXPECT_TRUE(Binary.find(Val) != StringRef::npos); + } + EXPECT_TRUE(LLVMGetBufferSize(BinMemBuffer) % 64 == 32); + LLVMDisposeMemoryBuffer(ObjMemBuffer); + LLVMDisposeMemoryBuffer(BinMemBuffer); +} + +TEST_F(LLDCTest, IterativeLinkage) { + StringRef LLVMIr = "\ +target datalayout = \"E-p:256:256-i256:256:256-S32-a:256:256\" \n\ +target triple = \"eravm\" \n\ +declare i256 @llvm.eravm.linkersymbol(metadata) \n\ + \n\ +define i256 @test() { \n\ + %res = call i256 @llvm.eravm.linkersymbol(metadata !1) \n\ + %res2 = call i256 @llvm.eravm.linkersymbol(metadata !2) \n\ + %res3 = add i256 %res, %res2 \n\ + ret i256 %res3 \n\ +} \n\ + \n\ +!1 = !{!\"library_id\"} \n\ +!2 = !{!\"library_id2\"}"; + 
+ // Wrap Source in a MemoryBuffer + LLVMMemoryBufferRef IrMemBuffer = LLVMCreateMemoryBufferWithMemoryRange( + LLVMIr.data(), LLVMIr.size(), "test", 1); + char *ErrMsg = nullptr; + LLVMModuleRef M; + if (LLVMParseIRInContext(Context, IrMemBuffer, &M, &ErrMsg)) { + FAIL() << "Failed to parse llvm ir:" << ErrMsg; + LLVMDisposeMessage(ErrMsg); + return; + } + + // Run CodeGen to produce the buffer. + LLVMMemoryBufferRef ObjMemBuffer; + if (LLVMTargetMachineEmitToMemoryBuffer(TM, M, LLVMObjectFile, &ErrMsg, + &ObjMemBuffer)) { + FAIL() << "Failed to compile llvm ir:" << ErrMsg; + LLVMDisposeModule(M); + LLVMDisposeMessage(ErrMsg); + return; + } + LLVMDisposeModule(M); + + EXPECT_TRUE(LLVMIsELF(ObjMemBuffer)); + + char **UndefLibSymbols = nullptr; + uint64_t NumUndefLibSymbols = 0; + const char *LinkerSymbols[2] = {"library_id", "library_id2"}; + const char LinkerSymbolVals[2][20] = { + {1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5}, + {6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 10, 11, 12, 13}}; + + LLVMGetUndefinedSymbolsEraVM(ObjMemBuffer, &UndefLibSymbols, + &NumUndefLibSymbols); + EXPECT_TRUE(NumUndefLibSymbols == 2); + EXPECT_TRUE(std::strcmp(UndefLibSymbols[0], LinkerSymbols[0]) == 0); + EXPECT_TRUE(std::strcmp(UndefLibSymbols[1], LinkerSymbols[1]) == 0); + + LLVMDisposeUndefinedSymbolsEraVM(UndefLibSymbols, NumUndefLibSymbols); + + // Pass only the first linker symbol. + LLVMMemoryBufferRef Obj2MemBuffer; + if (LLVMLinkEraVM(ObjMemBuffer, &Obj2MemBuffer, LinkerSymbols, + LinkerSymbolVals, 1, &ErrMsg)) { + FAIL() << "Failed to link:" << ErrMsg; + LLVMDisposeMessage(ErrMsg); + return; + } + + EXPECT_TRUE(LLVMIsELF(Obj2MemBuffer)); + LLVMGetUndefinedSymbolsEraVM(Obj2MemBuffer, &UndefLibSymbols, + &NumUndefLibSymbols); + EXPECT_TRUE(NumUndefLibSymbols == 1); + EXPECT_TRUE(std::strcmp(UndefLibSymbols[0], LinkerSymbols[1]) == 0); + + LLVMDisposeUndefinedSymbolsEraVM(UndefLibSymbols, NumUndefLibSymbols); + + // Pass only the second linker symbol. 
This time + // the linker should emit the final bytecode, as all the + // symbols are resolved. + LLVMMemoryBufferRef BinMemBuffer; + if (LLVMLinkEraVM(Obj2MemBuffer, &BinMemBuffer, &LinkerSymbols[1], + &LinkerSymbolVals[1], 1, &ErrMsg)) { + FAIL() << "Failed to link:" << ErrMsg; + LLVMDisposeMessage(ErrMsg); + return; + } + + { + LLVMMemoryBufferRef Bin2MemBuffer; + EXPECT_TRUE(LLVMLinkEraVM(BinMemBuffer, &Bin2MemBuffer, nullptr, nullptr, 0, + &ErrMsg)); + EXPECT_TRUE( + StringRef(ErrMsg).contains("Input binary is not an EraVM ELF file")); + LLVMDisposeMessage(ErrMsg); + } + + EXPECT_FALSE(LLVMIsELF(BinMemBuffer)); + LLVMGetUndefinedSymbolsEraVM(BinMemBuffer, &UndefLibSymbols, + &NumUndefLibSymbols); + EXPECT_TRUE(NumUndefLibSymbols == 0); + + StringRef Val1(LinkerSymbolVals[0], 20); + StringRef Val2(LinkerSymbolVals[1], 20); + StringRef Binary(LLVMGetBufferStart(BinMemBuffer), + LLVMGetBufferSize(BinMemBuffer)); + EXPECT_TRUE(Binary.find(Val1) != StringRef::npos); + EXPECT_TRUE(Binary.find(Val2) != StringRef::npos); + EXPECT_TRUE(LLVMGetBufferSize(BinMemBuffer) % 64 == 32); + LLVMDisposeMemoryBuffer(ObjMemBuffer); + LLVMDisposeMemoryBuffer(Obj2MemBuffer); + LLVMDisposeMemoryBuffer(BinMemBuffer); +} + TEST_F(LLDCTest, LinkError) { StringRef LLVMIr = "\ target datalayout = \"E-p:256:256-i256:256:256-S32-a:256:256\" \n\ @@ -155,7 +593,57 @@ define void @glob() nounwind { \n\ LLVMMemoryBufferRef BinMemBuffer; // Return code 'true' denotes an error. 
- EXPECT_TRUE(LLVMLinkEraVM(ObjMemBuffer, &BinMemBuffer, &ErrMsg)); + EXPECT_TRUE( + LLVMLinkEraVM(ObjMemBuffer, &BinMemBuffer, nullptr, nullptr, 0, &ErrMsg)); + EXPECT_TRUE(StringRef(ErrMsg).contains("undefined symbol: foo")); + + LLVMDisposeMessage(ErrMsg); + LLVMDisposeMemoryBuffer(ObjMemBuffer); +} + +TEST_F(LLDCTest, LinkErrorWithDefinedLibrarySymbols) { + StringRef LLVMIr = "\ +target datalayout = \"E-p:256:256-i256:256:256-S32-a:256:256\" \n\ +declare i256 @foo() \n\ +declare i256 @llvm.eravm.linkersymbol(metadata) \n\ +define i256 @glob() nounwind { \n\ + %addr = call i256 @llvm.eravm.linkersymbol(metadata !1) \n\ + %off = call i256 @foo() \n\ + %res = add i256 %addr, %off \n\ + ret i256 %res \n\ +} \n\ +!1 = !{!\"library_id\"}"; + + // Wrap Source in a MemoryBuffer + LLVMMemoryBufferRef IrMemBuffer = LLVMCreateMemoryBufferWithMemoryRange( + LLVMIr.data(), LLVMIr.size(), "test", 1); + char *ErrMsg = nullptr; + LLVMModuleRef M; + if (LLVMParseIRInContext(Context, IrMemBuffer, &M, &ErrMsg)) { + FAIL() << "Failed to parse llvm ir:" << ErrMsg; + LLVMDisposeMessage(ErrMsg); + return; + } + + // Run CodeGen to produce the buffer. + LLVMMemoryBufferRef ObjMemBuffer; + if (LLVMTargetMachineEmitToMemoryBuffer(TM, M, LLVMObjectFile, &ErrMsg, + &ObjMemBuffer)) { + FAIL() << "Failed to compile llvm ir:" << ErrMsg; + LLVMDisposeModule(M); + LLVMDisposeMessage(ErrMsg); + return; + } + LLVMDisposeModule(M); + + const char *LinkerSymbols[1] = {"library_id"}; + const char LinkerSymbolVals[1][20] = { + {1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5}}; + + LLVMMemoryBufferRef BinMemBuffer; + // Return code 'true' denotes an error. 
+ EXPECT_TRUE(LLVMLinkEraVM(ObjMemBuffer, &BinMemBuffer, LinkerSymbols, + LinkerSymbolVals, 1, &ErrMsg)); EXPECT_TRUE(StringRef(ErrMsg).contains("undefined symbol: foo")); LLVMDisposeMessage(ErrMsg); diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 225a3754b62d..f9af3d7f2d73 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -958,6 +958,12 @@ enum { #include "ELFRelocs/EraVM.def" }; +// Special values for the st_other field in the symbol table entry for EraVM. +enum { + // Symbol denotes a sub symbol of a linker symbol. + STO_ERAVM_LINKER_SYMBOL = 0x80 +}; + #undef ELF_RELOC // Section header. diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/EraVM.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/EraVM.def index 5b49ab0517f1..b0a4fc536e69 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/EraVM.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/EraVM.def @@ -6,3 +6,4 @@ ELF_RELOC(R_ERAVM_NONE, 0) ELF_RELOC(R_ERAVM_16_SCALE_32, 1) ELF_RELOC(R_ERAVM_16_SCALE_8, 2) +ELF_RELOC(R_ERAVM_32, 3) diff --git a/llvm/include/llvm/IR/IntrinsicsEraVM.td b/llvm/include/llvm/IR/IntrinsicsEraVM.td index 7d9da94a0b7a..062d49ff1c66 100644 --- a/llvm/include/llvm/IR/IntrinsicsEraVM.td +++ b/llvm/include/llvm/IR/IntrinsicsEraVM.td @@ -59,4 +59,13 @@ def int_eravm_ptr_pack : Intrinsic<[LLVMQualPointerType<3>], [LLVMQualPointerTyp def int_eravm_ptr_shrink : Intrinsic<[LLVMQualPointerType<3>], [LLVMQualPointerType<3>, llvm_i256_ty], [IntrNoMem, IntrWillReturn]>; def int_eravm_ptr_add : Intrinsic<[LLVMQualPointerType<3>], [LLVMQualPointerType<3>, llvm_i256_ty], [IntrNoMem, IntrWillReturn]>; def int_eravm_ptrtoint : Intrinsic<[llvm_i256_ty], [LLVMQualPointerType<3>], [IntrNoMem, IntrWillReturn]>; + +// Linking of libraries. + +// Inserts a library address placeholder, which will be replaced with +// the final library address by the linker.
+def int_eravm_linkersymbol : DefaultAttrsIntrinsic< + [llvm_i256_ty], [llvm_metadata_ty], + [IntrNoMem, IntrWillReturn] +>; } diff --git a/llvm/lib/Target/EraVM/AsmParser/EraVMAsmParser.cpp b/llvm/lib/Target/EraVM/AsmParser/EraVMAsmParser.cpp index 6c344cdd582a..e0e77642209d 100644 --- a/llvm/lib/Target/EraVM/AsmParser/EraVMAsmParser.cpp +++ b/llvm/lib/Target/EraVM/AsmParser/EraVMAsmParser.cpp @@ -799,6 +799,29 @@ bool EraVMAsmParser::ParseDirective(AsmToken DirectiveID) { return false; } + // Parses directive: + // ::= .linker_symbol_cell @(@identifier | "string") + if (DirectiveID.getString() == ".linker_symbol_cell") { + if (!getLexer().is(AsmToken::At)) + return TokError("expected symbol name starting with @"); + Lex(); // eat "@" token + + StringRef SymbolName; + if (getParser().parseIdentifier(SymbolName)) + return TokError("expected symbol name"); + + if (parseEOL()) + return true; + + if (getContext().lookupSymbol(SymbolName)) + return TokError("duplicating library symbols"); + + MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolName); + auto *TS = getStreamer().getTargetStreamer(); + static_cast(TS)->emitLibraryAddressSymbol(Symbol); + + return false; + } return true; } diff --git a/llvm/lib/Target/EraVM/EraVMAsmPrinter.cpp b/llvm/lib/Target/EraVM/EraVMAsmPrinter.cpp index de653ea9cc55..131c96626b65 100644 --- a/llvm/lib/Target/EraVM/EraVMAsmPrinter.cpp +++ b/llvm/lib/Target/EraVM/EraVMAsmPrinter.cpp @@ -73,6 +73,8 @@ class EraVMAsmPrinter : public AsmPrinter { bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, const MachineInstr *MI); + void emitLibraryAddressSymbol(const MachineInstr *MI); + void emitInstruction(const MachineInstr *MI) override; using AliasMapTy = DenseMap>; void emitGlobalConstant(const DataLayout &DL, const Constant *CV, @@ -128,6 +130,10 @@ void EraVMAsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInst); return; } + if (Opc == EraVM::LinkerSymbol) { +
emitLibraryAddressSymbol(MI); + return; + } if (MI->isPseudo()) { #ifndef NDEBUG @@ -140,6 +146,35 @@ void EraVMAsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInst); } +void EraVMAsmPrinter::emitLibraryAddressSymbol(const MachineInstr *MI) { + MCInst MCI; + MCI.setOpcode(EraVM::ADDcrr_s); + // Code reference. + MCSymbol *Label = OutContext.createNamedTempSymbol("linker_symbol"); + const MCExpr *LabelExpr = MCSymbolRefExpr::create(Label, OutContext); + auto *TS = + static_cast(OutStreamer->getTargetStreamer()); + + // Dest register + MCI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); + MCI.addOperand(MCOperand::createReg(EraVM::R0)); + MCI.addOperand(MCOperand::createExpr(LabelExpr)); + MCI.addOperand(MCOperand::createReg(EraVM::R0)); + // Operand: cc + MCI.addOperand(MCOperand::createImm(0)); + EmitToStreamer(*OutStreamer, MCI); + + // Now emit the .rodata entry. + MCSection *CurrentSection = OutStreamer->getCurrentSectionOnly(); + MCSection *ReadOnlySection = + OutContext.getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + OutStreamer->switchSection(ReadOnlySection); + OutStreamer->emitLabel(Label); + MCSymbol *LibraryAddressSymbol = MI->getOperand(1).getMCSymbol(); + TS->emitLibraryAddressSymbol(LibraryAddressSymbol); + OutStreamer->switchSection(CurrentSection); +} + void EraVMAsmPrinter::emitJumpTableInfo() { // The default implementation would try to emit 256-bit fixup, so provide // custom implementation based on emitJumpTableInfo and emitJumpTableEntry diff --git a/llvm/lib/Target/EraVM/EraVMISD.def b/llvm/lib/Target/EraVM/EraVMISD.def index 76c5a011b0b6..801e8c434fa7 100644 --- a/llvm/lib/Target/EraVM/EraVMISD.def +++ b/llvm/lib/Target/EraVM/EraVMISD.def @@ -40,8 +40,9 @@ HANDLE_NODETYPE(LOG_DECOMMIT) HANDLE_NODETYPE(GAStack) HANDLE_NODETYPE(GACode) HANDLE_NODETYPE(TRAP) +HANDLE_NODETYPE(LINKER_SYMBOL) // Flag setting operations. 
HANDLE_NODETYPE(ADD_V) HANDLE_NODETYPE(SUB_V) -HANDLE_NODETYPE(MUL_V) \ No newline at end of file +HANDLE_NODETYPE(MUL_V) diff --git a/llvm/lib/Target/EraVM/EraVMISelLowering.cpp b/llvm/lib/Target/EraVM/EraVMISelLowering.cpp index 7eb14a0b00d6..7e4e7ed469c2 100644 --- a/llvm/lib/Target/EraVM/EraVMISelLowering.cpp +++ b/llvm/lib/Target/EraVM/EraVMISelLowering.cpp @@ -34,6 +34,8 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsEraVM.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -1204,6 +1206,14 @@ SDValue EraVMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(EraVMISD::PTR_PACK, DL, VT, Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::eravm_linkersymbol: { + MachineFunction &MF = DAG.getMachineFunction(); + const MDNode *Metadata = cast(Op.getOperand(1))->getMD(); + StringRef SymStr = cast(Metadata->getOperand(0))->getString(); + MCSymbol *Sym = MF.getContext().getOrCreateSymbol(SymStr); + return DAG.getNode(EraVMISD::LINKER_SYMBOL, DL, VT, + DAG.getMCSymbol(Sym, MVT::i256)); + } } return SDValue(); } diff --git a/llvm/lib/Target/EraVM/EraVMInstrInfo.td b/llvm/lib/Target/EraVM/EraVMInstrInfo.td index d1ca5ca3fd4b..f9d375aa13f2 100644 --- a/llvm/lib/Target/EraVM/EraVMInstrInfo.td +++ b/llvm/lib/Target/EraVM/EraVMInstrInfo.td @@ -44,6 +44,7 @@ def SDT_EraVMLogDecommit : SDTypeProfile<1, 2, [SDTCisVT<0, fatptr>, SDTCisVT<1 def SDT_EraVMArith : SDTypeProfile<1, 2, [SDTCisVT<0, i256>, SDTCisVT<1, i256>, SDTCisVT<2, i256>]>; +def SDT_EraVMLinkerSymbol : SDTypeProfile<1, 1, [SDTCisVT<0, i256>, SDTCisPtrTy<1>]>; //===----------------------------------------------------------------------===// // EraVM Specific Node Definitions. 
//===----------------------------------------------------------------------===// @@ -102,6 +103,7 @@ def EraVMptr_sub : SDNode<"EraVMISD::PTR_SUB", SDT_EraVMPtrOp, []>; def EraVMptr_pack : SDNode<"EraVMISD::PTR_PACK", SDT_EraVMPtrOp, []>; def EraVMptr_shrink : SDNode<"EraVMISD::PTR_SHRINK", SDT_EraVMPtrOp, []>; def EraVMlog_decommit : SDNode<"EraVMISD::LOG_DECOMMIT", SDT_EraVMLogDecommit, [SDNPHasChain]>; +def EraVMLinkerSymbol : SDNode<"EraVMISD::LINKER_SYMBOL", SDT_EraVMLinkerSymbol, []>; def EraVMTrap : SDNode<"EraVMISD::TRAP", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; @@ -968,6 +970,9 @@ let isAsCheapAsAMove = 1, mayLoad = 1, isReMaterializable = 1, // FIXME: expand pseudo def LOADCONST : Pseudo<(outs GR256:$val), (ins i256imm:$addr), [(set GR256:$val, (load tconstpool:$addr))]>; + +def LinkerSymbol : Pseudo<(outs GR256:$val), (ins i256imm:$sym), + [(set GR256:$val, (EraVMLinkerSymbol mcsym:$sym))]>; } let isReMaterializable = 1, hasSideEffects = 0 in { diff --git a/llvm/lib/Target/EraVM/MCTargetDesc/EraVMELFObjectWriter.cpp b/llvm/lib/Target/EraVM/MCTargetDesc/EraVMELFObjectWriter.cpp index dc96c51091a6..9c757960b12e 100644 --- a/llvm/lib/Target/EraVM/MCTargetDesc/EraVMELFObjectWriter.cpp +++ b/llvm/lib/Target/EraVM/MCTargetDesc/EraVMELFObjectWriter.cpp @@ -43,6 +43,8 @@ class EraVMELFObjectWriter : public MCELFObjectTargetWriter { return ELF::R_ERAVM_16_SCALE_32; case EraVM::fixup_16_scale_8: return ELF::R_ERAVM_16_SCALE_8; + case FK_Data_4: + return ELF::R_ERAVM_32; default: llvm_unreachable("Invalid fixup kind"); } diff --git a/llvm/lib/Target/EraVM/MCTargetDesc/EraVMELFStreamer.cpp b/llvm/lib/Target/EraVM/MCTargetDesc/EraVMELFStreamer.cpp index 1d4c049cc474..7c104a79aac0 100644 --- a/llvm/lib/Target/EraVM/MCTargetDesc/EraVMELFStreamer.cpp +++ b/llvm/lib/Target/EraVM/MCTargetDesc/EraVMELFStreamer.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbolELF.h" 
#include "llvm/Support/Casting.h" using namespace llvm; @@ -31,6 +32,7 @@ class EraVMTargetELFStreamer : public EraVMTargetStreamer { EraVMTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); void emitCell(const APInt &Value) override; void emitJumpTarget(const MCExpr *Expr) override; + void emitLibraryAddressSymbol(const MCSymbol *Symbol) override; }; // This part is for ELF object output. @@ -44,6 +46,7 @@ class EraVMTargetAsmStreamer : public EraVMTargetStreamer { MCInstPrinter &InstPrinter, bool VerboseAsm); void emitCell(const APInt &Value) override; void emitJumpTarget(const MCExpr *Expr) override; + void emitLibraryAddressSymbol(const MCSymbol *Symbol) override; }; void EraVMTargetELFStreamer::emitCell(const APInt &Value) { @@ -73,6 +76,36 @@ void EraVMTargetELFStreamer::emitJumpTarget(const MCExpr *Expr) { DF->getFixups().push_back(MCFixup::create(Offset, Expr, FK)); } +void EraVMTargetELFStreamer::emitLibraryAddressSymbol(const MCSymbol *Symbol) { + // Emit the placeholder. + emitCell(APInt::getZero(EraVM::CellBitWidth)); + + constexpr unsigned SymbolSize = 20; + MCContext &Ctx = Streamer.getContext(); + auto &S = static_cast(Streamer); + auto *DF = cast(S.getCurrentFragment()); + StringRef SymStr = Symbol->getName(); + + // Emits 4-byte fixup to cover a part of the 20-byte linker symbol value. 
+ auto EmitFixup = [&S, &Ctx, &SymStr, &DF](unsigned Idx) { + std::string SubSymName = EraVM::getMangledLinkerSymbol(SymStr, Idx); + if (Ctx.lookupSymbol(SubSymName)) + llvm_unreachable("Duplicating library sub-symbols"); + + auto *Sym = cast(Ctx.getOrCreateSymbol(SubSymName)); + Sym->setOther(ELF::STO_ERAVM_LINKER_SYMBOL); + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); + S.visitUsedExpr(*Expr); + + assert(DF->getContents().size() == 32 && SymbolSize > Idx * 4); + unsigned Offset = DF->getContents().size() - (SymbolSize - Idx * 4); + DF->getFixups().push_back(MCFixup::create(Offset, Expr, FK_Data_4)); + }; + + for (unsigned Idx = 0; Idx < SymbolSize / sizeof(uint32_t); ++Idx) + EmitFixup(Idx); +} + void EraVMTargetAsmStreamer::emitCell(const APInt &Value) { assert(Value.getBitWidth() <= EraVM::CellBitWidth); @@ -87,6 +120,17 @@ void EraVMTargetAsmStreamer::emitJumpTarget(const MCExpr *Expr) { Streamer.emitValue(Expr, EraVM::CellBitWidth / 8); } +void EraVMTargetAsmStreamer::emitLibraryAddressSymbol(const MCSymbol *Symbol) { + // This is almost a copy of MCTargetStreamer::emitValue() implementation. 
+ MCContext &Ctx = Streamer.getContext(); + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Ctx); + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "\t.linker_symbol_cell\t"; + Expr->print(OS, Ctx.getAsmInfo()); + Streamer.emitRawText(OS.str()); +} + EraVMTargetAsmStreamer::EraVMTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter &InstPrinter, diff --git a/llvm/lib/Target/EraVM/MCTargetDesc/EraVMMCTargetDesc.cpp b/llvm/lib/Target/EraVM/MCTargetDesc/EraVMMCTargetDesc.cpp index e8c9638e07c1..da9ed50ea4c0 100644 --- a/llvm/lib/Target/EraVM/MCTargetDesc/EraVMMCTargetDesc.cpp +++ b/llvm/lib/Target/EraVM/MCTargetDesc/EraVMMCTargetDesc.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Regex.h" using namespace llvm; @@ -123,3 +124,17 @@ EraVM::analyzeEncodedOpcode(unsigned EncodedOpcode, EncodedOperandMode &SrcMode, return Info; } + +std::string EraVM::getMangledLinkerSymbol(StringRef Name, unsigned SubIdx) { + return (Twine("__linker_") + Name + "_" + std::to_string(SubIdx)).str(); +} + +std::string EraVM::getDemangledLinkerSymbol(StringRef Name) { + Regex prefixRegex(R"(^__linker_.*)"); + Regex suffixRegex(R"(.*_[0-4]$)"); + if (!prefixRegex.match(Name) || !suffixRegex.match(Name)) + llvm_unreachable("Unexpected mangling of the library sub-symbol name"); + + StringRef SymName = Name.drop_front(9).drop_back(2); + return SymName.str(); +} diff --git a/llvm/lib/Target/EraVM/MCTargetDesc/EraVMMCTargetDesc.h b/llvm/lib/Target/EraVM/MCTargetDesc/EraVMMCTargetDesc.h index 817b3f0801f2..0f6f1b446833 100644 --- a/llvm/lib/Target/EraVM/MCTargetDesc/EraVMMCTargetDesc.h +++ b/llvm/lib/Target/EraVM/MCTargetDesc/EraVMMCTargetDesc.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_ERAVM_MCTARGETDESC_ERAVMMCTARGETDESC_H #define LLVM_LIB_TARGET_ERAVM_MCTARGETDESC_ERAVMMCTARGETDESC_H +#include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" 
#include @@ -182,7 +183,8 @@ const EraVMOpcodeInfo *findOpcodeInfo(unsigned Opcode); const EraVMOpcodeInfo *analyzeEncodedOpcode(unsigned EncodedOpcode, EncodedOperandMode &SrcMode, EncodedOperandMode &DstMode); - +std::string getMangledLinkerSymbol(StringRef Name, unsigned SubId); +std::string getDemangledLinkerSymbol(StringRef Name); } // namespace EraVM } // namespace llvm diff --git a/llvm/lib/Target/EraVM/MCTargetDesc/EraVMTargetStreamer.h b/llvm/lib/Target/EraVM/MCTargetDesc/EraVMTargetStreamer.h index 87c999a9ad0a..8d570a985b08 100644 --- a/llvm/lib/Target/EraVM/MCTargetDesc/EraVMTargetStreamer.h +++ b/llvm/lib/Target/EraVM/MCTargetDesc/EraVMTargetStreamer.h @@ -30,6 +30,8 @@ class EraVMTargetStreamer : public MCTargetStreamer { /// Emit `.cell @tgt` where `@tgt` is an instruction address. virtual void emitJumpTarget(const MCExpr *Expr) {} + + virtual void emitLibraryAddressSymbol(const MCSymbol *Symbol) {} }; } // namespace llvm diff --git a/llvm/test/CodeGen/EraVM/intrinsic.ll b/llvm/test/CodeGen/EraVM/intrinsic.ll index 7c6f22e6e7e9..9558d0c5ba8a 100644 --- a/llvm/test/CodeGen/EraVM/intrinsic.ll +++ b/llvm/test/CodeGen/EraVM/intrinsic.ll @@ -203,6 +203,13 @@ define i256 @ifgtii() { ret i256 %res } +; CHECK-LABEL: linkersymbol +define i256 @linkersymbol() { + ; CHECK: add code[@.linker_symbol0], r0, r1 + %res = call i256 @llvm.eravm.linkersymbol(metadata !0) + ret i256 %res +} + ; CHECK-LABEL: invoke.farcall define {i8 addrspace(3)*, i1} @invoke.farcall() noinline { ; CHECK: call r0, @__farcall, @DEFAULT_UNWIND @@ -403,6 +410,8 @@ declare i256 @llvm.eravm.ifeq(i256, i256) declare i256 @llvm.eravm.iflt(i256, i256) declare i256 @llvm.eravm.ifgt(i256, i256) +declare i256 @llvm.eravm.linkersymbol(metadata) + declare {i8 addrspace(3)*, i1} @__farcall(i256, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256) declare {i8 addrspace(3)*, i1} @__staticcall(i256, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256) declare {i8 
addrspace(3)*, i1} @__delegatecall(i256, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256) @@ -412,3 +421,5 @@ declare {i8 addrspace(3)*, i1} @__farcall_byref(i8 addrspace(3)*, i256, i256, i2 declare {i8 addrspace(3)*, i1} @__staticcall_byref(i8 addrspace(3)*, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256) declare {i8 addrspace(3)*, i1} @__delegatecall_byref(i8 addrspace(3)*, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256) declare {i8 addrspace(3)*, i1} @__mimiccall_byref(i8 addrspace(3)*, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256, i256) + +!0 = !{!"linker_symbol"} diff --git a/llvm/test/MC/EraVM/asm-parser/data-errors.s b/llvm/test/MC/EraVM/asm-parser/data-errors.s index 2771db2b681f..856651c92423 100644 --- a/llvm/test/MC/EraVM/asm-parser/data-errors.s +++ b/llvm/test/MC/EraVM/asm-parser/data-errors.s @@ -5,6 +5,13 @@ ; STDOUT: .rodata ; STDOUT-NOT: {{.+}} + .text +.linker_symbol: + .linker_symbol_cell @"library:id" + +.linker_symbol1: + .linker_symbol_cell @"library:id" + .rodata .cell 1 2 .cell 1, 2 @@ -15,6 +22,7 @@ ; COM: Autogenerated checks below, see README.md. +; CHECK: duplicating library symbols ; CHECK: :{{[0-9]+}}:11: error: expected newline ; CHECK-NEXT: .cell 1 2 ; CHECK-NEXT: ^ diff --git a/llvm/test/MC/EraVM/asm-parser/data.s b/llvm/test/MC/EraVM/asm-parser/data.s index 4b09dd23eef7..2bf62358a106 100644 --- a/llvm/test/MC/EraVM/asm-parser/data.s +++ b/llvm/test/MC/EraVM/asm-parser/data.s @@ -29,6 +29,9 @@ foo: .cell -0x8000000000000000000000000000000000000000000000000000000000000000 +; library address symbol +.linker_symbol: + .linker_symbol_cell @file_sol_Math ; COM: Autogenerated checks below, see README.md. 
; CHECK: .text @@ -53,3 +56,6 @@ foo: ; CHECK: .cell -57896044618658097711785492504343953926634992332820282019728792003956564819968 ; CHECK: .cell -57896044618658097711785492504343953926634992332820282019728792003956564819968 + +; CHECK:.linker_symbol: +; CHECK: .linker_symbol_cell @file_sol_Math diff --git a/llvm/test/MC/EraVM/encoding/data.s b/llvm/test/MC/EraVM/encoding/data.s index 4d8505c720ff..28a29f9dbd7f 100644 --- a/llvm/test/MC/EraVM/encoding/data.s +++ b/llvm/test/MC/EraVM/encoding/data.s @@ -47,6 +47,10 @@ foo: jump code[@jump_table + 1] ret +.rodata +.linker_symbol: + .linker_symbol_cell @file_sol_Math + ; CHECK: Relocation section '.rela.text' at offset {{0x[0-9a-f]+}} contains 5 entries: ; CHECK-NEXT: Offset Info Type Sym. Value Symbol's Name + Addend ; CHECK-NEXT: 00000000 00000901 R_ERAVM_16_SCALE_32 00000020 global_var + 0 @@ -55,7 +59,15 @@ foo: ; CHECK-NEXT: 0000000a 00000101 R_ERAVM_16_SCALE_32 00000000 .rodata + 60 ; CHECK-NEXT: 00000012 00000101 R_ERAVM_16_SCALE_32 00000000 .rodata + 80 -; CHECK: Symbol table '.symtab' contains 11 entries: +; CHECK: Relocation section '.rela.rodata' at offset 0x31c contains 5 entries: +; CHECK-NEXT: Offset Info Type Sym. 
Value Symbol's Name + Addend +; CHECK-NEXT: 000000cc 00000b03 R_ERAVM_32 00000000 __linker_file_sol_Math_0 + 0 +; CHECK-NEXT: 000000d0 00000c03 R_ERAVM_32 00000000 __linker_file_sol_Math_1 + 0 +; CHECK-NEXT: 000000d4 00000d03 R_ERAVM_32 00000000 __linker_file_sol_Math_2 + 0 +; CHECK-NEXT: 000000d8 00000e03 R_ERAVM_32 00000000 __linker_file_sol_Math_3 + 0 +; CHECK-NEXT: 000000dc 00000f03 R_ERAVM_32 00000000 __linker_file_sol_Math_4 + 0 + +; CHECK: Symbol table '.symtab' contains 16 entries: ; CHECK-NEXT: Num: Value Size Type Bind Vis Ndx Name ; CHECK-NEXT: 0: 00000000 0 NOTYPE LOCAL DEFAULT UND ; CHECK-NEXT: 1: 00000000 0 SECTION LOCAL DEFAULT [[RO:[0-9]+]] .rodata @@ -67,7 +79,13 @@ foo: ; CHECK-NEXT: 7: 00000080 0 OBJECT LOCAL DEFAULT [[RW]] local_var ; CHECK-NEXT: 8: 00000040 0 OBJECT GLOBAL DEFAULT [[RO]] global_const ; CHECK-NEXT: 9: 00000020 0 OBJECT GLOBAL DEFAULT [[RW]] global_var -; CHECK-NEXT: 10: 00000000 0 FUNC GLOBAL DEFAULT {{[0-9]+}} foo +; CHECK-NEXT: 10: 00000000 0 FUNC GLOBAL DEFAULT {{[0-9]+}} foo +; CHECK-NEXT: 11: 00000000 0 NOTYPE GLOBAL DEFAULT [LINKER_SYMBOL] UND __linker_file_sol_Math_0 +; CHECK-NEXT: 12: 00000000 0 NOTYPE GLOBAL DEFAULT [LINKER_SYMBOL] UND __linker_file_sol_Math_1 +; CHECK-NEXT: 13: 00000000 0 NOTYPE GLOBAL DEFAULT [LINKER_SYMBOL] UND __linker_file_sol_Math_2 +; CHECK-NEXT: 14: 00000000 0 NOTYPE GLOBAL DEFAULT [LINKER_SYMBOL] UND __linker_file_sol_Math_3 +; CHECK-NEXT: 15: 00000000 0 NOTYPE GLOBAL DEFAULT [LINKER_SYMBOL] UND __linker_file_sol_Math_4 + ; RODATA: Hex dump of section '.rodata': ; RODATA-NEXT: 0x00000000 2a000000 00000000 00000000 00000000 *............... @@ -82,6 +100,8 @@ foo: ; RODATA-NEXT: 0x00000090 00000000 00000000 00000000 00000001 ................ ; RODATA-NEXT: 0x000000a0 00000000 00000000 00000000 00000000 ................ ; RODATA-NEXT: 0x000000b0 00000000 00000000 00000000 00000002 ................ +; RODATA-NEXT: 0x000000c0 00000000 00000000 00000000 00000000 ................ 
+; RODATA-NEXT: 0x000000d0 00000000 00000000 00000000 00000000 ................ ; DATA: Hex dump of section '.data': ; DATA-NEXT: 0x00000000 abcdef01 23456789 abcdef01 23456789 ....#Eg.....#Eg. diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 020c5b5b5161..f8156e20028e 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1753,6 +1753,11 @@ const EnumEntry ElfMips16SymOtherFlags[] = { const EnumEntry ElfRISCVSymOtherFlags[] = { LLVM_READOBJ_ENUM_ENT(ELF, STO_RISCV_VARIANT_CC)}; +// EraVM local begin +const EnumEntry ElfEraVMSymOtherFlags[] = { + LLVM_READOBJ_ENUM_ENT(ELF, STO_ERAVM_LINKER_SYMBOL)}; +// EraVM local end + static const char *getElfMipsOptionsOdkType(unsigned Odk) { switch (Odk) { LLVM_READOBJ_ENUM_CASE(ELF, ODK_NULL); @@ -3466,6 +3471,11 @@ ELFDumper::getOtherFlagsFromSymbol(const Elf_Ehdr &Header, } else if (Header.e_machine == EM_RISCV) { SymOtherFlags.insert(SymOtherFlags.end(), std::begin(ElfRISCVSymOtherFlags), std::end(ElfRISCVSymOtherFlags)); + // EraVM local begin + } else if (Header.e_machine == EM_ERAVM) { + SymOtherFlags.insert(SymOtherFlags.end(), std::begin(ElfEraVMSymOtherFlags), + std::end(ElfEraVMSymOtherFlags)); + // EraVM local end } return SymOtherFlags; } @@ -4111,6 +4121,12 @@ void GNUELFDumper::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex, Fields[5].Str.append(" | " + utohexstr(Other, /*LowerCase=*/true)); Fields[5].Str.append("]"); } + // EraVM local begin + } else if (this->Obj.getHeader().e_machine == ELF::EM_ERAVM) { + uint8_t Other = Symbol.st_other & ~0x3; + if (Other & STO_ERAVM_LINKER_SYMBOL) + Fields[5].Str += " [LINKER_SYMBOL]"; + // EraVM local end } else { Fields[5].Str += " []";