Skip to content

Commit

Permalink
[EVM] Add LLVMLinkEVM C-API
Browse files Browse the repository at this point in the history
Please note that this is a temporary patch. It adds initial support
for dependencies, but it does not work in the general case.
A full solution will be more sophisticated and will likely be
implemented on the FE driver side, without needing to use LLD.
  • Loading branch information
PavelKopyl committed Oct 9, 2024
1 parent dcc54ed commit 876ffb6
Show file tree
Hide file tree
Showing 6 changed files with 235 additions and 8 deletions.
26 changes: 26 additions & 0 deletions lld/include/lld-c/LLDAsLibraryC.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,32 @@ char **LLVMGetUndefinedLinkerSymbolsEraVM(LLVMMemoryBufferRef inBuffer,
* LLVMGetUndefinedSymbolsEraVM(). */
void LLVMDisposeUndefinedLinkerSymbolsEraVM(char *linkerSymbolNames[],
uint64_t numLinkerSymbols);

/** Links the deploy and runtime ELF object files using the information about
 * dependencies.
 * \p inBuffers - array of input memory buffers with the following structure:
 *
 *   inBuffers[0] - deploy ELF object code
 *   inBuffers[1] - deployed (runtime) ELF object code
 *   --------------------------
 *   inBuffers[2] - 1-st sub-contract (final EVM bytecode)
 *   ...
 *   inBuffers[N] - N-th sub-contract (final EVM bytecode)
 *
 * Sub-contracts are optional. They should have the same ordering as in
 * the YUL layout.
 *
 * \p inBuffersIDs - array of string identifiers of the buffers. IDs correspond
 * to the object names in the YUL layout.
 * \p numInBuffers - number of elements in both \p inBuffers and
 * \p inBuffersIDs. It must be at least 2, because the deploy and deployed
 * objects are mandatory.
 * On success, outBuffers[0] will contain the deploy bytecode and outBuffers[1]
 * the runtime bytecode.
 * In case of an error the function returns 'true' and the error message is
 * passed in \p errorMessage. The message should be disposed of with
 * 'LLVMDisposeMessage'. */
LLVMBool LLVMLinkEVM(LLVMMemoryBufferRef *inBuffers, const char *inBuffersIDs[],
uint64_t numInBuffers, LLVMMemoryBufferRef outBuffers[2],
char **errorMessage);

LLVM_C_EXTERN_C_END

#endif // LLD_C_LLDASLIBRARYC_H
196 changes: 196 additions & 0 deletions lld/lld-c/LLDAsLibraryC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,3 +443,199 @@ void LLVMDisposeUndefinedLinkerSymbolsEraVM(char *linkerSymbolNames[],
std::free(linkerSymbolNames[idx]);
std::free(linkerSymbolNames);
}

//----------------------------------------------------------------------------//

/// Generates a linker script for the EVM architecture.
/// \p memBufs - array of input memory buffers with the following structure:
///
///   memBufs[0] - deploy object code
///   memBufs[1] - deployed object code
///   --------------------------
///   memBufs[2] - 1-st sub-contract (final EVM bytecode)
///   ...
///   memBufs[N] - N-th sub-contract (final EVM bytecode)
///
/// Sub-contracts are optional. They should have the same ordering as in
/// the YUL layout.
///
/// \p bufIDs - array of string identifiers of the buffers. IDs correspond
/// to the object names in the YUL layout. It must have the same number of
/// elements as \p memBufs.
///
/// \returns the text of the linker script.
///
/// For example, for the YUL object:
///
///   |--D_105_deploy --||--D_105_deployed --||-- B_40 --|
///
/// the generated script is (indentation added for readability):
///
///   __datasize_B_40 = 1384;
///
///   ENTRY(0);
///   SECTIONS {
///     . = 0;
///     .code : SUBALIGN(1) {
///       D_105(.text);
///       __dataoffset_D_105_deployed = .;
///       D_105_deployed(.text);
///       __datasize_D_105_deployed = . - __dataoffset_D_105_deployed;
///       __dataoffset_B_40 = .;
///       __datasize_D_105 = __dataoffset_B_40 + __datasize_B_40;
///       LONG(__dataoffset_D_105_deployed);
///     }
///   }
///
/// The dot '.' denotes current location in the resulting file.
/// The purpose of the script is to define datasize/dataoffset absolute symbols
/// that reflect the YUL layout.
static std::string creteEVMLinkerScript(ArrayRef<LLVMMemoryBufferRef> memBufs,
                                        ArrayRef<const char *> bufIDs) {
  assert(memBufs.size() == bufIDs.size());
  assert(bufIDs.size() > 1 && "deploy and deployed objects are mandatory");
  const size_t numObjectsToLink = memBufs.size();
  const StringRef dataSizePrefix("__datasize_");
  const StringRef dataOffsetPrefix("__dataoffset_");

  // Define the script part related to the top-level contract.
  const StringRef topName(bufIDs[0]);
  const StringRef deployed(bufIDs[1]);

  // NOTE: a Twine only references its operands, so it must never be kept in a
  // named variable: the temporaries it points to die at the end of the full
  // expression. Every concatenation below is therefore materialized into a
  // std::string within the same expression.

  // Contains the linker script part corresponding to the top-level contract.
  // For the example above, this contains:
  //   D_105(.text);
  //   __dataoffset_D_105_deployed = .;
  //   D_105_deployed(.text);
  //   __datasize_D_105_deployed = . - __dataoffset_D_105_deployed;
  const std::string topLevel =
      (topName + "(.text);\n" + dataOffsetPrefix + deployed + " = .;\n" +
       deployed + "(.text);\n" + dataSizePrefix + deployed + " = . - " +
       dataOffsetPrefix + deployed + ";\n")
          .str();

  // Contains symbols whose values are the sizes of the dependent contracts.
  // For the example above, this contains:
  //   __datasize_B_40 = 1384;
  std::string symDatasizeDeps;

  // Contains symbols whose values are the offsets of the dependent contracts.
  // For the example above, this contains:
  //   __dataoffset_B_40 = .;
  std::string symDataOffsetDeps;
  if (numObjectsToLink > 2) {
    // Define datasize symbols for the dependent contracts. They start after
    // {deploy, deployed} pair of the top-level contract, i.e. at index 2.
    for (size_t idx = 2; idx < numObjectsToLink; ++idx)
      symDatasizeDeps += (dataSizePrefix + bufIDs[idx] + " = " +
                          Twine(LLVMGetBufferSize(memBufs[idx])) + ";\n")
                             .str();

    // The first dependency starts at the current location; every following
    // one starts right after its predecessor.
    symDataOffsetDeps = (dataOffsetPrefix + bufIDs[2] + " = .;\n").str();
    for (size_t idx = 3; idx < numObjectsToLink; ++idx)
      symDataOffsetDeps +=
          (dataOffsetPrefix + bufIDs[idx] + " = " + dataOffsetPrefix +
           bufIDs[idx - 1] + " + " + dataSizePrefix + bufIDs[idx - 1] + ";\n")
              .str();
  }

  // Contains a symbol whose value is the total size of the top-level contract
  // with all the dependencies.
  std::string symDatasizeTop = (dataSizePrefix + topName + " = ").str();
  if (numObjectsToLink > 2)
    symDatasizeTop += (dataOffsetPrefix + bufIDs.back() + " + " +
                       dataSizePrefix + bufIDs.back() + ";\n")
                          .str();
  else
    symDatasizeTop += ".;\n";

  // Emit the offset of the deployed code as a 4-byte unsigned integer.
  // This is needed to determine which offset the deployed code starts at
  // in the linked binary.
  const std::string deploySize =
      ("LONG(" + dataOffsetPrefix + deployed + ");\n").str();

  // Convert to std::string while the formatv object is still alive.
  return formatv("{0}\n"
                 "ENTRY(0);\n"
                 "SECTIONS {\n"
                 ". = 0;\n"
                 ".code : SUBALIGN(1) {\n"
                 "{1}"
                 "{2}"
                 "{3}"
                 "{4}"
                 "}\n"
                 "}\n",
                 symDatasizeDeps, topLevel, symDataOffsetDeps, symDatasizeTop,
                 deploySize)
      .str();
}

// Links the deploy and deployed (runtime) ELF objects with LLD, driven by a
// generated linker script, and splits the flat binary output into the deploy
// and deployed bytecode buffers.
//
// Returns 'false' on success, with outBuffers[0] holding the deploy bytecode
// and outBuffers[1] the deployed bytecode. Returns 'true' on failure, with
// *errorMessage set to a heap-allocated (strdup) description of the error.
LLVMBool LLVMLinkEVM(LLVMMemoryBufferRef inBuffers[],
                     const char *inBuffersIDs[], uint64_t numInBuffers,
                     LLVMMemoryBufferRef outBuffers[2], char **errorMessage) {
  // The deploy and deployed objects are mandatory. Report a malformed call
  // through the regular error channel, so that builds without assertions do
  // not read past the end of the input arrays.
  if (numInBuffers < 2) {
    *errorMessage = strdup("LLVMLinkEVM: expected at least two input buffers "
                           "(deploy and deployed code)");
    return true;
  }

  // Slots 0 and 1 hold the deploy/deployed objects, slot 2 the linker script.
  SmallVector<MemoryBufferRef> localInMemBufRefs(3);
  SmallVector<std::unique_ptr<MemoryBuffer>> localInMemBufs(3);
  for (unsigned idx = 0; idx < 2; ++idx) {
    MemoryBufferRef ref = *unwrap(inBuffers[idx]);
    localInMemBufs[idx] =
        MemoryBuffer::getMemBuffer(ref.getBuffer(), inBuffersIDs[idx],
                                   /*RequiresNullTerminator*/ false);
    localInMemBufRefs[idx] = localInMemBufs[idx]->getMemBufferRef();
  }

  std::string linkerScript = creteEVMLinkerScript(
      ArrayRef(inBuffers, numInBuffers), ArrayRef(inBuffersIDs, numInBuffers));
  std::unique_ptr<MemoryBuffer> scriptBuf =
      MemoryBuffer::getMemBuffer(linkerScript, "script.x");
  localInMemBufRefs[2] = scriptBuf->getMemBufferRef();

  SmallVector<const char *, 16> lldArgs;
  lldArgs.push_back("ld.lld");
  lldArgs.push_back("-T");
  lldArgs.push_back("script.x");

  // Use remapping of file names (a linker feature) to replace file names with
  // indexes in the array of memory buffers.
  // The remap strings must stay alive until the linker has run, because
  // lldArgs only stores pointers into them.
  const StringRef remapPrefix("--remap-inputs=");
  std::string remapDeployStr = (remapPrefix + inBuffersIDs[0] + "=0").str();
  lldArgs.push_back(remapDeployStr.c_str());

  std::string remapDeployedStr = (remapPrefix + inBuffersIDs[1] + "=1").str();
  lldArgs.push_back(remapDeployedStr.c_str());

  lldArgs.push_back("--remap-inputs=script.x=2");

  // Deploy code
  lldArgs.push_back(inBuffersIDs[0]);
  // Deployed code
  lldArgs.push_back(inBuffersIDs[1]);

  lldArgs.push_back("--oformat=binary");

  SmallString<0> codeString;
  raw_svector_ostream ostream(codeString);
  SmallString<0> errorString;
  raw_svector_ostream errorOstream(errorString);

  // Lld-as-a-library is not thread safe, as it has a global state,
  // so we need to protect lld from simultaneous access from different threads.
  std::unique_lock<std::mutex> lock(lldMutex);
  const lld::Result s =
      lld::lldMainMemBuf(localInMemBufRefs, &ostream, lldArgs, outs(),
                         errorOstream, {{lld::Gnu, &lld::elf::linkMemBuf}});
  lock.unlock();

  bool ret = !s.retCode && s.canRunAgain;
  if (!ret) {
    *errorMessage = strdup(errorString.c_str());
    return true;
  }

  const StringRef data = ostream.str();
  // The linker script appends the offset of the deployed code (i.e. the size
  // of the deploy code) as a 4-byte unsigned integer, read here as big-endian,
  // to the end of the .code section. Knowing this, we can extract the final
  // deploy and deployed codes.
  constexpr size_t deploySizeFieldLen = 4;
  if (data.size() <= deploySizeFieldLen) {
    *errorMessage =
        strdup("LLVMLinkEVM: linker output is too small to contain the "
               "deploy code size");
    return true;
  }

  // Size of the output without the trailing size field.
  const size_t payloadSize = data.size() - deploySizeFieldLen;
  const size_t deploySize =
      support::endian::read32be(data.data() + payloadSize);
  // Validate before subtracting: a corrupted size would otherwise underflow
  // 'deployedSize' and lead to an out-of-bounds copy.
  if (deploySize > payloadSize) {
    *errorMessage = strdup("LLVMLinkEVM: deploy code size exceeds the size of "
                           "the linker output");
    return true;
  }
  const size_t deployedSize = payloadSize - deploySize;

  outBuffers[0] = LLVMCreateMemoryBufferWithMemoryRangeCopy(
      data.data(), deploySize, "deploy");
  outBuffers[1] = LLVMCreateMemoryBufferWithMemoryRangeCopy(
      data.data() + deploySize, deployedSize, "deployed");

  return false;
}
2 changes: 1 addition & 1 deletion llvm/include/llvm/BinaryFormat/ELF.h
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ enum {

// EVM local begin
// ELF Relocation types for EVM
enum {
enum : uint8_t {
#include "ELFRelocs/EVM.def"
};
// EVM local end
Expand Down
6 changes: 1 addition & 5 deletions llvm/lib/Object/ELF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
case ELF::EM_ERAVM:
switch (Type) {
#include "llvm/BinaryFormat/ELFRelocs/EraVM.def"
default:
default:
break;
}
break;
Expand Down Expand Up @@ -251,10 +251,6 @@ uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) {
break;
case ELF::EM_LOONGARCH:
return ELF::R_LARCH_RELATIVE;
// EVM local begin
case ELF::EM_EVM:
break;
// EVM local end
default:
break;
}
Expand Down
10 changes: 9 additions & 1 deletion llvm/lib/Target/EVM/MCTargetDesc/EVMTargetStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,22 @@
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/EVMTargetStreamer.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// EVMTargetStreamer implementations

EVMTargetStreamer::EVMTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}

EVMTargetStreamer::~EVMTargetStreamer() = default;
/// Gives every emitted label local ELF binding.
void EVMTargetStreamer::emitLabel(MCSymbol *Symbol) {
  // Workaround for the current linking scheme: each symbol is made local to
  // its own translation unit.
  cast<MCSymbolELF>(Symbol)->setBinding(ELF::STB_LOCAL);
}

EVMTargetObjStreamer::EVMTargetObjStreamer(MCStreamer &S)
: EVMTargetStreamer(S) {}
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/EVM/MCTargetDesc/EVMTargetStreamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class EVMTargetStreamer : public MCTargetStreamer {
EVMTargetStreamer(EVMTargetStreamer &&) = delete;
EVMTargetStreamer &operator=(const EVMTargetStreamer &) = delete;
EVMTargetStreamer &operator=(EVMTargetStreamer &&) = delete;
~EVMTargetStreamer() override;

void emitLabel(MCSymbol *Symbol) override;
};

/// This part is for ASCII assembly output
Expand Down

0 comments on commit 876ffb6

Please sign in to comment.