Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EVM] Initial support of EVM dependencies #708

Merged
merged 4 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions lld/ELF/Arch/EVM.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
//===- EVM.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// EVM is a stack-based virtual machine with a word size of 256 bits intended
// for execution of smart contracts in the Ethereum blockchain environment.
//
// Since it is baremetal programming, there's usually no loader to load
// ELF files on EVMs. You are expected to link your program against address
// 0 and pull out a .text section from the result using objcopy, so that you
// can write the linked code to on-chip flash memory. You can do that with
// the following commands:
//
// ld.lld -Ttext=0 -o foo foo.o
// objcopy -O binary --only-section=.text foo output.bin
//
//===----------------------------------------------------------------------===//

#include "InputFiles.h"
#include "Symbols.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

namespace {
/// lld ELF target description for EVM. It overrides the relocation hooks and
/// the e_flags computation of TargetInfo.
class EVM final : public TargetInfo {
public:
  /// Maps a relocation type to the expression used to compute its value.
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;

  /// Writes the resolved relocation value \p val at \p loc.
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;

  /// Returns the e_flags of the input object files, reporting an error if
  /// they are not all identical.
  uint32_t calcEFlags() const override;
};
} // namespace

/// Maps an EVM relocation type to the expression lld uses to compute its
/// value.
///
/// \param type relocation type read from the input object.
/// \param s    symbol the relocation refers to (used for diagnostics only).
/// \param loc  location being relocated (used for diagnostics only).
/// \returns R_ABS for R_EVM_DATA; for any other type an error is reported and
///          R_NONE is returned.
RelExpr EVM::getRelExpr(RelType type, const Symbol &s,
                        const uint8_t *loc) const {
  switch (type) {
  case R_EVM_DATA:
    return R_ABS;
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}

void EVM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
switch (rel.type) {
case R_EVM_DATA: {
if (val > std::numeric_limits<uint32_t>::max())
llvm_unreachable("R_EVM_DATA: to big relocation value");
write32be(loc, val);
break;
}
default:
llvm_unreachable("unknown relocation");
}
}

/// Returns the EVM target description. The object is a function-local static,
/// so every call yields the same instance.
TargetInfo *elf::getEVMTargetInfo() {
  static EVM evmTarget;
  return &evmTarget;
}

/// Reads the e_flags field from the ELF header of \p file.
/// NOTE(review): the header is decoded as ELF32LE while EVM::relocate writes
/// big-endian values (write32be) - confirm the object-file endianness.
static uint32_t getEFlags(InputFile *file) {
  auto *objFile = cast<ObjFile<ELF32LE>>(file);
  return objFile->getObj().getHeader().e_flags;
}

uint32_t EVM::calcEFlags() const {
assert(!ctx.objectFiles.empty());

const uint32_t flags = getEFlags(ctx.objectFiles[0]);
if (auto it = std::find_if_not(
ctx.objectFiles.begin(), ctx.objectFiles.end(),
[flags](InputFile *f) { return flags == getEFlags(f); });
it != ctx.objectFiles.end())
error(toString(*it) +
": cannot link object files with incompatible target ISA");

return flags;
}
1 change: 1 addition & 0 deletions lld/ELF/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ add_lld_library(lldELF
Arch/AMDGPU.cpp
Arch/ARM.cpp
Arch/AVR.cpp
Arch/EVM.cpp
Arch/EraVM.cpp
Arch/Hexagon.cpp
Arch/LoongArch.cpp
Expand Down
3 changes: 3 additions & 0 deletions lld/ELF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
// EVM local begin
#include "llvm/Object/ELF.h"
// EVM local end
#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compression.h"
Expand Down
4 changes: 4 additions & 0 deletions lld/ELF/InputFiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1614,6 +1614,10 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) {
return t.isOSIAMCU() ? EM_IAMCU : EM_386;
case Triple::x86_64:
return EM_X86_64;
// EVM local begin
case Triple::evm:
return EM_EVM;
// EVM local end
default:
error(path + ": could not infer e_machine from bitcode target triple " +
t.str());
Expand Down
4 changes: 4 additions & 0 deletions lld/ELF/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ TargetInfo *elf::getTarget() {
return getSPARCV9TargetInfo();
case EM_X86_64:
return getX86_64TargetInfo();
// EVM local begin
case EM_EVM:
return getEVMTargetInfo();
// EVM local end
}
llvm_unreachable("unknown target machine");
}
Expand Down
3 changes: 3 additions & 0 deletions lld/ELF/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ TargetInfo *getRISCVTargetInfo();
TargetInfo *getSPARCV9TargetInfo();
TargetInfo *getX86TargetInfo();
TargetInfo *getX86_64TargetInfo();
// EVM local begin
TargetInfo *getEVMTargetInfo();
// EVM local end
template <class ELFT> TargetInfo *getMipsTargetInfo();

struct ErrorPlace {
Expand Down
26 changes: 26 additions & 0 deletions lld/include/lld-c/LLDAsLibraryC.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,32 @@ char **LLVMGetUndefinedLinkerSymbolsEraVM(LLVMMemoryBufferRef inBuffer,
* LLVMGetUndefinedSymbolsEraVM(). */
void LLVMDisposeUndefinedLinkerSymbolsEraVM(char *linkerSymbolNames[],
uint64_t numLinkerSymbols);

/** Links the deploy and runtime ELF object files using the information about
 *  dependencies.
 *  \p inBuffers - array of input memory buffers with the following structure:
 *
 *   inBuffers[0] - deploy ELF object code
 *   inBuffers[1] - deployed (runtime) ELF object code
 *   --------------------------
 *   inBuffers[2] - 1-st sub-contract (final EVM bytecode)
 *   ...
 *   inBuffers[N] - (N-1)-th sub-contract (final EVM bytecode)
 *
 *  Sub-contracts are optional. They should have the same ordering as in
 *  the YUL layout.
 *
 *  \p inBuffersIDs - array of string identifiers of the buffers. IDs correspond
 *  to the object names in the YUL layout.
 *  \p numInBuffers - number of elements in both \p inBuffers and
 *  \p inBuffersIDs; it must be at least 2 (deploy and deployed code).
 *  On success, the function returns 'false', outBuffers[0] will contain the
 *  deploy bytecode and outBuffers[1] the runtime bytecode.
 *  In case of an error the function returns 'true' and the error message is
 *  passed in \p errorMessage. The message should be disposed by
 *  'LLVMDisposeMessage'. */
LLVMBool LLVMLinkEVM(LLVMMemoryBufferRef *inBuffers, const char *inBuffersIDs[],
uint64_t numInBuffers, LLVMMemoryBufferRef outBuffers[2],
char **errorMessage);

LLVM_C_EXTERN_C_END

#endif // LLD_C_LLDASLIBRARYC_H
196 changes: 196 additions & 0 deletions lld/lld-c/LLDAsLibraryC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,3 +443,199 @@ void LLVMDisposeUndefinedLinkerSymbolsEraVM(char *linkerSymbolNames[],
std::free(linkerSymbolNames[idx]);
std::free(linkerSymbolNames);
}

//----------------------------------------------------------------------------//

/// This function generates a linker script for EVM architecture.
/// \p memBufs - array of input memory buffers with following structure:
///
///   memBufs[0] - deploy object code
///   memBufs[1] - deployed object code
///   --------------------------
///   memBufs[2] - 1-st sub-contract (final EVM bytecode)
///   ...
///   memBufs[N] - (N-1)-th sub-contract (final EVM bytecode)
///
/// Sub-contracts are optional. They should have the same ordering as in
/// the YUL layout.
///
/// \p bufIDs - array of string identifiers of the buffers. IDs correspond
/// to the object names in the YUL layout. It must have the same number of
/// elements as \p memBufs, and at least two.
///
/// For example, the YUL object:
///
///   |--D_105_deploy --||--D_105_deployed --||-- B_40 --|
///
/// produces the script:
///
///   __datasize_B_40 = 1384;
///
///   ENTRY(0);
///   SECTIONS {
///     . = 0;
///     .code : SUBALIGN(1) {
///       D_105(.text);
///       __dataoffset_D_105_deployed = .;
///       D_105_deployed(.text);
///       __datasize_D_105_deployed = . - __dataoffset_D_105_deployed;
///       __dataoffset_B_40 = .;
///       __datasize_D_105 = __dataoffset_B_40 + __datasize_B_40;
///       LONG(__dataoffset_D_105_deployed);
///     }
///   }
///
/// The dot '.' denotes current location in the resulting file.
/// The purpose of the script is to define datasize/dataoffset absolute symbols
/// that reflect the YUL layout.
///
/// \returns the text of the linker script.
static std::string creteEVMLinkerScript(ArrayRef<LLVMMemoryBufferRef> memBufs,
                                        ArrayRef<const char *> bufIDs) {
  assert(memBufs.size() == bufIDs.size());
  assert(bufIDs.size() >= 2 && "deploy and deployed objects are mandatory");
  const size_t numObjectsToLink = memBufs.size();
  const StringRef dataSizePrefix("__datasize_");
  const StringRef dataOffsetPrefix("__dataoffset_");

  // Define the script part related to the top-level contract.
  const StringRef topName(bufIDs[0]);
  const StringRef deployed(bufIDs[1]);

  // NOTE: every concatenation below is materialized into a std::string within
  // the same full expression. A Twine only references its operands and the
  // intermediate nodes, so it must never be kept in a local variable.

  // Contains the linker script part corresponding to the top-level contract.
  // For the example above, this contains:
  //   D_105(.text);
  //   __dataoffset_D_105_deployed = .;
  //   D_105_deployed(.text);
  //   __datasize_D_105_deployed = . - __dataoffset_D_105_deployed;
  const std::string topLevel =
      (topName + "(.text);\n" + dataOffsetPrefix + deployed + " = .;\n" +
       deployed + "(.text);\n" + dataSizePrefix + deployed + " = . - " +
       dataOffsetPrefix + deployed + ";\n")
          .str();

  // Contains symbols whose values are the sizes of the dependent contracts.
  // For the example above, this contains:
  //   __datasize_B_40 = 1384;
  std::string symDatasizeDeps;

  // Contains symbols whose values are the offsets of the dependent contracts.
  // For the example above, this contains:
  //   __dataoffset_B_40 = .;
  std::string symDataOffsetDeps;
  if (numObjectsToLink > 2) {
    // Define datasize symbols for the dependent contracts. They start after
    // {deploy, deployed} pair of the top-level contract, i.e. at index 2.
    for (size_t idx = 2; idx < numObjectsToLink; ++idx)
      symDatasizeDeps += (dataSizePrefix + bufIDs[idx] + " = " +
                          Twine(LLVMGetBufferSize(memBufs[idx])) + ";\n")
                             .str();

    // The first dependency starts right after the deployed code; each
    // following one starts where the previous one ends.
    symDataOffsetDeps = (dataOffsetPrefix + bufIDs[2] + " = .;\n").str();
    for (size_t idx = 3; idx < numObjectsToLink; ++idx)
      symDataOffsetDeps +=
          (dataOffsetPrefix + bufIDs[idx] + " = " + dataOffsetPrefix +
           bufIDs[idx - 1] + " + " + dataSizePrefix + bufIDs[idx - 1] + ";\n")
              .str();
  }

  // Contains a symbol whose value is the total size of the top-level contract
  // with all the dependencies.
  std::string symDatasizeTop = (dataSizePrefix + topName + " = ").str();
  if (numObjectsToLink > 2)
    symDatasizeTop += (dataOffsetPrefix + bufIDs.back() + " + " +
                       dataSizePrefix + bufIDs.back() + ";\n")
                          .str();
  else
    symDatasizeTop += ".;\n";

  // Emit the offset of the deployed code as a 4-byte unsigned integer.
  // This is needed to determine which offset the deployed code starts at
  // in the linked binary.
  const std::string deploySize =
      ("LONG(" + dataOffsetPrefix + deployed + ");\n").str();

  // Assemble the script by plain concatenation: the text contains literal
  // braces, which would have to be escaped in a formatv() format string.
  std::string script;
  script += symDatasizeDeps;
  script += "\n"
            "ENTRY(0);\n"
            "SECTIONS {\n"
            ". = 0;\n"
            ".code : SUBALIGN(1) {\n";
  script += topLevel;
  script += symDataOffsetDeps;
  script += symDatasizeTop;
  script += deploySize;
  script += "}\n"
            "}\n";
  return script;
}

/// Links the deploy and deployed (runtime) EVM object files.
///
/// Only inBuffers[0] (deploy) and inBuffers[1] (deployed) are handed to the
/// linker. The remaining buffers are only passed to the linker-script
/// generator together with their identifiers.
///
/// \param inBuffers     input buffers: deploy object, deployed object, then
///                      the optional sub-contracts.
/// \param inBuffersIDs  identifiers of the buffers, one per input buffer.
/// \param numInBuffers  number of elements in \p inBuffers and
///                      \p inBuffersIDs; must be at least 2.
/// \param outBuffers    on success, receives the deploy code in [0] and the
///                      deployed code in [1].
/// \param errorMessage  on failure, receives a message allocated with
///                      strdup().
/// \returns false on success, true on failure.
LLVMBool LLVMLinkEVM(LLVMMemoryBufferRef inBuffers[],
                     const char *inBuffersIDs[], uint64_t numInBuffers,
                     LLVMMemoryBufferRef outBuffers[2], char **errorMessage) {
  // This is a C API entry point, so validate the argument count at run time
  // instead of relying on an assertion that disappears in release builds.
  if (numInBuffers < 2) {
    *errorMessage = strdup("LLVMLinkEVM: at least the deploy and deployed "
                           "object buffers are required");
    return true;
  }

  // Slots 0 and 1 hold the deploy/deployed objects, slot 2 the linker script.
  SmallVector<MemoryBufferRef> localInMemBufRefs(3);
  SmallVector<std::unique_ptr<MemoryBuffer>> localInMemBufs(3);
  for (unsigned idx = 0; idx < 2; ++idx) {
    MemoryBufferRef ref = *unwrap(inBuffers[idx]);
    localInMemBufs[idx] =
        MemoryBuffer::getMemBuffer(ref.getBuffer(), inBuffersIDs[idx],
                                   /*RequiresNullTerminator*/ false);
    localInMemBufRefs[idx] = localInMemBufs[idx]->getMemBufferRef();
  }

  std::string linkerScript = creteEVMLinkerScript(
      ArrayRef(inBuffers, numInBuffers), ArrayRef(inBuffersIDs, numInBuffers));
  std::unique_ptr<MemoryBuffer> scriptBuf =
      MemoryBuffer::getMemBuffer(linkerScript, "script.x");
  localInMemBufRefs[2] = scriptBuf->getMemBufferRef();

  SmallVector<const char *, 16> lldArgs;
  lldArgs.push_back("ld.lld");
  lldArgs.push_back("-T");
  lldArgs.push_back("script.x");

  // Use remapping of file names (a linker feature) to replace file names with
  // indexes in the array of memory buffers.
  // The option strings are materialized into std::string objects that outlive
  // the linker invocation, because lldArgs only stores raw pointers.
  const std::string remapDeployStr =
      (Twine("--remap-inputs=") + inBuffersIDs[0] + "=0").str();
  lldArgs.push_back(remapDeployStr.c_str());

  const std::string remapDeployedStr =
      (Twine("--remap-inputs=") + inBuffersIDs[1] + "=1").str();
  lldArgs.push_back(remapDeployedStr.c_str());

  lldArgs.push_back("--remap-inputs=script.x=2");

  // Deploy code
  lldArgs.push_back(inBuffersIDs[0]);
  // Deployed code
  lldArgs.push_back(inBuffersIDs[1]);

  lldArgs.push_back("--oformat=binary");

  SmallString<0> codeString;
  raw_svector_ostream ostream(codeString);
  SmallString<0> errorString;
  raw_svector_ostream errorOstream(errorString);

  // Lld-as-a-library is not thread safe, as it has a global state,
  // so we need to protect lld from simultaneous access from different threads.
  std::unique_lock<std::mutex> lock(lldMutex);
  const lld::Result s =
      lld::lldMainMemBuf(localInMemBufRefs, &ostream, lldArgs, outs(),
                         errorOstream, {{lld::Gnu, &lld::elf::linkMemBuf}});
  lock.unlock();

  const bool linked = !s.retCode && s.canRunAgain;
  if (!linked) {
    *errorMessage = strdup(errorString.c_str());
    return true;
  }

  StringRef data = ostream.str();
  // The linker script appends the offset of the deployed code as a 4-byte
  // big-endian unsigned integer (LONG) to the end of the .code section.
  // Knowing this, we can extract the final deploy and deployed codes.
  constexpr size_t trailerSize = 4;
  if (data.size() <= trailerSize) {
    *errorMessage = strdup("LLVMLinkEVM: linked image is too small to contain "
                           "the deploy code size");
    return true;
  }
  const size_t payloadSize = data.size() - trailerSize;
  const size_t deploySize =
      support::endian::read32be(data.data() + payloadSize);
  // The deploy code must end within the payload; otherwise the size of the
  // deployed code computed below would wrap around.
  if (deploySize > payloadSize) {
    *errorMessage = strdup("LLVMLinkEVM: deploy code size exceeds the size of "
                           "the linked image");
    return true;
  }
  const size_t deployedSize = payloadSize - deploySize;

  outBuffers[0] = LLVMCreateMemoryBufferWithMemoryRangeCopy(
      data.data(), deploySize, "deploy");
  outBuffers[1] = LLVMCreateMemoryBufferWithMemoryRangeCopy(
      data.data() + deploySize, deployedSize, "deployed");

  return false;
}
Loading
Loading