From a54cdd0a9981415c74293b8db07420dfd1be1e7e Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Mon, 14 Aug 2023 09:53:54 +0200
Subject: [PATCH 01/11] Add AtomicRMW to ICF

---
 llvm/lib/CheerpUtils/IdenticalCodeFolding.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/llvm/lib/CheerpUtils/IdenticalCodeFolding.cpp b/llvm/lib/CheerpUtils/IdenticalCodeFolding.cpp
index a44b43e5ada9..c7572affa291 100644
--- a/llvm/lib/CheerpUtils/IdenticalCodeFolding.cpp
+++ b/llvm/lib/CheerpUtils/IdenticalCodeFolding.cpp
@@ -526,6 +526,15 @@ bool IdenticalCodeFolding::equivalentInstruction(const llvm::Instruction* A, con
 				equivalentOperand(A->getOperand(0), B->getOperand(0)) &&
 				equivalentOperand(A->getOperand(1), B->getOperand(1)));
 		}
+		case Instruction::AtomicRMW:
+		{
+			const AtomicRMWInst* a = cast<AtomicRMWInst>(A);
+			const AtomicRMWInst* b = cast<AtomicRMWInst>(B);
+			return CacheAndReturn(equivalentType(a->getType(), b->getType()) &&
+				a->getOperation() == b->getOperation() &&
+				equivalentOperand(a->getPointerOperand(), b->getPointerOperand()) &&
+				equivalentOperand(a->getValOperand(), b->getValOperand()));
+		}
 		default:
 		{
 #ifndef NDEBUG

From ddf572f564ac2bda631992c3a81da76916b614b1 Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Tue, 12 Dec 2023 15:01:40 +0100
Subject: [PATCH 02/11] Add atomic instructions to isInlineable

AtomicRMW and AtomicCmpXchg are marked as not inlineable, and the
compare operand of an AtomicCmpXchg instruction may not be inlined
into it.

---
 llvm/lib/CheerpUtils/Utility.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llvm/lib/CheerpUtils/Utility.cpp b/llvm/lib/CheerpUtils/Utility.cpp
index 1996522a1c16..5fe55a9cdcad 100644
--- a/llvm/lib/CheerpUtils/Utility.cpp
+++ b/llvm/lib/CheerpUtils/Utility.cpp
@@ -184,6 +184,11 @@ bool InlineableCache::isInlineableImpl(const Instruction& I)
				//Abs will be rendered as (X >= 0) ? X : -X in both writers
				return true;
			}
+			if(const AtomicCmpXchgInst* ai = dyn_cast<AtomicCmpXchgInst>(userInst))
+			{
+				if (&I == ai->getCompareOperand())
+					return true;
+			}
			return false;
		};
		// Do not inline the instruction if the use is in another block
@@ -497,6 +502,8 @@ bool InlineableCache::isInlineableImpl(const Instruction& I)
			return true;
		case Instruction::ExtractElement:
		case Instruction::InsertElement:
+		case Instruction::AtomicRMW:
+		case Instruction::AtomicCmpXchg:
			return false;
		case Instruction::Select:
		{
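The restriction above matters because both writers consume the compare operand of a
cmpxchg twice: once as the instruction's input and once to compute the success flag
from the loaded value (see patches 03 and 07). The toy program below is an
illustration only, not Cheerp code; it shows how inlining a side-effecting
expression into such a double-use site would evaluate it twice:

    #include <iostream>
    #include <string>

    // Toy "writer": renders an operand either as an inlined expression or as a
    // reference to a precomputed local, mimicking the inlineable split.
    static std::string renderOperand(bool inlined) {
        return inlined ? "f()" : "tmp"; // f() stands for any side-effecting expression
    }

    int main() {
        for (bool inlined : {true, false}) {
            std::string cmp = renderOperand(inlined);
            // The writer consumes the compare operand twice: once as the cmpxchg
            // argument, once for the equality check against the loaded value.
            std::cout << "old = cmpxchg(ptr, " << cmp << ", newVal);\n"
                      << "ok  = (old == " << cmp << ");\n\n";
        }
        // With inlining, f() appears (and would run) twice; with a local, once.
    }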
From bacc6977dc9b2fe0479d8dbb30559e68579286d1 Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Tue, 14 Feb 2023 16:29:54 +0100
Subject: [PATCH 03/11] CheerpWasmWriter - Support for atomics

---
 llvm/include/llvm/Cheerp/WasmOpcodes.h     |  71 +++++++++
 llvm/include/llvm/Cheerp/WasmWriter.h      |   4 +-
 llvm/lib/CheerpWriter/CheerpWasmWriter.cpp | 159 +++++++++++++++++++--
 3 files changed, 222 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Cheerp/WasmOpcodes.h b/llvm/include/llvm/Cheerp/WasmOpcodes.h
index 6763d6dbac0b..2b9f826cfc75 100644
--- a/llvm/include/llvm/Cheerp/WasmOpcodes.h
+++ b/llvm/include/llvm/Cheerp/WasmOpcodes.h
@@ -143,6 +143,7 @@ enum class WasmOpcode {
	F64_REINTERPRET_I64 = 0xbf,
	FC = 0xfc,
	SIMD = 0xfd,
+	Threads = 0xfe,
 };

 enum class WasmS32Opcode {
@@ -373,6 +374,76 @@ enum class WasmSIMDU32U32U32Opcode {
	V128_STORE64_LANE = 0x5b,
 };

+enum class WasmThreadsU32Opcode {
+	ATOMIC_FENCE = 0x3,
+};
+
+enum class WasmThreadsU32U32Opcode {
+	I32_ATOMIC_LOAD = 0x10,
+	I64_ATOMIC_LOAD = 0x11,
+	I32_ATOMIC_LOAD8_U = 0x12,
+	I32_ATOMIC_LOAD16_U = 0x13,
+	I64_ATOMIC_LOAD8_U = 0x14,
+	I64_ATOMIC_LOAD16_U = 0x15,
+	I64_ATOMIC_LOAD32_U = 0x16,
+	I32_ATOMIC_STORE = 0x17,
+	I64_ATOMIC_STORE = 0x18,
+	I32_ATOMIC_STORE8 = 0x19,
+	I32_ATOMIC_STORE16 = 0x1a,
+	I64_ATOMIC_STORE8 = 0x1b,
+	I64_ATOMIC_STORE16 = 0x1c,
+	I64_ATOMIC_STORE32 = 0x1d,
+	I32_ATOMIC_RMW_ADD = 0x1e,
+	I64_ATOMIC_RMW_ADD = 0x1f,
+	I32_ATOMIC_RMW8_ADD_U = 0x20,
+	I32_ATOMIC_RMW16_ADD_U = 0x21,
+	I64_ATOMIC_RMW8_ADD_U = 0x22,
+	I64_ATOMIC_RMW16_ADD_U = 0x23,
+	I64_ATOMIC_RMW32_ADD_U = 0x24,
+	I32_ATOMIC_RMW_SUB = 0x25,
+	I64_ATOMIC_RMW_SUB = 0x26,
+	I32_ATOMIC_RMW8_SUB_U = 0x27,
+	I32_ATOMIC_RMW16_SUB_U = 0x28,
+	I64_ATOMIC_RMW8_SUB_U = 0x29,
+	I64_ATOMIC_RMW16_SUB_U = 0x2a,
+	I64_ATOMIC_RMW32_SUB_U = 0x2b,
+	I32_ATOMIC_RMW_AND = 0x2c,
+	I64_ATOMIC_RMW_AND = 0x2d,
+	I32_ATOMIC_RMW8_AND_U = 0x2e,
+	I32_ATOMIC_RMW16_AND_U = 0x2f,
+	I64_ATOMIC_RMW8_AND_U = 0x30,
+	I64_ATOMIC_RMW16_AND_U = 0x31,
+	I64_ATOMIC_RMW32_AND_U = 0x32,
+	I32_ATOMIC_RMW_OR = 0x33,
+	I64_ATOMIC_RMW_OR = 0x34,
+	I32_ATOMIC_RMW8_OR_U = 0x35,
+	I32_ATOMIC_RMW16_OR_U = 0x36,
+	I64_ATOMIC_RMW8_OR_U = 0x37,
+	I64_ATOMIC_RMW16_OR_U = 0x38,
+	I64_ATOMIC_RMW32_OR_U = 0x39,
+	I32_ATOMIC_RMW_XOR = 0x3a,
+	I64_ATOMIC_RMW_XOR = 0x3b,
+	I32_ATOMIC_RMW8_XOR_U = 0x3c,
+	I32_ATOMIC_RMW16_XOR_U = 0x3d,
+	I64_ATOMIC_RMW8_XOR_U = 0x3e,
+	I64_ATOMIC_RMW16_XOR_U = 0x3f,
+	I64_ATOMIC_RMW32_XOR_U = 0x40,
+	I32_ATOMIC_RMW_XCHG = 0x41,
+	I64_ATOMIC_RMW_XCHG = 0x42,
+	I32_ATOMIC_RMW8_XCHG_U = 0x43,
+	I32_ATOMIC_RMW16_XCHG_U = 0x44,
+	I64_ATOMIC_RMW8_XCHG_U = 0x45,
+	I64_ATOMIC_RMW16_XCHG_U = 0x46,
+	I64_ATOMIC_RMW32_XCHG_U = 0x47,
+	I32_ATOMIC_RMW_CMPXCHG = 0x48,
+	I64_ATOMIC_RMW_CMPXCHG = 0x49,
+	I32_ATOMIC_RMW8_CMPXCHG_U = 0x4a,
+	I32_ATOMIC_RMW16_CMPXCHG_U = 0x4b,
+	I64_ATOMIC_RMW8_CMPXCHG_U = 0x4c,
+	I64_ATOMIC_RMW16_CMPXCHG_U = 0x4d,
+	I64_ATOMIC_RMW32_CMPXCHG_U = 0x4e,
+};
+
 enum class WasmInvalidOpcode {
	BRANCH_LIKELY = 0x14,
	BRANCH_UNLIKELY = 0x15,
diff --git a/llvm/include/llvm/Cheerp/WasmWriter.h b/llvm/include/llvm/Cheerp/WasmWriter.h
index f9b79f6433bd..92fa5856dcd7 100644
--- a/llvm/include/llvm/Cheerp/WasmWriter.h
+++ b/llvm/include/llvm/Cheerp/WasmWriter.h
@@ -513,6 +513,8 @@ class CheerpWasmWriter final : public CheerpBaseWriter
	static void encodeInst(WasmSIMDU32U32Opcode opcode, uint32_t i1, uint32_t i2, WasmBuffer& code);
	static void encodeInst(WasmSIMDU32U32U32Opcode opcode, uint32_t i1, uint32_t i2, uint32_t i3, WasmBuffer& code);
	static void encodeInst(WasmU32U32Opcode opcode, uint32_t i1, uint32_t i2, WasmBuffer& code);
+	static void encodeInst(WasmThreadsU32Opcode opcode, uint32_t immediate, WasmBuffer& code);
+	static void encodeInst(WasmThreadsU32U32Opcode opcode, uint32_t i1, uint32_t i2, WasmBuffer& code);
	void encodeInst(WasmInvalidOpcode opcode, WasmBuffer& code);
	void encodeVectorConstantZero(WasmBuffer& code);
	void encodeConstantDataVector(WasmBuffer& code, const llvm::ConstantDataVector* cdv);
@@ -528,7 +530,7 @@ class CheerpWasmWriter final : public CheerpBaseWriter
	void compileICmp(const llvm::ICmpInst& ci, const llvm::CmpInst::Predicate p, WasmBuffer& code);
	void compileICmp(const llvm::Value* op0, const llvm::Value* op1, const llvm::CmpInst::Predicate p, WasmBuffer& code);
	void compileFCmp(const llvm::Value* lhs, const llvm::Value* rhs, const llvm::CmpInst::Predicate p, WasmBuffer& code);
-	void encodeLoad(llvm::Type* ty, uint32_t offset, WasmBuffer& code, bool signExtend);
+	void encodeLoad(llvm::Type* ty, uint32_t offset, WasmBuffer& code, bool signExtend, bool atomic);
	void encodeWasmIntrinsic(WasmBuffer& code, const llvm::Function* F);
	void encodeBranchTable(WasmBuffer& code, std::vector<uint32_t> table, int32_t defaultBlock);
	void encodeDataSectionChunk(WasmBuffer& data, uint32_t address, llvm::StringRef buf);
diff --git a/llvm/lib/CheerpWriter/CheerpWasmWriter.cpp b/llvm/lib/CheerpWriter/CheerpWasmWriter.cpp
index c45a2cffd9b6..b92651f57902 100644
--- a/llvm/lib/CheerpWriter/CheerpWasmWriter.cpp
+++ b/llvm/lib/CheerpWriter/CheerpWasmWriter.cpp
@@ -589,6 +589,21 @@ void CheerpWasmWriter::encodeInst(WasmSIMDU32U32U32Opcode opcode, uint32_t i1, u
	encodeULEB128(i3, code);
 }

+void CheerpWasmWriter::encodeInst(WasmThreadsU32Opcode opcode, uint32_t immediate, WasmBuffer& code)
+{
+	code << static_cast<char>(WasmOpcode::Threads);
+	encodeULEB128(static_cast<uint32_t>(opcode), code);
+	encodeULEB128(immediate, code);
+}
+
+void CheerpWasmWriter::encodeInst(WasmThreadsU32U32Opcode opcode, uint32_t i1, uint32_t i2, WasmBuffer& code)
+{
+	code << static_cast<char>(WasmOpcode::Threads);
+	encodeULEB128(static_cast<uint32_t>(opcode), code);
+	encodeULEB128(i1, code);
+	encodeULEB128(i2, code);
+}
+
 void CheerpWasmWriter::encodeInst(WasmInvalidOpcode opcode, WasmBuffer& code)
 {
	nopLocations.push_back(code.tell());
@@ -1005,8 +1020,9 @@ void CheerpWasmWriter::encodePredicate(const llvm::Type* ty, const llvm::CmpInst
 }

 void CheerpWasmWriter::encodeLoad(llvm::Type* ty, uint32_t offset,
-		WasmBuffer& code, bool signExtend)
+		WasmBuffer& code, bool signExtend, bool atomic)
 {
+	assert(!(atomic && signExtend));
	if(ty->isIntegerTy())
	{
		uint32_t bitWidth = targetData.getTypeStoreSizeInBits(ty);

		switch (bitWidth)
		{
			// Currently assume unsigned, like Cheerp. We may optimize
			// this by looking at a following sext or zext instruction.
			case 8:
-				encodeInst(signExtend ? WasmU32U32Opcode::I32_LOAD8_S : WasmU32U32Opcode::I32_LOAD8_U, 0x0, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_LOAD8_U, 0x0, offset, code);
+				else if (signExtend)
+					encodeInst(WasmU32U32Opcode::I32_LOAD8_S, 0x0, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I32_LOAD8_U, 0x0, offset, code);
				break;
			case 16:
-				encodeInst(signExtend ? WasmU32U32Opcode::I32_LOAD16_S : WasmU32U32Opcode::I32_LOAD16_U, 0x1, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_LOAD16_U, 0x1, offset, code);
+				else if (signExtend)
+					encodeInst(WasmU32U32Opcode::I32_LOAD16_S, 0x1, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I32_LOAD16_U, 0x1, offset, code);
				break;
			case 32:
-				encodeInst(WasmU32U32Opcode::I32_LOAD, 0x2, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_LOAD, 0x2, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I32_LOAD, 0x2, offset, code);
				break;
			case 64:
-				encodeInst(WasmU32U32Opcode::I64_LOAD, 0x2, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I64_ATOMIC_LOAD, 0x3, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I64_LOAD, 0x2, offset, code);
				break;
			default:
				llvm::errs() << "bit width: " << bitWidth << '\n';
@@ -1060,6 +1092,7 @@
			llvm::report_fatal_error("vector bitwidth not supported");
		}
	} else {
+		assert(!atomic && "atomic loads only supported on integers");
		if (ty->isFloatTy())
			encodeInst(WasmU32U32Opcode::F32_LOAD, 0x2, offset, code);
		else if (ty->isDoubleTy())
@@ -1982,7 +2015,7 @@ void CheerpWasmWriter::compileLoad(WasmBuffer& code, const LoadInst& li, bool si
		offset += elementOffset;
	}
	// 2) Load
-	encodeLoad(Ty, offset, code, signExtend);
+	encodeLoad(Ty, offset, code, signExtend, li.isAtomic());
	}
 }
@@ -2030,6 +2063,7 @@ void CheerpWasmWriter::compileStore(WasmBuffer& code, const StoreInst& si)
	}
	// 3) Store
	// When storing values with size less than 32-bit we need to truncate them
+	bool atomic = si.isAtomic();
	if(Ty->isIntegerTy())
	{
		uint32_t bitWidth = targetData.getTypeStoreSizeInBits(Ty);
@@ -2037,16 +2071,28 @@
		switch (bitWidth)
		{
			case 8:
-				encodeInst(WasmU32U32Opcode::I32_STORE8, 0x0, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_STORE8, 0x0, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I32_STORE8, 0x0, offset, code);
				break;
			case 16:
-				encodeInst(WasmU32U32Opcode::I32_STORE16, 0x1, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_STORE16, 0x1, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I32_STORE16, 0x1, offset, code);
				break;
			case 32:
-				encodeInst(WasmU32U32Opcode::I32_STORE, 0x2, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_STORE, 0x2, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I32_STORE, 0x2, offset, code);
				break;
			case 64:
-				encodeInst(WasmU32U32Opcode::I64_STORE, 0x2, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I64_ATOMIC_STORE, 0x3, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I64_STORE, 0x2, offset, code);
				break;
			default:
				llvm::errs() << "bit width: " << bitWidth << '\n';
	}
	else if (Ty->isVectorTy())
	{
+		assert(!atomic && "atomic stores only supported on integers");
		const FixedVectorType* vecType = cast<FixedVectorType>(Ty);
		const unsigned vecWidth = getVectorBitwidth(vecType);
		if (vecWidth == 128)
@@ -2082,6 +2129,7 @@
	}
	else
	{
+		assert(!atomic && "atomic stores only supported on integers");
		if (Ty->isFloatTy())
			encodeInst(WasmU32U32Opcode::F32_STORE, 0x2, offset, code);
		else if (Ty->isDoubleTy())
@@ -2298,7 +2346,7 @@ bool CheerpWasmWriter::compileInlineInstruction(WasmBuffer& code, const Instruct
			// Load the current argument
			compileOperand(code, vi.getPointerOperand());
			encodeInst(WasmU32U32Opcode::I32_LOAD, 0x2, 0x0, code);
-			encodeLoad(vi.getType(), 0, code, /*signExtend*/false);
+			encodeLoad(vi.getType(), 0, code, /*signExtend*/false, /*atomic*/false);

			// Move varargs pointer to next argument
			compileOperand(code, vi.getPointerOperand());
@@ -3566,6 +3614,95 @@ bool CheerpWasmWriter::compileInlineInstruction(WasmBuffer& code, const Instruct
			}
			break;
		}
+		case Instruction::AtomicRMW:
+		{
+			const AtomicRMWInst& ai = cast<AtomicRMWInst>(I);
+			const Type* opType = ai.getOperand(1)->getType();
+			assert(opType->isIntegerTy());
+			const Value* ptrOp = ai.getPointerOperand();
+			const Value* valOp = ai.getValOperand();
+			uint32_t offset = compileLoadStorePointer(code, ptrOp);
+			compileOperand(code, valOp);
+			switch (ai.getOperation())
+			{
+#define ATOMICBINOP(Ty, name) \
+				case AtomicRMWInst::Ty: \
+				{ \
+					if (opType->isIntegerTy(64)) \
+						encodeInst(WasmThreadsU32U32Opcode::I64_ATOMIC_RMW_##name, 0x3, offset, code); \
+					else if (opType->isIntegerTy(32)) \
+						encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_RMW_##name, 0x2, offset, code); \
+					else if (opType->isIntegerTy(16)) \
+						encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_RMW16_##name##_U, 0x1, offset, code); \
+					else if (opType->getPrimitiveSizeInBits() <= 8) \
+						encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_RMW8_##name##_U, 0x0, offset, code); \
+					else \
+						llvm::report_fatal_error("Unknown bitwidth for AtomicRMW inst"); \
+					break; \
+				}
+				ATOMICBINOP( Add,  ADD)
+				ATOMICBINOP( Sub,  SUB)
+				ATOMICBINOP( And,  AND)
+				ATOMICBINOP(  Or,   OR)
+				ATOMICBINOP( Xor,  XOR)
+				ATOMICBINOP(Xchg, XCHG)
+#undef ATOMICBINOP
+				default:
+				{
+					llvm::report_fatal_error("Atomic opcode not supported");
+				}
+			}
+			break;
+		}
+		case Instruction::AtomicCmpXchg:
+		{
+			const AtomicCmpXchgInst& ai = cast<AtomicCmpXchgInst>(I);
+			const Value* ptrOp = ai.getPointerOperand();
+			const Value* compareOp = ai.getCompareOperand();
+			const Value* newValOp = ai.getNewValOperand();
+			const Type* t = compareOp->getType();
+			uint32_t offset = compileLoadStorePointer(code, ptrOp);
+			// We use compileOperand on the compareOp twice, but this is safe because
+			// the compareOp of a cmpxchg instruction will never be inlined.
+			compileOperand(code, compareOp);
+			compileOperand(code, newValOp);
+			if (t->isIntegerTy(8))
+				encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_RMW8_CMPXCHG_U, 0x0, offset, code);
+			else if (t->isIntegerTy(16))
+				encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_RMW16_CMPXCHG_U, 0x1, offset, code);
+			else if (t->isIntegerTy(32))
+				encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_RMW_CMPXCHG, 0x2, offset, code);
+			else if (t->isIntegerTy(64))
+				encodeInst(WasmThreadsU32U32Opcode::I64_ATOMIC_RMW_CMPXCHG, 0x3, offset, code);
+			else
+				llvm::report_fatal_error("Atomic cmpxchg only allowed on integers");
+			// Do not duplicate the result and render the comparison if this instruction has no uses.
+			// We do however have to push a garbage constant onto the stack, because
+			// compileInstructionAndSet will drop 2 times (since there are 2 registers for this).
+			if (I.use_empty())
+			{
+				encodeInst(WasmS32Opcode::I32_CONST, 0, code);
+				break;
+			}
+			// Now compile the second part of this two-register operation, the comparison between
+			// the original (loaded) value and the comparison value.
+			uint32_t idx = registerize.getRegisterId(&ai, 0, edgeContext);
+			uint32_t localId = localMap.at(idx);
+			encodeInst(WasmU32Opcode::TEE_LOCAL, localId, code);
+			encodeInst(WasmU32Opcode::GET_LOCAL, localId, code);
+			compileOperand(code, compareOp);
+			if (t->isIntegerTy(64))
+				encodeInst(WasmOpcode::I64_EQ, code);
+			else
+				encodeInst(WasmOpcode::I32_EQ, code);
+			break;
+		}
+		case Instruction::Fence:
+		{
+			// The FENCE opcode currently requires an immediate set to 0.
+			encodeInst(WasmThreadsU32Opcode::ATOMIC_FENCE, 0, code);
+			break;
+		}
		default:
		{
 #ifndef NDEBUG
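For reference, every instruction from the wasm threads proposal is encoded as the
0xfe prefix byte, a LEB128 sub-opcode, and two LEB128 immediates: the alignment
exponent and the constant offset. The self-contained sketch below reproduces the
byte sequence the new encodeInst overloads emit for i32.atomic.rmw.add
(sub-opcode 0x1e per the enum above); the plain byte vector is an assumption
standing in for the actual WasmBuffer type:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Minimal unsigned LEB128, as used for all wasm immediates.
    static void encodeULEB128(uint32_t v, std::vector<uint8_t>& out) {
        do {
            uint8_t b = v & 0x7f;
            v >>= 7;
            if (v) b |= 0x80;
            out.push_back(b);
        } while (v);
    }

    int main() {
        std::vector<uint8_t> code;
        // i32.atomic.rmw.add: Threads prefix 0xfe, sub-opcode 0x1e,
        // alignment exponent 2 (4-byte aligned), offset 16.
        code.push_back(0xfe);
        encodeULEB128(0x1e, code);
        encodeULEB128(0x2, code);
        encodeULEB128(16, code);
        for (uint8_t b : code)
            printf("%02x ", b); // prints: fe 1e 02 10
        printf("\n");
    }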
From 6c3d5be7d48c9dc890ce986fb1e8fd72f5adbd22 Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Wed, 8 Nov 2023 10:59:15 +0100
Subject: [PATCH 04/11] Atomic support for PreExecuter

---
 .../ExecutionEngine/Interpreter/Execution.cpp | 70 +++++++++++++++++++
 .../ExecutionEngine/Interpreter/Interpreter.h |  4 ++
 2 files changed, 74 insertions(+)

diff --git a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index b59e1bed223b..9a73cd185175 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -2119,6 +2119,76 @@ void Interpreter::visitInsertValueInst(InsertValueInst &I) {
   SetValue(&I, Dest, SF);
 }

+void Interpreter::visitAtomicRMWInst(AtomicRMWInst &I)
+{
+  ExecutionContext &SF = ECStack.back();
+  GenericValue Src1 = getOperandValue(I.getPointerOperand(), SF);
+  GenericValue *Ptr = (GenericValue*)GVTORP(Src1);
+  GenericValue Src2 = getOperandValue(I.getValOperand(), SF);
+  GenericValue Orig;
+  LoadValueFromMemory(Orig, Ptr, I.getType());
+  GenericValue Result;
+
+  switch (I.getOperation()) {
+  default:
+    dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+    llvm_unreachable(nullptr);
+    break;
+  case AtomicRMWInst::BinOp::Add: Result.IntVal = Orig.IntVal + Src2.IntVal; break;
+  case AtomicRMWInst::BinOp::Sub: Result.IntVal = Orig.IntVal - Src2.IntVal; break;
+  case AtomicRMWInst::BinOp::And: Result.IntVal = Orig.IntVal & Src2.IntVal; break;
+  case AtomicRMWInst::BinOp::Or:  Result.IntVal = Orig.IntVal | Src2.IntVal; break;
+  case AtomicRMWInst::BinOp::Xor: Result.IntVal = Orig.IntVal ^ Src2.IntVal; break;
+  }
+  GenericValue Val = getOperandValue(I.getOperand(0), SF);
+  StoreValueToMemory(Result, Ptr, I.getValOperand()->getType());
+  if (StoreListener)
+  {
+    assert(ForPreExecute);
+    StoreListener(GVTORP(Src1));
+  }
+  // atomicrmw yields the value loaded before the operation.
+  SetValue(&I, Orig, SF);
+}
+
+void Interpreter::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I)
+{
+  ExecutionContext &SF = ECStack.back();
+  GenericValue Src1 = getOperandValue(I.getPointerOperand(), SF);
+  GenericValue *Ptr = (GenericValue*)GVTORP(Src1);
+  GenericValue Cmp = getOperandValue(I.getCompareOperand(), SF);
+  GenericValue NewVal = getOperandValue(I.getNewValOperand(), SF);
+  GenericValue Orig;
+  GenericValue Result;
+  GenericValue Equal;
+
+  // Load the original value at the pointer.
+  LoadValueFromMemory(Orig, Ptr, I.getNewValOperand()->getType());
+
+  // Compare the original and the compare operand.
+  // If they are equal, store the newval.
+  Equal.IntVal = APInt(1, Orig.IntVal.eq(Cmp.IntVal));
+  if (Equal.IntVal == 1)
+  {
+    StoreValueToMemory(NewVal, Ptr, I.getNewValOperand()->getType());
+    if (StoreListener)
+    {
+      assert(ForPreExecute);
+      StoreListener(GVTORP(Src1));
+    }
+  }
+
+  // Now build the resulting value struct.
+  Result.AggregateVal.resize(2);
+  Result.AggregateVal[0] = Orig;
+  Result.AggregateVal[1] = Equal;
+  SetValue(&I, Result, SF);
+}
+
+void Interpreter::visitFenceInst(FenceInst &I)
+{
+  // The PreExecuter is single-threaded, so a fence is a no-op.
+  return;
+}
+
 GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, ExecutionContext &SF) {
   switch (CE->getOpcode()) {
diff --git a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
index c2f97bed0298..132d2830c0dc 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -222,6 +222,10 @@ class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> {
   void visitExtractValueInst(ExtractValueInst &I);
   void visitInsertValueInst(InsertValueInst &I);

+  void visitAtomicRMWInst(AtomicRMWInst &I);
+  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
+  void visitFenceInst(FenceInst &I);
+
   void visitInstruction(Instruction &I) {
     errs() << I << "\n";
     llvm_unreachable("Instruction not interpretable yet!");
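The aggregate the interpreter builds mirrors LLVM's cmpxchg contract: element 0 is
the value loaded before the operation, element 1 is an i1 success flag, and the
store happens only on success. A minimal single-threaded model of that contract,
using plain integers rather than GenericValue:

    #include <cassert>
    #include <cstdint>
    #include <utility>

    // Single-threaded model of LLVM's cmpxchg: returns {original value, success}.
    static std::pair<uint32_t, bool> cmpxchg(uint32_t* ptr, uint32_t expected,
                                             uint32_t desired) {
        uint32_t orig = *ptr;          // load the original value
        bool equal = (orig == expected);
        if (equal)
            *ptr = desired;            // store only on success
        return {orig, equal};
    }

    int main() {
        uint32_t cell = 5;
        auto r1 = cmpxchg(&cell, 5, 9); // succeeds: cell becomes 9
        assert(r1.first == 5 && r1.second && cell == 9);
        auto r2 = cmpxchg(&cell, 5, 7); // fails: cell unchanged, old value returned
        assert(r2.first == 9 && !r2.second && cell == 9);
    }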
From 621fae3e5714c5f53237f1703ee7548b5224d99d Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Wed, 13 Dec 2023 09:49:38 +0100
Subject: [PATCH 05/11] Track atomic use in GDA.

---
 llvm/include/llvm/Cheerp/GlobalDepsAnalyzer.h |  6 ++++++
 llvm/lib/CheerpUtils/GlobalDepsAnalyzer.cpp   | 15 +++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/llvm/include/llvm/Cheerp/GlobalDepsAnalyzer.h b/llvm/include/llvm/Cheerp/GlobalDepsAnalyzer.h
index ba658d673157..484b0a75e5bf 100644
--- a/llvm/include/llvm/Cheerp/GlobalDepsAnalyzer.h
+++ b/llvm/include/llvm/Cheerp/GlobalDepsAnalyzer.h
@@ -137,6 +137,8 @@ class GlobalDepsAnalyzer
	 */
	bool usesAsmJSMalloc() const { return hasAsmJSMalloc; }

+	bool usesAtomics() const { return hasAtomics; }
+
	bool runOnModule( llvm::Module & );

	void visitType( llvm::Type* t, bool forceTypedArray );
@@ -242,6 +244,9 @@ class GlobalDepsAnalyzer
	//Extend lifetime of function, visiting them and declaring external
	void extendLifetime(llvm::Function* F);

+	//Determine whether an instruction is atomic.
+	bool isAtomicInstruction(const llvm::Instruction& I);
+
	std::unordered_set< const llvm::GlobalValue * > reachableGlobals; // Set of all the reachable globals

	FixupMap varsFixups;
@@ -270,6 +275,7 @@ class GlobalDepsAnalyzer
	bool hasVAArgs;
	bool hasPointerArrays;
	bool hasAsmJSCode;
+	bool hasAtomics;
	bool hasAsmJSMemory;
	bool hasAsmJSMalloc;
	bool hasCheerpException;
diff --git a/llvm/lib/CheerpUtils/GlobalDepsAnalyzer.cpp b/llvm/lib/CheerpUtils/GlobalDepsAnalyzer.cpp
index 9b8be6627011..ad53305356c3 100644
--- a/llvm/lib/CheerpUtils/GlobalDepsAnalyzer.cpp
+++ b/llvm/lib/CheerpUtils/GlobalDepsAnalyzer.cpp
@@ -144,11 +144,23 @@ void GlobalDepsAnalyzer::replaceFunctionAliasWithAliasee(llvm::Module &module, S
	}
 }

+bool GlobalDepsAnalyzer::isAtomicInstruction(const llvm::Instruction& I)
+{
+	if (isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I) || isa<FenceInst>(I))
+		return true;
+	else if (const llvm::LoadInst* li = dyn_cast<llvm::LoadInst>(&I))
+		return li->isAtomic();
+	else if (const llvm::StoreInst* si = dyn_cast<llvm::StoreInst>(&I))
+		return si->isAtomic();
+	return false;
+}
+
 bool GlobalDepsAnalyzer::runOnModule( llvm::Module & module )
 {
	DL = &module.getDataLayout();
	assert(DL);
	VisitedSet visited;
+	hasAtomics = false;

	replaceFunctionAliasWithAliasee(module, "malloc");
	replaceFunctionAliasWithAliasee(module, "calloc");
@@ -209,6 +221,9 @@ bool GlobalDepsAnalyzer::runOnModule( llvm::Module & module )
					break;
				Instruction& I = *instructionIterator;

+				if (isAtomicInstruction(I))
+					hasAtomics = true;
+
				if (isa<CallInst>(I))
				{
					CallInst* ci = cast<CallInst>(&I);
					Function* calledFunc = ci->getCalledFunction();

From 6bb5bbfa78ff796b71e6747515eaa22f2675428b Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Mon, 11 Dec 2023 15:49:28 +0100
Subject: [PATCH 06/11] Add support for Atomic functions to NameGenerator

---
 llvm/include/llvm/Cheerp/NameGenerator.h | 9 +++++++++
 llvm/lib/CheerpWriter/NameGenerator.cpp  | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/llvm/include/llvm/Cheerp/NameGenerator.h b/llvm/include/llvm/Cheerp/NameGenerator.h
index c0b84e184276..788778ea358d 100644
--- a/llvm/include/llvm/Cheerp/NameGenerator.h
+++ b/llvm/include/llvm/Cheerp/NameGenerator.h
@@ -68,6 +68,15 @@ class NameGenerator
		HANDLE_VAARG,
		EXCEPTION,
		FETCHBUFFER,
+		ATOMICLOAD,
+		ATOMICSTORE,
+		ATOMICADD,
+		ATOMICSUB,
+		ATOMICAND,
+		ATOMICOR,
+		ATOMICXOR,
+		ATOMICXCHG,
+		ATOMICCMPXCHG,
		MEMORY,
		HEAP8,
		HEAP16,
diff --git a/llvm/lib/CheerpWriter/NameGenerator.cpp b/llvm/lib/CheerpWriter/NameGenerator.cpp
index 37d49bf1ed8d..17615cb078fc 100644
--- a/llvm/lib/CheerpWriter/NameGenerator.cpp
+++ b/llvm/lib/CheerpWriter/NameGenerator.cpp
@@ -678,6 +678,15 @@ void NameGenerator::generateReadableNames(const Module& M, const GlobalDepsAnaly
	builtins[EXCEPTION] = "$except";
	builtins[FETCHBUFFER] = "fetchBuffer";
	builtins[STACKPTR] = "__stackPtr";
+	builtins[ATOMICLOAD] = "__atomicload";
+	builtins[ATOMICSTORE] = "__atomicstore";
+	builtins[ATOMICADD] = "__atomicadd";
+	builtins[ATOMICSUB] = "__atomicsub";
+	builtins[ATOMICAND] = "__atomicand";
+	builtins[ATOMICOR] = "__atomicor";
+	builtins[ATOMICXOR] = "__atomicxor";
+	builtins[ATOMICXCHG] = "__atomicexchange";
+	builtins[ATOMICCMPXCHG] = "__atomiccompareExchange";
	builtins[HEAP8] = "HEAP8";
	builtins[HEAP16] = "HEAP16";
	builtins[HEAP32] = "HEAP32";
From 842f5e366b4c72b6b267037792aec2a161ccdeab Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Tue, 12 Dec 2023 15:13:26 +0100
Subject: [PATCH 07/11] Add atomics support to asmjs

---
 llvm/include/llvm/Cheerp/Writer.h      |   9 +-
 llvm/lib/CheerpWriter/CheerpWriter.cpp | 262 ++++++++++++++++++++++++-
 2 files changed, 265 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Cheerp/Writer.h b/llvm/include/llvm/Cheerp/Writer.h
index 0d79083767b5..52e37a330b59 100644
--- a/llvm/include/llvm/Cheerp/Writer.h
+++ b/llvm/include/llvm/Cheerp/Writer.h
@@ -453,11 +453,12 @@ class CheerpWriter final : public CheerpBaseWriter
	COMPILE_INSTRUCTION_FEEDBACK compileNotInlineableInstruction(const llvm::Instruction& I, PARENT_PRIORITY parentPrio);
	COMPILE_INSTRUCTION_FEEDBACK compileInlineableInstruction(const llvm::Instruction& I, PARENT_PRIORITY parentPrio);
	COMPILE_INSTRUCTION_FEEDBACK compileCallInstruction(const llvm::CallBase& I, PARENT_PRIORITY parentPrio);
-	void compileLoadElem(const llvm::Value* ptrOp, llvm::Type* Ty, llvm::StructType* STy, POINTER_KIND ptrKind, POINTER_KIND loadKind, bool isOffset, Registerize::REGISTER_KIND regKind, uint32_t structElemIdx, bool asmjs, PARENT_PRIORITY parentPrio);
-	void compileLoadElem(const llvm::Value* ptrOp, llvm::Type* Ty, POINTER_KIND ptrKind, POINTER_KIND loadKind, bool isOffset, Registerize::REGISTER_KIND regKind, bool asmjs, PARENT_PRIORITY parentPrio);
+	void compileLoadElem(const llvm::LoadInst& li, llvm::Type* Ty, llvm::StructType* STy, POINTER_KIND ptrKind, POINTER_KIND loadKind, bool isOffset, Registerize::REGISTER_KIND regKind, uint32_t structElemIdx, bool asmjs, PARENT_PRIORITY parentPrio);
	void compileLoad(const llvm::LoadInst& li, PARENT_PRIORITY parentPrio);
	void compileStoreElem(const llvm::StoreInst& si, llvm::Type* Ty, llvm::StructType* STy, POINTER_KIND ptrKind, POINTER_KIND storedKind, bool isOffset, Registerize::REGISTER_KIND regKind, uint32_t structElemIdx, uint32_t elemIdx, bool asmjs);
	void compileStore(const llvm::StoreInst& si);
+	void compileAtomicRMW(const llvm::AtomicRMWInst& ai, PARENT_PRIORITY parentPrio);
+	void compileAtomicCmpXchg(const llvm::AtomicCmpXchgInst& ai, PARENT_PRIORITY parentPrio);

	void compileSignedInteger(const llvm::Value* v, bool forComparison, PARENT_PRIORITY parentPrio);
	void compileUnsignedInteger(const llvm::Value* v, bool forAsmJSComparison, PARENT_PRIORITY parentPrio, bool forceTruncation = false);
@@ -692,6 +693,10 @@ class CheerpWriter final : public CheerpBaseWriter
	 * Compile the function for growing the wasm linear memory
	 */
	void compileGrowMem();
+	/**
+	 * Compile the atomic functions
+	 */
+	void compileAtomicFunctions();
	/**
	 * Compile an helper function to assign all global heap symbols
	 */
diff --git a/llvm/lib/CheerpWriter/CheerpWriter.cpp b/llvm/lib/CheerpWriter/CheerpWriter.cpp
index 9160ae3a82f1..10ff6be58550 100644
--- a/llvm/lib/CheerpWriter/CheerpWriter.cpp
+++ b/llvm/lib/CheerpWriter/CheerpWriter.cpp
@@ -4391,6 +4391,18 @@ CheerpWriter::COMPILE_INSTRUCTION_FEEDBACK CheerpWriter::compileInlineableInstru
		}
		return COMPILE_OK;
	}
+	case Instruction::AtomicRMW:
+	{
+		const AtomicRMWInst& ai = cast<AtomicRMWInst>(I);
+		compileAtomicRMW(ai, parentPrio);
+		return COMPILE_OK;
+	}
+	case Instruction::AtomicCmpXchg:
+	{
+		const AtomicCmpXchgInst& ai = cast<AtomicCmpXchgInst>(I);
+		compileAtomicCmpXchg(ai, parentPrio);
+		return COMPILE_OK;
+	}
	default:
		stream << "alert('Unsupported code')";
		llvm::errs() << "\tImplement inst " << I.getOpcodeName() << '\n';
@@ -4449,7 +4461,7 @@ void CheerpWriter::compileLoad(const LoadInst& li, PARENT_PRIORITY parentPrio)
			elemPtrKind = PA.getPointerKind(&li);
		}
		bool isOffset = ie.ptrIdx == 1;
-		compileLoadElem(ptrOp, Ty, STy, ptrKind, elemPtrKind, isOffset, elemRegKind, ie.structIdx, asmjs, parentPrio);
+		compileLoadElem(li, Ty, STy, ptrKind, elemPtrKind, isOffset, elemRegKind, ie.structIdx, asmjs, parentPrio);
		if(needsCheckBounds)
		{
			needsCheckBounds = false;
@@ -4458,9 +4470,42 @@
	}
 }

-void CheerpWriter::compileLoadElem(const Value* ptrOp, Type* Ty, StructType* STy, POINTER_KIND ptrKind, POINTER_KIND loadKind, bool isOffset, Registerize::REGISTER_KIND regKind, uint32_t structElemIdx, bool asmjs, PARENT_PRIORITY parentPrio)
+void CheerpWriter::compileLoadElem(const LoadInst& li, Type* Ty, StructType* STy, POINTER_KIND ptrKind, POINTER_KIND loadKind, bool isOffset, Registerize::REGISTER_KIND regKind, uint32_t structElemIdx, bool asmjs, PARENT_PRIORITY parentPrio)
 {
-	if(regKind==Registerize::INTEGER && needsIntCoercion(parentPrio))
+	const Value* ptrOp = li.getPointerOperand();
+	if (li.isAtomic())
+	{
+		assert(!STy);
+		assert(!isOffset);
+		PARENT_PRIORITY shiftPrio = SHIFT;
+		uint32_t shift = getHeapShiftForType(Ty);
+		if (shift == 0)
+			shiftPrio = LOWEST;
+		if (parentPrio > BIT_OR)
+			stream << "(";
+		stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICLOAD) << "(";
+		if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8))
+			stream << "8,";
+		else if (Ty->isIntegerTy(16))
+			stream << "16,";
+		else if (Ty->isIntegerTy(32))
+			stream << "32,";
+		else if (Ty->isIntegerTy(64) && UseBigInts && LinearOutput!=AsmJs)
+			stream << "64,";
+		else
+			llvm::report_fatal_error("Unsupported bitwidth for atomic load");
+		compileRawPointer(ptrOp, shiftPrio);
+		if (shift != 0)
+			stream << ">>" << shift;
+		stream << ")";
+
+		if (li.getType()->isIntegerTy() && parentPrio != BIT_OR)
+			stream << "|0";
+		if (parentPrio > BIT_OR)
+			stream << ")";
+		return;
+	}
+	else if(regKind==Registerize::INTEGER && needsIntCoercion(parentPrio))
	{
		if (parentPrio > BIT_OR)
			stream << '(';
@@ -4636,6 +4681,34 @@ void CheerpWriter::compileStoreElem(const StoreInst& si, Type* Ty, StructType* S
	const Value* ptrOp=si.getPointerOperand();
	const Value* valOp=si.getValueOperand();
	assert(ptrKind != CONSTANT);
+	if (si.isAtomic())
+	{
+		assert(!STy);
+		assert(!isOffset);
+		stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICSTORE) << "(";
+		Type* t = valOp->getType();
+		PARENT_PRIORITY shiftPrio = SHIFT;
+		uint32_t shift = getHeapShiftForType(t);
+		if (shift == 0)
+			shiftPrio = LOWEST;
+		if (t->isIntegerTy(1) || t->isIntegerTy(8))
+			stream << "8,";
+		else if (t->isIntegerTy(16))
+			stream << "16,";
+		else if (t->isIntegerTy(32))
+			stream << "32,";
+		else if (t->isIntegerTy(64) && UseBigInts && LinearOutput!=AsmJs)
+			stream << "64,";
+		else
+			llvm::report_fatal_error("Unsupported bitwidth for atomic store");
+		compileRawPointer(ptrOp, shiftPrio);
+		if (shift != 0)
+			stream << ">>" << shift;
+		stream << ",";
+		compileOperand(valOp, BIT_OR);
+		stream << "|0)";
+		return;
+	}
	if (RAW == ptrKind || (asmjs && ptrKind == CONSTANT))
	{
		assert(!isOffset);
@@ -4739,6 +4812,107 @@
	}
 }

+void CheerpWriter::compileAtomicRMW(const AtomicRMWInst& ai, PARENT_PRIORITY parentPrio)
+{
+	if (parentPrio > BIT_OR)
+		stream << "(";
+	switch(ai.getOperation())
+	{
+		case AtomicRMWInst::BinOp::Xchg:
+			stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICXCHG) << "(";
+			break;
+		case AtomicRMWInst::BinOp::Add:
+			stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICADD) << "(";
+			break;
+		case AtomicRMWInst::BinOp::Sub:
+			stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICSUB) << "(";
+			break;
+		case AtomicRMWInst::BinOp::And:
+			stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICAND) << "(";
+			break;
+		case AtomicRMWInst::BinOp::Or:
+			stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICOR) << "(";
+			break;
+		case AtomicRMWInst::BinOp::Xor:
+			stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICXOR) << "(";
+			break;
+		default:
+			llvm::report_fatal_error("Unsupported atomicrmw opcode");
+	}
+	const Value* ptrOp=ai.getPointerOperand();
+	const Value* valOp=ai.getValOperand();
+	Type* t = valOp->getType();
+	PARENT_PRIORITY shiftPrio = SHIFT;
+	uint32_t shift = getHeapShiftForType(t);
+	if (shift == 0)
+		shiftPrio = LOWEST;
+	if (t->isIntegerTy(1) || t->isIntegerTy(8))
+		stream << "8,";
+	else if (t->isIntegerTy(16))
+		stream << "16,";
+	else if (t->isIntegerTy(32))
+		stream << "32,";
+	else if (t->isIntegerTy(64) && UseBigInts && LinearOutput!=AsmJs)
+		stream << "64,";
+	else
+		llvm::report_fatal_error("Unsupported bitwidth for atomicrmw");
+	compileRawPointer(ptrOp, shiftPrio);
+	if (shift != 0)
+		stream << ">>" << shift;
+	stream << ",";
+	compileOperand(valOp, BIT_OR);
+	stream << "|0)";
+
+	if (ai.getType()->isIntegerTy() && parentPrio != BIT_OR)
+		stream << "|0";
+	if (parentPrio > BIT_OR)
+		stream << ")";
+}
+
+void CheerpWriter::compileAtomicCmpXchg(const AtomicCmpXchgInst& ai, PARENT_PRIORITY parentPrio)
+{
+	const Value* ptrOp=ai.getPointerOperand();
+	const Value* cmpOp=ai.getCompareOperand();
+	const Value* newValOp=ai.getNewValOperand();
+	Type* t = newValOp->getType();
+	PARENT_PRIORITY shiftPrio = SHIFT;
+	uint32_t shift = getHeapShiftForType(t);
+	if (shift == 0)
+		shiftPrio = LOWEST;
+
+	stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICCMPXCHG) << "(";
+	if (t->isIntegerTy(1) || t->isIntegerTy(8))
+		stream << "8,";
+	else if (t->isIntegerTy(16))
+		stream << "16,";
+	else if (t->isIntegerTy(32))
+		stream << "32,";
+	else if (t->isIntegerTy(64) && UseBigInts && LinearOutput!=AsmJs)
+		stream << "64,";
+	else
+		llvm::report_fatal_error("Unsupported bitwidth for atomic cmpxchg");
+	compileRawPointer(ptrOp, shiftPrio);
+	if (shift != 0)
+		stream << ">>" << shift;
+	stream << ",";
+	compileOperand(cmpOp, BIT_OR);
+	stream << "|0,";
+	compileOperand(newValOp, BIT_OR);
+	stream << "|0)|0";
+
+	// Compile the second part of this instruction, the comparison between the loaded value
+	// and the compare operand. A compare operand to a cmpxchg instruction cannot be inlined, so
+	// calling compileOperand twice is safe.
+	// We only compile this part if this instruction has uses.
+	if (!ai.use_empty())
+	{
+		stream << ";" << NewLine;
+		stream << namegen.getName(&ai, 1) << "=(" << namegen.getName(&ai, 0) << "|0)==(";
+		compileOperand(cmpOp, BIT_OR);
+		stream << "|0)";
+	}
+}
+
 CheerpWriter::COMPILE_INSTRUCTION_FEEDBACK CheerpWriter::compileCallInstruction(const CallBase& ci, PARENT_PRIORITY parentPrio)
 {
	bool asmjs = currentFun->getSection() == StringRef("asmjs");
@@ -6044,6 +6218,64 @@ void CheerpWriter::compileGrowMem()
	stream << "}" << NewLine;
 }

+void CheerpWriter::compileAtomicFunctions()
+{
+	auto funcName = namegen.getBuiltinName(NameGenerator::Builtin::ATOMICLOAD);
+	stream << "function " << funcName << "(bitwidth, addr){" << NewLine;
+	stream << "if(bitwidth==8)" << NewLine;
+	stream << "return Atomics.load(" << getHeapName(HEAP8) << ", addr);" << NewLine;
+	stream << "else if(bitwidth==16)" << NewLine;
+	stream << "return Atomics.load(" << getHeapName(HEAP16) << ", addr);" << NewLine;
+	stream << "else if(bitwidth==32)" << NewLine;
+	stream << "return Atomics.load(" << getHeapName(HEAP32) << ", addr);" << NewLine;
+	if (UseBigInts && LinearOutput!=AsmJs)
+	{
+		stream << "else if(bitwidth==64)" << NewLine;
+		stream << "return Atomics.load(" << getHeapName(HEAP64) << ", addr);" << NewLine;
+	}
+	stream << "else " << NewLine;
+	stream << "throw new Error('Wrong bitwidth');" << NewLine;
+	stream << "}" << NewLine;
+	std::vector<const char*> opNames={"store","add","sub","and","or","xor","exchange"};
+	for (uint32_t i = 0; i < opNames.size(); i++)
+	{
+		auto b = static_cast<NameGenerator::Builtin>(i + NameGenerator::Builtin::ATOMICSTORE);
+		auto opName = opNames[i];
+		funcName = namegen.getBuiltinName(b);
+		stream << "function " << funcName << "(bitwidth, addr, val){" << NewLine;
+		stream << "if(bitwidth==8)" << NewLine;
+		stream << "return Atomics." << opName << "(" << getHeapName(HEAP8) << ", addr, val);" << NewLine;
+		stream << "else if(bitwidth==16)" << NewLine;
+		stream << "return Atomics." << opName << "(" << getHeapName(HEAP16) << ", addr, val);" << NewLine;
+		stream << "else if(bitwidth==32)" << NewLine;
+		stream << "return Atomics." << opName << "(" << getHeapName(HEAP32) << ", addr, val);" << NewLine;
+		if (UseBigInts && LinearOutput!=AsmJs)
+		{
+			stream << "else if(bitwidth==64)" << NewLine;
+			stream << "return Atomics." << opName << "(" << getHeapName(HEAP64) << ", addr, val);" << NewLine;
+		}
+		stream << "else " << NewLine;
+		stream << "throw new Error('Wrong bitwidth');" << NewLine;
+		stream << "}" << NewLine;
+	}
+	funcName = namegen.getBuiltinName(NameGenerator::Builtin::ATOMICCMPXCHG);
+	stream << "function " << funcName << "(bitwidth, addr, expected, replacement){" << NewLine;
+	stream << "if(bitwidth==8)" << NewLine;
+	stream << "return Atomics.compareExchange(" << getHeapName(HEAP8) << ", addr, expected, replacement);" << NewLine;
+	stream << "else if(bitwidth==16)" << NewLine;
+	stream << "return Atomics.compareExchange(" << getHeapName(HEAP16) << ", addr, expected, replacement);" << NewLine;
+	stream << "else if(bitwidth==32)" << NewLine;
+	stream << "return Atomics.compareExchange(" << getHeapName(HEAP32) << ", addr, expected, replacement);" << NewLine;
+	if (UseBigInts && LinearOutput!=AsmJs)
+	{
+		stream << "else if(bitwidth==64)" << NewLine;
+		stream << "return Atomics.compareExchange(" << getHeapName(HEAP64) << ", addr, expected, replacement);" << NewLine;
+	}
+	stream << "else " << NewLine;
+	stream << "throw new Error('Wrong bitwidth');" << NewLine;
+	stream << "}" << NewLine;
+}
+
 void CheerpWriter::compileMathDeclAsmJS()
 {
	stream << "var Infinity=stdlib.Infinity;" << NewLine;
@@ -6335,6 +6567,14 @@ void CheerpWriter::compileAsmJSClosure()
		stream << namegen.getBuiltinName(NameGenerator::Builtin::GROW_MEM);
		stream << ';' << NewLine;
	}
+	if (globalDeps.usesAtomics())
+	{
+		for (int i = NameGenerator::Builtin::ATOMICLOAD; i <= NameGenerator::Builtin::ATOMICCMPXCHG; i++)
+		{
+			auto b = static_cast<NameGenerator::Builtin>(i);
+			stream << "var " << namegen.getBuiltinName(b) << "=ffi." << namegen.getBuiltinName(b) << ";" << NewLine;
+		}
+	}

	// Declare globals
	for ( const GlobalVariable* GV : linearHelper.globals() )
@@ -6378,6 +6618,14 @@ void CheerpWriter::compileAsmJSffiObject()
		stream << namegen.getBuiltinName(NameGenerator::Builtin::GROW_MEM);
		stream << ',' << NewLine;
	}
+	if (globalDeps.usesAtomics())
+	{
+		for (int i = NameGenerator::Builtin::ATOMICLOAD; i <= NameGenerator::Builtin::ATOMICCMPXCHG; i++)
+		{
+			auto b = static_cast<NameGenerator::Builtin>(i);
+			stream << namegen.getBuiltinName(b) << ":" << namegen.getBuiltinName(b) << "," << NewLine;
+		}
+	}
	stream << "}";
 }

@@ -6385,7 +6633,10 @@ void CheerpWriter::compileAsmJSTopLevel()
 {
	compileDummies();

-	stream << "var __heap = new ArrayBuffer("<
From: Mark Peerdeman
Date: Tue, 12 Dec 2023 15:14:15 +0100
Subject: [PATCH 08/11] Introduce CheerpLowerAtomic pass

This pass is meant to remove atomics by invoking the existing
LowerAtomicPass only on genericjs functions.

---
 clang/lib/CodeGen/BackendUtil.cpp            |  3 ---
 clang/lib/Driver/ToolChains/WebAssembly.cpp  |  5 ++++
 llvm/include/llvm/Cheerp/CheerpLowerAtomic.h | 18 +++++++++++++
 llvm/include/llvm/Cheerp/CommandLine.h       |  1 +
 llvm/include/llvm/Cheerp/PassRegistry.h      |  1 +
 llvm/lib/CheerpUtils/CMakeLists.txt          |  1 +
 llvm/lib/CheerpUtils/CheerpLowerAtomic.cpp   | 27 ++++++++++++++++++++
 llvm/lib/CheerpUtils/CommandLine.cpp         |  2 ++
 llvm/lib/Passes/PassRegistry.def             |  1 +
 9 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 llvm/include/llvm/Cheerp/CheerpLowerAtomic.h
 create mode 100644 llvm/lib/CheerpUtils/CheerpLowerAtomic.cpp

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index ab89d0364a9f..00f1cf910fb7 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -87,7 +87,6 @@
 #include "llvm/Transforms/Scalar/EarlyCSE.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/JumpThreading.h"
-#include "llvm/Transforms/Scalar/LowerAtomicPass.h"
 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
@@ -948,8 +947,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
    //We need this to track this in custom constructors for DOM types, such as String::String(const char*)
    MPM.addPass(createModuleToFunctionPassAdaptor(cheerp::RequiredPassWrapper()));
    MPM.addPass(createModuleToFunctionPassAdaptor(cheerp::CheerpNativeRewriterPass()));
-    //Cheerp is single threaded, convert atomic instructions to regular ones
-    MPM.addPass(createModuleToFunctionPassAdaptor(LowerAtomicPass()));
  });
  PB.registerOptimizerLastEPCallback(
      [](ModulePassManager &MPM, OptimizationLevel Level) {
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index e9e7cb1c11c7..267db8af2561 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -697,9 +697,14 @@ void cheerp::CheerpOptimizer::ConstructJob(Compilation &C, const JobAction &JA,
  if (Args.hasArg(options::OPT_cheerp_no_icf))
    CmdArgs.push_back("-cheerp-no-icf");

+  if (!Args.hasArg(options::OPT_pthread))
+    CmdArgs.push_back("-cheerp-lower-atomics");
+
  addPass("function(CheerpLowerInvoke)");
  if (Args.hasArg(options::OPT_fexceptions))
    CmdArgs.push_back("-cheerp-keep-invokes");
+  // This pass will remove atomics from genericjs functions
+  addPass("CheerpLowerAtomic");
  addPass("function(simplifycfg)");
  addPass("CallConstructors");

diff --git a/llvm/include/llvm/Cheerp/CheerpLowerAtomic.h b/llvm/include/llvm/Cheerp/CheerpLowerAtomic.h
new file mode 100644
index 000000000000..a4b1c98a5ad6
--- /dev/null
+++ b/llvm/include/llvm/Cheerp/CheerpLowerAtomic.h
@@ -0,0 +1,18 @@
+#ifndef CHEERP_LOWER_ATOMIC_H
+#define CHEERP_LOWER_ATOMIC_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace cheerp
+{
+
+class CheerpLowerAtomicPass : public llvm::PassInfoMixin<CheerpLowerAtomicPass>
+{
+public:
+	llvm::PreservedAnalyses run(llvm::Module& M, llvm::ModuleAnalysisManager& MAM);
+	static bool isRequired() { return true; }
+};
+
+}
+
+#endif
diff --git a/llvm/include/llvm/Cheerp/CommandLine.h b/llvm/include/llvm/Cheerp/CommandLine.h
index b121a160aeac..eb24168bd54e 100644
--- a/llvm/include/llvm/Cheerp/CommandLine.h
+++ b/llvm/include/llvm/Cheerp/CommandLine.h
@@ -60,5 +60,6 @@ extern llvm::cl::opt<bool> WasmNoUnalignedMem;
 extern llvm::cl::opt<bool> UseBigInts;
 extern llvm::cl::opt<bool> KeepInvokes;
 extern llvm::cl::opt<bool> PreserveFree;
+extern llvm::cl::opt<bool> LowerAtomics;

 #endif //_CHEERP_COMMAND_LINE_H
diff --git a/llvm/include/llvm/Cheerp/PassRegistry.h b/llvm/include/llvm/Cheerp/PassRegistry.h
index 637b607d98cd..770c22e9fe7e 100644
--- a/llvm/include/llvm/Cheerp/PassRegistry.h
+++ b/llvm/include/llvm/Cheerp/PassRegistry.h
@@ -40,6 +40,7 @@
 #include "llvm/Cheerp/StoreMerging.h"
 #include "llvm/Cheerp/CallConstructors.h"
 #include "llvm/Cheerp/CommandLine.h"
+#include "llvm/Cheerp/CheerpLowerAtomic.h"

 namespace cheerp
 {
diff --git a/llvm/lib/CheerpUtils/CMakeLists.txt b/llvm/lib/CheerpUtils/CMakeLists.txt
index c7987f9b4da4..74ceb1015ed8 100644
--- a/llvm/lib/CheerpUtils/CMakeLists.txt
+++ b/llvm/lib/CheerpUtils/CMakeLists.txt
@@ -33,6 +33,7 @@ add_llvm_component_library(LLVMCheerpUtils
   BitCastLowering.cpp
   JSStringLiteralLowering.cpp
   MemoryInit.cpp
+  CheerpLowerAtomic.cpp
   )

 add_dependencies(LLVMCheerpUtils intrinsics_gen)
diff --git a/llvm/lib/CheerpUtils/CheerpLowerAtomic.cpp b/llvm/lib/CheerpUtils/CheerpLowerAtomic.cpp
new file mode 100644
index 000000000000..3b8ee9cd7eba
--- /dev/null
+++ b/llvm/lib/CheerpUtils/CheerpLowerAtomic.cpp
@@ -0,0 +1,27 @@
+#include "llvm/Cheerp/CheerpLowerAtomic.h"
+#include "llvm/Cheerp/CommandLine.h"
+#include "llvm/Transforms/Scalar/LowerAtomicPass.h"
+
+using namespace llvm;
+using namespace cheerp;
+
+// Module pass that invokes the LLVM LowerAtomicPass on genericjs functions.
+PreservedAnalyses CheerpLowerAtomicPass::run(Module& M, ModuleAnalysisManager& MAM)
+{
+	FunctionAnalysisManager& FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+	FunctionPassManager FPM;
+	FPM.addPass(LowerAtomicPass());
+
+	// Loop over the functions, and only pass genericjs ones to LowerAtomicPass
+	for (Function& F : M)
+	{
+		if (F.isDeclaration())
+			continue;
+
+		if (!LowerAtomics && F.getSection() == "asmjs")
+			continue;
+
+		FPM.run(F, FAM);
+	}
+
+	return PreservedAnalyses::none();
+}
diff --git a/llvm/lib/CheerpUtils/CommandLine.cpp b/llvm/lib/CheerpUtils/CommandLine.cpp
index 5cd5a3237e5e..ee03f2541473 100644
--- a/llvm/lib/CheerpUtils/CommandLine.cpp
+++ b/llvm/lib/CheerpUtils/CommandLine.cpp
@@ -97,3 +97,5 @@ llvm::cl::opt<std::string> EnvironName("cheerp-environ-name", llvm::cl::Optional

 llvm::cl::opt<std::string> ArgvName("cheerp-argv-name", llvm::cl::Optional,
	llvm::cl::desc("If specified, the identifier name storing the arguments"), llvm::cl::value_desc("name"));
+
+llvm::cl::opt<bool> LowerAtomics("cheerp-lower-atomics", llvm::cl::desc("Lower all atomic operations"));
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 8f428e76b6ac..d5056de81816 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -139,6 +139,7 @@ MODULE_PASS("PartialExecuter", cheerp::PartialExecuterPass())
 MODULE_PASS("PreExecute", cheerp::PreExecutePass())
 MODULE_PASS("FreeAndDeleteRemoval", cheerp::FreeAndDeleteRemovalPass())
 MODULE_PASS("CallConstructors", cheerp::CallConstructorsPass())
+MODULE_PASS("CheerpLowerAtomic", cheerp::CheerpLowerAtomicPass())
 #undef MODULE_PASS

 #ifndef MODULE_PASS_WITH_PARAMS
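Assuming the registration above, the pass should be exercisable in isolation with
the new pass manager, e.g. `opt -passes=CheerpLowerAtomic in.bc -o out.bc`; adding
the `-cheerp-lower-atomics` flag additionally lowers atomics inside asmjs
functions instead of only genericjs ones. (Invocation shape is a sketch based on
the PassRegistry.def and CommandLine.cpp entries in this patch, not a documented
Cheerp workflow.)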
From 5745eea55d666b04c90859205ae1dbf05f60bbfe Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Thu, 1 Feb 2024 11:48:22 +0100
Subject: [PATCH 09/11] Fix TypeOptimizer to allow literal structs.

TypeOptimizer used to create new named structs as copies of literal
structs. This caused a problem for literal structs that are the result
of a cmpxchg instruction: during inlining the original literal return
types are recreated, and those would conflict with the renamed copies
that went through TypeOptimizer.

In turn, this needed a tiny fix in isJSExportedType so that literal
structs (which have no name) do not cause a crash.

---
 llvm/lib/CheerpUtils/TypeOptimizer.cpp | 27 +++++++++++++++++++++-----
 llvm/lib/CheerpUtils/Utility.cpp       |  2 ++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CheerpUtils/TypeOptimizer.cpp b/llvm/lib/CheerpUtils/TypeOptimizer.cpp
index 85d7737585cf..42d803eced60 100644
--- a/llvm/lib/CheerpUtils/TypeOptimizer.cpp
+++ b/llvm/lib/CheerpUtils/TypeOptimizer.cpp
@@ -289,6 +289,11 @@ bool TypeOptimizer::isUnsafeDowncastSource(StructType* st)

 bool TypeOptimizer::canCollapseStruct(llvm::StructType* st, llvm::StructType* newStruct, llvm::Type* newType)
 {
+	if (newStruct == nullptr)
+	{
+		assert(st->isLiteral());
+		return false;
+	}
	// Stop if the element is just a int8, we may be dealing with an empty struct
	// Empty structs are unsafe as the int8 inside is just a placeholder and will be replaced
	// by a different type in a derived class
@@ -445,8 +450,11 @@ TypeOptimizer::TypeMappingInfo TypeOptimizer::rewriteType(Type* t)
			return CacheAndReturn(newType, TypeMappingInfo::BYTE_LAYOUT_TO_ARRAY);
		}

-		// Generate a new type inconditionally, it may end up being the same as the old one
-		StructType* newStruct=StructType::create(st->getContext());
+		// Generate a new type if it's not a literal struct. It may end up being the same as the old one
+		// In case of literal, it will be created as a literal at the end.
+		StructType* newStruct=nullptr;
+		if (!st->isLiteral())
+			newStruct=StructType::create(st->getContext());
 #ifndef NDEBUG
		newStructTypes.insert(newStruct);
 #endif
@@ -457,7 +465,8 @@
			newStruct->setName(name);
		}
		// Tentatively map the type to the newStruct, it may be overridden if the type is collapsed
-		typesMapping[t] = TypeMappingInfo(newStruct, TypeMappingInfo::IDENTICAL);
+		if (!st->isLiteral())
+			typesMapping[t] = TypeMappingInfo(newStruct, TypeMappingInfo::IDENTICAL);

		// Since we can merge arrays of the same type in an struct it is possible that at the end of the process a single type will remain
		TypeMappingInfo::MAPPING_KIND newStructKind = TypeMappingInfo::IDENTICAL;
@@ -556,7 +565,9 @@
		std::vector<std::pair<uint32_t, uint32_t>> mergedInts;
		uint32_t directBaseLimit=0;
		// We may need to update the bases metadata for this type
-		NamedMDNode* namedBasesMetadata = TypeSupport::getBasesMetadata(newStruct, *module);
+		NamedMDNode* namedBasesMetadata = nullptr;
+		if (!st->isLiteral())
+			namedBasesMetadata = TypeSupport::getBasesMetadata(newStruct, *module);
		uint32_t firstBaseBegin, firstBaseEnd;
		if(namedBasesMetadata)
		{
@@ -698,7 +709,13 @@
		}
		StructType* newDirectBase = st->getDirectBase() ? dyn_cast<StructType>(rewriteType(st->getDirectBase()).mappedType) : NULL;
-		newStruct->setBody(newTypes, st->isPacked(), newDirectBase, st->hasByteLayout(), st->hasAsmJS());
+		if (st->isLiteral())
+		{
+			newStruct = StructType::get(st->getContext(), newTypes, st->isPacked(), newDirectBase, st->hasByteLayout(), st->hasAsmJS());
+			typesMapping[t] = TypeMappingInfo(newStruct, TypeMappingInfo::IDENTICAL);
+		}
+		else
+			newStruct->setBody(newTypes, st->isPacked(), newDirectBase, st->hasByteLayout(), st->hasAsmJS());

		return CacheAndReturn(newStruct, newStructKind);
	}
diff --git a/llvm/lib/CheerpUtils/Utility.cpp b/llvm/lib/CheerpUtils/Utility.cpp
index 5fe55a9cdcad..c636174e9ca7 100644
--- a/llvm/lib/CheerpUtils/Utility.cpp
+++ b/llvm/lib/CheerpUtils/Utility.cpp
@@ -739,6 +739,8 @@ char TypeSupport::getPrefixCharForMember(const PointerAnalyzer& PA, llvm::Struct

 bool TypeSupport::isJSExportedType(StructType* st, const Module& m)
 {
+	if (st->isLiteral())
+		return false;
	return m.getNamedMetadata(llvm::Twine(st->getName(),"_methods"))!=NULL;
 }

From 3a20db1820193bf65abb3349df69d58815763e26 Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Thu, 8 Feb 2024 13:27:45 +0100
Subject: [PATCH 10/11] Compiler-rt Mutex disabled

The Mutex uses a 64-bit value to store its state and performs atomic
operations on it, which results in 64-bit atomic operations. These are
not supported in AsmJS, so for now the Mutex is disabled.

---
 compiler-rt/lib/sanitizer_common/sanitizer_mutex.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h b/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h
index b1a58e421d81..6a7fa6245ec7 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h
@@ -163,7 +163,11 @@ class SANITIZER_MUTEX Mutex : CheckedMutex {
   explicit constexpr Mutex(MutexType type = MutexUnchecked)
       : CheckedMutex(type) {}

+  // CHEERP: This mutex uses a 64-bit state value, meaning 64-bit atomic operations.
+  // These are not supported in asmjs currently, so we've disabled the Mutex
+  // by returning early from all functions.
   void Lock() SANITIZER_ACQUIRE() {
+    return;
     CheckedMutex::Lock();
     u64 reset_mask = ~0ull;
     u64 state = atomic_load_relaxed(&state_);
@@ -209,6 +213,7 @@ class SANITIZER_MUTEX Mutex : CheckedMutex {
   }

   bool TryLock() SANITIZER_TRY_ACQUIRE(true) {
+    return true;
     u64 state = atomic_load_relaxed(&state_);
     for (;;) {
       if (UNLIKELY(state & (kWriterLock | kReaderLockMask)))
@@ -223,6 +228,7 @@ class SANITIZER_MUTEX Mutex : CheckedMutex {
   }

   void Unlock() SANITIZER_RELEASE() {
+    return;
     CheckedMutex::Unlock();
     bool wake_writer;
     u64 wake_readers;
@@ -251,6 +257,7 @@ class SANITIZER_MUTEX Mutex : CheckedMutex {
   }

   void ReadLock() SANITIZER_ACQUIRE_SHARED() {
+    return;
     CheckedMutex::Lock();
     u64 reset_mask = ~0ull;
     u64 state = atomic_load_relaxed(&state_);
@@ -288,6 +295,7 @@ class SANITIZER_MUTEX Mutex : CheckedMutex {
   }

   void ReadUnlock() SANITIZER_RELEASE_SHARED() {
+    return;
     CheckedMutex::Unlock();
     bool wake;
     u64 new_state;
@@ -314,12 +322,14 @@ class SANITIZER_MUTEX Mutex : CheckedMutex {
   // maintaining complex state to work around those situations, the check only
   // checks that the mutex is owned.
   void CheckWriteLocked() const SANITIZER_CHECK_LOCKED() {
+    return;
     CHECK(atomic_load(&state_, memory_order_relaxed) & kWriterLock);
   }

   void CheckLocked() const SANITIZER_CHECK_LOCKED() { CheckWriteLocked(); }

   void CheckReadLocked() const SANITIZER_CHECK_LOCKED() {
+    return;
     CHECK(atomic_load(&state_, memory_order_relaxed) & kReaderLockMask);
   }
From c245c83be1ce004ee94a0dbd2815b424cc40f5cd Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Thu, 8 Feb 2024 13:29:58 +0100
Subject: [PATCH 11/11] Set max bitwidths for atomic operations

---
 clang/lib/Basic/Targets/WebAssembly.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h
index cd7d2a7e8c03..52f36e8ba26a 100644
--- a/clang/lib/Basic/Targets/WebAssembly.h
+++ b/clang/lib/Basic/Targets/WebAssembly.h
@@ -212,6 +212,8 @@ class CheerpTargetInfo : public TargetInfo {
     LongDoubleWidth = LongDoubleAlign = 64;
     LongDoubleFormat = &llvm::APFloat::IEEEdouble();
     SizeType = UnsignedInt;
+    // We define these as 32-bit for now, since AsmJS cannot handle 64-bit atomic operations currently.
+    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;

     // Use 32-bit integers for two separated bit fields.
     UseBitFieldTypeAlignment = true;
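The practical effect of the 32-bit cap is easy to demonstrate: atomic operations up
to 32 bits stay inline, while 64-bit ones fall back to the __atomic_* runtime
library calls — the very pattern the disabled sanitizer Mutex would otherwise hit
with its 64-bit state word. A minimal sketch in standard C++, with no
Cheerp-specific API:

    #include <atomic>
    #include <cstdint>

    std::atomic<uint32_t> counter32{0};
    std::atomic<uint64_t> counter64{0}; // the sanitizer Mutex state is a word like this

    int main() {
        // 32-bit RMW: within MaxAtomicInlineWidth, lowered to an inline atomicrmw.
        counter32.fetch_add(1, std::memory_order_relaxed);
        // 64-bit RMW: wider than the 32-bit cap, so clang emits a call into the
        // __atomic_* runtime library instead of an inline atomic instruction.
        counter64.fetch_add(1, std::memory_order_relaxed);
    }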