From a54cdd0a9981415c74293b8db07420dfd1be1e7e Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Mon, 14 Aug 2023 09:53:54 +0200
Subject: [PATCH 01/11] Add AtomicRMW to ICF

---
 llvm/lib/CheerpUtils/IdenticalCodeFolding.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/llvm/lib/CheerpUtils/IdenticalCodeFolding.cpp b/llvm/lib/CheerpUtils/IdenticalCodeFolding.cpp
index a44b43e5ada9..c7572affa291 100644
--- a/llvm/lib/CheerpUtils/IdenticalCodeFolding.cpp
+++ b/llvm/lib/CheerpUtils/IdenticalCodeFolding.cpp
@@ -526,6 +526,15 @@ bool IdenticalCodeFolding::equivalentInstruction(const llvm::Instruction* A, con
 				equivalentOperand(A->getOperand(0), B->getOperand(0)) &&
 				equivalentOperand(A->getOperand(1), B->getOperand(1)));
 		}
+		case Instruction::AtomicRMW:
+		{
+			const AtomicRMWInst* a = cast<AtomicRMWInst>(A);
+			const AtomicRMWInst* b = cast<AtomicRMWInst>(B);
+			return CacheAndReturn(equivalentType(a->getType(), b->getType()) &&
+				a->getOperation() == b->getOperation() &&
+				equivalentOperand(a->getPointerOperand(), b->getPointerOperand()) &&
+				equivalentOperand(a->getValOperand(), b->getValOperand()));
+		}
 		default:
 		{
 #ifndef NDEBUG

From ddf572f564ac2bda631992c3a81da76916b614b1 Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Tue, 12 Dec 2023 15:01:40 +0100
Subject: [PATCH 02/11] Add atomic instructions to isInlineable

AtomicRMW and AtomicCmpXchg are marked as not inlineable, and the
compare operand of an AtomicCmpXchg instruction may not be inlined
into it.

---
 llvm/lib/CheerpUtils/Utility.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llvm/lib/CheerpUtils/Utility.cpp b/llvm/lib/CheerpUtils/Utility.cpp
index 1996522a1c16..5fe55a9cdcad 100644
--- a/llvm/lib/CheerpUtils/Utility.cpp
+++ b/llvm/lib/CheerpUtils/Utility.cpp
@@ -184,6 +184,11 @@ bool InlineableCache::isInlineableImpl(const Instruction& I)
				//Abs will be rendered as (X >= 0) ? X : -X in both writers
				return true;
			}
+			if(const AtomicCmpXchgInst* ai = dyn_cast<AtomicCmpXchgInst>(userInst))
+			{
+				if (&I == ai->getCompareOperand())
+					return true;
+			}
			return false;
		};
		// Do not inline the instruction if the use is in another block
@@ -497,6 +502,8 @@ bool InlineableCache::isInlineableImpl(const Instruction& I)
			return true;
		case Instruction::ExtractElement:
		case Instruction::InsertElement:
+		case Instruction::AtomicRMW:
+		case Instruction::AtomicCmpXchg:
			return false;
		case Instruction::Select:
		{
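The restriction above matters because both writers consume the compare operand of a
cmpxchg twice: once as the instruction's input and once to compute the success flag
from the loaded value (see patches 03 and 07). The toy program below is an
illustration only, not Cheerp code; it shows how inlining a side-effecting
expression into such a double-use site would evaluate it twice:

    #include <iostream>
    #include <string>

    // Toy "writer": renders an operand either as an inlined expression or as a
    // reference to a precomputed local, mimicking the inlineable split.
    static std::string renderOperand(bool inlined) {
        return inlined ? "f()" : "tmp"; // f() stands for any side-effecting expression
    }

    int main() {
        for (bool inlined : {true, false}) {
            std::string cmp = renderOperand(inlined);
            // The writer consumes the compare operand twice: once as the cmpxchg
            // argument, once for the equality check against the loaded value.
            std::cout << "old = cmpxchg(ptr, " << cmp << ", newVal);\n"
                      << "ok  = (old == " << cmp << ");\n\n";
        }
        // With inlining, f() appears (and would run) twice; with a local, once.
    }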
From bacc6977dc9b2fe0479d8dbb30559e68579286d1 Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Tue, 14 Feb 2023 16:29:54 +0100
Subject: [PATCH 03/11] CheerpWasmWriter - Support for atomics

---
 llvm/include/llvm/Cheerp/WasmOpcodes.h     |  71 +++++++++
 llvm/include/llvm/Cheerp/WasmWriter.h      |   4 +-
 llvm/lib/CheerpWriter/CheerpWasmWriter.cpp | 159 +++++++++++++++++++--
 3 files changed, 222 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Cheerp/WasmOpcodes.h b/llvm/include/llvm/Cheerp/WasmOpcodes.h
index 6763d6dbac0b..2b9f826cfc75 100644
--- a/llvm/include/llvm/Cheerp/WasmOpcodes.h
+++ b/llvm/include/llvm/Cheerp/WasmOpcodes.h
@@ -143,6 +143,7 @@ enum class WasmOpcode {
	F64_REINTERPRET_I64 = 0xbf,
	FC = 0xfc,
	SIMD = 0xfd,
+	Threads = 0xfe,
 };

 enum class WasmS32Opcode {
@@ -373,6 +374,76 @@ enum class WasmSIMDU32U32U32Opcode {
	V128_STORE64_LANE = 0x5b,
 };

+enum class WasmThreadsU32Opcode {
+	ATOMIC_FENCE = 0x3,
+};
+
+enum class WasmThreadsU32U32Opcode {
+	I32_ATOMIC_LOAD = 0x10,
+	I64_ATOMIC_LOAD = 0x11,
+	I32_ATOMIC_LOAD8_U = 0x12,
+	I32_ATOMIC_LOAD16_U = 0x13,
+	I64_ATOMIC_LOAD8_U = 0x14,
+	I64_ATOMIC_LOAD16_U = 0x15,
+	I64_ATOMIC_LOAD32_U = 0x16,
+	I32_ATOMIC_STORE = 0x17,
+	I64_ATOMIC_STORE = 0x18,
+	I32_ATOMIC_STORE8 = 0x19,
+	I32_ATOMIC_STORE16 = 0x1a,
+	I64_ATOMIC_STORE8 = 0x1b,
+	I64_ATOMIC_STORE16 = 0x1c,
+	I64_ATOMIC_STORE32 = 0x1d,
+	I32_ATOMIC_RMW_ADD = 0x1e,
+	I64_ATOMIC_RMW_ADD = 0x1f,
+	I32_ATOMIC_RMW8_ADD_U = 0x20,
+	I32_ATOMIC_RMW16_ADD_U = 0x21,
+	I64_ATOMIC_RMW8_ADD_U = 0x22,
+	I64_ATOMIC_RMW16_ADD_U = 0x23,
+	I64_ATOMIC_RMW32_ADD_U = 0x24,
+	I32_ATOMIC_RMW_SUB = 0x25,
+	I64_ATOMIC_RMW_SUB = 0x26,
+	I32_ATOMIC_RMW8_SUB_U = 0x27,
+	I32_ATOMIC_RMW16_SUB_U = 0x28,
+	I64_ATOMIC_RMW8_SUB_U = 0x29,
+	I64_ATOMIC_RMW16_SUB_U = 0x2a,
+	I64_ATOMIC_RMW32_SUB_U = 0x2b,
+	I32_ATOMIC_RMW_AND = 0x2c,
+	I64_ATOMIC_RMW_AND = 0x2d,
+	I32_ATOMIC_RMW8_AND_U = 0x2e,
+	I32_ATOMIC_RMW16_AND_U = 0x2f,
+	I64_ATOMIC_RMW8_AND_U = 0x30,
+	I64_ATOMIC_RMW16_AND_U = 0x31,
+	I64_ATOMIC_RMW32_AND_U = 0x32,
+	I32_ATOMIC_RMW_OR = 0x33,
+	I64_ATOMIC_RMW_OR = 0x34,
+	I32_ATOMIC_RMW8_OR_U = 0x35,
+	I32_ATOMIC_RMW16_OR_U = 0x36,
+	I64_ATOMIC_RMW8_OR_U = 0x37,
+	I64_ATOMIC_RMW16_OR_U = 0x38,
+	I64_ATOMIC_RMW32_OR_U = 0x39,
+	I32_ATOMIC_RMW_XOR = 0x3a,
+	I64_ATOMIC_RMW_XOR = 0x3b,
+	I32_ATOMIC_RMW8_XOR_U = 0x3c,
+	I32_ATOMIC_RMW16_XOR_U = 0x3d,
+	I64_ATOMIC_RMW8_XOR_U = 0x3e,
+	I64_ATOMIC_RMW16_XOR_U = 0x3f,
+	I64_ATOMIC_RMW32_XOR_U = 0x40,
+	I32_ATOMIC_RMW_XCHG = 0x41,
+	I64_ATOMIC_RMW_XCHG = 0x42,
+	I32_ATOMIC_RMW8_XCHG_U = 0x43,
+	I32_ATOMIC_RMW16_XCHG_U = 0x44,
+	I64_ATOMIC_RMW8_XCHG_U = 0x45,
+	I64_ATOMIC_RMW16_XCHG_U = 0x46,
+	I64_ATOMIC_RMW32_XCHG_U = 0x47,
+	I32_ATOMIC_RMW_CMPXCHG = 0x48,
+	I64_ATOMIC_RMW_CMPXCHG = 0x49,
+	I32_ATOMIC_RMW8_CMPXCHG_U = 0x4a,
+	I32_ATOMIC_RMW16_CMPXCHG_U = 0x4b,
+	I64_ATOMIC_RMW8_CMPXCHG_U = 0x4c,
+	I64_ATOMIC_RMW16_CMPXCHG_U = 0x4d,
+	I64_ATOMIC_RMW32_CMPXCHG_U = 0x4e,
+};
+
 enum class WasmInvalidOpcode {
	BRANCH_LIKELY = 0x14,
	BRANCH_UNLIKELY = 0x15,
diff --git a/llvm/include/llvm/Cheerp/WasmWriter.h b/llvm/include/llvm/Cheerp/WasmWriter.h
index f9b79f6433bd..92fa5856dcd7 100644
--- a/llvm/include/llvm/Cheerp/WasmWriter.h
+++ b/llvm/include/llvm/Cheerp/WasmWriter.h
@@ -513,6 +513,8 @@ class CheerpWasmWriter final : public CheerpBaseWriter
	static void encodeInst(WasmSIMDU32U32Opcode opcode, uint32_t i1, uint32_t i2, WasmBuffer& code);
	static void encodeInst(WasmSIMDU32U32U32Opcode opcode, uint32_t i1, uint32_t i2, uint32_t i3, WasmBuffer& code);
	static void encodeInst(WasmU32U32Opcode opcode, uint32_t i1, uint32_t i2, WasmBuffer& code);
+	static void encodeInst(WasmThreadsU32Opcode opcode, uint32_t immediate, WasmBuffer& code);
+	static void encodeInst(WasmThreadsU32U32Opcode opcode, uint32_t i1, uint32_t i2, WasmBuffer& code);
	void encodeInst(WasmInvalidOpcode opcode, WasmBuffer& code);
	void encodeVectorConstantZero(WasmBuffer& code);
	void encodeConstantDataVector(WasmBuffer& code, const llvm::ConstantDataVector* cdv);
@@ -528,7 +530,7 @@ class CheerpWasmWriter final : public CheerpBaseWriter
	void compileICmp(const llvm::ICmpInst& ci, const llvm::CmpInst::Predicate p, WasmBuffer& code);
	void compileICmp(const llvm::Value* op0, const llvm::Value* op1, const llvm::CmpInst::Predicate p, WasmBuffer& code);
	void compileFCmp(const llvm::Value* lhs, const llvm::Value* rhs, const llvm::CmpInst::Predicate p, WasmBuffer& code);
-	void encodeLoad(llvm::Type* ty, uint32_t offset, WasmBuffer& code, bool signExtend);
+	void encodeLoad(llvm::Type* ty, uint32_t offset, WasmBuffer& code, bool signExtend, bool atomic);
	void encodeWasmIntrinsic(WasmBuffer& code, const llvm::Function* F);
	void encodeBranchTable(WasmBuffer& code, std::vector<uint32_t> table, int32_t defaultBlock);
	void encodeDataSectionChunk(WasmBuffer& data, uint32_t address, llvm::StringRef buf);
diff --git a/llvm/lib/CheerpWriter/CheerpWasmWriter.cpp b/llvm/lib/CheerpWriter/CheerpWasmWriter.cpp
index c45a2cffd9b6..b92651f57902 100644
--- a/llvm/lib/CheerpWriter/CheerpWasmWriter.cpp
+++ b/llvm/lib/CheerpWriter/CheerpWasmWriter.cpp
@@ -589,6 +589,21 @@ void CheerpWasmWriter::encodeInst(WasmSIMDU32U32U32Opcode opcode, uint32_t i1, u
	encodeULEB128(i3, code);
 }

+void CheerpWasmWriter::encodeInst(WasmThreadsU32Opcode opcode, uint32_t immediate, WasmBuffer& code)
+{
+	code << static_cast<char>(WasmOpcode::Threads);
+	encodeULEB128(static_cast<uint32_t>(opcode), code);
+	encodeULEB128(immediate, code);
+}
+
+void CheerpWasmWriter::encodeInst(WasmThreadsU32U32Opcode opcode, uint32_t i1, uint32_t i2, WasmBuffer& code)
+{
+	code << static_cast<char>(WasmOpcode::Threads);
+	encodeULEB128(static_cast<uint32_t>(opcode), code);
+	encodeULEB128(i1, code);
+	encodeULEB128(i2, code);
+}
+
 void CheerpWasmWriter::encodeInst(WasmInvalidOpcode opcode, WasmBuffer& code)
 {
	nopLocations.push_back(code.tell());
@@ -1005,8 +1020,9 @@ void CheerpWasmWriter::encodePredicate(const llvm::Type* ty, const llvm::CmpInst
 }

 void CheerpWasmWriter::encodeLoad(llvm::Type* ty, uint32_t offset,
-		WasmBuffer& code, bool signExtend)
+		WasmBuffer& code, bool signExtend, bool atomic)
 {
+	assert(!(atomic && signExtend));
	if(ty->isIntegerTy())
	{
		uint32_t bitWidth = targetData.getTypeStoreSizeInBits(ty);

		switch (bitWidth)
		{
			// Currently assume unsigned, like Cheerp. We may optimize
			// this by looking at a following sext or zext instruction.
			case 8:
-				encodeInst(signExtend ? WasmU32U32Opcode::I32_LOAD8_S : WasmU32U32Opcode::I32_LOAD8_U, 0x0, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_LOAD8_U, 0x0, offset, code);
+				else if (signExtend)
+					encodeInst(WasmU32U32Opcode::I32_LOAD8_S, 0x0, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I32_LOAD8_U, 0x0, offset, code);
				break;
			case 16:
-				encodeInst(signExtend ? WasmU32U32Opcode::I32_LOAD16_S : WasmU32U32Opcode::I32_LOAD16_U, 0x1, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_LOAD16_U, 0x1, offset, code);
+				else if (signExtend)
+					encodeInst(WasmU32U32Opcode::I32_LOAD16_S, 0x1, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I32_LOAD16_U, 0x1, offset, code);
				break;
			case 32:
-				encodeInst(WasmU32U32Opcode::I32_LOAD, 0x2, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_LOAD, 0x2, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I32_LOAD, 0x2, offset, code);
				break;
			case 64:
-				encodeInst(WasmU32U32Opcode::I64_LOAD, 0x2, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I64_ATOMIC_LOAD, 0x3, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I64_LOAD, 0x2, offset, code);
				break;
			default:
				llvm::errs() << "bit width: " << bitWidth << '\n';
@@ -1060,6 +1092,7 @@
			llvm::report_fatal_error("vector bitwidth not supported");
		}
	} else {
+		assert(!atomic && "atomic loads only supported on integers");
		if (ty->isFloatTy())
			encodeInst(WasmU32U32Opcode::F32_LOAD, 0x2, offset, code);
		else if (ty->isDoubleTy())
@@ -1982,7 +2015,7 @@ void CheerpWasmWriter::compileLoad(WasmBuffer& code, const LoadInst& li, bool si
		offset += elementOffset;
	}
	// 2) Load
-	encodeLoad(Ty, offset, code, signExtend);
+	encodeLoad(Ty, offset, code, signExtend, li.isAtomic());
	}
 }
@@ -2030,6 +2063,7 @@ void CheerpWasmWriter::compileStore(WasmBuffer& code, const StoreInst& si)
	}
	// 3) Store
	// When storing values with size less than 32-bit we need to truncate them
+	bool atomic = si.isAtomic();
	if(Ty->isIntegerTy())
	{
		uint32_t bitWidth = targetData.getTypeStoreSizeInBits(Ty);
@@ -2037,16 +2071,28 @@
		switch (bitWidth)
		{
			case 8:
-				encodeInst(WasmU32U32Opcode::I32_STORE8, 0x0, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_STORE8, 0x0, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I32_STORE8, 0x0, offset, code);
				break;
			case 16:
-				encodeInst(WasmU32U32Opcode::I32_STORE16, 0x1, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_STORE16, 0x1, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I32_STORE16, 0x1, offset, code);
				break;
			case 32:
-				encodeInst(WasmU32U32Opcode::I32_STORE, 0x2, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_STORE, 0x2, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I32_STORE, 0x2, offset, code);
				break;
			case 64:
-				encodeInst(WasmU32U32Opcode::I64_STORE, 0x2, offset, code);
+				if (atomic)
+					encodeInst(WasmThreadsU32U32Opcode::I64_ATOMIC_STORE, 0x3, offset, code);
+				else
+					encodeInst(WasmU32U32Opcode::I64_STORE, 0x2, offset, code);
				break;
			default:
				llvm::errs() << "bit width: " << bitWidth << '\n';
	}
	else if (Ty->isVectorTy())
	{
+		assert(!atomic && "atomic stores only supported on integers");
		const FixedVectorType* vecType = cast<FixedVectorType>(Ty);
		const unsigned vecWidth = getVectorBitwidth(vecType);
		if (vecWidth == 128)
@@ -2082,6 +2129,7 @@
	}
	else
	{
+		assert(!atomic && "atomic stores only supported on integers");
		if (Ty->isFloatTy())
			encodeInst(WasmU32U32Opcode::F32_STORE, 0x2, offset, code);
		else if (Ty->isDoubleTy())
@@ -2298,7 +2346,7 @@ bool CheerpWasmWriter::compileInlineInstruction(WasmBuffer& code, const Instruct
			// Load the current argument
			compileOperand(code, vi.getPointerOperand());
			encodeInst(WasmU32U32Opcode::I32_LOAD, 0x2, 0x0, code);
-			encodeLoad(vi.getType(), 0, code, /*signExtend*/false);
+			encodeLoad(vi.getType(), 0, code, /*signExtend*/false, /*atomic*/false);

			// Move varargs pointer to next argument
			compileOperand(code, vi.getPointerOperand());
@@ -3566,6 +3614,95 @@ bool CheerpWasmWriter::compileInlineInstruction(WasmBuffer& code, const Instruct
			}
			break;
		}
+		case Instruction::AtomicRMW:
+		{
+			const AtomicRMWInst& ai = cast<AtomicRMWInst>(I);
+			const Type* opType = ai.getOperand(1)->getType();
+			assert(opType->isIntegerTy());
+			const Value* ptrOp = ai.getPointerOperand();
+			const Value* valOp = ai.getValOperand();
+			uint32_t offset = compileLoadStorePointer(code, ptrOp);
+			compileOperand(code, valOp);
+			switch (ai.getOperation())
+			{
+#define ATOMICBINOP(Ty, name) \
+				case AtomicRMWInst::Ty: \
+				{ \
+					if (opType->isIntegerTy(64)) \
+						encodeInst(WasmThreadsU32U32Opcode::I64_ATOMIC_RMW_##name, 0x3, offset, code); \
+					else if (opType->isIntegerTy(32)) \
+						encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_RMW_##name, 0x2, offset, code); \
+					else if (opType->isIntegerTy(16)) \
+						encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_RMW16_##name##_U, 0x1, offset, code); \
+					else if (opType->getPrimitiveSizeInBits() <= 8) \
+						encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_RMW8_##name##_U, 0x0, offset, code); \
+					else \
+						llvm::report_fatal_error("Unknown bitwidth for AtomicRMW inst"); \
+					break; \
+				}
+				ATOMICBINOP( Add,  ADD)
+				ATOMICBINOP( Sub,  SUB)
+				ATOMICBINOP( And,  AND)
+				ATOMICBINOP(  Or,   OR)
+				ATOMICBINOP( Xor,  XOR)
+				ATOMICBINOP(Xchg, XCHG)
+#undef ATOMICBINOP
+				default:
+				{
+					llvm::report_fatal_error("Atomic opcode not supported");
+				}
+			}
+			break;
+		}
+		case Instruction::AtomicCmpXchg:
+		{
+			const AtomicCmpXchgInst& ai = cast<AtomicCmpXchgInst>(I);
+			const Value* ptrOp = ai.getPointerOperand();
+			const Value* compareOp = ai.getCompareOperand();
+			const Value* newValOp = ai.getNewValOperand();
+			const Type* t = compareOp->getType();
+			uint32_t offset = compileLoadStorePointer(code, ptrOp);
+			// We use compileOperand on the compareOp twice, but this is safe because
+			// the compareOp of a cmpxchg instruction will never be inlined.
+			compileOperand(code, compareOp);
+			compileOperand(code, newValOp);
+			if (t->isIntegerTy(8))
+				encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_RMW8_CMPXCHG_U, 0x0, offset, code);
+			else if (t->isIntegerTy(16))
+				encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_RMW16_CMPXCHG_U, 0x1, offset, code);
+			else if (t->isIntegerTy(32))
+				encodeInst(WasmThreadsU32U32Opcode::I32_ATOMIC_RMW_CMPXCHG, 0x2, offset, code);
+			else if (t->isIntegerTy(64))
+				encodeInst(WasmThreadsU32U32Opcode::I64_ATOMIC_RMW_CMPXCHG, 0x3, offset, code);
+			else
+				llvm::report_fatal_error("Atomic cmpxchg only allowed on integers");
+			// Do not duplicate the result and render the comparison if this instruction has no uses.
+			// We do however have to push a garbage constant onto the stack, because
+			// compileInstructionAndSet will drop 2 times (since there are 2 registers for this).
+			if (I.use_empty())
+			{
+				encodeInst(WasmS32Opcode::I32_CONST, 0, code);
+				break;
+			}
+			// Now compile the second part of this two-register operation, the comparison between
+			// the original (loaded) value and the comparison value.
+			uint32_t idx = registerize.getRegisterId(&ai, 0, edgeContext);
+			uint32_t localId = localMap.at(idx);
+			encodeInst(WasmU32Opcode::TEE_LOCAL, localId, code);
+			encodeInst(WasmU32Opcode::GET_LOCAL, localId, code);
+			compileOperand(code, compareOp);
+			if (t->isIntegerTy(64))
+				encodeInst(WasmOpcode::I64_EQ, code);
+			else
+				encodeInst(WasmOpcode::I32_EQ, code);
+			break;
+		}
+		case Instruction::Fence:
+		{
+			// The FENCE opcode currently requires an immediate set to 0.
+			encodeInst(WasmThreadsU32Opcode::ATOMIC_FENCE, 0, code);
+			break;
+		}
		default:
		{
 #ifndef NDEBUG
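For reference, every instruction from the wasm threads proposal is encoded as the
0xfe prefix byte, a LEB128 sub-opcode, and two LEB128 immediates: the alignment
exponent and the constant offset. The self-contained sketch below reproduces the
byte sequence the new encodeInst overloads emit for i32.atomic.rmw.add
(sub-opcode 0x1e per the enum above); the plain byte vector is an assumption
standing in for the actual WasmBuffer type:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Minimal unsigned LEB128, as used for all wasm immediates.
    static void encodeULEB128(uint32_t v, std::vector<uint8_t>& out) {
        do {
            uint8_t b = v & 0x7f;
            v >>= 7;
            if (v) b |= 0x80;
            out.push_back(b);
        } while (v);
    }

    int main() {
        std::vector<uint8_t> code;
        // i32.atomic.rmw.add: Threads prefix 0xfe, sub-opcode 0x1e,
        // alignment exponent 2 (4-byte aligned), offset 16.
        code.push_back(0xfe);
        encodeULEB128(0x1e, code);
        encodeULEB128(0x2, code);
        encodeULEB128(16, code);
        for (uint8_t b : code)
            printf("%02x ", b); // prints: fe 1e 02 10
        printf("\n");
    }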
From 6c3d5be7d48c9dc890ce986fb1e8fd72f5adbd22 Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Wed, 8 Nov 2023 10:59:15 +0100
Subject: [PATCH 04/11] Atomic support for PreExecuter

---
 .../ExecutionEngine/Interpreter/Execution.cpp | 70 +++++++++++++++++++
 .../ExecutionEngine/Interpreter/Interpreter.h |  4 ++
 2 files changed, 74 insertions(+)

diff --git a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index b59e1bed223b..9a73cd185175 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -2119,6 +2119,76 @@ void Interpreter::visitInsertValueInst(InsertValueInst &I) {
   SetValue(&I, Dest, SF);
 }

+void Interpreter::visitAtomicRMWInst(AtomicRMWInst &I)
+{
+  ExecutionContext &SF = ECStack.back();
+  GenericValue Src1 = getOperandValue(I.getPointerOperand(), SF);
+  GenericValue *Ptr = (GenericValue*)GVTORP(Src1);
+  GenericValue Src2 = getOperandValue(I.getValOperand(), SF);
+  GenericValue Orig;
+  LoadValueFromMemory(Orig, Ptr, I.getType());
+  GenericValue Result;
+
+  switch (I.getOperation()) {
+  default:
+    dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+    llvm_unreachable(nullptr);
+    break;
+  case AtomicRMWInst::BinOp::Add: Result.IntVal = Orig.IntVal + Src2.IntVal; break;
+  case AtomicRMWInst::BinOp::Sub: Result.IntVal = Orig.IntVal - Src2.IntVal; break;
+  case AtomicRMWInst::BinOp::And: Result.IntVal = Orig.IntVal & Src2.IntVal; break;
+  case AtomicRMWInst::BinOp::Or:  Result.IntVal = Orig.IntVal | Src2.IntVal; break;
+  case AtomicRMWInst::BinOp::Xor: Result.IntVal = Orig.IntVal ^ Src2.IntVal; break;
+  }
+  GenericValue Val = getOperandValue(I.getOperand(0), SF);
+  StoreValueToMemory(Result, Ptr, I.getValOperand()->getType());
+  if (StoreListener)
+  {
+    assert(ForPreExecute);
+    StoreListener(GVTORP(Src1));
+  }
+  // atomicrmw yields the value loaded before the operation.
+  SetValue(&I, Orig, SF);
+}
+
+void Interpreter::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I)
+{
+  ExecutionContext &SF = ECStack.back();
+  GenericValue Src1 = getOperandValue(I.getPointerOperand(), SF);
+  GenericValue *Ptr = (GenericValue*)GVTORP(Src1);
+  GenericValue Cmp = getOperandValue(I.getCompareOperand(), SF);
+  GenericValue NewVal = getOperandValue(I.getNewValOperand(), SF);
+  GenericValue Orig;
+  GenericValue Result;
+  GenericValue Equal;
+
+  // Load the original value at the pointer.
+  LoadValueFromMemory(Orig, Ptr, I.getNewValOperand()->getType());
+
+  // Compare the original and the compare operand.
+  // If they are equal, store the newval.
+  Equal.IntVal = APInt(1, Orig.IntVal.eq(Cmp.IntVal));
+  if (Equal.IntVal == 1)
+  {
+    StoreValueToMemory(NewVal, Ptr, I.getNewValOperand()->getType());
+    if (StoreListener)
+    {
+      assert(ForPreExecute);
+      StoreListener(GVTORP(Src1));
+    }
+  }
+
+  // Now build the resulting value struct.
+  Result.AggregateVal.resize(2);
+  Result.AggregateVal[0] = Orig;
+  Result.AggregateVal[1] = Equal;
+  SetValue(&I, Result, SF);
+}
+
+void Interpreter::visitFenceInst(FenceInst &I)
+{
+  // The PreExecuter is single-threaded, so a fence is a no-op.
+  return;
+}
+
 GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, ExecutionContext &SF) {
   switch (CE->getOpcode()) {
diff --git a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
index c2f97bed0298..132d2830c0dc 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -222,6 +222,10 @@ class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> {
   void visitExtractValueInst(ExtractValueInst &I);
   void visitInsertValueInst(InsertValueInst &I);

+  void visitAtomicRMWInst(AtomicRMWInst &I);
+  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
+  void visitFenceInst(FenceInst &I);
+
   void visitInstruction(Instruction &I) {
     errs() << I << "\n";
     llvm_unreachable("Instruction not interpretable yet!");
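The aggregate the interpreter builds mirrors LLVM's cmpxchg contract: element 0 is
the value loaded before the operation, element 1 is an i1 success flag, and the
store happens only on success. A minimal single-threaded model of that contract,
using plain integers rather than GenericValue:

    #include <cassert>
    #include <cstdint>
    #include <utility>

    // Single-threaded model of LLVM's cmpxchg: returns {original value, success}.
    static std::pair<uint32_t, bool> cmpxchg(uint32_t* ptr, uint32_t expected,
                                             uint32_t desired) {
        uint32_t orig = *ptr;          // load the original value
        bool equal = (orig == expected);
        if (equal)
            *ptr = desired;            // store only on success
        return {orig, equal};
    }

    int main() {
        uint32_t cell = 5;
        auto r1 = cmpxchg(&cell, 5, 9); // succeeds: cell becomes 9
        assert(r1.first == 5 && r1.second && cell == 9);
        auto r2 = cmpxchg(&cell, 5, 7); // fails: cell unchanged, old value returned
        assert(r2.first == 9 && !r2.second && cell == 9);
    }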
From 621fae3e5714c5f53237f1703ee7548b5224d99d Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Wed, 13 Dec 2023 09:49:38 +0100
Subject: [PATCH 05/11] Track atomic use in GDA.

---
 llvm/include/llvm/Cheerp/GlobalDepsAnalyzer.h |  6 ++++++
 llvm/lib/CheerpUtils/GlobalDepsAnalyzer.cpp   | 15 +++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/llvm/include/llvm/Cheerp/GlobalDepsAnalyzer.h b/llvm/include/llvm/Cheerp/GlobalDepsAnalyzer.h
index ba658d673157..484b0a75e5bf 100644
--- a/llvm/include/llvm/Cheerp/GlobalDepsAnalyzer.h
+++ b/llvm/include/llvm/Cheerp/GlobalDepsAnalyzer.h
@@ -137,6 +137,8 @@ class GlobalDepsAnalyzer
	 */
	bool usesAsmJSMalloc() const { return hasAsmJSMalloc; }

+	bool usesAtomics() const { return hasAtomics; }
+
	bool runOnModule( llvm::Module & );

	void visitType( llvm::Type* t, bool forceTypedArray );
@@ -242,6 +244,9 @@ class GlobalDepsAnalyzer
	//Extend lifetime of function, visiting them and declaring external
	void extendLifetime(llvm::Function* F);

+	//Determine whether an instruction is atomic.
+	bool isAtomicInstruction(const llvm::Instruction& I);
+
	std::unordered_set< const llvm::GlobalValue * > reachableGlobals; // Set of all the reachable globals

	FixupMap varsFixups;
@@ -270,6 +275,7 @@ class GlobalDepsAnalyzer
	bool hasVAArgs;
	bool hasPointerArrays;
	bool hasAsmJSCode;
+	bool hasAtomics;
	bool hasAsmJSMemory;
	bool hasAsmJSMalloc;
	bool hasCheerpException;
diff --git a/llvm/lib/CheerpUtils/GlobalDepsAnalyzer.cpp b/llvm/lib/CheerpUtils/GlobalDepsAnalyzer.cpp
index 9b8be6627011..ad53305356c3 100644
--- a/llvm/lib/CheerpUtils/GlobalDepsAnalyzer.cpp
+++ b/llvm/lib/CheerpUtils/GlobalDepsAnalyzer.cpp
@@ -144,11 +144,23 @@ void GlobalDepsAnalyzer::replaceFunctionAliasWithAliasee(llvm::Module &module, S
	}
 }

+bool GlobalDepsAnalyzer::isAtomicInstruction(const llvm::Instruction& I)
+{
+	if (isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I) || isa<FenceInst>(I))
+		return true;
+	else if (const llvm::LoadInst* li = dyn_cast<llvm::LoadInst>(&I))
+		return li->isAtomic();
+	else if (const llvm::StoreInst* si = dyn_cast<llvm::StoreInst>(&I))
+		return si->isAtomic();
+	return false;
+}
+
 bool GlobalDepsAnalyzer::runOnModule( llvm::Module & module )
 {
	DL = &module.getDataLayout();
	assert(DL);
	VisitedSet visited;
+	hasAtomics = false;

	replaceFunctionAliasWithAliasee(module, "malloc");
	replaceFunctionAliasWithAliasee(module, "calloc");
@@ -209,6 +221,9 @@ bool GlobalDepsAnalyzer::runOnModule( llvm::Module & module )
					break;
				Instruction& I = *instructionIterator;

+				if (isAtomicInstruction(I))
+					hasAtomics = true;
+
				if (isa<CallInst>(I))
				{
					CallInst* ci = cast<CallInst>(&I);
					Function* calledFunc = ci->getCalledFunction();

From 6bb5bbfa78ff796b71e6747515eaa22f2675428b Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Mon, 11 Dec 2023 15:49:28 +0100
Subject: [PATCH 06/11] Add support for Atomic functions to NameGenerator

---
 llvm/include/llvm/Cheerp/NameGenerator.h | 9 +++++++++
 llvm/lib/CheerpWriter/NameGenerator.cpp  | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/llvm/include/llvm/Cheerp/NameGenerator.h b/llvm/include/llvm/Cheerp/NameGenerator.h
index c0b84e184276..788778ea358d 100644
--- a/llvm/include/llvm/Cheerp/NameGenerator.h
+++ b/llvm/include/llvm/Cheerp/NameGenerator.h
@@ -68,6 +68,15 @@ class NameGenerator
		HANDLE_VAARG,
		EXCEPTION,
		FETCHBUFFER,
+		ATOMICLOAD,
+		ATOMICSTORE,
+		ATOMICADD,
+		ATOMICSUB,
+		ATOMICAND,
+		ATOMICOR,
+		ATOMICXOR,
+		ATOMICXCHG,
+		ATOMICCMPXCHG,
		MEMORY,
		HEAP8,
		HEAP16,
diff --git a/llvm/lib/CheerpWriter/NameGenerator.cpp b/llvm/lib/CheerpWriter/NameGenerator.cpp
index 37d49bf1ed8d..17615cb078fc 100644
--- a/llvm/lib/CheerpWriter/NameGenerator.cpp
+++ b/llvm/lib/CheerpWriter/NameGenerator.cpp
@@ -678,6 +678,15 @@ void NameGenerator::generateReadableNames(const Module& M, const GlobalDepsAnaly
	builtins[EXCEPTION] = "$except";
	builtins[FETCHBUFFER] = "fetchBuffer";
	builtins[STACKPTR] = "__stackPtr";
+	builtins[ATOMICLOAD] = "__atomicload";
+	builtins[ATOMICSTORE] = "__atomicstore";
+	builtins[ATOMICADD] = "__atomicadd";
+	builtins[ATOMICSUB] = "__atomicsub";
+	builtins[ATOMICAND] = "__atomicand";
+	builtins[ATOMICOR] = "__atomicor";
+	builtins[ATOMICXOR] = "__atomicxor";
+	builtins[ATOMICXCHG] = "__atomicexchange";
+	builtins[ATOMICCMPXCHG] = "__atomiccompareExchange";
	builtins[HEAP8] = "HEAP8";
	builtins[HEAP16] = "HEAP16";
	builtins[HEAP32] = "HEAP32";
From 842f5e366b4c72b6b267037792aec2a161ccdeab Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Tue, 12 Dec 2023 15:13:26 +0100
Subject: [PATCH 07/11] Add atomics support to asmjs

---
 llvm/include/llvm/Cheerp/Writer.h      |   9 +-
 llvm/lib/CheerpWriter/CheerpWriter.cpp | 262 ++++++++++++++++++++++++-
 2 files changed, 265 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Cheerp/Writer.h b/llvm/include/llvm/Cheerp/Writer.h
index 0d79083767b5..52e37a330b59 100644
--- a/llvm/include/llvm/Cheerp/Writer.h
+++ b/llvm/include/llvm/Cheerp/Writer.h
@@ -453,11 +453,12 @@ class CheerpWriter final : public CheerpBaseWriter
	COMPILE_INSTRUCTION_FEEDBACK compileNotInlineableInstruction(const llvm::Instruction& I, PARENT_PRIORITY parentPrio);
	COMPILE_INSTRUCTION_FEEDBACK compileInlineableInstruction(const llvm::Instruction& I, PARENT_PRIORITY parentPrio);
	COMPILE_INSTRUCTION_FEEDBACK compileCallInstruction(const llvm::CallBase& I, PARENT_PRIORITY parentPrio);
-	void compileLoadElem(const llvm::Value* ptrOp, llvm::Type* Ty, llvm::StructType* STy, POINTER_KIND ptrKind, POINTER_KIND loadKind, bool isOffset, Registerize::REGISTER_KIND regKind, uint32_t structElemIdx, bool asmjs, PARENT_PRIORITY parentPrio);
-	void compileLoadElem(const llvm::Value* ptrOp, llvm::Type* Ty, POINTER_KIND ptrKind, POINTER_KIND loadKind, bool isOffset, Registerize::REGISTER_KIND regKind, bool asmjs, PARENT_PRIORITY parentPrio);
+	void compileLoadElem(const llvm::LoadInst& li, llvm::Type* Ty, llvm::StructType* STy, POINTER_KIND ptrKind, POINTER_KIND loadKind, bool isOffset, Registerize::REGISTER_KIND regKind, uint32_t structElemIdx, bool asmjs, PARENT_PRIORITY parentPrio);
	void compileLoad(const llvm::LoadInst& li, PARENT_PRIORITY parentPrio);
	void compileStoreElem(const llvm::StoreInst& si, llvm::Type* Ty, llvm::StructType* STy, POINTER_KIND ptrKind, POINTER_KIND storedKind, bool isOffset, Registerize::REGISTER_KIND regKind, uint32_t structElemIdx, uint32_t elemIdx, bool asmjs);
	void compileStore(const llvm::StoreInst& si);
+	void compileAtomicRMW(const llvm::AtomicRMWInst& ai, PARENT_PRIORITY parentPrio);
+	void compileAtomicCmpXchg(const llvm::AtomicCmpXchgInst& ai, PARENT_PRIORITY parentPrio);

	void compileSignedInteger(const llvm::Value* v, bool forComparison, PARENT_PRIORITY parentPrio);
	void compileUnsignedInteger(const llvm::Value* v, bool forAsmJSComparison, PARENT_PRIORITY parentPrio, bool forceTruncation = false);
@@ -692,6 +693,10 @@ class CheerpWriter final : public CheerpBaseWriter
	 * Compile the function for growing the wasm linear memory
	 */
	void compileGrowMem();
+	/**
+	 * Compile the atomic functions
+	 */
+	void compileAtomicFunctions();
	/**
	 * Compile an helper function to assign all global heap symbols
	 */
diff --git a/llvm/lib/CheerpWriter/CheerpWriter.cpp b/llvm/lib/CheerpWriter/CheerpWriter.cpp
index 9160ae3a82f1..10ff6be58550 100644
--- a/llvm/lib/CheerpWriter/CheerpWriter.cpp
+++ b/llvm/lib/CheerpWriter/CheerpWriter.cpp
@@ -4391,6 +4391,18 @@ CheerpWriter::COMPILE_INSTRUCTION_FEEDBACK CheerpWriter::compileInlineableInstru
		}
		return COMPILE_OK;
	}
+	case Instruction::AtomicRMW:
+	{
+		const AtomicRMWInst& ai = cast<AtomicRMWInst>(I);
+		compileAtomicRMW(ai, parentPrio);
+		return COMPILE_OK;
+	}
+	case Instruction::AtomicCmpXchg:
+	{
+		const AtomicCmpXchgInst& ai = cast<AtomicCmpXchgInst>(I);
+		compileAtomicCmpXchg(ai, parentPrio);
+		return COMPILE_OK;
+	}
	default:
		stream << "alert('Unsupported code')";
		llvm::errs() << "\tImplement inst " << I.getOpcodeName() << '\n';
@@ -4449,7 +4461,7 @@ void CheerpWriter::compileLoad(const LoadInst& li, PARENT_PRIORITY parentPrio)
			elemPtrKind = PA.getPointerKind(&li);
		}
		bool isOffset = ie.ptrIdx == 1;
-		compileLoadElem(ptrOp, Ty, STy, ptrKind, elemPtrKind, isOffset, elemRegKind, ie.structIdx, asmjs, parentPrio);
+		compileLoadElem(li, Ty, STy, ptrKind, elemPtrKind, isOffset, elemRegKind, ie.structIdx, asmjs, parentPrio);
		if(needsCheckBounds)
		{
			needsCheckBounds = false;
@@ -4458,9 +4470,42 @@
	}
 }

-void CheerpWriter::compileLoadElem(const Value* ptrOp, Type* Ty, StructType* STy, POINTER_KIND ptrKind, POINTER_KIND loadKind, bool isOffset, Registerize::REGISTER_KIND regKind, uint32_t structElemIdx, bool asmjs, PARENT_PRIORITY parentPrio)
+void CheerpWriter::compileLoadElem(const LoadInst& li, Type* Ty, StructType* STy, POINTER_KIND ptrKind, POINTER_KIND loadKind, bool isOffset, Registerize::REGISTER_KIND regKind, uint32_t structElemIdx, bool asmjs, PARENT_PRIORITY parentPrio)
 {
-	if(regKind==Registerize::INTEGER && needsIntCoercion(parentPrio))
+	const Value* ptrOp = li.getPointerOperand();
+	if (li.isAtomic())
+	{
+		assert(!STy);
+		assert(!isOffset);
+		PARENT_PRIORITY shiftPrio = SHIFT;
+		uint32_t shift = getHeapShiftForType(Ty);
+		if (shift == 0)
+			shiftPrio = LOWEST;
+		if (parentPrio > BIT_OR)
+			stream << "(";
+		stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICLOAD) << "(";
+		if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8))
+			stream << "8,";
+		else if (Ty->isIntegerTy(16))
+			stream << "16,";
+		else if (Ty->isIntegerTy(32))
+			stream << "32,";
+		else if (Ty->isIntegerTy(64) && UseBigInts && LinearOutput!=AsmJs)
+			stream << "64,";
+		else
+			llvm::report_fatal_error("Unsupported bitwidth for atomic load");
+		compileRawPointer(ptrOp, shiftPrio);
+		if (shift != 0)
+			stream << ">>" << shift;
+		stream << ")";
+
+		if (li.getType()->isIntegerTy() && parentPrio != BIT_OR)
+			stream << "|0";
+		if (parentPrio > BIT_OR)
+			stream << ")";
+		return;
+	}
+	else if(regKind==Registerize::INTEGER && needsIntCoercion(parentPrio))
	{
		if (parentPrio > BIT_OR)
			stream << '(';
@@ -4636,6 +4681,34 @@ void CheerpWriter::compileStoreElem(const StoreInst& si, Type* Ty, StructType* S
	const Value* ptrOp=si.getPointerOperand();
	const Value* valOp=si.getValueOperand();
	assert(ptrKind != CONSTANT);
+	if (si.isAtomic())
+	{
+		assert(!STy);
+		assert(!isOffset);
+		stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICSTORE) << "(";
+		Type* t = valOp->getType();
+		PARENT_PRIORITY shiftPrio = SHIFT;
+		uint32_t shift = getHeapShiftForType(t);
+		if (shift == 0)
+			shiftPrio = LOWEST;
+		if (t->isIntegerTy(1) || t->isIntegerTy(8))
+			stream << "8,";
+		else if (t->isIntegerTy(16))
+			stream << "16,";
+		else if (t->isIntegerTy(32))
+			stream << "32,";
+		else if (t->isIntegerTy(64) && UseBigInts && LinearOutput!=AsmJs)
+			stream << "64,";
+		else
+			llvm::report_fatal_error("Unsupported bitwidth for atomic store");
+		compileRawPointer(ptrOp, shiftPrio);
+		if (shift != 0)
+			stream << ">>" << shift;
+		stream << ",";
+		compileOperand(valOp, BIT_OR);
+		stream << "|0)";
+		return;
+	}
	if (RAW == ptrKind || (asmjs && ptrKind == CONSTANT))
	{
		assert(!isOffset);
@@ -4739,6 +4812,107 @@
	}
 }

+void CheerpWriter::compileAtomicRMW(const AtomicRMWInst& ai, PARENT_PRIORITY parentPrio)
+{
+	if (parentPrio > BIT_OR)
+		stream << "(";
+	switch(ai.getOperation())
+	{
+		case AtomicRMWInst::BinOp::Xchg:
+			stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICXCHG) << "(";
+			break;
+		case AtomicRMWInst::BinOp::Add:
+			stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICADD) << "(";
+			break;
+		case AtomicRMWInst::BinOp::Sub:
+			stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICSUB) << "(";
+			break;
+		case AtomicRMWInst::BinOp::And:
+			stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICAND) << "(";
+			break;
+		case AtomicRMWInst::BinOp::Or:
+			stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICOR) << "(";
+			break;
+		case AtomicRMWInst::BinOp::Xor:
+			stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICXOR) << "(";
+			break;
+		default:
+			llvm::report_fatal_error("Unsupported atomicrmw opcode");
+	}
+	const Value* ptrOp=ai.getPointerOperand();
+	const Value* valOp=ai.getValOperand();
+	Type* t = valOp->getType();
+	PARENT_PRIORITY shiftPrio = SHIFT;
+	uint32_t shift = getHeapShiftForType(t);
+	if (shift == 0)
+		shiftPrio = LOWEST;
+	if (t->isIntegerTy(1) || t->isIntegerTy(8))
+		stream << "8,";
+	else if (t->isIntegerTy(16))
+		stream << "16,";
+	else if (t->isIntegerTy(32))
+		stream << "32,";
+	else if (t->isIntegerTy(64) && UseBigInts && LinearOutput!=AsmJs)
+		stream << "64,";
+	else
+		llvm::report_fatal_error("Unsupported bitwidth for atomicrmw");
+	compileRawPointer(ptrOp, shiftPrio);
+	if (shift != 0)
+		stream << ">>" << shift;
+	stream << ",";
+	compileOperand(valOp, BIT_OR);
+	stream << "|0)";
+
+	if (ai.getType()->isIntegerTy() && parentPrio != BIT_OR)
+		stream << "|0";
+	if (parentPrio > BIT_OR)
+		stream << ")";
+}
+
+void CheerpWriter::compileAtomicCmpXchg(const AtomicCmpXchgInst& ai, PARENT_PRIORITY parentPrio)
+{
+	const Value* ptrOp=ai.getPointerOperand();
+	const Value* cmpOp=ai.getCompareOperand();
+	const Value* newValOp=ai.getNewValOperand();
+	Type* t = newValOp->getType();
+	PARENT_PRIORITY shiftPrio = SHIFT;
+	uint32_t shift = getHeapShiftForType(t);
+	if (shift == 0)
+		shiftPrio = LOWEST;
+
+	stream << namegen.getBuiltinName(NameGenerator::Builtin::ATOMICCMPXCHG) << "(";
+	if (t->isIntegerTy(1) || t->isIntegerTy(8))
+		stream << "8,";
+	else if (t->isIntegerTy(16))
+		stream << "16,";
+	else if (t->isIntegerTy(32))
+		stream << "32,";
+	else if (t->isIntegerTy(64) && UseBigInts && LinearOutput!=AsmJs)
+		stream << "64,";
+	else
+		llvm::report_fatal_error("Unsupported bitwidth for atomic cmpxchg");
+	compileRawPointer(ptrOp, shiftPrio);
+	if (shift != 0)
+		stream << ">>" << shift;
+	stream << ",";
+	compileOperand(cmpOp, BIT_OR);
+	stream << "|0,";
+	compileOperand(newValOp, BIT_OR);
+	stream << "|0)|0";
+
+	// Compile the second part of this instruction, the comparison between the loaded value
+	// and the compare operand. A compare operand to a cmpxchg instruction cannot be inlined, so
+	// calling compileOperand twice is safe.
+	// We only compile this part if this instruction has uses.
+	if (!ai.use_empty())
+	{
+		stream << ";" << NewLine;
+		stream << namegen.getName(&ai, 1) << "=(" << namegen.getName(&ai, 0) << "|0)==(";
+		compileOperand(cmpOp, BIT_OR);
+		stream << "|0)";
+	}
+}
+
 CheerpWriter::COMPILE_INSTRUCTION_FEEDBACK CheerpWriter::compileCallInstruction(const CallBase& ci, PARENT_PRIORITY parentPrio)
 {
	bool asmjs = currentFun->getSection() == StringRef("asmjs");
@@ -6044,6 +6218,64 @@ void CheerpWriter::compileGrowMem()
	stream << "}" << NewLine;
 }

+void CheerpWriter::compileAtomicFunctions()
+{
+	auto funcName = namegen.getBuiltinName(NameGenerator::Builtin::ATOMICLOAD);
+	stream << "function " << funcName << "(bitwidth, addr){" << NewLine;
+	stream << "if(bitwidth==8)" << NewLine;
+	stream << "return Atomics.load(" << getHeapName(HEAP8) << ", addr);" << NewLine;
+	stream << "else if(bitwidth==16)" << NewLine;
+	stream << "return Atomics.load(" << getHeapName(HEAP16) << ", addr);" << NewLine;
+	stream << "else if(bitwidth==32)" << NewLine;
+	stream << "return Atomics.load(" << getHeapName(HEAP32) << ", addr);" << NewLine;
+	if (UseBigInts && LinearOutput!=AsmJs)
+	{
+		stream << "else if(bitwidth==64)" << NewLine;
+		stream << "return Atomics.load(" << getHeapName(HEAP64) << ", addr);" << NewLine;
+	}
+	stream << "else " << NewLine;
+	stream << "throw new Error('Wrong bitwidth');" << NewLine;
+	stream << "}" << NewLine;
+	std::vector<const char*> opNames={"store","add","sub","and","or","xor","exchange"};
+	for (uint32_t i = 0; i < opNames.size(); i++)
+	{
+		auto b = static_cast<NameGenerator::Builtin>(i + NameGenerator::Builtin::ATOMICSTORE);
+		auto opName = opNames[i];
+		funcName = namegen.getBuiltinName(b);
+		stream << "function " << funcName << "(bitwidth, addr, val){" << NewLine;
+		stream << "if(bitwidth==8)" << NewLine;
+		stream << "return Atomics." << opName << "(" << getHeapName(HEAP8) << ", addr, val);" << NewLine;
+		stream << "else if(bitwidth==16)" << NewLine;
+		stream << "return Atomics." << opName << "(" << getHeapName(HEAP16) << ", addr, val);" << NewLine;
+		stream << "else if(bitwidth==32)" << NewLine;
+		stream << "return Atomics." << opName << "(" << getHeapName(HEAP32) << ", addr, val);" << NewLine;
+		if (UseBigInts && LinearOutput!=AsmJs)
+		{
+			stream << "else if(bitwidth==64)" << NewLine;
+			stream << "return Atomics." << opName << "(" << getHeapName(HEAP64) << ", addr, val);" << NewLine;
+		}
+		stream << "else " << NewLine;
+		stream << "throw new Error('Wrong bitwidth');" << NewLine;
+		stream << "}" << NewLine;
+	}
+	funcName = namegen.getBuiltinName(NameGenerator::Builtin::ATOMICCMPXCHG);
+	stream << "function " << funcName << "(bitwidth, addr, expected, replacement){" << NewLine;
+	stream << "if(bitwidth==8)" << NewLine;
+	stream << "return Atomics.compareExchange(" << getHeapName(HEAP8) << ", addr, expected, replacement);" << NewLine;
+	stream << "else if(bitwidth==16)" << NewLine;
+	stream << "return Atomics.compareExchange(" << getHeapName(HEAP16) << ", addr, expected, replacement);" << NewLine;
+	stream << "else if(bitwidth==32)" << NewLine;
+	stream << "return Atomics.compareExchange(" << getHeapName(HEAP32) << ", addr, expected, replacement);" << NewLine;
+	if (UseBigInts && LinearOutput!=AsmJs)
+	{
+		stream << "else if(bitwidth==64)" << NewLine;
+		stream << "return Atomics.compareExchange(" << getHeapName(HEAP64) << ", addr, expected, replacement);" << NewLine;
+	}
+	stream << "else " << NewLine;
+	stream << "throw new Error('Wrong bitwidth');" << NewLine;
+	stream << "}" << NewLine;
+}
+
 void CheerpWriter::compileMathDeclAsmJS()
 {
	stream << "var Infinity=stdlib.Infinity;" << NewLine;
@@ -6335,6 +6567,14 @@ void CheerpWriter::compileAsmJSClosure()
		stream << namegen.getBuiltinName(NameGenerator::Builtin::GROW_MEM);
		stream << ';' << NewLine;
	}
+	if (globalDeps.usesAtomics())
+	{
+		for (int i = NameGenerator::Builtin::ATOMICLOAD; i <= NameGenerator::Builtin::ATOMICCMPXCHG; i++)
+		{
+			auto b = static_cast<NameGenerator::Builtin>(i);
+			stream << "var " << namegen.getBuiltinName(b) << "=ffi." << namegen.getBuiltinName(b) << ";" << NewLine;
+		}
+	}

	// Declare globals
	for ( const GlobalVariable* GV : linearHelper.globals() )
@@ -6378,6 +6618,14 @@ void CheerpWriter::compileAsmJSffiObject()
		stream << namegen.getBuiltinName(NameGenerator::Builtin::GROW_MEM);
		stream << ',' << NewLine;
	}
+	if (globalDeps.usesAtomics())
+	{
+		for (int i = NameGenerator::Builtin::ATOMICLOAD; i <= NameGenerator::Builtin::ATOMICCMPXCHG; i++)
+		{
+			auto b = static_cast<NameGenerator::Builtin>(i);
+			stream << namegen.getBuiltinName(b) << ":" << namegen.getBuiltinName(b) << "," << NewLine;
+		}
+	}
	stream << "}";
 }

@@ -6385,7 +6633,10 @@ void CheerpWriter::compileAsmJSTopLevel()
 {
	compileDummies();

-	stream << "var __heap = new ArrayBuffer("<
From: Mark Peerdeman
Date: Tue, 12 Dec 2023 15:14:15 +0100
Subject: [PATCH 08/11] Introduce CheerpLowerAtomic pass

This pass is meant to remove atomics by invoking the existing
LowerAtomicPass only on genericjs functions.

---
 clang/lib/CodeGen/BackendUtil.cpp            |  3 ---
 clang/lib/Driver/ToolChains/WebAssembly.cpp  |  5 ++++
 llvm/include/llvm/Cheerp/CheerpLowerAtomic.h | 18 +++++++++++++
 llvm/include/llvm/Cheerp/CommandLine.h       |  1 +
 llvm/include/llvm/Cheerp/PassRegistry.h      |  1 +
 llvm/lib/CheerpUtils/CMakeLists.txt          |  1 +
 llvm/lib/CheerpUtils/CheerpLowerAtomic.cpp   | 27 ++++++++++++++++++++
 llvm/lib/CheerpUtils/CommandLine.cpp         |  2 ++
 llvm/lib/Passes/PassRegistry.def             |  1 +
 9 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 llvm/include/llvm/Cheerp/CheerpLowerAtomic.h
 create mode 100644 llvm/lib/CheerpUtils/CheerpLowerAtomic.cpp

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index ab89d0364a9f..00f1cf910fb7 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -87,7 +87,6 @@
 #include "llvm/Transforms/Scalar/EarlyCSE.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/JumpThreading.h"
-#include "llvm/Transforms/Scalar/LowerAtomicPass.h"
 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
@@ -948,8 +947,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
    //We need this to track this in custom constructors for DOM types, such as String::String(const char*)
    MPM.addPass(createModuleToFunctionPassAdaptor(cheerp::RequiredPassWrapper()));
    MPM.addPass(createModuleToFunctionPassAdaptor(cheerp::CheerpNativeRewriterPass()));
-    //Cheerp is single threaded, convert atomic instructions to regular ones
-    MPM.addPass(createModuleToFunctionPassAdaptor(LowerAtomicPass()));
  });
  PB.registerOptimizerLastEPCallback(
      [](ModulePassManager &MPM, OptimizationLevel Level) {
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index e9e7cb1c11c7..267db8af2561 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -697,9 +697,14 @@ void cheerp::CheerpOptimizer::ConstructJob(Compilation &C, const JobAction &JA,
  if (Args.hasArg(options::OPT_cheerp_no_icf))
    CmdArgs.push_back("-cheerp-no-icf");

+  if (!Args.hasArg(options::OPT_pthread))
+    CmdArgs.push_back("-cheerp-lower-atomics");
+
  addPass("function(CheerpLowerInvoke)");
  if (Args.hasArg(options::OPT_fexceptions))
    CmdArgs.push_back("-cheerp-keep-invokes");
+  // This pass will remove atomics from genericjs functions
+  addPass("CheerpLowerAtomic");
  addPass("function(simplifycfg)");
  addPass("CallConstructors");

diff --git a/llvm/include/llvm/Cheerp/CheerpLowerAtomic.h b/llvm/include/llvm/Cheerp/CheerpLowerAtomic.h
new file mode 100644
index 000000000000..a4b1c98a5ad6
--- /dev/null
+++ b/llvm/include/llvm/Cheerp/CheerpLowerAtomic.h
@@ -0,0 +1,18 @@
+#ifndef CHEERP_LOWER_ATOMIC_H
+#define CHEERP_LOWER_ATOMIC_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace cheerp
+{
+
+class CheerpLowerAtomicPass : public llvm::PassInfoMixin<CheerpLowerAtomicPass>
+{
+public:
+	llvm::PreservedAnalyses run(llvm::Module& M, llvm::ModuleAnalysisManager& MAM);
+	static bool isRequired() { return true; }
+};
+
+}
+
+#endif
diff --git a/llvm/include/llvm/Cheerp/CommandLine.h b/llvm/include/llvm/Cheerp/CommandLine.h
index b121a160aeac..eb24168bd54e 100644
--- a/llvm/include/llvm/Cheerp/CommandLine.h
+++ b/llvm/include/llvm/Cheerp/CommandLine.h
@@ -60,5 +60,6 @@ extern llvm::cl::opt<bool> WasmNoUnalignedMem;
 extern llvm::cl::opt<bool> UseBigInts;
 extern llvm::cl::opt<bool> KeepInvokes;
 extern llvm::cl::opt<bool> PreserveFree;
+extern llvm::cl::opt<bool> LowerAtomics;

 #endif //_CHEERP_COMMAND_LINE_H
diff --git a/llvm/include/llvm/Cheerp/PassRegistry.h b/llvm/include/llvm/Cheerp/PassRegistry.h
index 637b607d98cd..770c22e9fe7e 100644
--- a/llvm/include/llvm/Cheerp/PassRegistry.h
+++ b/llvm/include/llvm/Cheerp/PassRegistry.h
@@ -40,6 +40,7 @@
 #include "llvm/Cheerp/StoreMerging.h"
 #include "llvm/Cheerp/CallConstructors.h"
 #include "llvm/Cheerp/CommandLine.h"
+#include "llvm/Cheerp/CheerpLowerAtomic.h"

 namespace cheerp
 {
diff --git a/llvm/lib/CheerpUtils/CMakeLists.txt b/llvm/lib/CheerpUtils/CMakeLists.txt
index c7987f9b4da4..74ceb1015ed8 100644
--- a/llvm/lib/CheerpUtils/CMakeLists.txt
+++ b/llvm/lib/CheerpUtils/CMakeLists.txt
@@ -33,6 +33,7 @@ add_llvm_component_library(LLVMCheerpUtils
   BitCastLowering.cpp
   JSStringLiteralLowering.cpp
   MemoryInit.cpp
+  CheerpLowerAtomic.cpp
   )

 add_dependencies(LLVMCheerpUtils intrinsics_gen)
diff --git a/llvm/lib/CheerpUtils/CheerpLowerAtomic.cpp b/llvm/lib/CheerpUtils/CheerpLowerAtomic.cpp
new file mode 100644
index 000000000000..3b8ee9cd7eba
--- /dev/null
+++ b/llvm/lib/CheerpUtils/CheerpLowerAtomic.cpp
@@ -0,0 +1,27 @@
+#include "llvm/Cheerp/CheerpLowerAtomic.h"
+#include "llvm/Cheerp/CommandLine.h"
+#include "llvm/Transforms/Scalar/LowerAtomicPass.h"
+
+using namespace llvm;
+using namespace cheerp;
+
+// Module pass that invokes the LLVM LowerAtomicPass on genericjs functions.
+PreservedAnalyses CheerpLowerAtomicPass::run(Module& M, ModuleAnalysisManager& MAM)
+{
+	FunctionAnalysisManager& FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+	FunctionPassManager FPM;
+	FPM.addPass(LowerAtomicPass());
+
+	// Loop over the functions, and only pass genericjs ones to LowerAtomicPass
+	for (Function& F : M)
+	{
+		if (F.isDeclaration())
+			continue;
+
+		if (!LowerAtomics && F.getSection() == "asmjs")
+			continue;
+
+		FPM.run(F, FAM);
+	}
+
+	return PreservedAnalyses::none();
+}
diff --git a/llvm/lib/CheerpUtils/CommandLine.cpp b/llvm/lib/CheerpUtils/CommandLine.cpp
index 5cd5a3237e5e..ee03f2541473 100644
--- a/llvm/lib/CheerpUtils/CommandLine.cpp
+++ b/llvm/lib/CheerpUtils/CommandLine.cpp
@@ -97,3 +97,5 @@ llvm::cl::opt<std::string> EnvironName("cheerp-environ-name", llvm::cl::Optional

 llvm::cl::opt<std::string> ArgvName("cheerp-argv-name", llvm::cl::Optional,
	llvm::cl::desc("If specified, the identifier name storing the arguments"), llvm::cl::value_desc("name"));
+
+llvm::cl::opt<bool> LowerAtomics("cheerp-lower-atomics", llvm::cl::desc("Lower all atomic operations"));
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 8f428e76b6ac..d5056de81816 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -139,6 +139,7 @@ MODULE_PASS("PartialExecuter", cheerp::PartialExecuterPass())
 MODULE_PASS("PreExecute", cheerp::PreExecutePass())
 MODULE_PASS("FreeAndDeleteRemoval", cheerp::FreeAndDeleteRemovalPass())
 MODULE_PASS("CallConstructors", cheerp::CallConstructorsPass())
+MODULE_PASS("CheerpLowerAtomic", cheerp::CheerpLowerAtomicPass())
 #undef MODULE_PASS

 #ifndef MODULE_PASS_WITH_PARAMS
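Assuming the registration above, the pass should be exercisable in isolation with
the new pass manager, e.g. `opt -passes=CheerpLowerAtomic in.bc -o out.bc`; adding
the `-cheerp-lower-atomics` flag additionally lowers atomics inside asmjs
functions instead of only genericjs ones. (Invocation shape is a sketch based on
the PassRegistry.def and CommandLine.cpp entries in this patch, not a documented
Cheerp workflow.)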
From 5745eea55d666b04c90859205ae1dbf05f60bbfe Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Thu, 1 Feb 2024 11:48:22 +0100
Subject: [PATCH 09/11] Fix TypeOptimizer to allow literal structs.

TypeOptimizer used to create new named structs as copies of literal
structs. This caused a problem for literal structs that are the result
of a cmpxchg instruction: during inlining the original literal return
types are recreated, and those would conflict with the renamed copies
that went through TypeOptimizer.

In turn, this needed a tiny fix in isJSExportedType so that literal
structs (which have no name) do not cause a crash.

---
 llvm/lib/CheerpUtils/TypeOptimizer.cpp | 27 +++++++++++++++++++++-----
 llvm/lib/CheerpUtils/Utility.cpp       |  2 ++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CheerpUtils/TypeOptimizer.cpp b/llvm/lib/CheerpUtils/TypeOptimizer.cpp
index 85d7737585cf..42d803eced60 100644
--- a/llvm/lib/CheerpUtils/TypeOptimizer.cpp
+++ b/llvm/lib/CheerpUtils/TypeOptimizer.cpp
@@ -289,6 +289,11 @@ bool TypeOptimizer::isUnsafeDowncastSource(StructType* st)

 bool TypeOptimizer::canCollapseStruct(llvm::StructType* st, llvm::StructType* newStruct, llvm::Type* newType)
 {
+	if (newStruct == nullptr)
+	{
+		assert(st->isLiteral());
+		return false;
+	}
	// Stop if the element is just a int8, we may be dealing with an empty struct
	// Empty structs are unsafe as the int8 inside is just a placeholder and will be replaced
	// by a different type in a derived class
@@ -445,8 +450,11 @@ TypeOptimizer::TypeMappingInfo TypeOptimizer::rewriteType(Type* t)
			return CacheAndReturn(newType, TypeMappingInfo::BYTE_LAYOUT_TO_ARRAY);
		}

-		// Generate a new type inconditionally, it may end up being the same as the old one
-		StructType* newStruct=StructType::create(st->getContext());
+		// Generate a new type if it's not a literal struct. It may end up being the same as the old one
+		// In case of literal, it will be created as a literal at the end.
+		StructType* newStruct=nullptr;
+		if (!st->isLiteral())
+			newStruct=StructType::create(st->getContext());
 #ifndef NDEBUG
		newStructTypes.insert(newStruct);
 #endif
@@ -457,7 +465,8 @@
			newStruct->setName(name);
		}
		// Tentatively map the type to the newStruct, it may be overridden if the type is collapsed
-		typesMapping[t] = TypeMappingInfo(newStruct, TypeMappingInfo::IDENTICAL);
+		if (!st->isLiteral())
+			typesMapping[t] = TypeMappingInfo(newStruct, TypeMappingInfo::IDENTICAL);

		// Since we can merge arrays of the same type in an struct it is possible that at the end of the process a single type will remain
		TypeMappingInfo::MAPPING_KIND newStructKind = TypeMappingInfo::IDENTICAL;
@@ -556,7 +565,9 @@
		std::vector<std::pair<uint32_t, uint32_t>> mergedInts;
		uint32_t directBaseLimit=0;
		// We may need to update the bases metadata for this type
-		NamedMDNode* namedBasesMetadata = TypeSupport::getBasesMetadata(newStruct, *module);
+		NamedMDNode* namedBasesMetadata = nullptr;
+		if (!st->isLiteral())
+			namedBasesMetadata = TypeSupport::getBasesMetadata(newStruct, *module);
		uint32_t firstBaseBegin, firstBaseEnd;
		if(namedBasesMetadata)
		{
@@ -698,7 +709,13 @@
		}
		StructType* newDirectBase = st->getDirectBase() ? dyn_cast<StructType>(rewriteType(st->getDirectBase()).mappedType) : NULL;
-		newStruct->setBody(newTypes, st->isPacked(), newDirectBase, st->hasByteLayout(), st->hasAsmJS());
+		if (st->isLiteral())
+		{
+			newStruct = StructType::get(st->getContext(), newTypes, st->isPacked(), newDirectBase, st->hasByteLayout(), st->hasAsmJS());
+			typesMapping[t] = TypeMappingInfo(newStruct, TypeMappingInfo::IDENTICAL);
+		}
+		else
+			newStruct->setBody(newTypes, st->isPacked(), newDirectBase, st->hasByteLayout(), st->hasAsmJS());

		return CacheAndReturn(newStruct, newStructKind);
	}
diff --git a/llvm/lib/CheerpUtils/Utility.cpp b/llvm/lib/CheerpUtils/Utility.cpp
index 5fe55a9cdcad..c636174e9ca7 100644
--- a/llvm/lib/CheerpUtils/Utility.cpp
+++ b/llvm/lib/CheerpUtils/Utility.cpp
@@ -739,6 +739,8 @@ char TypeSupport::getPrefixCharForMember(const PointerAnalyzer& PA, llvm::Struct

 bool TypeSupport::isJSExportedType(StructType* st, const Module& m)
 {
+	if (st->isLiteral())
+		return false;
	return m.getNamedMetadata(llvm::Twine(st->getName(),"_methods"))!=NULL;
 }

From 3a20db1820193bf65abb3349df69d58815763e26 Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Thu, 8 Feb 2024 13:27:45 +0100
Subject: [PATCH 10/11] Compiler-rt Mutex disabled

The Mutex uses a 64-bit value to store its state and performs atomic
operations on it, which results in 64-bit atomic operations. These are
not supported in AsmJS, so for now the Mutex is disabled.

---
 compiler-rt/lib/sanitizer_common/sanitizer_mutex.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h b/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h
index b1a58e421d81..6a7fa6245ec7 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h
@@ -163,7 +163,11 @@ class SANITIZER_MUTEX Mutex : CheckedMutex {
   explicit constexpr Mutex(MutexType type = MutexUnchecked)
       : CheckedMutex(type) {}

+  // CHEERP: This mutex uses a 64-bit state value, meaning 64-bit atomic operations.
+  // These are not supported in asmjs currently, so we've disabled the Mutex
+  // by returning early from all functions.
   void Lock() SANITIZER_ACQUIRE() {
+    return;
     CheckedMutex::Lock();
     u64 reset_mask = ~0ull;
     u64 state = atomic_load_relaxed(&state_);
@@ -209,6 +213,7 @@ class SANITIZER_MUTEX Mutex : CheckedMutex {
   }

   bool TryLock() SANITIZER_TRY_ACQUIRE(true) {
+    return true;
     u64 state = atomic_load_relaxed(&state_);
     for (;;) {
       if (UNLIKELY(state & (kWriterLock | kReaderLockMask)))
@@ -223,6 +228,7 @@ class SANITIZER_MUTEX Mutex : CheckedMutex {
   }

   void Unlock() SANITIZER_RELEASE() {
+    return;
     CheckedMutex::Unlock();
     bool wake_writer;
     u64 wake_readers;
@@ -251,6 +257,7 @@ class SANITIZER_MUTEX Mutex : CheckedMutex {
   }

   void ReadLock() SANITIZER_ACQUIRE_SHARED() {
+    return;
     CheckedMutex::Lock();
     u64 reset_mask = ~0ull;
     u64 state = atomic_load_relaxed(&state_);
@@ -288,6 +295,7 @@ class SANITIZER_MUTEX Mutex : CheckedMutex {
   }

   void ReadUnlock() SANITIZER_RELEASE_SHARED() {
+    return;
     CheckedMutex::Unlock();
     bool wake;
     u64 new_state;
@@ -314,12 +322,14 @@ class SANITIZER_MUTEX Mutex : CheckedMutex {
   // maintaining complex state to work around those situations, the check only
   // checks that the mutex is owned.
   void CheckWriteLocked() const SANITIZER_CHECK_LOCKED() {
+    return;
     CHECK(atomic_load(&state_, memory_order_relaxed) & kWriterLock);
   }

   void CheckLocked() const SANITIZER_CHECK_LOCKED() { CheckWriteLocked(); }

   void CheckReadLocked() const SANITIZER_CHECK_LOCKED() {
+    return;
     CHECK(atomic_load(&state_, memory_order_relaxed) & kReaderLockMask);
   }
From c245c83be1ce004ee94a0dbd2815b424cc40f5cd Mon Sep 17 00:00:00 2001
From: Mark Peerdeman
Date: Thu, 8 Feb 2024 13:29:58 +0100
Subject: [PATCH 11/11] Set max bitwidths for atomic operations

---
 clang/lib/Basic/Targets/WebAssembly.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h
index cd7d2a7e8c03..52f36e8ba26a 100644
--- a/clang/lib/Basic/Targets/WebAssembly.h
+++ b/clang/lib/Basic/Targets/WebAssembly.h
@@ -212,6 +212,8 @@ class CheerpTargetInfo : public TargetInfo {
     LongDoubleWidth = LongDoubleAlign = 64;
     LongDoubleFormat = &llvm::APFloat::IEEEdouble();
     SizeType = UnsignedInt;
+    // We define these as 32-bit for now, since AsmJS cannot handle 64-bit atomic operations currently.
+    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;

     // Use 32-bit integers for two separated bit fields.
     UseBitFieldTypeAlignment = true;
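The practical effect of the 32-bit cap is easy to demonstrate: atomic operations up
to 32 bits stay inline, while 64-bit ones fall back to the __atomic_* runtime
library calls — the very pattern the disabled sanitizer Mutex would otherwise hit
with its 64-bit state word. A minimal sketch in standard C++, with no
Cheerp-specific API:

    #include <atomic>
    #include <cstdint>

    std::atomic<uint32_t> counter32{0};
    std::atomic<uint64_t> counter64{0}; // the sanitizer Mutex state is a word like this

    int main() {
        // 32-bit RMW: within MaxAtomicInlineWidth, lowered to an inline atomicrmw.
        counter32.fetch_add(1, std::memory_order_relaxed);
        // 64-bit RMW: wider than the 32-bit cap, so clang emits a call into the
        // __atomic_* runtime library instead of an inline atomic instruction.
        counter64.fetch_add(1, std::memory_order_relaxed);
    }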