From be6f50798e79336cdfd8fe464f37d41ac135640d Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 29 Oct 2020 11:06:20 -0700 Subject: [PATCH] [WebAssembly] Implement SIMD signselect instructions As proposed in https://github.com/WebAssembly/simd/pull/124, using the opcodes adopted by V8 in https://chromium-review.googlesource.com/c/v8/v8/+/2486235/2/src/wasm/wasm-opcodes.h. Uses new builtin functions and a new target intrinsic exclusively to ensure that the new instructions are only emitted when a user explicitly opts in to using them since they are still in the prototyping and evaluation phase. Differential Revision: https://reviews.llvm.org/D90357 --- .../clang/Basic/BuiltinsWebAssembly.def | 6 +++ clang/lib/CodeGen/CGBuiltin.cpp | 11 +++++ clang/test/CodeGen/builtins-wasm.c | 28 +++++++++++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 5 ++ .../WebAssembly/WebAssemblyInstrSIMD.td | 17 +++++++ .../CodeGen/WebAssembly/simd-intrinsics.ll | 48 +++++++++++++++++++ llvm/test/MC/WebAssembly/simd-encodings.s | 12 +++++ 7 files changed, 127 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index e86346d71c59..c1e594e147b3 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -134,6 +134,12 @@ TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_bitselect, "V4iV4iV4iV4i", "nc", "simd128") + +TARGET_BUILTIN(__builtin_wasm_signselect_i8x16, "V16ScV16ScV16ScV16Sc", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_signselect_i16x8, "V8sV8sV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_signselect_i32x4, "V4iV4iV4iV4i", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_signselect_i64x2, "V2LLiV2LLiV2LLiV2LLi", "nc", "simd128") + TARGET_BUILTIN(__builtin_wasm_shuffle_v8x16, 
"V16ScV16ScV16ScIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIi", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_any_true_i8x16, "iV16Sc", "nc", "simd128") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f933113fa883..7341a440b873 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -16675,6 +16675,17 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType())); return Builder.CreateCall(Callee, {V1, V2, C}); } + case WebAssembly::BI__builtin_wasm_signselect_i8x16: + case WebAssembly::BI__builtin_wasm_signselect_i16x8: + case WebAssembly::BI__builtin_wasm_signselect_i32x4: + case WebAssembly::BI__builtin_wasm_signselect_i64x2: { + Value *V1 = EmitScalarExpr(E->getArg(0)); + Value *V2 = EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_signselect, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {V1, V2, C}); + } case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index ee635317ff7a..e5c7211ad5be 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -622,6 +622,34 @@ i32x4 bitselect(i32x4 x, i32x4 y, i32x4 c) { // WEBASSEMBLY-NEXT: ret } +i8x16 signselect_i8x16(i8x16 x, i8x16 y, i8x16 c) { + return __builtin_wasm_signselect_i8x16(x, y, c); + // WEBASSEMBLY: call <16 x i8> @llvm.wasm.signselect.v16i8( + // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y, <16 x i8> %c) + // WEBASSEMBLY-NEXT: ret +} + +i16x8 signselect_i16x8(i16x8 x, i16x8 y, i16x8 c) { + return __builtin_wasm_signselect_i16x8(x, y, c); + // WEBASSEMBLY: call <8 x i16> @llvm.wasm.signselect.v8i16( + // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y, <8 x i16> %c) + // 
WEBASSEMBLY-NEXT: ret +} + +i32x4 signselect_i32x4(i32x4 x, i32x4 y, i32x4 c) { + return __builtin_wasm_signselect_i32x4(x, y, c); + // WEBASSEMBLY: call <4 x i32> @llvm.wasm.signselect.v4i32( + // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y, <4 x i32> %c) + // WEBASSEMBLY-NEXT: ret +} + +i64x2 signselect_i64x2(i64x2 x, i64x2 y, i64x2 c) { + return __builtin_wasm_signselect_i64x2(x, y, c); + // WEBASSEMBLY: call <2 x i64> @llvm.wasm.signselect.v2i64( + // WEBASSEMBLY-SAME: <2 x i64> %x, <2 x i64> %y, <2 x i64> %c) + // WEBASSEMBLY-NEXT: ret +} + i8x16 popcnt(i8x16 x) { return __builtin_wasm_popcnt_i8x16(x); // WEBASSEMBLY: call <16 x i8> @llvm.wasm.popcnt(<16 x i8> %x) diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 5c503a4d6436..f65b7457436b 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -276,6 +276,11 @@ def int_wasm_extmul_high_unsigned : [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], [IntrNoMem, IntrSpeculatable]>; +def int_wasm_signselect : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Thread-local storage intrinsics //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 7e78b6c0eecd..2d8c8160641e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -747,6 +747,23 @@ def : Pat<(select )>; } // foreach vec_t +// Sign select +multiclass SIMDSignSelect<ValueType vec_t, string vec, bits<32> simdop> { + defm SIGNSELECT_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), + [(set (vec_t V128:$dst), + (vec_t (int_wasm_signselect + (vec_t
V128:$v1), (vec_t V128:$v2), (vec_t V128:$c) + )) + )], + vec#".signselect\t$dst, $v1, $v2, $c", vec#".signselect", simdop>; +} + +defm : SIMDSignSelect<v16i8, "i8x16", 125>; +defm : SIMDSignSelect<v8i16, "i16x8", 126>; +defm : SIMDSignSelect<v4i32, "i32x4", 127>; +defm : SIMDSignSelect<v2i64, "i64x2", 148>; + //===----------------------------------------------------------------------===// // Integer unary arithmetic //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index b4edbd3daee2..aa4ff63b9676 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -127,6 +127,18 @@ define <16 x i8> @bitselect_v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %c) { ret <16 x i8> %a } +; CHECK-LABEL: signselect_v16i8: +; SIMD128-NEXT: .functype signselect_v16i8 (v128, v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i8x16.signselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <16 x i8> @llvm.wasm.signselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +define <16 x i8> @signselect_v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %c) { + %a = call <16 x i8> @llvm.wasm.signselect.v16i8( + <16 x i8> %v1, <16 x i8> %v2, <16 x i8> %c + ) + ret <16 x i8> %a +} + ; CHECK-LABEL: narrow_signed_v16i8: ; SIMD128-NEXT: .functype narrow_signed_v16i8 (v128, v128) -> (v128){{$}} ; SIMD128-NEXT: i8x16.narrow_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}} @@ -339,6 +351,18 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %c) { ret <8 x i16> %a } +; CHECK-LABEL: signselect_v8i16: +; SIMD128-NEXT: .functype signselect_v8i16 (v128, v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i16x8.signselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <8 x i16> @llvm.wasm.signselect.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) +define <8 x i16> @signselect_v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %c) { + %a = 
call <8 x i16> @llvm.wasm.signselect.v8i16( + <8 x i16> %v1, <8 x i16> %v2, <8 x i16> %c + ) + ret <8 x i16> %a +} + ; CHECK-LABEL: narrow_signed_v8i16: ; SIMD128-NEXT: .functype narrow_signed_v8i16 (v128, v128) -> (v128){{$}} ; SIMD128-NEXT: i16x8.narrow_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}} @@ -467,6 +491,18 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %c) { ret <4 x i32> %a } +; CHECK-LABEL: signselect_v4i32: +; SIMD128-NEXT: .functype signselect_v4i32 (v128, v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i32x4.signselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x i32> @llvm.wasm.signselect.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +define <4 x i32> @signselect_v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %c) { + %a = call <4 x i32> @llvm.wasm.signselect.v4i32( + <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %c + ) + ret <4 x i32> %a +} + ; CHECK-LABEL: trunc_sat_s_v4i32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype trunc_sat_s_v4i32 (v128) -> (v128){{$}} @@ -572,6 +608,18 @@ define <2 x i64> @bitselect_v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %c) { ret <2 x i64> %a } +; CHECK-LABEL: signselect_v2i64: +; SIMD128-NEXT: .functype signselect_v2i64 (v128, v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i64x2.signselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x i64> @llvm.wasm.signselect.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +define <2 x i64> @signselect_v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %c) { + %a = call <2 x i64> @llvm.wasm.signselect.v2i64( + <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %c + ) + ret <2 x i64> %a +} + ; ============================================================================== ; 4 x f32 ; ============================================================================== diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index a4908a0a61af..6bd54c9ddeed 100644 --- 
a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -694,4 +694,16 @@ main: # CHECK: i64x2.extmul_high_i32x4_u # encoding: [0xfd,0xd7,0x01] i64x2.extmul_high_i32x4_u + # CHECK: i8x16.signselect # encoding: [0xfd,0x7d] + i8x16.signselect + + # CHECK: i16x8.signselect # encoding: [0xfd,0x7e] + i16x8.signselect + + # CHECK: i32x4.signselect # encoding: [0xfd,0x7f] + i32x4.signselect + + # CHECK: i64x2.signselect # encoding: [0xfd,0x94,0x01] + i64x2.signselect + end_function