fixing some vk validation layer errors (OpMemoryBarrier, Shuffle operations); modified style of tests; removed redundancy (extra code that does nothing); fixed some incorrect run targets; added error reasons for all encountered problems (and if needed, a #define/#if toggle)
jkwak-work · Feb 13, 2024 · 9c7f2fa · 9c7f2fa
1 parent c4e1937
commit 9c7f2fa
Show file tree

Hide file tree

Showing 10 changed files with 59 additions and 54 deletions.
diff --git a/source/slang/glsl.meta.slang b/source/slang/glsl.meta.slang
@@ -2903,9 +2903,7 @@ spirv_caps_shader_subgroup(1.3)
         __intrinsic_asm "subgroupBarrier()";
     case spirv:
         spirv_asm {
-            OpControlBarrier Subgroup Subgroup AcquireRelease|SubgroupMemory|ImageMemory|UniformMemory
-        };
-        spirv_asm {
+            OpCapability Shader;
             OpControlBarrier Subgroup Subgroup AcquireRelease|SubgroupMemory|ImageMemory|UniformMemory
         };
     case cpp:
@@ -2928,9 +2926,7 @@ spirv_caps_shader_subgroup(1.3)
         __intrinsic_asm "subgroupMemoryBarrier()";
     case spirv:
         spirv_asm {
-            OpMemoryBarrier Subgroup AcquireRelease|SubgroupMemory|ImageMemory|UniformMemory
-        };
-        spirv_asm {
+            OpCapability Shader;
             OpMemoryBarrier Subgroup AcquireRelease|SubgroupMemory|ImageMemory|UniformMemory
         };
     case cpp:
@@ -2953,6 +2949,7 @@ spirv_caps_shader_subgroup(1.3)
         __intrinsic_asm "subgroupMemoryBarrierBuffer()";
     case spirv:
         spirv_asm {
+            OpCapability Shader;
             OpMemoryBarrier Subgroup AcquireRelease|UniformMemory
         };
     case cpp:
@@ -2997,7 +2994,9 @@ spirv_caps_shader_subgroup(1.3)
         __intrinsic_asm "subgroupMemoryBarrierShared()";
     case spirv:
         spirv_asm {
-            OpMemoryBarrier Subgroup AcquireRelease|SubgroupMemory
+            // SubgroupMemory triggers vulkan validation layer error; 
+            // WorkgroupMemory is the next level of granularity 
+            OpMemoryBarrier Subgroup AcquireRelease|WorkgroupMemory
         };
     case cpp:
         // TODO: cpp
@@ -4302,7 +4301,7 @@ spirv_caps_shader_subgroup(1.3)
         __target_intrinsic "subgroupShuffleUp($0, $1)";
     case spirv:
         return spirv_asm {
-            OpCapability GroupNonUniformBallot; 
+            OpCapability GroupNonUniformShuffleRelative;
             OpGroupNonUniformShuffleUp $$T result Subgroup $value $delta
         };
     case hlsl:
@@ -4326,7 +4325,7 @@ spirv_caps_shader_subgroup(1.3)
         __target_intrinsic "subgroupShuffleDown($0, $1)";
     case spirv:
         return spirv_asm {
-            OpCapability GroupNonUniformBallot; 
+            OpCapability GroupNonUniformShuffleRelative; 
             OpGroupNonUniformShuffleDown $$T result Subgroup $value $delta
         };
     case hlsl:
@@ -4351,7 +4350,7 @@ spirv_caps_shader_subgroup(1.3)
         __target_intrinsic "subgroupShuffleUp($0, $1)";
     case spirv:
         return spirv_asm {
-            OpCapability GroupNonUniformBallot; 
+            OpCapability GroupNonUniformShuffleRelative;
             OpGroupNonUniformShuffleUp $$macroAnyVec result Subgroup $value $delta
         };
     case hlsl:
@@ -4375,7 +4374,7 @@ spirv_caps_shader_subgroup(1.3)
         __target_intrinsic "subgroupShuffleDown($0, $1)";
     case spirv:
         return spirv_asm {
-            OpCapability GroupNonUniformBallot; 
+            OpCapability GroupNonUniformShuffleRelative;
             OpGroupNonUniformShuffleDown $$macroAnyVec result Subgroup $value $delta
         };
     case hlsl:

diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-arithmetic.slang
@@ -1,7 +1,7 @@
 //TEST:SIMPLE(filecheck=CHECK_GLSL):  -allow-glsl -stage compute -entry computeMain -target glsl
 //TEST:SIMPLE(filecheck=CHECK_SPV):  -allow-glsl -stage compute -entry computeMain -target spirv
 //TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
-//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA 
+//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA 
 
 // not testing cpp due to missing impl
 //T-EST:SIMPLE(filecheck=CHECK_CPP):  -allow-glsl -stage compute -entry computeMain -target cpp
@@ -11,6 +11,7 @@
 
 #version 430
 
+// breaks on all gpu's tested (Nvidia & Intel) -- GLSL does not work, SPIR-V does work
 //#define TEST_when_glsl_subgroupInclusiveXor_is_not_bugged
 //#define TEST_when_glsl_subgroupInclusiveAdd_is_not_bugged 
 
@@ -142,7 +143,7 @@ bool testArithmetic() {
         && testVArithmetic<float, 2>()
         && testVArithmetic<float, 3>()
         && testVArithmetic<float, 4>()
-        && test1Arithmetic<double>() // will silently hang on intel IGPUS
+        && test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support
         && testVArithmetic<double, 2>()
         && testVArithmetic<double, 3>()
         && testVArithmetic<double, 4>()

diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang
@@ -1,15 +1,19 @@
 //TEST:SIMPLE(filecheck=CHECK_GLSL):  -allow-glsl -stage compute -entry computeMain -target glsl
 //TEST:SIMPLE(filecheck=CHECK_SPV):  -allow-glsl -stage compute -entry computeMain -target spirv
 //TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+
 // not testing cuda due to missing impl
-//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA 
+//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA 
 // not testing cpp due to missing impl
 //T-EST:SIMPLE(filecheck=CHECK_CPP):  -allow-glsl -stage compute -entry computeMain -target cpp
 
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
 //TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
 #version 430
 
+// breaks on Nvidia GPU by returning 0 which is trivially wrong (works on Intel Iris Xe)
+//#define TEST_when_glsl_subgroupBallotExclusiveBitCount_is_not_bugged
+
 precision highp float;
 precision highp int;
 
@@ -64,7 +68,7 @@ bool testBroadcastX() {
         && testVBroadcastX<float, 2>()
         && testVBroadcastX<float, 3>()
         && testVBroadcastX<float, 4>()
-        && test1BroadcastX<double>() // will silently hang on intel IGPUS
+        && test1BroadcastX<double>() // WARNING: intel GPU's lack FP64 support
         && testVBroadcastX<double, 2>()
         && testVBroadcastX<double, 3>()
         && testVBroadcastX<double, 4>()
@@ -106,16 +110,22 @@ bool testBallot() {
         && (subgroupBallotBitExtract(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), 0) == true)
         && (subgroupBallotBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 32)
         && (subgroupBallotInclusiveBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) != 0)
+#ifdef TEST_when_glsl_subgroupBallotExclusiveBitCount_is_not_bugged
         && (subgroupBallotExclusiveBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) != 0)
+#endif
         && (subgroupBallotFindLSB(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 0)
         && (subgroupBallotFindMSB(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 31)
         ;
 }
 
 void computeMain()
 {
-    outputBuffer.data[0] = testBroadcastX();
-    outputBuffer.data[1] = testBallot();
+    outputBuffer.data[0] = true
+        && testBroadcastX()
+        ;
+    outputBuffer.data[1] = true
+        && testBallot()
+        ;
 
     // CHECK_GLSL: void main(
     // CHECK_SPV: OpEntryPoint

diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-basic.slang
@@ -1,8 +1,9 @@
 //TEST:SIMPLE(filecheck=CHECK_GLSL):  -allow-glsl -stage compute -entry computeMain -target glsl
 //TEST:SIMPLE(filecheck=CHECK_SPV):  -allow-glsl -stage compute -entry computeMain -target spirv
 //TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+
 // not testing cuda due to missing impl
-//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA 
+//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA 
 // not testing cpp due to missing impl
 //T-EST:SIMPLE(filecheck=CHECK_CPP):  -allow-glsl -stage compute -entry computeMain -target cpp
 

diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-builtin-variables.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-builtin-variables.slang
@@ -1,49 +1,34 @@
 //TEST:SIMPLE(filecheck=CHECK_GLSL):  -allow-glsl -stage compute -entry computeMain -target glsl
 //TEST:SIMPLE(filecheck=CHECK_SPV):  -allow-glsl -stage compute -entry computeMain -target spirv
 //TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
-//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA 
+//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA 
 //TEST:SIMPLE(filecheck=CHECK_CPP):  -allow-glsl -stage compute -entry computeMain -target cpp
 
-//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
-//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
+// currently unused but required to be implemented once gl_NumSubgroups etc.. have functionality implemented
+//T-EST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
+//T-EST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
 #version 430
 
 precision highp float;
 precision highp int;
 
-//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer
-buffer MyBlockName2 
-{
-    uint data[];
-} outputBuffer;
-
 layout(local_size_x = 32) in;
 
 void computeMain()
 {    
     uint v = gl_NumSubgroups;
-
     v = gl_SubgroupID;
-
     v = gl_SubgroupSize;
-
     v = gl_SubgroupInvocationID;
-
     v = gl_SubgroupEqMask;
-
     v = gl_SubgroupGeMask;
-
     v = gl_SubgroupGtMask;
-
     v = gl_SubgroupLeMask;
-
     v = gl_SubgroupLtMask;
 
-
     // CHECK_GLSL: void main(
     // CHECK_SPV: OpEntryPoint
     // CHECK_HLSL: void computeMain(
     // CHECK_CUDA: void computeMain(
     // CHECK_CPP: void _computeMain(
-    // BUF: 0
 }
diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-clustered.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-clustered.slang
@@ -4,7 +4,7 @@
 // not testing hlsl due to missing impl
 //T-EST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
 // not testing cuda due to missing impl
-//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA 
+//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA 
 // not testing cpp due to missing impl
 //T-EST:SIMPLE(filecheck=CHECK_CPP):  -allow-glsl -stage compute -entry computeMain -target cpp
 
@@ -100,7 +100,7 @@ bool testArithmetic() {
         && testVArithmetic<float, 2>()
         && testVArithmetic<float, 3>()
         && testVArithmetic<float, 4>()
-        && test1Arithmetic<double>() // will silently hang on intel IGPUS
+        && test1Arithmetic<double>() // WARNING: intel GPU's lack FP64 support
         && testVArithmetic<double, 2>() 
         && testVArithmetic<double, 3>()
         && testVArithmetic<double, 4>()
@@ -133,8 +133,12 @@ bool testArithmetic() {
 
 void computeMain()
 {
-    outputBuffer.data[0] = testLogical();
-    outputBuffer.data[1] = testArithmetic();
+    outputBuffer.data[0] = true
+        && testLogical()
+        ;
+    outputBuffer.data[1] = true
+        && testArithmetic()
+        ;
 
     // CHECK_GLSL: void main(
     // CHECK_SPV: OpEntryPoint

diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-quad.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-quad.slang
@@ -1,12 +1,9 @@
 //TEST:SIMPLE(filecheck=CHECK_GLSL):  -allow-glsl -stage compute -entry computeMain -target glsl
 //TEST:SIMPLE(filecheck=CHECK_SPV):  -allow-glsl -stage compute -entry computeMain -target spirv
 //TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
+
 // not testing cuda due to missing impl
-//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA 
-// not testing cpp due to missing impl
-//T-EST:SIMPLE(filecheck=CHECK_CPP):  -allow-glsl -stage compute -entry computeMain -target cpp
-// not testing cuda due to missing impl
-//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA 
+//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA 
 // not testing cpp due to missing impl
 //T-EST:SIMPLE(filecheck=CHECK_CPP):  -allow-glsl -stage compute -entry computeMain -target cpp
 
@@ -72,7 +69,7 @@ bool testQuadSwapX() {
         && testVQuadX<float, 2>()
         && testVQuadX<float, 3>()
         && testVQuadX<float, 4>()
-        && test1QuadX<double>() // will silently hang on intel IGPUS
+        && test1QuadX<double>() // WARNING: intel GPU's lack FP64 support
         && testVQuadX<double, 2>()
         && testVQuadX<double, 3>()
         && testVQuadX<double, 4>()

diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle-relative.slang
@@ -4,7 +4,7 @@
 // not testing hlsl due to missing impl
 //T-EST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
 // not testing cuda due to missing impl
-//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA 
+//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA 
 // not testing cpp due to missing impl
 //T-EST:SIMPLE(filecheck=CHECK_CPP):  -allow-glsl -stage compute -entry computeMain -target cpp
 
@@ -61,7 +61,7 @@ bool testShuffleX() {
         && testVShuffleX<float, 2>()
         && testVShuffleX<float, 3>()
         && testVShuffleX<float, 4>()
-        && test1ShuffleX<double>() // will silently hang on intel IGPUS
+        && test1ShuffleX<double>() // WARNING: intel GPU's lack FP64 support
         && testVShuffleX<double, 2>()
         && testVShuffleX<double, 3>()
         && testVShuffleX<double, 4>()

diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang
@@ -4,7 +4,7 @@
 // not testing hlsl due to missing impl
 //T-EST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
 // not testing cuda due to missing impl
-//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA 
+//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA 
 // not testing cpp due to missing impl
 //T-EST:SIMPLE(filecheck=CHECK_CPP):  -allow-glsl -stage compute -entry computeMain -target cpp
 
@@ -29,7 +29,9 @@ __generic<T : __BuiltinLogicalType>
 bool test1ShuffleX() {
     return true
         && subgroupShuffle(T(1), 1) == T(1)
+#if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
         && subgroupShuffleXor(T(1), 1) == T(1)
+#endif // #if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
         ;
 }
 __generic<T : __BuiltinLogicalType, let N : int>
@@ -38,15 +40,19 @@ bool testVShuffleX() {
 
     return true
         && subgroupShuffle(gvec(T(1)), 1) == gvec(T(1))
+#if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
         && subgroupShuffleXor(gvec(T(1)), 1) == gvec(T(1))
+#endif // #if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
         ;
 }
 
 __generic<T : __BuiltinFloatingPointType>
 bool test1ShuffleX() {
     return true
         && subgroupShuffle(T(1), 1) == T(1)
+#if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
         && subgroupShuffleXor(T(1), 1) == T(1)
+#endif // #if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
         ;
 }
 __generic<T : __BuiltinFloatingPointType, let N : int>
@@ -55,7 +61,9 @@ bool testVShuffleX() {
 
     return true
         && subgroupShuffle(gvec(T(1)), 1) == gvec(T(1))
-        && subgroupShuffleXor(gvec(T(1)), 1) == gvec(T(1))
+#if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
+           && subgroupShuffleXor(gvec(T(1)), 1) == gvec(T(1))
+#endif // #if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
         ;
 }
 bool testShuffleX() {
@@ -64,7 +72,7 @@ bool testShuffleX() {
         && testVShuffleX<float, 2>()
         && testVShuffleX<float, 3>()
         && testVShuffleX<float, 4>()
-        && test1ShuffleX<double>() // will silently hang on intel IGPUS
+        && test1ShuffleX<double>() // WARNING: intel GPU's lack FP64 support
         && testVShuffleX<double, 2>()
         && testVShuffleX<double, 3>()
         && testVShuffleX<double, 4>()

diff --git a/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang b/tests/glsl-intrinsic/shader-subgroup/shader-subgroup-vote.slang
@@ -3,7 +3,7 @@
 //TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
 
 // not testing cuda due to missing impl
-//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA 
+//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA 
 // not testing cpp due to missing impl
 //T-EST:SIMPLE(filecheck=CHECK_CPP):  -allow-glsl -stage compute -entry computeMain -target cpp
 
@@ -67,7 +67,7 @@ bool testAllEqual() {
         && testVAllEqual<float, 2>()
         && testVAllEqual<float, 3>()
         && testVAllEqual<float, 4>()
-        && test1AllEqual<double>() // will silently hang on intel IGPUS
+        && test1AllEqual<double>() // WARNING: intel GPU's lack FP64 support
         && testVAllEqual<double, 2>()
         && testVAllEqual<double, 3>()
         && testVAllEqual<double, 4>()