Skip to content

Commit

Permalink
fixing some vk validation layer errors (OpMemoryBarrier, Shuffle oper…
Browse files Browse the repository at this point in the history
…ations)

Modified the style of the tests; removed redundant code (extra code that did nothing); fixed some incorrect run targets; added the failure reason for every problem encountered (with a #define/#if toggle where needed)
  • Loading branch information
ArielG-NV committed Feb 13, 2024
1 parent c4e1937 commit 9c7f2fa
Show file tree
Hide file tree
Showing 10 changed files with 59 additions and 54 deletions.
21 changes: 10 additions & 11 deletions source/slang/glsl.meta.slang
Original file line number Diff line number Diff line change
Expand Up @@ -2903,9 +2903,7 @@ spirv_caps_shader_subgroup(1.3)
__intrinsic_asm "subgroupBarrier()";
case spirv:
spirv_asm {
OpControlBarrier Subgroup Subgroup AcquireRelease|SubgroupMemory|ImageMemory|UniformMemory
};
spirv_asm {
OpCapability Shader;
OpControlBarrier Subgroup Subgroup AcquireRelease|SubgroupMemory|ImageMemory|UniformMemory
};
case cpp:
Expand All @@ -2928,9 +2926,7 @@ spirv_caps_shader_subgroup(1.3)
__intrinsic_asm "subgroupMemoryBarrier()";
case spirv:
spirv_asm {
OpMemoryBarrier Subgroup AcquireRelease|SubgroupMemory|ImageMemory|UniformMemory
};
spirv_asm {
OpCapability Shader;
OpMemoryBarrier Subgroup AcquireRelease|SubgroupMemory|ImageMemory|UniformMemory
};
case cpp:
Expand All @@ -2953,6 +2949,7 @@ spirv_caps_shader_subgroup(1.3)
__intrinsic_asm "subgroupMemoryBarrierBuffer()";
case spirv:
spirv_asm {
OpCapability Shader;
OpMemoryBarrier Subgroup AcquireRelease|UniformMemory
};
case cpp:
Expand Down Expand Up @@ -2997,7 +2994,9 @@ spirv_caps_shader_subgroup(1.3)
__intrinsic_asm "subgroupMemoryBarrierShared()";
case spirv:
spirv_asm {
OpMemoryBarrier Subgroup AcquireRelease|SubgroupMemory
// SubgroupMemory triggers a Vulkan validation layer error;
// WorkgroupMemory is the next level of granularity
OpMemoryBarrier Subgroup AcquireRelease|WorkgroupMemory
};
case cpp:
// TODO: cpp
Expand Down Expand Up @@ -4302,7 +4301,7 @@ spirv_caps_shader_subgroup(1.3)
__target_intrinsic "subgroupShuffleUp($0, $1)";
case spirv:
return spirv_asm {
OpCapability GroupNonUniformBallot;
OpCapability GroupNonUniformShuffleRelative;
OpGroupNonUniformShuffleUp $$T result Subgroup $value $delta
};
case hlsl:
Expand All @@ -4326,7 +4325,7 @@ spirv_caps_shader_subgroup(1.3)
__target_intrinsic "subgroupShuffleDown($0, $1)";
case spirv:
return spirv_asm {
OpCapability GroupNonUniformBallot;
OpCapability GroupNonUniformShuffleRelative;
OpGroupNonUniformShuffleDown $$T result Subgroup $value $delta
};
case hlsl:
Expand All @@ -4351,7 +4350,7 @@ spirv_caps_shader_subgroup(1.3)
__target_intrinsic "subgroupShuffleUp($0, $1)";
case spirv:
return spirv_asm {
OpCapability GroupNonUniformBallot;
OpCapability GroupNonUniformShuffleRelative;
OpGroupNonUniformShuffleUp $$macroAnyVec result Subgroup $value $delta
};
case hlsl:
Expand All @@ -4375,7 +4374,7 @@ spirv_caps_shader_subgroup(1.3)
__target_intrinsic "subgroupShuffleDown($0, $1)";
case spirv:
return spirv_asm {
OpCapability GroupNonUniformBallot;
OpCapability GroupNonUniformShuffleRelative;
OpGroupNonUniformShuffleDown $$macroAnyVec result Subgroup $value $delta
};
case hlsl:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv
//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA
//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA

// not testing cpp due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp
Expand All @@ -11,6 +11,7 @@

#version 430

// breaks on all GPUs tested (Nvidia & Intel) -- GLSL does not work, SPIR-V does work
//#define TEST_when_glsl_subgroupInclusiveXor_is_not_bugged
//#define TEST_when_glsl_subgroupInclusiveAdd_is_not_bugged

Expand Down Expand Up @@ -142,7 +143,7 @@ bool testArithmetic() {
&& testVArithmetic<float, 2>()
&& testVArithmetic<float, 3>()
&& testVArithmetic<float, 4>()
&& test1Arithmetic<double>() // will silently hang on intel IGPUS
&& test1Arithmetic<double>() // WARNING: Intel GPUs lack FP64 support
&& testVArithmetic<double, 2>()
&& testVArithmetic<double, 3>()
&& testVArithmetic<double, 4>()
Expand Down
18 changes: 14 additions & 4 deletions tests/glsl-intrinsic/shader-subgroup/shader-subgroup-ballot.slang
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv
//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL

// not testing cuda due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
// not testing cpp due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp

//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
#version 430

// breaks on an Nvidia GPU by returning 0, which is trivially wrong (works on Intel Iris Xe)
//#define TEST_when_glsl_subgroupBallotExclusiveBitCount_is_not_bugged

precision highp float;
precision highp int;

Expand Down Expand Up @@ -64,7 +68,7 @@ bool testBroadcastX() {
&& testVBroadcastX<float, 2>()
&& testVBroadcastX<float, 3>()
&& testVBroadcastX<float, 4>()
&& test1BroadcastX<double>() // will silently hang on intel IGPUS
&& test1BroadcastX<double>() // WARNING: Intel GPUs lack FP64 support
&& testVBroadcastX<double, 2>()
&& testVBroadcastX<double, 3>()
&& testVBroadcastX<double, 4>()
Expand Down Expand Up @@ -106,16 +110,22 @@ bool testBallot() {
&& (subgroupBallotBitExtract(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), 0) == true)
&& (subgroupBallotBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 32)
&& (subgroupBallotInclusiveBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) != 0)
#ifdef TEST_when_glsl_subgroupBallotExclusiveBitCount_is_not_bugged
&& (subgroupBallotExclusiveBitCount(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) != 0)
#endif
&& (subgroupBallotFindLSB(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 0)
&& (subgroupBallotFindMSB(uvec4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)) == 31)
;
}

void computeMain()
{
outputBuffer.data[0] = testBroadcastX();
outputBuffer.data[1] = testBallot();
outputBuffer.data[0] = true
&& testBroadcastX()
;
outputBuffer.data[1] = true
&& testBallot()
;

// CHECK_GLSL: void main(
// CHECK_SPV: OpEntryPoint
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv
//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL

// not testing cuda due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
// not testing cpp due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp

Expand Down
Original file line number Diff line number Diff line change
@@ -1,49 +1,34 @@
//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv
//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA
//TEST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
//TEST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp

//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
//TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
// currently unused, but must be enabled once gl_NumSubgroups etc. have their functionality implemented
//T-EST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl
//T-EST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=BUF):-vk -compute -entry computeMain -allow-glsl -emit-spirv-directly
#version 430

precision highp float;
precision highp int;

//TEST_INPUT:ubuffer(data=[0], stride=4):out,name=outputBuffer
buffer MyBlockName2
{
uint data[];
} outputBuffer;

layout(local_size_x = 32) in;

void computeMain()
{
uint v = gl_NumSubgroups;

v = gl_SubgroupID;

v = gl_SubgroupSize;

v = gl_SubgroupInvocationID;

v = gl_SubgroupEqMask;

v = gl_SubgroupGeMask;

v = gl_SubgroupGtMask;

v = gl_SubgroupLeMask;

v = gl_SubgroupLtMask;


// CHECK_GLSL: void main(
// CHECK_SPV: OpEntryPoint
// CHECK_HLSL: void computeMain(
// CHECK_CUDA: void computeMain(
// CHECK_CPP: void _computeMain(
// BUF: 0
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// not testing hlsl due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
// not testing cuda due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
// not testing cpp due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp

Expand Down Expand Up @@ -100,7 +100,7 @@ bool testArithmetic() {
&& testVArithmetic<float, 2>()
&& testVArithmetic<float, 3>()
&& testVArithmetic<float, 4>()
&& test1Arithmetic<double>() // will silently hang on intel IGPUS
&& test1Arithmetic<double>() // WARNING: Intel GPUs lack FP64 support
&& testVArithmetic<double, 2>()
&& testVArithmetic<double, 3>()
&& testVArithmetic<double, 4>()
Expand Down Expand Up @@ -133,8 +133,12 @@ bool testArithmetic() {

void computeMain()
{
outputBuffer.data[0] = testLogical();
outputBuffer.data[1] = testArithmetic();
outputBuffer.data[0] = true
&& testLogical()
;
outputBuffer.data[1] = true
&& testArithmetic()
;

// CHECK_GLSL: void main(
// CHECK_SPV: OpEntryPoint
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
//TEST:SIMPLE(filecheck=CHECK_GLSL): -allow-glsl -stage compute -entry computeMain -target glsl
//TEST:SIMPLE(filecheck=CHECK_SPV): -allow-glsl -stage compute -entry computeMain -target spirv
//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL

// not testing cuda due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA
// not testing cpp due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp
// not testing cuda due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
// not testing cpp due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp

Expand Down Expand Up @@ -72,7 +69,7 @@ bool testQuadSwapX() {
&& testVQuadX<float, 2>()
&& testVQuadX<float, 3>()
&& testVQuadX<float, 4>()
&& test1QuadX<double>() // will silently hang on intel IGPUS
&& test1QuadX<double>() // WARNING: Intel GPUs lack FP64 support
&& testVQuadX<double, 2>()
&& testVQuadX<double, 3>()
&& testVQuadX<double, 4>()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// not testing hlsl due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
// not testing cuda due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
// not testing cpp due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp

Expand Down Expand Up @@ -61,7 +61,7 @@ bool testShuffleX() {
&& testVShuffleX<float, 2>()
&& testVShuffleX<float, 3>()
&& testVShuffleX<float, 4>()
&& test1ShuffleX<double>() // will silently hang on intel IGPUS
&& test1ShuffleX<double>() // WARNING: Intel GPUs lack FP64 support
&& testVShuffleX<double, 2>()
&& testVShuffleX<double, 3>()
&& testVShuffleX<double, 4>()
Expand Down
14 changes: 11 additions & 3 deletions tests/glsl-intrinsic/shader-subgroup/shader-subgroup-shuffle.slang
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// not testing hlsl due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL
// not testing cuda due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
// not testing cpp due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp

Expand All @@ -29,7 +29,9 @@ __generic<T : __BuiltinLogicalType>
bool test1ShuffleX() {
return true
&& subgroupShuffle(T(1), 1) == T(1)
#if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
&& subgroupShuffleXor(T(1), 1) == T(1)
#endif // #if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
;
}
__generic<T : __BuiltinLogicalType, let N : int>
Expand All @@ -38,15 +40,19 @@ bool testVShuffleX() {

return true
&& subgroupShuffle(gvec(T(1)), 1) == gvec(T(1))
#if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
&& subgroupShuffleXor(gvec(T(1)), 1) == gvec(T(1))
#endif // #if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
;
}

__generic<T : __BuiltinFloatingPointType>
bool test1ShuffleX() {
return true
&& subgroupShuffle(T(1), 1) == T(1)
#if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
&& subgroupShuffleXor(T(1), 1) == T(1)
#endif // #if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
;
}
__generic<T : __BuiltinFloatingPointType, let N : int>
Expand All @@ -55,7 +61,9 @@ bool testVShuffleX() {

return true
&& subgroupShuffle(gvec(T(1)), 1) == gvec(T(1))
&& subgroupShuffleXor(gvec(T(1)), 1) == gvec(T(1))
#if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
&& subgroupShuffleXor(gvec(T(1)), 1) == gvec(T(1))
#endif // #if !defined(TARGET_CUDA) && !defined(TARGET_HLSL)
;
}
bool testShuffleX() {
Expand All @@ -64,7 +72,7 @@ bool testShuffleX() {
&& testVShuffleX<float, 2>()
&& testVShuffleX<float, 3>()
&& testVShuffleX<float, 4>()
&& test1ShuffleX<double>() // will silently hang on intel IGPUS
&& test1ShuffleX<double>() // WARNING: Intel GPUs lack FP64 support
&& testVShuffleX<double, 2>()
&& testVShuffleX<double, 3>()
&& testVShuffleX<double, 4>()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//TEST:SIMPLE(filecheck=CHECK_HLSL): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_HLSL

// not testing cuda due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target hlsl -DTARGET_CUDA
//T-EST:SIMPLE(filecheck=CHECK_CUDA): -allow-glsl -stage compute -entry computeMain -target cuda -DTARGET_CUDA
// not testing cpp due to missing impl
//T-EST:SIMPLE(filecheck=CHECK_CPP): -allow-glsl -stage compute -entry computeMain -target cpp

Expand Down Expand Up @@ -67,7 +67,7 @@ bool testAllEqual() {
&& testVAllEqual<float, 2>()
&& testVAllEqual<float, 3>()
&& testVAllEqual<float, 4>()
&& test1AllEqual<double>() // will silently hang on intel IGPUS
&& test1AllEqual<double>() // WARNING: Intel GPUs lack FP64 support
&& testVAllEqual<double, 2>()
&& testVAllEqual<double, 3>()
&& testVAllEqual<double, 4>()
Expand Down

0 comments on commit 9c7f2fa

Please sign in to comment.