From 7f6d946c4bcddf1aca604e16ee6d0c0108e2d2b1 Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Wed, 21 Jun 2023 01:59:32 +0000 Subject: [PATCH] Merged PR 5704: Add SM 6.8 preview features to release-preview-1.8.2306 Add Shader Model 6.8 features in preview state: Work Graphs and Wave Matrix. This code is considered preview ready, with more work planned before retail shaders can be compiled to Shader Model 6.8. --- docs/DXIL.rst | 40 +- include/dxc/DXIL/DXIL.h | 1 + include/dxc/DXIL/DxilConstants.h | 184 +- include/dxc/DXIL/DxilFunctionProps.h | 36 +- include/dxc/DXIL/DxilInstructions.h | 847 +++++++ include/dxc/DXIL/DxilMetadataHelper.h | 38 + include/dxc/DXIL/DxilNodeProps.h | 145 ++ include/dxc/DXIL/DxilOperations.h | 13 + include/dxc/DXIL/DxilSemantic.h | 1 - include/dxc/DXIL/DxilShaderFlags.h | 9 +- include/dxc/DXIL/DxilShaderModel.h | 8 +- include/dxc/DXIL/DxilTypeSystem.h | 6 + include/dxc/DXIL/DxilUtil.h | 24 +- include/dxc/DXIL/DxilWaveMatrix.h | 53 + .../DxilContainer/DxilContainerAssembler.h | 3 + .../DxilPipelineStateValidation.h | 1 + .../dxc/DxilContainer/DxilRuntimeReflection.h | 139 +- .../DxilContainer/DxilRuntimeReflection.inl | 270 -- .../dxc/DxilContainer/RDAT_LibraryTypes.inl | 316 ++- include/dxc/DxilContainer/RDAT_Macros.inl | 11 + include/dxc/HLSL/HLModule.h | 1 + include/dxc/HLSL/HLOperationLower.h | 5 +- include/dxc/HLSL/HLOperations.h | 53 +- include/dxc/HlslIntrinsicOp.h | 22 + include/dxc/Support/ErrorCodes.h | 3 + include/dxc/Support/HLSLOptions.h | 7 + include/dxc/Support/HLSLOptions.td | 4 +- include/dxc/dxcapi.internal.h | 26 +- include/llvm/InitializePasses.h | 1 + include/llvm/Transforms/Scalar.h | 5 + lib/DXIL/CMakeLists.txt | 2 + lib/DXIL/DxilMetadataHelper.cpp | 543 +++- lib/DXIL/DxilModule.cpp | 14 +- lib/DXIL/DxilNodeProps.cpp | 97 + lib/DXIL/DxilOperations.cpp | 227 +- lib/DXIL/DxilResourceProperties.cpp | 2 +- lib/DXIL/DxilShaderFlags.cpp | 21 + lib/DXIL/DxilShaderModel.cpp | 177 +- lib/DXIL/DxilTypeSystem.cpp | 7 + 
lib/DXIL/DxilUtil.cpp | 274 +- lib/DXIL/DxilWaveMatrix.cpp | 79 + lib/DxcSupport/HLSLOptions.cpp | 15 + lib/DxilContainer/DxilContainerAssembler.cpp | 487 +++- lib/HLSL/DxilContainerReflection.cpp | 155 +- lib/HLSL/DxilGenerationPass.cpp | 251 +- lib/HLSL/DxilValidation.cpp | 402 ++- lib/HLSL/HLLowerUDT.cpp | 146 +- lib/HLSL/HLModule.cpp | 3 + lib/HLSL/HLOperationLower.cpp | 636 ++++- lib/HLSL/HLOperations.cpp | 66 +- lib/HLSL/HLSignatureLower.cpp | 14 +- lib/HLSL/HLSignatureLower.h | 2 +- lib/Transforms/IPO/PassManagerBuilder.cpp | 3 + lib/Transforms/Scalar/LowerTypePasses.cpp | 139 ++ .../Scalar/ScalarReplAggregatesHLSL.cpp | 4 +- .../clang/include/clang/AST/BuiltinTypes.def | 12 +- tools/clang/include/clang/AST/HlslTypes.h | 42 +- tools/clang/include/clang/Basic/Attr.td | 83 +- .../clang/Basic/DiagnosticSemaKinds.td | 18 + tools/clang/lib/AST/ASTContextHLSL.cpp | 257 +- tools/clang/lib/AST/HlslTypes.cpp | 135 +- tools/clang/lib/CodeGen/CGDebugInfo.cpp | 3 +- tools/clang/lib/CodeGen/CGHLSLMS.cpp | 677 ++++- .../lib/CodeGen/CGHLSLMSFinishCodeGen.cpp | 416 +++- tools/clang/lib/CodeGen/CGHLSLMSHelper.h | 13 +- tools/clang/lib/Parse/ParseDecl.cpp | 12 + tools/clang/lib/Sema/SemaExpr.cpp | 72 +- tools/clang/lib/Sema/SemaHLSL.cpp | 1215 ++++++++- tools/clang/lib/Sema/SemaOverload.cpp | 8 + .../compute_node_compatibility.hlsl | 36 + .../node_input_compatibility.hlsl | 27 + .../scalar-assignments-exact-precision.hlsl | 149 +- tools/clang/test/HLSL/scalar-assignments.hlsl | 493 +++- tools/clang/test/HLSL/work-graphs.hlsl | 204 ++ .../HLSL/workgraph/dispatchgrid_diags.hlsl | 59 + .../workgraph/member_write_diagnostics.hlsl | 59 + .../workgraph/node_compute_compatibility.hlsl | 61 + .../workgraph/node_input_compatibility.hlsl | 89 + .../rwnodeinputrecord_sv_dispatchgrid.hlsl | 49 + .../d3dreflect/amp-groupshared.hlsl | 23 +- .../d3dreflect/cbuf-usage-lib.hlsl | 52 +- .../d3dreflect/comp-groupshared.hlsl | 43 +- .../d3dreflect/empty_broadcasting_nodes.hlsl | 307 +++ 
.../d3dreflect/empty_thread_nodes.hlsl | 337 +++ .../d3dreflect/lib_exports1.hlsl | 56 +- .../d3dreflect/lib_exports2.hlsl | 60 +- .../d3dreflect/lib_exports3.hlsl | 80 +- .../d3dreflect/lib_hs_export2.hlsl | 129 +- .../d3dreflect/mesh-groupshared.hlsl | 137 +- .../intrinsics/atomic/atomic_restypes.hlsl | 30 +- .../WaveMatrix/WaveMatrix_Add-limited.hlsl | 56 + .../objects/WaveMatrix/WaveMatrix_Depth.hlsl | 51 + .../WaveMatrix/WaveMatrix_Fill-acc.hlsl | 54 + .../WaveMatrix/WaveMatrix_Fill-in.hlsl | 49 + .../WaveMatrix/WaveMatrix_LoadStore-acc.hlsl | 112 + .../WaveMatrix/WaveMatrix_LoadStore-in.hlsl | 92 + .../WaveMatrix_Multiply-Add-acc.hlsl | 81 + .../WaveMatrix/WaveMatrix_ScalarOps-acc.hlsl | 83 + .../WaveMatrix_SumAccumulate-acc.hlsl | 64 + .../hlsl/workgraph/ast-nodeinput.hlsl | 101 + .../hlsl/workgraph/ast-nodeoutput.hlsl | 150 ++ .../hlsl/workgraph/ast-rwnodeinput.hlsl | 139 ++ .../hlsl/workgraph/both_compute_and_node.hlsl | 68 + .../called_function_arg_nodeoutput.hlsl | 36 + .../called_function_arg_record_object.hlsl | 27 + .../case001_dispatchgrid_shader.hlsl | 87 + ...ase006_broadcasting_numthreads_shader.hlsl | 65 + .../case007_broadcasting_numthreads_none.hlsl | 16 + .../case008_coalescing_numthreads_shader.hlsl | 54 + .../case009_coalescing_numthreads_none.hlsl | 16 + .../case010_thread_numthreads_shader.hlsl | 19 + .../case011_thread_numthreads_none.hlsl | 38 + ...012_thread_numthreads_wrongdimensions.hlsl | 21 + .../workgraph/case013_numthreads_1030.hlsl | 19 + ...case014_getrecordcount_nodeinputarray.hlsl | 85 + .../hlsl/workgraph/case017_renamed_node.hlsl | 47 + .../workgraph/case018_renamed_node_index.hlsl | 48 + .../case028_groupincrementoutputcount.hlsl | 59 + .../case029_threadincrementoutputcount.hlsl | 47 + .../case030_outputcomplete_nodeoutput.hlsl | 69 + ..._finishedcrossgroupsharing_coalescing.hlsl | 22 + ...e035_finishedcrossgroupsharing_thread.hlsl | 22 + .../case037_finishedcrossgroupsharing.hlsl | 94 + 
.../case038_trackrwinputsharing_missing.hlsl | 24 + .../hlsl/workgraph/case051_compute_attrs.hlsl | 43 + .../workgraph/case052_nodelaunch_invalid.hlsl | 20 + ...e055_nodearray_indices_not_contiguous.hlsl | 133 + .../case058_coalescing_dispatchgrid.hlsl | 18 + .../case059_thread_dispatchgrid.hlsl | 17 + .../case061_coalescing_maxdispatchgrid.hlsl | 24 + .../case062_thread_maxdispatchgrid.hlsl | 24 + .../case067_maxrecursiondepth_toolarge.hlsl | 37 + .../hlsl/workgraph/case070_noinput.hlsl | 69 + .../case085_thread_emptynodeinput.hlsl | 54 + ...putrecords_maxoutputrecordssharedwith.hlsl | 28 + ...se099_nodelocalrootargumenttableindex.hlsl | 40 + .../case114_multiple_svdispatchgrid.hlsl | 28 + .../workgraph/case116_barrier_compute.hlsl | 21 + .../workgraph/case117_barrier_memoryarg.hlsl | 83 + .../workgraph/case118_barrier_objectarg.hlsl | 243 ++ .../hlsl/workgraph/case119_member_read.hlsl | 69 + .../workgraph/case124_member_read_types.hlsl | 110 + .../case127_outputcomplete_errors.hlsl | 121 + .../case129_nodeoutputisvalid_nodeoutput.hlsl | 69 + ...131_nodeoutputisvalid_emptynodeoutput.hlsl | 60 + .../case133_getremainingrecursionlevels.hlsl | 47 + ...case150_multiple_getnoderoutputrecord.hlsl | 52 + .../hlsl/workgraph/groupshared.hlsl | 23 + .../hlsl/workgraph/groupshared_barrier.hlsl | 20 + .../implicit_record_dispatchgrid.hlsl | 36 + .../hlsl/workgraph/invalid_barrier_1.ll | 53 + .../hlsl/workgraph/invalid_barrier_12.ll | 54 + .../hlsl/workgraph/invalid_barrier_123.ll | 55 + .../hlsl/workgraph/invalid_barrier_13.ll | 54 + .../hlsl/workgraph/invalid_barrier_2.ll | 53 + .../hlsl/workgraph/invalid_barrier_23.ll | 54 + .../hlsl/workgraph/invalid_barrier_3.ll | 53 + .../workgraph/invalid_barrier_nodehandle.ll | 125 + .../hlsl/workgraph/member_atomics.hlsl | 84 + .../hlsl/workgraph/member_matrix_write.hlsl | 137 + .../missing_node_attribute_error_msg.hlsl | 23 + .../hlsl/workgraph/nodeoutputarray.hlsl | 101 + .../hlsl/workgraph/nodeshareinputof.hlsl | 53 + 
...nodeinputrecord_sv_dispatchgrid_array.hlsl | 27 + .../test_increment_output_count.hlsl | 18 + .../hlsl/workgraph/wavesize.hlsl | 44 + .../workgraph/zero_sized_node_record.hlsl | 25 + .../passes/hl/sroa_hlsl/NodeInput_type.ll | 194 ++ .../library/shader_cv_mismatch.hlsl | 10 + .../library/shader_mp_mismatch.hlsl | 9 + .../library/shader_nvp_mismatch.hlsl | 10 + .../shader_targets/nodes/NodeOutput.hlsl | 62 + .../nodes/max_output_records_duplicate1.hlsl | 34 + .../nodes/max_output_records_duplicate2.hlsl | 33 + .../nodes/max_output_records_invalidref.hlsl | 33 + .../nodes/max_output_records_shared_with.hlsl | 36 + .../validation/completed-handle-all.ll | 121 + .../completed-handle-used-complete-inblock.ll | 94 + .../completed-handle-used-complete.ll | 98 + .../completed-handle-used-getptr-inblock.ll | 89 + .../completed-handle-used-getptr.ll | 98 + .../validation/val-node-numthreads.hlsl | 13 + .../validation/val-node-numthreads.ll | 43 + .../validation/verify_output_complete.hlsl | 52 + .../tools/dxcompiler/dxcdisassembler.cpp | 2 +- .../clang/tools/dxcompiler/dxcompilerobj.cpp | 198 +- tools/clang/tools/dxcompiler/dxcutil.cpp | 32 +- tools/clang/tools/dxcompiler/dxcutil.h | 16 +- .../tools/dxrfallbackcompiler/dxcutil.cpp | 2 +- .../clang/tools/libclang/dxcrewriteunused.cpp | 6 +- .../unittests/HLSL/DxilContainerTest.cpp | 183 +- tools/clang/unittests/HLSL/ValidationTest.cpp | 168 +- tools/clang/unittests/HLSL/VerifierTest.cpp | 31 + .../unittests/HLSLExec/ExecutionTest.cpp | 2204 ++++++++++++++++- .../unittests/HLSLExec/ShaderOpArith.xml | 65 + .../unittests/HLSLExec/ShaderOpArithTable.xml | 617 +++++ utils/hct/VerifierHelper.py | 2 + utils/hct/gen_intrin_main.txt | 165 ++ utils/hct/hctdb.py | 340 ++- utils/hct/hctdb_instrhelp.py | 34 +- 200 files changed, 19616 insertions(+), 1476 deletions(-) create mode 100644 include/dxc/DXIL/DxilNodeProps.h create mode 100644 include/dxc/DXIL/DxilWaveMatrix.h create mode 100644 lib/DXIL/DxilNodeProps.cpp create mode 100644 
lib/DXIL/DxilWaveMatrix.cpp create mode 100644 tools/clang/test/DXILValidation/compute_node_compatibility.hlsl create mode 100644 tools/clang/test/DXILValidation/node_input_compatibility.hlsl create mode 100644 tools/clang/test/HLSL/work-graphs.hlsl create mode 100644 tools/clang/test/HLSL/workgraph/dispatchgrid_diags.hlsl create mode 100644 tools/clang/test/HLSL/workgraph/member_write_diagnostics.hlsl create mode 100644 tools/clang/test/HLSL/workgraph/node_compute_compatibility.hlsl create mode 100644 tools/clang/test/HLSL/workgraph/node_input_compatibility.hlsl create mode 100644 tools/clang/test/HLSL/workgraph/rwnodeinputrecord_sv_dispatchgrid.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/d3dreflect/empty_broadcasting_nodes.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/d3dreflect/empty_thread_nodes.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Add-limited.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Depth.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Fill-acc.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Fill-in.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_LoadStore-acc.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_LoadStore-in.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Multiply-Add-acc.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_ScalarOps-acc.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_SumAccumulate-acc.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/ast-nodeinput.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/ast-nodeoutput.hlsl create mode 100644 
tools/clang/test/HLSLFileCheck/hlsl/workgraph/ast-rwnodeinput.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/both_compute_and_node.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/called_function_arg_nodeoutput.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/called_function_arg_record_object.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case001_dispatchgrid_shader.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case006_broadcasting_numthreads_shader.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case007_broadcasting_numthreads_none.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case008_coalescing_numthreads_shader.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case009_coalescing_numthreads_none.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case010_thread_numthreads_shader.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case011_thread_numthreads_none.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case012_thread_numthreads_wrongdimensions.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case013_numthreads_1030.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case014_getrecordcount_nodeinputarray.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case017_renamed_node.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case018_renamed_node_index.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case028_groupincrementoutputcount.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case029_threadincrementoutputcount.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case030_outputcomplete_nodeoutput.hlsl create mode 100644 
tools/clang/test/HLSLFileCheck/hlsl/workgraph/case034_finishedcrossgroupsharing_coalescing.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case035_finishedcrossgroupsharing_thread.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case037_finishedcrossgroupsharing.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case038_trackrwinputsharing_missing.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case051_compute_attrs.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case052_nodelaunch_invalid.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case055_nodearray_indices_not_contiguous.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case058_coalescing_dispatchgrid.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case059_thread_dispatchgrid.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case061_coalescing_maxdispatchgrid.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case062_thread_maxdispatchgrid.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case067_maxrecursiondepth_toolarge.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case070_noinput.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case085_thread_emptynodeinput.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case092_maxoutputrecords_maxoutputrecordssharedwith.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case099_nodelocalrootargumenttableindex.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case114_multiple_svdispatchgrid.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case116_barrier_compute.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case117_barrier_memoryarg.hlsl create mode 100644 
tools/clang/test/HLSLFileCheck/hlsl/workgraph/case118_barrier_objectarg.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case119_member_read.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case124_member_read_types.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case127_outputcomplete_errors.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case129_nodeoutputisvalid_nodeoutput.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case131_nodeoutputisvalid_emptynodeoutput.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case133_getremainingrecursionlevels.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/case150_multiple_getnoderoutputrecord.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/groupshared.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/groupshared_barrier.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/implicit_record_dispatchgrid.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_1.ll create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_12.ll create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_123.ll create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_13.ll create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_2.ll create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_23.ll create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_3.ll create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_nodehandle.ll create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/member_atomics.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/member_matrix_write.hlsl create mode 100644 
tools/clang/test/HLSLFileCheck/hlsl/workgraph/missing_node_attribute_error_msg.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/nodeoutputarray.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/nodeshareinputof.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/rwnodeinputrecord_sv_dispatchgrid_array.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/test_increment_output_count.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/wavesize.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/zero_sized_node_record.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/NodeInput_type.ll create mode 100644 tools/clang/test/HLSLFileCheck/shader_targets/library/shader_cv_mismatch.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/shader_targets/library/shader_mp_mismatch.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/shader_targets/library/shader_nvp_mismatch.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/shader_targets/nodes/NodeOutput.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_duplicate1.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_duplicate2.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_invalidref.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_shared_with.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/validation/completed-handle-all.ll create mode 100644 tools/clang/test/HLSLFileCheck/validation/completed-handle-used-complete-inblock.ll create mode 100644 tools/clang/test/HLSLFileCheck/validation/completed-handle-used-complete.ll create mode 100644 tools/clang/test/HLSLFileCheck/validation/completed-handle-used-getptr-inblock.ll create mode 100644 
tools/clang/test/HLSLFileCheck/validation/completed-handle-used-getptr.ll create mode 100644 tools/clang/test/HLSLFileCheck/validation/val-node-numthreads.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/validation/val-node-numthreads.ll create mode 100644 tools/clang/test/HLSLFileCheck/validation/verify_output_complete.hlsl diff --git a/docs/DXIL.rst b/docs/DXIL.rst index 5b1e957617..777132b4f5 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -2322,6 +2322,34 @@ ID Name Description 223 TextureGatherRaw Gather raw elements from 4 texels with no type conversions (SRV type is constrained) 224 SampleCmpLevel samples a texture and compares a single component against the specified comparison value 225 TextureStoreSample stores texel data at specified sample index +226 WaveMatrix_Annotate Annotate a wave matrix pointer with the type information +227 WaveMatrix_Depth Returns depth (K) value for matrix of specified type +228 WaveMatrix_Fill Fill wave matrix with scalar value +229 WaveMatrix_LoadRawBuf Load wave matrix from raw buffer +230 WaveMatrix_LoadGroupShared Load wave matrix from group shared array +231 WaveMatrix_StoreRawBuf Store wave matrix to raw buffer +232 WaveMatrix_StoreGroupShared Store wave matrix to group shared array +233 WaveMatrix_Multiply Multiply left and right wave matrix and store in accumulator +234 WaveMatrix_MultiplyAccumulate Multiply left and right wave matrix and accumulate into accumulator +235 WaveMatrix_ScalarOp Perform scalar operation on each element of wave matrix +236 WaveMatrix_SumAccumulate Sum rows or columns of an input matrix into an existing accumulator fragment matrix +237 WaveMatrix_Add Element-wise accumulate, or broadcast add of fragment into accumulator +238 AllocateNodeOutputRecords returns a handle for the output records +239 GetNodeRecordPtr retrieve node input/output record pointer in address space 6 +240 IncrementOutputCount Select the next logical output count for an EmptyNodeOutput for the whole group or per
thread. +241 OutputComplete indicates all outputs for a given records are complete +242 GetInputRecordCount returns the number of records that have been coalesced into the current thread group +243 FinishedCrossGroupSharing returns true if the current thread group is the last to access the input +244 BarrierByMemoryType Request a barrier for a set of memory types and/or thread group execution sync +245 BarrierByMemoryHandle Request a barrier for just the memory used by the specified object +246 BarrierByNodeRecordHandle Request a barrier for just the memory used by the node record +247 CreateNodeOutputHandle Creates a handle to a NodeOutput +248 IndexNodeHandle returns the handle for the location in the output node array at the indicated index +249 AnnotateNodeHandle annotate handle with node properties +250 CreateNodeInputRecordHandle create a handle for an InputRecord +251 AnnotateNodeRecordHandle annotate handle with node record properties +252 NodeOutputIsValid returns true if the specified output node is present in the work graph +253 GetRemainingRecursionLevels returns how many levels of recursion remain === ===================================================== ======================================================================================================================================================================================================================= @@ -2984,14 +3012,19 @@ DECL.EXTRAARGS Extra arguments not allowed for shader DECL.FNATTRIBUTE Functions should only contain known function attributes DECL.FNFLATTENPARAM Function parameters must not use struct types DECL.FNISCALLED Functions can only be used by call instructions +DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record +DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type DECL.NOTUSEDEXTERNAL External declaration should not be used DECL.PARAMSTRUCT Callable function parameter must be struct type DECL.PAYLOADSTRUCT Payload parameter 
must be struct type +DECL.RAYQYERYINFNSIG Rayquery objects not allowed in function signatures DECL.RESOURCEINFNSIG Resources not allowed in function signatures DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types DECL.SHADERRETURNVOID Shader functions must return void DECL.USEDEXTERNALFUNCTION External function must be used DECL.USEDINTERNAL Internal declaration must be used +FLOW.COMPUTENODEIO Node with input or outputs is not compatible with Compute +FLOW.COMPUTENODELAUNCHTYPE Node launch type is not compatible with Compute FLOW.DEADLOOP Loop must have break. FLOW.FUNCTIONCALL Function with parameter is not permitted FLOW.NORECUSION Recursion is not permitted. @@ -2999,11 +3032,13 @@ FLOW.REDUCIBLE Execution flow must be reducible. INSTR.ALLOWED Instructions must be of an allowed type. INSTR.ATOMICCONST Constant destination to atomic. INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. -INSTR.ATOMICOPNONGROUPSHARED Non-groupshared destination to atomic operation. +INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. -INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh Shader must only sync UAV (sync_uglobal). +INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' +INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. 
+INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature @@ -3029,6 +3064,7 @@ INSTR.MIPONUAVLOAD uav load don't support mipLevel/sample INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. +INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. INSTR.NOIDIVBYZERO No signed integer division by zero. INSTR.NOINDEFINITEACOS No indefinite arccosine. diff --git a/include/dxc/DXIL/DXIL.h b/include/dxc/DXIL/DXIL.h index ae849f7885..2a32debf22 100644 --- a/include/dxc/DXIL/DXIL.h +++ b/include/dxc/DXIL/DXIL.h @@ -22,3 +22,4 @@ #include "dxc/DXIL/DxilSampler.h" #include "dxc/DXIL/DxilOperations.h" #include "dxc/DXIL/DxilModule.h" +#include "dxc/DXIL/DxilNodeProps.h" diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index f97545703a..7d440431c6 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -29,7 +29,7 @@ namespace DXIL { const unsigned kDxilMajor = 1; /* hctdb_instrhelp.get_dxil_version_minor()*/ // VALRULE-TEXT:BEGIN - const unsigned kDxilMinor = 7; + const unsigned kDxilMinor = 8; // VALRULE-TEXT:END inline unsigned MakeDxilVersion(unsigned DxilMajor, unsigned DxilMinor) { @@ -169,6 +169,7 @@ namespace DXIL { Callable, Mesh, Amplification, + Node, Invalid, }; @@ -437,12 +438,20 @@ namespace DXIL { BitcastI32toF32 = 126, // bitcast between different sizes BitcastI64toF64 = 128, // bitcast between different sizes - // 
Compute/Mesh/Amplification shader + // Compute/Mesh/Amplification/Node shader FlattenedThreadIdInGroup = 96, // provides a flattened index for a given thread within a given group (SV_GroupIndex) GroupId = 94, // reads the group ID (SV_GroupID) ThreadId = 93, // reads the thread ID ThreadIdInGroup = 95, // reads the thread ID within the group (SV_GroupThreadID) + // Create/Annotate Node Handles + AllocateNodeOutputRecords = 238, // returns a handle for the output records + AnnotateNodeHandle = 249, // annotate handle with node properties + AnnotateNodeRecordHandle = 251, // annotate handle with node record properties + CreateNodeInputRecordHandle = 250, // create a handle for an InputRecord + CreateNodeOutputHandle = 247, // Creates a handle to a NodeOutput + IndexNodeHandle = 248, // returns the handle for the location in the output node array at the indicated index + // Derivatives CalculateLOD = 81, // calculates the level of detail DerivCoarseX = 83, // computes the rate of change per stamp in x direction. 
@@ -480,6 +489,9 @@ namespace DXIL { EmitThenCutStream = 99, // equivalent to an EmitStream followed by a CutStream GSInstanceID = 100, // GSInstanceID + // Get Pointer to Node Record in Address Space 6 + GetNodeRecordPtr = 239, // retrieve node input/output record pointer in address space 6 + // Get handle from heap AnnotateHandle = 216, // annotate handle with resource properties CreateHandleFromBinding = 217, // create resource handle from binding @@ -656,6 +668,9 @@ namespace DXIL { AtomicBinOp = 78, // performs an atomic operation on two operands AtomicCompareExchange = 79, // atomic compare and exchange to memory Barrier = 80, // inserts a memory barrier in the shader + BarrierByMemoryHandle = 245, // Request a barrier for just the memory used by the specified object + BarrierByMemoryType = 244, // Request a barrier for a set of memory types and/or thread group execution sync + BarrierByNodeRecordHandle = 246, // Request a barrier for just the memory used by the node record // Temporary, indexable, input, output registers LoadInput = 4, // Loads the value from shader input @@ -737,6 +752,28 @@ namespace DXIL { WaveReadLaneAt = 117, // returns the value from the specified lane WaveReadLaneFirst = 118, // returns the value from the first lane + // WaveMatrix + WaveMatrix_Add = 237, // Element-wise accumulate, or broadcast add of fragment into accumulator + WaveMatrix_Annotate = 226, // Annotate a wave matrix pointer with the type information + WaveMatrix_Depth = 227, // Returns depth (K) value for matrix of specified type + WaveMatrix_Fill = 228, // Fill wave matrix with scalar value + WaveMatrix_LoadGroupShared = 230, // Load wave matrix from group shared array + WaveMatrix_LoadRawBuf = 229, // Load wave matrix from raw buffer + WaveMatrix_Multiply = 233, // Mutiply left and right wave matrix and store in accumulator + WaveMatrix_MultiplyAccumulate = 234, // Mutiply left and right wave matrix and accumulate into accumulator + WaveMatrix_ScalarOp = 235, // 
Perform scalar operation on each element of wave matrix + WaveMatrix_StoreGroupShared = 232, // Store wave matrix to group shared array + WaveMatrix_StoreRawBuf = 231, // Store wave matrix to raw buffer + WaveMatrix_SumAccumulate = 236, // Sum rows or columns of an input matrix into an existing accumulator fragment matrix + + // Work Graph intrinsics + FinishedCrossGroupSharing = 243, // returns true if the current thread group is the last to access the input + GetInputRecordCount = 242, // returns the number of records that have been coalesced into the current thread group + GetRemainingRecursionLevels = 253, // returns how many levels of recursion remain + IncrementOutputCount = 240, // Select the next logical output count for an EmptyNodeOutput for the whole group or per thread. + NodeOutputIsValid = 252, // returns true if the specified output node is present in the work graph + OutputComplete = 241, // indicates all outputs for a given records are complete + NumOpCodes_Dxil_1_0 = 137, NumOpCodes_Dxil_1_1 = 139, NumOpCodes_Dxil_1_2 = 141, @@ -746,7 +783,7 @@ namespace DXIL { NumOpCodes_Dxil_1_6 = 222, NumOpCodes_Dxil_1_7 = 226, - NumOpCodes = 226 // exclusive last value of enumeration + NumOpCodes = 254 // exclusive last value of enumeration }; // OPCODE-ENUM:END @@ -778,12 +815,20 @@ namespace DXIL { BitcastI32toF32, BitcastI64toF64, - // Compute/Mesh/Amplification shader + // Compute/Mesh/Amplification/Node shader FlattenedThreadIdInGroup, GroupId, ThreadId, ThreadIdInGroup, + // Create/Annotate Node Handles + AllocateNodeOutputRecords, + AnnotateNodeHandle, + AnnotateNodeRecordHandle, + CreateNodeInputRecordHandle, + IndexNodeHandle, + createNodeOutputHandle, + // Derivatives CalculateLOD, Unary, @@ -817,6 +862,9 @@ namespace DXIL { EmitThenCutStream, GSInstanceID, + // Get Pointer to Node Record in Address Space 6 + GetNodeRecordPtr, + // Get handle from heap AnnotateHandle, CreateHandleFromBinding, @@ -968,6 +1016,9 @@ namespace DXIL { AtomicBinOp, 
AtomicCompareExchange, Barrier, + BarrierByMemoryHandle, + BarrierByMemoryType, + BarrierByNodeRecordHandle, // Temporary, indexable, input, output registers LoadInput, @@ -1007,6 +1058,26 @@ namespace DXIL { WaveReadLaneAt, WaveReadLaneFirst, + // WaveMatrix + WaveMatrix_Accumulate, + WaveMatrix_Annotate, + WaveMatrix_Depth, + WaveMatrix_Fill, + WaveMatrix_LoadGroupShared, + WaveMatrix_LoadRawBuf, + WaveMatrix_Multiply, + WaveMatrix_ScalarOp, + WaveMatrix_StoreGroupShared, + WaveMatrix_StoreRawBuf, + + // Work Graph intrinsics + FinishedCrossGroupSharing, + GetInputRecordCount, + GetRemainingRecursionLevels, + IncrementOutputCount, + NodeOutputIsValid, + OutputComplete, + NumOpClasses_Dxil_1_0 = 93, NumOpClasses_Dxil_1_1 = 95, NumOpClasses_Dxil_1_2 = 97, @@ -1016,7 +1087,7 @@ namespace DXIL { NumOpClasses_Dxil_1_6 = 149, NumOpClasses_Dxil_1_7 = 153, - NumOpClasses = 153 // exclusive last value of enumeration + NumOpClasses = 179 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END @@ -1206,6 +1277,7 @@ namespace DXIL { const unsigned kTGSMAddrSpace = 3; const unsigned kGenericPointerAddrSpace = 4; const unsigned kImmediateCBufferAddrSpace = 5; + const unsigned kNodeRecordAddrSpace = 6; // Input primitive, must match D3D_PRIMITIVE enum class InputPrimitive : unsigned { @@ -1308,6 +1380,59 @@ namespace DXIL { LastEntry, }; + enum class NodeLaunchType { + Invalid = 0, + Broadcasting, + Coalescing, + Thread, + + LastEntry + }; + + enum class NodeIOFlags : uint32_t { + None = 0x0, + Input = 0x1, + Output = 0x2, + ReadWrite = 0x4, + EmptyRecord = 0x8, // EmptyNodeOutput[Array], EmptyNodeInput + NodeArray = 0x10, // NodeOutputArray, EmptyNodeOutputArray + + // Record granularity (enum in 2 bits) + ThreadRecord = 0x20, // [RW]ThreadNodeInputRecord, ThreadNodeOutputRecords + GroupRecord = 0x40, // [RW]GroupNodeInputRecord, GroupNodeOutputRecords + DispatchRecord = 0x60, // [RW]DispatchNodeInputRecord + RecordGranularityMask = 0x60, + + NodeIOKindMask = 0x7F, + 
+ TrackRWInputSharing = 0x100, // TrackRWInputSharing tracked on all non-empty input/output record/node types + + // Mask for node/record properties beyond NodeIOKind + RecordFlagsMask = 0x100, + NodeFlagsMask = 0x100, + }; + + enum class NodeIOKind : uint32_t { + Invalid = 0, + + EmptyInput = (uint32_t)NodeIOFlags::EmptyRecord | (uint32_t)NodeIOFlags::Input, + NodeOutput = (uint32_t)NodeIOFlags::ReadWrite | (uint32_t)NodeIOFlags::Output, + NodeOutputArray = (uint32_t)NodeIOFlags::ReadWrite | (uint32_t)NodeIOFlags::Output | (uint32_t)NodeIOFlags::NodeArray, + EmptyOutput = (uint32_t)NodeIOFlags::EmptyRecord | (uint32_t)NodeIOFlags::Output, + EmptyOutputArray = (uint32_t)NodeIOFlags::EmptyRecord | (uint32_t)NodeIOFlags::Output | (uint32_t)NodeIOFlags::NodeArray, + + DispatchNodeInputRecord = (uint32_t)NodeIOFlags::Input | (uint32_t)NodeIOFlags::DispatchRecord, + GroupNodeInputRecords = (uint32_t)NodeIOFlags::Input | (uint32_t)NodeIOFlags::GroupRecord, + ThreadNodeInputRecord = (uint32_t)NodeIOFlags::Input | (uint32_t)NodeIOFlags::ThreadRecord, + + RWDispatchNodeInputRecord = (uint32_t)NodeIOFlags::ReadWrite | (uint32_t)NodeIOFlags::Input | (uint32_t)NodeIOFlags::DispatchRecord, + RWGroupNodeInputRecords = (uint32_t)NodeIOFlags::ReadWrite | (uint32_t)NodeIOFlags::Input | (uint32_t)NodeIOFlags::GroupRecord, + RWThreadNodeInputRecord = (uint32_t)NodeIOFlags::ReadWrite | (uint32_t)NodeIOFlags::Input | (uint32_t)NodeIOFlags::ThreadRecord, + + GroupNodeOutputRecords = (uint32_t)NodeIOFlags::ReadWrite | (uint32_t)NodeIOFlags::Output | (uint32_t)NodeIOFlags::GroupRecord, + ThreadNodeOutputRecords = (uint32_t)NodeIOFlags::ReadWrite | (uint32_t)NodeIOFlags::Output | (uint32_t)NodeIOFlags::ThreadRecord, + }; + // Kind of quad-level operation enum class QuadOpKind { ReadAcrossX = 0, // returns the value from the other lane in the quad in the horizontal direction @@ -1434,6 +1559,51 @@ namespace DXIL { LastEntry = 2 }; + enum class WaveMatrixKind : uint8_t { + Left = 0, + Right 
= 1, + LeftColAcc = 2, + RightRowAcc = 3, + Accumulator = 4, + NumKinds = 5, + MaskSide = 1, + MaskClass = 6, // 0 = Left/Right, 2 = Fragment, 4 = Accumulator + }; + + /* hctdb_instrhelp.get_enum_decl("WaveMatrixScalarOpCode")*/ + // WAVEMATRIXSCALAROPCODE-ENUM:BEGIN + // Operation for WaveMatrix_ScalarOp + enum class WaveMatrixScalarOpCode : unsigned { + Add = 0, + Divide = 3, + Invalid = 4, + Multiply = 2, + Subtract = 1, + }; + // WAVEMATRIXSCALAROPCODE-ENUM:END + + // Corresponds to MEMORY_TYPE_FLAG enums in HLSL + enum class MemoryTypeFlag : uint32_t { + UavMemory = 0x00000001, + GroupSharedMemory = 0x00000002, + NodeInputMemory = 0x00000004, + NodeOutputMemory = 0x00000008, + ValidMask = 0x0000000F + }; + + // Corresponds to ACCESS_FLAG enums in HLSL + enum class AccessFlag : uint32_t { + DeviceVisible = 0x00000001, // implies group visible (smaller scope) + GroupVisible = 0x00000002, + ValidMask = 0x00000003 + }; + + // Corresponds to SYNC_FLAG enum in HLSL + enum class SyncFlag : uint32_t { + GroupSync = 0x00000001, + ValidMask = 0x00000001 + }; + // Constant for Container. const uint8_t DxilProgramSigMaskX = 1; const uint8_t DxilProgramSigMaskY = 2; @@ -1479,6 +1649,10 @@ namespace DXIL { const uint64_t ShaderFeatureInfo_AdvancedTextureOps = 0x20000000; const uint64_t ShaderFeatureInfo_WriteableMSAATextures = 0x40000000; + // SM 6.8+ + // WaveMMA slots in between two SM 6.6 feature bits. 
+ const uint64_t ShaderFeatureInfo_WaveMMA = 0x8000000; + const unsigned ShaderFeatureInfoCount = 31; // DxilSubobjectType must match D3D12_STATE_SUBOBJECT_TYPE, with diff --git a/include/dxc/DXIL/DxilFunctionProps.h b/include/dxc/DXIL/DxilFunctionProps.h index 690d2d01e0..2521794c25 100644 --- a/include/dxc/DXIL/DxilFunctionProps.h +++ b/include/dxc/DXIL/DxilFunctionProps.h @@ -16,6 +16,8 @@ #include #include "dxc/DXIL/DxilConstants.h" +#include "dxc/DXIL/DxilNodeProps.h" +#include "llvm/ADT/StringRef.h" namespace llvm { class Function; @@ -27,13 +29,14 @@ struct DxilFunctionProps { DxilFunctionProps() { memset(&ShaderProps, 0, sizeof(ShaderProps)); shaderKind = DXIL::ShaderKind::Invalid; + NodeShaderID = {}; + NodeShaderSharedInput = {}; + memset(&Node, 0, sizeof(Node)); + Node.LaunchType = DXIL::NodeLaunchType::Invalid; + Node.LocalRootArgumentsTableIndex = -1; waveSize = 0; } union { - // Compute shader. - struct { - unsigned numThreads[3]; - } CS; // Geometry shader. struct { DXIL::InputPrimitive inputPrimitive; @@ -75,7 +78,6 @@ struct DxilFunctionProps { } Ray; // Mesh shader. struct { - unsigned numThreads[3]; unsigned maxVertexCount; unsigned maxPrimitiveCount; DXIL::MeshOutputTopology outputTopology; @@ -83,11 +85,28 @@ struct DxilFunctionProps { } MS; // Amplification shader. struct { - unsigned numThreads[3]; unsigned payloadSizeInBytes; } AS; } ShaderProps; + + // numThreads shared between multiple shader types and node shaders. 
+ unsigned numThreads[3] = {0, 0, 0}; // Zero-initialized here: no longer inside the ShaderProps union that the constructor memsets. + + struct NodeProps { + DXIL::NodeLaunchType LaunchType = DXIL::NodeLaunchType::Invalid; + bool IsProgramEntry; + int LocalRootArgumentsTableIndex; + unsigned DispatchGrid[3]; + unsigned MaxDispatchGrid[3]; + unsigned MaxRecursionDepth; + } Node; + DXIL::ShaderKind shaderKind; + NodeID NodeShaderID; + NodeID NodeShaderSharedInput; + std::vector InputNodes; + std::vector OutputNodes; + // WaveSize is currently allowed only on compute shaders, but could be supported on other shader types in the future unsigned waveSize; // Save root signature for lib profile entry. @@ -118,6 +137,11 @@ struct DxilFunctionProps { } bool IsMS() const { return shaderKind == DXIL::ShaderKind::Mesh; } bool IsAS() const { return shaderKind == DXIL::ShaderKind::Amplification; } + bool IsNode() const { + return shaderKind == DXIL::ShaderKind::Node || + Node.LaunchType != DXIL::NodeLaunchType::Invalid; + } + }; } // namespace hlsl diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index 1ccd9ba1f9..5c3047c758 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -7349,5 +7349,852 @@ struct DxilInst_TextureStoreSample { llvm::Value *get_sampleIdx() const { return Instr->getOperand(10); } void set_sampleIdx(llvm::Value *val) { Instr->setOperand(10, val); } }; + +/// This instruction Annotate a wave matrix pointer with the type information +struct DxilInst_WaveMatrix_Annotate { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_WaveMatrix_Annotate(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WaveMatrix_Annotate); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // 
Operand indexes + enum OperandIdx { + arg_waveMatrixPtr = 1, + arg_waveMatProps = 2, + }; + // Accessors + llvm::Value *get_waveMatrixPtr() const { return Instr->getOperand(1); } + void set_waveMatrixPtr(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_waveMatProps() const { return Instr->getOperand(2); } + void set_waveMatProps(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Returns depth (K) value for matrix of specified type +struct DxilInst_WaveMatrix_Depth { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_WaveMatrix_Depth(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WaveMatrix_Depth); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_waveMatProps = 1, + }; + // Accessors + llvm::Value *get_waveMatProps() const { return Instr->getOperand(1); } + void set_waveMatProps(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction Fill wave matrix with scalar value +struct DxilInst_WaveMatrix_Fill { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_WaveMatrix_Fill(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WaveMatrix_Fill); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_waveMatrixPtr = 1, + arg_value = 2, + }; + // Accessors + llvm::Value 
*get_waveMatrixPtr() const { return Instr->getOperand(1); } + void set_waveMatrixPtr(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_value() const { return Instr->getOperand(2); } + void set_value(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Load wave matrix from raw buffer +struct DxilInst_WaveMatrix_LoadRawBuf { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_WaveMatrix_LoadRawBuf(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WaveMatrix_LoadRawBuf); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (7 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_waveMatrixPtr = 1, + arg_rawBuf = 2, + arg_offsetInBytes = 3, + arg_strideInBytes = 4, + arg_alignmentInBytes = 5, + arg_colMajor = 6, + }; + // Accessors + llvm::Value *get_waveMatrixPtr() const { return Instr->getOperand(1); } + void set_waveMatrixPtr(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_rawBuf() const { return Instr->getOperand(2); } + void set_rawBuf(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_offsetInBytes() const { return Instr->getOperand(3); } + void set_offsetInBytes(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_strideInBytes() const { return Instr->getOperand(4); } + void set_strideInBytes(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_alignmentInBytes() const { return Instr->getOperand(5); } + void set_alignmentInBytes(llvm::Value *val) { Instr->setOperand(5, val); } + int8_t get_alignmentInBytes_val() const { return (int8_t)(llvm::dyn_cast(Instr->getOperand(5))->getZExtValue()); } + void set_alignmentInBytes_val(int8_t val) { 
Instr->setOperand(5, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 8), llvm::APInt(8, (uint64_t)val))); } + llvm::Value *get_colMajor() const { return Instr->getOperand(6); } + void set_colMajor(llvm::Value *val) { Instr->setOperand(6, val); } + bool get_colMajor_val() const { return (bool)(llvm::dyn_cast(Instr->getOperand(6))->getZExtValue()); } + void set_colMajor_val(bool val) { Instr->setOperand(6, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 1), llvm::APInt(1, (uint64_t)val))); } +}; + +/// This instruction Load wave matrix from group shared array +struct DxilInst_WaveMatrix_LoadGroupShared { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_WaveMatrix_LoadGroupShared(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WaveMatrix_LoadGroupShared); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (6 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_waveMatrixPtr = 1, + arg_groupsharedPtr = 2, + arg_startArrayIndex = 3, + arg_strideInElements = 4, + arg_colMajor = 5, + }; + // Accessors + llvm::Value *get_waveMatrixPtr() const { return Instr->getOperand(1); } + void set_waveMatrixPtr(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_groupsharedPtr() const { return Instr->getOperand(2); } + void set_groupsharedPtr(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_startArrayIndex() const { return Instr->getOperand(3); } + void set_startArrayIndex(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_strideInElements() const { return Instr->getOperand(4); } + void set_strideInElements(llvm::Value *val) { Instr->setOperand(4, val); } + 
llvm::Value *get_colMajor() const { return Instr->getOperand(5); } + void set_colMajor(llvm::Value *val) { Instr->setOperand(5, val); } + bool get_colMajor_val() const { return (bool)(llvm::dyn_cast(Instr->getOperand(5))->getZExtValue()); } + void set_colMajor_val(bool val) { Instr->setOperand(5, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 1), llvm::APInt(1, (uint64_t)val))); } +}; + +/// This instruction Store wave matrix to raw buffer +struct DxilInst_WaveMatrix_StoreRawBuf { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_WaveMatrix_StoreRawBuf(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WaveMatrix_StoreRawBuf); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (7 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_waveMatrixPtr = 1, + arg_rawBuf = 2, + arg_offsetInBytes = 3, + arg_strideInBytes = 4, + arg_alignmentInBytes = 5, + arg_colMajor = 6, + }; + // Accessors + llvm::Value *get_waveMatrixPtr() const { return Instr->getOperand(1); } + void set_waveMatrixPtr(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_rawBuf() const { return Instr->getOperand(2); } + void set_rawBuf(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_offsetInBytes() const { return Instr->getOperand(3); } + void set_offsetInBytes(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_strideInBytes() const { return Instr->getOperand(4); } + void set_strideInBytes(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_alignmentInBytes() const { return Instr->getOperand(5); } + void set_alignmentInBytes(llvm::Value *val) { Instr->setOperand(5, val); } + int8_t 
get_alignmentInBytes_val() const { return (int8_t)(llvm::dyn_cast(Instr->getOperand(5))->getZExtValue()); } + void set_alignmentInBytes_val(int8_t val) { Instr->setOperand(5, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 8), llvm::APInt(8, (uint64_t)val))); } + llvm::Value *get_colMajor() const { return Instr->getOperand(6); } + void set_colMajor(llvm::Value *val) { Instr->setOperand(6, val); } + bool get_colMajor_val() const { return (bool)(llvm::dyn_cast(Instr->getOperand(6))->getZExtValue()); } + void set_colMajor_val(bool val) { Instr->setOperand(6, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 1), llvm::APInt(1, (uint64_t)val))); } +}; + +/// This instruction Store wave matrix to group shared array +struct DxilInst_WaveMatrix_StoreGroupShared { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_WaveMatrix_StoreGroupShared(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WaveMatrix_StoreGroupShared); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (6 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_waveMatrixPtr = 1, + arg_groupsharedPtr = 2, + arg_startArrayIndex = 3, + arg_strideInElements = 4, + arg_colMajor = 5, + }; + // Accessors + llvm::Value *get_waveMatrixPtr() const { return Instr->getOperand(1); } + void set_waveMatrixPtr(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_groupsharedPtr() const { return Instr->getOperand(2); } + void set_groupsharedPtr(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_startArrayIndex() const { return Instr->getOperand(3); } + void set_startArrayIndex(llvm::Value *val) { Instr->setOperand(3, val); } + 
llvm::Value *get_strideInElements() const { return Instr->getOperand(4); } + void set_strideInElements(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_colMajor() const { return Instr->getOperand(5); } + void set_colMajor(llvm::Value *val) { Instr->setOperand(5, val); } + bool get_colMajor_val() const { return (bool)(llvm::dyn_cast(Instr->getOperand(5))->getZExtValue()); } + void set_colMajor_val(bool val) { Instr->setOperand(5, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 1), llvm::APInt(1, (uint64_t)val))); } +}; + +/// This instruction Mutiply left and right wave matrix and store in accumulator +struct DxilInst_WaveMatrix_Multiply { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_WaveMatrix_Multiply(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WaveMatrix_Multiply); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_waveMatrixAccumulator = 1, + arg_waveMatrixLeft = 2, + arg_waveMatrixRight = 3, + }; + // Accessors + llvm::Value *get_waveMatrixAccumulator() const { return Instr->getOperand(1); } + void set_waveMatrixAccumulator(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_waveMatrixLeft() const { return Instr->getOperand(2); } + void set_waveMatrixLeft(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_waveMatrixRight() const { return Instr->getOperand(3); } + void set_waveMatrixRight(llvm::Value *val) { Instr->setOperand(3, val); } +}; + +/// This instruction Mutiply left and right wave matrix and accumulate into accumulator +struct DxilInst_WaveMatrix_MultiplyAccumulate { + llvm::Instruction *Instr; + 
// Construction and identification + DxilInst_WaveMatrix_MultiplyAccumulate(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WaveMatrix_MultiplyAccumulate); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_waveMatrixAccumulator = 1, + arg_waveMatrixLeft = 2, + arg_waveMatrixRight = 3, + }; + // Accessors + llvm::Value *get_waveMatrixAccumulator() const { return Instr->getOperand(1); } + void set_waveMatrixAccumulator(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_waveMatrixLeft() const { return Instr->getOperand(2); } + void set_waveMatrixLeft(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_waveMatrixRight() const { return Instr->getOperand(3); } + void set_waveMatrixRight(llvm::Value *val) { Instr->setOperand(3, val); } +}; + +/// This instruction Perform scalar operation on each element of wave matrix +struct DxilInst_WaveMatrix_ScalarOp { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_WaveMatrix_ScalarOp(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WaveMatrix_ScalarOp); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_waveMatrixPtr = 1, + arg_op = 2, + arg_value = 3, + }; + // Accessors + llvm::Value *get_waveMatrixPtr() const { return Instr->getOperand(1); } + void set_waveMatrixPtr(llvm::Value *val) { 
Instr->setOperand(1, val); } + llvm::Value *get_op() const { return Instr->getOperand(2); } + void set_op(llvm::Value *val) { Instr->setOperand(2, val); } + int8_t get_op_val() const { return (int8_t)(llvm::dyn_cast(Instr->getOperand(2))->getZExtValue()); } + void set_op_val(int8_t val) { Instr->setOperand(2, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 8), llvm::APInt(8, (uint64_t)val))); } + llvm::Value *get_value() const { return Instr->getOperand(3); } + void set_value(llvm::Value *val) { Instr->setOperand(3, val); } +}; + +/// This instruction Sum rows or columns of an input matrix into an existing accumulator fragment matrix +struct DxilInst_WaveMatrix_SumAccumulate { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_WaveMatrix_SumAccumulate(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WaveMatrix_SumAccumulate); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_waveMatrixFragment = 1, + arg_waveMatrixInput = 2, + }; + // Accessors + llvm::Value *get_waveMatrixFragment() const { return Instr->getOperand(1); } + void set_waveMatrixFragment(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_waveMatrixInput() const { return Instr->getOperand(2); } + void set_waveMatrixInput(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Element-wise accumulate, or broadcast add of fragment into accumulator +struct DxilInst_WaveMatrix_Add { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_WaveMatrix_Add(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return 
hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::WaveMatrix_Add); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_waveMatrixAccumulator = 1, + arg_waveMatrixAccumulatorOrFragment = 2, + }; + // Accessors + llvm::Value *get_waveMatrixAccumulator() const { return Instr->getOperand(1); } + void set_waveMatrixAccumulator(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_waveMatrixAccumulatorOrFragment() const { return Instr->getOperand(2); } + void set_waveMatrixAccumulatorOrFragment(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction returns a handle for the output records +struct DxilInst_AllocateNodeOutputRecords { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_AllocateNodeOutputRecords(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::AllocateNodeOutputRecords); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_output = 1, + arg_numRecords = 2, + arg_perThread = 3, + }; + // Accessors + llvm::Value *get_output() const { return Instr->getOperand(1); } + void set_output(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_numRecords() const { return Instr->getOperand(2); } + void set_numRecords(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_perThread() const { return Instr->getOperand(3); } + void set_perThread(llvm::Value *val) { Instr->setOperand(3, 
val); } + bool get_perThread_val() const { return (bool)(llvm::dyn_cast(Instr->getOperand(3))->getZExtValue()); } + void set_perThread_val(bool val) { Instr->setOperand(3, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 1), llvm::APInt(1, (uint64_t)val))); } +}; + +/// This instruction retrieve node input/output record pointer in address space 6 +struct DxilInst_GetNodeRecordPtr { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_GetNodeRecordPtr(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::GetNodeRecordPtr); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_recordhandle = 1, + arg_arrayIndex = 2, + }; + // Accessors + llvm::Value *get_recordhandle() const { return Instr->getOperand(1); } + void set_recordhandle(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_arrayIndex() const { return Instr->getOperand(2); } + void set_arrayIndex(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction Select the next logical output count for an EmptyNodeOutput for the whole group or per thread. 
+struct DxilInst_IncrementOutputCount { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_IncrementOutputCount(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::IncrementOutputCount); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_output = 1, + arg_count = 2, + arg_perThread = 3, + }; + // Accessors + llvm::Value *get_output() const { return Instr->getOperand(1); } + void set_output(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_count() const { return Instr->getOperand(2); } + void set_count(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_perThread() const { return Instr->getOperand(3); } + void set_perThread(llvm::Value *val) { Instr->setOperand(3, val); } + bool get_perThread_val() const { return (bool)(llvm::dyn_cast(Instr->getOperand(3))->getZExtValue()); } + void set_perThread_val(bool val) { Instr->setOperand(3, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 1), llvm::APInt(1, (uint64_t)val))); } +}; + +/// This instruction indicates all outputs for a given records are complete +struct DxilInst_OutputComplete { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_OutputComplete(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::OutputComplete); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // 
Operand indexes + enum OperandIdx { + arg_output = 1, + }; + // Accessors + llvm::Value *get_output() const { return Instr->getOperand(1); } + void set_output(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction returns the number of records that have been coalesced into the current thread group +struct DxilInst_GetInputRecordCount { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_GetInputRecordCount(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::GetInputRecordCount); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_input = 1, + }; + // Accessors + llvm::Value *get_input() const { return Instr->getOperand(1); } + void set_input(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction returns true if the current thread group is the last to access the input +struct DxilInst_FinishedCrossGroupSharing { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_FinishedCrossGroupSharing(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::FinishedCrossGroupSharing); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_input = 1, + }; + // Accessors + llvm::Value *get_input() const { return Instr->getOperand(1); } + void set_input(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This 
instruction Request a barrier for a set of memory types and/or thread group execution sync +struct DxilInst_BarrierByMemoryType { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_BarrierByMemoryType(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::BarrierByMemoryType); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_MemoryTypeFlags = 1, + arg_AccessFlags = 2, + arg_SyncFlags = 3, + }; + // Accessors + llvm::Value *get_MemoryTypeFlags() const { return Instr->getOperand(1); } + void set_MemoryTypeFlags(llvm::Value *val) { Instr->setOperand(1, val); } + int32_t get_MemoryTypeFlags_val() const { return (int32_t)(llvm::dyn_cast(Instr->getOperand(1))->getZExtValue()); } + void set_MemoryTypeFlags_val(int32_t val) { Instr->setOperand(1, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 32), llvm::APInt(32, (uint64_t)val))); } + llvm::Value *get_AccessFlags() const { return Instr->getOperand(2); } + void set_AccessFlags(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_AccessFlags_val() const { return (int32_t)(llvm::dyn_cast(Instr->getOperand(2))->getZExtValue()); } + void set_AccessFlags_val(int32_t val) { Instr->setOperand(2, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 32), llvm::APInt(32, (uint64_t)val))); } + llvm::Value *get_SyncFlags() const { return Instr->getOperand(3); } + void set_SyncFlags(llvm::Value *val) { Instr->setOperand(3, val); } + int32_t get_SyncFlags_val() const { return (int32_t)(llvm::dyn_cast(Instr->getOperand(3))->getZExtValue()); } + void set_SyncFlags_val(int32_t val) { Instr->setOperand(3, 
llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 32), llvm::APInt(32, (uint64_t)val))); } +}; + +/// This instruction Request a barrier for just the memory used by the specified object +struct DxilInst_BarrierByMemoryHandle { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_BarrierByMemoryHandle(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::BarrierByMemoryHandle); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_object = 1, + arg_AccessFlags = 2, + arg_SyncFlags = 3, + }; + // Accessors + llvm::Value *get_object() const { return Instr->getOperand(1); } + void set_object(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_AccessFlags() const { return Instr->getOperand(2); } + void set_AccessFlags(llvm::Value *val) { Instr->setOperand(2, val); } + int32_t get_AccessFlags_val() const { return (int32_t)(llvm::dyn_cast(Instr->getOperand(2))->getZExtValue()); } + void set_AccessFlags_val(int32_t val) { Instr->setOperand(2, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 32), llvm::APInt(32, (uint64_t)val))); } + llvm::Value *get_SyncFlags() const { return Instr->getOperand(3); } + void set_SyncFlags(llvm::Value *val) { Instr->setOperand(3, val); } + int32_t get_SyncFlags_val() const { return (int32_t)(llvm::dyn_cast(Instr->getOperand(3))->getZExtValue()); } + void set_SyncFlags_val(int32_t val) { Instr->setOperand(3, llvm::Constant::getIntegerValue(llvm::IntegerType::get(Instr->getContext(), 32), llvm::APInt(32, (uint64_t)val))); } +}; + +/// This instruction Request a barrier for just the memory used by the node record +struct 
DxilInst_BarrierByNodeRecordHandle { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_BarrierByNodeRecordHandle(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::BarrierByNodeRecordHandle); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_object = 1, + arg_AccessFlags = 2, + arg_SyncFlags = 3, + }; + // Accessors + llvm::Value *get_object() const { return Instr->getOperand(1); } + void set_object(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_AccessFlags() const { return Instr->getOperand(2); } + void set_AccessFlags(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_SyncFlags() const { return Instr->getOperand(3); } + void set_SyncFlags(llvm::Value *val) { Instr->setOperand(3, val); } +}; + +/// This instruction Creates a handle to a NodeOutput +struct DxilInst_CreateNodeOutputHandle { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_CreateNodeOutputHandle(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::CreateNodeOutputHandle); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_MetadataIdx = 1, + }; + // Accessors + llvm::Value *get_MetadataIdx() const { return Instr->getOperand(1); } + void set_MetadataIdx(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction 
returns the handle for the location in the output node array at the indicated index +struct DxilInst_IndexNodeHandle { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_IndexNodeHandle(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::IndexNodeHandle); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_NodeOutputHandle = 1, + arg_ArrayIndex = 2, + }; + // Accessors + llvm::Value *get_NodeOutputHandle() const { return Instr->getOperand(1); } + void set_NodeOutputHandle(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_ArrayIndex() const { return Instr->getOperand(2); } + void set_ArrayIndex(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction annotate handle with node properties +struct DxilInst_AnnotateNodeHandle { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_AnnotateNodeHandle(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::AnnotateNodeHandle); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_node = 1, + arg_props = 2, + }; + // Accessors + llvm::Value *get_node() const { return Instr->getOperand(1); } + void set_node(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_props() const { return Instr->getOperand(2); } + void set_props(llvm::Value *val) { 
Instr->setOperand(2, val); } +}; + +/// This instruction create a handle for an InputRecord +struct DxilInst_CreateNodeInputRecordHandle { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_CreateNodeInputRecordHandle(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::CreateNodeInputRecordHandle); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_MetadataIdx = 1, + }; + // Accessors + llvm::Value *get_MetadataIdx() const { return Instr->getOperand(1); } + void set_MetadataIdx(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction annotate handle with node record properties +struct DxilInst_AnnotateNodeRecordHandle { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_AnnotateNodeRecordHandle(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::AnnotateNodeRecordHandle); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (3 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_noderecord = 1, + arg_props = 2, + }; + // Accessors + llvm::Value *get_noderecord() const { return Instr->getOperand(1); } + void set_noderecord(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_props() const { return Instr->getOperand(2); } + void set_props(llvm::Value *val) { Instr->setOperand(2, val); } +}; + +/// This instruction returns true if the specified output node is 
present in the work graph +struct DxilInst_NodeOutputIsValid { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_NodeOutputIsValid(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::NodeOutputIsValid); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (2 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_output = 1, + }; + // Accessors + llvm::Value *get_output() const { return Instr->getOperand(1); } + void set_output(llvm::Value *val) { Instr->setOperand(1, val); } +}; + +/// This instruction returns how many levels of recursion remain +struct DxilInst_GetRemainingRecursionLevels { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_GetRemainingRecursionLevels(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::GetRemainingRecursionLevels); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (1 != llvm::dyn_cast(Instr)->getNumArgOperands()) return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } +}; // INSTR-HELPER:END } // namespace hlsl diff --git a/include/dxc/DXIL/DxilMetadataHelper.h b/include/dxc/DXIL/DxilMetadataHelper.h index f24a08c3dc..595c85e069 100644 --- a/include/dxc/DXIL/DxilMetadataHelper.h +++ b/include/dxc/DXIL/DxilMetadataHelper.h @@ -16,6 +16,7 @@ #include #include #include +#include "dxc/DXIL/DxilFunctionProps.h" namespace llvm { class LLVMContext; @@ -229,6 +230,7 @@ class DxilMDHelper { static const unsigned kDxilFieldAnnotationResPropTag = 10; static const unsigned kDxilFieldAnnotationBitFieldsTag = 11; static const 
unsigned kDxilFieldAnnotationBitFieldWidthTag = 12; + static const unsigned kDxilFieldAnnotationVectorSizeTag = 13; // DXR Payload Annotations static const unsigned kDxilPayloadAnnotationStructTag = 0; @@ -278,6 +280,32 @@ class DxilMDHelper { static const unsigned kDxilWaveSizeTag = 11; static const unsigned kDxilEntryRootSigTag = 12; + // Node Tags ( extension of shader property tags) + + static const unsigned kDxilNodeLaunchTypeTag = 13; + static const unsigned kDxilNodeIsProgramEntryTag = 14; + static const unsigned kDxilNodeIdTag = 15; + static const unsigned kDxilNodeLocalRootArgumentsTableIndexTag = 16; + static const unsigned kDxilShareInputOfTag = 17; + static const unsigned kDxilNodeDispatchGridTag = 18; + static const unsigned kDxilNodeMaxRecursionDepthTag = 19; + static const unsigned kDxilNodeInputsTag = 20; + static const unsigned kDxilNodeOutputsTag = 21; + static const unsigned kDxilNodeMaxDispatchGridTag = 22; + + // Node Input/Output State. + static const unsigned kDxilNodeOutputIDTag = 0; + static const unsigned kDxilNodeIOFlagsTag = 1; + static const unsigned kDxilNodeRecordTypeTag = 2; + static const unsigned kDxilNodeMaxRecordsTag = 3; + static const unsigned kDxilNodeMaxRecordsSharedWithTag = 4; + static const unsigned kDxilNodeOutputArraySizeTag = 5; + static const unsigned kDxilNodeAllowSparseNodesTag = 6; + + // Node Record Type + static const unsigned kDxilNodeRecordSizeTag = 0; + static const unsigned kDxilNodeSVDispatchGridTag = 1; + // GSState. static const unsigned kDxilGSStateNumFields = 5; static const unsigned kDxilGSStateInputPrimitive = 0; @@ -437,6 +465,10 @@ class DxilMDHelper { void LoadDxrPayloadAccessQualifiers(const llvm::MDOperand &MDO, DxilPayloadFieldAnnotation &FA); // Function props. 
+ void SerializeNodeProps(llvm::SmallVectorImpl &MDVals, unsigned &valIdx, + const hlsl::DxilFunctionProps *props); + void DeserializeNodeProps(const llvm::MDTuple *pProps, unsigned &idx, + hlsl::DxilFunctionProps *props); llvm::MDTuple *EmitDxilFunctionProps(const hlsl::DxilFunctionProps *props, const llvm::Function *F); const llvm::Function *LoadDxilFunctionProps(const llvm::MDTuple *pProps, @@ -513,6 +545,12 @@ class DxilMDHelper { llvm::MDTuple *EmitDxilASState(const unsigned *NumThreads, unsigned payloadSizeInBytes); void LoadDxilASState(const llvm::MDOperand &MDO, unsigned *NumThreads, unsigned &payloadSizeInBytes); + llvm::MDTuple * EmitDxilNodeIOState(const NodeIOProperties &Node); + hlsl::NodeIOProperties LoadDxilNodeIOState(const llvm::MDOperand &MDO); + + void EmitDxilNodeState(std::vector &MDVals, + const DxilFunctionProps &props); + void AddCounterIfNonZero(uint32_t value, llvm::StringRef name, std::vector &MDVals); void LoadCounterMD(const llvm::MDOperand &MDName, const llvm::MDOperand &MDValue, DxilCounters &counters) const; public: diff --git a/include/dxc/DXIL/DxilNodeProps.h b/include/dxc/DXIL/DxilNodeProps.h new file mode 100644 index 0000000000..ccf191d2a4 --- /dev/null +++ b/include/dxc/DXIL/DxilNodeProps.h @@ -0,0 +1,145 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilNodeProps.h // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. 
// +// // +// Representation of DXIL nodes and node records properties // +// // +/////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include "DxilConstants.h" +#include "dxc/Support/Global.h" + +namespace llvm { +class StringRef; +} + +namespace hlsl { + +//------------------------------------------------------------------------------ +// +// NodeID - a node name paired with an index +// +struct NodeID { + std::string Name; + unsigned Index; +}; + +//------------------------------------------------------------------------------ +// +// SVDispatchGrid - byte offset, component type and component count +// +struct SVDispatchGrid { + unsigned ByteOffset; + DXIL::ComponentType ComponentType; + unsigned NumComponents; +}; + +//------------------------------------------------------------------------------ +// +// NodeRecordType - record size plus its SVDispatchGrid description +// +struct NodeRecordType { + unsigned size; + SVDispatchGrid SV_DispatchGrid; +}; + +//------------------------------------------------------------------------------ +// +// NodeInfo - IO flags (from NodeIOFlags or NodeIOKind) plus record size +// +struct NodeInfo { + NodeInfo() : NodeInfo(DXIL::NodeIOFlags::None) {} + NodeInfo(DXIL::NodeIOFlags flags, unsigned recordSize = 0) + : IOFlags((unsigned)flags), RecordSize(recordSize) {} + NodeInfo(DXIL::NodeIOKind kind, unsigned recordSize = 0) + : NodeInfo((DXIL::NodeIOFlags)kind, recordSize) {} + + unsigned IOFlags; + unsigned RecordSize; // 0 if EmptyNodeOutput +}; + +//------------------------------------------------------------------------------ +// +// NodeRecordInfo - alias of NodeInfo +// +typedef NodeInfo NodeRecordInfo; + +//------------------------------------------------------------------------------ +// +// NodeProps - metadata index plus NodeInfo +// +struct NodeProps { + unsigned MetadataIdx; + NodeInfo Info; +}; + +//------------------------------------------------------------------------------ +// +// NodeInputRecordProps - metadata index plus NodeRecordInfo +// +struct NodeInputRecordProps { + unsigned MetadataIdx; + NodeRecordInfo RecordInfo; +}; + +//------------------------------------------------------------------------------ +// +// NodeFlags - helper class 
for working with DXIL::NodeIOFlags and +// DXIL::NodeIOKind +// +struct NodeFlags { +public: + NodeFlags(); + NodeFlags(DXIL::NodeIOFlags flags); + NodeFlags(DXIL::NodeIOKind kind); + NodeFlags(uint32_t F); + + bool operator==(const NodeFlags &o) const; + operator uint32_t() const; + + DXIL::NodeIOKind GetNodeIOKind() const; + DXIL::NodeIOFlags GetNodeIOFlags() const; + + bool IsInputRecord() const; + bool IsOutputNode() const; + bool IsReadWrite() const; + bool IsEmpty() const; + bool IsEmptyInput() const; + bool IsValidNodeKind() const; + bool RecordTypeMatchesLaunchType(DXIL::NodeLaunchType launchType) const; + + void SetTrackRWInputSharing(); + bool GetTrackRWInputSharing() const; + +private: + DXIL::NodeIOFlags m_Flags; + +}; // end of NodeFlags + +//------------------------------------------------------------------------------ +// +// NodeIOProperties +// +struct NodeIOProperties { + NodeFlags Flags = NodeFlags(); + NodeRecordType RecordType = {}; + NodeID OutputID = {}; + unsigned MaxRecords = 0; + int MaxRecordsSharedWith = -1; + unsigned OutputArraySize = 0; + bool AllowSparseNodes = false; + +public: + NodeIOProperties() {} + NodeIOProperties(NodeFlags flags) : Flags(flags) {} + + NodeInfo GetNodeInfo() const; + NodeRecordInfo GetNodeRecordInfo() const; +}; + +} // namespace hlsl diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index f69cff443d..594906f9a5 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -16,6 +16,7 @@ class LLVMContext; class Module; class Type; class StructType; +class PointerType; class Function; class Constant; class Value; @@ -50,7 +51,11 @@ class OP { void RemoveFunction(llvm::Function *F); llvm::LLVMContext &GetCtx() { return m_Ctx; } llvm::Type *GetHandleType() const; + llvm::Type *GetNodeHandleType() const; + llvm::Type *GetNodeRecordHandleType() const; llvm::Type *GetResourcePropertiesType() const; + llvm::Type *GetNodePropertiesType() const; + llvm::Type 
*GetNodeRecordPropertiesType() const; llvm::Type *GetResourceBindingType() const; llvm::Type *GetDimensionsType() const; llvm::Type *GetSamplePosType() const; @@ -59,6 +64,8 @@ class OP { llvm::Type *GetSplitDoubleType() const; llvm::Type *GetFourI32Type() const; llvm::Type *GetFourI16Type() const; + llvm::StructType *GetWaveMatrixPropertiesType() const; + llvm::PointerType *GetWaveMatPtrType() const; llvm::Type *GetResRetType(llvm::Type *pOverloadType); llvm::Type *GetCBufferRetType(llvm::Type *pOverloadType); @@ -125,7 +132,11 @@ class OP { llvm::Module *m_pModule; llvm::Type *m_pHandleType; + llvm::Type *m_pNodeHandleType; + llvm::Type *m_pNodeRecordHandleType; llvm::Type *m_pResourcePropertiesType; + llvm::Type *m_pNodePropertiesType; + llvm::Type *m_pNodeRecordPropertiesType; llvm::Type *m_pResourceBindingType; llvm::Type *m_pDimensionsType; llvm::Type *m_pSamplePosType; @@ -134,6 +145,8 @@ class OP { llvm::Type *m_pSplitDoubleType; llvm::Type *m_pFourI32Type; llvm::Type *m_pFourI16Type; + llvm::StructType *m_pWaveMatInfoType; + llvm::PointerType *m_pWaveMatPtrType; DXIL::LowPrecisionMode m_LowPrecisionMode; diff --git a/include/dxc/DXIL/DxilSemantic.h b/include/dxc/DXIL/DxilSemantic.h index 2b0ba20747..8c89c2d499 100644 --- a/include/dxc/DXIL/DxilSemantic.h +++ b/include/dxc/DXIL/DxilSemantic.h @@ -54,7 +54,6 @@ class Semantic { static const Semantic ms_SemanticTable[kNumSemanticRecords]; friend class ShaderModel; - friend class SignatureElement; }; } // namespace hlsl diff --git a/include/dxc/DXIL/DxilShaderFlags.h b/include/dxc/DXIL/DxilShaderFlags.h index c03132b841..8ad26abdd1 100644 --- a/include/dxc/DXIL/DxilShaderFlags.h +++ b/include/dxc/DXIL/DxilShaderFlags.h @@ -143,6 +143,9 @@ namespace hlsl { void SetWriteableMSAATextures(bool flag) { m_bWriteableMSAATextures = flag; } bool GetWriteableMSAATextures() const { return m_bWriteableMSAATextures; } + void SetWaveMMA(bool flag) { m_bWaveMMA = flag; } + bool GetWaveMMA() const { return m_bWaveMMA; } + 
private: unsigned m_bDisableOptimizations :1; // D3D11_1_SB_GLOBAL_FLAG_SKIP_OPTIMIZATION unsigned m_bDisableMathRefactoring :1; //~D3D10_SB_GLOBAL_FLAG_REFACTORING_ALLOWED @@ -191,6 +194,7 @@ namespace hlsl { unsigned m_bAtomicInt64OnHeapResource : 1; // SHADER_FEATURE_ATOMIC_INT64_ON_DESCRIPTOR_HEAP_RESOURCE + // SM 6.7+ // Global flag indicating that any UAV may not alias any other UAV. // Set if UAVs are used, unless -res-may-alias was specified. // For modules compiled against validator version < 1.7, this flag will be @@ -200,7 +204,10 @@ namespace hlsl { unsigned m_bAdvancedTextureOps : 1; // SHADER_FEATURE_ADVANCED_TEXTURE_OPS unsigned m_bWriteableMSAATextures : 1; // SHADER_FEATURE_WRITEABLE_MSAA_TEXTURES - uint32_t m_align1 : 28; // align to 64 bit. + // SM 6.8+ + unsigned m_bWaveMMA : 1; // SHADER_FEATURE_WAVE_MMA + + uint32_t m_align1 : 27; // align to 64 bit. }; diff --git a/include/dxc/DXIL/DxilShaderModel.h b/include/dxc/DXIL/DxilShaderModel.h index a53e1e6a6a..17bf370593 100644 --- a/include/dxc/DXIL/DxilShaderModel.h +++ b/include/dxc/DXIL/DxilShaderModel.h @@ -15,9 +15,6 @@ #include "llvm/ADT/StringRef.h" -#include - - namespace hlsl { class Semantic; @@ -34,7 +31,7 @@ class ShaderModel { /* hctdb_instrhelp.get_highest_shader_model()*/ // VALRULE-TEXT:BEGIN static const unsigned kHighestMajor = 6; - static const unsigned kHighestMinor = 7; + static const unsigned kHighestMinor = 8; // VALRULE-TEXT:END static const unsigned kOfflineMinor = 0xF; @@ -72,6 +69,7 @@ class ShaderModel { bool IsSM65Plus() const { return IsSMAtLeast(6, 5); } bool IsSM66Plus() const { return IsSMAtLeast(6, 6); } bool IsSM67Plus() const { return IsSMAtLeast(6, 7); } + bool IsSM68Plus() const { return IsSMAtLeast(6, 8); } // VALRULE-TEXT:END const char *GetName() const { return m_pszName; } const char *GetKindName() const; @@ -102,7 +100,7 @@ class ShaderModel { bool m_bUAVs, bool m_bTypedUavs, unsigned m_UAVRegsLim); /* hctdb_instrhelp.get_num_shader_models()*/ // 
VALRULE-TEXT:BEGIN - static const unsigned kNumShaderModels = 83; + static const unsigned kNumShaderModels = 92; // VALRULE-TEXT:END static const ShaderModel ms_ShaderModels[kNumShaderModels]; diff --git a/include/dxc/DXIL/DxilTypeSystem.h b/include/dxc/DXIL/DxilTypeSystem.h index f9744c65d4..4c3b85344c 100644 --- a/include/dxc/DXIL/DxilTypeSystem.h +++ b/include/dxc/DXIL/DxilTypeSystem.h @@ -55,6 +55,9 @@ class DxilFieldAnnotation { const DxilMatrixAnnotation &GetMatrixAnnotation() const; void SetMatrixAnnotation(const DxilMatrixAnnotation &MA); + unsigned GetVectorSize() const; + void SetVectorSize(unsigned size); + // Currently, ResourceProperties is only used to capture resource type // information during CodeGen for the annotate handle generated during // AddOpcodeParamForIntrinsic. @@ -106,6 +109,7 @@ class DxilFieldAnnotation { bool m_bCBufferVarUsed; // true if this field represents a top level variable in CB structure, and it is used. std::vector m_BitFields; unsigned m_BitFieldWidth; // For bit field. 0 means not bitfield. + unsigned m_VectorSize; }; class DxilTemplateArgAnnotation { @@ -218,6 +222,7 @@ enum class DxilParamInputQual { OutVertices, OutPrimitives, InPayload, + NodeIO }; /// Use this class to represent type annotation for function parameter. 
@@ -229,6 +234,7 @@ class DxilParameterAnnotation : public DxilFieldAnnotation { const std::vector &GetSemanticIndexVec() const; void SetSemanticIndexVec(const std::vector &Vec); void AppendSemanticIndex(unsigned SemIdx); + bool IsParamInputQualNode(); private: DxilParamInputQual m_inputQual; std::vector m_semanticIndex; diff --git a/include/dxc/DXIL/DxilUtil.h b/include/dxc/DXIL/DxilUtil.h index 5da572d9c8..93b4824b7f 100644 --- a/include/dxc/DXIL/DxilUtil.h +++ b/include/dxc/DXIL/DxilUtil.h @@ -13,9 +13,13 @@ #include #include #include + +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/Constants.h" + #include "dxc/DXIL/DxilConstants.h" #include "dxc/DXIL/DxilResourceProperties.h" @@ -39,6 +43,7 @@ class DebugLoc; class DIGlobalVariable; class ConstantInt; class SwitchInst; +class GEPOperator; ModulePass *createDxilLoadMetadataPass(); void initializeDxilLoadMetadataPass(llvm::PassRegistry&); @@ -141,13 +146,26 @@ namespace dxilutil { bool IsHLSLResourceType(llvm::Type *Ty); bool IsHLSLObjectType(llvm::Type *Ty); bool IsHLSLRayQueryType(llvm::Type *Ty); + bool IsHLSLWaveMatrixType(llvm::Type *Ty, DXIL::WaveMatrixKind *pKind = nullptr); bool IsHLSLResourceDescType(llvm::Type *Ty); bool IsResourceSingleComponent(llvm::Type *Ty); uint8_t GetResourceComponentCount(llvm::Type *Ty); bool IsSplat(llvm::ConstantDataVector *cdv); + bool IsHLSLNodeIOType(llvm::Type *Ty); + bool IsHLSLNodeOutputType(llvm::Type* Ty); + bool IsHLSLNodeOutputArrayType(llvm::Type* Ty); + bool IsHLSLEmptyNodeOutputType(llvm::Type* Ty); + bool IsHLSLEmptyNodeOutputArrayType(llvm::Type* Ty); + bool IsHLSLNodeInputRecordType(llvm::Type *Ty); + bool IsHLSLRWNodeInputRecordType(llvm::Type* Ty); + bool IsHLSLNodeOutputRecordType(llvm::Type *Ty); + bool IsHLSLGSNodeOutputRecordType(llvm::Type* Ty); + bool IsHLSLNodeRecordType(llvm::Type *Ty); + bool IsHLSLNodeInputOutputType(llvm::Type *Ty); 
llvm::Type* StripArrayTypes(llvm::Type *Ty, llvm::SmallVectorImpl *OuterToInnerLengths = nullptr); llvm::Type* WrapInArrayTypes(llvm::Type *Ty, llvm::ArrayRef OuterToInnerLengths); + llvm::Value *MirrorGEP(llvm::GEPOperator *GEP, llvm::Value *NewBasePtr); llvm::CallInst *TranslateCallRawBufferLoadToBufferLoad( llvm::CallInst *CI, llvm::Function *newFunction, hlsl::OP *op); @@ -179,6 +197,8 @@ namespace dxilutil { /// These allocas hold on to values that do not contribute to the /// shader's results. bool DeleteDeadAllocas(llvm::Function &F); -} -} + llvm::Value *GEPIdxToOffset(llvm::GetElementPtrInst *GEP, llvm::IRBuilder<> &Builder, hlsl::OP *OP, const llvm::DataLayout &DL); +} //namespace dxilutil + +} //namespace hlsl diff --git a/include/dxc/DXIL/DxilWaveMatrix.h b/include/dxc/DXIL/DxilWaveMatrix.h new file mode 100644 index 0000000000..f70c10d8dd --- /dev/null +++ b/include/dxc/DXIL/DxilWaveMatrix.h @@ -0,0 +1,53 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilWaveMatrix.h // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// WaveMatrix related types and helpers. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#pragma once + +#include "DxilConstants.h" + +namespace llvm { +class Value; +class Constant; +class Type; +class StructType; +} // namespace llvm + +namespace hlsl { + +struct DxilWaveMatrixProperties { + DXIL::WaveMatrixKind kind; + DXIL::ComponentType compType; + unsigned dimM, dimN; + + DxilWaveMatrixProperties() + : kind(DXIL::WaveMatrixKind::NumKinds), + compType(DXIL::ComponentType::Invalid), dimM(0), dimN(0) {} + bool isValid() const { return kind < DXIL::WaveMatrixKind::NumKinds; } + bool operator==(const DxilWaveMatrixProperties &other) { + return kind == other.kind && compType == other.compType && + dimM == other.dimM && dimN == other.dimN; + } + bool operator!=(const DxilWaveMatrixProperties &other) { + return !(*this == other); + } +}; + +namespace wavemat_helper { + +DxilWaveMatrixProperties LoadInfoFromConstant(llvm::Constant *C); +llvm::Constant *GetInfoConstantFromWaveMatPtr(llvm::Value *waveMatPtr); +DxilWaveMatrixProperties GetInfoFromWaveMatPtr(llvm::Value *waveMatPtr); +llvm::Constant *GetAsConstant(const DxilWaveMatrixProperties &info, + llvm::StructType *infoTy); + +} // namespace wavemat_helper + +} // namespace hlsl diff --git a/include/dxc/DxilContainer/DxilContainerAssembler.h b/include/dxc/DxilContainer/DxilContainerAssembler.h index c51f97dbec..6dd812ff82 100644 --- a/include/dxc/DxilContainer/DxilContainerAssembler.h +++ b/include/dxc/DxilContainer/DxilContainerAssembler.h @@ -15,6 +15,7 @@ #include "dxc/DxilContainer/DxilContainer.h" #include "llvm/ADT/StringRef.h" +struct IDxcVersionInfo; struct IStream; class DxilPipelineStateValidation; @@ -51,6 +52,7 @@ DxilPartWriter *NewRootSignatureWriter(const RootSignatureHandle &S); DxilPartWriter *NewFeatureInfoWriter(const DxilModule &M); DxilPartWriter *NewPSVWriter(const DxilModule &M, uint32_t PSVVersion = UINT_MAX); DxilPartWriter *NewRDATWriter(const DxilModule &M); +DxilPartWriter 
*NewVersionWriter(IDxcVersionInfo *pVersionInfo); // Store serialized ViewID data from DxilModule to PipelineStateValidation. void StoreViewIDStateToPSV(const uint32_t *pInputData, @@ -77,6 +79,7 @@ void WriteProgramPart(const hlsl::ShaderModel *pModel, void SerializeDxilContainerForModule( hlsl::DxilModule *pModule, AbstractMemoryStream *pModuleBitcode, + IDxcVersionInfo *DXCVersionInfo, AbstractMemoryStream *pStream, llvm::StringRef DebugName, SerializeDxilFlags Flags, DxilShaderHash *pShaderHashOut = nullptr, AbstractMemoryStream *pReflectionStreamOut = nullptr, diff --git a/include/dxc/DxilContainer/DxilPipelineStateValidation.h b/include/dxc/DxilContainer/DxilPipelineStateValidation.h index 0dfbf9656b..333606246e 100644 --- a/include/dxc/DxilContainer/DxilPipelineStateValidation.h +++ b/include/dxc/DxilContainer/DxilPipelineStateValidation.h @@ -111,6 +111,7 @@ enum class PSVShaderKind : uint8_t // DXIL::ShaderKind Callable, Mesh, Amplification, + Node, Invalid, }; diff --git a/include/dxc/DxilContainer/DxilRuntimeReflection.h b/include/dxc/DxilContainer/DxilRuntimeReflection.h index 3ed49d403f..39c150ffa3 100644 --- a/include/dxc/DxilContainer/DxilRuntimeReflection.h +++ b/include/dxc/DxilContainer/DxilRuntimeReflection.h @@ -59,6 +59,26 @@ enum class RuntimeDataPartType : uint32_t { SubobjectTable = 6, Last_1_4 = SubobjectTable, + // PRERELEASE-TODO: assign values explicitly to all enums before release + SignatureElementTable, + VSInfoTable, + PSInfoTable, + HSInfoTable, + DSInfoTable, + GSInfoTable, + CSInfoTable, + MSInfoTable, + ASInfoTable, + + NodeIDTable, + NodeShaderIOAttribTable, + NodeShaderFuncAttribTable, + IONodeTable, + NodeShaderInfoTable, + + Last_1_8 = NodeShaderInfoTable, // PRERELEASE-TODO: change to last necessary 1.8 part before release. + // Insert experimental here. + LastPlus1, LastExperimental = LastPlus1 - 1, @@ -73,8 +93,10 @@ RuntimeDataPartType MaxPartTypeForValVer(unsigned Major, unsigned Minor) { ? 
RuntimeDataPartType::Invalid // No RDAT before 1.3 : DXIL::CompareVersions(Major, Minor, 1, 4) < 0 ? RuntimeDataPartType::Last_1_3 - : DXIL::CompareVersions(Major, Minor, 1, 7) <= 0 + : DXIL::CompareVersions(Major, Minor, 1, 8) < 0 ? RuntimeDataPartType::Last_1_4 + : DXIL::CompareVersions(Major, Minor, 1, 8) == 0 + ? RuntimeDataPartType::Last_1_8 : RuntimeDataPartType::LastExperimental; } @@ -83,6 +105,21 @@ enum class RecordTableIndex : unsigned { FunctionTable, SubobjectTable, + SignatureElementTable, + VSInfoTable, + PSInfoTable, + HSInfoTable, + DSInfoTable, + GSInfoTable, + CSInfoTable, + MSInfoTable, + ASInfoTable, + NodeIDTable, + NodeShaderIOAttribTable, + NodeShaderFuncAttribTable, + IONodeTable, + NodeShaderInfoTable, + DxilPdbInfoTable, DxilPdbInfoSourceTable, DxilPdbInfoLibraryTable, @@ -623,105 +660,5 @@ class DxilRuntimeData { }; - -////////////////////////////////// -/// structures for library runtime - -struct DxilResourceDesc { - uint32_t Class; // hlsl::DXIL::ResourceClass - uint32_t Kind; // hlsl::DXIL::ResourceKind - uint32_t ID; // id per class - uint32_t Space; - uint32_t UpperBound; - uint32_t LowerBound; - LPCWSTR Name; - uint32_t Flags; // hlsl::RDAT::DxilResourceFlag -}; - -typedef const DxilResourceDesc *const *DxilResourceDescPtrArray; - -struct DxilFunctionDesc { - LPCWSTR Name; - LPCWSTR UnmangledName; - uint32_t NumResources; - uint32_t NumFunctionDependencies; - DxilResourceDescPtrArray Resources; - const LPCWSTR *FunctionDependencies; - DXIL::ShaderKind ShaderKind; - uint32_t PayloadSizeInBytes; // 1) hit, miss, or closest shader: payload count - // 2) call shader: parameter size - uint32_t AttributeSizeInBytes; // attribute size for closest hit and any hit - uint32_t FeatureInfo1; // first 32 bits of feature flag - uint32_t FeatureInfo2; // second 32 bits of feature flag - uint32_t ShaderStageFlag; // valid shader stage flag. - uint32_t MinShaderTarget; // minimum shader target. 
-}; - -struct DxilSubobjectDesc { - LPCWSTR Name; - DXIL::SubobjectKind Kind; // D3D12_STATE_SUBOBJECT_TYPE - - struct StateObjectConfig_t { - uint32_t Flags; // DXIL::StateObjectFlags / D3D12_STATE_OBJECT_FLAGS - }; - struct RootSignature_t { - LPCVOID pSerializedSignature; - uint32_t SizeInBytes; - }; // GlobalRootSignature or LocalRootSignature - struct SubobjectToExportsAssociation_t { - LPCWSTR Subobject; - uint32_t NumExports; - const LPCWSTR* Exports; - }; - struct RaytracingShaderConfig_t { - uint32_t MaxPayloadSizeInBytes; - uint32_t MaxAttributeSizeInBytes; - }; - struct RaytracingPipelineConfig_t { - uint32_t MaxTraceRecursionDepth; - }; - struct HitGroup_t { - DXIL::HitGroupType Type; // D3D12_HIT_GROUP_TYPE - LPCWSTR AnyHit; - LPCWSTR ClosestHit; - LPCWSTR Intersection; - }; - - struct RaytracingPipelineConfig1_t { - uint32_t MaxTraceRecursionDepth; - uint32_t Flags; // DXIL::RaytracingPipelineFlags / D3D12_RAYTRACING_PIPELINE_FLAGS - }; - - union { - StateObjectConfig_t StateObjectConfig; - RootSignature_t RootSignature; // GlobalRootSignature or LocalRootSignature - SubobjectToExportsAssociation_t SubobjectToExportsAssociation; - RaytracingShaderConfig_t RaytracingShaderConfig; - RaytracingPipelineConfig_t RaytracingPipelineConfig; - HitGroup_t HitGroup; - RaytracingPipelineConfig1_t RaytracingPipelineConfig1; - }; -}; - -struct DxilLibraryDesc { - uint32_t NumFunctions; - DxilFunctionDesc *pFunction; - uint32_t NumResources; - DxilResourceDesc *pResource; - uint32_t NumSubobjects; - DxilSubobjectDesc *pSubobjects; -}; - -class DxilRuntimeReflection { -public: - virtual ~DxilRuntimeReflection() {} - // This call will allocate memory for GetLibraryReflection call - virtual bool InitFromRDAT(const void *pRDAT, size_t size) = 0; - // DxilRuntimeReflection owns the memory pointed to by DxilLibraryDesc - virtual const DxilLibraryDesc GetLibraryReflection() = 0; -}; - -DxilRuntimeReflection *CreateDxilRuntimeReflection(); - } // namespace RDAT } // 
namespace hlsl diff --git a/include/dxc/DxilContainer/DxilRuntimeReflection.inl b/include/dxc/DxilContainer/DxilRuntimeReflection.inl index 84ec2b74a9..4a52414c2f 100644 --- a/include/dxc/DxilContainer/DxilRuntimeReflection.inl +++ b/include/dxc/DxilContainer/DxilRuntimeReflection.inl @@ -288,273 +288,3 @@ bool DxilRuntimeData::Validate() { } }} // hlsl::RDAT - -using namespace hlsl; -using namespace RDAT; - -namespace std { -template <> struct hash { - size_t operator()(const ResourceKey &key) const throw() { - return (hash()(key.Class) * (size_t)16777619U) ^ - hash()(key.ID); - } -}; -} // namespace std - -namespace { - -class DxilRuntimeReflection_impl : public DxilRuntimeReflection { -private: - typedef std::unordered_map> StringMap; - typedef std::unordered_map> BytesMap; - typedef std::vector WStringList; - typedef std::vector ResourceList; - typedef std::vector ResourceRefList; - typedef std::vector FunctionList; - typedef std::vector SubobjectList; - - DxilRuntimeData m_RuntimeData; - StringMap m_StringMap; - BytesMap m_BytesMap; - std::vector m_IndexData; - ResourceList m_Resources; - FunctionList m_Functions; - SubobjectList m_Subobjects; - std::unordered_map m_ResourceMap; - std::unordered_map m_FuncToResMap; - std::unordered_map m_FuncToDependenciesMap; - std::unordered_map m_SubobjectToExportsMap; - bool m_initialized; - - const wchar_t *GetWideString(const char *ptr); - void AddString(const char *ptr); - const void *GetBytes(const void *ptr, size_t size); - void InitializeReflection(); - const DxilResourceDesc * const*GetResourcesForFunction(DxilFunctionDesc &function, - const RuntimeDataFunctionInfo_Reader &functionReader); - const wchar_t **GetDependenciesForFunction(DxilFunctionDesc &function, - const RuntimeDataFunctionInfo_Reader &functionReader); - const wchar_t **GetExportsForAssociation(DxilSubobjectDesc &subobject, - const RuntimeDataSubobjectInfo_Reader &reader); - void AddResources(); - void AddFunctions(); - void AddSubobjects(); - 
-public: - // TODO: Implement pipeline state validation with runtime data - // TODO: Update BlobContainer.h to recognize 'RDAT' blob - DxilRuntimeReflection_impl() - : m_RuntimeData(), m_StringMap(), m_BytesMap(), m_Resources(), m_Functions(), - m_FuncToResMap(), m_FuncToDependenciesMap(), m_SubobjectToExportsMap(), - m_initialized(false) {} - virtual ~DxilRuntimeReflection_impl() {} - // This call will allocate memory for GetLibraryReflection call - bool InitFromRDAT(const void *pRDAT, size_t size) override; - const DxilLibraryDesc GetLibraryReflection() override; -}; - -void DxilRuntimeReflection_impl::AddString(const char *ptr) { - if (m_StringMap.find(ptr) == m_StringMap.end()) { - auto state = std::mbstate_t(); - size_t size = std::mbsrtowcs(nullptr, &ptr, 0, &state); - if (size != static_cast(-1)) { - std::unique_ptr pNew(new wchar_t[size + 1]); - auto pOldPtr = ptr; - std::mbsrtowcs(pNew.get(), &ptr, size + 1, &state); - m_StringMap[pOldPtr] = std::move(pNew); - } - } -} - -const wchar_t *DxilRuntimeReflection_impl::GetWideString(const char *ptr) { - if (m_StringMap.find(ptr) == m_StringMap.end()) { - AddString(ptr); - } - return m_StringMap.at(ptr).get(); -} - -const void *DxilRuntimeReflection_impl::GetBytes(const void *ptr, size_t size) { - if (!ptr || !size) - return nullptr; - - auto it = m_BytesMap.find(ptr); - if (it != m_BytesMap.end()) - return it->second.get(); - - auto inserted = m_BytesMap.insert(std::make_pair(ptr, std::unique_ptr(new char[size]))); - void *newPtr = inserted.first->second.get(); - memcpy(newPtr, ptr, size); - return newPtr; -} - -bool DxilRuntimeReflection_impl::InitFromRDAT(const void *pRDAT, size_t size) { - assert(!m_initialized && "may only initialize once"); - m_initialized = m_RuntimeData.InitFromRDAT(pRDAT, size); - if (!m_RuntimeData.Validate()) - return false; - if (m_initialized) - InitializeReflection(); - return m_initialized; -} - -const DxilLibraryDesc DxilRuntimeReflection_impl::GetLibraryReflection() { - 
DxilLibraryDesc reflection = {}; - if (m_initialized) { - reflection.NumResources = m_Resources.size(); - reflection.pResource = m_Resources.data(); - reflection.NumFunctions = m_Functions.size(); - reflection.pFunction = m_Functions.data(); - reflection.NumSubobjects = m_Subobjects.size(); - reflection.pSubobjects = m_Subobjects.data(); - } - return reflection; -} - -void DxilRuntimeReflection_impl::InitializeReflection() { - auto indexTable = m_RuntimeData.GetContext().IndexTable; - m_IndexData.assign(indexTable.Data(), indexTable.Data() + indexTable.Count()); - - // First need to reserve spaces for resources because functions will need to - // reference them via pointers. - AddResources(); - AddFunctions(); - AddSubobjects(); -} - -void DxilRuntimeReflection_impl::AddResources() { - auto table = m_RuntimeData.GetResourceTable(); - m_Resources.assign(table.Count(), {}); - for (uint32_t i = 0; i < table.Count(); ++i) { - auto reader = table[i]; - DxilResourceDesc &desc = m_Resources[i]; - desc.Class = (uint32_t)reader.getClass(); - desc.Kind = (uint32_t)reader.getKind(); - desc.Space = reader.getSpace(); - desc.LowerBound = reader.getLowerBound(); - desc.UpperBound = reader.getUpperBound(); - desc.ID = reader.getID(); - desc.Flags = reader.getFlags(); - desc.Name = GetWideString(reader.getName()); - ResourceKey key(desc.Class, desc.ID); - m_ResourceMap[key] = &desc; - } -} - -const DxilResourceDesc * const*DxilRuntimeReflection_impl::GetResourcesForFunction( - DxilFunctionDesc &function, const RuntimeDataFunctionInfo_Reader &functionReader) { - auto resources = functionReader.getResources(); - if (!resources.Count()) - return nullptr; - auto it = m_FuncToResMap.insert(std::make_pair(&function, ResourceRefList())); - assert(it.second && "otherwise, collision"); - ResourceRefList &resourceList = it.first->second; - resourceList.reserve(resources.Count()); - for (uint32_t i = 0; i < resources.Count(); ++i) { - auto resourceReader = functionReader.getResources()[i]; - 
ResourceKey key((uint32_t)resourceReader.getClass(), - resourceReader.getID()); - auto it = m_ResourceMap.find(key); - assert(it != m_ResourceMap.end() && it->second && "Otherwise, resource was not in map, or was null"); - resourceList.emplace_back(it->second); - } - return resourceList.data(); -} - -const wchar_t **DxilRuntimeReflection_impl::GetDependenciesForFunction( - DxilFunctionDesc &function, const RuntimeDataFunctionInfo_Reader &functionReader) { - auto it = m_FuncToDependenciesMap.insert(std::make_pair(&function, WStringList())); - assert(it.second && "otherwise, collision"); - WStringList &wStringList = it.first->second; - auto dependencies = functionReader.getFunctionDependencies(); - for (uint32_t i = 0; i < dependencies.Count(); ++i) { - wStringList.emplace_back(GetWideString(dependencies[i])); - } - return wStringList.empty() ? nullptr : wStringList.data(); -} - -void DxilRuntimeReflection_impl::AddFunctions() { - auto table = m_RuntimeData.GetFunctionTable(); - m_Functions.assign(table.Count(), {}); - for (uint32_t i = 0; i < table.Count(); ++i) { - auto reader = table[i]; - auto &desc = m_Functions[i]; - desc.Name = GetWideString(reader.getName()); - desc.UnmangledName = GetWideString(reader.getUnmangledName()); - desc.NumResources = reader.getResources().Count(); - desc.Resources = GetResourcesForFunction(desc, reader); - desc.NumFunctionDependencies = reader.getFunctionDependencies().Count(); - desc.FunctionDependencies = GetDependenciesForFunction(desc, reader); - desc.ShaderKind = reader.getShaderKind(); - desc.PayloadSizeInBytes = reader.getPayloadSizeInBytes(); - desc.AttributeSizeInBytes = reader.getAttributeSizeInBytes(); - desc.FeatureInfo1 = reader.getFeatureInfo1(); - desc.FeatureInfo2 = reader.getFeatureInfo2(); - desc.ShaderStageFlag = reader.getShaderStageFlag(); - desc.MinShaderTarget = reader.getMinShaderTarget(); - } -} - -const wchar_t **DxilRuntimeReflection_impl::GetExportsForAssociation( - DxilSubobjectDesc &subobject, const 
RuntimeDataSubobjectInfo_Reader &reader) { - auto it = m_SubobjectToExportsMap.insert(std::make_pair(&subobject, WStringList())); - assert(it.second && "otherwise, collision"); - auto exports = reader.getSubobjectToExportsAssociation().getExports(); - WStringList &wStringList = it.first->second; - for (uint32_t i = 0; i < exports.Count(); ++i) { - wStringList.emplace_back(GetWideString(exports[i])); - } - return wStringList.empty() ? nullptr : wStringList.data(); -} - -void DxilRuntimeReflection_impl::AddSubobjects() { - auto table = m_RuntimeData.GetSubobjectTable(); - m_Subobjects.assign(table.Count(), {}); - for (uint32_t i = 0; i < table.Count(); ++i) { - auto reader = table[i]; - auto &desc = m_Subobjects[i]; - desc.Name = GetWideString(reader.getName()); - desc.Kind = reader.getKind(); - switch (reader.getKind()) { - case DXIL::SubobjectKind::StateObjectConfig: - desc.StateObjectConfig.Flags = reader.getStateObjectConfig().getFlags(); - break; - case DXIL::SubobjectKind::GlobalRootSignature: - case DXIL::SubobjectKind::LocalRootSignature: - desc.RootSignature.SizeInBytes = reader.getRootSignature().sizeData(); - desc.RootSignature.pSerializedSignature = GetBytes(reader.getRootSignature().getData(), desc.RootSignature.SizeInBytes); - break; - case DXIL::SubobjectKind::SubobjectToExportsAssociation: - desc.SubobjectToExportsAssociation.Subobject = - GetWideString(reader.getSubobjectToExportsAssociation().getSubobject()); - desc.SubobjectToExportsAssociation.NumExports = reader.getSubobjectToExportsAssociation().getExports().Count(); - desc.SubobjectToExportsAssociation.Exports = GetExportsForAssociation(desc, reader); - break; - case DXIL::SubobjectKind::RaytracingShaderConfig: - desc.RaytracingShaderConfig.MaxPayloadSizeInBytes = reader.getRaytracingShaderConfig().getMaxPayloadSizeInBytes(); - desc.RaytracingShaderConfig.MaxAttributeSizeInBytes = reader.getRaytracingShaderConfig().getMaxAttributeSizeInBytes(); - break; - case 
DXIL::SubobjectKind::RaytracingPipelineConfig: - desc.RaytracingPipelineConfig.MaxTraceRecursionDepth = reader.getRaytracingPipelineConfig().getMaxTraceRecursionDepth(); - break; - case DXIL::SubobjectKind::HitGroup: - desc.HitGroup.Type = reader.getHitGroup().getType(); - desc.HitGroup.Intersection = GetWideString(reader.getHitGroup().getIntersection()); - desc.HitGroup.AnyHit = GetWideString(reader.getHitGroup().getAnyHit()); - desc.HitGroup.ClosestHit = GetWideString(reader.getHitGroup().getClosestHit()); - break; - case DXIL::SubobjectKind::RaytracingPipelineConfig1: - desc.RaytracingPipelineConfig1.MaxTraceRecursionDepth = reader.getRaytracingPipelineConfig1().getMaxTraceRecursionDepth(); - desc.RaytracingPipelineConfig1.Flags = reader.getRaytracingPipelineConfig1().getFlags(); - break; - default: - // Ignore contents of unrecognized subobject type (forward-compat) - break; - } - } -} - -} // namespace anon - -DxilRuntimeReflection *hlsl::RDAT::CreateDxilRuntimeReflection() { - return new DxilRuntimeReflection_impl(); -} diff --git a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl index 568f43768c..2382eb81cd 100644 --- a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl +++ b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl @@ -20,6 +20,39 @@ RDAT_ENUM_START(DxilResourceFlag, uint32_t) RDAT_ENUM_VALUE(Atomics64Use, 1 << 4) RDAT_ENUM_END() +RDAT_ENUM_START(DxilShaderFlags, uint32_t) + RDAT_ENUM_VALUE(None, 0) + RDAT_ENUM_VALUE(OutputPositionPresent, 1 << 0) + RDAT_ENUM_VALUE(DepthOutput, 1 << 1) + RDAT_ENUM_VALUE(SampleFrequency, 1 << 2) + RDAT_ENUM_VALUE(UsesViewID, 1 << 3) + RDAT_ENUM_VALUE(NodeProgramEntry, 1 << 4) +RDAT_ENUM_END() + +RDAT_ENUM_START(NodeFuncAttribKind, uint32_t) + RDAT_ENUM_VALUE(None, 0) + RDAT_ENUM_VALUE(ID, 1) + RDAT_ENUM_VALUE(NumThreads, 2) + RDAT_ENUM_VALUE(ShareInputOf, 3) + RDAT_ENUM_VALUE(DispatchGrid, 4) + RDAT_ENUM_VALUE(MaxRecursionDepth, 5) + 
RDAT_ENUM_VALUE(LocalRootArgumentsTableIndex, 6) + RDAT_ENUM_VALUE(MaxDispatchGrid, 7) + RDAT_ENUM_VALUE_NODEF(LastValue) + RDAT_ENUM_END() + +RDAT_ENUM_START(NodeAttribKind, uint32_t) + RDAT_ENUM_VALUE(None, 0) + RDAT_ENUM_VALUE(OutputID, 1) + RDAT_ENUM_VALUE(MaxRecords, 2) + RDAT_ENUM_VALUE(MaxRecordsSharedWith, 3) + RDAT_ENUM_VALUE(RecordSizeInBytes, 4) + RDAT_ENUM_VALUE(RecordDispatchGrid, 5) + RDAT_ENUM_VALUE(OutputArraySize, 6) + RDAT_ENUM_VALUE(AllowSparseNodes, 7) + RDAT_ENUM_VALUE_NODEF(LastValue) +RDAT_ENUM_END() + #endif // DEF_RDAT_ENUMS #ifdef DEF_DXIL_ENUMS @@ -84,9 +117,10 @@ RDAT_DXIL_ENUM_START(hlsl::DXIL::ShaderKind, uint32_t) RDAT_ENUM_VALUE_NODEF(Callable) RDAT_ENUM_VALUE_NODEF(Mesh) RDAT_ENUM_VALUE_NODEF(Amplification) + RDAT_ENUM_VALUE_NODEF(Node) RDAT_ENUM_VALUE_NODEF(Invalid) #if DEF_RDAT_ENUMS == DEF_RDAT_DUMP_IMPL - static_assert((unsigned)hlsl::DXIL::ShaderKind::Invalid == 15, "otherwise, RDAT_DXIL_ENUM definition needs updating"); + static_assert((unsigned)hlsl::DXIL::ShaderKind::Invalid == 16, "otherwise, RDAT_DXIL_ENUM definition needs updating"); #endif RDAT_ENUM_END() @@ -169,6 +203,34 @@ RDAT_DXIL_ENUM_START(hlsl::DXIL::InterpolationMode, uint32_t) #endif RDAT_ENUM_END() +RDAT_DXIL_ENUM_START(hlsl::DXIL::NodeIOKind, uint32_t) + RDAT_ENUM_VALUE_NODEF(EmptyInput) + RDAT_ENUM_VALUE_NODEF(NodeOutput) + RDAT_ENUM_VALUE_NODEF(NodeOutputArray) + RDAT_ENUM_VALUE_NODEF(EmptyOutput) + RDAT_ENUM_VALUE_NODEF(EmptyOutputArray) + RDAT_ENUM_VALUE_NODEF(DispatchNodeInputRecord) + RDAT_ENUM_VALUE_NODEF(RWDispatchNodeInputRecord) + RDAT_ENUM_VALUE_NODEF(GroupNodeInputRecords) + RDAT_ENUM_VALUE_NODEF(RWGroupNodeInputRecords) + RDAT_ENUM_VALUE_NODEF(ThreadNodeInputRecord) + RDAT_ENUM_VALUE_NODEF(RWThreadNodeInputRecord) + RDAT_ENUM_VALUE_NODEF(GroupNodeOutputRecords) + RDAT_ENUM_VALUE_NODEF(ThreadNodeOutputRecords) + RDAT_ENUM_VALUE_NODEF(Invalid) +RDAT_ENUM_END() + +RDAT_DXIL_ENUM_START(hlsl::DXIL::NodeLaunchType, uint32_t) + 
RDAT_ENUM_VALUE_NODEF(Invalid) + RDAT_ENUM_VALUE_NODEF(Broadcasting) + RDAT_ENUM_VALUE_NODEF(Coalescing) + RDAT_ENUM_VALUE_NODEF(Thread) + RDAT_ENUM_VALUE_NODEF(LastEntry) +#if DEF_RDAT_ENUMS == DEF_RDAT_DUMP_IMPL + static_assert((unsigned)hlsl::DXIL::NodeLaunchType::LastEntry == 4, "otherwise, RDAT_DXIL_ENUM definition needs updating"); +#endif +RDAT_ENUM_END() + #endif // DEF_DXIL_ENUMS #ifdef DEF_RDAT_TYPES @@ -236,4 +298,256 @@ RDAT_STRUCT_TABLE(RuntimeDataFunctionInfo, FunctionTable) RDAT_STRUCT_END() #undef RECORD_TYPE +#define RECORD_TYPE SignatureElement +RDAT_STRUCT_TABLE(SignatureElement, SignatureElementTable) + RDAT_STRING(SemanticName) + RDAT_INDEX_ARRAY_REF(SemanticIndices) // Rows = SemanticIndices.Count() + RDAT_ENUM(uint8_t, hlsl::DXIL::SemanticKind, SemanticKind) + RDAT_ENUM(uint8_t, hlsl::DXIL::ComponentType, ComponentType) + RDAT_ENUM(uint8_t, hlsl::DXIL::InterpolationMode, InterpolationMode) + RDAT_VALUE(uint8_t, StartRow) // Starting row of packed location if allocated, otherwise 0xFF + // TODO: use struct with bitfields or accessors for ColsAndStream and UsageAndDynIndexMasks + RDAT_VALUE(uint8_t, ColsAndStream) // 0:2 = (Cols-1) (0-3), 2:4 = StartCol (0-3), 4:6 = OutputStream (0-3) + RDAT_VALUE(uint8_t, UsageAndDynIndexMasks) // 0:4 = UsageMask, 4:8 = DynamicIndexMask +#if DEF_RDAT_TYPES == DEF_RDAT_TYPES_USE_HELPERS + uint8_t GetCols() const { return (ColsAndStream & 3) + 1; } + uint8_t GetStartCol() const { return (ColsAndStream >> 2) & 3; } + uint8_t GetOutputStream() const { return (ColsAndStream >> 4) & 3; } + uint8_t GetUsageMask() const { return UsageAndDynIndexMasks & 0xF; } + uint8_t GetDynamicIndexMask() const { return (UsageAndDynIndexMasks >> 4) & 0xF; } + void SetCols(unsigned cols) { ColsAndStream &= ~3; ColsAndStream |= (cols - 1) & 3; } + void SetStartCol(unsigned col) { ColsAndStream &= ~(3 << 2); ColsAndStream |= (col & 3) << 2; } + void SetOutputStream(unsigned stream) { ColsAndStream &= ~(3 << 4); ColsAndStream |= (stream 
& 3) << 4; } + void SetUsageMask(unsigned mask) { UsageAndDynIndexMasks &= ~0xF; UsageAndDynIndexMasks |= mask & 0xF; } + void SetDynamicIndexMask(unsigned mask) { UsageAndDynIndexMasks &= ~(0xF << 4); UsageAndDynIndexMasks |= (mask & 0xF) << 4; } +#endif +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE VSInfo +RDAT_STRUCT_TABLE(VSInfo, VSInfoTable) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigInputElements) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigOutputElements) + RDAT_BYTES(ViewIDOutputMask) +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE PSInfo +RDAT_STRUCT_TABLE(PSInfo, PSInfoTable) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigInputElements) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigOutputElements) +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE HSInfo +RDAT_STRUCT_TABLE(HSInfo, HSInfoTable) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigInputElements) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigOutputElements) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigPatchConstOutputElements) + RDAT_BYTES(ViewIDOutputMask) + RDAT_BYTES(ViewIDPatchConstOutputMask) + RDAT_BYTES(InputToOutputMasks) + RDAT_BYTES(InputToPatchConstOutputMasks) + RDAT_VALUE(uint8_t, InputControlPointCount) + RDAT_VALUE(uint8_t, OutputControlPointCount) + RDAT_VALUE(uint8_t, TessellatorDomain) + RDAT_VALUE(uint8_t, TessellatorOutputPrimitive) +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE DSInfo +RDAT_STRUCT_TABLE(DSInfo, DSInfoTable) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigInputElements) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigOutputElements) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigPatchConstInputElements) + RDAT_BYTES(ViewIDOutputMask) + RDAT_BYTES(InputToOutputMasks) + RDAT_BYTES(PatchConstInputToOutputMasks) + RDAT_VALUE(uint8_t, InputControlPointCount) + RDAT_VALUE(uint8_t, TessellatorDomain) +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE GSInfo +RDAT_STRUCT_TABLE(GSInfo, GSInfoTable) + 
RDAT_RECORD_ARRAY_REF(SignatureElement, SigInputElements) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigOutputElements) + RDAT_BYTES(ViewIDOutputMask) + RDAT_BYTES(InputToOutputMasks) + RDAT_VALUE(uint8_t, InputPrimitive) + RDAT_VALUE(uint8_t, OutputTopology) + RDAT_VALUE(uint8_t, MaxVertexCount) + RDAT_VALUE(uint8_t, OutputStreamMask) +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE CSInfo +RDAT_STRUCT_TABLE(CSInfo, CSInfoTable) + RDAT_INDEX_ARRAY_REF(NumThreads) // ref to array of X, Y, Z. If < 3 elements, default value is 1 + RDAT_VALUE(uint32_t, GroupSharedBytesUsed) +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE MSInfo +RDAT_STRUCT_TABLE(MSInfo, MSInfoTable) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigOutputElements) + RDAT_RECORD_ARRAY_REF(SignatureElement, SigPrimOutputElements) + RDAT_BYTES(ViewIDOutputMask) + RDAT_BYTES(ViewIDPrimOutputMask) + RDAT_INDEX_ARRAY_REF(NumThreads) // ref to array of X, Y, Z. If < 3 elements, default value is 1 + RDAT_VALUE(uint32_t, GroupSharedBytesUsed) + RDAT_VALUE(uint32_t, GroupSharedBytesDependentOnViewID) + RDAT_VALUE(uint32_t, PayloadSizeInBytes) + RDAT_VALUE(uint16_t, MaxOutputVertices) + RDAT_VALUE(uint16_t, MaxOutputPrimitives) + RDAT_VALUE(uint8_t, MeshOutputTopology) +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE ASInfo +RDAT_STRUCT_TABLE(ASInfo, ASInfoTable) + RDAT_INDEX_ARRAY_REF(NumThreads) // ref to array of X, Y, Z. 
If < 3 elements, default value is 1 + RDAT_VALUE(uint32_t, GroupSharedBytesUsed) + RDAT_VALUE(uint32_t, PayloadSizeInBytes) +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE RecordDispatchGrid +RDAT_STRUCT(RecordDispatchGrid) + RDAT_VALUE(uint16_t, ByteOffset) + RDAT_VALUE(uint16_t, ComponentNumAndType) // 0:2 = NumComponents (0-3), 3:15 = hlsl::DXIL::ComponentType enum +#if DEF_RDAT_TYPES == DEF_RDAT_TYPES_USE_HELPERS + uint8_t GetNumComponents() const { return (ComponentNumAndType & 0x3); } + hlsl::DXIL::ComponentType GetComponentType() const { return (hlsl::DXIL::ComponentType)(ComponentNumAndType >> 2); } + void SetNumComponents(uint8_t num) { ComponentNumAndType |= (num & 0x3); } + void SetComponentType(hlsl::DXIL::ComponentType type) { ComponentNumAndType |= (((uint16_t)type) << 2); } +#endif +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE NodeID +RDAT_STRUCT_TABLE(NodeID, NodeIDTable) + RDAT_STRING(Name) + RDAT_VALUE(uint32_t, Index) +RDAT_STRUCT_END() +#undef RECORD_TYPE + + +#define RECORD_TYPE NodeShaderFuncAttrib +RDAT_STRUCT_TABLE(NodeShaderFuncAttrib, NodeShaderFuncAttribTable) + RDAT_ENUM(uint32_t, hlsl::RDAT::NodeFuncAttribKind, AttribKind) + RDAT_UNION() + RDAT_UNION_IF(ID, getAttribKind() == hlsl::RDAT::NodeFuncAttribKind::ID) + RDAT_RECORD_REF(NodeID, ID) + RDAT_UNION_ELIF(NumThreads, getAttribKind() == hlsl::RDAT::NodeFuncAttribKind::NumThreads) + RDAT_INDEX_ARRAY_REF(NumThreads) + RDAT_UNION_ELIF(SharedInput, getAttribKind() == hlsl::RDAT::NodeFuncAttribKind::ShareInputOf) + RDAT_RECORD_REF(NodeID, ShareInputOf) + RDAT_UNION_ELIF(DispatchGrid, getAttribKind() == hlsl::RDAT::NodeFuncAttribKind::DispatchGrid) + RDAT_INDEX_ARRAY_REF(DispatchGrid) + RDAT_UNION_ELIF(MaxRecursionDepth, getAttribKind() == hlsl::RDAT::NodeFuncAttribKind::MaxRecursionDepth) + RDAT_VALUE(uint32_t, MaxRecursionDepth) + RDAT_UNION_ELIF(LocalRootArgumentsTableIndex, getAttribKind() == hlsl::RDAT::NodeFuncAttribKind::LocalRootArgumentsTableIndex) + 
RDAT_VALUE(uint32_t, LocalRootArgumentsTableIndex) + RDAT_UNION_ELIF(MaxDispatchGrid, getAttribKind() == hlsl::RDAT::NodeFuncAttribKind::MaxDispatchGrid) + RDAT_INDEX_ARRAY_REF(MaxDispatchGrid) + RDAT_UNION_ENDIF() + RDAT_UNION_END() +RDAT_STRUCT_END() +#undef RECORD_TYPE + + +#define RECORD_TYPE NodeShaderIOAttrib +RDAT_STRUCT_TABLE(NodeShaderIOAttrib, NodeShaderIOAttribTable) + RDAT_ENUM(uint32_t, hlsl::RDAT::NodeAttribKind, AttribKind) + RDAT_UNION() + RDAT_UNION_IF(ID, getAttribKind() == hlsl::RDAT::NodeAttribKind::OutputID) + RDAT_RECORD_REF(NodeID, OutputID) + RDAT_UNION_ELIF(MaxRecords, getAttribKind() == hlsl::RDAT::NodeAttribKind::MaxRecords) + RDAT_VALUE(uint32_t, MaxRecords) + RDAT_UNION_ELIF(MaxRecordsSharedWith, getAttribKind() == hlsl::RDAT::NodeAttribKind::MaxRecordsSharedWith) + RDAT_VALUE(uint32_t, MaxRecordsSharedWith) + RDAT_UNION_ELIF(RecordSizeInBytes, getAttribKind() == hlsl::RDAT::NodeAttribKind::RecordSizeInBytes) + RDAT_VALUE(uint32_t, RecordSizeInBytes) + RDAT_UNION_ELIF(RecordDispatchGrid, getAttribKind() == hlsl::RDAT::NodeAttribKind::RecordDispatchGrid) + RDAT_RECORD_VALUE(RecordDispatchGrid, RecordDispatchGrid) + RDAT_UNION_ELIF(OutputArraySize, getAttribKind() == hlsl::RDAT::NodeAttribKind::OutputArraySize) + RDAT_VALUE(uint32_t, OutputArraySize) + RDAT_UNION_ELIF(AllowSparseNodes, getAttribKind() == hlsl::RDAT::NodeAttribKind::AllowSparseNodes) + RDAT_VALUE(uint32_t, AllowSparseNodes) + RDAT_UNION_ENDIF() + RDAT_UNION_END() +RDAT_STRUCT_END() +#undef RECORD_TYPE + + + +#define RECORD_TYPE IONode +RDAT_STRUCT_TABLE(IONode, IONodeTable) + // Required field + RDAT_VALUE(uint32_t, IOFlagsAndKind) + // Optional fields + RDAT_RECORD_ARRAY_REF(NodeShaderIOAttrib, Attribs) +#if DEF_RDAT_TYPES == DEF_RDAT_TYPES_USE_HELPERS + uint32_t GetIOFlags() const { return IOFlagsAndKind & (uint32_t)DXIL::NodeIOFlags::NodeFlagsMask; } + hlsl::DXIL::NodeIOKind GetIOKind() const { return (hlsl::DXIL::NodeIOKind)(IOFlagsAndKind & 
(uint32_t)DXIL::NodeIOFlags::NodeIOKindMask); } + void SetIOFlags(uint32_t flags) { IOFlagsAndKind |= flags; } + void SetIOKind(hlsl::DXIL::NodeIOKind kind) { IOFlagsAndKind |= (uint32_t)kind; } +#endif +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE NodeShaderInfo +RDAT_STRUCT_TABLE(NodeShaderInfo, NodeShaderInfoTable) + // Function Attributes + RDAT_ENUM(uint32_t, hlsl::DXIL::NodeLaunchType, LaunchType) + RDAT_VALUE(uint32_t, GroupSharedBytesUsed) + RDAT_RECORD_ARRAY_REF(NodeShaderFuncAttrib, Attribs) + RDAT_RECORD_ARRAY_REF(IONode, Outputs) + RDAT_RECORD_ARRAY_REF(IONode, Inputs) + +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE RuntimeDataFunctionInfo2 +RDAT_STRUCT_TABLE_DERIVED(RuntimeDataFunctionInfo2, RuntimeDataFunctionInfo, FunctionTable) + + // 128 lanes is maximum that could be supported by HLSL + RDAT_VALUE(uint8_t, MinimumExpectedWaveLaneCount) // 0 = none specified + RDAT_VALUE(uint8_t, MaximumExpectedWaveLaneCount) // 0 = none specified + RDAT_FLAGS(uint16_t, hlsl::RDAT::DxilShaderFlags, ShaderFlags) + + RDAT_UNION() + RDAT_UNION_IF(VS, (getShaderKind() == hlsl::DXIL::ShaderKind::Vertex)) + RDAT_RECORD_REF(VSInfo, VS) + RDAT_UNION_ELIF(PS, (getShaderKind() == hlsl::DXIL::ShaderKind::Pixel)) + RDAT_RECORD_REF(PSInfo, PS) + RDAT_UNION_ELIF(HS, (getShaderKind() == hlsl::DXIL::ShaderKind::Hull)) + RDAT_RECORD_REF(HSInfo, HS) + RDAT_UNION_ELIF(DS, (getShaderKind() == hlsl::DXIL::ShaderKind::Domain)) + RDAT_RECORD_REF(DSInfo, DS) + RDAT_UNION_ELIF(GS, (getShaderKind() == hlsl::DXIL::ShaderKind::Geometry)) + RDAT_RECORD_REF(GSInfo, GS) + RDAT_UNION_ELIF(CS, (getShaderKind() == hlsl::DXIL::ShaderKind::Compute)) + RDAT_RECORD_REF(CSInfo, CS) + RDAT_UNION_ELIF(MS, (getShaderKind() == hlsl::DXIL::ShaderKind::Mesh)) + RDAT_RECORD_REF(MSInfo, MS) + RDAT_UNION_ELIF(AS, (getShaderKind() == hlsl::DXIL::ShaderKind::Amplification)) + RDAT_RECORD_REF(ASInfo, AS) + RDAT_UNION_ELIF(RawShaderRef, (getShaderKind() == 
hlsl::DXIL::ShaderKind::Invalid)) + RDAT_VALUE(uint32_t, RawShaderRef) + RDAT_UNION_ENDIF() + RDAT_UNION_END() + +RDAT_STRUCT_END() +#undef RECORD_TYPE + +#define RECORD_TYPE RuntimeDataFunctionInfo3 +RDAT_STRUCT_TABLE_DERIVED(RuntimeDataFunctionInfo3, RuntimeDataFunctionInfo2, FunctionTable) + + RDAT_RECORD_REF(NodeShaderInfo, Node) + +RDAT_STRUCT_END() +#undef RECORD_TYPE + #endif // DEF_RDAT_TYPES diff --git a/include/dxc/DxilContainer/RDAT_Macros.inl b/include/dxc/DxilContainer/RDAT_Macros.inl index 88a3121284..566c523d2c 100644 --- a/include/dxc/DxilContainer/RDAT_Macros.inl +++ b/include/dxc/DxilContainer/RDAT_Macros.inl @@ -81,6 +81,12 @@ #define RDAT_FLAGS(sTy, eTy, name) sTy name; #define RDAT_BYTES(name) hlsl::RDAT::BytesRef name; #define RDAT_ARRAY_VALUE(type, count, type_name, name) type_name name; + #define RDAT_STRUCT_TABLE_DERIVED(type, base, table) \ + template<> constexpr const char *RecordTraits::TypeName();\ + template<> constexpr RecordTableIndex RecordTraits::TableIndex();\ + template<> constexpr RuntimeDataPartType RecordTraits::PartType();\ + template<> constexpr size_t RecordTraits::DerivedRecordSize();\ + RDAT_STRUCT_DERIVED(type, base) #elif DEF_RDAT_TYPES == DEF_RDAT_READER_DECL @@ -175,15 +181,20 @@ type##_Reader reader(BaseRecordReader( \ &ctx, (void *)pRecord, (uint32_t)RecordTraits::RecordSize())); #define RDAT_STRUCT_DERIVED(type, base) \ + template <> \ const char *RecordRefDumper::TypeNameDerived( \ const hlsl::RDAT::RDATContext &ctx) const { \ return TypeName(ctx); \ } \ + template <> \ void RecordRefDumper::DumpDerived( \ const hlsl::RDAT::RDATContext &ctx, DumpContext &d) const { \ Dump(ctx, d); \ } \ template <> \ + void RecordDumper::Dump( \ + const hlsl::RDAT::RDATContext &ctx, DumpContext &d) const; \ + template <> \ void DumpWithBase(const hlsl::RDAT::RDATContext &ctx, \ DumpContext &d, \ const hlsl::RDAT::type *pRecord) { \ diff --git a/include/dxc/HLSL/HLModule.h b/include/dxc/HLSL/HLModule.h index ece773bbae..c939defbd0 
100644 --- a/include/dxc/HLSL/HLModule.h +++ b/include/dxc/HLSL/HLModule.h @@ -150,6 +150,7 @@ class HLModule { bool IsGraphicsShader(llvm::Function *F); // vs,hs,ds,gs,ps bool IsPatchConstantShader(llvm::Function *F); bool IsComputeShader(llvm::Function *F); + bool IsNodeShader(llvm::Function *F); // Is an entry function that uses input/output signature conventions? // Includes: vs/hs/ds/gs/ps/cs as well as the patch constant function. diff --git a/include/dxc/HLSL/HLOperationLower.h b/include/dxc/HLSL/HLOperationLower.h index d32e86a51e..9a1cc20030 100644 --- a/include/dxc/HLSL/HLOperationLower.h +++ b/include/dxc/HLSL/HLOperationLower.h @@ -25,4 +25,7 @@ class HLSLExtensionsCodegenHelper; void TranslateBuiltinOperations( HLModule &HLM, HLSLExtensionsCodegenHelper *extCodegenHelper, std::unordered_set &UpdateCounterSet); -} \ No newline at end of file + +void LowerRecordAccessToGetNodeRecordPtr(HLModule &HLM); +} + diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index 69435a92d8..4bd1f41fc1 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -41,7 +41,17 @@ enum class HLOpcodeGroup { HLMatLoadStore, HLSelect, HLCreateHandle, + // FIXME: Change the way these groups are being proliferated/used for each + // new generated op. + // Suggestion: Add an opcode and reuse CreateHandle/AnnotateHandle groups, and + // add an IndexHandle group. 
+ HLCreateNodeOutputHandle, + HLIndexNodeHandle, + HLCreateNodeInputRecordHandle, HLAnnotateHandle, + HLWaveMatrix_Annotate, + HLAnnotateNodeHandle, + HLAnnotateNodeRecordHandle, NumOfHLOps }; @@ -109,6 +119,10 @@ enum class HLCastOpcode { ColMatrixToRowMatrix, RowMatrixToColMatrix, HandleToResCast, + HandleToNodeOutputCast, + NodeOutputToHandleCast, + HandleToNodeRecordCast, + NodeRecordToHandleCast, }; enum class HLMatLoadStoreOpcode { @@ -362,11 +376,14 @@ const unsigned kWaveAllEqualValueOpIdx = 1; const unsigned kCreateHandleResourceOpIdx = 1; const unsigned kCreateHandleIndexOpIdx = 2; // Only for array of cbuffer. -// AnnotateHandle. -const unsigned kAnnotateHandleHandleOpIdx = 1; +// Annotate(Node)(Record)Handle. const unsigned kAnnotateHandleResourcePropertiesOpIdx = 2; const unsigned kAnnotateHandleResourceTypeOpIdx = 3; +// AnnotateWaveMatrix. +const unsigned kAnnotateWaveMatrixPtrOpIdx = 1; +const unsigned kAnnotateWaveMatrixPropertiesOpIdx = 2; + // TraceRay. const unsigned kTraceRayRayDescOpIdx = 7; const unsigned kTraceRayPayLoadOpIdx = 8; @@ -386,6 +403,38 @@ const unsigned kDispatchMeshOpThreadY = 2; const unsigned kDispatchMeshOpThreadZ = 3; const unsigned kDispatchMeshOpPayload = 4; +// WaveMatrix +const unsigned kWaveMatThisOpIdx = 1; +const unsigned kWaveMatFillScalarOpIdx = 2; +const unsigned kWaveMatScalarOpOpIdx = 2; +const unsigned kWaveMatOther1OpIdx = 2; +const unsigned kWaveMatOther2OpIdx = 3; +const unsigned kWaveMatLoadStoreBufOpIdx = 2; +const unsigned kWaveMatLoadStoreStartOpIdx = 3; +const unsigned kWaveMatLoadStoreStrideOpIdx = 4; +// Note: No ColMajor arg for fragments, so align idx is one less. 
+const unsigned kWaveMatLoadStoreColMajorOpIdx = 5; +const unsigned kWaveMatFragLoadStoreAlignmentOpIdx = 5; +const unsigned kWaveMatLoadStoreAlignmentOpIdx = 6; + +// Work Graph + +const unsigned kIncrementOutputCountCountIdx = 2; + +const unsigned kMemoryTypeFlagsOpIdx = 1; +const unsigned kAccessFlagsOpIdx = 2; +const unsigned kSyncFlagsOpIdx = 3; + + +// Node Handles +const unsigned kAllocateRecordNumRecordsIdx = 2; +const unsigned kNodeOutputMetadataIDIdx = 1; +const unsigned kIndexNodeHandleArrayIDIdx = 2; +const unsigned kNodeInputRecordMetadataIDIdx = 1; +const unsigned kNodeHandleToResCastOpIdx = 1; +const unsigned kAnnotateNodeHandleNodePropIdx = 2; +const unsigned kAnnotateNodeRecordHandleNodeRecordPropIdx = 2; + } // namespace HLOperandIndex llvm::Function *GetOrCreateHLFunction(llvm::Module &M, diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index 589066d612..aa20eb5e94 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -9,6 +9,7 @@ enum class IntrinsicOp { IOP_AcceptHitAndEndSearch, IOP_AllMemoryBarrier, IOP_AllMemoryBarrierWithGroupSync, IOP_AllocateRayQuery, + IOP_Barrier, IOP_CallShader, IOP_CheckAccessFullyMapped, IOP_CreateResourceFromHeap, @@ -21,14 +22,17 @@ enum class IntrinsicOp { IOP_AcceptHitAndEndSearch, IOP_EvaluateAttributeAtSample, IOP_EvaluateAttributeCentroid, IOP_EvaluateAttributeSnapped, + IOP_ExtractRecordStructFromArray, IOP_GeometryIndex, IOP_GetAttributeAtVertex, + IOP_GetRemainingRecursionLevels, IOP_GetRenderTargetSampleCount, IOP_GetRenderTargetSamplePosition, IOP_GroupMemoryBarrier, IOP_GroupMemoryBarrierWithGroupSync, IOP_HitKind, IOP_IgnoreHit, + IOP_ImplicitRecordToStructCast, IOP_InstanceID, IOP_InstanceIndex, IOP_InterlockedAdd, @@ -338,6 +342,24 @@ enum class IntrinsicOp { IOP_AcceptHitAndEndSearch, MOP_TraceRayInline, MOP_WorldRayDirection, MOP_WorldRayOrigin, + MOP_Fill, + MOP_MatrixDepth, + MOP_ScalarAdd, + MOP_ScalarDivide, + MOP_ScalarMultiply, + 
MOP_ScalarSubtract, + MOP_SumAccumulate, + MOP_Add, + MOP_Multiply, + MOP_MultiplyAccumulate, + MOP_Count, + MOP_FinishedCrossGroupSharing, + MOP_GetGroupNodeOutputRecords, + MOP_GetThreadNodeOutputRecords, + MOP_IsValid, + MOP_GroupIncrementOutputCount, + MOP_ThreadIncrementOutputCount, + MOP_OutputComplete, #ifdef ENABLE_SPIRV_CODEGEN MOP_SubpassLoad, #endif // ENABLE_SPIRV_CODEGEN diff --git a/include/dxc/Support/ErrorCodes.h b/include/dxc/Support/ErrorCodes.h index 1ecec3cb6e..41f0f20f28 100644 --- a/include/dxc/Support/ErrorCodes.h +++ b/include/dxc/Support/ErrorCodes.h @@ -116,3 +116,6 @@ // 0X80AA001D - LLVM Cast Failure #define DXC_E_LLVM_CAST_ERROR DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001D)) + +// 0X80AA001E - External validator (DXIL.dll) required, and missing. +#define DXC_E_VALIDATOR_MISSING DXC_MAKE_HRESULT(DXC_SEVERITY_ERROR,FACILITY_DXC,(0x001E)) diff --git a/include/dxc/Support/HLSLOptions.h b/include/dxc/Support/HLSLOptions.h index c5e52e5680..bb40365408 100644 --- a/include/dxc/Support/HLSLOptions.h +++ b/include/dxc/Support/HLSLOptions.h @@ -102,6 +102,12 @@ struct RewriterOpts { bool DeclGlobalCB = false; // OPT_rw_decl_global_cb }; +enum class ValidatorSelection : int { + Auto, // Try DXIL.dll; fallback to internal validator + Internal, // Force internal validator (even if DXIL.dll is present) + External // Use DXIL.dll, failing compilation if not available +}; + /// Use this class to capture all options. 
class DxcOpts { public: @@ -203,6 +209,7 @@ class DxcOpts { bool ExportShadersOnly = false; // OPT_export_shaders_only bool ResMayAlias = false; // OPT_res_may_alias unsigned long ValVerMajor = UINT_MAX, ValVerMinor = UINT_MAX; // OPT_validator_version + ValidatorSelection SelectValidator = ValidatorSelection::Auto; // OPT_select_validator unsigned ScanLimit = 0; // OPT_memdep_block_scan_limit bool ForceZeroStoreLifetimes = false; // OPT_force_zero_store_lifetimes bool EnableLifetimeMarkers = false; // OPT_enable_lifetime_markers diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index cc2ed7fcde..45a14ed25f 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -307,6 +307,8 @@ def encoding : Separate<["-", "/"], "encoding">, Group, Flags<[C HelpText<"Set default encoding for source inputs and text outputs (utf8|utf16(win)|utf32(*nix)|wide) default=utf8">; def validator_version : Separate<["-", "/"], "validator-version">, Group, Flags<[CoreOption, HelpHidden]>, HelpText<"Override validator version for module. Format: ; Default: DXIL.dll version or current internal version.">; +def select_validator : Separate<["-", "/"], "select-validator">, Group, Flags<[CoreOption, HelpHidden]>, + HelpText<"Select validator: auto: (default) use DXIL.dll if found, otherwise use internal; internal: internal non-signing validator; external: use DXIL.dll if found, otherwise fail compilation.">; def print_after_all : Flag<["-", "/"], "print-after-all">, Group, Flags<[CoreOption, HelpHidden]>, HelpText<"Print LLVM IR after each pass.">; def print_after : Separate<["-", "/"], "print-after">, Group, Flags<[CoreOption, HelpHidden]>, @@ -402,7 +404,7 @@ def fspv_preserve_bindings : Flag<["-"], "fspv-preserve-bindings">, Group, Flags<[CoreOption]>, Group, MetaVarName<"">, /* hctdb_instrhelp.get_target_profiles()*/ // VALRULE-TEXT:BEGIN - HelpText<"Set target profile. 
\n\t: ps_6_0, ps_6_1, ps_6_2, ps_6_3, ps_6_4, ps_6_5, ps_6_6, ps_6_7, \n\t\t vs_6_0, vs_6_1, vs_6_2, vs_6_3, vs_6_4, vs_6_5, vs_6_6, vs_6_7, \n\t\t gs_6_0, gs_6_1, gs_6_2, gs_6_3, gs_6_4, gs_6_5, gs_6_6, gs_6_7, \n\t\t hs_6_0, hs_6_1, hs_6_2, hs_6_3, hs_6_4, hs_6_5, hs_6_6, hs_6_7, \n\t\t ds_6_0, ds_6_1, ds_6_2, ds_6_3, ds_6_4, ds_6_5, ds_6_6, ds_6_7, \n\t\t cs_6_0, cs_6_1, cs_6_2, cs_6_3, cs_6_4, cs_6_5, cs_6_6, cs_6_7, \n\t\t lib_6_1, lib_6_2, lib_6_3, lib_6_4, lib_6_5, lib_6_6, lib_6_7, \n\t\t ms_6_5, ms_6_6, ms_6_7, \n\t\t as_6_5, as_6_6, as_6_7, \n\t\t ">; + HelpText<"Set target profile. \n\t: ps_6_0, ps_6_1, ps_6_2, ps_6_3, ps_6_4, ps_6_5, ps_6_6, ps_6_7, ps_6_8, \n\t\t vs_6_0, vs_6_1, vs_6_2, vs_6_3, vs_6_4, vs_6_5, vs_6_6, vs_6_7, vs_6_8, \n\t\t gs_6_0, gs_6_1, gs_6_2, gs_6_3, gs_6_4, gs_6_5, gs_6_6, gs_6_7, gs_6_8, \n\t\t hs_6_0, hs_6_1, hs_6_2, hs_6_3, hs_6_4, hs_6_5, hs_6_6, hs_6_7, hs_6_8, \n\t\t ds_6_0, ds_6_1, ds_6_2, ds_6_3, ds_6_4, ds_6_5, ds_6_6, ds_6_7, ds_6_8, \n\t\t cs_6_0, cs_6_1, cs_6_2, cs_6_3, cs_6_4, cs_6_5, cs_6_6, cs_6_7, cs_6_8, \n\t\t lib_6_1, lib_6_2, lib_6_3, lib_6_4, lib_6_5, lib_6_6, lib_6_7, lib_6_8, \n\t\t ms_6_5, ms_6_6, ms_6_7, ms_6_8, \n\t\t as_6_5, as_6_6, as_6_7, as_6_8, \n\t\t ">; // VALRULE-TEXT:END def entrypoint : JoinedOrSeparate<["-", "/"], "E">, Flags<[CoreOption, RewriteOption]>, Group, HelpText<"Entry point name">; diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index 85f71057ab..48b1fea08b 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -28,6 +28,7 @@ typedef struct ID3D10Blob ID3D10Blob; #define AR_QUAL_CONST 0x0000000000000200ULL #define AR_QUAL_ROWMAJOR 0x0000000000000400ULL #define AR_QUAL_COLMAJOR 0x0000000000000800ULL +#define AR_QUAL_GROUPSHARED 0x0000000000001000ULL #define AR_QUAL_IN_OUT (AR_QUAL_IN | AR_QUAL_OUT) @@ -43,8 +44,9 @@ enum LEGAL_INTRINSIC_TEMPLATES { LITEMPLATE_MATRIX = 3, // Matrix types (eg. float3x3). 
LITEMPLATE_ANY = 4, // Any one of scalar, vector or matrix types (but not object). LITEMPLATE_OBJECT = 5, // Object types. + LITEMPLATE_ARRAY = 6, // Scalar array. - LITEMPLATE_COUNT = 6 + LITEMPLATE_COUNT = 7 }; // INTRIN_COMPTYPE_FROM_TYPE_ELT0 is for object method intrinsics to indicate @@ -52,6 +54,11 @@ enum LEGAL_INTRINSIC_TEMPLATES { // object's template type; see for example Texture2D.Gather static const BYTE INTRIN_COMPTYPE_FROM_TYPE_ELT0 = 0xff; +// INTRIN_COMPTYPE_FROM_NODEOUTPUT is for intrinsics to indicate that the +// component type of the type is taken from the component type of the specified +// argument type. See for example the intrinsics Get*NodeOutputRecords() +static const BYTE INTRIN_COMPTYPE_FROM_NODEOUTPUT = 0xfe; + enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_VOID = 0, // void, used for function returns LICOMPTYPE_BOOL = 1, // bool @@ -99,7 +106,22 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_UINT8_4PACKED = 41, LICOMPTYPE_ANY_INT16_OR_32 = 42, LICOMPTYPE_SINT16_OR_32_ONLY = 43, - LICOMPTYPE_COUNT = 44 + + LICOMPTYPE_BYTEADDRESSBUFFER = 44, + LICOMPTYPE_RWBYTEADDRESSBUFFER = 45, + + LICOMPTYPE_WAVE_MATRIX_LEFT = 46, + LICOMPTYPE_WAVE_MATRIX_RIGHT = 47, + LICOMPTYPE_WAVE_MATRIX_LEFT_COL_ACC = 48, + LICOMPTYPE_WAVE_MATRIX_RIGHT_ROW_ACC = 49, + LICOMPTYPE_WAVE_MATRIX_ACCUMULATOR = 50, + + LICOMPTYPE_NODE_RECORD_OR_UAV = 51, + LICOMPTYPE_ANY_NODE_OUTPUT_RECORD = 52, + LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS = 53, + LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS = 54, + + LICOMPTYPE_COUNT = 55 }; static const BYTE IA_SPECIAL_BASE = 0xf0; diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 6a0565efa3..e1aa9d40b4 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -258,6 +258,7 @@ void initializeLowerStaticGlobalIntoAllocaPass(PassRegistry&); void initializeDynamicIndexingVectorToArrayPass(PassRegistry&); void initializeMultiDimArrayToOneDimArrayPass(PassRegistry&); void 
initializeResourceToHandlePass(PassRegistry&); +void initializeLowerWaveMatTypePass(PassRegistry&); void initializeSROA_SSAUp_HLSLPass(PassRegistry&); void initializeHoistConstantArrayPass(PassRegistry&); void initializeDxilLoopUnrollPass(PassRegistry&); diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 896a967d4c..78ab6556b0 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -178,6 +178,11 @@ void initializeMultiDimArrayToOneDimArrayPass(PassRegistry&); // ModulePass *createResourceToHandlePass(); void initializeResourceToHandlePass(PassRegistry&); +//===----------------------------------------------------------------------===// +// Lower WaveMatrix types (NOTE(review): description inferred from the pass name; confirm). +// +ModulePass *createLowerWaveMatTypePass(); +void initializeLowerWaveMatTypePass(PassRegistry&); //===----------------------------------------------------------------------===// // Hoist a local array initialized with constant values to a global array with diff --git a/lib/DXIL/CMakeLists.txt b/lib/DXIL/CMakeLists.txt index c92c532efd..97d1365ce1 100644 --- a/lib/DXIL/CMakeLists.txt +++ b/lib/DXIL/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_library(LLVMDXIL DxilMetadataHelper.cpp DxilModule.cpp DxilModuleHelper.cpp + DxilNodeProps.cpp DxilOperations.cpp DxilResource.cpp DxilResourceBase.cpp @@ -30,6 +31,7 @@ add_llvm_library(LLVMDXIL DxilUtil.cpp DxilUtilDbgInfoAndMisc.cpp DxilPDB.cpp + DxilWaveMatrix.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR diff --git a/lib/DXIL/DxilMetadataHelper.cpp b/lib/DXIL/DxilMetadataHelper.cpp index c06beb3a26..5eaec62bca 100644 --- a/lib/DXIL/DxilMetadataHelper.cpp +++ b/lib/DXIL/DxilMetadataHelper.cpp @@ -1235,6 +1235,13 @@ Metadata *DxilMDHelper::EmitDxilFieldAnnotation(const DxilFieldAnnotation &FA) { MDVals.emplace_back(Uint32ToConstMD(FA.GetBitFieldWidth())); } } + + if (FA.GetVectorSize() && + DXIL::CompareVersions(m_MinValMajor, m_MinValMinor, 1, 8) >= 0) { +
MDVals.emplace_back(Uint32ToConstMD(kDxilFieldAnnotationVectorSizeTag)); + MDVals.emplace_back(Uint32ToConstMD(FA.GetVectorSize())); + } + return MDNode::get(m_Ctx, MDVals); } @@ -1304,6 +1311,9 @@ void DxilMDHelper::LoadDxilFieldAnnotation(const MDOperand &MDO, DxilFieldAnnota } FA.SetBitFieldWidth(ConstMDToUint32(MDO)); break; + case kDxilFieldAnnotationVectorSizeTag: + FA.SetVectorSize(ConstMDToUint32(MDO)); + break; default: DXASSERT(false, "Unknown extended shader properties tag"); m_bExtraMetadata = true; @@ -1331,16 +1341,19 @@ const Function *DxilMDHelper::LoadDxilFunctionProps(const MDTuple *pProps, DXIL::ShaderKind shaderKind = static_cast(ConstMDToUint32(pProps->getOperand(idx++))); + auto DeserializeNumThreads = [&]() { + props->numThreads[0] = ConstMDToUint32(pProps->getOperand(idx++)); + props->numThreads[1] = ConstMDToUint32(pProps->getOperand(idx++)); + props->numThreads[2] = ConstMDToUint32(pProps->getOperand(idx++)); + }; + bool bRayAttributes = false; props->shaderKind = shaderKind; switch (shaderKind) { case DXIL::ShaderKind::Compute: - props->ShaderProps.CS.numThreads[0] = - ConstMDToUint32(pProps->getOperand(idx++)); - props->ShaderProps.CS.numThreads[1] = - ConstMDToUint32(pProps->getOperand(idx++)); - props->ShaderProps.CS.numThreads[2] = - ConstMDToUint32(pProps->getOperand(idx++)); + DeserializeNumThreads(); + if (props->IsNode()) + DeserializeNodeProps(pProps, idx, props); break; case DXIL::ShaderKind::Geometry: props->ShaderProps.GS.inputPrimitive = @@ -1396,12 +1409,7 @@ const Function *DxilMDHelper::LoadDxilFunctionProps(const MDTuple *pProps, ConstMDToUint32(pProps->getOperand(idx++)); break; case DXIL::ShaderKind::Mesh: - props->ShaderProps.MS.numThreads[0] = - ConstMDToUint32(pProps->getOperand(idx++)); - props->ShaderProps.MS.numThreads[1] = - ConstMDToUint32(pProps->getOperand(idx++)); - props->ShaderProps.MS.numThreads[2] = - ConstMDToUint32(pProps->getOperand(idx++)); + DeserializeNumThreads(); 
props->ShaderProps.MS.maxVertexCount = ConstMDToUint32(pProps->getOperand(idx++)); props->ShaderProps.MS.maxPrimitiveCount = @@ -1412,15 +1420,15 @@ const Function *DxilMDHelper::LoadDxilFunctionProps(const MDTuple *pProps, ConstMDToUint32(pProps->getOperand(idx++)); break; case DXIL::ShaderKind::Amplification: - props->ShaderProps.AS.numThreads[0] = - ConstMDToUint32(pProps->getOperand(idx++)); - props->ShaderProps.AS.numThreads[1] = - ConstMDToUint32(pProps->getOperand(idx++)); - props->ShaderProps.AS.numThreads[2] = - ConstMDToUint32(pProps->getOperand(idx++)); + DeserializeNumThreads(); props->ShaderProps.AS.payloadSizeInBytes = ConstMDToUint32(pProps->getOperand(idx++)); break; + case DXIL::ShaderKind::Node: + DeserializeNumThreads(); + // Node specific attributes + DeserializeNodeProps(pProps, idx, props); + break; default: break; } @@ -1453,15 +1461,17 @@ MDTuple *DxilMDHelper::EmitDxilEntryProperties(uint64_t rawShaderFlag, Uint32ToConstMD(static_cast(props.shaderKind))); } + if (props.IsNode()) + EmitDxilNodeState(MDVals, props); + switch (props.shaderKind) { - // Compute shader. + // Compute shader. case DXIL::ShaderKind::Compute: { - auto &CS = props.ShaderProps.CS; MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilNumThreadsTag)); vector NumThreadVals; - NumThreadVals.emplace_back(Uint32ToConstMD(CS.numThreads[0])); - NumThreadVals.emplace_back(Uint32ToConstMD(CS.numThreads[1])); - NumThreadVals.emplace_back(Uint32ToConstMD(CS.numThreads[2])); + NumThreadVals.emplace_back(Uint32ToConstMD(props.numThreads[0])); + NumThreadVals.emplace_back(Uint32ToConstMD(props.numThreads[1])); + NumThreadVals.emplace_back(Uint32ToConstMD(props.numThreads[2])); MDVals.emplace_back(MDNode::get(m_Ctx, NumThreadVals)); if (props.waveSize != 0) { @@ -1471,78 +1481,89 @@ MDTuple *DxilMDHelper::EmitDxilEntryProperties(uint64_t rawShaderFlag, MDVals.emplace_back(MDNode::get(m_Ctx, WaveSizeVal)); } } break; - // Geometry shader. + // Geometry shader. 
case DXIL::ShaderKind::Geometry: { MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilGSStateTag)); DXIL::PrimitiveTopology topo = DXIL::PrimitiveTopology::Undefined; unsigned activeStreamMask = 0; for (size_t i = 0; - i < _countof(props.ShaderProps.GS.streamPrimitiveTopologies); ++i) { + i < _countof(props.ShaderProps.GS.streamPrimitiveTopologies); ++i) { if (props.ShaderProps.GS.streamPrimitiveTopologies[i] != - DXIL::PrimitiveTopology::Undefined) { + DXIL::PrimitiveTopology::Undefined) { activeStreamMask |= 1 << i; DXASSERT_NOMSG(topo == DXIL::PrimitiveTopology::Undefined || - topo == - props.ShaderProps.GS.streamPrimitiveTopologies[i]); + topo == + props.ShaderProps.GS.streamPrimitiveTopologies[i]); topo = props.ShaderProps.GS.streamPrimitiveTopologies[i]; } } MDTuple *pMDTuple = - EmitDxilGSState(props.ShaderProps.GS.inputPrimitive, - props.ShaderProps.GS.maxVertexCount, activeStreamMask, - topo, props.ShaderProps.GS.instanceCount); + EmitDxilGSState(props.ShaderProps.GS.inputPrimitive, + props.ShaderProps.GS.maxVertexCount, activeStreamMask, + topo, props.ShaderProps.GS.instanceCount); MDVals.emplace_back(pMDTuple); } break; - // Domain shader. + // Domain shader. case DXIL::ShaderKind::Domain: { auto &DS = props.ShaderProps.DS; MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilDSStateTag)); MDTuple *pMDTuple = EmitDxilDSState(DS.domain, DS.inputControlPoints); MDVals.emplace_back(pMDTuple); } break; - // Hull shader. + // Hull shader. case DXIL::ShaderKind::Hull: { auto &HS = props.ShaderProps.HS; MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilHSStateTag)); MDTuple *pMDTuple = EmitDxilHSState( - HS.patchConstantFunc, HS.inputControlPoints, HS.outputControlPoints, - HS.domain, HS.partition, HS.outputPrimitive, HS.maxTessFactor); + HS.patchConstantFunc, HS.inputControlPoints, HS.outputControlPoints, + HS.domain, HS.partition, HS.outputPrimitive, HS.maxTessFactor); MDVals.emplace_back(pMDTuple); } break; - // Raytracing. + // Raytracing. 
case DXIL::ShaderKind::AnyHit: case DXIL::ShaderKind::ClosestHit: { MDVals.emplace_back(Uint32ToConstMD(kDxilRayPayloadSizeTag)); MDVals.emplace_back( - Uint32ToConstMD(props.ShaderProps.Ray.payloadSizeInBytes)); + Uint32ToConstMD(props.ShaderProps.Ray.payloadSizeInBytes)); MDVals.emplace_back(Uint32ToConstMD(kDxilRayAttribSizeTag)); MDVals.emplace_back( - Uint32ToConstMD(props.ShaderProps.Ray.attributeSizeInBytes)); + Uint32ToConstMD(props.ShaderProps.Ray.attributeSizeInBytes)); } break; case DXIL::ShaderKind::Miss: case DXIL::ShaderKind::Callable: { MDVals.emplace_back(Uint32ToConstMD(kDxilRayPayloadSizeTag)); MDVals.emplace_back( - Uint32ToConstMD(props.ShaderProps.Ray.payloadSizeInBytes)); + Uint32ToConstMD(props.ShaderProps.Ray.payloadSizeInBytes)); } break; case DXIL::ShaderKind::Mesh: { auto &MS = props.ShaderProps.MS; MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilMSStateTag)); - MDTuple *pMDTuple = EmitDxilMSState(MS.numThreads, - MS.maxVertexCount, - MS.maxPrimitiveCount, - MS.outputTopology, - MS.payloadSizeInBytes); + MDTuple *pMDTuple = EmitDxilMSState(props.numThreads, + MS.maxVertexCount, + MS.maxPrimitiveCount, + MS.outputTopology, + MS.payloadSizeInBytes); MDVals.emplace_back(pMDTuple); } break; case DXIL::ShaderKind::Amplification: { auto &AS = props.ShaderProps.AS; MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilASStateTag)); - MDTuple *pMDTuple = EmitDxilASState(AS.numThreads, AS.payloadSizeInBytes); + MDTuple *pMDTuple = EmitDxilASState(props.numThreads, AS.payloadSizeInBytes); MDVals.emplace_back(pMDTuple); } break; + case DXIL::ShaderKind::Node: { + // The Node specific properties have already been handled by + // EmitDxilNodeState function above. 
Here we emit the metadata for those + // Node shader attributes that are shared with other shader types (only CS for now) + MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilNumThreadsTag)); + vector NumThreadVals; + NumThreadVals.emplace_back(Uint32ToConstMD(props.numThreads[0])); + NumThreadVals.emplace_back(Uint32ToConstMD(props.numThreads[1])); + NumThreadVals.emplace_back(Uint32ToConstMD(props.numThreads[2])); + MDVals.emplace_back(MDNode::get(m_Ctx, NumThreadVals)); + } break; default: break; } @@ -1585,6 +1606,7 @@ void DxilMDHelper::LoadDxilEntryProperties(const MDOperand &MDO, props.shaderKind = DXIL::ShaderKind::Library; } + bool hasNodeTag = false; for (unsigned iNode = 0; iNode < pTupleMD->getNumOperands(); iNode += 2) { unsigned Tag = DxilMDHelper::ConstMDToUint32(pTupleMD->getOperand(iNode)); const MDOperand &MDO = pTupleMD->getOperand(iNode + 1); @@ -1599,12 +1621,12 @@ void DxilMDHelper::LoadDxilEntryProperties(const MDOperand &MDO, } break; case DxilMDHelper::kDxilNumThreadsTag: { - DXASSERT(props.IsCS(), "else invalid shader kind"); - auto &CS = props.ShaderProps.CS; + DXASSERT(props.IsCS() || props.shaderKind == DXIL::ShaderKind::Node, + "else invalid shader kind"); MDNode *pNode = cast(MDO.get()); - CS.numThreads[0] = ConstMDToUint32(pNode->getOperand(0)); - CS.numThreads[1] = ConstMDToUint32(pNode->getOperand(1)); - CS.numThreads[2] = ConstMDToUint32(pNode->getOperand(2)); + props.numThreads[0] = ConstMDToUint32(pNode->getOperand(0)); + props.numThreads[1] = ConstMDToUint32(pNode->getOperand(1)); + props.numThreads[2] = ConstMDToUint32(pNode->getOperand(2)); } break; case DxilMDHelper::kDxilGSStateTag: { @@ -1669,17 +1691,17 @@ void DxilMDHelper::LoadDxilEntryProperties(const MDOperand &MDO, case DxilMDHelper::kDxilMSStateTag: { DXASSERT(props.IsMS(), "else invalid shader kind"); auto &MS = props.ShaderProps.MS; - LoadDxilMSState(MDO, MS.numThreads, MS.maxVertexCount, + LoadDxilMSState(MDO, props.numThreads, MS.maxVertexCount, 
MS.maxPrimitiveCount, MS.outputTopology, MS.payloadSizeInBytes); } break; case DxilMDHelper::kDxilASStateTag: { DXASSERT(props.IsAS(), "else invalid shader kind"); auto &AS = props.ShaderProps.AS; - LoadDxilASState(MDO, AS.numThreads, AS.payloadSizeInBytes); + LoadDxilASState(MDO, props.numThreads, AS.payloadSizeInBytes); } break; case DxilMDHelper::kDxilWaveSizeTag: { - DXASSERT(props.IsCS(), "else invalid shader kind"); + DXASSERT(props.IsCS() || props.IsNode(), "else invalid shader kind"); MDNode *pNode = cast(MDO.get()); props.waveSize = ConstMDToUint32(pNode->getOperand(0)); } break; @@ -1687,6 +1709,73 @@ void DxilMDHelper::LoadDxilEntryProperties(const MDOperand &MDO, MDNode *pNode = cast(MDO.get()); LoadSerializedRootSignature(pNode, props.serializedRootSignature, m_Ctx); } break; + case DxilMDHelper::kDxilNodeLaunchTypeTag: { + hasNodeTag = true; + auto &Node = props.Node; + Node.LaunchType = static_cast(ConstMDToUint32(MDO)); + } break; + case DxilMDHelper::kDxilNodeIsProgramEntryTag: { + hasNodeTag = true; + props.Node.IsProgramEntry = ConstMDToBool(MDO); + } break; + case DxilMDHelper::kDxilNodeIdTag: { + hasNodeTag = true; + MDNode *pNode = cast(MDO.get()); + props.NodeShaderID.Name = StringMDToString(pNode->getOperand(0)); + props.NodeShaderID.Index = ConstMDToUint32(pNode->getOperand(1)); + } break; + case DxilMDHelper::kDxilNodeLocalRootArgumentsTableIndexTag: { + hasNodeTag = true; + auto &Node = props.Node; + Node.LocalRootArgumentsTableIndex = ConstMDToUint32(MDO); + } break; + case DxilMDHelper::kDxilShareInputOfTag: { + hasNodeTag = true; + MDNode *pNode = cast(MDO.get()); + props.NodeShaderSharedInput.Name = StringMDToString(pNode->getOperand(0)); + props.NodeShaderSharedInput.Index = ConstMDToUint32(pNode->getOperand(1)); + } break; + case DxilMDHelper::kDxilNodeDispatchGridTag: { + hasNodeTag = true; + auto &Node = props.Node; + MDNode *pNode = cast(MDO.get()); + Node.DispatchGrid[0] = ConstMDToUint32(pNode->getOperand(0)); + 
Node.DispatchGrid[1] = ConstMDToUint32(pNode->getOperand(1)); + Node.DispatchGrid[2] = ConstMDToUint32(pNode->getOperand(2)); + } break; + case DxilMDHelper::kDxilNodeMaxDispatchGridTag: { + hasNodeTag = true; + auto &Node = props.Node; + MDNode *pNode = cast(MDO.get()); + Node.MaxDispatchGrid[0] = ConstMDToUint32(pNode->getOperand(0)); + Node.MaxDispatchGrid[1] = ConstMDToUint32(pNode->getOperand(1)); + Node.MaxDispatchGrid[2] = ConstMDToUint32(pNode->getOperand(2)); + } break; + case DxilMDHelper::kDxilNodeMaxRecursionDepthTag: { + hasNodeTag = true; + auto &Node = props.Node; + Node.MaxRecursionDepth = ConstMDToUint32(MDO); + } break; + case DxilMDHelper::kDxilNodeInputsTag: { + hasNodeTag = true; + const MDTuple *pNodeInputs = dyn_cast(MDO.get()); + IFTBOOL(pNodeInputs != nullptr, DXC_E_INCORRECT_DXIL_METADATA); // check the cast result (null if the operand is not a tuple), not the enclosing pTupleMD + for (unsigned i = 0; i != pNodeInputs->getNumOperands(); ++i) { + const MDOperand &NodeInput = pNodeInputs->getOperand(i); + IFTBOOL(NodeInput.get() != nullptr, DXC_E_INCORRECT_DXIL_METADATA); + props.InputNodes.push_back(LoadDxilNodeIOState(NodeInput)); + } + } break; + case DxilMDHelper::kDxilNodeOutputsTag: { + hasNodeTag = true; + const MDTuple *pNodeOutputs = dyn_cast(MDO.get()); + IFTBOOL(pNodeOutputs != nullptr, DXC_E_INCORRECT_DXIL_METADATA); // check the cast result (null if the operand is not a tuple), not the enclosing pTupleMD + for (unsigned i = 0; i != pNodeOutputs->getNumOperands(); ++i) { + const MDOperand &NodeOutput = pNodeOutputs->getOperand(i); + IFTBOOL(NodeOutput.get() != nullptr, DXC_E_INCORRECT_DXIL_METADATA); + props.OutputNodes.push_back(LoadDxilNodeIOState(NodeOutput)); + } + } break; default: DXASSERT(false, "Unknown extended shader properties tag"); m_bExtraMetadata = true; @@ -1694,26 +1783,137 @@ void DxilMDHelper::LoadDxilEntryProperties(const MDOperand &MDO, } } + DXASSERT(!hasNodeTag || + props.Node.LaunchType != DXIL::NodeLaunchType::Invalid, + "else invalid shader kind"); + if (bEarlyDepth) { DXASSERT(props.IsPS(), "else invalid shader kind"); props.ShaderProps.PS.EarlyDepthStencil = true; } } +void
DxilMDHelper::SerializeNodeProps(SmallVectorImpl &MDVals, unsigned &valIdx, + const hlsl::DxilFunctionProps *props) { + auto &NodeProps = props->Node; + + MDVals.push_back( + Uint32ToConstMD(static_cast((NodeProps.LaunchType)))); + MDVals.push_back(BoolToConstMD(NodeProps.IsProgramEntry)); + MDVals.push_back(MDString::get(m_Ctx, props->NodeShaderID.Name)); + MDVals.push_back(Uint32ToConstMD(props->NodeShaderID.Index)); + MDVals.push_back(MDString::get(m_Ctx, props->NodeShaderSharedInput.Name)); + MDVals.push_back(Uint32ToConstMD(props->NodeShaderSharedInput.Index)); + MDVals.push_back(Uint32ToConstMD(NodeProps.LocalRootArgumentsTableIndex)); + MDVals.push_back(Uint32ToConstMD(NodeProps.DispatchGrid[0])); + MDVals.push_back(Uint32ToConstMD(NodeProps.DispatchGrid[1])); + MDVals.push_back(Uint32ToConstMD(NodeProps.DispatchGrid[2])); + MDVals.push_back(Uint32ToConstMD(NodeProps.MaxDispatchGrid[0])); + MDVals.push_back(Uint32ToConstMD(NodeProps.MaxDispatchGrid[1])); + MDVals.push_back(Uint32ToConstMD(NodeProps.MaxDispatchGrid[2])); + MDVals.push_back(Uint32ToConstMD(NodeProps.MaxRecursionDepth)); + for (auto &nodeinput : props->InputNodes) { + MDVals.push_back(Uint32ToConstMD(nodeinput.Flags)); + MDVals.push_back(Uint32ToConstMD(nodeinput.MaxRecords)); + MDVals.push_back(Uint32ToConstMD(nodeinput.RecordType.size)); + MDVals.push_back( + Uint32ToConstMD(nodeinput.RecordType.SV_DispatchGrid.ByteOffset)); + MDVals.push_back(Uint32ToConstMD(static_cast( + nodeinput.RecordType.SV_DispatchGrid.ComponentType))); + MDVals.push_back( + Uint32ToConstMD(nodeinput.RecordType.SV_DispatchGrid.NumComponents)); + } + for (auto &nodeoutput : props->OutputNodes) { + MDVals.push_back(Uint32ToConstMD(nodeoutput.Flags)); + MDVals.push_back(Uint32ToConstMD(nodeoutput.RecordType.size)); + MDVals.push_back( + Uint32ToConstMD(nodeoutput.RecordType.SV_DispatchGrid.ByteOffset)); + MDVals.push_back(Uint32ToConstMD(static_cast( + nodeoutput.RecordType.SV_DispatchGrid.ComponentType))); + 
MDVals.push_back( + Uint32ToConstMD(nodeoutput.RecordType.SV_DispatchGrid.NumComponents)); + MDVals.push_back(MDString::get(m_Ctx, nodeoutput.OutputID.Name)); + MDVals.push_back(Uint32ToConstMD(nodeoutput.OutputID.Index)); + MDVals.push_back(Uint32ToConstMD(nodeoutput.MaxRecords)); + MDVals.push_back(Int32ToConstMD(nodeoutput.MaxRecordsSharedWith)); + MDVals.push_back(Uint32ToConstMD(nodeoutput.OutputArraySize)); + MDVals.push_back(BoolToConstMD(nodeoutput.AllowSparseNodes)); + } +} + +void DxilMDHelper::DeserializeNodeProps(const MDTuple *pProps, unsigned &idx, + hlsl::DxilFunctionProps *props) { + auto &NodeProps = props->Node; + + NodeProps.LaunchType = static_cast( + ConstMDToUint32(pProps->getOperand(idx++))); + NodeProps.IsProgramEntry = ConstMDToBool(pProps->getOperand(idx++)); + props->NodeShaderID.Name = StringMDToString(pProps->getOperand(idx++)); + props->NodeShaderID.Index = ConstMDToUint32(pProps->getOperand(idx++)); + props->NodeShaderSharedInput.Name = StringMDToString(pProps->getOperand(idx++)); + props->NodeShaderSharedInput.Index = ConstMDToUint32(pProps->getOperand(idx++)); + NodeProps.LocalRootArgumentsTableIndex = + ConstMDToUint32(pProps->getOperand(idx++)); + NodeProps.DispatchGrid[0] = ConstMDToUint32(pProps->getOperand(idx++)); + NodeProps.DispatchGrid[1] = ConstMDToUint32(pProps->getOperand(idx++)); + NodeProps.DispatchGrid[2] = ConstMDToUint32(pProps->getOperand(idx++)); + NodeProps.MaxDispatchGrid[0] = ConstMDToUint32(pProps->getOperand(idx++)); + NodeProps.MaxDispatchGrid[1] = ConstMDToUint32(pProps->getOperand(idx++)); + NodeProps.MaxDispatchGrid[2] = ConstMDToUint32(pProps->getOperand(idx++)); + NodeProps.MaxRecursionDepth = ConstMDToUint32(pProps->getOperand(idx++)); + for (auto &nodeinput : props->InputNodes) { + nodeinput.Flags = NodeFlags(ConstMDToUint32(pProps->getOperand(idx++))); + nodeinput.MaxRecords = ConstMDToUint32(pProps->getOperand(idx++)); + nodeinput.RecordType.size = ConstMDToUint32(pProps->getOperand(idx++)); + 
nodeinput.RecordType.SV_DispatchGrid.ByteOffset = + ConstMDToUint32(pProps->getOperand(idx++)); + nodeinput.RecordType.SV_DispatchGrid.ComponentType = + static_cast( + ConstMDToUint32(pProps->getOperand(idx++))); + nodeinput.RecordType.SV_DispatchGrid.NumComponents = + ConstMDToUint32(pProps->getOperand(idx++)); + } + + for (auto &nodeoutput : props->OutputNodes) { + nodeoutput.Flags = NodeFlags(ConstMDToUint32(pProps->getOperand(idx++))); + nodeoutput.RecordType.size = ConstMDToUint32(pProps->getOperand(idx++)); + nodeoutput.RecordType.SV_DispatchGrid.ByteOffset = + ConstMDToUint32(pProps->getOperand(idx++)); + nodeoutput.RecordType.SV_DispatchGrid.ComponentType = + static_cast( + ConstMDToUint32(pProps->getOperand(idx++))); + nodeoutput.RecordType.SV_DispatchGrid.NumComponents = + ConstMDToUint32(pProps->getOperand(idx++)); + nodeoutput.OutputID.Name = + StringMDToString(pProps->getOperand(idx++)); + nodeoutput.OutputID.Index = ConstMDToUint32(pProps->getOperand(idx++)); + nodeoutput.MaxRecords = ConstMDToUint32(pProps->getOperand(idx++)); + nodeoutput.MaxRecordsSharedWith = ConstMDToInt32(pProps->getOperand(idx++)); + nodeoutput.OutputArraySize = ConstMDToUint32(pProps->getOperand(idx++)); + nodeoutput.AllowSparseNodes = ConstMDToBool(pProps->getOperand(idx++)); + } +} + MDTuple * DxilMDHelper::EmitDxilFunctionProps(const hlsl::DxilFunctionProps *props, const Function *F) { bool bRayAttributes = false; SmallVector MDVals; - + auto SerializeNumThreads = [&]() { + MDVals.push_back(Uint32ToConstMD(props->numThreads[0])); + MDVals.push_back(Uint32ToConstMD(props->numThreads[1])); + MDVals.push_back(Uint32ToConstMD(props->numThreads[2])); + }; + + unsigned valIdx = 0; MDVals.push_back(ValueAsMetadata::get(const_cast(F))); MDVals.push_back(Uint32ToConstMD(static_cast(props->shaderKind))); switch (props->shaderKind) { case DXIL::ShaderKind::Compute: - MDVals.push_back(Uint32ToConstMD(props->ShaderProps.CS.numThreads[0])); - 
MDVals.push_back(Uint32ToConstMD(props->ShaderProps.CS.numThreads[1])); - MDVals.push_back(Uint32ToConstMD(props->ShaderProps.CS.numThreads[2])); + SerializeNumThreads(); + if (props->IsNode()) + SerializeNodeProps(MDVals, valIdx, props); break; case DXIL::ShaderKind::Geometry: MDVals.push_back( @@ -1721,7 +1921,7 @@ DxilMDHelper::EmitDxilFunctionProps(const hlsl::DxilFunctionProps *props, MDVals.push_back(Uint32ToConstMD(props->ShaderProps.GS.maxVertexCount)); MDVals.push_back(Uint32ToConstMD(props->ShaderProps.GS.instanceCount)); for (size_t i = 0; - i < _countof(props->ShaderProps.GS.streamPrimitiveTopologies); ++i) + i < _countof(props->ShaderProps.GS.streamPrimitiveTopologies); ++i) MDVals.push_back(Uint8ToConstMD( (uint8_t)props->ShaderProps.GS.streamPrimitiveTopologies[i])); break; @@ -1758,9 +1958,7 @@ DxilMDHelper::EmitDxilFunctionProps(const hlsl::DxilFunctionProps *props, Uint32ToConstMD(props->ShaderProps.Ray.attributeSizeInBytes)); break; case DXIL::ShaderKind::Mesh: - MDVals.push_back(Uint32ToConstMD(props->ShaderProps.MS.numThreads[0])); - MDVals.push_back(Uint32ToConstMD(props->ShaderProps.MS.numThreads[1])); - MDVals.push_back(Uint32ToConstMD(props->ShaderProps.MS.numThreads[2])); + SerializeNumThreads(); MDVals.push_back(Uint32ToConstMD(props->ShaderProps.MS.maxVertexCount)); MDVals.push_back(Uint32ToConstMD(props->ShaderProps.MS.maxPrimitiveCount)); MDVals.push_back( @@ -1768,11 +1966,14 @@ DxilMDHelper::EmitDxilFunctionProps(const hlsl::DxilFunctionProps *props, MDVals.push_back(Uint32ToConstMD(props->ShaderProps.MS.payloadSizeInBytes)); break; case DXIL::ShaderKind::Amplification: - MDVals.push_back(Uint32ToConstMD(props->ShaderProps.AS.numThreads[0])); - MDVals.push_back(Uint32ToConstMD(props->ShaderProps.AS.numThreads[1])); - MDVals.push_back(Uint32ToConstMD(props->ShaderProps.AS.numThreads[2])); + SerializeNumThreads(); MDVals.push_back(Uint32ToConstMD(props->ShaderProps.AS.payloadSizeInBytes)); break; + case DXIL::ShaderKind::Node: + 
SerializeNumThreads(); + // Node specific properties + SerializeNodeProps(MDVals, valIdx, props); + break; default: break; } @@ -2254,6 +2455,220 @@ void DxilMDHelper::LoadDxilASState(const MDOperand &MDO, unsigned *NumThreads, u payloadSizeInBytes = ConstMDToUint32(pTupleMD->getOperand(kDxilASStatePayloadSizeInBytes)); } +void DxilMDHelper::EmitDxilNodeState(std::vector &MDVals, + const DxilFunctionProps &props) { + auto &Node = props.Node; + + // Required Fields + MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilNodeLaunchTypeTag)); + MDVals.emplace_back(Uint32ToConstMD(static_cast(Node.LaunchType))); + + // Optional Fields + + if (props.waveSize != 0) { + MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilWaveSizeTag)); + vector WaveSizeVal; + WaveSizeVal.emplace_back(Uint32ToConstMD(props.waveSize)); + MDVals.emplace_back(MDNode::get(m_Ctx, WaveSizeVal)); + } + + if (Node.IsProgramEntry) { + MDVals.emplace_back( + Uint32ToConstMD(DxilMDHelper::kDxilNodeIsProgramEntryTag)); + MDVals.emplace_back(BoolToConstMD(true)); + } + + if (!props.NodeShaderID.Name.empty()) { + MDVals.emplace_back(Uint32ToConstMD(kDxilNodeIdTag)); + vector NodeIDVals; + NodeIDVals.emplace_back(MDString::get(m_Ctx, props.NodeShaderID.Name)); + NodeIDVals.emplace_back(Uint32ToConstMD(props.NodeShaderID.Index)); + MDVals.emplace_back(MDNode::get(m_Ctx, NodeIDVals)); + } + + MDVals.emplace_back( + Uint32ToConstMD(DxilMDHelper::kDxilNodeLocalRootArgumentsTableIndexTag)); + MDVals.emplace_back(Uint32ToConstMD(Node.LocalRootArgumentsTableIndex)); + + if (!props.NodeShaderSharedInput.Name.empty()) { + MDVals.emplace_back(Uint32ToConstMD(kDxilShareInputOfTag)); + vector NodeIDVals; + NodeIDVals.emplace_back( + MDString::get(m_Ctx, props.NodeShaderSharedInput.Name)); + NodeIDVals.emplace_back(Uint32ToConstMD(props.NodeShaderSharedInput.Index)); + MDVals.emplace_back(MDNode::get(m_Ctx, NodeIDVals)); + } + + if (Node.DispatchGrid[0] || Node.DispatchGrid[1] || Node.DispatchGrid[2]) { + 
MDVals.emplace_back( + Uint32ToConstMD(DxilMDHelper::kDxilNodeDispatchGridTag)); + vector DispatchGridVals; + DispatchGridVals.emplace_back(Uint32ToConstMD(Node.DispatchGrid[0])); + DispatchGridVals.emplace_back(Uint32ToConstMD(Node.DispatchGrid[1])); + DispatchGridVals.emplace_back(Uint32ToConstMD(Node.DispatchGrid[2])); + MDVals.emplace_back(MDNode::get(m_Ctx, DispatchGridVals)); + } + + if (Node.MaxDispatchGrid[0] || Node.MaxDispatchGrid[1] || + Node.MaxDispatchGrid[2]) { + MDVals.emplace_back( + Uint32ToConstMD(DxilMDHelper::kDxilNodeMaxDispatchGridTag)); + vector MaxDispatchGridVals; + MaxDispatchGridVals.emplace_back(Uint32ToConstMD(Node.MaxDispatchGrid[0])); + MaxDispatchGridVals.emplace_back(Uint32ToConstMD(Node.MaxDispatchGrid[1])); + MaxDispatchGridVals.emplace_back(Uint32ToConstMD(Node.MaxDispatchGrid[2])); + MDVals.emplace_back(MDNode::get(m_Ctx, MaxDispatchGridVals)); + } + + if (Node.MaxRecursionDepth) { + MDVals.emplace_back( + Uint32ToConstMD(DxilMDHelper::kDxilNodeMaxRecursionDepthTag)); + MDVals.emplace_back(Uint32ToConstMD(Node.MaxRecursionDepth)); + } + + if (props.InputNodes.size()) { + MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilNodeInputsTag)); + vector NodeInputVals; + for (auto &InputNode : props.InputNodes) + NodeInputVals.emplace_back(EmitDxilNodeIOState(InputNode)); + MDVals.emplace_back(MDNode::get(m_Ctx, NodeInputVals)); + } + + if (props.OutputNodes.size()) { + MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilNodeOutputsTag)); + vector NodeOutputVals; + for (auto &OutputNode : props.OutputNodes) + NodeOutputVals.emplace_back(EmitDxilNodeIOState(OutputNode)); + MDVals.emplace_back(MDNode::get(m_Ctx, NodeOutputVals)); + } +} + +llvm::MDTuple * +DxilMDHelper::EmitDxilNodeIOState(const hlsl::NodeIOProperties &Node) { + vector MDVals; + MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilNodeIOFlagsTag)); + MDVals.emplace_back(Uint32ToConstMD(Node.Flags)); + + if (Node.RecordType.size) { + 
MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilNodeRecordTypeTag)); + vector NodeRecordTypeVals; + NodeRecordTypeVals.emplace_back( + Uint32ToConstMD(DxilMDHelper::kDxilNodeRecordSizeTag)); + NodeRecordTypeVals.emplace_back(Uint32ToConstMD(Node.RecordType.size)); + // If the record has a SV_DispatchGrid field + if (Node.RecordType.SV_DispatchGrid.NumComponents) { + NodeRecordTypeVals.emplace_back( + Uint32ToConstMD(DxilMDHelper::kDxilNodeSVDispatchGridTag)); + vector SVDispatchGridVals; + SVDispatchGridVals.emplace_back( + Uint32ToConstMD(Node.RecordType.SV_DispatchGrid.ByteOffset)); + SVDispatchGridVals.emplace_back(Uint32ToConstMD(static_cast( + Node.RecordType.SV_DispatchGrid.ComponentType))); + SVDispatchGridVals.emplace_back( + Uint32ToConstMD(Node.RecordType.SV_DispatchGrid.NumComponents)); + NodeRecordTypeVals.emplace_back(MDNode::get(m_Ctx, SVDispatchGridVals)); + } + MDVals.emplace_back(MDNode::get(m_Ctx, NodeRecordTypeVals)); + } + + if (Node.Flags.IsOutputNode()) { + // Required Field + MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilNodeMaxRecordsTag)); + MDVals.emplace_back(Uint32ToConstMD(Node.MaxRecords)); + + if (Node.OutputArraySize) { + MDVals.emplace_back( + Uint32ToConstMD(DxilMDHelper::kDxilNodeOutputArraySizeTag)); + MDVals.emplace_back(Uint32ToConstMD(Node.OutputArraySize)); + } + + if (Node.MaxRecordsSharedWith >= 0) { + MDVals.emplace_back( + Uint32ToConstMD(DxilMDHelper::kDxilNodeMaxRecordsSharedWithTag)); + MDVals.emplace_back(Int32ToConstMD(Node.MaxRecordsSharedWith)); + } + + if (Node.AllowSparseNodes) { + MDVals.emplace_back( + Uint32ToConstMD(DxilMDHelper::kDxilNodeAllowSparseNodesTag)); + MDVals.emplace_back(BoolToConstMD((uint32_t)Node.AllowSparseNodes)); + } + + if (!Node.OutputID.Name.empty()) { + MDVals.emplace_back(Uint32ToConstMD(kDxilNodeOutputIDTag)); + vector NodeOpIDVals; + NodeOpIDVals.emplace_back(MDString::get(m_Ctx, Node.OutputID.Name)); + NodeOpIDVals.emplace_back(Uint32ToConstMD(Node.OutputID.Index)); + 
MDVals.emplace_back(MDNode::get(m_Ctx, NodeOpIDVals)); + } + } else { + DXASSERT(Node.Flags.IsInputRecord(), "Invalid NodeIO Kind"); + if (Node.MaxRecords) { + MDVals.emplace_back( + Uint32ToConstMD(DxilMDHelper::kDxilNodeMaxRecordsTag)); + MDVals.emplace_back(Uint32ToConstMD(Node.MaxRecords)); + } + } + return MDNode::get(m_Ctx, MDVals); +} + +NodeIOProperties DxilMDHelper::LoadDxilNodeIOState(const llvm::MDOperand &MDO) { + const MDTuple *pTupleMD = dyn_cast(MDO.get()); + IFTBOOL(pTupleMD != nullptr, DXC_E_INCORRECT_DXIL_METADATA); + IFTBOOL((pTupleMD->getNumOperands() & 0x1) == 0, + DXC_E_INCORRECT_DXIL_METADATA); + + NodeIOProperties Node = {}; + for (unsigned iNode = 0; iNode < pTupleMD->getNumOperands(); iNode += 2) { + unsigned Tag = DxilMDHelper::ConstMDToUint32(pTupleMD->getOperand(iNode)); + const MDOperand &MDO = pTupleMD->getOperand(iNode + 1); + IFTBOOL(MDO.get() != nullptr, DXC_E_INCORRECT_DXIL_METADATA); + + switch (Tag) { + case DxilMDHelper::kDxilNodeIOFlagsTag: { + Node.Flags = NodeFlags(ConstMDToUint32(MDO)); + } break; + case DxilMDHelper::kDxilNodeRecordTypeTag: { + MDTuple *pTupleMD = cast(MDO.get()); + Node.RecordType.size = ConstMDToUint32(pTupleMD->getOperand(1)); + if (pTupleMD->getNumOperands() > 2) { + DXASSERT(pTupleMD->getNumOperands() == 4, + "incorrect number of operands"); + MDTuple *pSVDTupleMD = cast(pTupleMD->getOperand(3)); + Node.RecordType.SV_DispatchGrid.ByteOffset = + ConstMDToUint32(pSVDTupleMD->getOperand(0)); + Node.RecordType.SV_DispatchGrid.ComponentType = + static_cast( + ConstMDToUint32(pSVDTupleMD->getOperand(1))); + Node.RecordType.SV_DispatchGrid.NumComponents = + ConstMDToUint32(pSVDTupleMD->getOperand(2)); + } + } break; + case DxilMDHelper::kDxilNodeOutputArraySizeTag: { + Node.OutputArraySize = ConstMDToUint32(MDO); + } break; + case DxilMDHelper::kDxilNodeMaxRecordsTag: { + Node.MaxRecords = ConstMDToUint32(MDO); + } break; + case DxilMDHelper::kDxilNodeMaxRecordsSharedWithTag: { + Node.MaxRecordsSharedWith = 
ConstMDToInt32(MDO); + } break; + case DxilMDHelper::kDxilNodeAllowSparseNodesTag: { + Node.AllowSparseNodes = ConstMDToBool(MDO); + } break; + case DxilMDHelper::kDxilNodeOutputIDTag: { + MDNode *pNode = cast(MDO.get()); + Node.OutputID.Name = StringMDToString(pNode->getOperand(0)); + Node.OutputID.Index = ConstMDToUint32(pNode->getOperand(1)); + } break; + default: + DXASSERT(false, "Unknown NodeIO properties tag"); + break; + } + } + return Node; +} + void DxilMDHelper::AddCounterIfNonZero(uint32_t value, StringRef name, vector &MDVals) { if (value) { MDVals.emplace_back(MDString::get(m_Ctx, name)); diff --git a/lib/DXIL/DxilModule.cpp b/lib/DXIL/DxilModule.cpp index e39080041a..3c861d0465 100644 --- a/lib/DXIL/DxilModule.cpp +++ b/lib/DXIL/DxilModule.cpp @@ -377,11 +377,9 @@ void DxilModule::SetNumThreads(unsigned x, unsigned y, unsigned z) { "only works for CS/MS/AS profiles"); DxilFunctionProps &props = m_DxilEntryPropsMap.begin()->second->props; DXASSERT_NOMSG(m_pSM->GetKind() == props.shaderKind); - unsigned *numThreads = props.IsCS() ? props.ShaderProps.CS.numThreads : - props.IsMS() ? props.ShaderProps.MS.numThreads : props.ShaderProps.AS.numThreads; - numThreads[0] = x; - numThreads[1] = y; - numThreads[2] = z; + props.numThreads[0] = x; + props.numThreads[1] = y; + props.numThreads[2] = z; } unsigned DxilModule::GetNumThreads(unsigned idx) const { DXASSERT(m_DxilEntryPropsMap.size() == 1 && @@ -393,9 +391,7 @@ unsigned DxilModule::GetNumThreads(unsigned idx) const { return 0; const DxilFunctionProps &props = m_DxilEntryPropsMap.begin()->second->props; DXASSERT_NOMSG(m_pSM->GetKind() == props.shaderKind); - const unsigned *numThreads = props.IsCS() ? props.ShaderProps.CS.numThreads : - props.IsMS() ? 
props.ShaderProps.MS.numThreads : props.ShaderProps.AS.numThreads; - return numThreads[idx]; + return props.numThreads[idx]; } void DxilModule::SetWaveSize(unsigned size) { @@ -1236,7 +1232,7 @@ bool DxilModule::IsEntryThatUsesSignatures(const llvm::Function *F) const { auto propIter = m_DxilEntryPropsMap.find(F); if (propIter != m_DxilEntryPropsMap.end()) { DxilFunctionProps &props = propIter->second->props; - return props.IsGraphics() || props.IsCS(); + return props.IsGraphics() || props.IsCS() || props.IsNode(); } // Otherwise, return true if patch constant function return IsPatchConstantShader(F); diff --git a/lib/DXIL/DxilNodeProps.cpp b/lib/DXIL/DxilNodeProps.cpp new file mode 100644 index 0000000000..3880521b19 --- /dev/null +++ b/lib/DXIL/DxilNodeProps.cpp @@ -0,0 +1,97 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilNodeProps.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "dxc/DXIL/DxilNodeProps.h" + +namespace hlsl { + +//------------------------------------------------------------------------------ +// +// NodeFlags methods +// +NodeFlags::NodeFlags() : m_Flags(DXIL::NodeIOFlags::None) {} + +NodeFlags::NodeFlags(DXIL::NodeIOFlags flags) : m_Flags(flags) {} + +NodeFlags::NodeFlags(DXIL::NodeIOKind kind) + : m_Flags((DXIL::NodeIOFlags)kind) {} + +NodeFlags::NodeFlags(uint32_t F) : NodeFlags((DXIL::NodeIOFlags)F) {} + +bool NodeFlags::operator==(const hlsl::NodeFlags &o) const { + return m_Flags == o.m_Flags; +} + +NodeFlags::operator uint32_t() const { return (uint32_t)m_Flags; } + +DXIL::NodeIOKind NodeFlags::GetNodeIOKind() const { + return (DXIL::NodeIOKind)((uint32_t)m_Flags & + (uint32_t)DXIL::NodeIOFlags::NodeIOKindMask); +} + +DXIL::NodeIOFlags NodeFlags::GetNodeIOFlags() const { return m_Flags; } + +bool NodeFlags::IsInputRecord() const { + return ((uint32_t)m_Flags & (uint32_t)DXIL::NodeIOFlags::Input) != 0; +} + +bool NodeFlags::IsOutputNode() const { + return ((uint32_t)m_Flags & (uint32_t)DXIL::NodeIOFlags::Output) != 0; +} + +bool NodeFlags::IsReadWrite() const { + return ((uint32_t)m_Flags & (uint32_t)DXIL::NodeIOFlags::ReadWrite) != 0; +} + +bool NodeFlags::IsEmpty() const { + return ((uint32_t)m_Flags & (uint32_t)DXIL::NodeIOFlags::EmptyRecord) != 0; +} + +bool NodeFlags::IsEmptyInput() const { return IsEmpty() && IsInputRecord(); } + +bool NodeFlags::IsValidNodeKind() const { + return GetNodeIOKind() != DXIL::NodeIOKind::Invalid; +} + +bool NodeFlags::RecordTypeMatchesLaunchType( + DXIL::NodeLaunchType launchType) const { + DXIL::NodeIOFlags recordLaunchType = (DXIL::NodeIOFlags)( + (uint32_t)m_Flags & (uint32_t)DXIL::NodeIOFlags::RecordGranularityMask); + return (launchType == DXIL::NodeLaunchType::Broadcasting && + recordLaunchType == DXIL::NodeIOFlags::DispatchRecord) || + (launchType == 
DXIL::NodeLaunchType::Coalescing && + recordLaunchType == DXIL::NodeIOFlags::GroupRecord) || + (launchType == DXIL::NodeLaunchType::Thread && + recordLaunchType == DXIL::NodeIOFlags::ThreadRecord); +} + +void NodeFlags::SetTrackRWInputSharing() { + m_Flags = (DXIL::NodeIOFlags)( + (uint32_t)m_Flags | (uint32_t)DXIL::NodeIOFlags::TrackRWInputSharing); +} + +bool NodeFlags::GetTrackRWInputSharing() const { + return ((uint32_t)m_Flags & + (uint32_t)DXIL::NodeIOFlags::TrackRWInputSharing) != 0; +} + +//------------------------------------------------------------------------------ +// +// NodeIOProperties methods. +// + +NodeInfo NodeIOProperties::GetNodeInfo() const { + return NodeInfo(Flags.GetNodeIOFlags(), RecordType.size); +} + +NodeRecordInfo NodeIOProperties::GetNodeRecordInfo() const { + return NodeInfo(Flags.GetNodeIOFlags(), RecordType.size); +} + +} // namespace hlsl diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 2cb205545f..ff3a322ddb 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -194,7 +194,7 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { { OC::Coverage, "Coverage", OCC::Coverage, "coverage", { false, false, false, false, false, false, false, true, false, false, false}, Attribute::ReadNone, }, { OC::InnerCoverage, "InnerCoverage", OCC::InnerCoverage, "innerCoverage", { false, false, false, false, false, false, false, true, false, false, false}, Attribute::ReadNone, }, - // Compute/Mesh/Amplification shader void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute + // Compute/Mesh/Amplification/Node shader void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute { OC::ThreadId, "ThreadId", OCC::ThreadId, "threadId", { false, false, false, false, false, false, false, true, false, false, false}, Attribute::ReadNone, }, { OC::GroupId, "GroupId", OCC::GroupId, "groupId", { false, false, false, false, false, false, false, true, false, false, 
false}, Attribute::ReadNone, }, { OC::ThreadIdInGroup, "ThreadIdInGroup", OCC::ThreadIdInGroup, "threadIdInGroup", { false, false, false, false, false, false, false, true, false, false, false}, Attribute::ReadNone, }, @@ -416,11 +416,54 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { // Resources void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute { OC::TextureStoreSample, "TextureStoreSample", OCC::TextureStoreSample, "textureStoreSample", { false, true, true, false, false, false, true, true, false, false, false}, Attribute::None, }, + + // WaveMatrix void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute + { OC::WaveMatrix_Annotate, "WaveMatrix_Annotate", OCC::WaveMatrix_Annotate, "waveMatrix_Annotate", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ArgMemOnly, }, + { OC::WaveMatrix_Depth, "WaveMatrix_Depth", OCC::WaveMatrix_Depth, "waveMatrix_Depth", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ReadNone, }, + { OC::WaveMatrix_Fill, "WaveMatrix_Fill", OCC::WaveMatrix_Fill, "waveMatrix_Fill", { false, true, true, false, false, false, false, true, false, false, false}, Attribute::ArgMemOnly, }, + { OC::WaveMatrix_LoadRawBuf, "WaveMatrix_LoadRawBuf", OCC::WaveMatrix_LoadRawBuf, "waveMatrix_LoadRawBuf", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, }, + { OC::WaveMatrix_LoadGroupShared, "WaveMatrix_LoadGroupShared", OCC::WaveMatrix_LoadGroupShared, "waveMatrix_LoadGroupShared", { false, true, true, false, false, false, false, true, false, false, false}, Attribute::ArgMemOnly, }, + { OC::WaveMatrix_StoreRawBuf, "WaveMatrix_StoreRawBuf", OCC::WaveMatrix_StoreRawBuf, "waveMatrix_StoreRawBuf", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, }, + { OC::WaveMatrix_StoreGroupShared, "WaveMatrix_StoreGroupShared", 
OCC::WaveMatrix_StoreGroupShared, "waveMatrix_StoreGroupShared", { false, true, true, false, false, false, false, true, false, false, false}, Attribute::ArgMemOnly, }, + { OC::WaveMatrix_Multiply, "WaveMatrix_Multiply", OCC::WaveMatrix_Multiply, "waveMatrix_Multiply", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ArgMemOnly, }, + { OC::WaveMatrix_MultiplyAccumulate, "WaveMatrix_MultiplyAccumulate", OCC::WaveMatrix_Multiply, "waveMatrix_Multiply", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ArgMemOnly, }, + { OC::WaveMatrix_ScalarOp, "WaveMatrix_ScalarOp", OCC::WaveMatrix_ScalarOp, "waveMatrix_ScalarOp", { false, true, true, false, false, false, false, true, false, false, false}, Attribute::ArgMemOnly, }, + { OC::WaveMatrix_SumAccumulate, "WaveMatrix_SumAccumulate", OCC::WaveMatrix_Accumulate, "waveMatrix_Accumulate", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ArgMemOnly, }, + { OC::WaveMatrix_Add, "WaveMatrix_Add", OCC::WaveMatrix_Accumulate, "waveMatrix_Accumulate", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ArgMemOnly, }, + + // Create/Annotate Node Handles void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute + { OC::AllocateNodeOutputRecords, "AllocateNodeOutputRecords", OCC::AllocateNodeOutputRecords, "allocateNodeOutputRecords", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, }, + + // Get Pointer to Node Record in Address Space 6 void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute + { OC::GetNodeRecordPtr, "GetNodeRecordPtr", OCC::GetNodeRecordPtr, "getNodeRecordPtr", { false, false, false, false, false, false, false, false, false, true, false}, Attribute::ReadNone, }, + + // Work Graph intrinsics void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute + { OC::IncrementOutputCount, 
"IncrementOutputCount", OCC::IncrementOutputCount, "incrementOutputCount", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, }, + { OC::OutputComplete, "OutputComplete", OCC::OutputComplete, "outputComplete", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, }, + { OC::GetInputRecordCount, "GetInputRecordCount", OCC::GetInputRecordCount, "getInputRecordCount", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ReadOnly, }, + { OC::FinishedCrossGroupSharing, "FinishedCrossGroupSharing", OCC::FinishedCrossGroupSharing, "finishedCrossGroupSharing", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, }, + + // Synchronization void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute + { OC::BarrierByMemoryType, "BarrierByMemoryType", OCC::BarrierByMemoryType, "barrierByMemoryType", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::NoDuplicate, }, + { OC::BarrierByMemoryHandle, "BarrierByMemoryHandle", OCC::BarrierByMemoryHandle, "barrierByMemoryHandle", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::NoDuplicate, }, + { OC::BarrierByNodeRecordHandle, "BarrierByNodeRecordHandle", OCC::BarrierByNodeRecordHandle, "barrierByNodeRecordHandle", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::NoDuplicate, }, + + // Create/Annotate Node Handles void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute + { OC::CreateNodeOutputHandle, "CreateNodeOutputHandle", OCC::createNodeOutputHandle, "createNodeOutputHandle", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ReadNone, }, + { OC::IndexNodeHandle, "IndexNodeHandle", OCC::IndexNodeHandle, "indexNodeHandle", { true, false, false, false, false, false, false, false, false, 
false, false}, Attribute::ReadNone, }, + { OC::AnnotateNodeHandle, "AnnotateNodeHandle", OCC::AnnotateNodeHandle, "annotateNodeHandle", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ReadNone, }, + { OC::CreateNodeInputRecordHandle, "CreateNodeInputRecordHandle", OCC::CreateNodeInputRecordHandle, "createNodeInputRecordHandle", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ReadNone, }, + { OC::AnnotateNodeRecordHandle, "AnnotateNodeRecordHandle", OCC::AnnotateNodeRecordHandle, "annotateNodeRecordHandle", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ReadNone, }, + + // Work Graph intrinsics void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute + { OC::NodeOutputIsValid, "NodeOutputIsValid", OCC::NodeOutputIsValid, "nodeOutputIsValid", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ReadOnly, }, + { OC::GetRemainingRecursionLevels, "GetRemainingRecursionLevels", OCC::GetRemainingRecursionLevels, "getRemainingRecursionLevels", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ReadOnly, }, }; // OPCODE-OLOADS:END const char *OP::m_OverloadTypeName[kNumTypeOverloads] = { - "void", "f16", "f32", "f64", "i1", "i8", "i16", "i32", "i64", "udt", + "void", "f16", "f32", "f64", "i1", "i8", "i16", "i32", "i64", + "udt", "obj", // These should not be used }; const char *OP::m_NamePrefix = "dx.op."; @@ -460,8 +503,14 @@ unsigned OP::GetTypeSlot(Type *pType) { } llvm_unreachable("Invalid Bits size"); } - case Type::PointerTyID: return 9; - case Type::StructTyID: return 10; + case Type::PointerTyID: { + pType = cast(pType)->getElementType(); + if (pType->isStructTy()) + return kUserDefineTypeSlot; + DXASSERT(!pType->isPointerTy(), "pointer-to-pointer type unsupported"); + return GetTypeSlot(pType); + } + case Type::StructTyID: return kObjectTypeSlot; default: 
break; } @@ -617,8 +666,13 @@ bool OP::IsDxilOpWave(OpCode C) { // WaveReadLaneFirst=118, WaveActiveOp=119, WaveActiveBit=120, // WavePrefixOp=121, QuadReadLaneAt=122, QuadOp=123, WaveAllBitCount=135, // WavePrefixBitCount=136, WaveMatch=165, WaveMultiPrefixOp=166, - // WaveMultiPrefixBitCount=167, QuadVote=222 - return (110 <= op && op <= 123) || (135 <= op && op <= 136) || (165 <= op && op <= 167) || op == 222; + // WaveMultiPrefixBitCount=167, QuadVote=222, WaveMatrix_Annotate=226, + // WaveMatrix_Depth=227, WaveMatrix_Fill=228, WaveMatrix_LoadRawBuf=229, + // WaveMatrix_LoadGroupShared=230, WaveMatrix_StoreRawBuf=231, + // WaveMatrix_StoreGroupShared=232, WaveMatrix_Multiply=233, + // WaveMatrix_MultiplyAccumulate=234, WaveMatrix_ScalarOp=235, + // WaveMatrix_SumAccumulate=236, WaveMatrix_Add=237 + return (110 <= op && op <= 123) || (135 <= op && op <= 136) || (165 <= op && op <= 167) || op == 222 || (226 <= op && op <= 237); // OPCODE-WAVE:END } @@ -656,7 +710,7 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, // Instructions: ThreadId=93, GroupId=94, ThreadIdInGroup=95, // FlattenedThreadIdInGroup=96 if ((93 <= op && op <= 96)) { - mask = SFLAG(Compute) | SFLAG(Mesh) | SFLAG(Amplification); + mask = SFLAG(Compute) | SFLAG(Mesh) | SFLAG(Amplification) | SFLAG(Node); return; } // Instructions: DomainLocation=105 @@ -696,7 +750,7 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, // WaveReadLaneFirst=118, WaveActiveOp=119, WaveActiveBit=120, // WavePrefixOp=121, WaveAllBitCount=135, WavePrefixBitCount=136 if ((110 <= op && op <= 121) || (135 <= op && op <= 136)) { - mask = SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) | SFLAG(Mesh) | SFLAG(Pixel) | SFLAG(Vertex) | SFLAG(Hull) | SFLAG(Domain) | SFLAG(Geometry) | SFLAG(RayGeneration) | SFLAG(Intersection) | SFLAG(AnyHit) | SFLAG(ClosestHit) | SFLAG(Miss) | SFLAG(Callable); + mask = SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) | SFLAG(Mesh) | SFLAG(Pixel) | 
SFLAG(Vertex) | SFLAG(Hull) | SFLAG(Domain) | SFLAG(Geometry) | SFLAG(RayGeneration) | SFLAG(Intersection) | SFLAG(AnyHit) | SFLAG(ClosestHit) | SFLAG(Miss) | SFLAG(Callable) | SFLAG(Node); return; } // Instructions: Sample=60, SampleBias=61, SampleCmp=64, CalculateLOD=81, @@ -832,7 +886,7 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, // WaveMultiPrefixBitCount=167 if ((165 <= op && op <= 167)) { major = 6; minor = 5; - mask = SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) | SFLAG(Mesh) | SFLAG(Pixel) | SFLAG(Vertex) | SFLAG(Hull) | SFLAG(Domain) | SFLAG(Geometry) | SFLAG(RayGeneration) | SFLAG(Intersection) | SFLAG(AnyHit) | SFLAG(ClosestHit) | SFLAG(Miss) | SFLAG(Callable); + mask = SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) | SFLAG(Mesh) | SFLAG(Pixel) | SFLAG(Vertex) | SFLAG(Hull) | SFLAG(Domain) | SFLAG(Geometry) | SFLAG(RayGeneration) | SFLAG(Intersection) | SFLAG(AnyHit) | SFLAG(ClosestHit) | SFLAG(Miss) | SFLAG(Callable) | SFLAG(Node); return; } // Instructions: GeometryIndex=213 @@ -875,6 +929,17 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, major = 6; minor = 7; return; } + // Instructions: WaveMatrix_Annotate=226, WaveMatrix_Depth=227, + // WaveMatrix_Fill=228, WaveMatrix_LoadRawBuf=229, + // WaveMatrix_LoadGroupShared=230, WaveMatrix_StoreRawBuf=231, + // WaveMatrix_StoreGroupShared=232, WaveMatrix_Multiply=233, + // WaveMatrix_MultiplyAccumulate=234, WaveMatrix_ScalarOp=235, + // WaveMatrix_SumAccumulate=236, WaveMatrix_Add=237 + if ((226 <= op && op <= 237)) { + major = 6; minor = 7; + mask = SFLAG(Library) | SFLAG(Compute); + return; + } // Instructions: QuadVote=222 if (op == 222) { if (bWithTranslation) { @@ -885,6 +950,23 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation, mask = SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) | SFLAG(Mesh) | SFLAG(Pixel); return; } + // Instructions: BarrierByMemoryType=244, BarrierByMemoryHandle=245, + // 
BarrierByNodeRecordHandle=246 + if ((244 <= op && op <= 246)) { + major = 6; minor = 8; + return; + } + // Instructions: AllocateNodeOutputRecords=238, GetNodeRecordPtr=239, + // IncrementOutputCount=240, OutputComplete=241, GetInputRecordCount=242, + // FinishedCrossGroupSharing=243, CreateNodeOutputHandle=247, + // IndexNodeHandle=248, AnnotateNodeHandle=249, + // CreateNodeInputRecordHandle=250, AnnotateNodeRecordHandle=251, + // NodeOutputIsValid=252, GetRemainingRecursionLevels=253 + if ((238 <= op && op <= 243) || (247 <= op && op <= 253)) { + major = 6; minor = 8; + mask = SFLAG(Node); + return; + } // OPCODE-SMMASK:END } @@ -979,11 +1061,21 @@ OP::OP(LLVMContext &Ctx, Module *pModule) memset(m_OpCodeClassCache, 0, sizeof(m_OpCodeClassCache)); static_assert(_countof(OP::m_OpCodeProps) == (size_t)OP::OpCode::NumOpCodes, "forgot to update OP::m_OpCodeProps"); - m_pHandleType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), + m_pHandleType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), "dx.types.Handle", pModule); - m_pResourcePropertiesType = GetOrCreateStructType( - m_Ctx, {Type::getInt32Ty(m_Ctx), Type::getInt32Ty(m_Ctx)}, - "dx.types.ResourceProperties", pModule); + m_pNodeHandleType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), + "dx.types.NodeHandle", pModule); + m_pNodeRecordHandleType = GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), + "dx.types.NodeRecordHandle", pModule); + m_pResourcePropertiesType = GetOrCreateStructType(m_Ctx, + { Type::getInt32Ty(m_Ctx), Type::getInt32Ty(m_Ctx) }, + "dx.types.ResourceProperties", pModule); + m_pNodePropertiesType = GetOrCreateStructType(m_Ctx, + { Type::getInt32Ty(m_Ctx), Type::getInt32Ty(m_Ctx) }, + "dx.types.NodeInfo", pModule); + m_pNodeRecordPropertiesType = GetOrCreateStructType(m_Ctx, + { Type::getInt32Ty(m_Ctx), Type::getInt32Ty(m_Ctx) }, + "dx.types.NodeRecordInfo", pModule); m_pResourceBindingType = GetOrCreateStructType(m_Ctx, @@ -1012,6 +1104,10 @@ 
OP::OP(LLVMContext &Ctx, Module *pModule) Type *FourI16Types[4] = { Type::getInt16Ty(m_Ctx), Type::getInt16Ty(m_Ctx), Type::getInt16Ty(m_Ctx), Type::getInt16Ty(m_Ctx) }; // HiHi, HiLo, LoHi, LoLo m_pFourI16Type = GetOrCreateStructType(m_Ctx, FourI16Types, "dx.types.fouri16", pModule); + Type *WaveMatInfoTypes[4] = { Type::getInt8Ty(m_Ctx), Type::getInt8Ty(m_Ctx), Type::getInt32Ty(m_Ctx), Type::getInt32Ty(m_Ctx) }; + m_pWaveMatInfoType = cast(GetOrCreateStructType(m_Ctx, WaveMatInfoTypes, "dx.types.waveMatProps", pModule)); + m_pWaveMatPtrType = PointerType::get(GetOrCreateStructType(m_Ctx, Type::getInt8PtrTy(m_Ctx), "dx.types.waveMatrix", pModule), 0); + // When loading a module into an existing context where types are merged, // type names may change. When this happens, any intrinsics overloaded on // UDT types will no longer have matching overload names. @@ -1080,6 +1176,8 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { vector ArgTypes; // RetType is ArgTypes[0] Type *pETy = pOverloadType; Type *pRes = GetHandleType(); + Type* pNodeHandle = GetNodeHandleType(); + Type* pNodeRecordHandle = GetNodeRecordHandleType(); Type *pDim = GetDimensionsType(); Type *pPos = GetSamplePosType(); Type *pV = Type::getVoidTy(m_Ctx); @@ -1102,6 +1200,12 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { Type *obj = pOverloadType; Type *resProperty = GetResourcePropertiesType(); Type *resBind = GetResourceBindingType(); + Type* nodeProperty = GetNodePropertiesType(); + Type* nodeRecordProperty = GetNodeRecordPropertiesType(); + + Type *pWaveMatProps = GetWaveMatrixPropertiesType(); + Type *pWaveMatPtr = GetWaveMatPtrType(); + Type *pGSEltPtrTy = pETy->isVoidTy() ? 
nullptr : pETy->getPointerTo(DXIL::kTGSMAddrSpace); std::string funcName; ConstructOverloadName(pOverloadType, opCode, funcName); @@ -1272,7 +1376,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { case OpCode::Coverage: A(pI32); A(pI32); break; case OpCode::InnerCoverage: A(pI32); A(pI32); break; - // Compute/Mesh/Amplification shader + // Compute/Mesh/Amplification/Node shader case OpCode::ThreadId: A(pI32); A(pI32); A(pI32); break; case OpCode::GroupId: A(pI32); A(pI32); A(pI32); break; case OpCode::ThreadIdInGroup: A(pI32); A(pI32); A(pI32); break; @@ -1494,6 +1598,48 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) { // Resources case OpCode::TextureStoreSample: A(pV); A(pI32); A(pRes); A(pI32); A(pI32); A(pI32); A(pETy); A(pETy); A(pETy); A(pETy); A(pI8); A(pI32); break; + + // WaveMatrix + case OpCode::WaveMatrix_Annotate: A(pV); A(pI32); A(pWaveMatPtr);A(pWaveMatProps);break; + case OpCode::WaveMatrix_Depth: A(pI32); A(pI32); A(pWaveMatProps);break; + case OpCode::WaveMatrix_Fill: A(pV); A(pI32); A(pWaveMatPtr);A(pETy); break; + case OpCode::WaveMatrix_LoadRawBuf: A(pV); A(pI32); A(pWaveMatPtr);A(pRes); A(pI32); A(pI32); A(pI8); A(pI1); break; + case OpCode::WaveMatrix_LoadGroupShared:A(pV); A(pI32); A(pWaveMatPtr);A(pGSEltPtrTy);A(pI32); A(pI32); A(pI1); break; + case OpCode::WaveMatrix_StoreRawBuf: A(pV); A(pI32); A(pWaveMatPtr);A(pRes); A(pI32); A(pI32); A(pI8); A(pI1); break; + case OpCode::WaveMatrix_StoreGroupShared:A(pV); A(pI32); A(pWaveMatPtr);A(pGSEltPtrTy);A(pI32); A(pI32); A(pI1); break; + case OpCode::WaveMatrix_Multiply: A(pV); A(pI32); A(pWaveMatPtr);A(pWaveMatPtr);A(pWaveMatPtr);break; + case OpCode::WaveMatrix_MultiplyAccumulate:A(pV); A(pI32); A(pWaveMatPtr);A(pWaveMatPtr);A(pWaveMatPtr);break; + case OpCode::WaveMatrix_ScalarOp: A(pV); A(pI32); A(pWaveMatPtr);A(pI8); A(pETy); break; + case OpCode::WaveMatrix_SumAccumulate:A(pV); A(pI32); A(pWaveMatPtr);A(pWaveMatPtr);break; + case OpCode::WaveMatrix_Add: 
A(pV); A(pI32); A(pWaveMatPtr);A(pWaveMatPtr);break; + + // Create/Annotate Node Handles + case OpCode::AllocateNodeOutputRecords:A(pNodeRecordHandle);A(pI32); A(pNodeHandle);A(pI32); A(pI1); break; + + // Get Pointer to Node Record in Address Space 6 + case OpCode::GetNodeRecordPtr: A(pETy); A(pI32); A(pNodeRecordHandle);A(pI32); break; + + // Work Graph intrinsics + case OpCode::IncrementOutputCount: A(pV); A(pI32); A(pNodeHandle);A(pI32); A(pI1); break; + case OpCode::OutputComplete: A(pV); A(pI32); A(pNodeRecordHandle);break; + case OpCode::GetInputRecordCount: A(pI32); A(pI32); A(pNodeRecordHandle);break; + case OpCode::FinishedCrossGroupSharing:A(pI1); A(pI32); A(pNodeRecordHandle);break; + + // Synchronization + case OpCode::BarrierByMemoryType: A(pV); A(pI32); A(pI32); A(pI32); A(pI32); break; + case OpCode::BarrierByMemoryHandle: A(pV); A(pI32); A(pRes); A(pI32); A(pI32); break; + case OpCode::BarrierByNodeRecordHandle:A(pV); A(pI32); A(pNodeRecordHandle);A(pI32); A(pI32); break; + + // Create/Annotate Node Handles + case OpCode::CreateNodeOutputHandle: A(pNodeHandle);A(pI32); A(pI32); break; + case OpCode::IndexNodeHandle: A(pNodeHandle);A(pI32); A(pNodeHandle);A(pI32); break; + case OpCode::AnnotateNodeHandle: A(pNodeHandle);A(pI32); A(pNodeHandle);A(nodeProperty);break; + case OpCode::CreateNodeInputRecordHandle:A(pNodeRecordHandle);A(pI32); A(pI32); break; + case OpCode::AnnotateNodeRecordHandle:A(pNodeRecordHandle);A(pI32); A(pNodeRecordHandle);A(nodeRecordProperty);break; + + // Work Graph intrinsics + case OpCode::NodeOutputIsValid: A(pI1); A(pI32); A(pNodeHandle);break; + case OpCode::GetRemainingRecursionLevels:A(pI32); A(pI32); break; // OPCODE-OLOAD-FUNCS:END default: DXASSERT(false, "otherwise unhandled case"); break; } @@ -1594,6 +1740,7 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::TempRegStore: case OpCode::CallShader: case OpCode::Pack4x8: + case OpCode::WaveMatrix_Fill: 
DXASSERT_NOMSG(FT->getNumParams() > 2); return FT->getParamType(2); case OpCode::MinPrecXRegStore: @@ -1632,8 +1779,13 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { DXASSERT_NOMSG(FT->getNumParams() > 15); return FT->getParamType(15); case OpCode::ReportHit: + case OpCode::WaveMatrix_ScalarOp: DXASSERT_NOMSG(FT->getNumParams() > 3); return FT->getParamType(3); + case OpCode::WaveMatrix_LoadGroupShared: + case OpCode::WaveMatrix_StoreGroupShared: + DXASSERT_NOMSG(FT->getNumParams() > 2); + return FT->getParamType(2)->getPointerElementType(); case OpCode::CreateHandle: case OpCode::BufferUpdateCounter: case OpCode::GetDimensions: @@ -1682,6 +1834,29 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { case OpCode::AnnotateHandle: case OpCode::CreateHandleFromBinding: case OpCode::CreateHandleFromHeap: + case OpCode::WaveMatrix_Annotate: + case OpCode::WaveMatrix_Depth: + case OpCode::WaveMatrix_LoadRawBuf: + case OpCode::WaveMatrix_StoreRawBuf: + case OpCode::WaveMatrix_Multiply: + case OpCode::WaveMatrix_MultiplyAccumulate: + case OpCode::WaveMatrix_SumAccumulate: + case OpCode::WaveMatrix_Add: + case OpCode::AllocateNodeOutputRecords: + case OpCode::IncrementOutputCount: + case OpCode::OutputComplete: + case OpCode::GetInputRecordCount: + case OpCode::FinishedCrossGroupSharing: + case OpCode::BarrierByMemoryType: + case OpCode::BarrierByMemoryHandle: + case OpCode::BarrierByNodeRecordHandle: + case OpCode::CreateNodeOutputHandle: + case OpCode::IndexNodeHandle: + case OpCode::AnnotateNodeHandle: + case OpCode::CreateNodeInputRecordHandle: + case OpCode::AnnotateNodeRecordHandle: + case OpCode::NodeOutputIsValid: + case OpCode::GetRemainingRecursionLevels: return Type::getVoidTy(Ctx); case OpCode::CheckAccessFullyMapped: case OpCode::SampleIndex: @@ -1783,10 +1958,26 @@ Type *OP::GetHandleType() const { return m_pHandleType; } +Type* OP::GetNodeHandleType() const { + return m_pNodeHandleType; +} + +Type* 
OP::GetNodeRecordHandleType() const { + return m_pNodeRecordHandleType; +} + Type *OP::GetResourcePropertiesType() const { return m_pResourcePropertiesType; } +Type* OP::GetNodePropertiesType() const { + return m_pNodePropertiesType; +} + +Type* OP::GetNodeRecordPropertiesType() const { + return m_pNodeRecordPropertiesType; +} + Type *OP::GetResourceBindingType() const { return m_pResourceBindingType; } @@ -1821,6 +2012,14 @@ Type *OP::GetFourI16Type() const { return m_pFourI16Type; } +StructType *OP::GetWaveMatrixPropertiesType() const { + return m_pWaveMatInfoType; +} +PointerType *OP::GetWaveMatPtrType() const { + return m_pWaveMatPtrType; +} + + bool OP::IsResRetType(llvm::Type *Ty) { for (Type *ResTy : m_pResRetType) { if (Ty == ResTy) diff --git a/lib/DXIL/DxilResourceProperties.cpp b/lib/DXIL/DxilResourceProperties.cpp index 9f29251a51..1bd20b2375 100644 --- a/lib/DXIL/DxilResourceProperties.cpp +++ b/lib/DXIL/DxilResourceProperties.cpp @@ -85,7 +85,7 @@ bool DxilResourceProperties::operator!=(const DxilResourceProperties &RP) const namespace resource_helper { // The constant is as struct with int32 fields. -// ShaderModel 6.6 has 2 fileds. +// ShaderModel 6.6 has 2 fields. Constant *getAsConstant(const DxilResourceProperties &RP, Type *Ty, const ShaderModel &) { StructType *ST = cast(Ty); diff --git a/lib/DXIL/DxilShaderFlags.cpp b/lib/DXIL/DxilShaderFlags.cpp index d2259973fd..13f4f76e69 100644 --- a/lib/DXIL/DxilShaderFlags.cpp +++ b/lib/DXIL/DxilShaderFlags.cpp @@ -62,6 +62,7 @@ ShaderFlags::ShaderFlags(): , m_bResMayNotAlias(false) , m_bAdvancedTextureOps(false) , m_bWriteableMSAATextures(false) +, m_bWaveMMA(false) , m_align1(0) { // Silence unused field warnings @@ -122,6 +123,8 @@ uint64_t ShaderFlags::GetFeatureInfo() const { Flags |= m_bAdvancedTextureOps ? hlsl::DXIL::ShaderFeatureInfo_AdvancedTextureOps : 0; Flags |= m_bWriteableMSAATextures ? hlsl::DXIL::ShaderFeatureInfo_WriteableMSAATextures : 0; + Flags |= m_bWaveMMA ? 
hlsl::DXIL::ShaderFeatureInfo_WaveMMA : 0; + return Flags; } @@ -184,6 +187,7 @@ uint64_t ShaderFlags::GetShaderFlagsRawForCollection() { Flags.SetResMayNotAlias(true); Flags.SetAdvancedTextureOps(true); Flags.SetWriteableMSAATextures(true); + Flags.SetWaveMMA(true); return Flags.GetShaderFlagsRaw(); } @@ -383,6 +387,8 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F, bool hasAdvancedTextureOps = false; bool hasWriteableMSAATextures = false; + bool hasWaveMMA = false; + // Try to maintain compatibility with a v1.0 validator if that's what we have. uint32_t valMajor, valMinor; M->GetValidatorVersion(valMajor, valMinor); @@ -584,6 +590,20 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F, case DXIL::OpCode::TextureGatherRaw: hasAdvancedTextureOps = true; break; + case DXIL::OpCode::WaveMatrix_Add: + case DXIL::OpCode::WaveMatrix_Annotate: + case DXIL::OpCode::WaveMatrix_Depth: + case DXIL::OpCode::WaveMatrix_Fill: + case DXIL::OpCode::WaveMatrix_LoadGroupShared: + case DXIL::OpCode::WaveMatrix_LoadRawBuf: + case DXIL::OpCode::WaveMatrix_Multiply: + case DXIL::OpCode::WaveMatrix_MultiplyAccumulate: + case DXIL::OpCode::WaveMatrix_ScalarOp: + case DXIL::OpCode::WaveMatrix_StoreGroupShared: + case DXIL::OpCode::WaveMatrix_StoreRawBuf: + case DXIL::OpCode::WaveMatrix_SumAccumulate: + hasWaveMMA = true; + break; default: // Normal opcodes. break; @@ -699,6 +719,7 @@ ShaderFlags ShaderFlags::CollectShaderFlags(const Function *F, flag.SetAtomicInt64OnHeapResource(hasAtomicInt64OnHeapResource); flag.SetAdvancedTextureOps(hasAdvancedTextureOps); flag.SetWriteableMSAATextures(hasWriteableMSAATextures); + flag.SetWaveMMA(hasWaveMMA); // Only bother setting the flag when there are UAVs. 
flag.SetResMayNotAlias(canSetResMayNotAlias && hasUAVs && diff --git a/lib/DXIL/DxilShaderModel.cpp b/lib/DXIL/DxilShaderModel.cpp index 9f308230d8..4cbb151126 100644 --- a/lib/DXIL/DxilShaderModel.cpp +++ b/lib/DXIL/DxilShaderModel.cpp @@ -67,6 +67,7 @@ bool ShaderModel::IsValidForDxil() const { case 5: case 6: case 7: + case 8: // VALRULE-TEXT:END return true; case kOfflineMinor: @@ -99,77 +100,86 @@ const ShaderModel *ShaderModel::Get(Kind Kind, unsigned Major, unsigned Minor) { {1541,9}, //ps_6_5 {1542,10}, //ps_6_6 {1543,11}, //ps_6_7 - {66560,12}, //vs_4_0 - {66561,13}, //vs_4_1 - {66816,14}, //vs_5_0 - {66817,15}, //vs_5_1 - {67072,16}, //vs_6_0 - {67073,17}, //vs_6_1 - {67074,18}, //vs_6_2 - {67075,19}, //vs_6_3 - {67076,20}, //vs_6_4 - {67077,21}, //vs_6_5 - {67078,22}, //vs_6_6 - {67079,23}, //vs_6_7 - {132096,24}, //gs_4_0 - {132097,25}, //gs_4_1 - {132352,26}, //gs_5_0 - {132353,27}, //gs_5_1 - {132608,28}, //gs_6_0 - {132609,29}, //gs_6_1 - {132610,30}, //gs_6_2 - {132611,31}, //gs_6_3 - {132612,32}, //gs_6_4 - {132613,33}, //gs_6_5 - {132614,34}, //gs_6_6 - {132615,35}, //gs_6_7 - {197888,36}, //hs_5_0 - {197889,37}, //hs_5_1 - {198144,38}, //hs_6_0 - {198145,39}, //hs_6_1 - {198146,40}, //hs_6_2 - {198147,41}, //hs_6_3 - {198148,42}, //hs_6_4 - {198149,43}, //hs_6_5 - {198150,44}, //hs_6_6 - {198151,45}, //hs_6_7 - {263424,46}, //ds_5_0 - {263425,47}, //ds_5_1 - {263680,48}, //ds_6_0 - {263681,49}, //ds_6_1 - {263682,50}, //ds_6_2 - {263683,51}, //ds_6_3 - {263684,52}, //ds_6_4 - {263685,53}, //ds_6_5 - {263686,54}, //ds_6_6 - {263687,55}, //ds_6_7 - {328704,56}, //cs_4_0 - {328705,57}, //cs_4_1 - {328960,58}, //cs_5_0 - {328961,59}, //cs_5_1 - {329216,60}, //cs_6_0 - {329217,61}, //cs_6_1 - {329218,62}, //cs_6_2 - {329219,63}, //cs_6_3 - {329220,64}, //cs_6_4 - {329221,65}, //cs_6_5 - {329222,66}, //cs_6_6 - {329223,67}, //cs_6_7 - {394753,68}, //lib_6_1 - {394754,69}, //lib_6_2 - {394755,70}, //lib_6_3 - {394756,71}, //lib_6_4 - {394757,72}, 
//lib_6_5 - {394758,73}, //lib_6_6 - {394759,74}, //lib_6_7 + {1544,12}, //ps_6_8 + {66560,13}, //vs_4_0 + {66561,14}, //vs_4_1 + {66816,15}, //vs_5_0 + {66817,16}, //vs_5_1 + {67072,17}, //vs_6_0 + {67073,18}, //vs_6_1 + {67074,19}, //vs_6_2 + {67075,20}, //vs_6_3 + {67076,21}, //vs_6_4 + {67077,22}, //vs_6_5 + {67078,23}, //vs_6_6 + {67079,24}, //vs_6_7 + {67080,25}, //vs_6_8 + {132096,26}, //gs_4_0 + {132097,27}, //gs_4_1 + {132352,28}, //gs_5_0 + {132353,29}, //gs_5_1 + {132608,30}, //gs_6_0 + {132609,31}, //gs_6_1 + {132610,32}, //gs_6_2 + {132611,33}, //gs_6_3 + {132612,34}, //gs_6_4 + {132613,35}, //gs_6_5 + {132614,36}, //gs_6_6 + {132615,37}, //gs_6_7 + {132616,38}, //gs_6_8 + {197888,39}, //hs_5_0 + {197889,40}, //hs_5_1 + {198144,41}, //hs_6_0 + {198145,42}, //hs_6_1 + {198146,43}, //hs_6_2 + {198147,44}, //hs_6_3 + {198148,45}, //hs_6_4 + {198149,46}, //hs_6_5 + {198150,47}, //hs_6_6 + {198151,48}, //hs_6_7 + {198152,49}, //hs_6_8 + {263424,50}, //ds_5_0 + {263425,51}, //ds_5_1 + {263680,52}, //ds_6_0 + {263681,53}, //ds_6_1 + {263682,54}, //ds_6_2 + {263683,55}, //ds_6_3 + {263684,56}, //ds_6_4 + {263685,57}, //ds_6_5 + {263686,58}, //ds_6_6 + {263687,59}, //ds_6_7 + {263688,60}, //ds_6_8 + {328704,61}, //cs_4_0 + {328705,62}, //cs_4_1 + {328960,63}, //cs_5_0 + {328961,64}, //cs_5_1 + {329216,65}, //cs_6_0 + {329217,66}, //cs_6_1 + {329218,67}, //cs_6_2 + {329219,68}, //cs_6_3 + {329220,69}, //cs_6_4 + {329221,70}, //cs_6_5 + {329222,71}, //cs_6_6 + {329223,72}, //cs_6_7 + {329224,73}, //cs_6_8 + {394753,74}, //lib_6_1 + {394754,75}, //lib_6_2 + {394755,76}, //lib_6_3 + {394756,77}, //lib_6_4 + {394757,78}, //lib_6_5 + {394758,79}, //lib_6_6 + {394759,80}, //lib_6_7 + {394760,81}, //lib_6_8 // lib_6_x is for offline linking only, and relaxes restrictions - {394767,75},//lib_6_x - {853509,76}, //ms_6_5 - {853510,77}, //ms_6_6 - {853511,78}, //ms_6_7 - {919045,79}, //as_6_5 - {919046,80}, //as_6_6 - {919047,81}, //as_6_7 + {394767,82},//lib_6_x + 
{853509,83}, //ms_6_5 + {853510,84}, //ms_6_6 + {853511,85}, //ms_6_7 + {853512,86}, //ms_6_8 + {919045,87}, //as_6_5 + {919046,88}, //as_6_6 + {919047,89}, //as_6_7 + {919048,90}, //as_6_8 }; unsigned hash = (unsigned)Kind << 16 | Major << 8 | Minor; auto pred = [](const std::pair& elem, unsigned val){ return elem.first < val;}; @@ -257,6 +267,12 @@ const ShaderModel *ShaderModel::GetByName(const char *pszName) { break; } else return GetInvalid(); + case '8': + if (Major == 6) { + Minor = 8; + break; + } + else return GetInvalid(); // VALRULE-TEXT:END case 'x': if (kind == Kind::Library && Major == 6) { @@ -302,8 +318,11 @@ void ShaderModel::GetDxilVersion(unsigned &DxilMajor, unsigned &DxilMinor) const case 7: DxilMinor = 7; break; + case 8: + DxilMinor = 8; + break; case kOfflineMinor: // Always update this to highest dxil version - DxilMinor = 7; + DxilMinor = 8; break; // VALRULE-TEXT:END default: @@ -342,6 +361,9 @@ void ShaderModel::GetMinValidatorVersion(unsigned &ValMajor, unsigned &ValMinor) case 7: ValMinor = 7; break; + case 8: + ValMinor = 8; + break; // VALRULE-TEXT:END case kOfflineMinor: ValMajor = 0; @@ -356,8 +378,7 @@ void ShaderModel::GetMinValidatorVersion(unsigned &ValMajor, unsigned &ValMinor) static const char *ShaderModelKindNames[] = { "ps", "vs", "gs", "hs", "ds", "cs", "lib", "raygeneration", "intersection", "anyhit", "closesthit", "miss", "callable", - "ms", "as", "invalid", -}; + "ms", "as", "node", "invalid",}; const char * ShaderModel::GetKindName() const { return GetKindName(m_Kind); @@ -390,6 +411,7 @@ DXIL::ShaderKind ShaderModel::KindFromFullName(llvm::StringRef Name) { .Case("callable", DXIL::ShaderKind::Callable) .Case("mesh", DXIL::ShaderKind::Mesh) .Case("amplification", DXIL::ShaderKind::Amplification) + .Case("node", DXIL::ShaderKind::Node) .Default(DXIL::ShaderKind::Invalid); } @@ -411,6 +433,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Pixel, 6, 5, "ps_6_5", 32, 8, true, true, 
UINT_MAX), SM(Kind::Pixel, 6, 6, "ps_6_6", 32, 8, true, true, UINT_MAX), SM(Kind::Pixel, 6, 7, "ps_6_7", 32, 8, true, true, UINT_MAX), + SM(Kind::Pixel, 6, 8, "ps_6_8", 32, 8, true, true, UINT_MAX), SM(Kind::Vertex, 4, 0, "vs_4_0", 16, 16, false, false, 0), SM(Kind::Vertex, 4, 1, "vs_4_1", 32, 32, false, false, 0), SM(Kind::Vertex, 5, 0, "vs_5_0", 32, 32, true, true, 64), @@ -423,6 +446,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Vertex, 6, 5, "vs_6_5", 32, 32, true, true, UINT_MAX), SM(Kind::Vertex, 6, 6, "vs_6_6", 32, 32, true, true, UINT_MAX), SM(Kind::Vertex, 6, 7, "vs_6_7", 32, 32, true, true, UINT_MAX), + SM(Kind::Vertex, 6, 8, "vs_6_8", 32, 32, true, true, UINT_MAX), SM(Kind::Geometry, 4, 0, "gs_4_0", 16, 32, false, false, 0), SM(Kind::Geometry, 4, 1, "gs_4_1", 32, 32, false, false, 0), SM(Kind::Geometry, 5, 0, "gs_5_0", 32, 32, true, true, 64), @@ -435,6 +459,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Geometry, 6, 5, "gs_6_5", 32, 32, true, true, UINT_MAX), SM(Kind::Geometry, 6, 6, "gs_6_6", 32, 32, true, true, UINT_MAX), SM(Kind::Geometry, 6, 7, "gs_6_7", 32, 32, true, true, UINT_MAX), + SM(Kind::Geometry, 6, 8, "gs_6_8", 32, 32, true, true, UINT_MAX), SM(Kind::Hull, 5, 0, "hs_5_0", 32, 32, true, true, 64), SM(Kind::Hull, 5, 1, "hs_5_1", 32, 32, true, true, 64), SM(Kind::Hull, 6, 0, "hs_6_0", 32, 32, true, true, UINT_MAX), @@ -445,6 +470,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Hull, 6, 5, "hs_6_5", 32, 32, true, true, UINT_MAX), SM(Kind::Hull, 6, 6, "hs_6_6", 32, 32, true, true, UINT_MAX), SM(Kind::Hull, 6, 7, "hs_6_7", 32, 32, true, true, UINT_MAX), + SM(Kind::Hull, 6, 8, "hs_6_8", 32, 32, true, true, UINT_MAX), SM(Kind::Domain, 5, 0, "ds_5_0", 32, 32, true, true, 64), SM(Kind::Domain, 5, 1, "ds_5_1", 32, 32, true, true, 64), SM(Kind::Domain, 6, 0, "ds_6_0", 32, 32, true, true, UINT_MAX), @@ -455,6 +481,7 @@ const ShaderModel 
ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Domain, 6, 5, "ds_6_5", 32, 32, true, true, UINT_MAX), SM(Kind::Domain, 6, 6, "ds_6_6", 32, 32, true, true, UINT_MAX), SM(Kind::Domain, 6, 7, "ds_6_7", 32, 32, true, true, UINT_MAX), + SM(Kind::Domain, 6, 8, "ds_6_8", 32, 32, true, true, UINT_MAX), SM(Kind::Compute, 4, 0, "cs_4_0", 0, 0, false, false, 0), SM(Kind::Compute, 4, 1, "cs_4_1", 0, 0, false, false, 0), SM(Kind::Compute, 5, 0, "cs_5_0", 0, 0, true, true, 64), @@ -467,6 +494,7 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Compute, 6, 5, "cs_6_5", 0, 0, true, true, UINT_MAX), SM(Kind::Compute, 6, 6, "cs_6_6", 0, 0, true, true, UINT_MAX), SM(Kind::Compute, 6, 7, "cs_6_7", 0, 0, true, true, UINT_MAX), + SM(Kind::Compute, 6, 8, "cs_6_8", 0, 0, true, true, UINT_MAX), SM(Kind::Library, 6, 1, "lib_6_1", 32, 32, true, true, UINT_MAX), SM(Kind::Library, 6, 2, "lib_6_2", 32, 32, true, true, UINT_MAX), SM(Kind::Library, 6, 3, "lib_6_3", 32, 32, true, true, UINT_MAX), @@ -474,14 +502,17 @@ const ShaderModel ShaderModel::ms_ShaderModels[kNumShaderModels] = { SM(Kind::Library, 6, 5, "lib_6_5", 32, 32, true, true, UINT_MAX), SM(Kind::Library, 6, 6, "lib_6_6", 32, 32, true, true, UINT_MAX), SM(Kind::Library, 6, 7, "lib_6_7", 32, 32, true, true, UINT_MAX), + SM(Kind::Library, 6, 8, "lib_6_8", 32, 32, true, true, UINT_MAX), // lib_6_x is for offline linking only, and relaxes restrictions SM(Kind::Library, 6, kOfflineMinor, "lib_6_x", 32, 32, true, true, UINT_MAX), SM(Kind::Mesh, 6, 5, "ms_6_5", 0, 0, true, true, UINT_MAX), SM(Kind::Mesh, 6, 6, "ms_6_6", 0, 0, true, true, UINT_MAX), SM(Kind::Mesh, 6, 7, "ms_6_7", 0, 0, true, true, UINT_MAX), + SM(Kind::Mesh, 6, 8, "ms_6_8", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 5, "as_6_5", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 6, "as_6_6", 0, 0, true, true, UINT_MAX), SM(Kind::Amplification, 6, 7, "as_6_7", 0, 0, true, true, UINT_MAX), + SM(Kind::Amplification, 
6, 8, "as_6_8", 0, 0, true, true, UINT_MAX), // Values before Invalid must remain sorted by Kind, then Major, then Minor. SM(Kind::Invalid, 0, 0, "invalid", 0, 0, false, false, 0), // VALRULE-TEXT:END diff --git a/lib/DXIL/DxilTypeSystem.cpp b/lib/DXIL/DxilTypeSystem.cpp index e87ba3ab08..5f207f37da 100644 --- a/lib/DXIL/DxilTypeSystem.cpp +++ b/lib/DXIL/DxilTypeSystem.cpp @@ -46,6 +46,7 @@ DxilFieldAnnotation::DxilFieldAnnotation() , m_CBufferOffset(UINT_MAX) , m_bCBufferVarUsed(false) , m_BitFieldWidth(0) +, m_VectorSize(0) {} bool DxilFieldAnnotation::IsPrecise() const { return m_bPrecise; } @@ -53,6 +54,8 @@ void DxilFieldAnnotation::SetPrecise(bool b) { m_bPrecise = b; } bool DxilFieldAnnotation::HasMatrixAnnotation() const { return m_Matrix.Cols != 0; } const DxilMatrixAnnotation &DxilFieldAnnotation::GetMatrixAnnotation() const { return m_Matrix; } void DxilFieldAnnotation::SetMatrixAnnotation(const DxilMatrixAnnotation &MA) { m_Matrix = MA; } +unsigned DxilFieldAnnotation::GetVectorSize() const { return m_VectorSize; } +void DxilFieldAnnotation::SetVectorSize(unsigned size) { m_VectorSize = size; } bool DxilFieldAnnotation::HasResourceProperties() const { return m_ResourceProps.isValid(); } @@ -260,6 +263,10 @@ void DxilParameterAnnotation::AppendSemanticIndex(unsigned SemIdx) { m_semanticIndex.emplace_back(SemIdx); } +bool DxilParameterAnnotation::IsParamInputQualNode() { + return (m_inputQual == DxilParamInputQual::NodeIO); +} + //------------------------------------------------------------------------------ // // DxilFunctionAnnotation class methods. 
diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 22d3c83765..d87a696f92 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -16,6 +16,8 @@ #include "dxc/DXIL/DxilOperations.h" #include "dxc/HLSL/DxilConvergentName.h" #include "dxc/Support/Global.h" + +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/DiagnosticInfo.h" @@ -520,6 +522,12 @@ bool IsHLSLObjectType(llvm::Type *Ty) { return true; if (name.startswith("LineStream<")) return true; + + if (IsHLSLWaveMatrixType(Ty)) + return true; + + if (IsHLSLNodeIOType(Ty)) + return true; } return false; } @@ -537,22 +545,225 @@ bool IsHLSLRayQueryType(llvm::Type *Ty) { return false; } +bool IsHLSLWaveMatrixType(llvm::Type *Ty, DXIL::WaveMatrixKind *pKind) { + if (Ty->isPointerTy()) + Ty = Ty->getPointerElementType(); + if (llvm::StructType *ST = dyn_cast(Ty)) { + if (!ST->hasName()) + return false; + StringRef name = ST->getName(); + // TODO: don't check names. + ConsumePrefix(name, "class."); + if (!ConsumePrefix(name, "WaveMatrix")) + return false; + DXIL::WaveMatrixKind kind = DXIL::WaveMatrixKind::NumKinds; + if (name.startswith("Left<")) kind = DXIL::WaveMatrixKind::Left; + if (name.startswith("Right<")) kind = DXIL::WaveMatrixKind::Right; + if (name.startswith("LeftColAcc<")) kind = DXIL::WaveMatrixKind::LeftColAcc; + if (name.startswith("RightRowAcc<")) kind = DXIL::WaveMatrixKind::RightRowAcc; + if (name.startswith("Accumulator<")) kind = DXIL::WaveMatrixKind::Accumulator; + if (pKind) + *pKind = kind; + if (kind != DXIL::WaveMatrixKind::NumKinds) + return true; + } + return false; +} + bool IsHLSLResourceDescType(llvm::Type *Ty) { if (llvm::StructType *ST = dyn_cast(Ty)) { if (!ST->hasName()) return false; StringRef name = ST->getName(); + ConsumePrefix(name, "class."); + ConsumePrefix(name, "struct."); + // TODO: don't check names. 
- if (name == ("struct..Resource")) + if (name == (".Resource")) return true; - if (name == "struct..Sampler") + if (name == ".Sampler") return true; } return false; } +bool IsHLSLNodeOutputType(llvm::Type *Ty) { + if (llvm::StructType *ST = dyn_cast(Ty)) { + if (!ST->hasName()) + return false; + StringRef name = ST->getName(); + + ConsumePrefix(name, "class."); + ConsumePrefix(name, "struct."); + + // TODO: don't check names. + if ( name.startswith("NodeOutput<") + || name.equals("EmptyNodeOutput")) + return true; + } + return false; +} + +bool IsHLSLNodeOutputArrayType(llvm::Type* Ty) { + if (llvm::StructType* ST = dyn_cast(Ty)) { + if (!ST->hasName()) + return false; + StringRef name = ST->getName(); + + ConsumePrefix(name, "class."); + ConsumePrefix(name, "struct."); + + // TODO: don't check names. + if (name.startswith("NodeOutputArray<") + || name.equals("EmptyNodeOutputArray")) + return true; + } + return false; +} + +bool IsHLSLEmptyNodeOutputType(llvm::Type* Ty) { + if (llvm::StructType* ST = dyn_cast(Ty)) { + if (!ST->hasName()) + return false; + StringRef name = ST->getName(); + + ConsumePrefix(name, "class."); + ConsumePrefix(name, "struct."); + + // TODO: don't check names. + if (name.equals("EmptyNodeOutput")) + return true; + } + return false; +} + +bool IsHLSLEmptyNodeOutputArrayType(llvm::Type* Ty) { + if (llvm::StructType* ST = dyn_cast(Ty)) { + if (!ST->hasName()) + return false; + StringRef name = ST->getName(); + + ConsumePrefix(name, "class."); + ConsumePrefix(name, "struct."); + + // TODO: don't check names. + if (name.equals("EmptyNodeOutputArray")) + return true; + } + return false; +} + +bool IsHLSLNodeInputRecordType(llvm::Type *Ty) { + if (llvm::StructType *ST = dyn_cast(Ty)) { + if (!ST->hasName()) + return false; + StringRef name = ST->getName(); + + ConsumePrefix(name, "class."); + ConsumePrefix(name, "struct."); + + // TODO: don't check names. 
+ if (name.startswith("DispatchNodeInputRecord<") || + name.startswith("RWDispatchNodeInputRecord<") || + name.startswith("GroupNodeInputRecords<") || + name.startswith("RWGroupNodeInputRecords<") || + name.startswith("ThreadNodeInputRecord<") || + name.startswith("RWThreadNodeInputRecord<") || + name.equals("EmptyNodeInput")) + return true; + } + return false; +} + +bool IsHLSLNodeIOType(llvm::Type* Ty) { + return IsHLSLNodeInputRecordType(Ty) || IsHLSLNodeOutputType(Ty); +} + +bool IsHLSLNodeEmptyInputRecordType(llvm::Type* Ty) { + if (llvm::StructType* ST = dyn_cast(Ty)) { + if (!ST->hasName()) + return false; + StringRef name = ST->getName(); + + ConsumePrefix(name, "class."); + ConsumePrefix(name, "struct."); + + // TODO: don't check names. + if (name.equals("EmptyNodeInput")) + return true; + } + return false; +} + +bool IsHLSLNodeEmptyOutputRecordType(llvm::Type* Ty) { + if (llvm::StructType* ST = dyn_cast(Ty)) { + if (!ST->hasName()) + return false; + StringRef name = ST->getName(); + + ConsumePrefix(name, "class."); + ConsumePrefix(name, "struct."); + + // TODO: don't check names. + if (name.equals("EmptyNodeOutput")) + return true; + } + return false; +} + +bool IsHLSLRWNodeInputRecordType(llvm::Type* Ty) { + if (llvm::StructType* ST = dyn_cast(Ty)) { + if (!ST->hasName()) + return false; + StringRef name = ST->getName(); + + ConsumePrefix(name, "class."); + ConsumePrefix(name, "struct."); + + if (name.startswith("RWDispatchNodeInputRecord<") || + name.startswith("RWGroupNodeInputRecords<") || + name.startswith("RWThreadNodeInputRecord<")) + return true; + } + return false; +} + +bool IsHLSLNodeOutputRecordType(llvm::Type *Ty) { + if (llvm::StructType *ST = dyn_cast(Ty)) { + if (!ST->hasName()) + return false; + StringRef name = ST->getName(); + + ConsumePrefix(name, "class."); + ConsumePrefix(name, "struct."); + + // TODO: don't check names. 
+ if (name.startswith("GroupNodeOutputRecords<") || + name.startswith("ThreadNodeOutputRecords<")) + return true; + } + return false; +} + +bool IsHLSLGSNodeOutputRecordType(llvm::Type* Ty) { + if (llvm::StructType* ST = dyn_cast(Ty)) { + if (!ST->hasName()) + return false; + StringRef name = ST->getName(); + + if (name.startswith("struct.GroupNodeOutputRecords<")) + return true; + } + return false; +} + +bool IsHLSLNodeRecordType(llvm::Type *Ty) { + return IsHLSLNodeOutputRecordType(Ty) || + IsHLSLNodeInputRecordType(Ty); +} + bool IsIntegerOrFloatingPointType(llvm::Type *Ty) { return Ty->isIntegerTy() || Ty->isFloatingPointTy(); } @@ -612,6 +823,15 @@ llvm::Type* WrapInArrayTypes(llvm::Type *Ty, llvm::ArrayRef OuterToInn return Ty; } +llvm::Value *MirrorGEP(llvm::GEPOperator *GEP, llvm::Value *NewBasePtr) { + IRBuilder<> Builder(GEP->getContext()); + if (GetElementPtrInst *GEPI = dyn_cast(GEP)) + Builder.SetInsertPoint(GEPI); + + SmallVector idxList(GEP->idx_begin(), GEP->idx_end()); + return Builder.CreateGEP(NewBasePtr, idxList); +} + namespace { // Create { v0, v1 } from { v0.lo, v0.hi, v1.lo, v1.hi } void Make64bitResultForLoad(Type *EltTy, ArrayRef resultElts32, @@ -994,6 +1214,52 @@ Value *TryReplaceBaseCastWithGep(Value *V) { return nullptr; } +// Calculate Offset +Value *GEPIdxToOffset(GetElementPtrInst *GEP, IRBuilder<> &Builder, + hlsl::OP *OP, const DataLayout &DL) { + SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); + Value *addr = nullptr; + // update offset + if (GEP->hasAllConstantIndices()) { + unsigned gepOffset = + DL.getIndexedOffset(GEP->getPointerOperandType(), Indices); + addr = OP->GetU32Const(gepOffset); + } else { + Value *offset = OP->GetU32Const(0); + gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP); + for (; GEPIt != E; GEPIt++) { + Value *idx = GEPIt.getOperand(); + unsigned immIdx = 0; + if (llvm::Constant *constIdx = dyn_cast(idx)) { + immIdx = constIdx->getUniqueInteger().getLimitedValue(); + if (immIdx == 
0) + continue; + } + + if (GEPIt->isPointerTy() || GEPIt->isArrayTy() || GEPIt->isVectorTy()) { + unsigned size = DL.getTypeAllocSize(GEPIt->getSequentialElementType()); + if (immIdx) { + unsigned tempOffset = size * immIdx; + offset = Builder.CreateAdd(offset, OP->GetU32Const(tempOffset)); + } else { + Value *tempOffset = Builder.CreateMul(idx, OP->GetU32Const(size)); + offset = Builder.CreateAdd(offset, tempOffset); + } + } else if (GEPIt->isStructTy()) { + const StructLayout *Layout = DL.getStructLayout(cast(*GEPIt)); + unsigned structOffset = Layout->getElementOffset(immIdx); + offset = Builder.CreateAdd(offset, OP->GetU32Const(structOffset)); + } else { + gep_type_iterator temp = GEPIt; + temp++; + DXASSERT(temp == E, "scalar type must be the last"); + } + } + addr = offset; + } + return addr; +} + struct AllocaDeleter { SmallVector WorkList; std::unordered_set Seen; @@ -1082,5 +1348,5 @@ bool DeleteDeadAllocas(llvm::Function &F) { return Changed; } -} -} +} // namespace dxil util +} //namespace hlsl diff --git a/lib/DXIL/DxilWaveMatrix.cpp b/lib/DXIL/DxilWaveMatrix.cpp new file mode 100644 index 0000000000..c15b93a04d --- /dev/null +++ b/lib/DXIL/DxilWaveMatrix.cpp @@ -0,0 +1,79 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilWaveMatrix.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "dxc/DXIL/DxilWaveMatrix.h" +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DXIL/DxilModule.h" +#include "dxc/DXIL/DxilOperations.h" +#include "dxc/DXIL/DxilShaderModel.h" +#include "dxc/DXIL/DxilUtil.h" +#include "dxc/Support/Global.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" + + +using namespace llvm; + +namespace hlsl { + +DxilWaveMatrixProperties +wavemat_helper::LoadInfoFromConstant(llvm::Constant *C) { + DXASSERT(!isa(C), + "otherwise, DxilWaveMatrixProperties has invalid value"); + const ConstantStruct *CS = cast(C); + DXASSERT(CS->getType()->getNumElements() == 4, + "otherwise, struct is not expected layout"); + DxilWaveMatrixProperties info; + info.kind = (DXIL::WaveMatrixKind)cast(CS->getOperand(0)) + ->getLimitedValue(); + info.compType = (DXIL::ComponentType)cast(CS->getOperand(1)) + ->getLimitedValue(); + info.dimM = (uint32_t)cast(CS->getOperand(2))->getLimitedValue(); + info.dimN = (uint32_t)cast(CS->getOperand(3))->getLimitedValue(); + return info; +} + +Constant * +wavemat_helper::GetInfoConstantFromWaveMatPtr(llvm::Value *waveMatPtr) { + DXASSERT_NOMSG(isa(waveMatPtr)); + for (auto *U : waveMatPtr->users()) { + Instruction *I = cast(U); + DxilInst_WaveMatrix_Annotate annotate(I); + if (annotate) { + DXASSERT_NOMSG(isa(annotate.get_waveMatProps())); + return cast(annotate.get_waveMatProps()); + } + } + return nullptr; +} + +DxilWaveMatrixProperties +wavemat_helper::GetInfoFromWaveMatPtr(llvm::Value *waveMatPtr) { + Constant *infoC = wavemat_helper::GetInfoConstantFromWaveMatPtr(waveMatPtr); + DXASSERT(infoC, "otherwise, no WaveMatAnnotate call found for ptr"); + return wavemat_helper::LoadInfoFromConstant(infoC); +} + +llvm::Constant * +wavemat_helper::GetAsConstant(const DxilWaveMatrixProperties &info, + llvm::StructType 
*infoTy) { + LLVMContext &Ctx = infoTy->getContext(); + IntegerType *i8Ty = IntegerType::get(Ctx, 8); + IntegerType *i32Ty = IntegerType::get(Ctx, 32); + return ConstantStruct::get(cast(infoTy), + {ConstantInt::get(i8Ty, (unsigned)info.kind), + ConstantInt::get(i8Ty, (unsigned)info.compType), + ConstantInt::get(i32Ty, (unsigned)info.dimM), + ConstantInt::get(i32Ty, (unsigned)info.dimN)}); +} + +} // namespace hlsl diff --git a/lib/DxcSupport/HLSLOptions.cpp b/lib/DxcSupport/HLSLOptions.cpp index 717bb15b64..3be202f0e6 100644 --- a/lib/DxcSupport/HLSLOptions.cpp +++ b/lib/DxcSupport/HLSLOptions.cpp @@ -921,6 +921,21 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, opts.ValVerMinor = (unsigned long)minor64; } + llvm::StringRef valSelectStr = Args.getLastArgValue(OPT_select_validator); + if (!valSelectStr.empty()) { + if (valSelectStr.equals_lower("auto")) { + opts.SelectValidator = ValidatorSelection::Auto; + } else if (valSelectStr.equals_lower("internal")) { + opts.SelectValidator = ValidatorSelection::Internal; + } else if (valSelectStr.equals_lower("external")) { + opts.SelectValidator = ValidatorSelection::External; + } else { // only auto/internal/external are accepted + errors << "Unsupported value '" << valSelectStr + << "' for -select-validator option."; + return 1; + } + } + if (opts.IsLibraryProfile() && Minor == 0xF) { if (opts.ValVerMajor != UINT_MAX && opts.ValVerMajor != 0) { errors << "Offline library profile cannot be used with non-zero -validator-version."; diff --git a/lib/DxilContainer/DxilContainerAssembler.cpp b/lib/DxilContainer/DxilContainerAssembler.cpp index ced7ed22c8..ba28702e58 100644 --- a/lib/DxilContainer/DxilContainerAssembler.cpp +++ b/lib/DxilContainer/DxilContainerAssembler.cpp @@ -1008,6 +1008,93 @@ class DxilPSVWriter : public DxilPartWriter { } }; +////////////////////////////////////////////////////////// +// DxilVersionWriter - Writes VERS part +class DxilVersionWriter : public DxilPartWriter { + hlsl::DxilCompilerVersion m_Header = {}; +
CComHeapPtr m_CommitShaStorage; + llvm::StringRef m_CommitSha = ""; + CComHeapPtr m_CustomStringStorage; + llvm::StringRef m_CustomString = ""; +public: + DxilVersionWriter(IDxcVersionInfo *pVersion) + { + Init(pVersion); + } + + void Init(IDxcVersionInfo *pVersionInfo) { + m_Header = {}; + + UINT32 Major = 0, Minor = 0; + UINT32 Flags = 0; + IFT(pVersionInfo->GetVersion(&Major, &Minor)); + IFT(pVersionInfo->GetFlags(&Flags)); + + m_Header.Major = Major; + m_Header.Minor = Minor; + m_Header.VersionFlags = Flags; + CComPtr pVersionInfo2; + if (SUCCEEDED(pVersionInfo->QueryInterface(&pVersionInfo2))) { + UINT32 CommitCount = 0; + IFT(pVersionInfo2->GetCommitInfo(&CommitCount, &m_CommitShaStorage)); + m_CommitSha = llvm::StringRef(m_CommitShaStorage.m_pData, strlen(m_CommitShaStorage.m_pData)); + m_Header.CommitCount = CommitCount; + m_Header.VersionStringListSizeInBytes += m_CommitSha.size(); + } + m_Header.VersionStringListSizeInBytes += /*null term*/ 1; + + CComPtr pVersionInfo3; + if (SUCCEEDED(pVersionInfo->QueryInterface(&pVersionInfo3))) { + IFT(pVersionInfo3->GetCustomVersionString(&m_CustomStringStorage)); + m_CustomString = llvm::StringRef(m_CustomStringStorage, strlen(m_CustomStringStorage.m_pData)); + m_Header.VersionStringListSizeInBytes += m_CustomString.size(); + } + m_Header.VersionStringListSizeInBytes += /*null term*/ 1; + } + + static uint32_t PadToDword(uint32_t size, uint32_t *outNumPadding=nullptr) { + uint32_t rem = size % 4; + if (rem) { + uint32_t padding = (4 - rem); + if (outNumPadding) + *outNumPadding = padding; + return size + padding; + } + if (outNumPadding) + *outNumPadding = 0; + return size; + } + + UINT32 size() const override { + return PadToDword(sizeof(m_Header) + m_Header.VersionStringListSizeInBytes); + } + + void write(AbstractMemoryStream *pStream) override { + const uint8_t padByte = 0; + UINT32 uPadding = 0; + UINT32 uSize = PadToDword(sizeof(m_Header) + m_Header.VersionStringListSizeInBytes, &uPadding); + (void)uSize; + + 
ULONG cbWritten = 0; + IFT(pStream->Write(&m_Header, sizeof(m_Header), &cbWritten)); + + // Write the commit sha (may be empty); its null terminator is always written. + IFT(pStream->Write(m_CommitSha.data(), m_CommitSha.size(), &cbWritten)); + // Null terminator for the commit sha + IFT(pStream->Write(&padByte, sizeof(padByte), &cbWritten)); + + // Write the custom version string. + IFT(pStream->Write(m_CustomString.data(), m_CustomString.size(), &cbWritten)); + // Null terminator for the custom version string. + IFT(pStream->Write(&padByte, sizeof(padByte), &cbWritten)); + + // Write padding + for (unsigned i = 0; i < uPadding; i++) { + IFT(pStream->Write(&padByte, sizeof(padByte), &cbWritten)); + } + } +}; + using namespace DXIL; class DxilRDATWriter : public DxilPartWriter { @@ -1170,6 +1257,312 @@ class DxilRDATWriter : public DxilPartWriter { } } + uint32_t AddSigElements(const DxilSignature &sig, uint32_t &shaderFlags, + uint8_t *pOutputStreamMask = nullptr) { + shaderFlags = 0; // Fresh flags each call + SmallVector rdatElements; + for (auto &&E : sig.GetElements()) { + RDAT::SignatureElement e = {}; + e.SemanticName = Builder.InsertString(E->GetSemanticName()); + e.SemanticIndices = Builder.InsertArray( + E->GetSemanticIndexVec().begin(), E->GetSemanticIndexVec().end()); + e.SemanticKind = (uint8_t)E->GetKind(); + e.ComponentType = (uint8_t)E->GetCompType().GetKind(); + e.InterpolationMode = (uint8_t)E->GetInterpolationMode()->GetKind(); + e.StartRow = E->IsAllocated() ?
E->GetStartRow() : 0xFF; + e.SetCols(E->GetCols()); + e.SetStartCol(E->GetStartCol()); + e.SetOutputStream(E->GetOutputStream()); + e.SetUsageMask(E->GetUsageMask()); + e.SetDynamicIndexMask(E->GetDynIdxCompMask()); + rdatElements.push_back(Builder.InsertRecord(e)); + + if (E->GetKind() == DXIL::SemanticKind::Position) + shaderFlags |= (uint32_t)DxilShaderFlags::OutputPositionPresent; + if (E->GetInterpolationMode()->IsAnySample() || + E->GetKind() == Semantic::Kind::SampleIndex) + shaderFlags |= (uint32_t)DxilShaderFlags::SampleFrequency; + if (E->IsAnyDepth()) + shaderFlags |= (uint32_t)DxilShaderFlags::DepthOutput; + + if (pOutputStreamMask) + *pOutputStreamMask |= 1 << E->GetOutputStream(); + } + return Builder.InsertArray(rdatElements.begin(), rdatElements.end()); + } + + uint32_t AddIONodes(const std::vector &nodes) { + SmallVector rdatNodes; + for (auto &N : nodes) { + RDAT::IONode ioNode = {}; + ioNode.IOFlagsAndKind = N.Flags; + SmallVector nodeAttribs; + RDAT::NodeShaderIOAttrib nAttrib = {}; + if (N.Flags.IsOutputNode()) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)NodeAttribKind::OutputID; + RDAT::NodeID ID = {}; + ID.Name = Builder.InsertString(N.OutputID.Name); + ID.Index = N.OutputID.Index; + nAttrib.OutputID = Builder.InsertRecord(ID); + nodeAttribs.push_back(Builder.InsertRecord(nAttrib)); + + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)NodeAttribKind::OutputArraySize; + nAttrib.OutputArraySize = N.OutputArraySize; + nodeAttribs.push_back(Builder.InsertRecord(nAttrib)); + + // Only include if these are specified + if (N.MaxRecords) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)NodeAttribKind::MaxRecords; + nAttrib.MaxRecords = N.MaxRecords; + nodeAttribs.push_back(Builder.InsertRecord(nAttrib)); + } else if (N.MaxRecordsSharedWith >= 0) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)RDAT::NodeAttribKind::MaxRecordsSharedWith; + nAttrib.MaxRecordsSharedWith = N.MaxRecordsSharedWith; + 
nodeAttribs.push_back(Builder.InsertRecord(nAttrib)); + } + if (N.AllowSparseNodes) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)RDAT::NodeAttribKind::AllowSparseNodes; + nAttrib.AllowSparseNodes = N.AllowSparseNodes; + nodeAttribs.push_back(Builder.InsertRecord(nAttrib)); + } + } else if (N.Flags.IsInputRecord()) { + if (N.MaxRecords) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)NodeAttribKind::MaxRecords; + nAttrib.MaxRecords = N.MaxRecords; + nodeAttribs.push_back(Builder.InsertRecord(nAttrib)); + } + } + + // Common attributes + if (N.RecordType.size) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)NodeAttribKind::RecordSizeInBytes; + nAttrib.RecordSizeInBytes = N.RecordType.size; + nodeAttribs.push_back(Builder.InsertRecord(nAttrib)); + + if (N.RecordType.SV_DispatchGrid.ComponentType != DXIL::ComponentType::Invalid) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)NodeAttribKind::RecordDispatchGrid; + nAttrib.RecordDispatchGrid.ByteOffset = (uint16_t)N.RecordType.SV_DispatchGrid.ByteOffset; + nAttrib.RecordDispatchGrid.SetComponentType(N.RecordType.SV_DispatchGrid.ComponentType); + nAttrib.RecordDispatchGrid.SetNumComponents(N.RecordType.SV_DispatchGrid.NumComponents); + nodeAttribs.push_back(Builder.InsertRecord(nAttrib)); + } + } + + + ioNode.Attribs = Builder.InsertArray(nodeAttribs.begin(), nodeAttribs.end()); + rdatNodes.push_back(Builder.InsertRecord(ioNode)); + } + return Builder.InsertArray(rdatNodes.begin(), rdatNodes.end()); + } + + uint32_t AddShaderInfo(llvm::Function &function, + const DxilEntryProps &entryProps, + RuntimeDataFunctionInfo2 &funcInfo, + const ShaderFlags &flags, + uint32_t tgsmSizeInBytes) { + const DxilFunctionProps &props = entryProps.props; + const DxilEntrySignature &sig = entryProps.sig; + if (props.waveSize) { + funcInfo.MinimumExpectedWaveLaneCount = props.waveSize; + funcInfo.MaximumExpectedWaveLaneCount = props.waveSize; + } + funcInfo.ShaderFlags = 0; + if (flags.GetViewID()) + funcInfo.ShaderFlags |= 
(uint16_t)DxilShaderFlags::UsesViewID; + uint32_t shaderFlags = 0; + switch (props.shaderKind) { + case ShaderKind::Pixel: { + RDAT::PSInfo info = {}; + info.SigInputElements = AddSigElements(sig.InputSignature, shaderFlags); + funcInfo.ShaderFlags |= (uint16_t)(shaderFlags & (uint16_t)DxilShaderFlags::SampleFrequency); + info.SigOutputElements = AddSigElements(sig.OutputSignature, shaderFlags); + funcInfo.ShaderFlags |= (uint16_t)(shaderFlags & (uint16_t)DxilShaderFlags::DepthOutput); + return Builder.InsertRecord(info); + } break; + case ShaderKind::Vertex: { + RDAT::VSInfo info = {}; + info.SigInputElements = AddSigElements(sig.InputSignature, shaderFlags); + info.SigOutputElements = AddSigElements(sig.OutputSignature, shaderFlags); + funcInfo.ShaderFlags |= (uint16_t)(shaderFlags & (uint16_t)DxilShaderFlags::OutputPositionPresent); + // TODO: Fill in ViewID related masks + return Builder.InsertRecord(info); + } break; + case ShaderKind::Geometry: { + RDAT::GSInfo info = {}; + info.SigInputElements = AddSigElements(sig.InputSignature, shaderFlags); + shaderFlags = 0; + info.SigOutputElements = AddSigElements(sig.OutputSignature, shaderFlags, &info.OutputStreamMask); + funcInfo.ShaderFlags |= (uint16_t)(shaderFlags & (uint16_t)DxilShaderFlags::OutputPositionPresent); + // TODO: Fill in ViewID related masks + info.InputPrimitive = (uint8_t)props.ShaderProps.GS.inputPrimitive; + info.OutputTopology = (uint8_t)props.ShaderProps.GS.streamPrimitiveTopologies[0]; + info.MaxVertexCount = (uint8_t)props.ShaderProps.GS.maxVertexCount; + return Builder.InsertRecord(info); + } break; + case ShaderKind::Hull: { + RDAT::HSInfo info = {}; + info.SigInputElements = AddSigElements(sig.InputSignature, shaderFlags); + info.SigOutputElements = AddSigElements(sig.OutputSignature, shaderFlags); + info.SigPatchConstOutputElements = AddSigElements(sig.PatchConstOrPrimSignature, shaderFlags); + // TODO: Fill in ViewID related masks + info.InputControlPointCount = 
(uint8_t)props.ShaderProps.HS.inputControlPoints; + info.OutputControlPointCount = (uint8_t)props.ShaderProps.HS.outputControlPoints; + info.TessellatorDomain = (uint8_t)props.ShaderProps.HS.domain; + info.TessellatorOutputPrimitive = (uint8_t)props.ShaderProps.HS.outputPrimitive; + return Builder.InsertRecord(info); + } break; + case ShaderKind::Domain: { + RDAT::DSInfo info = {}; + info.SigInputElements = AddSigElements(sig.InputSignature, shaderFlags); + info.SigOutputElements = AddSigElements(sig.OutputSignature, shaderFlags); + funcInfo.ShaderFlags |= (uint16_t)(shaderFlags & (uint16_t)DxilShaderFlags::OutputPositionPresent); + info.SigPatchConstInputElements = AddSigElements(sig.PatchConstOrPrimSignature, shaderFlags); + // TODO: Fill in ViewID related masks + info.InputControlPointCount = (uint8_t)props.ShaderProps.DS.inputControlPoints; + info.TessellatorDomain = (uint8_t)props.ShaderProps.DS.domain; + return Builder.InsertRecord(info); + } break; + case ShaderKind::Compute: { + RDAT::CSInfo info = {}; + info.NumThreads = + Builder.InsertArray(&props.numThreads[0], + &props.numThreads[0] + 3); + info.GroupSharedBytesUsed = tgsmSizeInBytes; + return Builder.InsertRecord(info); + } break; + case ShaderKind::Mesh: { + RDAT::MSInfo info = {}; + info.SigOutputElements = AddSigElements(sig.OutputSignature, shaderFlags); + funcInfo.ShaderFlags |= (uint16_t)(shaderFlags & (uint16_t)DxilShaderFlags::OutputPositionPresent); + info.SigPrimOutputElements = AddSigElements(sig.PatchConstOrPrimSignature, shaderFlags); + // TODO: Fill in ViewID related masks + info.NumThreads = + Builder.InsertArray(&props.numThreads[0], + &props.numThreads[0] + 3); + info.GroupSharedBytesUsed = tgsmSizeInBytes; + info.GroupSharedBytesDependentOnViewID = (uint32_t)0; // TODO: same thing (note: this isn't filled in for PSV!) 
+ info.PayloadSizeInBytes = (uint32_t)props.ShaderProps.MS.payloadSizeInBytes; + info.MaxOutputVertices = (uint16_t)props.ShaderProps.MS.maxVertexCount; + info.MaxOutputPrimitives = (uint16_t)props.ShaderProps.MS.maxPrimitiveCount; + info.MeshOutputTopology = (uint8_t)props.ShaderProps.MS.outputTopology; + return Builder.InsertRecord(info); + } break; + case ShaderKind::Amplification: { + RDAT::ASInfo info = {}; + info.NumThreads = + Builder.InsertArray(&props.numThreads[0], + &props.numThreads[0] + 3); + info.GroupSharedBytesUsed = tgsmSizeInBytes; + info.PayloadSizeInBytes = (uint32_t)props.ShaderProps.AS.payloadSizeInBytes; + return Builder.InsertRecord(info); + } break; + } + return RDAT_NULL_REF; + } + + uint32_t AddShaderNodeInfo(const DxilModule &DM, llvm::Function &function, + const DxilEntryProps &entryProps, + RuntimeDataFunctionInfo3 &funcInfo, + uint32_t tgsmSizeInBytes) { + const DxilFunctionProps &props = entryProps.props; + + // Add node info + RDAT::NodeShaderInfo nInfo = {}; + + RDAT::NodeShaderFuncAttrib nAttrib = {}; + SmallVector funcAttribs; + + // LaunchType is technically optional, but less optional + nInfo.LaunchType = (uint32_t)props.Node.LaunchType; + nInfo.GroupSharedBytesUsed = tgsmSizeInBytes; + + // Add the function attribute fields + if (!props.NodeShaderID.Name.empty()) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)RDAT::NodeFuncAttribKind::ID; + RDAT::NodeID ID = {}; + ID.Name = Builder.InsertString(props.NodeShaderID.Name); + ID.Index = props.NodeShaderID.Index; + nAttrib.ID = Builder.InsertRecord(ID); + funcAttribs.push_back(Builder.InsertRecord(nAttrib)); + } + + if (props.Node.IsProgramEntry) + funcInfo.ShaderFlags |= (uint16_t)DxilShaderFlags::NodeProgramEntry; + + if (props.numThreads[0] || props.numThreads[1] || props.numThreads[2]) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)RDAT::NodeFuncAttribKind::NumThreads; + nAttrib.NumThreads = + Builder.InsertArray(&props.numThreads[0], + &props.numThreads[0] + 3); + 
funcAttribs.push_back(Builder.InsertRecord(nAttrib)); + } + + if (props.Node.LocalRootArgumentsTableIndex >= 0) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)RDAT::NodeFuncAttribKind::LocalRootArgumentsTableIndex; + nAttrib.LocalRootArgumentsTableIndex = props.Node.LocalRootArgumentsTableIndex; + funcAttribs.push_back(Builder.InsertRecord(nAttrib)); + } + + if (!props.NodeShaderSharedInput.Name.empty()) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)RDAT::NodeFuncAttribKind::ShareInputOf; + RDAT::NodeID ID = {}; + ID.Name = Builder.InsertString(props.NodeShaderSharedInput.Name); + ID.Index = props.NodeShaderSharedInput.Index; + nAttrib.ShareInputOf = Builder.InsertRecord(ID); + funcAttribs.push_back(Builder.InsertRecord(nAttrib)); + } + + if (props.Node.DispatchGrid[0] || + props.Node.DispatchGrid[1] || + props.Node.DispatchGrid[2]) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)RDAT::NodeFuncAttribKind::DispatchGrid; + nAttrib.DispatchGrid = + Builder.InsertArray(&props.Node.DispatchGrid[0], + &props.Node.DispatchGrid[0] + 3); + funcAttribs.push_back(Builder.InsertRecord(nAttrib)); + } + + if (props.Node.MaxRecursionDepth) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)RDAT::NodeFuncAttribKind::MaxRecursionDepth; + nAttrib.MaxRecursionDepth = props.Node.MaxRecursionDepth; + funcAttribs.push_back(Builder.InsertRecord(nAttrib)); + } + + if (props.Node.MaxDispatchGrid[0] || + props.Node.MaxDispatchGrid[1] || + props.Node.MaxDispatchGrid[2]) { + nAttrib = {}; + nAttrib.AttribKind = (uint32_t)RDAT::NodeFuncAttribKind::MaxDispatchGrid; + nAttrib.MaxDispatchGrid = + Builder.InsertArray(&props.Node.MaxDispatchGrid[0], + &props.Node.MaxDispatchGrid[0] + 3); + funcAttribs.push_back(Builder.InsertRecord(nAttrib)); + } + + nInfo.Attribs = Builder.InsertArray(funcAttribs.begin(), funcAttribs.end()); + + // Add the input and output nodes + nInfo.Inputs = AddIONodes(props.InputNodes); + nInfo.Outputs = AddIONodes(props.OutputNodes); + + return 
Builder.InsertRecord(nInfo); + + } + void UpdateFunctionInfo(const DxilModule &DM) { llvm::Module *M = DM.GetModule(); // We must select the appropriate shader mask for the validator version, @@ -1191,6 +1584,42 @@ class DxilRDATWriter : public DxilPartWriter { } } + // Collect total groupshared memory potentially used by every function + const DataLayout &DL = M->getDataLayout(); + ValueMap TGSMInFunc; + // Initialize all function TGSM usage to zero + for (auto &function : M->getFunctionList()) { + TGSMInFunc[&function] = 0; + } + for (GlobalVariable &GV : M->globals()) { + if (GV.getType()->getAddressSpace() == DXIL::kTGSMAddrSpace) { + SmallPtrSet completeFuncs; + SmallVector WorkList; + uint32_t gvSize = DL.getTypeAllocSize(GV.getType()->getElementType()); + + WorkList.append(GV.user_begin(), GV.user_end()); + + while (!WorkList.empty()) { + User *U = WorkList.pop_back_val(); + // If const, keep going until we find something we can use + if (isa(U)) { + WorkList.append(U->user_begin(), U->user_end()); + continue; + } + + if (Instruction *I = dyn_cast(U)) { + llvm::Function *F = I->getParent()->getParent(); + if (completeFuncs.insert(F).second) { + // If function is new, process it and its users + // Add users to the worklist + WorkList.append(F->user_begin(), F->user_end()); + // Add groupshared size to function's total + TGSMInFunc[F] += gvSize; + } + } + } + } + } for (auto &function : M->getFunctionList()) { if (!function.isDeclaration()) { @@ -1204,6 +1633,8 @@ class DxilRDATWriter : public DxilPartWriter { uint32_t payloadSizeInBytes = 0; uint32_t attrSizeInBytes = 0; uint32_t shaderKind = static_cast(DXIL::ShaderKind::Library); + uint32_t shaderInfo = RDAT_NULL_REF; + uint32_t nodeInfo = RDAT_NULL_REF; if (m_FuncToResNameOffset.find(&function) != m_FuncToResNameOffset.end()) resourceIndex = @@ -1213,7 +1644,16 @@ class DxilRDATWriter : public DxilPartWriter { functionDependencies = Builder.InsertArray(m_FuncToDependencies[&function].begin(), 
m_FuncToDependencies[&function].end()); - RuntimeDataFunctionInfo info = {}; + RuntimeDataFunctionInfo3 info_latest = {}; + RuntimeDataFunctionInfo &info = info_latest; + RuntimeDataFunctionInfo2 *pInfo2 = (sizeof(RuntimeDataFunctionInfo2) <= + m_pFunctionTable->GetRecordStride()) + ? &info_latest + : nullptr; + RuntimeDataFunctionInfo3 *pInfo3 = (sizeof(RuntimeDataFunctionInfo3) <= + m_pFunctionTable->GetRecordStride()) + ? &info_latest + : nullptr; ShaderFlags flags = ShaderFlags::CollectShaderFlags(&function, &DM); if (DM.HasDxilFunctionProps(&function)) { const auto &props = DM.GetDxilFunctionProps(&function); @@ -1228,10 +1668,22 @@ class DxilRDATWriter : public DxilPartWriter { payloadSizeInBytes = props.ShaderProps.Ray.paramSizeInBytes; } shaderKind = (uint32_t)props.shaderKind; + if (pInfo2 && DM.HasDxilEntryProps(&function)) { + const auto &entryProps = DM.GetDxilEntryProps(&function); + shaderInfo = AddShaderInfo(function, entryProps, *pInfo2, flags, TGSMInFunc[&function]); + if (pInfo3 && (entryProps.props.shaderKind == ShaderKind::Node || + entryProps.props.IsNode())) { + nodeInfo = AddShaderNodeInfo(DM, function, entryProps, *pInfo3, TGSMInFunc[&function]); + } + } } info.Name = mangledIndex; info.UnmangledName = unmangledIndex; info.ShaderKind = shaderKind; + if (pInfo2) + pInfo2->RawShaderRef = shaderInfo; + if (pInfo3) + pInfo3->Node = nodeInfo; info.Resources = resourceIndex; info.FunctionDependencies = functionDependencies; info.PayloadSizeInBytes = payloadSizeInBytes; @@ -1264,7 +1716,7 @@ class DxilRDATWriter : public DxilPartWriter { info.ShaderStageFlag &= compatInfo.mask; } info.MinShaderTarget = EncodeVersion((DXIL::ShaderKind)shaderKind, minMajor, minMinor); - m_pFunctionTable->Insert(info); + m_pFunctionTable->Insert(info_latest); } } } @@ -1360,6 +1812,11 @@ class DxilRDATWriter : public DxilPartWriter { Builder.GetStringBufferPart(); m_pResourceTable = Builder.GetOrAddTable(); m_pFunctionTable = Builder.GetOrAddTable(); + if 
(DXIL::CompareVersions(m_ValMajor, m_ValMinor, 1, 8) >= 0) { + m_pFunctionTable->SetRecordStride(sizeof(RuntimeDataFunctionInfo3)); + } else { + m_pFunctionTable->SetRecordStride(sizeof(RuntimeDataFunctionInfo)); + } Builder.GetIndexArraysPart(); Builder.GetRawBytesPart(); if (RDAT::RecordTraits::PartType() <= maxAllowedType) @@ -1367,8 +1824,8 @@ class DxilRDATWriter : public DxilPartWriter { // Once per table. #define RDAT_STRUCT_TABLE(type, table) \ - if (RDAT::RecordTraits::PartType() <= maxAllowedType) \ - (void)Builder.GetOrAddTable(); + if (RDAT::RecordTraits::PartType() <= maxAllowedType) \ + (void)Builder.GetOrAddTable(); #define DEF_RDAT_TYPES DEF_RDAT_DEFAULTS #include "dxc/DxilContainer/RDAT_Macros.inl" @@ -1398,6 +1855,10 @@ DxilPartWriter *hlsl::NewRDATWriter(const DxilModule &M) { return new DxilRDATWriter(M); } +DxilPartWriter *hlsl::NewVersionWriter(IDxcVersionInfo *DXCVersionInfo) { + return new DxilVersionWriter(DXCVersionInfo); +} + class DxilContainerWriter_impl : public DxilContainerWriter { private: class DxilPart { @@ -1575,6 +2036,7 @@ void hlsl::StripAndCreateReflectionStream(Module *pReflectionM, uint32_t *pRefle void hlsl::SerializeDxilContainerForModule( DxilModule *pModule, AbstractMemoryStream *pModuleBitcode, + IDxcVersionInfo *DXCVersionInfo, AbstractMemoryStream *pFinalStream, llvm::StringRef DebugName, SerializeDxilFlags Flags, DxilShaderHash *pShaderHashOut, AbstractMemoryStream *pReflectionStreamOut, @@ -1646,17 +2108,32 @@ void hlsl::SerializeDxilContainerForModule( }); } } + std::unique_ptr pVERSWriter = nullptr; std::unique_ptr pRDATWriter = nullptr; std::unique_ptr pPSVWriter = nullptr; + unsigned int major, minor; pModule->GetDxilVersion(major, minor); RootSignatureWriter rootSigWriter(std::move(pModule->GetSerializedRootSignature())); // Grab RS here DXASSERT_NOMSG(pModule->GetSerializedRootSignature().empty()); bool bMetadataStripped = false; - if (pModule->GetShaderModel()->IsLib()) { + const hlsl::ShaderModel *pSM = 
pModule->GetShaderModel(); + if (pSM->IsLib()) { DXASSERT(pModule->GetSerializedRootSignature().empty(), "otherwise, library has root signature outside subobject definitions"); + // Write the DxilCompilerVersion (VERS) part. + + if (pSM->IsSM68Plus() && DXCVersionInfo) { + pVERSWriter = llvm::make_unique(DXCVersionInfo); + + writer.AddPart(hlsl::DFCC_CompilerVersion, pVERSWriter->size(), + [&pVERSWriter](AbstractMemoryStream *pStream) { + pVERSWriter->write(pStream); + return S_OK; + }); + } + // Write the DxilRuntimeData (RDAT) part. pRDATWriter = llvm::make_unique(*pModule); writer.AddPart( diff --git a/lib/HLSL/DxilContainerReflection.cpp b/lib/HLSL/DxilContainerReflection.cpp index f7014bd96d..9685dea6d9 100644 --- a/lib/HLSL/DxilContainerReflection.cpp +++ b/lib/HLSL/DxilContainerReflection.cpp @@ -874,6 +874,23 @@ HRESULT CShaderReflectionType::InitializeEmpty() return S_OK; } +// Returns true if type is array and/or vec with matching number of elements. +static bool MatchVectorOrMatrixType(llvm::Type *type, unsigned count, + int maxDepth) { + if (type->isArrayTy()) { + unsigned arraySize = (unsigned)type->getArrayNumElements(); + if (maxDepth < 1 || count < arraySize || (count % arraySize) != 0) + return false; + return MatchVectorOrMatrixType(type->getArrayElementType(), + count / arraySize, maxDepth - 1); + } else if (type->isVectorTy()) { + if (maxDepth < 1) + return false; + return type->getVectorNumElements() == count; + } + return count == 1; +} + // Main logic for translating an LLVM type and associated // annotations into a D3D shader reflection type. HRESULT CShaderReflectionType::Initialize( @@ -887,6 +904,7 @@ HRESULT CShaderReflectionType::Initialize( DXASSERT_NOMSG(inType); // Set a bunch of fields to default values, to avoid duplication. + m_Desc.Class = D3D_SVC_SCALAR; m_Desc.Rows = 0; m_Desc.Columns = 0; m_Desc.Elements = 0; @@ -917,33 +935,76 @@ HRESULT CShaderReflectionType::Initialize( // at the element type. 
llvm::Type* type = inType; + // Arrays can be a bit difficult, since some types are translated to arrays. + // Additionally, matrices have multiple potential forms, so we must pay + // attention to the field annotation to determine when we have reached the + // element type that may be a matrix or a vector. + + // There are several possible matrix encodings: + // High level: struct { [rows x <cols x float>] } + // High level struct stripped: [rows x <cols x float>] + // High level struct stripped, one row: <cols x float> + // Vector as array: [rows x [cols x float]] + // Vector as array, one row: [cols x float] + // Flattened vector: <(rows*cols) x float> + // Flattened vector as array: [(rows*cols) x float] + // And vector may use llvm vector, or be translated to array: + // <cols x float> <-> [cols x float] + // Use type annotation to determine if we have a vector or matrix first, + // so we can stop multiplying in array dims at the right time. + + if (typeAnnotation.HasMatrixAnnotation()) { + // We can extract the details from the annotation. + DxilMatrixAnnotation const &matrixAnnotation = + typeAnnotation.GetMatrixAnnotation(); + + switch (matrixAnnotation.Orientation) { + default: +#ifndef NDEBUG + OutputDebugStringA( + "DxilContainerReflection.cpp: error: unknown matrix orientation\n"); +#endif + // Note: column-major layout is the default + LLVM_FALLTHROUGH; // HLSL Change + case hlsl::MatrixOrientation::Undefined: + case hlsl::MatrixOrientation::ColumnMajor: + m_Desc.Class = D3D_SVC_MATRIX_COLUMNS; + break; + + case hlsl::MatrixOrientation::RowMajor: + m_Desc.Class = D3D_SVC_MATRIX_ROWS; + break; + } + + m_Desc.Rows = matrixAnnotation.Rows; + m_Desc.Columns = matrixAnnotation.Cols; + + cbRows = m_Desc.Rows; + cbCols = m_Desc.Columns; + if (m_Desc.Class == D3D_SVC_MATRIX_COLUMNS) { + std::swap(cbRows, cbCols); + } + } else if (unsigned cols = typeAnnotation.GetVectorSize()) { + // Older format lacks this size, but the type will be a vector, + // so that will be handled later by original code path.
+ m_Desc.Class = D3D_SVC_VECTOR; + m_Desc.Rows = 1; + m_Desc.Columns = cols; + + cbRows = m_Desc.Rows; + cbCols = m_Desc.Columns; + } + while(type->isArrayTy()) { - llvm::Type* elementType = type->getArrayElementType(); - - // Note: At this point an HLSL matrix type may appear as an ordinary - // array (not wrapped in a `struct`), so `dxilutil::IsHLSLMatrixType()` - // is not sufficient. Instead we need to check the field annotation. - // - // We might have an array of matrices, though, so we only exit if - // the field annotation says we have a matrix, and we've bottomed - // out at one array level, since matrix will be in the format: - // [rows x ] - // - // This is in storage orientation, so rows/cols are swapped - // when the matrix is column_major. - // - // However, when the matrix has a row size of 1 in storage orientation, - // this array dimension appears to be missing. - // To properly count the array dimensions for this case, - // we must not break out of the loop one array early when rows == 1. - if(typeAnnotation.HasMatrixAnnotation() && !elementType->isArrayTy() && - !HLMatrixType::isa(elementType)){ - const DxilMatrixAnnotation &mat = typeAnnotation.GetMatrixAnnotation(); - unsigned rows = mat.Orientation == MatrixOrientation::RowMajor ? - mat.Rows : mat.Cols; - // when rows == 1, in storage orientation, the row array is missing. - if (rows > 1) + // Already determined that this is a vector or matrix, so break if the + // number of remaining array and/or vector elements matches. + if (m_Desc.Class != D3D_SVC_SCALAR) { + // max depth is 1 for vector, and 2 for matrix, unless rows in storage + // orientation is 1. + if (MatchVectorOrMatrixType( + type, cbRows * cbCols, + (m_Desc.Class == D3D_SVC_VECTOR || cbRows == 1) ? 1 : 2)) break; } @@ -955,12 +1016,9 @@ HRESULT CShaderReflectionType::Initialize( // but for now we do the expedient thing of multiplying out all their // dimensions. 
m_Desc.Elements *= type->getArrayNumElements(); - type = elementType; + type = type->getArrayElementType(); } - // Default to a scalar type, just to avoid some duplication later. - m_Desc.Class = D3D_SVC_SCALAR; - // Look at the annotation to try to determine the basic type of value. // // Note that DXIL supports some types that don't currently have equivalents @@ -1055,39 +1113,12 @@ HRESULT CShaderReflectionType::Initialize( } m_Desc.Type = componentType; - // A matrix type is encoded as a vector type, plus annotations, so we - // need to check for this case before other vector cases. - if(typeAnnotation.HasMatrixAnnotation()) - { - // We can extract the details from the annotation. - DxilMatrixAnnotation const& matrixAnnotation = typeAnnotation.GetMatrixAnnotation(); - - switch(matrixAnnotation.Orientation) - { - default: -#ifndef NDEBUG - OutputDebugStringA("DxilContainerReflection.cpp: error: unknown matrix orientation\n"); -#endif - // Note: column-major layout is the default - LLVM_FALLTHROUGH; // HLSL Change - case hlsl::MatrixOrientation::Undefined: - case hlsl::MatrixOrientation::ColumnMajor: - m_Desc.Class = D3D_SVC_MATRIX_COLUMNS; - break; - - case hlsl::MatrixOrientation::RowMajor: - m_Desc.Class = D3D_SVC_MATRIX_ROWS; - break; - } - - m_Desc.Rows = matrixAnnotation.Rows; - m_Desc.Columns = matrixAnnotation.Cols; - m_Name += std::to_string(matrixAnnotation.Rows) + "x" + std::to_string(matrixAnnotation.Cols); - - cbRows = m_Desc.Rows; - cbCols = m_Desc.Columns; - if (m_Desc.Class == D3D_SVC_MATRIX_COLUMNS) { - std::swap(cbRows, cbCols); + if (m_Desc.Class != D3D_SVC_SCALAR) { + // matrix or explicit vector already handled, except for name. 
+ if (m_Desc.Class == D3D_SVC_VECTOR) { + m_Name += std::to_string(m_Desc.Columns); + } else { + m_Name += std::to_string(m_Desc.Rows) + "x" + std::to_string(m_Desc.Columns); } } else if(FixedVectorType *VT = dyn_cast(type) ) diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp index 0f9719a493..14e43defab 100644 --- a/lib/HLSL/DxilGenerationPass.cpp +++ b/lib/HLSL/DxilGenerationPass.cpp @@ -231,7 +231,8 @@ class DxilGenerationPass : public ModulePass { llvm::make_unique( props, m_pHLModule->GetHLOptions().bUseMinPrecision); if (m_pHLModule->IsGraphicsShader(&F) || - m_pHLModule->IsComputeShader(&F)) { + m_pHLModule->IsComputeShader(&F) || + m_pHLModule->IsNodeShader(&F)) { HLSignatureLower sigLower(&F, *m_pHLModule, pProps->sig); // TODO: BUG: This will lower patch constant function sigs twice if // used by two hull shaders! @@ -242,8 +243,12 @@ class DxilGenerationPass : public ModulePass { } } + LowerHLAnnotateWaveMatrix(M); + std::unordered_set UpdateCounterSet; + LowerRecordAccessToGetNodeRecordPtr(*m_pHLModule); + GenerateDxilOperations(M, UpdateCounterSet); GenerateDxilCBufferHandles(); @@ -269,13 +274,31 @@ class DxilGenerationPass : public ModulePass { } } } + + // Remove Redundant OutputComplete + //call void @dx.op.outputComplete(i32 241, %dx.types.Handle zeroinitializer) + const bool SkipInit = true; + hlsl::DxilModule& DxilMod = M.GetOrCreateDxilModule(SkipInit); + hlsl::OP* hlslOP = DxilMod.GetOP(); + for (auto& it : hlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { + Function* F = it.second; + if (!F) + continue; + for (auto itU = F->user_begin(); itU != F->user_end(); ) { + User* U = *(itU++); + CallInst* CI = cast(U); + Value* NodeRecHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + Constant* C = dyn_cast(NodeRecHandle); + if ( C && C->isZeroValue()) + CI->eraseFromParent(); + } + } + // Translate precise on allocas into function call to keep the information after mem2reg. 
// The function calls will be removed after propagate precise attribute. TranslatePreciseAttribute(); // High-level metadata should now be turned into low-level metadata. - const bool SkipInit = true; - hlsl::DxilModule &DxilMod = M.GetOrCreateDxilModule(SkipInit); DxilFunctionProps *pProps = nullptr; if (!SM->IsLib()) { pProps = &EntryPropsMap.begin()->second->props; @@ -312,6 +335,7 @@ class DxilGenerationPass : public ModulePass { std::unordered_set &UpdateCounterSet); void LowerHLCreateHandle( std::unordered_map &HandleToResTypeMap); + void LowerHLAnnotateWaveMatrix(Module &M); // Translate precise attribute into HL function call. void TranslatePreciseAttribute(); @@ -360,6 +384,105 @@ void TranslateHLCreateHandle(Function *F, hlsl::OP &hlslOP) { } } +void TranslateHLCreateNodeOutputHandle(Function *F, hlsl::OP &hlslOP) { + for (auto U = F->user_begin(); U != F->user_end();) { + Value *user = *(U++); + if (!isa(user)) + continue; + // must be call inst + CallInst *CI = cast(user); + Value *idx = CI->getArgOperand(HLOperandIndex::kNodeOutputMetadataIDIdx); + + auto DxilOpcode = DXIL::OpCode::CreateNodeOutputHandle; + Value *opArg = + hlslOP.GetU32Const((unsigned)DXIL::OpCode::CreateNodeOutputHandle); + + IRBuilder<> Builder(CI); + Function *createHandle = hlslOP.GetOpFunc(DxilOpcode, Builder.getVoidTy()); + Value *newHandle = Builder.CreateCall(createHandle, {opArg, idx}); + + CI->replaceAllUsesWith(newHandle); + CI->eraseFromParent(); + } +} + +void TranslateHLIndexNodeHandle(Function *F, hlsl::OP &hlslOP) { + for (auto U = F->user_begin(); U != F->user_end();) { + Value *user = *(U++); + if (!isa(user)) + continue; + CallInst *CI = cast(user); + Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + Value *arrayidx = + CI->getArgOperand(HLOperandIndex::kIndexNodeHandleArrayIDIdx); + + auto DxilOpcode = DXIL::OpCode::IndexNodeHandle; + Value *opArg = hlslOP.GetU32Const((unsigned)DXIL::OpCode::IndexNodeHandle); + + IRBuilder<> Builder(CI); + Function 
*createHandle = hlslOP.GetOpFunc(DxilOpcode, Builder.getVoidTy()); + Value *newHandle = + Builder.CreateCall(createHandle, {opArg, handle, arrayidx}); + CI->replaceAllUsesWith(newHandle); + CI->eraseFromParent(); + } +} + +void TranslateHLCreateNodeInputRecordHandle(Function *F, hlsl::OP &hlslOP) { + for (auto U = F->user_begin(); U != F->user_end();) { + Value *user = *(U++); + if (!isa(user)) + continue; + // must be a call inst + CallInst *CI = cast(user); + Value *idx = + CI->getArgOperand(HLOperandIndex::kNodeInputRecordMetadataIDIdx); + auto DxilOpcode = DXIL::OpCode::CreateNodeInputRecordHandle; + Value *opArg = + hlslOP.GetU32Const((unsigned)DXIL::OpCode::CreateNodeInputRecordHandle); + IRBuilder<> Builder(CI); + Function *createHandle = hlslOP.GetOpFunc(DxilOpcode, Builder.getVoidTy()); + Value *newHandle = Builder.CreateCall(createHandle, {opArg, idx}); + + CI->replaceAllUsesWith(newHandle); + CI->eraseFromParent(); + } +} + +void TranslateHLAnnotateNodeRecordHandle(Function *F, hlsl::OP &hlslOP) { + Value *opArg = + hlslOP.GetU32Const((unsigned)DXIL::OpCode::AnnotateNodeRecordHandle); + + for (auto U = F->user_begin(); U != F->user_end();) { + Value *user = *(U++); + if (!isa(user)) + continue; + // must be call inst + CallInst *CI = cast(user); + Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + Value *NP = CI->getArgOperand( + HLOperandIndex::kAnnotateNodeRecordHandleNodeRecordPropIdx); + + IRBuilder<> Builder(CI); + // put annotateHandle near the Handle it annotated. 
+ if (Instruction *I = dyn_cast(handle)) { + if (isa(I)) + Builder.SetInsertPoint(I->getParent()->getFirstInsertionPt()); + else + Builder.SetInsertPoint(I->getNextNode()); + } else if (Argument *Arg = dyn_cast(handle)) { + Builder.SetInsertPoint( + Arg->getParent()->getEntryBlock().getFirstInsertionPt()); + } + Function *annotateHandle = hlslOP.GetOpFunc( + DXIL::OpCode::AnnotateNodeRecordHandle, Builder.getVoidTy()); + CallInst *newHandle = + Builder.CreateCall(annotateHandle, {opArg, handle, NP}); + CI->replaceAllUsesWith(newHandle); + CI->eraseFromParent(); + } +} + void TranslateHLAnnotateHandle( Function *F, hlsl::OP &hlslOP, std::unordered_map &HandleToResTypeMap) { @@ -372,7 +495,7 @@ void TranslateHLAnnotateHandle( // must be call inst CallInst *CI = cast(user); Value *handle = - CI->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx); + CI->getArgOperand(HLOperandIndex::kHandleOpIdx); Value *RP = CI->getArgOperand( HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx); Type *ResTy = @@ -399,7 +522,40 @@ void TranslateHLAnnotateHandle( } } -void TranslateHLCastHandleToRes(Function *F, hlsl::OP &hlslOP) { +void TranslateHLAnnotateNodeHandle(Function *F, hlsl::OP &hlslOP) { + Value *opArg = hlslOP.GetU32Const((unsigned)DXIL::OpCode::AnnotateNodeHandle); + + for (auto U = F->user_begin(); U != F->user_end();) { + Value *user = *(U++); + if (!isa(user)) + continue; + // must be call inst + CallInst *CI = cast(user); + Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + Value *NP = + CI->getArgOperand(HLOperandIndex::kAnnotateNodeHandleNodePropIdx); + + IRBuilder<> Builder(CI); + // put AnnotateNodeHandle near the Handle it annotated. 
+ if (Instruction *I = dyn_cast(handle)) { + if (isa(I)) + Builder.SetInsertPoint(I->getParent()->getFirstInsertionPt()); + else + Builder.SetInsertPoint(I->getNextNode()); + } else if (Argument *Arg = dyn_cast(handle)) { + Builder.SetInsertPoint( + Arg->getParent()->getEntryBlock().getFirstInsertionPt()); + } + Function *annotateHandle = + hlslOP.GetOpFunc(DXIL::OpCode::AnnotateNodeHandle, Builder.getVoidTy()); + CallInst *newHandle = + Builder.CreateCall(annotateHandle, {opArg, handle, NP}); + CI->replaceAllUsesWith(newHandle); + CI->eraseFromParent(); + } +} + +void TranslateHLCastHandleToRes(Function *F, hlsl::OP &hlslOP, const llvm::DataLayout& DL) { for (auto U = F->user_begin(); U != F->user_end();) { Value *User = *(U++); if (!isa(User)) @@ -409,6 +565,39 @@ void TranslateHLCastHandleToRes(Function *F, hlsl::OP &hlslOP) { IRBuilder<> Builder(CI); HLCastOpcode opcode = static_cast(hlsl::GetHLOpcode(CI)); switch (opcode) { + case HLCastOpcode::HandleToNodeOutputCast: { + // Do Nothing for now + // Perhaps we need to replace the recordtohandle cast users + // with the handle argument here. 
+ } break; + case HLCastOpcode::NodeOutputToHandleCast: { + Value *NodeOutputHandle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + Constant *C = dyn_cast(NodeOutputHandle); + if (C && C->isZeroValue()) { + NodeOutputHandle = Constant::getNullValue(hlslOP.GetNodeHandleType()); + } else if (auto *CastI = dyn_cast(NodeOutputHandle)) { + DXASSERT_NOMSG(hlsl::GetHLOpcodeGroup(CastI->getCalledFunction()) == + HLOpcodeGroup::HLCast); + NodeOutputHandle = CastI->getArgOperand(HLOperandIndex::kHandleOpIdx); + } + CI->replaceAllUsesWith(NodeOutputHandle); + LLVM_FALLTHROUGH; + } + case HLCastOpcode::NodeRecordToHandleCast: { + Value *OutputRecordHandle = + CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + Constant *C = dyn_cast(OutputRecordHandle); + if (C && C->isZeroValue()) { + OutputRecordHandle = + Constant::getNullValue(hlslOP.GetNodeRecordHandleType()); + } else if (auto *CastI = dyn_cast(OutputRecordHandle)) { + DXASSERT_NOMSG(hlsl::GetHLOpcodeGroup(CastI->getCalledFunction()) == + HLOpcodeGroup::HLCast); + OutputRecordHandle = CastI->getArgOperand(HLOperandIndex::kHandleOpIdx); + } + CI->replaceAllUsesWith(OutputRecordHandle); + LLVM_FALLTHROUGH; + } case HLCastOpcode::HandleToResCast: { Value *Handle = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx); for (auto HandleU = CI->user_begin(); HandleU != CI->user_end();) { @@ -417,7 +606,7 @@ void TranslateHLCastHandleToRes(Function *F, hlsl::OP &hlslOP) { if (!HandleCI) continue; hlsl::HLOpcodeGroup handleGroup = - hlsl::GetHLOpcodeGroup(HandleCI->getCalledFunction()); + hlsl::GetHLOpcodeGroup(HandleCI->getCalledFunction()); if (handleGroup == HLOpcodeGroup::HLCreateHandle) { HandleCI->replaceAllUsesWith(Handle); HandleCI->eraseFromParent(); @@ -436,13 +625,14 @@ void DxilGenerationPass::LowerHLCreateHandle( std::unordered_map &HandleToResTypeMap) { Module *M = m_pHLModule->GetModule(); hlsl::OP &hlslOP = *m_pHLModule->GetOP(); - // Lower cast handle to res used by hl.createhandle. 
+ // Lower cast handle to res/node used by hl.createhandle. for (iplist::iterator F : M->getFunctionList()) { if (F->user_empty()) continue; hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F); if (group == HLOpcodeGroup::HLCast) { - TranslateHLCastHandleToRes(F, hlslOP); + auto DL = M->getDataLayout(); + TranslateHLCastHandleToRes(F, hlslOP, DL); } } // generate dxil operation @@ -457,13 +647,58 @@ void DxilGenerationPass::LowerHLCreateHandle( TranslateHLCreateHandle(F, hlslOP); break; + case HLOpcodeGroup::HLCreateNodeOutputHandle: + TranslateHLCreateNodeOutputHandle(F, hlslOP); + break; + case HLOpcodeGroup::HLIndexNodeHandle: + TranslateHLIndexNodeHandle(F, hlslOP); + break; + case HLOpcodeGroup::HLCreateNodeInputRecordHandle: + TranslateHLCreateNodeInputRecordHandle(F, hlslOP); + break; case HLOpcodeGroup::HLAnnotateHandle: TranslateHLAnnotateHandle(F, hlslOP, HandleToResTypeMap); break; + case HLOpcodeGroup::HLAnnotateNodeHandle: + TranslateHLAnnotateNodeHandle(F, hlslOP); + break; + case HLOpcodeGroup::HLAnnotateNodeRecordHandle: + TranslateHLAnnotateNodeRecordHandle(F, hlslOP); + break; } } } +void DxilGenerationPass::LowerHLAnnotateWaveMatrix(Module &M) { + hlsl::OP &hlslOP = *m_pHLModule->GetOP(); + Value *opArg = + hlslOP.GetU32Const((unsigned)DXIL::OpCode::WaveMatrix_Annotate); + for (iplist::iterator F : M.getFunctionList()) { + if (F->user_empty()) + continue; + if (hlsl::GetHLOpcodeGroup(F) == HLOpcodeGroup::HLWaveMatrix_Annotate) { + for (auto U = F->user_begin(); U != F->user_end();) { + Value *User = *(U++); + if (!isa(User)) + continue; + // must be call inst + CallInst *CI = cast(User); + IRBuilder<> Builder(CI); + Value *waveMatPtr = + CI->getArgOperand(HLOperandIndex::kAnnotateWaveMatrixPtrOpIdx); + Value *WMP = CI->getArgOperand( + HLOperandIndex::kAnnotateWaveMatrixPropertiesOpIdx); + Function *annotateWaveMatrix = hlslOP.GetOpFunc( + DXIL::OpCode::WaveMatrix_Annotate, Builder.getVoidTy()); + CallInst *newCI = + 
Builder.CreateCall(annotateWaveMatrix, {opArg, waveMatPtr, WMP}); + if (!CI->user_empty()) + CI->replaceAllUsesWith(Builder.CreateBitCast(newCI, CI->getType())); + CI->eraseFromParent(); + } + } + } +} static void MarkUavUpdateCounter(Value* LoadOrGEP, diff --git a/lib/HLSL/DxilValidation.cpp b/lib/HLSL/DxilValidation.cpp index d1883e4594..467207c2e1 100644 --- a/lib/HLSL/DxilValidation.cpp +++ b/lib/HLSL/DxilValidation.cpp @@ -19,6 +19,7 @@ #include "dxc/DxilContainer/DxilPipelineStateValidation.h" #include "dxc/HLSL/DxilGenerationPass.h" #include "dxc/DXIL/DxilOperations.h" +#include "dxc/DXIL/DxilConstants.h" #include "dxc/DXIL/DxilModule.h" #include "dxc/DXIL/DxilShaderModel.h" #include "dxc/DxilContainer/DxilContainer.h" @@ -153,6 +154,7 @@ struct ValidationContext { Module *pDebugModule; DxilModule &DxilMod; const Type *HandleTy; + const Type *WaveMatrixTy; const DataLayout &DL; DebugLoc LastDebugLocEmit; ValidationRule LastRuleEmit; @@ -187,6 +189,7 @@ struct ValidationContext { slotTracker(&llvmModule, true) { DxilMod.GetDxilVersion(m_DxilMajor, m_DxilMinor); HandleTy = DxilMod.GetOP()->GetHandleType(); + WaveMatrixTy = DxilMod.GetOP()->GetWaveMatPtrType()->getPointerElementType(); for (Function &F : llvmModule.functions()) { if (DxilMod.HasDxilEntryProps(&F)) { @@ -531,6 +534,52 @@ struct ValidationContext { return I; } + void EmitInstrNote(Instruction *I, std::string Msg) { + Instruction *DbgI = GetDebugInstr(I); + const DebugLoc L = DbgI->getDebugLoc(); + if (L) { + LastDebugLocEmit = L; + } + + BasicBlock *BB = I->getParent(); + Function *F = BB->getParent(); + + dxilutil::EmitErrorOnInstruction(DbgI, Msg); + + // Add llvm information as a note to instruction string + std::string InstrStr; + raw_string_ostream InstrStream(InstrStr); + I->print(InstrStream, slotTracker); + InstrStream.flush(); + StringRef InstrStrRef = InstrStr; + InstrStrRef = InstrStrRef.ltrim(); // Ignore indentation + Msg = "at '" + InstrStrRef.str() + "'"; + + // Print the parent 
block name + Msg += " in block '"; + if (!BB->getName().empty()) { + Msg += BB->getName(); + } + else { + unsigned idx = 0; + for (auto i = F->getBasicBlockList().begin(), + e = F->getBasicBlockList().end(); i != e; ++i) { + if (BB == &(*i)) { + break; + } + idx++; + } + Msg += "#" + std::to_string(idx); + } + Msg += "'"; + + // Print the function name + Msg += " of function '" + F->getName().str() + "'."; + + dxilutil::EmitNoteOnContext(DbgI->getContext(), Msg); + + } + void EmitInstrErrorMsg(Instruction *I, ValidationRule Rule, std::string Msg) { Instruction *DbgI = GetDebugInstr(I); const DebugLoc L = DbgI->getDebugLoc(); @@ -2029,6 +2078,20 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode opcode, } } +static void ValidateBarrierFlagArg(ValidationContext &ValCtx, CallInst *CI, + Value *Arg, unsigned validMask, + StringRef flagName, StringRef opName) { + if (ConstantInt *CArg = dyn_cast(Arg)) { + if ((CArg->getLimitedValue() & (uint32_t)(~validMask)) != 0) { + ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrBarrierFlagInvalid, + {flagName, opName}); + } + } else { + ValCtx.EmitInstrError(CI, + ValidationRule::InstrBarrierNonConstantFlagArgument); + } +} + static void ValidateDxilOperationCallInProfile(CallInst *CI, DXIL::OpCode opcode, const ShaderModel *pSM, @@ -2045,7 +2108,8 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, // These shader models are treted like compute bool isCSLike = shaderKind == DXIL::ShaderKind::Compute || shaderKind == DXIL::ShaderKind::Mesh || - shaderKind == DXIL::ShaderKind::Amplification; + shaderKind == DXIL::ShaderKind::Amplification || + shaderKind == DXIL::ShaderKind::Node; // Is called from a library function bool isLibFunc = shaderKind == DXIL::ShaderKind::Library; @@ -2172,19 +2236,45 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, bool bHasGroup = barrierMode & g; bool bHasUGroup = barrierMode & ut; if (bHasUGlobal && bHasUGroup) { - ValCtx.EmitInstrError(CI, - 
ValidationRule::InstrBarrierModeUselessUGroup); + ValCtx.EmitInstrError(CI, + ValidationRule::InstrBarrierModeUselessUGroup); } - if (!bHasUGlobal && !bHasGroup && !bHasUGroup) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeNoMemory); + ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeNoMemory); } } else { if (uglobal != barrierMode) { - ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); + ValCtx.EmitInstrError(CI, ValidationRule::InstrBarrierModeForNonCS); } } + } break; + case DXIL::OpCode::BarrierByMemoryType: { + DxilInst_BarrierByMemoryType DI(CI); + ValidateBarrierFlagArg(ValCtx, CI, DI.get_MemoryTypeFlags(), + (unsigned)hlsl::DXIL::MemoryTypeFlag::ValidMask, + "memory type", "BarrierByMemoryType"); + ValidateBarrierFlagArg(ValCtx, CI, DI.get_AccessFlags(), + (unsigned)hlsl::DXIL::AccessFlag::ValidMask, + "access", "BarrierByMemoryType"); + ValidateBarrierFlagArg(ValCtx, CI, DI.get_SyncFlags(), + (unsigned)hlsl::DXIL::SyncFlag::ValidMask, + "sync", "BarrierByMemoryType"); + + } break; + case DXIL::OpCode::BarrierByNodeRecordHandle: + case DXIL::OpCode::BarrierByMemoryHandle: { + std::string opName = opcode == DXIL::OpCode::BarrierByNodeRecordHandle + ? 
"barrierByNodeRecordHandle" + : "barrierByMemoryHandle"; + DxilInst_BarrierByMemoryHandle DIMH(CI); + ValidateBarrierFlagArg(ValCtx, CI, DIMH.get_AccessFlags(), + (unsigned)hlsl::DXIL::AccessFlag::ValidMask, + "access", opName); + ValidateBarrierFlagArg(ValCtx, CI, DIMH.get_SyncFlags(), + (unsigned)hlsl::DXIL::SyncFlag::ValidMask, "sync", + opName); + } break; case DXIL::OpCode::CreateHandleForLib: if (!ValCtx.isLibProfile) { ValCtx.EmitInstrFormatError(CI, ValidationRule::SmOpcodeInInvalidFunction, @@ -2257,6 +2347,7 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) { OP *hlslOP = ValCtx.DxilMod.GetOP(); bool isDxilOp = OP::IsDxilOpFunc(F); Type *voidTy = Type::getVoidTy(F->getContext()); + for (User *user : F->users()) { CallInst *CI = dyn_cast(user); if (!CI) { @@ -2417,7 +2508,8 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, bool bInner = fals StringRef Name = ST->getName(); if (Name.startswith("dx.")) { // Allow handle type. - if (ValCtx.HandleTy == Ty) + if (ValCtx.HandleTy == Ty || + ValCtx.WaveMatrixTy == Ty) return true; hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); if (IsDxilBuiltinStructType(ST, hlslOP)) { @@ -3197,8 +3289,9 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { ValCtx.EmitInstrFormatError(&I, ValidationRule::SmOpcodeInInvalidFunction, {"64-bit atomic operations", "Shader Model 6.6+"}); - if (ptrType->getAddressSpace() != DXIL::kTGSMAddrSpace) - ValCtx.EmitInstrError(&I, ValidationRule::InstrAtomicOpNonGroupshared); + if (ptrType->getAddressSpace() != DXIL::kTGSMAddrSpace && + ptrType->getAddressSpace() != DXIL::kNodeRecordAddrSpace) + ValCtx.EmitInstrError(&I, ValidationRule::InstrAtomicOpNonGroupsharedOrRecord); // Drill through GEP and bitcasts while (true) { @@ -3256,6 +3349,41 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) { ValidateAsIntrinsics(F, ValCtx, dispatchMesh); } +static void ValidateNodeInputRecord(Function *F, 
ValidationContext &ValCtx) { + // if there are no function props or LaunchType is Invalid, there is nothing to do here + if (!ValCtx.DxilMod.HasDxilFunctionProps(F)) + return; + auto &props = ValCtx.DxilMod.GetDxilFunctionProps(F); + if (!props.IsNode()) + return; + if (props.InputNodes.size() > 1) { + ValCtx.EmitFnFormatError(F, ValidationRule::DeclMultipleNodeInputs, + { F->getName(), std::to_string(props.InputNodes.size()) }); + } + for (auto &input : props.InputNodes) { + if (!input.Flags.RecordTypeMatchesLaunchType(props.Node.LaunchType)) { + // We allow EmptyNodeInput here, as that may have been added implicitly + // if there was no input specified + if (input.Flags.IsEmptyInput()) + continue; + LPCSTR launchTypeStr = "Invalid"; + switch(props.Node.LaunchType) { + case DXIL::NodeLaunchType::Broadcasting: + launchTypeStr = "Broadcasting"; + break; + case DXIL::NodeLaunchType::Coalescing: + launchTypeStr = "Coalescing"; + break; + case DXIL::NodeLaunchType::Thread: + launchTypeStr = "Thread"; + break; + } + ValCtx.EmitFnFormatError(F, ValidationRule::DeclNodeLaunchInputType, + { launchTypeStr, F->getName() }); + } + } +} + static void ValidateFunction(Function &F, ValidationContext &ValCtx) { if (F.isDeclaration()) { ValidateExternalFunction(&F, ValCtx); @@ -3276,6 +3404,32 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { case DXIL::ShaderKind::Callable: numUDTShaderArgs = 1; break; + case DXIL::ShaderKind::Compute: { + DxilModule &DM = ValCtx.DxilMod; + if (DM.HasDxilEntryProps(&F)) { + DxilEntryProps &entryProps = DM.GetDxilEntryProps(&F); + // Check compatibility when both compute and node are specified + if (entryProps.props.IsNode()) { + // Compute is only compatible with Broadcasting launch nodes + if (entryProps.props.Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) { + ValCtx.EmitFnFormatError(&F, ValidationRule::FlowComputeNodeLaunchType, + { F.getName(), entryProps.props.Node.LaunchType == 
DXIL::NodeLaunchType::Coalescing ? + "Coalescing" : "Thread"}); + break; + } + // Compute is not compatible with node input (other than an input added implicitly) + // or outputs (only produce this error if we haven't produced the one above) + // Implicitly added input may only be an EmptyNodeInput or a record with size of 12 bytes. + if (!(entryProps.props.InputNodes.empty() || + entryProps.props.InputNodes[0].GetNodeRecordInfo().RecordSize == 12 || + NodeFlags(entryProps.props.InputNodes[0].GetNodeRecordInfo().IOFlags).IsEmptyInput()) || + !entryProps.props.OutputNodes.empty()) { + ValCtx.EmitFnFormatError(&F, ValidationRule::FlowComputeNodeIO, { F.getName() }); + } + } + } + break; + } default: break; } @@ -3310,11 +3464,15 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { if (arg.getArgNo() >= numUDTShaderArgs) { ArgFormatError(F, arg, ValidationRule::DeclExtraArgs); } else if (!argTy->isStructTy()) { + switch(shaderKind) { + case DXIL::ShaderKind::Callable: + ArgFormatError(F, arg, ValidationRule::DeclParamStruct); + break; + default: ArgFormatError(F, arg, - shaderKind == DXIL::ShaderKind::Callable - ? ValidationRule::DeclParamStruct - : arg.getArgNo() == 0 ? ValidationRule::DeclPayloadStruct - : ValidationRule::DeclAttrStruct); + arg.getArgNo() == 0 ? ValidationRule::DeclPayloadStruct + : ValidationRule::DeclAttrStruct); + } } continue; } @@ -3329,7 +3487,7 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { } } - if (numArgs < numUDTShaderArgs) { + if (numArgs < numUDTShaderArgs && shaderKind != DXIL::ShaderKind::Node) { StringRef argType[2] = { shaderKind == DXIL::ShaderKind::Callable ? 
"params" : "payload", "attributes" }; for (unsigned i = numArgs; i < numUDTShaderArgs; i++) { @@ -3338,6 +3496,11 @@ static void ValidateFunction(Function &F, ValidationContext &ValCtx) { } } + if (ValCtx.DxilMod.HasDxilFunctionProps(&F) && + ValCtx.DxilMod.GetDxilFunctionProps(&F).IsNode()) { + ValidateNodeInputRecord(&F, ValCtx); + } + ValidateFunctionBody(&F, ValCtx); } @@ -3503,7 +3666,7 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) { llvm::Function *F = I->getParent()->getParent(); if (M.HasDxilEntryProps(F)) { DxilFunctionProps &props = M.GetDxilEntryProps(F).props; - if (!props.IsCS() && !props.IsAS() && !props.IsMS()) { + if (!props.IsCS() && !props.IsAS() && !props.IsMS() && !props.IsNode()) { ValCtx.EmitInstrFormatError(I, ValidationRule::SmTGSMUnsupported, { "from non-compute entry points" }); } @@ -4938,11 +5101,10 @@ static void ValidateEntryProps(ValidationContext &ValCtx, } } - if (ShaderType == DXIL::ShaderKind::Compute) { - const auto &CS = props.ShaderProps.CS; - unsigned x = CS.numThreads[0]; - unsigned y = CS.numThreads[1]; - unsigned z = CS.numThreads[2]; + if (ShaderType == DXIL::ShaderKind::Compute || props.IsNode()) { + unsigned x = props.numThreads[0]; + unsigned y = props.numThreads[1]; + unsigned z = props.numThreads[2]; unsigned threadsInGroup = x * y * z; @@ -4975,9 +5137,9 @@ static void ValidateEntryProps(ValidationContext &ValCtx, // check. 
} else if (ShaderType == DXIL::ShaderKind::Mesh) { const auto &MS = props.ShaderProps.MS; - unsigned x = MS.numThreads[0]; - unsigned y = MS.numThreads[1]; - unsigned z = MS.numThreads[2]; + unsigned x = props.numThreads[0]; + unsigned y = props.numThreads[1]; + unsigned z = props.numThreads[2]; unsigned threadsInGroup = x * y * z; @@ -5025,10 +5187,9 @@ static void ValidateEntryProps(ValidationContext &ValCtx, std::to_string(maxPrimitiveCount) }); } } else if (ShaderType == DXIL::ShaderKind::Amplification) { - const auto &AS = props.ShaderProps.AS; - unsigned x = AS.numThreads[0]; - unsigned y = AS.numThreads[1]; - unsigned z = AS.numThreads[2]; + unsigned x = props.numThreads[0]; + unsigned y = props.numThreads[1]; + unsigned z = props.numThreads[2]; unsigned threadsInGroup = x * y * z; @@ -5287,7 +5448,7 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { ValidateCallGraph(ValCtx); - for (auto &F : ValCtx.DxilMod.GetModule()->functions()) { + for (llvm::Function &F : ValCtx.DxilMod.GetModule()->functions()) { if (F.isDeclaration()) continue; @@ -5302,6 +5463,101 @@ static void ValidateFlowControl(ValidationContext &ValCtx) { if (exitBlocks.empty()) ValCtx.EmitFnError(&F, ValidationRule::FlowDeadLoop); } + + // validate that there is no use of a value that has been output-completed + // for this function. + + hlsl::OP *hlslOP = ValCtx.DxilMod.GetOP(); + + for (auto &it : hlslOP->GetOpFuncList(DXIL::OpCode::OutputComplete)) { + Function *pF = it.second; + if (!pF) + continue; + + // first, collect all the output complete calls that are not dominated + // by another OutputComplete call for the same handle value + llvm::SmallMapVector, 4> + handleToCI; + for (User *U : pF->users()) { + // all OutputComplete calls are instructions, and call instructions, + // so there shouldn't need to be a null check. + CallInst *CI = cast(U); + + // verify that the function that contains this instruction is the same + // function that the DominatorTree was built on. 
+ if (&F != CI->getParent()->getParent()) + continue; + + DxilInst_OutputComplete OutputComplete(CI); + Value *completedRecord = OutputComplete.get_output(); + + auto vIt = handleToCI.find(completedRecord); + if (vIt == handleToCI.end()) { + llvm::SmallPtrSet s; + s.insert(CI); + handleToCI.insert(std::make_pair(completedRecord, s)); + } else { + // if the handle is already in the map, make sure the map's set of + // output complete calls that dominate the handle and do not dominate + // each other gets updated if necessary + bool CI_is_dominated = false; + for (auto ocIt = vIt->second.begin(); ocIt != vIt->second.end();) { + // if our new OC CI dominates an OC instruction in the set, + // then replace the instruction in the set with the new OC CI. + + if (DT.dominates(CI, *ocIt)) { + auto cur_it = ocIt++; + vIt->second.erase(*cur_it); + continue; + } + // Remember if our new CI gets dominated by any CI in the set. + if (DT.dominates(*ocIt, CI)) { + CI_is_dominated = true; + break; + } + ocIt++; + } + // if no CI in the set dominates our new CI, + // the new CI should be added to the set + if (!CI_is_dominated) + vIt->second.insert(CI); + } + } + + for (auto handle_iter = handleToCI.begin(), e = handleToCI.end(); + handle_iter != e; handle_iter++) { + for (auto user_itr = handle_iter->first->user_begin(); + user_itr != handle_iter->first->user_end(); user_itr++) { + User *pU = *user_itr; + Instruction *useInstr = cast(pU); + if (useInstr) { + if (CallInst *CI = dyn_cast(useInstr)) { + // if the user is an output complete call that is in the set of + // OutputComplete calls not dominated by another OutputComplete + // call for the same handle value, no diagnostics need to be + // emitted. + if (handle_iter->second.count(CI) == 1) + continue; + } + + // make sure any output complete call in the set + // that dominates this use gets its diagnostic emitted. 
+ for (auto ocIt = handle_iter->second.begin(); + ocIt != handle_iter->second.end(); ocIt++) { + Instruction *ocInstr = cast(*ocIt); + if (DT.dominates(ocInstr, useInstr)) { + ValCtx.EmitInstrError( + useInstr, + ValidationRule::InstrNodeRecordHandleUseAfterComplete); + ValCtx.EmitInstrNote( + *ocIt, "record handle invalidated by OutputComplete here"); + break; + } + } + } + } + } + } } // fxc has ERR_CONTINUE_INSIDE_SWITCH to disallow continue in switch. // Not do it for now. @@ -5539,15 +5795,85 @@ static void VerifyFeatureInfoMatches(_In_ ValidationContext &ValCtx, VerifyBlobPartMatches(ValCtx, "Feature Info", pWriter.get(), pFeatureInfoData, FeatureInfoSize); } +// return true if the pBlob is a valid, well-formed CompilerVersion part, false +// otherwise +bool ValidateCompilerVersionPart(const void *pBlobPtr, UINT blobSize) { + // The hlsl::DxilCompilerVersion struct is always 16 bytes. (2 2-byte + // uint16's, 3 4-byte uint32's) The blob size should absolutely never be less + // than 16 bytes. + if (blobSize < sizeof(hlsl::DxilCompilerVersion)) + return false; + + const hlsl::DxilCompilerVersion *pDCV = + (const hlsl::DxilCompilerVersion *)pBlobPtr; + if (pDCV->VersionStringListSizeInBytes == 0) + // No version strings, just make sure there is no extra space. 
+ return blobSize == sizeof(hlsl::DxilCompilerVersion); + + // after this point, we know VersionStringListSizeInBytes >= 1, because it is + // a UINT + + UINT EndOfVersionStringIndex = + sizeof(hlsl::DxilCompilerVersion) + pDCV->VersionStringListSizeInBytes; + // Make sure that the buffer size is large enough to contain both the DCV + // struct and the version string but not any larger than necessary + if (PSVALIGN4(EndOfVersionStringIndex) != blobSize) + return false; + + const char *VersionStringsListData = + (const char *)pBlobPtr + sizeof(hlsl::DxilCompilerVersion); + UINT VersionStringListSizeInBytes = pDCV->VersionStringListSizeInBytes; + + // now make sure that any pad bytes that were added are null-terminators. + for (UINT i = VersionStringListSizeInBytes; + i < blobSize - sizeof(hlsl::DxilCompilerVersion); i++) + if (VersionStringsListData[i] != '\0') + return false; + + // Now, version string validation + // first, the final byte of the string should always be null-terminator so + // that the string ends + if (VersionStringsListData[VersionStringListSizeInBytes - 1] != '\0') + return false; + + // construct the first string + // data format for VersionString can be see in the definition for the + // DxilCompilerVersion struct. summary: 2 strings that each end with the null + // terminator, and [0-3] null terminators after the final null terminator + StringRef firstStr(VersionStringsListData); + + // if the second string exists, attempt to construct it. + if (VersionStringListSizeInBytes > (firstStr.size() + 1)) { + StringRef secondStr(VersionStringsListData + firstStr.size() + 1); + + // the VersionStringListSizeInBytes member should be exactly equal to the + // two string lengths, plus the 2 null terminator bytes. + if (VersionStringListSizeInBytes != + firstStr.size() + secondStr.size() + 2) + return false; + } else { + // the VersionStringListSizeInBytes member should be exactly equal to the + // first string length, plus the 1 null terminator byte. 
+ if (VersionStringListSizeInBytes != firstStr.size() + 1) + return false; + } + + return true; +} static void VerifyRDATMatches(_In_ ValidationContext &ValCtx, _In_reads_bytes_(RDATSize) const void *pRDATData, _In_ uint32_t RDATSize) { const char *PartName = "Runtime Data (RDAT)"; + RDAT::DxilRuntimeData rdat(pRDATData, RDATSize); + if (!rdat.Validate()) { + ValCtx.EmitFormatError(ValidationRule::ContainerPartMatches, { PartName }); + return; + } + // If DxilModule subobjects already loaded, validate these against the RDAT blob, // otherwise, load subobject into DxilModule to generate reference RDAT. if (!ValCtx.DxilMod.GetSubobjects()) { - RDAT::DxilRuntimeData rdat(pRDATData, RDATSize); auto table = rdat.GetSubobjectTable(); if (table && table.Count() > 0) { ValCtx.DxilMod.ResetSubobjects(new DxilSubobjects()); @@ -5558,16 +5884,8 @@ static void VerifyRDATMatches(_In_ ValidationContext &ValCtx, } } - // TODO: Implement deep validation, instead of binary comparison before 1.7 release. 
unique_ptr pWriter(NewRDATWriter(ValCtx.DxilMod)); VerifyBlobPartMatches(ValCtx, PartName, pWriter.get(), pRDATData, RDATSize); - - // Verify no errors when runtime reflection from RDAT: - unique_ptr pReflection(RDAT::CreateDxilRuntimeReflection()); - if (!pReflection->InitFromRDAT(pRDATData, RDATSize)) { - ValCtx.EmitFormatError(ValidationRule::ContainerPartMatches, { PartName }); - return; - } } _Use_decl_annotations_ @@ -5657,6 +5975,16 @@ HRESULT ValidateDxilContainerParts(llvm::Module *pModule, case DFCC_FeatureInfo: VerifyFeatureInfoMatches(ValCtx, GetDxilPartData(pPart), pPart->PartSize); break; + case DFCC_CompilerVersion: + // Either this blob is a PDB, or it is a library with shader model at + // least 6.8 + if (pDxilModule->GetShaderModel()->IsSM68Plus() && ValCtx.isLibProfile) + if (!ValidateCompilerVersionPart((void *)GetDxilPartData(pPart), + pPart->PartSize)) + ValCtx.EmitFormatError(ValidationRule::ContainerPartInvalid, + {szFourCC}); + break; + case DFCC_RootSignature: pRootSignaturePart = pPart; if (ValCtx.isLibProfile) { diff --git a/lib/HLSL/HLLowerUDT.cpp b/lib/HLSL/HLLowerUDT.cpp index 11f64cdfcf..f06f8a103d 100644 --- a/lib/HLSL/HLLowerUDT.cpp +++ b/lib/HLSL/HLLowerUDT.cpp @@ -180,6 +180,26 @@ Constant *hlsl::TranslateInitForLoweredUDT( return Init; } +static Constant *InsertAddrSpaceCastIfRequired(Constant *C, + unsigned AddrSpace) { + Type *Ty = C->getType(); + DXASSERT_NOMSG(Ty->isPointerTy()); + if (AddrSpace != Ty->getPointerAddressSpace()) + return ConstantExpr::getAddrSpaceCast( + C, PointerType::get(Ty->getPointerElementType(), AddrSpace)); + return C; +} + +static Value *InsertAddrSpaceCastIfRequired(IRBuilder<> &Builder, Value *V, + unsigned AddrSpace) { + Type *Ty = V->getType(); + DXASSERT_NOMSG(Ty->isPointerTy()); + if (AddrSpace != Ty->getPointerAddressSpace()) + return Builder.CreateAddrSpaceCast( + V, PointerType::get(Ty->getPointerElementType(), AddrSpace)); + return V; +} + void hlsl::ReplaceUsesForLoweredUDT(Value *V, Value 
*NewV) { Type *Ty = V->getType(); Type *NewTy = NewV->getType(); @@ -193,10 +213,11 @@ void hlsl::ReplaceUsesForLoweredUDT(Value *V, Value *NewV) { return; } - if (Ty->isPointerTy()) - Ty = Ty->getPointerElementType(); - if (NewTy->isPointerTy()) - NewTy = NewTy->getPointerElementType(); + DXASSERT_NOMSG(Ty->isPointerTy() && NewTy->isPointerTy()); + unsigned OriginalAddrSpace = Ty->getPointerAddressSpace(); + unsigned NewAddrSpace = NewTy->getPointerAddressSpace(); + Ty = Ty->getPointerElementType(); + NewTy = NewTy->getPointerElementType(); while (!V->use_empty()) { Use &use = *V->use_begin(); @@ -206,34 +227,44 @@ void hlsl::ReplaceUsesForLoweredUDT(Value *V, Value *NewV) { } if (LoadInst *LI = dyn_cast(user)) { - // Load for non-matching type should only be vector - FixedVectorType *VT = dyn_cast(Ty); - DXASSERT(VT && NewTy->isArrayTy() && - VT->getNumElements() == NewTy->getArrayNumElements(), - "unexpected load of non-matching type"); IRBuilder<> Builder(LI); Value *result = UndefValue::get(Ty); - for (unsigned i = 0; i < VT->getNumElements(); ++i) { - Value *GEP = Builder.CreateInBoundsGEP(NewV, - {Builder.getInt32(0), Builder.getInt32(i)}); - Value *El = Builder.CreateLoad(GEP); - result = Builder.CreateInsertElement(result, El, i); + if (Ty == NewTy) { + // Ptrs differ by addrspace only + result = Builder.CreateLoad(NewV); + } else { + // Load for non-matching type should only be vector + FixedVectorType *VT = dyn_cast(Ty); + DXASSERT(VT && NewTy->isArrayTy() && + VT->getNumElements() == NewTy->getArrayNumElements(), + "unexpected load of non-matching type"); + for (unsigned i = 0; i < VT->getNumElements(); ++i) { + Value *GEP = Builder.CreateInBoundsGEP(NewV, + {Builder.getInt32(0), Builder.getInt32(i)}); + Value *El = Builder.CreateLoad(GEP); + result = Builder.CreateInsertElement(result, El, i); + } } LI->replaceAllUsesWith(result); LI->eraseFromParent(); } else if (StoreInst *SI = dyn_cast(user)) { - // Store for non-matching type should only be vector 
- FixedVectorType *VT = dyn_cast(Ty); - DXASSERT(VT && NewTy->isArrayTy() && - VT->getNumElements() == NewTy->getArrayNumElements(), - "unexpected load of non-matching type"); IRBuilder<> Builder(SI); - for (unsigned i = 0; i < VT->getNumElements(); ++i) { - Value *EE = Builder.CreateExtractElement(SI->getValueOperand(), i); - Value *GEP = Builder.CreateInBoundsGEP( - NewV, {Builder.getInt32(0), Builder.getInt32(i)}); - Builder.CreateStore(EE, GEP); + if (Ty == NewTy) { + // Ptrs differ by addrspace only + Builder.CreateStore(SI->getValueOperand(), NewV); + } else { + // Store for non-matching type should only be vector + FixedVectorType *VT = dyn_cast(Ty); + DXASSERT(VT && NewTy->isArrayTy() && + VT->getNumElements() == NewTy->getArrayNumElements(), + "unexpected load of non-matching type"); + for (unsigned i = 0; i < VT->getNumElements(); ++i) { + Value *EE = Builder.CreateExtractElement(SI->getValueOperand(), i); + Value *GEP = Builder.CreateInBoundsGEP( + NewV, {Builder.getInt32(0), Builder.getInt32(i)}); + Builder.CreateStore(EE, GEP); + } } SI->eraseFromParent(); @@ -255,10 +286,9 @@ void hlsl::ReplaceUsesForLoweredUDT(Value *V, Value *NewV) { } else if (AddrSpaceCastInst *AC = dyn_cast(user)) { // Address space cast IRBuilder<> Builder(AC); - unsigned AddrSpace = AC->getType()->getPointerAddressSpace(); - Value *NewAC = Builder.CreateAddrSpaceCast( - NewV, PointerType::get(NewTy, AddrSpace)); - ReplaceUsesForLoweredUDT(user, NewAC); + ReplaceUsesForLoweredUDT( + user, InsertAddrSpaceCastIfRequired( + Builder, NewV, AC->getType()->getPointerAddressSpace())); AC->eraseFromParent(); } else if (BitCastInst *BC = dyn_cast(user)) { @@ -277,10 +307,10 @@ void hlsl::ReplaceUsesForLoweredUDT(Value *V, Value *NewV) { } else if (ConstantExpr *CE = dyn_cast(user)) { // Constant AddrSpaceCast, or BitCast if (CE->getOpcode() == Instruction::AddrSpaceCast) { - unsigned AddrSpace = CE->getType()->getPointerAddressSpace(); - ReplaceUsesForLoweredUDT(user, - 
ConstantExpr::getAddrSpaceCast(cast(NewV), - PointerType::get(NewTy, AddrSpace))); + ReplaceUsesForLoweredUDT( + user, + InsertAddrSpaceCastIfRequired( + cast(NewV), CE->getType()->getPointerAddressSpace())); } else if (CE->getOpcode() == Instruction::BitCast) { if (CE->getType()->getPointerElementType() == NewTy) { // if alreday bitcast to new type, just replace the bitcast @@ -327,27 +357,30 @@ void hlsl::ReplaceUsesForLoweredUDT(Value *V, Value *NewV) { Value *elt = Builder.CreateLoad(GEP); val = Builder.CreateInsertElement(val, elt, i); } - if (bColMajor) { - // transpose matrix to match expected value orientation for - // default cast to matrix type - SmallVector ShuffleIndices; - for (unsigned RowIdx = 0; RowIdx < Mat.getNumRows(); ++RowIdx) - for (unsigned ColIdx = 0; ColIdx < Mat.getNumColumns(); ++ColIdx) - ShuffleIndices.emplace_back( - static_cast(Mat.getColumnMajorIndex(RowIdx, ColIdx))); - val = Builder.CreateShuffleVector(val, val, ShuffleIndices); - } - // lower mem to reg type - val = Mat.emitLoweredMemToReg(val, Builder); - // cast vector back to matrix value (DefaultCast expects row major) - unsigned newOpcode = (unsigned)HLCastOpcode::DefaultCast; - val = callHLFunction(*F->getParent(), HLOpcodeGroup::HLCast, newOpcode, - Ty, { Builder.getInt32(newOpcode), val }, Builder); - if (bColMajor) { - // emit cast row to col to match original result - newOpcode = (unsigned)HLCastOpcode::RowMatrixToColMatrix; + if (!CI->getType()->isVectorTy()) { + // Before HLMatrixLower, translate vector back to HL matrix value. 
+ if (bColMajor) { + // transpose matrix to match expected value orientation for + // default cast to matrix type + SmallVector ShuffleIndices; + for (unsigned RowIdx = 0; RowIdx < Mat.getNumRows(); ++RowIdx) + for (unsigned ColIdx = 0; ColIdx < Mat.getNumColumns(); ++ColIdx) + ShuffleIndices.emplace_back( + static_cast(Mat.getColumnMajorIndex(RowIdx, ColIdx))); + val = Builder.CreateShuffleVector(val, val, ShuffleIndices); + } + // lower mem to reg type + val = Mat.emitLoweredMemToReg(val, Builder); + // cast vector back to matrix value (DefaultCast expects row major) + unsigned newOpcode = (unsigned)HLCastOpcode::DefaultCast; val = callHLFunction(*F->getParent(), HLOpcodeGroup::HLCast, newOpcode, - Ty, { Builder.getInt32(newOpcode), val }, Builder); + Ty, { Builder.getInt32(newOpcode), val }, Builder); + if (bColMajor) { + // emit cast row to col to match original result + newOpcode = (unsigned)HLCastOpcode::RowMatrixToColMatrix; + val = callHLFunction(*F->getParent(), HLOpcodeGroup::HLCast, newOpcode, + Ty, { Builder.getInt32(newOpcode), val }, Builder); + } } // replace use of HLMatLoadStore with loaded vector CI->replaceAllUsesWith(val); @@ -425,9 +458,14 @@ void hlsl::ReplaceUsesForLoweredUDT(Value *V, Value *NewV) { //case HLOpcodeGroup::NotHL: // TODO: Support lib functions case HLOpcodeGroup::HLIntrinsic: { - // Just bitcast for now + // Just addrspace cast/bitcast for now IRBuilder<> Builder(CI); - use.set(Builder.CreateBitCast(NewV, V->getType())); + Value *Cast = NewV; + if (OriginalAddrSpace != NewAddrSpace) + Cast = Builder.CreateAddrSpaceCast(Cast, PointerType::get(NewTy, OriginalAddrSpace)); + if (V->getType() != Cast->getType()) + Cast = Builder.CreateBitCast(Cast, V->getType()); + use.set(Cast); continue; } break; diff --git a/lib/HLSL/HLModule.cpp b/lib/HLSL/HLModule.cpp index ac6a2d217f..652b147ce8 100644 --- a/lib/HLSL/HLModule.cpp +++ b/lib/HLSL/HLModule.cpp @@ -397,6 +397,9 @@ bool HLModule::IsPatchConstantShader(llvm::Function *F) { bool 
HLModule::IsComputeShader(llvm::Function *F) { return HasDxilFunctionProps(F) && GetDxilFunctionProps(F).IsCS(); } +bool HLModule::IsNodeShader(llvm::Function *F) { + return HasDxilFunctionProps(F) && GetDxilFunctionProps(F).IsNode(); +} bool HLModule::IsEntryThatUsesSignatures(llvm::Function *F) { auto propIter = m_DxilFunctionPropsMap.find(F); if (propIter != m_DxilFunctionPropsMap.end()) { diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 6b9cf96b00..d83b72cfe9 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -27,6 +27,8 @@ #include "dxc/HlslIntrinsicOp.h" #include "dxc/DXIL/DxilResourceProperties.h" #include "dxc/HLSL/DxilPoisonValues.h" +#include "dxc/HLSL/HLLowerUDT.h" +#include "dxc/DXIL/DxilWaveMatrix.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IRBuilder.h" @@ -51,7 +53,12 @@ struct HLOperationLowerHelper { DxilFunctionProps *functionProps; bool bLegacyCBufferLoad; DataLayout dataLayout; + SmallDenseMap loweredTypes; + typedef std::pair WaveMatrix_Props; + typedef DenseMap WaveMatrix_PropMap; + WaveMatrix_PropMap waveMatPropMap; HLOperationLowerHelper(HLModule &HLM); + const WaveMatrix_Props &GetWaveMatInfo(Value *waveMatPtr); }; HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM) @@ -73,6 +80,19 @@ HLOperationLowerHelper::HLOperationLowerHelper(HLModule &HLM) bLegacyCBufferLoad = HLM.GetHLOptions().bLegacyCBufferLoad; } +const HLOperationLowerHelper::WaveMatrix_Props & +HLOperationLowerHelper::GetWaveMatInfo(Value *waveMatPtr) { + auto it = waveMatPropMap.find(waveMatPtr); + if (it == waveMatPropMap.end()) { + Constant *infoC = wavemat_helper::GetInfoConstantFromWaveMatPtr(waveMatPtr); + DxilWaveMatrixProperties info = wavemat_helper::LoadInfoFromConstant(infoC); + it = waveMatPropMap + .insert(std::make_pair(waveMatPtr, std::make_pair(info, infoC))) + .first; + } + return it->second; +} + struct HLObjectOperationLowerHelper { private: // For object 
intrinsics. @@ -115,7 +135,7 @@ struct HLObjectOperationLowerHelper { DXASSERT(hlsl::GetHLOpcodeGroup(CIHandle->getCalledFunction()) == HLOpcodeGroup::HLAnnotateHandle, "else invalid handle"); // Mark has counter for the input handle. Value *counterHandle = - CIHandle->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx); + CIHandle->getArgOperand(HLOperandIndex::kHandleOpIdx); // Change kind into StructurBufferWithCounter. Constant *Props = cast(CIHandle->getArgOperand( HLOperandIndex::kAnnotateHandleResourcePropertiesOpIdx)); @@ -141,7 +161,7 @@ struct HLObjectOperationLowerHelper { hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(CI->getCalledFunction()); if (group == HLOpcodeGroup::HLAnnotateHandle) { - handle = CI->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx); + handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); } } @@ -997,6 +1017,45 @@ Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, ); } +/* +HLSL: bool RWDispatchNodeInputRecord::FinishedCrossGroupSharing() +DXIL: i1 @dx.op.finishedCrossGroupSharing(i32 %Opcode, %dx.types.NodeRecordHandle %NodeInputRecordHandle) +*/ +Value *TranslateNodeFinishedCrossGroupSharing(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *OP = &helper.hlslOP; + + Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext())); + Value* handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType()); + Value *opArg = OP->GetU32Const((unsigned)op); + + IRBuilder<> Builder(CI); + return Builder.CreateCall(dxilFunc, {opArg, handle}); +} + +/* +HLSL: + bool NodeOutput::IsValid() + bool EmptyNodeOutput::IsValid() +DXIL: + i1 @dx.op.nodeOutputIsValid(i32 %Opcode, %dx.types.NodeHandle %NodeOutputHandle) +*/ +Value *TranslateNodeOutputIsValid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, + 
HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *OP = &helper.hlslOP; + Value* handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext())); + Value *opArg = OP->GetU32Const((unsigned)op); + + IRBuilder<> Builder(CI); + return Builder.CreateCall(dxilFunc, {opArg, handle}); +} + Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, @@ -1016,6 +1075,121 @@ Value *TranslateGetAttributeAtVertex(CallInst *CI, IntrinsicOp IOP, OP::OpCode o } ); } +/* + +HLSL: +void Barrier(uint MemoryTypeFlags, uint AccessFlags, uint SyncFlags) +void Barrier(Object o, uint AccessFlags, uint SyncFlags) +UAV: +void @dx.op.barrierByMemoryType(i32 %Opcode, i32 %MemoryTypeFlags, i32 %AccessFlags, i32 %SyncFlags) +void @dx.op.barrierByMemoryHandle(i32 %Opcode, %dx.types.Handle %Object, i32 %AccessFlags, i32 %SyncFlags) +DXIL: (For NodeRecords) +void @dx.op.barrierByMemoryType(i32 %Opcode, i32 %MemoryTypeFlags, i32 %AccessFlags, i32 %SyncFlags) +void @dx.op.barrierByMemoryHandle(i32 %Opcode, %dx.types.NodeRecordHandle %Object, i32 %AccessFlags, i32 %SyncFlags) +*/ + +Value *TranslateBarrier(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *OP = &helper.hlslOP; + Value *HandleOrMemoryFlags = CI->getArgOperand(HLOperandIndex::kMemoryTypeFlagsOpIdx); + Value *AccessFlags = CI->getArgOperand(HLOperandIndex::kAccessFlagsOpIdx); + Value *SyncFlags = CI->getArgOperand(HLOperandIndex::kSyncFlagsOpIdx); + IRBuilder<> Builder(CI); + + if (HandleOrMemoryFlags->getType()->isIntegerTy()) { + op = OP::OpCode::BarrierByMemoryType; + } + else if (HandleOrMemoryFlags->getType() == OP->GetHandleType()) { + op = OP::OpCode::BarrierByMemoryHandle; + } + else if 
(HandleOrMemoryFlags->getType() == OP->GetNodeRecordHandleType()) { + op = OP::OpCode::BarrierByNodeRecordHandle; + } + else { + DXASSERT(false, "Shouldn't get here"); + } + + Function *dxilFunc = OP->GetOpFunc(op, CI->getType()); + Constant *opArg = OP->GetU32Const((unsigned)op); + + Value *args[] = {opArg, HandleOrMemoryFlags, AccessFlags, SyncFlags}; + + Builder.CreateCall(dxilFunc, args); + return nullptr; +} + +Value* TranslateGetGroupOrThreadNodeOutputRecords(CallInst* CI, IntrinsicOp IOP, OP::OpCode op, + HLOperationLowerHelper& helper, + HLObjectOperationLowerHelper* pObjHelper, + bool isPerThreadRecord, + bool& Translated) { + IRBuilder<> Builder(CI); + hlsl::OP* OP = &helper.hlslOP; + Value* handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + Function* dxilFunc = OP->GetOpFunc(op, Builder.getVoidTy()); + Value* opArg = OP->GetU32Const((unsigned)op); + Value* count = CI->getArgOperand(HLOperandIndex::kAllocateRecordNumRecordsIdx); + Value* perThread = OP->GetI1Const(isPerThreadRecord); + + Value* args[] = { opArg, handle, count, perThread }; + + return Builder.CreateCall(dxilFunc, args); +} + +/* +HLSL: +GroupNodeOutputRecords NodeOutput::GetGroupNodeOutputRecords(uint numRecords); +DXIL: +%dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 %Opcode, %dx.types.NodeHandle %NodeOutputHandle, + i32 %NumRecords, i1 %PerThread) +*/ +Value *TranslateGetGroupNodeOutputRecords(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return TranslateGetGroupOrThreadNodeOutputRecords(CI, IOP, op, helper, pObjHelper, /* isPerThreadRecord */ false, Translated); +} + +/* +HLSL: +ThreadNodeOutputRecords NodeOutput::GetThreadNodeOutputRecords(uint numRecords) +DXIL: +%dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 %Opcode, %dx.types.NodeHandle %NodeOutputHandle, + i32 %NumRecords, i1 %PerThread) +*/ +Value 
*TranslateGetThreadNodeOutputRecords(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + return TranslateGetGroupOrThreadNodeOutputRecords(CI, IOP, op, helper, pObjHelper, /* isPerThreadRecord */ true, Translated); +} + +/* +HLSL: +uint EmptyNodeInput::Count() +uint GroupNodeInputRecords::Count() +uint RWGroupNodeInputRecords::Count() + +DXIL: +i32 @dx.op.getInputRecordCount(i32 %Opcode, %dx.types.NodeRecordHandle %NodeInputHandle) +*/ +Value *TranslateNodeGetInputRecordCount(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *OP = &helper.hlslOP; + + Value* handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType()); + Function *dxilFunc = OP->GetOpFunc(op, Type::getVoidTy(CI->getContext())); + Value *opArg = OP->GetU32Const((unsigned)op); + Value *args[] = {opArg, handle}; + + IRBuilder<> Builder(CI); + return Builder.CreateCall(dxilFunc, args); +} Value *TrivialNoArgOperation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { @@ -3846,10 +4020,21 @@ void TranslateLoad(ResLoadHelper &helper, HLResource::Kind RK, UpdateStatus(ResRet, helper.status, Builder, OP); } +Value *TranslateWaveMatLoadStore(CallInst *CI, IntrinsicOp IOP, + OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated); + Value *TranslateResourceLoad(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + + // object.Load(...) 
could be WaveMatrix Load instead of resource method + if (handle->getType() == hlslOP->GetWaveMatPtrType()) + return TranslateWaveMatLoadStore(CI, IOP, opcode, helper, pObjHelper, Translated); + IRBuilder<> Builder(CI); DXIL::ResourceClass RC = pObjHelper->GetRC(handle); @@ -4124,6 +4309,11 @@ Value *TranslateResourceStore(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + + // object.Store(...) could be WaveMatrix Store instead of resource method + if (handle->getType() == hlslOP->GetWaveMatPtrType()) + return TranslateWaveMatLoadStore(CI, IOP, opcode, helper, pObjHelper, Translated); + IRBuilder<> Builder(CI); DXIL::ResourceKind RK = pObjHelper->GetRK(handle); @@ -4385,7 +4575,7 @@ Value *TranslateMopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP, return nullptr; } -void TranslateSharedMemAtomicBinOp(CallInst *CI, IntrinsicOp IOP, Value *addr) { +void TranslateSharedMemOrNodeAtomicBinOp(CallInst *CI, IntrinsicOp IOP, Value *addr) { AtomicRMWInst::BinOp Op; IRBuilder<> Builder(CI); Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedValueOpIndex); @@ -4402,7 +4592,9 @@ void TranslateSharedMemAtomicBinOp(CallInst *CI, IntrinsicOp IOP, Value *addr) { case IntrinsicOp::IOP_InterlockedExchange: if (needCast) { val = Builder.CreateBitCast(val, Type::getInt32Ty(CI->getContext())); - addr = Builder.CreateBitCast(addr, Type::getInt32PtrTy(CI->getContext(), DXIL::kTGSMAddrSpace)); + addr = Builder.CreateBitCast( + addr, Type::getInt32PtrTy(CI->getContext(), + addr->getType()->getPointerAddressSpace())); } Op = AtomicRMWInst::BinOp::Xchg; break; @@ -4451,6 +4643,49 @@ static Value* SkipAddrSpaceCast(Value* Ptr) { return Ptr; } +Value* TranslateNodeIncrementOutputCount(CallInst* CI, IntrinsicOp IOP, OP::OpCode op, + HLOperationLowerHelper& helper, + HLObjectOperationLowerHelper* pObjHelper, + bool isPerThread, 
bool& Translated) { + + hlsl::OP* OP = &helper.hlslOP; + Value* handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + Value* count = CI->getArgOperand(HLOperandIndex::kIncrementOutputCountCountIdx); + Function* dxilFunc = OP->GetOpFunc(op, CI->getType()); + Value* opArg = OP->GetU32Const((unsigned)op); + Value* perThread = OP->GetI1Const(isPerThread); + + Value* args[] = { opArg, handle, count, perThread }; + + IRBuilder<> Builder(CI); + Builder.CreateCall(dxilFunc, args); + return nullptr; +} + +/* +HLSL: +void EmptyNodeOutput::GroupIncrementOutputCount(uint count) +DXIL: +void @dx.op.groupIncrementOutputCount(i32 %Opcode, %dx.types.NodeHandle %NodeOutput, i32 count) +*/ +Value *TranslateNodeGroupIncrementOutputCount(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, + HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { + return TranslateNodeIncrementOutputCount(CI, IOP, op, helper, pObjHelper, + /*isPerThread*/ false, Translated); +} + +/* +HLSL: +void EmptyNodeOutput::ThreadIncrementOutputCount(uint count) +DXIL: +void @dx.op.threadIncrementOutputCount(i32 %Opcode, %dx.types.NodeHandle %NodeOutput, i32 count) +*/ +Value* TranslateNodeThreadIncrementOutputCount(CallInst* CI, IntrinsicOp IOP, OP::OpCode op, + HLOperationLowerHelper& helper, HLObjectOperationLowerHelper* pObjHelper, bool& Translated) { + return TranslateNodeIncrementOutputCount(CI, IOP, op, helper, pObjHelper, + /*isPerThread*/ true, Translated); +} + // For known non-groupshared, verify that the destination param is valid void ValidateAtomicDestination(CallInst *CI, HLObjectOperationLowerHelper *pObjHelper) { Value *dest = CI->getArgOperand(HLOperandIndex::kInterlockedDestOpIndex); @@ -4486,7 +4721,7 @@ void ValidateAtomicDestination(CallInst *CI, HLObjectOperationLowerHelper *pObjH } } - dxilutil::EmitErrorOnInstruction(CI, "Atomic operation targets must be groupshared or UAV."); + dxilutil::EmitErrorOnInstruction(CI, "Atomic operation targets must be 
groupshared, Node Record or UAV."); } Value *TranslateIopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP, @@ -4496,10 +4731,11 @@ Value *TranslateIopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP, addr = SkipAddrSpaceCast(addr); unsigned addressSpace = addr->getType()->getPointerAddressSpace(); - if (addressSpace == DXIL::kTGSMAddrSpace) - TranslateSharedMemAtomicBinOp(CI, IOP, addr); + if (addressSpace == DXIL::kTGSMAddrSpace || + addressSpace == DXIL::kNodeRecordAddrSpace) + TranslateSharedMemOrNodeAtomicBinOp(CI, IOP, addr); else { - // If not groupshared, we either have an error case or will translate + // If not groupshared or node record, we either have an error case or will translate // the atomic op in the process of translating users of the subscript operator // Mark not translated and validate dest param Translated = false; @@ -4509,7 +4745,7 @@ Value *TranslateIopAtomicBinaryOperation(CallInst *CI, IntrinsicOp IOP, return nullptr; } -void TranslateSharedMemAtomicCmpXChg(CallInst *CI, Value *addr) { +void TranslateSharedMemOrNodeAtomicCmpXChg(CallInst *CI, Value *addr) { Value *val = CI->getArgOperand(HLOperandIndex::kInterlockedCmpValueOpIndex); Value *cmpVal = CI->getArgOperand(HLOperandIndex::kInterlockedCmpCompareValueOpIndex); @@ -4522,7 +4758,8 @@ void TranslateSharedMemAtomicCmpXChg(CallInst *CI, Value *addr) { needCast = true; val = Builder.CreateBitCast(val, Type::getInt32Ty(CI->getContext())); cmpVal = Builder.CreateBitCast(cmpVal, Type::getInt32Ty(CI->getContext())); - addr = Builder.CreateBitCast(addr, Type::getInt32PtrTy(CI->getContext(), DXIL::kTGSMAddrSpace)); + unsigned addrSpace = cast(addr->getType())->getAddressSpace(); + addr = Builder.CreateBitCast(addr, Type::getInt32PtrTy(CI->getContext(), addrSpace)); } Value *Result = Builder.CreateAtomicCmpXchg( @@ -4547,8 +4784,9 @@ Value *TranslateIopAtomicCmpXChg(CallInst *CI, IntrinsicOp IOP, addr = SkipAddrSpaceCast(addr); unsigned addressSpace = 
addr->getType()->getPointerAddressSpace(); - if (addressSpace == DXIL::kTGSMAddrSpace) - TranslateSharedMemAtomicCmpXChg(CI, addr); + if (addressSpace == DXIL::kTGSMAddrSpace || + addressSpace == DXIL::kNodeRecordAddrSpace) + TranslateSharedMemOrNodeAtomicCmpXChg(CI, addr); else { // If not groupshared, we either have an error case or will translate // the atomic op in the process of translating users of the subscript operator @@ -5287,6 +5525,28 @@ Value *TranslateNoArgTransposedMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, return retVal; } +/* +HLSL: +void ThreadNodeOutputRecords::OutputComplete(); +void GroupNodeOutputRecords::OutputComplete(); +DXIL: +void @dx.op.outputComplete(i32 %Opcode, %dx.types.NodeRecordHandle %RecordHandle) +*/ +Value *TranslateNodeOutputComplete(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *OP = &helper.hlslOP; + + Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + DXASSERT_NOMSG(handle->getType() == OP->GetNodeRecordHandleType()); + Function *dxilFunc = OP->GetOpFunc(op, CI->getType()); + Value *opArg = OP->GetU32Const((unsigned)op); + + IRBuilder<> Builder(CI); + return Builder.CreateCall(dxilFunc, {opArg, handle}); +} + Value *TranslateNoArgNoReturnPreserveOutput(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { Instruction *pResult = cast( @@ -5452,6 +5712,164 @@ Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return ResVec; } +Value *TranslateWaveMatrixDepth(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *hlslOP = &helper.hlslOP; + + Value *thisWaveMatPtr = CI->getArgOperand(HLOperandIndex::kWaveMatThisOpIdx); + const auto &props = helper.GetWaveMatInfo(thisWaveMatPtr); + 
+ IRBuilder<> Builder(CI); + Function *dxilFunc = hlslOP->GetOpFunc(opcode, helper.voidTy); + Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); + return Builder.CreateCall(dxilFunc, { opArg, props.second }); +} + +Value *TranslateWaveMatrixFill(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *hlslOP = &helper.hlslOP; + + Value *thisWaveMatPtr = CI->getArgOperand(HLOperandIndex::kWaveMatThisOpIdx); + Value *val = CI->getArgOperand(HLOperandIndex::kWaveMatFillScalarOpIdx); + + IRBuilder<> Builder(CI); + Function *dxilFunc = hlslOP->GetOpFunc(opcode, val->getType()); + Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); + return Builder.CreateCall(dxilFunc, { opArg, thisWaveMatPtr, val }); +} + +Value *TranslateWaveMatrixScalarOp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *hlslOP = &helper.hlslOP; + + Value *thisWaveMatPtr = CI->getArgOperand(HLOperandIndex::kWaveMatThisOpIdx); + Value *val = CI->getArgOperand(HLOperandIndex::kWaveMatScalarOpOpIdx); + + DXIL::WaveMatrixScalarOpCode scalarOp = DXIL::WaveMatrixScalarOpCode::Invalid; + switch (IOP) { + case IntrinsicOp::MOP_ScalarAdd: scalarOp = DXIL::WaveMatrixScalarOpCode::Add; break; + case IntrinsicOp::MOP_ScalarSubtract: scalarOp = DXIL::WaveMatrixScalarOpCode::Subtract; break; + case IntrinsicOp::MOP_ScalarMultiply: scalarOp = DXIL::WaveMatrixScalarOpCode::Multiply; break; + case IntrinsicOp::MOP_ScalarDivide: scalarOp = DXIL::WaveMatrixScalarOpCode::Divide; break; + default: + DXASSERT(false, "Missing case for WaveMatrix scalar operation"); + } + + IRBuilder<> Builder(CI); + Function *dxilFunc = hlslOP->GetOpFunc(opcode, val->getType()); + Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); + Constant *scalarOpArg = hlslOP->GetU8Const((unsigned)scalarOp); + return 
Builder.CreateCall(dxilFunc, { opArg, thisWaveMatPtr, scalarOpArg, val }); +} + +Value *TranslateWaveMatrix_Accumulate(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *hlslOP = &helper.hlslOP; + + Value *thisWaveMatPtr = CI->getArgOperand(HLOperandIndex::kWaveMatThisOpIdx); + Value *otherWaveMatPtr1 = CI->getArgOperand(HLOperandIndex::kWaveMatOther1OpIdx); + + IRBuilder<> Builder(CI); + Function *dxilFunc = hlslOP->GetOpFunc(opcode, helper.voidTy); + Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); + return Builder.CreateCall(dxilFunc, { opArg, thisWaveMatPtr, otherWaveMatPtr1 }); +} + +Value *TranslateWaveMatrixMultiply(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *hlslOP = &helper.hlslOP; + + Value *thisWaveMatPtr = CI->getArgOperand(HLOperandIndex::kWaveMatThisOpIdx); + Value *otherWaveMatPtr1 = CI->getArgOperand(HLOperandIndex::kWaveMatOther1OpIdx); + Value *otherWaveMatPtr2 = CI->getArgOperand(HLOperandIndex::kWaveMatOther2OpIdx); + + IRBuilder<> Builder(CI); + Function *dxilFunc = hlslOP->GetOpFunc(opcode, helper.voidTy); + Constant *opArg = hlslOP->GetU32Const((unsigned)opcode); + return Builder.CreateCall(dxilFunc, { opArg, thisWaveMatPtr, otherWaveMatPtr1, otherWaveMatPtr2 }); +} + +Value *TranslateWaveMatLoadStore(CallInst *CI, IntrinsicOp IOP, + OP::OpCode opcode, + HLOperationLowerHelper &helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *hlslOP = &helper.hlslOP; + + // buf is raw buffer handle or groupshared ptr: + Value *buf = CI->getArgOperand(HLOperandIndex::kWaveMatLoadStoreBufOpIdx); + Type *bufETy = buf->getType(); + bool bRawBuf = bufETy == hlslOP->GetHandleType(); + if (!bRawBuf) { + Constant *C = dyn_cast(buf); + if (auto *CE = dyn_cast(C)) + C = 
CE->getOperand(0)->stripPointerCasts(); + DXASSERT(C && C->getType()->getPointerAddressSpace() == DXIL::kTGSMAddrSpace, + "otherwise, non-groupshared type passed to groupshared Load/Store"); + bufETy = dxilutil::StripArrayTypes(C->getType()->getPointerElementType()); + buf = ConstantExpr::getPointerBitCastOrAddrSpaceCast(C, bufETy->getPointerTo(DXIL::kTGSMAddrSpace)); + } + + // Determine if fragment (LeftColAcc/RightRowAcc) + const auto &props = helper.GetWaveMatInfo(CI->getArgOperand(HLOperandIndex::kWaveMatThisOpIdx)); + DXIL::WaveMatrixKind waveMatKind = props.first.kind; + bool bFragment = waveMatKind == DXIL::WaveMatrixKind::LeftColAcc || + waveMatKind == DXIL::WaveMatrixKind::RightRowAcc; + + if (IOP == IntrinsicOp::MOP_Load) { + opcode = bRawBuf ? OP::OpCode::WaveMatrix_LoadRawBuf + : OP::OpCode::WaveMatrix_LoadGroupShared; + } else if (IOP == IntrinsicOp::MOP_Store) { + opcode = bRawBuf ? OP::OpCode::WaveMatrix_StoreRawBuf + : OP::OpCode::WaveMatrix_StoreGroupShared; + } else { + DXASSERT(0, "otherwise, unexpected IntrinsicOp"); + } + + Function *dxilFunc = hlslOP->GetOpFunc(opcode, bRawBuf ? helper.voidTy : bufETy); + + IRBuilder<> Builder(CI); + SmallVector args; + args.push_back(hlslOP->GetU32Const((unsigned)opcode)); + args.push_back(CI->getArgOperand(HLOperandIndex::kWaveMatThisOpIdx)); + args.push_back(buf); + args.push_back(CI->getArgOperand(HLOperandIndex::kWaveMatLoadStoreStartOpIdx)); + + // For fragment, stride is element stride with same argument mapping. + args.push_back(CI->getArgOperand(HLOperandIndex::kWaveMatLoadStoreStrideOpIdx)); + + // if handle, push align arg + if (bRawBuf) { + Value *align = ConstantInt::get(helper.i8Ty, (uint64_t)0); + const unsigned AlignOpIdx = + bFragment ? 
HLOperandIndex::kWaveMatFragLoadStoreAlignmentOpIdx + : HLOperandIndex::kWaveMatLoadStoreAlignmentOpIdx; + if (CI->getNumArgOperands() > AlignOpIdx) { + align = CI->getArgOperand(AlignOpIdx); + align = Builder.CreateTrunc(align, helper.i8Ty); + } + args.push_back(align); + } + + // No orientation for matrix fragments, just use i1 0 for unused arg. + args.push_back( + bFragment + ? ConstantInt::get(helper.i1Ty, (uint64_t)0) + : CI->getArgOperand(HLOperandIndex::kWaveMatLoadStoreColMajorOpIdx)); + + return Builder.CreateCall(dxilFunc, args); +} + } // namespace // Resource Handle. @@ -5579,6 +5997,7 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier}, {IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier}, {IntrinsicOp::IOP_AllocateRayQuery, TranslateAllocateRayQuery, DXIL::OpCode::AllocateRayQuery}, + {IntrinsicOp::IOP_Barrier, TranslateBarrier, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_CallShader, TranslateCallShader, DXIL::OpCode::CallShader}, {IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, DXIL::OpCode::CheckAccessFullyMapped}, {IntrinsicOp::IOP_CreateResourceFromHeap, TranslateGetHandleFromHeap, DXIL::OpCode::CreateHandleFromHeap}, @@ -5591,14 +6010,17 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::IOP_EvaluateAttributeAtSample, TranslateEvalSample, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_EvaluateAttributeCentroid, TranslateEvalCentroid, DXIL::OpCode::EvalCentroid}, {IntrinsicOp::IOP_EvaluateAttributeSnapped, TranslateEvalSnapped, DXIL::OpCode::NumOpCodes}, + {IntrinsicOp::IOP_ExtractRecordStructFromArray, EmptyLower, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_GeometryIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::GeometryIndex}, {IntrinsicOp::IOP_GetAttributeAtVertex, TranslateGetAttributeAtVertex, DXIL::OpCode::AttributeAtVertex}, + {IntrinsicOp::IOP_GetRemainingRecursionLevels, TrivialNoArgOperation, 
DXIL::OpCode::GetRemainingRecursionLevels}, {IntrinsicOp::IOP_GetRenderTargetSampleCount, TrivialNoArgOperation, DXIL::OpCode::RenderTargetGetSampleCount}, {IntrinsicOp::IOP_GetRenderTargetSamplePosition, TranslateGetRTSamplePos, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_GroupMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier}, {IntrinsicOp::IOP_GroupMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier}, {IntrinsicOp::IOP_HitKind, TrivialNoArgWithRetOperation, DXIL::OpCode::HitKind}, {IntrinsicOp::IOP_IgnoreHit, TranslateNoArgNoReturnPreserveOutput, DXIL::OpCode::IgnoreHit}, + {IntrinsicOp::IOP_ImplicitRecordToStructCast, EmptyLower, DXIL::OpCode::NumOpCodes}, {IntrinsicOp::IOP_InstanceID, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceID}, {IntrinsicOp::IOP_InstanceIndex, TrivialNoArgWithRetOperation, DXIL::OpCode::InstanceIndex}, {IntrinsicOp::IOP_InterlockedAdd, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes}, @@ -5901,6 +6323,24 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::MOP_TraceRayInline, TranslateTraceRayInline, DXIL::OpCode::RayQuery_TraceRayInline}, {IntrinsicOp::MOP_WorldRayDirection, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_WorldRayDirection}, {IntrinsicOp::MOP_WorldRayOrigin, TranslateRayQueryFloat3Getter, DXIL::OpCode::RayQuery_WorldRayOrigin}, + {IntrinsicOp::MOP_Fill, TranslateWaveMatrixFill, DXIL::OpCode::WaveMatrix_Fill}, + {IntrinsicOp::MOP_MatrixDepth, TranslateWaveMatrixDepth, DXIL::OpCode::WaveMatrix_Depth}, + {IntrinsicOp::MOP_ScalarAdd, TranslateWaveMatrixScalarOp, DXIL::OpCode::WaveMatrix_ScalarOp}, + {IntrinsicOp::MOP_ScalarDivide, TranslateWaveMatrixScalarOp, DXIL::OpCode::WaveMatrix_ScalarOp}, + {IntrinsicOp::MOP_ScalarMultiply, TranslateWaveMatrixScalarOp, DXIL::OpCode::WaveMatrix_ScalarOp}, + {IntrinsicOp::MOP_ScalarSubtract, TranslateWaveMatrixScalarOp, DXIL::OpCode::WaveMatrix_ScalarOp}, + {IntrinsicOp::MOP_SumAccumulate, TranslateWaveMatrix_Accumulate, 
DXIL::OpCode::WaveMatrix_SumAccumulate}, + {IntrinsicOp::MOP_Add, TranslateWaveMatrix_Accumulate, DXIL::OpCode::WaveMatrix_Add}, + {IntrinsicOp::MOP_Multiply, TranslateWaveMatrixMultiply, DXIL::OpCode::WaveMatrix_Multiply}, + {IntrinsicOp::MOP_MultiplyAccumulate, TranslateWaveMatrixMultiply, DXIL::OpCode::WaveMatrix_MultiplyAccumulate}, + {IntrinsicOp::MOP_Count, TranslateNodeGetInputRecordCount, DXIL::OpCode::GetInputRecordCount}, + {IntrinsicOp::MOP_FinishedCrossGroupSharing, TranslateNodeFinishedCrossGroupSharing, DXIL::OpCode::FinishedCrossGroupSharing}, + {IntrinsicOp::MOP_GetGroupNodeOutputRecords, TranslateGetGroupNodeOutputRecords, DXIL::OpCode::AllocateNodeOutputRecords }, + {IntrinsicOp::MOP_GetThreadNodeOutputRecords, TranslateGetThreadNodeOutputRecords, DXIL::OpCode::AllocateNodeOutputRecords }, + {IntrinsicOp::MOP_IsValid, TranslateNodeOutputIsValid, DXIL::OpCode::NodeOutputIsValid}, + {IntrinsicOp::MOP_GroupIncrementOutputCount, TranslateNodeGroupIncrementOutputCount, DXIL::OpCode::IncrementOutputCount }, + {IntrinsicOp::MOP_ThreadIncrementOutputCount, TranslateNodeThreadIncrementOutputCount, DXIL::OpCode::IncrementOutputCount}, + {IntrinsicOp::MOP_OutputComplete, TranslateNodeOutputComplete, DXIL::OpCode::OutputComplete }, // SPIRV change starts #ifdef ENABLE_SPIRV_CODEGEN @@ -5908,7 +6348,7 @@ IntrinsicLower gLowerTable[] = { #endif // ENABLE_SPIRV_CODEGEN // SPIRV change ends - // Manully added part. + // Manually added part. { IntrinsicOp::IOP_InterlockedUMax, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes }, { IntrinsicOp::IOP_InterlockedUMin, TranslateIopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes }, { IntrinsicOp::IOP_WaveActiveUMax, TranslateWaveA2A, DXIL::OpCode::WaveActiveOp }, @@ -6190,7 +6630,7 @@ void TranslateCBAddressUser(Instruction *user, Value *handle, Value *baseOffset, // CI should be annotate handle. // Need createHandle here. 
if (GetHLOpcodeGroup(CI->getCalledFunction()) == HLOpcodeGroup::HLAnnotateHandle) - CI = cast(CI->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx)); + CI = cast(CI->getArgOperand(HLOperandIndex::kHandleOpIdx)); GlobalVariable *CbGV = cast( CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx)); TranslateResourceInCB(ldInst, pObjHelper, CbGV); @@ -6686,7 +7126,7 @@ void TranslateCBAddressUserLegacy(Instruction *user, Value *handle, // CI should be annotate handle. // Need createHandle here. if (GetHLOpcodeGroup(CI->getCalledFunction()) == HLOpcodeGroup::HLAnnotateHandle) - CI = cast(CI->getArgOperand(HLOperandIndex::kAnnotateHandleHandleOpIdx)); + CI = cast(CI->getArgOperand(HLOperandIndex::kHandleOpIdx)); GlobalVariable *CbGV = cast( CI->getArgOperand(HLOperandIndex::kCreateHandleResourceOpIdx)); @@ -6935,53 +7375,6 @@ void TranslateCBOperationsLegacy(Value *handle, Value *ptr, OP *hlslOP, // Structured buffer. namespace { -// Calculate offset. -Value *GEPIdxToOffset(GetElementPtrInst *GEP, IRBuilder<> &Builder, - hlsl::OP *OP, const DataLayout &DL) { - SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); - Value *addr = nullptr; - // update offset - if (GEP->hasAllConstantIndices()) { - unsigned gepOffset = - DL.getIndexedOffset(GEP->getPointerOperandType(), Indices); - addr = OP->GetU32Const(gepOffset); - } else { - Value *offset = OP->GetU32Const(0); - gep_type_iterator GEPIt = gep_type_begin(GEP), E = gep_type_end(GEP); - for (; GEPIt != E; GEPIt++) { - Value *idx = GEPIt.getOperand(); - unsigned immIdx = 0; - if (llvm::Constant *constIdx = dyn_cast(idx)) { - immIdx = constIdx->getUniqueInteger().getLimitedValue(); - if (immIdx == 0) { - continue; - } - } - if (GEPIt->isPointerTy() || GEPIt->isArrayTy() || GEPIt->isVectorTy()) { - unsigned size = DL.getTypeAllocSize(GEPIt->getSequentialElementType()); - if (immIdx) { - unsigned tempOffset = size * immIdx; - offset = Builder.CreateAdd(offset, OP->GetU32Const(tempOffset)); - } else { - Value 
*tempOffset = Builder.CreateMul(idx, OP->GetU32Const(size)); - offset = Builder.CreateAdd(offset, tempOffset); - } - } else if (GEPIt->isStructTy()) { - const StructLayout *Layout = DL.getStructLayout(cast(*GEPIt)); - unsigned structOffset = Layout->getElementOffset(immIdx); - offset = Builder.CreateAdd(offset, OP->GetU32Const(structOffset)); - } else { - gep_type_iterator temp = GEPIt; - temp++; - DXASSERT(temp == E, "scalar type must be the last"); - } - }; - addr = offset; - } - // TODO: x4 for byte address - return addr; -} - // Load a value from a typedef buffer with an offset. // Typed buffer do not directly support reading at offsets // because the whole value (e.g. float4) must be read at once. @@ -7607,7 +8000,7 @@ void TranslateStructBufSubscriptUser( GetElementPtrInst *GEP = cast(user); Type *Ty = GEP->getType()->getPointerElementType(); - Value *offset = GEPIdxToOffset(GEP, Builder, OP, DL); + Value *offset = dxilutil::GEPIdxToOffset(GEP, Builder, OP, DL); DXASSERT_LOCALVAR(Ty, offset->getType() == Type::getInt32Ty(Ty->getContext()), "else bitness is wrong"); offset = Builder.CreateAdd(offset, baseOffset); @@ -7843,28 +8236,28 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, HL ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedUMin); AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, helper.addr, /*offset*/ nullptr); - TranslateAtomicBinaryOperation( - atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP); +TranslateAtomicBinaryOperation( + atomHelper, DXIL::AtomicBinOpCode::UMin, Builder, hlslOP); } break; case IntrinsicOp::IOP_InterlockedOr: { ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedOr); AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, - helper.addr, /*offset*/ nullptr); + helper.addr, /*offset*/ nullptr); TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Or, - Builder, hlslOP); + Builder, hlslOP); } break; case 
IntrinsicOp::IOP_InterlockedXor: { ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedXor); AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicBinOp, handle, - helper.addr, /*offset*/ nullptr); + helper.addr, /*offset*/ nullptr); TranslateAtomicBinaryOperation(atomHelper, DXIL::AtomicBinOpCode::Xor, - Builder, hlslOP); + Builder, hlslOP); } break; case IntrinsicOp::IOP_InterlockedCompareStore: case IntrinsicOp::IOP_InterlockedCompareExchange: { ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedCompareExchange); AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange, - handle, helper.addr, /*offset*/ nullptr); + handle, helper.addr, /*offset*/ nullptr); TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP); } break; case IntrinsicOp::IOP_InterlockedCompareStoreFloatBitwise: @@ -7872,7 +8265,7 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, HL Type *i32Ty = Type::getInt32Ty(userCall->getContext()); ResLoadHelper helper(CI, RK, RC, handle, IntrinsicOp::IOP_InterlockedCompareExchange); AtomicHelper atomHelper(userCall, DXIL::OpCode::AtomicCompareExchange, - handle, helper.addr, /*offset*/ nullptr, i32Ty); + handle, helper.addr, /*offset*/ nullptr, i32Ty); TranslateAtomicCmpXChg(atomHelper, Builder, hlslOP); } break; default: @@ -7880,15 +8273,16 @@ void TranslateDefaultSubscript(CallInst *CI, HLOperationLowerHelper &helper, HL break; } } else { - DXASSERT(0, "invalid group"); + DXASSERT(0, "invalid group"); } userCall->eraseFromParent(); } } } +} void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, - HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { + HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { if (CI->user_empty()) { Translated = true; return; @@ -7902,11 +8296,11 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, Value *handle = 
CI->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx); if (helper.bLegacyCBufferLoad) TranslateCBOperationsLegacy(handle, CI, hlslOP, helper.dxilTypeSys, - helper.dataLayout, pObjHelper); + helper.dataLayout, pObjHelper); else { TranslateCBOperations(handle, CI, /*offset*/ hlslOP->GetU32Const(0), - hlslOP, helper.dxilTypeSys, - CI->getModule()->getDataLayout(), pObjHelper); + hlslOP, helper.dxilTypeSys, + CI->getModule()->getDataLayout(), pObjHelper); } Translated = true; return; @@ -7916,7 +8310,7 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, DXIL::ResourceKind RK = pObjHelper->GetRK(handle); Value *coord = CI->getArgOperand(HLOperandIndex::kSubscriptIndexOpIdx); Value *mipLevel = - CI->getArgOperand(HLOperandIndex::kDoubleSubscriptMipLevelOpIdx); + CI->getArgOperand(HLOperandIndex::kDoubleSubscriptMipLevelOpIdx); auto U = CI->user_begin(); DXASSERT(CI->hasOneUse(), "subscript should only have one use"); @@ -7929,30 +8323,40 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, StoreInst *stInst = cast(*U); Value *val = stInst->getValueOperand(); TranslateStore(RK, handle, val, - CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), - Builder, hlslOP, mipLevel); + CI->getArgOperand(HLOperandIndex::kStoreOffsetOpIdx), + Builder, hlslOP, mipLevel); stInst->eraseFromParent(); } Translated = true; return; } else { Type *HandleTy = hlslOP->GetHandleType(); + if (ptr->getType() == hlslOP->GetNodeRecordHandleType()) { + DXASSERT(false, "Shouldn't get here, NodeRecord subscripts should have " + "generated ExtractRecordStructFromArray intrinsic"); + return; + } if (ptr->getType() == HandleTy) { // Resource ptr. 
Value *handle = ptr; - DXIL::ResourceKind RK = pObjHelper->GetRK(handle); + DXIL::ResourceKind RK = DxilResource::Kind::Invalid; + Type *ObjTy = nullptr; + Type *RetTy = nullptr; + RK = pObjHelper->GetRK(handle); if (RK == DxilResource::Kind::Invalid) { Translated = false; return; } + ObjTy = pObjHelper->GetResourceType(handle); + RetTy = ObjTy->getStructElementType(0); Translated = true; - Type *ObjTy = pObjHelper->GetResourceType(handle); - Type *RetTy = ObjTy->getStructElementType(0); + if (DXIL::IsStructuredBuffer(RK)) { TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK, helper.dataLayout); - } else if (RetTy->isAggregateType() && - RK == DxilResource::Kind::TypedBuffer) { + } + else if (RetTy->isAggregateType() && RK == DxilResource::Kind::TypedBuffer) { + TranslateStructBufSubscript(CI, handle, /*status*/ nullptr, hlslOP, RK, helper.dataLayout); // Clear offset for typed buf. @@ -8020,8 +8424,6 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, return; } -} - void TranslateSubscriptOperation(Function *F, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper) { for (auto U = F->user_begin(); U != F->user_end();) { Value *user = *(U++); @@ -8122,14 +8524,19 @@ void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper, case HLMatLoadStoreOpcode::RowMatStore: { Value *vecVal = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx); Value *matPtr = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx); - Value *castPtr = Builder.CreateBitCast(matPtr, vecVal->getType()->getPointerTo()); + matPtr = SkipAddrSpaceCast(matPtr); + unsigned addrSpace = cast(matPtr->getType())->getAddressSpace(); + + Value *castPtr = Builder.CreateBitCast(matPtr, vecVal->getType()->getPointerTo(addrSpace)); Builder.CreateStore(vecVal, castPtr); CI->eraseFromParent(); } break; case HLMatLoadStoreOpcode::ColMatLoad: case HLMatLoadStoreOpcode::RowMatLoad: { Value *matPtr = 
CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); - Value *castPtr = Builder.CreateBitCast(matPtr, CI->getType()->getPointerTo()); + matPtr = SkipAddrSpaceCast(matPtr); + unsigned addrSpace = cast(matPtr->getType())->getAddressSpace(); + Value *castPtr = Builder.CreateBitCast(matPtr, CI->getType()->getPointerTo(addrSpace)); Value *vecVal = Builder.CreateLoad(castPtr); CI->replaceAllUsesWith(vecVal); CI->eraseFromParent(); @@ -8263,4 +8670,57 @@ void TranslateBuiltinOperations( } } +void EmitGetNodeRecordPtrAndUpdateUsers(HLOperationLowerHelper &helper, + CallInst *CI, Value *ArrayIndex) { + IRBuilder<> Builder(CI); + Value *opArg = nullptr; + Value *Handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx); + opArg = Builder.getInt32((unsigned)DXIL::OpCode::GetNodeRecordPtr); + StructType *origRecordUDT = + cast(cast(CI->getType())->getElementType()); + Type *getNodeRecordPtrRT = origRecordUDT; + // Translate node record type here + auto findIt = helper.loweredTypes.find(origRecordUDT); + if (findIt != helper.loweredTypes.end()) { + getNodeRecordPtrRT = findIt->second; + } else { + getNodeRecordPtrRT = GetLoweredUDT(origRecordUDT, &helper.dxilTypeSys); + if (origRecordUDT != getNodeRecordPtrRT) + helper.loweredTypes[origRecordUDT] = getNodeRecordPtrRT; + } + getNodeRecordPtrRT = + getNodeRecordPtrRT->getPointerTo(DXIL::kNodeRecordAddrSpace); + Function *getNodeRecordPtr = helper.hlslOP.GetOpFunc( + DXIL::OpCode::GetNodeRecordPtr, getNodeRecordPtrRT); + Value *args[] = {opArg, Handle, ArrayIndex}; + Value *NodeRecordPtr = Builder.CreateCall(getNodeRecordPtr, args); + ReplaceUsesForLoweredUDT(CI, NodeRecordPtr); +} + +void LowerRecordAccessToGetNodeRecordPtr(HLModule &HLM) { + Module *M = HLM.GetModule(); + HLOperationLowerHelper helper(HLM); + for (iplist::iterator F : M->getFunctionList()) { + if (F->user_empty()) + continue; + hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F); + if (group == HLOpcodeGroup::HLIntrinsic) { + for (auto U = F->user_begin(); U != 
F->user_end();) { + Value *User = *(U++); + if (!isa(User)) + continue; + // must be call inst + CallInst *CI = cast(User); + IntrinsicOp opcode = static_cast(hlsl::GetHLOpcode(CI)); + if (opcode != IntrinsicOp::IOP_ExtractRecordStructFromArray) + continue; + Value *Index = CI->getNumArgOperands() > 2 + ? CI->getArgOperand(2) + : ConstantInt::get(helper.i32Ty, 0); + EmitGetNodeRecordPtrAndUpdateUsers(helper, CI, Index); + CI->eraseFromParent(); + } + } + } +} } diff --git a/lib/HLSL/HLOperations.cpp b/lib/HLSL/HLOperations.cpp index 96bc9e0da4..f7f1358cc2 100644 --- a/lib/HLSL/HLOperations.cpp +++ b/lib/HLSL/HLOperations.cpp @@ -41,9 +41,18 @@ static StringRef HLOpcodeGroupNames[]{ "matldst", // HLMatLoadStore, "select", // HLSelect, "createhandle",// HLCreateHandle, - "annotatehandle" // HLAnnotateHandle, + "createnodeoutputhandle", // HLCreateNodeOutputHandle + "indexnodehandle", // HLIndexNodeHandle, + "createnodeinputrecordhandle", //HLCreateNodeInputRecordHandle + "annotatehandle", // HLAnnotateHandle, + "wavematrix_annotate", // HLWaveMatrix_Annotate, + "annotatenodehandle", //HLAnnotateNodeHandle + "annotatenoderecordhandle", //HLAnnotateNodeRecordHandle "numOfHLDXIL", // NumOfHLOps }; +static_assert(_countof(HLOpcodeGroupNames) == + 1 + (size_t)HLOpcodeGroup::NumOfHLOps, + "otherwise, tables out of sync"); static StringRef HLOpcodeGroupFullNames[]{ "notHLDXIL", // NotHL, @@ -57,9 +66,18 @@ static StringRef HLOpcodeGroupFullNames[]{ "dx.hl.matldst", // HLMatLoadStore, "dx.hl.select", // HLSelect, "dx.hl.createhandle", // HLCreateHandle, + "dx.hl.createnodeoutputhandle", // HLCreateNodeOutputHandle + "dx.hl.indexnodehandle", // HLIndexNodeHandle + "dx.hl.createnodeinputrecordhandle", //HLCreateNodeInputRecordHandle "dx.hl.annotatehandle", // HLAnnotateHandle, + "dx.hl.wavematrix_annotate", // HLWaveMatrix_Annotate, + "dx.hl.annotatenodehandle", // HLAnnotateNodeHandle, + "dx.hl.annotatenoderecordhandle", //HLAnnotateNodeRecordHandle "numOfHLDXIL", // NumOfHLOps };
+static_assert(_countof(HLOpcodeGroupFullNames) == + 1 + (size_t)HLOpcodeGroup::NumOfHLOps, + "otherwise, tables out of sync"); static HLOpcodeGroup GetHLOpcodeGroupInternal(StringRef group) { if (!group.empty()) { @@ -71,11 +89,27 @@ static HLOpcodeGroup GetHLOpcodeGroupInternal(StringRef group) { case 'a': // cast return HLOpcodeGroup::HLCast; case 'r': // createhandle - return HLOpcodeGroup::HLCreateHandle; + { + if (group.startswith_lower("createnodeoutputhandle")) + return HLOpcodeGroup::HLCreateNodeOutputHandle; + else if (group.startswith_lower("createnodeinputrecordhandle")) + return HLOpcodeGroup::HLCreateNodeInputRecordHandle; + else { + assert(group.startswith_lower("createhandle")); + return HLOpcodeGroup::HLCreateHandle; + } + } } llvm_unreachable("unrecognized group code"); case 'i': // init - return HLOpcodeGroup::HLInit; + { + if (group.startswith_lower("init")) + return HLOpcodeGroup::HLInit; + else if (group.startswith_lower("indexnodehandle")) + return HLOpcodeGroup::HLIndexNodeHandle; + } + break; + case 'b': // binaryOp return HLOpcodeGroup::HLBinOp; case 'u': // unaryOp @@ -91,7 +125,15 @@ static HLOpcodeGroup GetHLOpcodeGroupInternal(StringRef group) { case 'm': // matldst return HLOpcodeGroup::HLMatLoadStore; case 'a': // annotatehandle - return HLOpcodeGroup::HLAnnotateHandle; + if (group.startswith_lower("annotatehandle")) + return HLOpcodeGroup::HLAnnotateHandle; + else if (group.startswith_lower("annotatenodehandle")) + return HLOpcodeGroup::HLAnnotateNodeHandle; + else if (group.startswith_lower("annotatenoderecordhandle")) + return HLOpcodeGroup::HLAnnotateNodeRecordHandle; + break; + case 'w': // wavematrix_annotate + return HLOpcodeGroup::HLWaveMatrix_Annotate; } } return HLOpcodeGroup::NotHL; @@ -142,7 +184,13 @@ StringRef GetHLOpcodeGroupName(HLOpcodeGroup op) { case HLOpcodeGroup::HLMatLoadStore: case HLOpcodeGroup::HLSelect: case HLOpcodeGroup::HLCreateHandle: + case HLOpcodeGroup::HLCreateNodeOutputHandle: + case 
HLOpcodeGroup::HLIndexNodeHandle: + case HLOpcodeGroup::HLCreateNodeInputRecordHandle: case HLOpcodeGroup::HLAnnotateHandle: + case HLOpcodeGroup::HLWaveMatrix_Annotate: + case HLOpcodeGroup::HLAnnotateNodeHandle: + case HLOpcodeGroup::HLAnnotateNodeRecordHandle: return HLOpcodeGroupNames[static_cast(op)]; default: llvm_unreachable("invalid op"); @@ -161,7 +209,13 @@ StringRef GetHLOpcodeGroupFullName(HLOpcodeGroup op) { case HLOpcodeGroup::HLMatLoadStore: case HLOpcodeGroup::HLSelect: case HLOpcodeGroup::HLCreateHandle: + case HLOpcodeGroup::HLCreateNodeOutputHandle: + case HLOpcodeGroup::HLIndexNodeHandle: + case HLOpcodeGroup::HLCreateNodeInputRecordHandle: case HLOpcodeGroup::HLAnnotateHandle: + case HLOpcodeGroup::HLWaveMatrix_Annotate: + case HLOpcodeGroup::HLAnnotateNodeHandle: + case HLOpcodeGroup::HLAnnotateNodeRecordHandle: return HLOpcodeGroupFullNames[static_cast(op)]; default: llvm_unreachable("invalid op"); @@ -450,6 +504,10 @@ static void SetHLFunctionAttribute(Function *F, HLOpcodeGroup group, case HLOpcodeGroup::HLAnnotateHandle: { F->addFnAttr(Attribute::ReadNone); } break; + case HLOpcodeGroup::HLWaveMatrix_Annotate: { + F->addFnAttr(Attribute::ArgMemOnly); + F->addFnAttr(Attribute::NoUnwind); + } break; case HLOpcodeGroup::HLIntrinsic: { IntrinsicOp intrinsicOp = static_cast(opcode); switch (intrinsicOp) { diff --git a/lib/HLSL/HLSignatureLower.cpp b/lib/HLSL/HLSignatureLower.cpp index 110ded70be..858993cef3 100644 --- a/lib/HLSL/HLSignatureLower.cpp +++ b/lib/HLSL/HLSignatureLower.cpp @@ -1209,11 +1209,12 @@ void HLSignatureLower::GenerateDxilInputsOutputs(DXIL::SignatureKind SK) { } } -void HLSignatureLower::GenerateDxilCSInputs() { +void HLSignatureLower::GenerateDxilComputeAndNodeCommonInputs() { OP *hlslOP = HLM.GetOP(); DxilFunctionAnnotation *funcAnnotation = HLM.GetFunctionAnnotation(Entry); DXASSERT(funcAnnotation, "must find annotation for entry function"); + auto &funcProps = HLM.GetDxilFunctionProps(Entry); IRBuilder<> 
Builder(Entry->getEntryBlock().getFirstInsertionPt()); for (Argument &arg : Entry->args()) { @@ -1221,7 +1222,10 @@ void HLSignatureLower::GenerateDxilCSInputs() { funcAnnotation->GetParameterAnnotation(arg.getArgNo()); llvm::StringRef semanticStr = paramAnnotation.GetSemanticString(); + if (semanticStr.empty()) { + if (funcProps.IsNode() && paramAnnotation.IsParamInputQualNode()) + continue; dxilutil::EmitErrorOnFunction(HLM.GetModule()->getContext(), Entry, "Semantic must be defined for all " "parameters of an entry function or patch " "constant function."); @@ -1736,10 +1740,10 @@ void HLSignatureLower::Run() { if (props.IsMS()) { GenerateDxilPrimOutputs(); } - } else if (props.IsCS()) { - GenerateDxilCSInputs(); - } - + } else if (props.IsCS() || props.IsNode()) { + GenerateDxilComputeAndNodeCommonInputs(); + } + if (props.IsDS() || props.IsHS()) GenerateDxilPatchConstantLdSt(); if (props.IsHS()) diff --git a/lib/HLSL/HLSignatureLower.h b/lib/HLSL/HLSignatureLower.h index e1ef9fb463..e07ebedadf 100644 --- a/lib/HLSL/HLSignatureLower.h +++ b/lib/HLSL/HLSignatureLower.h @@ -52,7 +52,7 @@ class HLSignatureLower { void GenerateDxilOutputs(); void GenerateDxilPrimOutputs(); void GenerateDxilInputsOutputs(DXIL::SignatureKind SK); - void GenerateDxilCSInputs(); + void GenerateDxilComputeAndNodeCommonInputs(); void GenerateDxilPatchConstantLdSt(); void GenerateDxilPatchConstantFunctionInputs(); void GenerateClipPlanesForVS(llvm::Value *outPosition); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index a863ca20af..4b31542df6 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -289,6 +289,9 @@ void PassManagerBuilder::addHLSLPasses(legacy::PassManagerBase &MPM) { // Verify no undef resource again after promotion MPM.add(createInvalidateUndefResourcesPass()); + // Translate HL WaveMatrix ptrs to final dxil type + MPM.add(createLowerWaveMatTypePass()); + 
MPM.add(createDxilGenerationPass(NoOpt, this->HLSLExtensionsCodeGen)); // Propagate precise attribute. diff --git a/lib/Transforms/Scalar/LowerTypePasses.cpp b/lib/Transforms/Scalar/LowerTypePasses.cpp index d4d1782617..cee4f881ee 100644 --- a/lib/Transforms/Scalar/LowerTypePasses.cpp +++ b/lib/Transforms/Scalar/LowerTypePasses.cpp @@ -889,3 +889,142 @@ INITIALIZE_PASS(ResourceToHandle, "resource-handle", ModulePass *llvm::createResourceToHandlePass() { return new ResourceToHandle(); } + + +//===----------------------------------------------------------------------===// +// Lower WaveMatrix types to single dxil type. +//===----------------------------------------------------------------------===// + +namespace { + +class LowerWaveMatType : public LowerTypePass { +public: + explicit LowerWaveMatType() : LowerTypePass(ID) {} + static char ID; // Pass identification, replacement for typeid +protected: + bool needToLower(Value *V) override; + void lowerUseWithNewValue(Value *V, Value *NewV) override; + Type *lowerType(Type *Ty) override; + Constant *lowerInitVal(Constant *InitVal, Type *NewTy) override; + StringRef getGlobalPrefix() override { return ".res"; } + void initialize(Module &M) override; +private: + void lowerUserWithNewValue(User *U, Value *V, Value *NewV); + + Type *m_WaveMatTy = nullptr; + HLModule *m_pHLM = nullptr; +}; + +void LowerWaveMatType::initialize(Module &M) { + DXASSERT(M.HasHLModule(), "require HLModule"); + m_pHLM = &M.GetHLModule(); + m_WaveMatTy = m_pHLM->GetOP()->GetWaveMatPtrType()->getPointerElementType(); +} + +bool LowerWaveMatType::needToLower(Value *V) { + return dxilutil::IsHLSLWaveMatrixType(dxilutil::GetArrayEltTy(V->getType())); +} + +Type *LowerWaveMatType::lowerType(Type *Ty) { + if (Ty->isPointerTy()) { + return PointerType::get(lowerType(Ty->getPointerElementType()), Ty->getPointerAddressSpace()); + } else if (Ty->isArrayTy()) { + llvm::SmallVector OuterToInnerLengths; + Ty = dxilutil::StripArrayTypes(Ty, 
&OuterToInnerLengths); + DXASSERT(dxilutil::IsHLSLWaveMatrixType(Ty), + "otherwise, unexpected wave matrix type to lower"); + return dxilutil::WrapInArrayTypes(m_WaveMatTy, OuterToInnerLengths); + } else if (dxilutil::IsHLSLWaveMatrixType(Ty)) { + return m_WaveMatTy; + } + DXASSERT(0, "otherwise, unexpected wave matrix type to lower"); + return Ty; +} + +Constant *LowerWaveMatType::lowerInitVal(Constant *InitVal, Type *NewTy) { + DXASSERT(isa(InitVal), "wave matrix cannot have real init val"); + return UndefValue::get(NewTy); +} + +// Rewrite call, replacing argument with new type +static CallInst *RewriteIntrinsicCallForNewArg(CallInst *CI, Value *OldV, Value *NewV, Type *NewRet = nullptr) { + Function *F = CI->getCalledFunction(); + HLOpcodeGroup group = GetHLOpcodeGroupByName(F); + unsigned opcode = GetHLOpcode(CI); + SmallVector newArgTypes(CI->getFunctionType()->param_begin(), + CI->getFunctionType()->param_end()); + SmallVector newArgs(CI->arg_operands()); + + for (unsigned i = 1; i < newArgs.size(); i++) { + if (newArgs[i] == OldV) { + newArgTypes[i] = NewV->getType(); + newArgs[i] = NewV; + } + } + + if (NewRet == nullptr) + NewRet = CI->getType(); + + FunctionType *newFuncTy = FunctionType::get(NewRet, newArgTypes, false); + Function *newF = GetOrCreateHLFunction(*F->getParent(), newFuncTy, group, opcode, + F->getAttributes().getFnAttributes()); + IRBuilder<> Builder(CI); + return Builder.CreateCall(newF, newArgs); +} + +void LowerWaveMatType::lowerUserWithNewValue(User *U, Value *V, Value *NewV) { + if (CallInst *CI = dyn_cast(U)) { + HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction()); + if (group == HLOpcodeGroup::HLWaveMatrix_Annotate || + group == HLOpcodeGroup::HLIntrinsic) { + Type *NewRet = needToLower(CI) ? 
lowerType(CI->getType()) : nullptr; + Value *NewU = RewriteIntrinsicCallForNewArg(CI, V, NewV, NewRet); + if (!U->user_empty()) { + if (NewRet) + lowerUseWithNewValue(U, NewU); + else + U->replaceAllUsesWith(NewU); + } + return; + } + } else if (BitCastInst *BI = dyn_cast(U)) { + BI->setOperand(0, NewV); + return; + } + + DXASSERT(0, "invalid operation on WaveMatrix pointer"); +} + +void LowerWaveMatType::lowerUseWithNewValue(Value *V, Value *NewV) { + SmallVector deadInsts; + for (auto it = V->user_begin(); it != V->user_end();) { + User *U = *it; + // Prevent double User iteration when multiple Uses in same User + while (it != V->user_end() && *it == U) + ++it; + if (GEPOperator *GEP = dyn_cast(U)) { + if (!GEP->user_empty()) + lowerUseWithNewValue(U, dxilutil::MirrorGEP(GEP, NewV)); + } else { + lowerUserWithNewValue(U, V, NewV); + } + if (Instruction *I = dyn_cast(U)) + if (I->user_empty()) + deadInsts.push_back(I); + } + for (auto I : deadInsts) + I->eraseFromParent(); +} + +} + +char LowerWaveMatType::ID = 0; + +INITIALIZE_PASS(LowerWaveMatType, "hlsl-lower-wavematrix-type", + "Lower WaveMatrix types to dxil type", false, + false) + +// Public interface to the LowerWaveMatType pass +ModulePass *llvm::createLowerWaveMatTypePass() { + return new LowerWaveMatType(); +} diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index b3589884aa..42f5fd8923 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -5750,7 +5750,9 @@ static void LegalizeDxilInputOutputs(Function *F, case DxilParamInputQual::InPayload: case DxilParamInputQual::InputPrimitive: case DxilParamInputQual::InputPatch: - case DxilParamInputQual::OutputPatch: { + case DxilParamInputQual::OutputPatch: + case DxilParamInputQual::NodeIO: + { bStoreInputToTemp = true; } break; case DxilParamInputQual::Inout: diff --git a/tools/clang/include/clang/AST/BuiltinTypes.def 
b/tools/clang/include/clang/AST/BuiltinTypes.def index 83e9b942cf..0887f9a319 100644 --- a/tools/clang/include/clang/AST/BuiltinTypes.def +++ b/tools/clang/include/clang/AST/BuiltinTypes.def @@ -90,18 +90,18 @@ UNSIGNED_TYPE(UInt, UnsignedIntTy) // 'unsigned long' UNSIGNED_TYPE(ULong, UnsignedLongTy) +// HLSL Change - 'int8_t4_packed' +UNSIGNED_TYPE(Int8_4Packed, Int8_4PackedTy) + +// HLSL Change - 'uint8_t4_packed' +UNSIGNED_TYPE(UInt8_4Packed, UInt8_4PackedTy) + // 'unsigned long long' UNSIGNED_TYPE(ULongLong, UnsignedLongLongTy) // '__uint128_t' UNSIGNED_TYPE(UInt128, UnsignedInt128Ty) -// 'int8_t4_packed' -UNSIGNED_TYPE(Int8_4Packed, Int8_4PackedTy) - -// 'uint8_t4_packed' -UNSIGNED_TYPE(UInt8_4Packed, UInt8_4PackedTy) - //===- Signed Types -------------------------------------------------------===// // 'char' for targets where it's signed diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index d11e6d8d17..d3456ff806 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -20,6 +20,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" #include "dxc/DXIL/DxilConstants.h" +#include "dxc/DXIL/DxilNodeProps.h" #include "dxc/WinAdapter.h" #include "llvm/Support/Casting.h" #include "llvm/ADT/ArrayRef.h" @@ -313,11 +314,17 @@ void AddHLSLVectorTemplate( clang::ASTContext& context, _Outptr_ clang::ClassTemplateDecl** vectorTemplateDecl); +void AddHLSLNodeOutputRecordTemplate( + clang::ASTContext &context, llvm::StringRef templateName, + _Outptr_ clang::ClassTemplateDecl **outputRecordTemplateDecl, + bool isCompleteType = true); + clang::CXXRecordDecl* DeclareRecordTypeWithHandle( - clang::ASTContext& context, llvm::StringRef name); + clang::ASTContext& context, llvm::StringRef name, bool isCompleteType = true); void AddRaytracingConstants(clang::ASTContext& context); void AddSamplerFeedbackConstants(clang::ASTContext& context); +void 
AddBarrierConstants(clang::ASTContext& context); /// Adds the implementation for std::is_equal. void AddStdIsEqualImplementation(clang::ASTContext& context, clang::Sema& sema); @@ -331,8 +338,8 @@ void AddStdIsEqualImplementation(clang::ASTContext& context, clang::Sema& sema); clang::CXXRecordDecl* DeclareTemplateTypeWithHandle( clang::ASTContext& context, llvm::StringRef name, - uint8_t templateArgCount, - _In_opt_ clang::TypeSourceInfo* defaultTypeArgValue); + uint8_t templateArgCount = 1, + _In_opt_ clang::TypeSourceInfo* defaultTypeArgValue = nullptr); clang::CXXRecordDecl* DeclareTemplateTypeWithHandleInDeclContext( clang::ASTContext& context, @@ -342,14 +349,28 @@ clang::CXXRecordDecl* DeclareTemplateTypeWithHandleInDeclContext( _In_opt_ clang::TypeSourceInfo* defaultTypeArgValue); clang::CXXRecordDecl* DeclareUIntTemplatedTypeWithHandle( - clang::ASTContext& context, llvm::StringRef typeName, llvm::StringRef templateParamName); + clang::ASTContext& context, llvm::StringRef typeName, llvm::StringRef templateParamName, + clang::TagTypeKind tagKind = clang::TagTypeKind::TTK_Class); clang::CXXRecordDecl *DeclareUIntTemplatedTypeWithHandleInDeclContext( clang::ASTContext &context, clang::DeclContext *declContext, - llvm::StringRef typeName, llvm::StringRef templateParamName); + llvm::StringRef typeName, llvm::StringRef templateParamName, + clang::TagTypeKind tagKind = clang::TagTypeKind::TTK_Class); clang::CXXRecordDecl *DeclareConstantBufferViewType(clang::ASTContext& context, bool bTBuf); clang::CXXRecordDecl* DeclareRayQueryType(clang::ASTContext& context); +clang::CXXRecordDecl *DeclareWaveMatrixType(clang::ASTContext& context, DXIL::WaveMatrixKind kind); clang::CXXRecordDecl *DeclareResourceType(clang::ASTContext &context, bool bSampler); + +clang::CXXRecordDecl* DeclareNodeOrRecordType(clang::ASTContext& Ctx, llvm::StringRef TypeName, + bool IsRecordTypeTemplate = false, bool IsConst = false, bool HasGetMethods = false, + bool IsArray = false, bool 
IsCompleteType = false); + +clang::CXXRecordDecl* DeclareNodeOutputArray(clang::ASTContext& Ctx, llvm::StringRef TypeName, + clang::CXXRecordDecl* OutputType, bool IsRecordTypeTemplate, bool IsCompleteType); + +clang::CXXRecordDecl* DeclareRecordTypeWithHandleAndNoMemberFunctions( + clang::ASTContext& context, llvm::StringRef name); + clang::VarDecl *DeclareBuiltinGlobal(llvm::StringRef name, clang::QualType Ty, clang::ASTContext &context); @@ -399,8 +420,15 @@ bool IsHLSLLineStreamType(clang::QualType type); bool IsHLSLTriangleStreamType(clang::QualType type); bool IsHLSLStreamOutputType(clang::QualType type); bool IsHLSLResourceType(clang::QualType type); +bool IsHLSLNodeInputType(clang::QualType type); bool IsHLSLDynamicResourceType(clang::QualType type); -bool IsHLSLBufferViewType(clang::QualType type); +bool IsHLSLNodeType(clang::QualType type); +bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type); +bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type); +bool IsHLSLRWNodeInputRecordType(clang::QualType type); +bool IsHLSLRONodeInputRecordType(clang::QualType type); +bool IsHLSLNodeOutputType(clang::QualType type); + bool IsHLSLStructuredBufferType(clang::QualType type); bool IsHLSLNumericOrAggregateOfNumericType(clang::QualType type); bool IsHLSLNumericUserDefinedType(clang::QualType type); @@ -420,6 +448,7 @@ bool IsHLSLSubobjectType(clang::QualType type); bool GetHLSLSubobjectKind(clang::QualType type, DXIL::SubobjectKind &subobjectKind, DXIL::HitGroupType &ghType); bool IsHLSLRayQueryType(clang::QualType type); +bool GetHLSLNodeIORecordType(const clang::ParmVarDecl *parmDecl, NodeFlags &nodeKind); bool IsArrayConstantStringType(const clang::QualType type); bool IsPointerStringType(const clang::QualType type); @@ -441,6 +470,7 @@ bool GetIntrinsicOp(const clang::FunctionDecl *FD, unsigned &opcode, llvm::StringRef &group); bool GetIntrinsicLowering(const clang::FunctionDecl *FD, llvm::StringRef &S); +llvm::StringRef 
GetWaveMatrixName(DXIL::WaveMatrixKind kind); bool IsUserDefinedRecordType(clang::QualType type); bool DoesTypeDefineOverloadedOperator(clang::QualType typeWithOperator, clang::OverloadedOperatorKind opc, diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index d5d8cace9b..37b1faa06c 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -853,7 +853,7 @@ def HLSLGloballyCoherent : InheritableAttr { def HLSLShader : InheritableAttr { let Spellings = [CXX11<"", "shader", 2017>]; - let Args = [StringArgument<"stage">]; // one of compute, pixel, vertex, hull, domain, geomery + let Args = [StringArgument<"stage">]; // one of compute, pixel, vertex, hull, domain, geometry, node let Documentation = [Undocumented]; } @@ -931,6 +931,87 @@ def HLSLCXXOverload : InheritableAttr { let Documentation = [Undocumented]; } +def HLSLNodeLaunch : InheritableAttr { + let Spellings = [CXX11<"", "nodelaunch", 2017>]; + let Args = [StringArgument<"LaunchType">]; // one of broadcasting, coalescing, thread + let Documentation = [Undocumented]; +} + +def HLSLNodeIsProgramEntry : InheritableAttr { + let Spellings = [CXX11<"", "nodeisprogramentry", 2017>]; + let Documentation = [Undocumented]; +} + +def HLSLNodeId : InheritableAttr { + let Spellings = [CXX11<"", "nodeid", 2017>]; + let Args = [StringArgument<"Name">,DefaultIntArgument<"ArrayIndex", 0>]; + let Documentation = [Undocumented]; +} + +def HLSLNodeLocalRootArgumentsTableIndex : InheritableAttr { + let Spellings = [CXX11<"", "nodelocalrootargumentstableindex", 2017>]; + let Args = [UnsignedArgument<"Index">]; + let Documentation = [Undocumented]; +} + +def HLSLNodeShareInputOf : InheritableAttr { + let Spellings = [CXX11<"", "nodeshareinputof", 2017>]; + let Args = [StringArgument<"Name">,UnsignedArgument<"ArrayIndex", 1>]; + let Documentation = [Undocumented]; +} + +def HLSLNodeDispatchGrid: InheritableAttr { + let Spellings = [CXX11<"", 
"nodedispatchgrid", 2015>]; + let Args = [UnsignedArgument<"X">, UnsignedArgument<"Y">, UnsignedArgument<"Z">]; + let Documentation = [Undocumented]; +} + +def HLSLNodeMaxDispatchGrid: InheritableAttr { + let Spellings = [CXX11<"", "nodemaxdispatchgrid", 2015>]; + let Args = [UnsignedArgument<"X">, UnsignedArgument<"Y">, UnsignedArgument<"Z">]; + let Documentation = [Undocumented]; +} + +def HLSLNodeMaxRecursionDepth : InheritableAttr { + let Spellings = [CXX11<"", "nodemaxrecursiondepth", 2017>]; + let Args = [UnsignedArgument<"Count">]; + let Documentation = [Undocumented]; +} + +def HLSLNodeTrackRWInputSharing : InheritableAttr { + let Spellings = [CXX11<"", "nodetrackrwinputsharing", 2017>]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [Undocumented]; +} + + +// HLSL Parameter Attributes + +def HLSLMaxRecords : InheritableAttr { + let Spellings = [CXX11<"", "MaxRecords", 2015>]; + let Args = [IntArgument<"maxCount">]; + let Documentation = [Undocumented]; +} + +def HLSLMaxRecordsSharedWith : InheritableParamAttr { + let Spellings = [CXX11<"", "maxrecordssharedwith", 2015>]; + let Args = [IdentifierArgument<"Name">]; + let Subjects = SubjectList<[ParmVar]>; + let Documentation = [Undocumented]; +} + +def HLSLAllowSparseNodes : InheritableParamAttr { + let Spellings = [CXX11<"", "allowsparsenodes", 2015>]; + let Subjects = SubjectList<[ParmVar]>; + let Documentation = [Undocumented]; +} + +def HLSLNodeArraySize : InheritableAttr { + let Spellings = [CXX11<"", "NodeArraySize", 2015>]; + let Args = [IntArgument<"count">]; + let Documentation = [Undocumented]; +} + // HLSL Change Ends // SPIRV Change Starts diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 97614a205d..0add4143ef 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7786,6 +7786,24 @@ def err_hlsl_ternary_scalar : Error< 
def warn_hlsl_structurize_exits_lifetime_markers_conflict : Warning < "structurize-returns skipped function '%0' due to incompatibility with lifetime markers. Use -disable-lifetime-markers to enable structurize-exits on this function.">, InGroup< HLSLStructurizeExitsLifetimeMarkersConflict >; +def err_hlsl_wg_intrinsic_launch_type : Error< + "%0 may only be used by nodes with %1 launch type">; +def err_hlsl_wg_thread_launch_group_size : Error< + "Thread launch nodes must have a thread group size of (1,1,1)">; +def err_hlsl_wg_nodetrackrwinputsharing_missing : Error< + "Use of FinishedCrossGroupSharing() requires NodeTrackRWInputSharing attribute to be specified on the record struct type">; +def err_hlsl_wg_input_kind : Error< + "%0 may not be used with %1 launch nodes">; +def err_hlsl_rwnodeinputrecord_sv_dispatch : Error< + "a RWNodeInputRecord field with SV_DispatchGrid semantic is not assignable">; +def err_hlsl_maxarraysize_template_arg : Error< + "the %0 maxArraySize template argument must be an integer constant expression with value >= 1">; +def err_hlsl_dispatchgrid_component : Error< + "%0 %1 component value must be between 1 and 65,535 (2^16-1) inclusive">; +def err_hlsl_dispatchgrid_product : Error< + "%0 X * Y * Z product may not exceed 16,777,215 (2^24-1)">; +def err_hlsl_compute_compatibility : Error< + "Node shader '%0' with %1 is not compatible with compute">; // HLSL Change Ends // SPIRV Change Starts diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index ce3ff38c30..8ff0222f11 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -407,13 +407,116 @@ void hlsl::AddHLSLVectorTemplate(ASTContext& context, ClassTemplateDecl** vector *vectorTemplateDecl = classTemplateDecl; } +static void AddRecordAccessMethod(clang::ASTContext &Ctx, + clang::CXXRecordDecl *RD, + clang::QualType ReturnTy, + bool IsGetOrSubscript, bool IsConst, + bool IsArray) { + DeclarationName 
DeclName = + IsGetOrSubscript ? DeclarationName(&Ctx.Idents.get("Get")) + : Ctx.DeclarationNames.getCXXOperatorName(OO_Subscript); + + if (IsConst) + ReturnTy.addConst(); + + ReturnTy = Ctx.getLValueReferenceType(ReturnTy); + + QualType ArgTypes[] = {Ctx.UnsignedIntTy}; + ArrayRef Types = IsArray ? ArgTypes : ArrayRef(); + StringRef ArgNames[] = {"Index"}; + ArrayRef Names = IsArray ? ArgNames : ArrayRef(); + + CXXMethodDecl *MethodDecl = CreateObjectFunctionDeclarationWithParams( + Ctx, RD, ReturnTy, Types, Names, DeclName, IsConst); + + if (IsGetOrSubscript && IsArray) { + ParmVarDecl *IndexParam = MethodDecl->getParamDecl(0); + Expr *ConstantZero = IntegerLiteral::Create( + Ctx, llvm::APInt(Ctx.getIntWidth(Ctx.UnsignedIntTy), 0), + Ctx.UnsignedIntTy, NoLoc); + IndexParam->setDefaultArg(ConstantZero); + } + + StringRef OpcodeGroup = GetHLOpcodeGroupName(HLOpcodeGroup::HLIntrinsic); + unsigned Opcode = + static_cast(IntrinsicOp::IOP_ExtractRecordStructFromArray); + MethodDecl->addAttr(HLSLIntrinsicAttr::CreateImplicit( + Ctx, OpcodeGroup, "ExtractRecordStructFromArray", Opcode)); + MethodDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(Ctx)); +} + +static void AddRecordGetMethods(clang::ASTContext &Ctx, + clang::CXXRecordDecl *RD, + clang::QualType ReturnTy, bool IsConstOnly, + bool IsArray) { + if (!IsConstOnly) + AddRecordAccessMethod(Ctx, RD, ReturnTy, true, false, IsArray); + AddRecordAccessMethod(Ctx, RD, ReturnTy, true, true, IsArray); +} + +static void AddRecordSubscriptAccess(clang::ASTContext &Ctx, + clang::CXXRecordDecl *RD, + clang::QualType ReturnTy, + bool IsConstOnly) { + if (!IsConstOnly) + AddRecordAccessMethod(Ctx, RD, ReturnTy, false, false, true); + AddRecordAccessMethod(Ctx, RD, ReturnTy, false, true, true); +} + +/// Adds up-front support for HLSL *NodeOutputRecords template +/// types. 
+void hlsl::AddHLSLNodeOutputRecordTemplate( + ASTContext &context, StringRef templateName, + ClassTemplateDecl **outputRecordTemplateDecl, + bool isCompleteType /*= true*/) { + DXASSERT_NOMSG(outputRecordTemplateDecl != nullptr); + + // Create a *NodeOutputRecords template declaration in translation unit scope. + BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), + templateName, + TagDecl::TagKind::TTK_Struct); + TemplateTypeParmDecl *outputTemplateParamDecl = + typeDeclBuilder.addTypeTemplateParam("recordType"); + typeDeclBuilder.startDefinition(); + ClassTemplateDecl *classTemplateDecl = typeDeclBuilder.getTemplateDecl(); + + // Add an 'h' field to hold the handle. + typeDeclBuilder.addField("h", GetHLSLObjectHandleType(context)); + + QualType elementType = context.getTemplateTypeParmType( + 0, 0, ParameterPackFalse, outputTemplateParamDecl); + + CXXRecordDecl *record = typeDeclBuilder.getRecordDecl(); + + // Subscript operator is required for Node Array Types. + AddRecordSubscriptAccess(context, record, elementType, false); + AddRecordGetMethods(context, record, elementType, false, true); + + if (isCompleteType) + typeDeclBuilder.completeDefinition(); + *outputRecordTemplateDecl = classTemplateDecl; +} + /// /// Adds a new record type in the specified context with the given name. The record type will have a handle field. /// -CXXRecordDecl* hlsl::DeclareRecordTypeWithHandle(ASTContext& context, StringRef name) { +CXXRecordDecl* hlsl::DeclareRecordTypeWithHandleAndNoMemberFunctions(ASTContext& context, StringRef name) { BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), name, TagDecl::TagKind::TTK_Struct); typeDeclBuilder.startDefinition(); typeDeclBuilder.addField("h", GetHLSLObjectHandleType(context)); + typeDeclBuilder.completeDefinition(); + return typeDeclBuilder.getRecordDecl(); +} + +/// +/// Adds a new record type in the specified context with the given name. The record type will have a handle field. 
+/// +CXXRecordDecl* hlsl::DeclareRecordTypeWithHandle(ASTContext& context, StringRef name, bool isCompleteType /*= true */) { + BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), name, TagDecl::TagKind::TTK_Struct); + typeDeclBuilder.startDefinition(); + typeDeclBuilder.addField("h", GetHLSLObjectHandleType(context)); + if (isCompleteType) + return typeDeclBuilder.completeDefinition(); return typeDeclBuilder.getRecordDecl(); } @@ -503,6 +606,23 @@ void hlsl::AddSamplerFeedbackConstants(ASTContext& context) { AddConstUInt(context, StringRef("SAMPLER_FEEDBACK_MIP_REGION_USED"), (unsigned)DXIL::SamplerFeedbackType::MipRegionUsed); } +/// Adds all enums for Barrier intrinsic +void hlsl::AddBarrierConstants(ASTContext& context) { + AddTypedefPseudoEnum(context, "MEMORY_TYPE_FLAG", { + { "UAV_MEMORY", (unsigned)DXIL::MemoryTypeFlag::UavMemory }, + { "GROUP_SHARED_MEMORY", (unsigned)DXIL::MemoryTypeFlag::GroupSharedMemory }, + { "NODE_INPUT_MEMORY", (unsigned)DXIL::MemoryTypeFlag::NodeInputMemory }, + { "NODE_OUTPUT_MEMORY", (unsigned)DXIL::MemoryTypeFlag::NodeOutputMemory } + }); + AddTypedefPseudoEnum(context, "ACCESS_FLAG", { + { "DEVICE_VISIBLE", (unsigned)DXIL::AccessFlag::DeviceVisible }, + { "GROUP_VISIBLE", (unsigned)DXIL::AccessFlag::GroupVisible } + }); + AddTypedefPseudoEnum(context, "SYNC_FLAG", { + { "GROUP_SYNC", (unsigned)DXIL::SyncFlag::GroupSync } + }); +} + static Expr* IntConstantAsBoolExpr(clang::Sema& sema, uint64_t value) { @@ -863,20 +983,24 @@ CXXMethodDecl* hlsl::CreateObjectFunctionDeclarationWithParams( return functionDecl; } -CXXRecordDecl* hlsl::DeclareUIntTemplatedTypeWithHandle( - ASTContext& context, StringRef typeName, StringRef templateParamName) { +CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandle( + ASTContext &context, StringRef typeName, StringRef templateParamName, + TagTypeKind tagKind) { return DeclareUIntTemplatedTypeWithHandleInDeclContext( - context, context.getTranslationUnitDecl(), typeName, 
templateParamName); + context, context.getTranslationUnitDecl(), typeName, templateParamName, + tagKind); } CXXRecordDecl *hlsl::DeclareUIntTemplatedTypeWithHandleInDeclContext( ASTContext &context, DeclContext *declContext, StringRef typeName, - StringRef templateParamName) { + StringRef templateParamName, TagTypeKind tagKind) { // template FeedbackTexture2D[Array] { ... } - BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName); - typeDeclBuilder.addIntegerTemplateParam(templateParamName, context.UnsignedIntTy); + BuiltinTypeDeclBuilder typeDeclBuilder(declContext, typeName, tagKind); + typeDeclBuilder.addIntegerTemplateParam(templateParamName, + context.UnsignedIntTy); typeDeclBuilder.startDefinition(); - typeDeclBuilder.addField("h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. + typeDeclBuilder.addField( + "h", context.UnsignedIntTy); // Add an 'h' field to hold the handle. return typeDeclBuilder.getRecordDecl(); } @@ -921,6 +1045,24 @@ CXXRecordDecl* hlsl::DeclareRayQueryType(ASTContext& context) { return typeDeclBuilder.getRecordDecl(); } +clang::CXXRecordDecl *hlsl::DeclareWaveMatrixType(clang::ASTContext &context, + DXIL::WaveMatrixKind kind) { + StringRef Name = GetWaveMatrixName(kind); + BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), + Name); + typeDeclBuilder.addTypeTemplateParam("element"); + typeDeclBuilder.addIntegerTemplateParam("dimM", context.UnsignedIntTy); + typeDeclBuilder.addIntegerTemplateParam("dimN", context.UnsignedIntTy); + + typeDeclBuilder.startDefinition(); + CXXRecordDecl *templateRecordDecl = typeDeclBuilder.getRecordDecl(); + + // Add an 'h' field to hold the handle. 
+ typeDeclBuilder.addField("h", context.UnsignedIntTy); + + return templateRecordDecl; +} + CXXRecordDecl* hlsl::DeclareResourceType(ASTContext& context, bool bSampler) { // struct ResourceDescriptor { uint8 desc; } StringRef Name = bSampler?".Sampler":".Resource"; @@ -949,6 +1091,92 @@ CXXRecordDecl* hlsl::DeclareResourceType(ASTContext& context, bool bSampler) { return recordDecl; } +CXXRecordDecl *hlsl::DeclareNodeOrRecordType( + clang::ASTContext &Ctx, StringRef TypeName, bool IsRecordTypeTemplate, + bool IsConst, bool HasGetMethods, bool IsArray, bool IsCompleteType) { + + BuiltinTypeDeclBuilder Builder(Ctx.getTranslationUnitDecl(), TypeName, + TagDecl::TagKind::TTK_Struct); + TemplateTypeParmDecl *TyParamDecl = nullptr; + + if (IsRecordTypeTemplate) + TyParamDecl = Builder.addTypeTemplateParam("recordtype"); + + Builder.startDefinition(); + Builder.addField("h", GetHLSLObjectHandleType(Ctx)); + + if (IsRecordTypeTemplate) { + QualType ParamTy = QualType(TyParamDecl->getTypeForDecl(), 0); + CXXRecordDecl *Record = Builder.getRecordDecl(); + + if (HasGetMethods || IsArray) + AddRecordGetMethods(Ctx, Record, ParamTy, IsConst, IsArray); + + if (IsArray) + AddRecordSubscriptAccess(Ctx, Record, ParamTy, IsConst); + } + + if (IsCompleteType) + return Builder.completeDefinition(); + + return Builder.getRecordDecl(); +} + +CXXRecordDecl *hlsl::DeclareNodeOutputArray(clang::ASTContext &Ctx, + StringRef TypeName, + CXXRecordDecl *OutputType, + bool IsRecordTypeTemplate, + bool IsCompleteType) { + + BuiltinTypeDeclBuilder Builder(Ctx.getTranslationUnitDecl(), TypeName, + TagDecl::TagKind::TTK_Struct); + TemplateTypeParmDecl *elementTemplateParamDecl = nullptr; + + if (IsRecordTypeTemplate) + elementTemplateParamDecl = Builder.addTypeTemplateParam("recordtype"); + + Builder.startDefinition(); + Builder.addField("h", GetHLSLObjectHandleType(Ctx)); + QualType ResultType; + if (IsRecordTypeTemplate) { + QualType elementType = Ctx.getTemplateTypeParmType( + /*templateDepth*/ 
0, /*index*/ 0, ParameterPackFalse, + elementTemplateParamDecl); + + const clang::Type *nodeOutputTy = OutputType->getTypeForDecl(); + + TemplateArgument templateArgs[1] = {TemplateArgument(elementType)}; + + TemplateName canonName = Ctx.getCanonicalTemplateName( + TemplateName(OutputType->getDescribedClassTemplate())); + ResultType = Ctx.getTemplateSpecializationType(canonName, templateArgs, + _countof(templateArgs), + QualType(nodeOutputTy, 0)); + } else { + // For Non Template types(like EmptyNodeOutput) + ResultType = Ctx.getTypeDeclType(OutputType); + } + + QualType indexType = Ctx.UnsignedIntTy; + + auto methodDecl = CreateObjectFunctionDeclarationWithParams( + Ctx, Builder.getRecordDecl(), ResultType, ArrayRef(indexType), + ArrayRef(StringRef("index")), + Ctx.DeclarationNames.getCXXOperatorName(OO_Subscript), false); + + StringRef OpcodeGroup = + GetHLOpcodeGroupName(HLOpcodeGroup::HLIndexNodeHandle); + unsigned Opcode = static_cast(HLOpcodeGroup::HLIndexNodeHandle); + methodDecl->addAttr( + HLSLIntrinsicAttr::CreateImplicit(Ctx, OpcodeGroup, "", Opcode)); + methodDecl->addAttr(HLSLCXXOverloadAttr::CreateImplicit(Ctx)); + + if (IsCompleteType) + return Builder.completeDefinition(); + + return Builder.getRecordDecl(); +} + VarDecl *hlsl::DeclareBuiltinGlobal(llvm::StringRef name, clang::QualType Ty, clang::ASTContext &context) { IdentifierInfo &II = context.Idents.get(name); @@ -991,6 +1219,19 @@ bool hlsl::GetIntrinsicLowering(const clang::FunctionDecl *FD, llvm::StringRef & return true; } +llvm::StringRef hlsl::GetWaveMatrixName(DXIL::WaveMatrixKind kind) { + DXASSERT_NOMSG(kind < DXIL::WaveMatrixKind::NumKinds); + static const char *typeNames[(unsigned)DXIL::WaveMatrixKind::NumKinds] = { + "WaveMatrixLeft", + "WaveMatrixRight", + "WaveMatrixLeftColAcc", + "WaveMatrixRightRowAcc", + "WaveMatrixAccumulator", + }; + return typeNames[(unsigned)kind]; +} + + /// Parses a column or row digit. 
static bool TryParseColOrRowChar(const char digit, _Out_ int* count) { diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 76e2db9719..49a25199f8 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -15,12 +15,14 @@ #include "dxc/Support/Global.h" #include "dxc/DXIL/DxilSemantic.h" +#include "dxc/DXIL/DxilNodeProps.h" #include "clang/AST/CanonicalType.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/HlslTypes.h" #include "clang/AST/Type.h" #include "clang/Sema/AttributeList.h" // conceptually ParsedAttributes #include "clang/AST/ASTContext.h" +#include "llvm/ADT/StringSwitch.h" using namespace clang; @@ -152,6 +154,55 @@ bool IsHLSLAggregateType(clang::QualType type) { return IsUserDefinedRecordType(type); } +hlsl::NodeFlags GetNodeKind(llvm::StringRef nodekind) { + DXIL::NodeIOKind nodeKind = + llvm::StringSwitch(nodekind) + .Case("EmptyNodeInput", DXIL::NodeIOKind::EmptyInput) + .Case("DispatchNodeInputRecord", + DXIL::NodeIOKind::DispatchNodeInputRecord) + .Case("RWDispatchNodeInputRecord", + DXIL::NodeIOKind::RWDispatchNodeInputRecord) + .Case("GroupNodeInputRecords", + DXIL::NodeIOKind::GroupNodeInputRecords) + .Case("RWGroupNodeInputRecords", + DXIL::NodeIOKind::RWGroupNodeInputRecords) + .Case("ThreadNodeInputRecord", + DXIL::NodeIOKind::ThreadNodeInputRecord) + .Case("RWThreadNodeInputRecord", + DXIL::NodeIOKind::RWThreadNodeInputRecord) + .Case("NodeOutput", DXIL::NodeIOKind::NodeOutput) + .Case("NodeOutputArray", DXIL::NodeIOKind::NodeOutputArray) + .Case("EmptyNodeOutput", DXIL::NodeIOKind::EmptyOutput) + .Case("EmptyNodeOutputArray", DXIL::NodeIOKind::EmptyOutputArray) + .Case("ThreadNodeOutputRecords", + DXIL::NodeIOKind::ThreadNodeOutputRecords) + .Case("GroupNodeOutputRecords", + DXIL::NodeIOKind::GroupNodeOutputRecords) + .Default(DXIL::NodeIOKind::Invalid); + + return NodeFlags(nodeKind); +} + +bool GetHLSLNodeIORecordType(const ParmVarDecl *parmDecl, NodeFlags 
&nodeKind) { + clang::QualType paramTy = parmDecl->getType().getCanonicalType(); + + if (auto arrayType = dyn_cast(paramTy)) + paramTy = arrayType->getElementType(); + + llvm::StringRef name; + if (const RecordType *RT = dyn_cast(paramTy)) { + if (const ClassTemplateSpecializationDecl *templateDecl = + dyn_cast(RT->getDecl())) + name = templateDecl->getName(); + else + name = paramTy->getAsCXXRecordDecl()->getName(); + + nodeKind = GetNodeKind(name); + return nodeKind.IsValidNodeKind(); + } + return false; +} + clang::QualType GetElementTypeOrType(clang::QualType type) { if (const RecordType *RT = type->getAs()) { if (const ClassTemplateSpecializationDecl *templateDecl = @@ -392,6 +443,17 @@ void GetRowsAndColsForAny(QualType type, uint32_t &rowCount, const TemplateArgument &arg1 = argList[1]; llvm::APSInt rowSize = arg1.getAsIntegral(); colCount = rowSize.getLimitedValue(); + } else if (templateDecl->getName().startswith("WaveMatrix")) { + auto name = templateDecl->getName(); + if (name == "WaveMatrixLeft" || + name == "WaveMatrixRight" || + name == "WaveMatrixLeftColAcc" || + name == "WaveMatrixRightRowAcc" || + name == "WaveMatrixAccumulator") { + const TemplateArgumentList &argList = templateDecl->getTemplateArgs(); + rowCount = argList[1].getAsIntegral().getLimitedValue(); + colCount = argList[2].getAsIntegral().getLimitedValue(); + } } } } @@ -571,6 +633,18 @@ bool IsHLSLResourceType(clang::QualType type) { return false; } +bool IsHLSLNodeInputType(clang::QualType type) { + if (const RecordType* RT = type->getAs()) { + StringRef name = RT->getDecl()->getName(); + if (name == "EmptyNodeInput" || + name == "DispatchNodeInputRecord" || name == "RWDispatchNodeInputRecord" || + name == "GroupNodeInputRecords" || name == "RWGroupNodeInputRecords" || + name == "ThreadNodeInputRecord" || name == "RWThreadNodeInputRecord") + return true; + } + return false; +} + bool IsHLSLDynamicResourceType(clang::QualType type) { if (const RecordType *RT = type->getAs()) { 
StringRef name = RT->getDecl()->getName(); @@ -579,7 +653,26 @@ bool IsHLSLDynamicResourceType(clang::QualType type) { return false; } -bool IsHLSLBufferViewType(clang::QualType type) { +bool IsHLSLNodeType(clang::QualType type) { + if (const RecordType *RT = type->getAs()) { + StringRef name = RT->getDecl()->getName(); + if (name == "EmptyNodeInput" || name == "DispatchNodeInputRecord" || + name == "RWDispatchNodeInputRecord" || + name == "GroupNodeInputRecords" || name == "RWGroupNodeInputRecords" || + name == "ThreadNodeInputRecord" || name == "RWThreadNodeInputRecord") + return true; + + if (name == "NodeOutput" || name == "NodeOutputArray" || + name == "EmptyNodeOutput" || name == "EmptyNodeOutputArray") + return true; + + if (name == "ThreadNodeOutputRecords" || name == "GroupNodeOutputRecords") + return true; + } + return false; +} + +bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type) { if (const RecordType *RT = type->getAs()) { StringRef name = RT->getDecl()->getName(); if (name == "ConstantBuffer" || name == "TextureBuffer") @@ -588,6 +681,46 @@ bool IsHLSLBufferViewType(clang::QualType type) { return false; } +bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type) { + if (const RecordType *RT = type->getAs()) { + StringRef name = RT->getDecl()->getName(); + // Read-only records + if (name == "ConstantBuffer" || name == "TextureBuffer") + return true; + } + return false; +} + +bool IsHLSLRWNodeInputRecordType(clang::QualType type) { + if (const RecordType *RT = type->getAs()) { + StringRef name = RT->getDecl()->getName(); + if (name == "RWDispatchNodeInputRecord" || + name == "RWGroupNodeInputRecords" || name == "RWThreadNodeInputRecord") + return true; + } + return false; +} + +bool IsHLSLRONodeInputRecordType(clang::QualType type) { + if (const RecordType *RT = type->getAs()) { + StringRef name = RT->getDecl()->getName(); + if (name == "DispatchNodeInputRecord" || name == "GroupNodeInputRecords" || + name == 
"ThreadNodeInputRecord") + return true; + } + return false; +} + +bool IsHLSLNodeOutputType(clang::QualType type) { + if (const RecordType *RT = type->getAs()) { + StringRef name = RT->getDecl()->getName(); + if (name == "NodeOutput" || name == "NodeOutputArray" || + name == "EmptyNodeOutput" || name == "EmptyNodeOutputArray") + return true; + } + return false; +} + bool IsHLSLStructuredBufferType(clang::QualType type) { if (const RecordType *RT = type->getAs()) { StringRef name = RT->getDecl()->getName(); diff --git a/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/tools/clang/lib/CodeGen/CGDebugInfo.cpp index 11c3337ee2..87eb444c9a 100644 --- a/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1079,7 +1079,8 @@ bool CGDebugInfo::TryCollectHLSLRecordElements(const RecordType *Ty, return true; } - else if (hlsl::IsHLSLResourceType(QualTy) || hlsl::IsHLSLStreamOutputType(QualTy)) { + else if (hlsl::IsHLSLResourceType(QualTy) || hlsl::IsHLSLNodeType(QualTy) || + hlsl::IsHLSLStreamOutputType(QualTy)) { // Should appear as having no members rather than exposing our internal handles. return true; } diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp index 84fc806da8..ae27b4b3ec 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp @@ -51,6 +51,7 @@ #include "dxc/HLSL/DxilGenerationPass.h" // support pause/resume passes #include "dxc/HLSL/DxilExportMap.h" #include "dxc/DXIL/DxilResourceProperties.h" +#include "dxc/DXIL/DxilWaveMatrix.h" #include "CGHLSLMSHelper.h" @@ -85,12 +86,15 @@ class CGMSHLSLRuntime : public CGHLSLRuntime { llvm::SmallVector, 1>> constantRegBindingMap; - // Map from value to resource properties. - // This only collect object variables(global/local/parameter), not object fields inside struct. - // Object fields inside struct is saved by TypeAnnotation. + // Adds value to DxilObjectProperties if it's resource or wave matrix. 
// Returns true if added to one. bool AddValToPropertyMap(Value *V, QualType Ty); CGHLSLMSHelper::DxilObjectProperties objectProperties; + + // Map to value to node properties + + llvm::MapVector NodeInputRecordParams; + llvm::MapVector NodeOutputParams; bool m_bDebugInfo; bool m_bIsLib; @@ -210,6 +214,7 @@ class CGMSHLSLRuntime : public CGHLSLRuntime { unsigned AddTypeAnnotation(QualType Ty, DxilTypeSystem &dxilTypeSys, unsigned &arrayEltSize); DxilResourceProperties BuildResourceProperty(QualType resTy); + DxilWaveMatrixProperties BuildWaveMatrixProperties(QualType resTy); void ConstructFieldAttributedAnnotation(DxilFieldAnnotation &fieldAnnotation, QualType fieldTy, bool bDefaultRowMajor); @@ -282,6 +287,7 @@ class CGMSHLSLRuntime : public CGHLSLRuntime { llvm::Value *DestPtr, clang::QualType DestTy) override; void AddHLSLFunctionInfo(llvm::Function *, const FunctionDecl *FD) override; + void AddHLSLNodeRecordTypeInfo(const clang::ParmVarDecl* parmDecl, hlsl::NodeIOProperties& node); void EmitHLSLFunctionProlog(llvm::Function *, const FunctionDecl *FD) override; void AddControlFlowHint(CodeGenFunction &CGF, const Stmt &S, @@ -551,6 +557,19 @@ StringToMeshOutputTopology(StringRef topology) { return DXIL::MeshOutputTopology::Undefined; } +static DXIL::NodeLaunchType +StringToNodeLaunchType(StringRef launchType) { + if (launchType.equals_lower("broadcasting")) + return DXIL::NodeLaunchType::Broadcasting; + if (launchType.equals_lower("coalescing")) + return DXIL::NodeLaunchType::Coalescing; + if (launchType.equals_lower("thread")) + return DXIL::NodeLaunchType::Thread; + + DXASSERT(false, "Invalid Node Launch Type"); + return DXIL::NodeLaunchType::Invalid; +} + static unsigned GetMatrixSizeInCB(QualType Ty, bool defaultRowMajor, bool b64Bit) { bool bRowMajor; @@ -754,8 +773,31 @@ DxilResourceProperties CGMSHLSLRuntime::BuildResourceProperty(QualType resTy) { return RP; } +DxilWaveMatrixProperties +CGMSHLSLRuntime::BuildWaveMatrixProperties(QualType qualTy) { + 
DxilWaveMatrixProperties props; + llvm::Type *Ty = CGM.getTypes().ConvertType(qualTy); + if (dxilutil::IsHLSLWaveMatrixType(Ty, &props.kind)) { + const CXXRecordDecl *CXXRD = + qualTy.getCanonicalType()->getAsCXXRecordDecl(); + if (const ClassTemplateSpecializationDecl *templateSpecializationDecl = + dyn_cast(CXXRD)) { + const clang::TemplateArgumentList &args = + templateSpecializationDecl->getTemplateInstantiationArgs(); + DXASSERT(args[0].getAsType()->isBuiltinType(), + "otherwise, wrong kind of component type"); + const BuiltinType *BTy = args[0].getAsType()->getAs(); + props.compType = BuiltinTyToCompTy(BTy, false, false); + props.dimM = (unsigned)args[1].getAsIntegral().getExtValue(); + props.dimN = (unsigned)args[2].getAsIntegral().getExtValue(); + } + } + return props; +} + bool CGMSHLSLRuntime::AddValToPropertyMap(Value *V, QualType Ty) { - return objectProperties.AddResource(V, BuildResourceProperty(Ty)); + return objectProperties.AddResource(V, BuildResourceProperty(Ty)) || + objectProperties.AddWaveMatrix(V, BuildWaveMatrixProperties(Ty)); } void CGMSHLSLRuntime::ConstructFieldAttributedAnnotation( @@ -779,8 +821,12 @@ void CGMSHLSLRuntime::ConstructFieldAttributedAnnotation( EltTy = hlsl::GetHLSLMatElementType(Ty); } - if (hlsl::IsHLSLVecType(Ty)) + if (hlsl::IsHLSLVecType(Ty)) { + unsigned rows, cols; + hlsl::GetRowsAndColsForAny(Ty, rows, cols); + fieldAnnotation.SetVectorSize(cols); EltTy = hlsl::GetHLSLVecElementType(Ty); + } if (IsHLSLResourceType(Ty)) { fieldAnnotation.SetResourceProperties(BuildResourceProperty(Ty)); @@ -822,7 +868,7 @@ static void ConstructFieldInterpolation(DxilFieldAnnotation &fieldAnnotation, static unsigned AlignBaseOffset(unsigned baseOffset, unsigned size, QualType Ty, bool bDefaultRowMajor) { // Do not align if resource, since resource isn't really here. 
- if (IsHLSLResourceType(Ty)) + if (IsHLSLResourceType(Ty) || IsHLSLNodeType(Ty)) return baseOffset; bool needNewAlign = Ty->isArrayType(); @@ -1346,6 +1392,15 @@ static DxilResource::Kind KeywordToKind(StringRef keyword) { return DxilResource::Kind::Invalid; } +static void ReportMissingNodeDiag(DiagnosticsEngine &Diags, + const InheritableAttr *a) { + SourceLocation loc = a->getLocation(); + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "Attribute %0 only applies to node shaders " + "(indicated with '[shader(\"node\")]')"); + Diags.Report(loc, DiagID) << a->getSpelling(); +} + void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { // Add hlsl intrinsic attr unsigned intrinsicOpcode; @@ -1378,8 +1433,12 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { F->addFnAttr(DXIL::kFP32DenormKindString, DXIL::kFP32DenormValueAnyString); } // Set entry function + const ShaderModel *SM = m_pHLModule->GetShaderModel(); const std::string &entryName = m_pHLModule->GetEntryFunctionName(); - bool isEntry = FD->getNameAsString() == entryName; + bool isEntry = + !SM->IsLib() && + FD->getDeclContext()->getDeclKind() == Decl::Kind::TranslationUnit && + FD->getNameAsString() == entryName; if (isEntry) { Entry.Func = F; Entry.SL = FD->getLocation(); @@ -1390,6 +1449,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { std::unique_ptr funcProps = llvm::make_unique(); funcProps->shaderKind = DXIL::ShaderKind::Invalid; + funcProps->Node.LaunchType = DXIL::NodeLaunchType::Invalid; bool isCS = false; bool isGS = false; bool isHS = false; @@ -1399,97 +1459,131 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { bool isRay = false; bool isMS = false; bool isAS = false; - if (const HLSLShaderAttr *Attr = FD->getAttr()) { - // Stage is already validate in HandleDeclAttributeForHLSL. - // Here just check first letter (or two). 
- switch (Attr->getStage()[0]) { - case 'c': - switch (Attr->getStage()[1]) { - case 'o': - isCS = true; - funcProps->shaderKind = DXIL::ShaderKind::Compute; - break; - case 'l': - isRay = true; - funcProps->shaderKind = DXIL::ShaderKind::ClosestHit; - break; - case 'a': - isRay = true; - funcProps->shaderKind = DXIL::ShaderKind::Callable; - break; - default: - break; - } - break; - case 'v': - isVS = true; - funcProps->shaderKind = DXIL::ShaderKind::Vertex; - break; - case 'h': - isHS = true; - funcProps->shaderKind = DXIL::ShaderKind::Hull; - break; - case 'd': - isDS = true; - funcProps->shaderKind = DXIL::ShaderKind::Domain; - break; - case 'g': - isGS = true; - funcProps->shaderKind = DXIL::ShaderKind::Geometry; - break; - case 'p': - isPS = true; - funcProps->shaderKind = DXIL::ShaderKind::Pixel; - break; - case 'r': - isRay = true; - funcProps->shaderKind = DXIL::ShaderKind::RayGeneration; - break; - case 'i': + bool isNode = false; + + // SetStageFlag returns true if valid as function attribute + auto SetStageFlag = [&](DXIL::ShaderKind shaderKind) -> bool { + switch(shaderKind) { + case DXIL::ShaderKind::Pixel: isPS = true; break; + case DXIL::ShaderKind::Vertex: isVS = true; break; + case DXIL::ShaderKind::Geometry: isGS = true; break; + case DXIL::ShaderKind::Hull: isHS = true; break; + case DXIL::ShaderKind::Domain: isDS = true; break; + case DXIL::ShaderKind::Compute: isCS = true; break; + case DXIL::ShaderKind::Mesh: isMS = true; break; + case DXIL::ShaderKind::Amplification: isAS = true; break; + case DXIL::ShaderKind::Node: isNode = true; break; + case DXIL::ShaderKind::ClosestHit: + case DXIL::ShaderKind::Callable: + case DXIL::ShaderKind::RayGeneration: + case DXIL::ShaderKind::Intersection: + case DXIL::ShaderKind::AnyHit: + case DXIL::ShaderKind::Miss: isRay = true; - funcProps->shaderKind = DXIL::ShaderKind::Intersection; - break; - case 'a': - switch (Attr->getStage()[1]) { - case 'm': - isAS = true; - funcProps->shaderKind = 
DXIL::ShaderKind::Amplification; - break; - case 'n': - isRay = true; - funcProps->shaderKind = DXIL::ShaderKind::AnyHit; - break; - default: - break; - } - break; - case 'm': - switch (Attr->getStage()[1]) { - case 'e': - isMS = true; - funcProps->shaderKind = DXIL::ShaderKind::Mesh; - break; - case 'i': - isRay = true; - funcProps->shaderKind = DXIL::ShaderKind::Miss; - break; - default: - break; - } break; + case DXIL::ShaderKind::Library: default: - break; + return false; } - if (funcProps->shaderKind == DXIL::ShaderKind::Invalid) { + return true; + }; + + clang::SourceLocation priorShaderAttrLoc; + enum class ShaderStageSource : unsigned { + Attribute, + Profile, + }; + + // Some diagnostic assumptions for shader attribute: + // - multiple shader attributes may exist in HLSL + // - duplicate attribute of same kind is ok + // - node attribute only combinable with compute + // - all attributes parsed before set from insertion or target shader model + + auto DiagShaderStage = [&priorShaderAttrLoc, &Diags]( + clang::SourceLocation diagLoc, + llvm::StringRef shaderStage, + ShaderStageSource source, bool bConflict) { + bool bFromProfile = source == ShaderStageSource::Profile; + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "%select{Invalid|Conflicting}0 shader %select{profile|attribute}1"); + Diags.Report(diagLoc, DiagID) << bConflict << bFromProfile; + if (priorShaderAttrLoc.isValid()) { unsigned DiagID = Diags.getCustomDiagID( - DiagnosticsEngine::Error, "Invalid profile for shader attribute"); - Diags.Report(Attr->getLocation(), DiagID); + DiagnosticsEngine::Note, "See conflicting shader attribute"); + Diags.Report(priorShaderAttrLoc, DiagID); + } + }; + + auto SetShaderKind = [&](clang::SourceLocation diagLoc, + DXIL::ShaderKind shaderKind, + llvm::StringRef shaderStage, + ShaderStageSource source) { + if (!SetStageFlag(shaderKind)) { + DiagShaderStage(diagLoc, shaderStage, source, false); } if (isEntry && isRay) { unsigned DiagID = 
Diags.getCustomDiagID( DiagnosticsEngine::Error, "Ray function cannot be used as a global entry point"); - Diags.Report(Attr->getLocation(), DiagID); + Diags.Report(diagLoc, DiagID); } + if (isEntry && isNode && !SM->IsCS()) { + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "Node function as global entry point must " + "be compiled to compute shader target"); + Diags.Report(diagLoc, DiagID); + } + if (isNode) { + if (isPS || isVS || isGS || isHS || isDS || isMS || isAS || isRay) { + DiagShaderStage(diagLoc, shaderStage, source, true); + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Note, + "'node' shader attribute is only compatible with 'compute' " + "shader attribute"); + Diags.Report(priorShaderAttrLoc, DiagID); + } + } else if (funcProps->shaderKind != DXIL::ShaderKind::Invalid && + funcProps->shaderKind != shaderKind) { + // Different kinds and not the node case, so it's a conflict. + DiagShaderStage(diagLoc, shaderStage, source, true); + } + // Update shaderKind, unless we would be overriding one with node, so when + // node+compute, kind = compute. Other conflicts are diagnosed above. + if (funcProps->shaderKind == DXIL::ShaderKind::Invalid || + shaderKind != DXIL::ShaderKind::Node) + funcProps->shaderKind = shaderKind; + }; + + // Used when a function attribute implies a particular stage. + // This will emit an error if the stage it implies conflicts with a stage set + // from some other source. 
+ auto CheckImpliedShaderStageAttr = [&SetShaderKind](clang::SourceLocation diagLoc, + DXIL::ShaderKind shaderKind) { + SetShaderKind(diagLoc, shaderKind, "", ShaderStageSource::Attribute); + }; + + auto ParseShaderStage = [&SetShaderKind](clang::SourceLocation diagLoc, llvm::StringRef shaderStage, ShaderStageSource source) { + if (!shaderStage.empty()) { + DXIL::ShaderKind shaderKind = ShaderModel::KindFromFullName(shaderStage); + SetShaderKind(diagLoc, shaderKind, shaderStage, source); + } + }; + + // Parse all shader attributes and report conflicts. + for (auto* Attr : FD->specific_attrs()) { + ParseShaderStage(Attr->getLocation(), Attr->getStage(), ShaderStageSource::Attribute); + priorShaderAttrLoc = Attr->getLocation(); + } + + if (isEntry) { + // Set shaderKind from the shader target profile + SetShaderKind(FD->getLocation(), SM->GetKind(), "", ShaderStageSource::Profile); + } + + if (isNode && isCS) { + DXASSERT(funcProps->shaderKind == DXIL::ShaderKind::Compute, + "If both Compute and Node are set, shaderKind should be set to Compute"); } // Save patch constant function to patchConstantFunctionMap. @@ -1523,28 +1617,17 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { funcProps->shaderKind = DXIL::ShaderKind::Hull; } - const ShaderModel *SM = m_pHLModule->GetShaderModel(); if (FD->hasAttr()) { if (SM->IsSM67Plus() && (funcProps->shaderKind == DXIL::ShaderKind::Pixel || (isEntry && SM->GetKind() == DXIL::ShaderKind::Pixel))) F->addFnAttr(DXIL::kWaveOpsIncludeHelperLanesString); } - if (isEntry) { - funcProps->shaderKind = SM->GetKind(); - if (funcProps->shaderKind == DXIL::ShaderKind::Mesh) { - isMS = true; - } - else if (funcProps->shaderKind == DXIL::ShaderKind::Amplification) { - isAS = true; - } - } // Geometry shader. 
if (const HLSLMaxVertexCountAttr *Attr = FD->getAttr()) { - isGS = true; - funcProps->shaderKind = DXIL::ShaderKind::Geometry; + CheckImpliedShaderStageAttr(Attr->getLocation(), DXIL::ShaderKind::Geometry); funcProps->ShaderProps.GS.maxVertexCount = Attr->getCount(); funcProps->ShaderProps.GS.inputPrimitive = DXIL::InputPrimitive::Undefined; @@ -1557,6 +1640,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { } } if (const HLSLInstanceAttr *Attr = FD->getAttr()) { + CheckImpliedShaderStageAttr(Attr->getLocation(), DXIL::ShaderKind::Geometry); unsigned instanceCount = Attr->getCount(); funcProps->ShaderProps.GS.instanceCount = instanceCount; if (isEntry && !SM->IsGS()) { @@ -1572,23 +1656,24 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { funcProps->ShaderProps.GS.instanceCount = 1; } - // Compute shader + // Populate numThreads if (const HLSLNumThreadsAttr *Attr = FD->getAttr()) { - if (isMS) { - funcProps->ShaderProps.MS.numThreads[0] = Attr->getX(); - funcProps->ShaderProps.MS.numThreads[1] = Attr->getY(); - funcProps->ShaderProps.MS.numThreads[2] = Attr->getZ(); - } else if (isAS) { - funcProps->ShaderProps.AS.numThreads[0] = Attr->getX(); - funcProps->ShaderProps.AS.numThreads[1] = Attr->getY(); - funcProps->ShaderProps.AS.numThreads[2] = Attr->getZ(); - } else { + if (!(isMS || isAS)) { + // Compute and/or node shader + if (isCS || funcProps->shaderKind == DXIL::ShaderKind::Invalid) + funcProps->shaderKind = DXIL::ShaderKind::Compute; isCS = true; - funcProps->shaderKind = DXIL::ShaderKind::Compute; + } + + funcProps->numThreads[0] = Attr->getX(); + funcProps->numThreads[1] = Attr->getY(); + funcProps->numThreads[2] = Attr->getZ(); - funcProps->ShaderProps.CS.numThreads[0] = Attr->getX(); - funcProps->ShaderProps.CS.numThreads[1] = Attr->getY(); - funcProps->ShaderProps.CS.numThreads[2] = Attr->getZ(); + if ((Attr->getX() * Attr->getY() * Attr->getZ()) > 1024) { + unsigned DiagID = 
Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "Thread group size may not exceed 1024"); + Diags.Report(Attr->getLocation(), DiagID); } if (isEntry && !SM->IsCS() && !SM->IsMS() && !SM->IsAS()) { @@ -1610,8 +1695,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { return; } - isHS = true; - funcProps->shaderKind = DXIL::ShaderKind::Hull; + CheckImpliedShaderStageAttr(Attr->getLocation(), DXIL::ShaderKind::Hull); HSEntryPatchConstantFuncAttr[F] = Attr; } else { // TODO: This is a duplicate check. We also have this check in @@ -1701,9 +1785,8 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { return; } - isDS = !isHS; - if (isDS) - funcProps->shaderKind = DXIL::ShaderKind::Domain; + if (!isHS) + CheckImpliedShaderStageAttr(Attr->getLocation(), DXIL::ShaderKind::Domain); DXIL::TessellatorDomain domain = StringToDomain(Attr->getDomainType()); if (isHS) @@ -1721,10 +1804,9 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { return; } - isVS = true; // The real job is done at EmitHLSLFunctionProlog where debug info is // available. Only set shader kind here. - funcProps->shaderKind = DXIL::ShaderKind::Vertex; + CheckImpliedShaderStageAttr(Attr->getLocation(), DXIL::ShaderKind::Vertex); } // Pixel shader. 
@@ -1738,9 +1820,8 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { return; } - isPS = true; + CheckImpliedShaderStageAttr(Attr->getLocation(), DXIL::ShaderKind::Pixel); funcProps->ShaderProps.PS.EarlyDepthStencil = true; - funcProps->shaderKind = DXIL::ShaderKind::Pixel; } if (const HLSLWaveSizeAttr *Attr = FD->getAttr()) { @@ -1751,14 +1832,14 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { Diags.Report(Attr->getLocation(), DiagID); return; } - if (!isCS) { + if (!isCS && !isNode) { unsigned DiagID = Diags.getCustomDiagID( DiagnosticsEngine::Error, "attribute WaveSize only valid for CS."); Diags.Report(Attr->getLocation(), DiagID); return; } - if (!isEntry) { + if (!isEntry && !isNode) { unsigned DiagID = Diags.getCustomDiagID( DiagnosticsEngine::Error, "attribute WaveSize only valid on entry point function."); @@ -1776,10 +1857,129 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { funcProps->waveSize = Attr->getSize(); } - const unsigned profileAttributes = isCS + isHS + isDS + isGS + isVS + isPS + isRay + isMS + isAS; + // Node shader + if (isNode) { + // Default launch type is defined to be Broadcasting. + funcProps->Node.LaunchType = DXIL::NodeLaunchType::Broadcasting; + } + + // Assign function properties for all "node" attributes. 
+ if (const auto *pAttr = FD->getAttr()) { + if (isNode) + funcProps->Node.LaunchType = StringToNodeLaunchType(pAttr->getLaunchType()); + else + ReportMissingNodeDiag(Diags, pAttr); + } + + if (const auto *pAttr = FD->getAttr()) { + if (isNode) + funcProps->Node.IsProgramEntry = true; + else + ReportMissingNodeDiag(Diags, pAttr); + } + + if (const auto *pAttr = + FD->getAttr()) { + if (isNode) { + funcProps->NodeShaderID.Name = pAttr->getName().str(); + funcProps->NodeShaderID.Index = pAttr->getArrayIndex(); + } + else { + ReportMissingNodeDiag(Diags, pAttr); + } + + } else { + if (isNode) { + funcProps->NodeShaderID.Name = FD->getName().str(); + funcProps->NodeShaderID.Index = 0; + } + } + if (const auto *pAttr = FD->getAttr()) { + if (isNode) + funcProps->Node.LocalRootArgumentsTableIndex = + pAttr->getIndex(); + else + ReportMissingNodeDiag(Diags, pAttr); + } + if (const auto *pAttr = FD->getAttr()) { + if (isNode) { + funcProps->NodeShaderSharedInput.Name = pAttr->getName().str(); + funcProps->NodeShaderSharedInput.Index = pAttr->getArrayIndex(); + } + else { + ReportMissingNodeDiag(Diags, pAttr); + } + } + if (const auto *pAttr = + FD->getAttr()) { + if (isNode) { + funcProps->Node.DispatchGrid[0] = pAttr->getX(); + funcProps->Node.DispatchGrid[1] = pAttr->getY(); + funcProps->Node.DispatchGrid[2] = pAttr->getZ(); + if (funcProps->Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) { + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "NodeDispatchGrid may only be used with Broadcasting nodes"); + Diags.Report(pAttr->getLocation(), DiagID); + } + } + else { + ReportMissingNodeDiag(Diags, pAttr); + } + } + if (const auto *pAttr = FD->getAttr()) { + if (isNode) { + funcProps->Node.MaxDispatchGrid[0] = pAttr->getX(); + funcProps->Node.MaxDispatchGrid[1] = pAttr->getY(); + funcProps->Node.MaxDispatchGrid[2] = pAttr->getZ(); + if (funcProps->Node.LaunchType != DXIL::NodeLaunchType::Broadcasting) { + unsigned DiagID = Diags.getCustomDiagID( + 
DiagnosticsEngine::Error, + "NodeMaxDispatchGrid may only be used with Broadcasting nodes"); + Diags.Report(pAttr->getLocation(), DiagID); + } + } + else { + ReportMissingNodeDiag(Diags, pAttr); + } + } + if (const auto *pAttr = FD->getAttr()) { + if (isNode) { + funcProps->Node.MaxRecursionDepth = pAttr->getCount(); + if (pAttr->getCount() > 32) { + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "NodeMaxRecursionDepth may not exceed 32"); + Diags.Report(pAttr->getLocation(), DiagID); + } + } + else { + ReportMissingNodeDiag(Diags, pAttr); + } + } + if (!FD->getAttr()) { + if (isNode) { + // NumThreads wasn't specified. + // For a Thread launch node the default is (1,1,1,) which we set here. + funcProps->numThreads[0] = 1; + funcProps->numThreads[1] = 1; + funcProps->numThreads[2] = 1; + // Other node launch types require NumThreads to be specified. + if (funcProps->Node.LaunchType != DXIL::NodeLaunchType::Thread) { + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "NumThreads is required, but was not specified"); + Diags.Report(FD->getLocation(), DiagID); + } + } + } + + const unsigned profileAttributes = isCS + isHS + isDS + isGS + isVS + isPS + isRay + isMS + isAS + isNode; // TODO: check this in front-end and report error. 
- DXASSERT(profileAttributes < 2, "profile attributes are mutual exclusive"); + if (profileAttributes > 1 && profileAttributes != isNode + isCS) + Diags.Report(FD->getLocation(), Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "Invalid shader stage attribute combination")); if (isEntry) { switch (funcProps->shaderKind) { @@ -1873,6 +2073,9 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { bool hasOutPrimitives = false; bool hasInPayload = false; bool rayShaderHaveErrors = false; + unsigned int NodeInputParamIdx = 0; + unsigned int NodeOutputParamIdx = 0; + SmallMapVector outputDecls; for (; ArgNo < F->arg_size(); ++ArgNo, ++ParmIdx, ++ArgIt) { DxilParameterAnnotation ¶mAnnotation = FuncAnnotation->GetParameterAnnotation(ArgNo); @@ -2239,6 +2442,68 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { } } } + // Parse the function arguments and fill out the node i/o properties + if (isNode) { + hlsl::NodeFlags nodeFlags; + if (GetHLSLNodeIORecordType(parmDecl, nodeFlags)) { + hlsl::NodeIOProperties node(nodeFlags); + + dxilInputQ = DxilParamInputQual::NodeIO; + // Add Node Record Type + AddHLSLNodeRecordTypeInfo(parmDecl, node); + if (nodeFlags.IsInputRecord()) { + // Add Node Shader parameter to a ValToProp map + // This will be used later to lower the Node parameters + // to handles + if (NodeInputParamIdx != 0) { + Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID(DiagnosticsEngine::Error, + "Node Shaders can have only zero or one Input Record parameter")); + } + NodeInputRecordParams[ArgIt].MetadataIdx = NodeInputParamIdx++; + + if (parmDecl->hasAttr()) { + node.MaxRecords = + parmDecl->getAttr()->getMaxCount(); + } + + NodeInputRecordParams[ArgIt].RecordInfo = node.GetNodeRecordInfo(); + funcProps->InputNodes.push_back(node); + } + else { + DXASSERT(node.Flags.IsOutputNode(), "Invalid NodeIO Kind"); + // Add Node Shader parameter to a ValToProp map + // This will be used later to 
lower the Node parameters + // to handles + NodeOutputParams[ArgIt].MetadataIdx = NodeOutputParamIdx++; + if (parmDecl->hasAttr()) + node.AllowSparseNodes = true; + + // OutputArraySize from declared arraysize + // FIXME: move to OutputNodeArray. + if (parmDecl->hasAttr < HLSLNodeArraySizeAttr>()) { + node.OutputArraySize = + parmDecl->getAttr()->getCount(); + } + else { + node.OutputArraySize = 0; + } + // OutputID from attribute + if (const auto *Attr = parmDecl->getAttr()) { + node.OutputID.Name = Attr->getName().str(); + node.OutputID.Index = Attr->getArrayIndex(); + } else { + node.OutputID.Name = parmDecl->getName().str(); + node.OutputID.Index = 0; + } + + // Insert output decls for cross referencing once all info is available + outputDecls.insert(std::make_pair(parmDecl->getName(), parmDecl)); + + NodeOutputParams[ArgIt].Info = node.GetNodeInfo(); + funcProps->OutputNodes.push_back(node); + } + } + } paramAnnotation.SetParamInputQual(dxilInputQ); if (isEntry) { @@ -2250,6 +2515,58 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { } } + // If InputNodes is empty, add an implicit input. + // - If MaxDispatchGrid is specified, we need a default record type with a + // single uint3 field for SV_DispatchGrid. 
+ // - Otherwise, we use EmptyNodeInput + if (funcProps->InputNodes.size() == 0) { + if (funcProps->Node.MaxDispatchGrid[0] > 0) { + hlsl::NodeIOProperties defaultInput(DXIL::NodeIOKind::DispatchNodeInputRecord); + defaultInput.RecordType.size = 12; + defaultInput.RecordType.SV_DispatchGrid.ByteOffset = 0; + defaultInput.RecordType.SV_DispatchGrid.ComponentType = DXIL::ComponentType::U32; + defaultInput.RecordType.SV_DispatchGrid.NumComponents = 3; + funcProps->InputNodes.push_back(defaultInput); + } else { + hlsl::NodeIOProperties emptyInput(DXIL::NodeIOKind::EmptyInput); + funcProps->InputNodes.push_back(emptyInput); + } + } + + // All output decls and param names are available and errors can be generated + // and parameter output array indices that correspond to param names can be added to the properties + auto outIt = outputDecls.begin(); + for (unsigned outputNo = 0; outputNo < funcProps->OutputNodes.size(); outputNo++ ) { + const ParmVarDecl *parmDecl = outIt->second; + outIt++; + hlsl::NodeIOProperties &node = funcProps->OutputNodes[outputNo]; + if (const auto *Attr = parmDecl->getAttr()) { + // Find matching argument name if present + StringRef sharedName = Attr->getName()->getName(); + + auto snIt = outputDecls.find(sharedName); + int ix = snIt - outputDecls.begin(); + if (snIt == outputDecls.end()) { + Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "MaxRecordsSharedWith must reference a valid ouput parameter name.")); + } else if (ix == (int)outputNo) { + Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "MaxRecordsSharedWith must not reference the same parameter it is applied to.")); + } + node.MaxRecordsSharedWith = ix; + } + if (const auto *Attr = parmDecl->getAttr()) { + if (node.MaxRecordsSharedWith >= 0) { + Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "Only one of MaxRecords or MaxRecordsSharedWith may be specified to 
the same parameter.")); + } + node.MaxRecords = Attr->getMaxCount(); + } + } + if (inputPatchCount > 1) { unsigned DiagID = Diags.getCustomDiagID( DiagnosticsEngine::Error, "may only have one InputPatch parameter"); @@ -2322,7 +2639,7 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { } // clear isExportedEntry if not exporting entry - bool isExportedEntry = profileAttributes != 0; + bool isExportedEntry = SM->IsLib() && profileAttributes != 0; if (isExportedEntry) { // use unmangled or mangled name depending on which is used for final entry function StringRef name = isRay ? F->getName() : FD->getName(); @@ -2365,6 +2682,101 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { m_ScopeMap[F] = ScopeInfo(F, FD->getLocation()); } +void CGMSHLSLRuntime::AddHLSLNodeRecordTypeInfo(const clang::ParmVarDecl* parmDecl, hlsl::NodeIOProperties& node) { + clang::QualType paramTy = parmDecl->getType().getCanonicalType(); + + if (auto arrayType = dyn_cast(paramTy)) { + paramTy = arrayType->getElementType(); + } + if (const RecordType *RT = dyn_cast(paramTy)) { + // Node I/O records are templateTypes + if (const ClassTemplateSpecializationDecl *templateDecl = + dyn_cast(RT->getDecl())) { + auto& TemplateArgs = templateDecl->getTemplateArgs(); + + if (!node.Flags.IsEmpty()) { + DiagnosticsEngine& Diags = CGM.getDiags(); + auto& Rec = TemplateArgs.get(0); + clang::QualType RecType = Rec.getAsType(); + llvm::Type *Type = CGM.getTypes().ConvertType(RecType); + const RecordType *recordtype = RecType->getAsStructureType(); + RecordDecl *RD = recordtype->getDecl(); + + // Get the TrackRWInputSharing flag from the record attribute + if (RD->hasAttr()) { + if (node.Flags.IsInputRecord() && node.Flags.GetNodeIOKind() != hlsl::DXIL::NodeIOKind::RWDispatchNodeInputRecord) { + Diags.Report(parmDecl->getLocation(), Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "NodeTrackRWInputSharing attribute cannot be applied to Input 
Records that are not RWDispatchNodeInputRecord")); + } + node.Flags.SetTrackRWInputSharing(); + } + + // Ex: For DispatchNodeInputRecord, set size = size(MY_RECORD) + node.RecordType.size = CGM.getDataLayout().getTypeAllocSize(Type); + if (node.RecordType.size == 0) { + // a node input/output record can't have a size of zero + DiagnosticsEngine &Diags = CGM.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "record used in %0 may not have zero size"); + Diags.Report(parmDecl->getSourceRange().getBegin(), DiagID) << templateDecl->getName() << parmDecl->getSourceRange(); + Diags.Report(RD->getLocation(), diag::note_defined_here) << "zero sized record"; + } + // If we find SV_DispatchGrid we'll remember the location for diagnostics + SourceLocation SV_DispatchGridLoc; + // Iterate over fields of the MY_RECORD(example) struct + for (auto fieldDecl : RD->fields()) { + //Check if any of the fields have a semantic annotation = SV_DispatchGrid + for (const hlsl::UnusualAnnotation *it : fieldDecl->getUnusualAnnotations()) { + if (it->getKind() == hlsl::UnusualAnnotation::UA_SemanticDecl) { + const hlsl::SemanticDecl *sd = cast(it); + // if we find a field with SV_DispatchGrid, fill out the SV_DispatchGrid + // member with byteoffset of the field, NumComponents (3 for uint3 etc) + // and U32 vs U16 types, which are the only types allowed + if (sd->SemanticName.equals("SV_DispatchGrid")) { + clang::QualType FT = fieldDecl->getType(); + auto &DL = CGM.getDataLayout(); + auto &SDGRec = node.RecordType.SV_DispatchGrid; + if (SDGRec.NumComponents != 0) { + DiagnosticsEngine &Diags = CGM.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "a field with SV_DispatchGrid has already been specified"); + Diags.Report(it->Loc, DiagID); + Diags.Report(SV_DispatchGridLoc, diag::note_defined_here) << "previously"; + } else { + // Set SV_DispatchGridLoc for use in diagnostics ; + SV_DispatchGridLoc = it->Loc; + } + unsigned fieldIdx = 
fieldDecl->getFieldIndex(); + if (StructType* ST = dyn_cast(Type)) { + SDGRec.ByteOffset = DL.getStructLayout(ST)->getElementOffset(fieldIdx); + } + const llvm::Type *lTy = CGM.getTypes().ConvertType(FT); + if (const llvm::VectorType *VT = dyn_cast(lTy)) { + DXASSERT(VT->getElementType()->isIntegerTy(), "invalid type"); + SDGRec.NumComponents = VT->getNumElements(); + SDGRec.ComponentType = (VT->getElementType()->getIntegerBitWidth() == 16) ? DXIL::ComponentType::U16 : + DXIL::ComponentType::U32; + } else if (const llvm::ArrayType *AT = dyn_cast(lTy)) { + DXASSERT(AT->getElementType()->isIntegerTy(), "invalid type"); + DXASSERT_NOMSG(AT->getNumElements() <= 3); + SDGRec.NumComponents = AT->getNumElements(); + SDGRec.ComponentType = (AT->getElementType()->getIntegerBitWidth() == 16) ? DXIL::ComponentType::U16 : + DXIL::ComponentType::U32; + } else { + // Scalar U16 or U32 + DXASSERT(lTy->isIntegerTy(), "invalid type"); + SDGRec.NumComponents = 1; + SDGRec.ComponentType = (lTy->getIntegerBitWidth() == 16) ? DXIL::ComponentType::U16 : + DXIL::ComponentType::U32; + } + } + } + } + } + } + } + } +} + void CGMSHLSLRuntime::RemapObsoleteSemantic(DxilParameterAnnotation ¶mInfo, bool isPatchConstantFunction) { DXASSERT(CGM.getLangOpts().EnableDX9CompatMode, "should be used only in back-compat mode"); @@ -3575,6 +3987,10 @@ void CGMSHLSLRuntime::FinishCodeGen() { // Translate calls to RayQuery constructor into hl Allocate calls TranslateRayQueryConstructor(HLM); + //Lower Node Input and Output Parameters to Node Handles + TranslateInputNodeRecordArgToHandle(HLM, NodeInputRecordParams); + TranslateNodeOutputParamToHandle(HLM, NodeOutputParams); + bool bIsLib = HLM.GetShaderModel()->IsLib(); StringRef GlobalCtorName = "llvm.global_ctors"; llvm::SmallVector Ctors; @@ -5942,6 +6358,11 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit( if (Ptr) { if (isa(Ptr) && 0 == ArgVals.count(Ptr)) SafeToSkip = true; + // Safe to skip if groupshared ptr passed to groupshared parameter. 
+ else if (Ptr->getType()->getPointerAddressSpace() == + DXIL::kTGSMAddrSpace && + ParamTy.getAddressSpace() == DXIL::kTGSMAddrSpace) + SafeToSkip = true; else if (const auto *A = dyn_cast(Ptr)) SafeToSkip = A->hasNoAliasAttr() && 0 == ArgVals.count(Ptr); } diff --git a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp index 5634b74924..ff2ed422d9 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMSFinishCodeGen.cpp @@ -44,6 +44,7 @@ #include "dxc/HLSL/HLModule.h" #include "dxc/HLSL/HLSLExtensionsCodegenHelper.h" #include "dxc/HlslIntrinsicOp.h" +#include "dxc/DXIL/DxilWaveMatrix.h" #include #include @@ -88,6 +89,71 @@ Value *CreateHandleFromResPtr(Value *ResPtr, HLModule &HLM, return Handle; } +Value *CreateNodeOutputHandle(HLModule &HLM, llvm::Type *HandleTy, + IRBuilder<> &Builder, unsigned index) { + Module &M = *HLM.GetModule(); + HLOpcodeGroup opCodeGroup = HLOpcodeGroup::HLCreateNodeOutputHandle; + unsigned opCode = + static_cast(HLOpcodeGroup::HLCreateNodeOutputHandle); + Value *mdIndex = Builder.getInt32(index); + Value *args[] = {mdIndex}; + CallInst *Handle = + HLM.EmitHLOperationCall(Builder, opCodeGroup, opCode, HandleTy, args, M); + return Handle; +} + +Value *CreateAnnotateNodeHandle(HLModule &HLM, Value *NodeHandle, + IRBuilder<> &Builder, NodeInfo Info) { + llvm::Type *NodeHandleTy = HLM.GetOP()->GetNodeHandleType(); + llvm::Type *NodeInfoTy = HLM.GetOP()->GetNodePropertiesType(); + Module &M = *HLM.GetModule(); + StructType *ST = cast(NodeInfoTy); + Constant *NodeProperties[] = { + ConstantInt::get(ST->getElementType(0), Info.IOFlags), + ConstantInt::get(ST->getElementType(1), Info.RecordSize)}; + Constant *NodeInfo = ConstantStruct::get(ST, NodeProperties); + Value *args[] = {NodeHandle, NodeInfo}; + Value *Handle = HLM.EmitHLOperationCall( + Builder, HLOpcodeGroup::HLAnnotateNodeHandle, + static_cast(HLOpcodeGroup::HLAnnotateNodeHandle), 
NodeHandleTy, + args, M); + return Handle; +} + +Value *CreateNodeInputRecordHandle(Value *NodeArg, HLModule &HLM, + llvm::Type *HandleTy, IRBuilder<> &Builder, + unsigned index) { + Module &M = *HLM.GetModule(); + HLOpcodeGroup opCodeGroup = HLOpcodeGroup::HLCreateNodeInputRecordHandle; + auto opCode = + static_cast(HLOpcodeGroup::HLCreateNodeInputRecordHandle); + DXASSERT_NOMSG(index == 0); + Value *mdIndex = Builder.getInt32(index); + Value *args[] = {mdIndex}; + CallInst *Handle = + HLM.EmitHLOperationCall(Builder, opCodeGroup, opCode, HandleTy, args, M); + return Handle; +} + +Value *CreateAnnotateNodeRecordHandle(HLModule &HLM, Value *NodeRecordHandle, + IRBuilder<> &Builder, + NodeRecordInfo Info) { + llvm::Type *NodeRecordHandleTy = HLM.GetOP()->GetNodeRecordHandleType(); + llvm::Type *NodeRecordInfoTy = HLM.GetOP()->GetNodeRecordPropertiesType(); + Module &M = *HLM.GetModule(); + StructType *ST = cast(NodeRecordInfoTy); + Constant *NodeRecordProperties[] = { + ConstantInt::get(ST->getElementType(0), Info.IOFlags), + ConstantInt::get(ST->getElementType(1), Info.RecordSize)}; + Constant *NodeRecordInfo = ConstantStruct::get(ST, NodeRecordProperties); + Value *args[] = {NodeRecordHandle, NodeRecordInfo}; + Value *Handle = HLM.EmitHLOperationCall( + Builder, HLOpcodeGroup::HLAnnotateNodeRecordHandle, + static_cast(HLOpcodeGroup::HLAnnotateNodeRecordHandle), + NodeRecordHandleTy, args, M); + return Handle; +} + template Value *CreateAnnotateHandle(HLModule &HLM, Value *Handle, DxilResourceProperties &RP, llvm::Type *ResTy, @@ -110,6 +176,18 @@ Value *CastHandleToRes(HLModule &HLM, Value *Handle, llvm::Type *ResTy, return Res; } +CallInst *CreateAnnotateWaveMatrix(HLModule &HLM, Value *WaveMatrixPtr, + DxilWaveMatrixProperties &WMP, + IRBuilder<> &Builder) { + Constant *WMPConstant = wavemat_helper::GetAsConstant( + WMP, HLM.GetOP()->GetWaveMatrixPropertiesType()); + CallInst *CI = HLM.EmitHLOperationCall( + Builder, HLOpcodeGroup::HLWaveMatrix_Annotate, + 
(unsigned)HLOpcodeGroup::HLWaveMatrix_Annotate, WaveMatrixPtr->getType(), + {WaveMatrixPtr, WMPConstant}, *HLM.GetModule()); + return CI; +} + // Lower CBV bitcast use to handle use. // Leave the load/store. void LowerDynamicCBVUseToHandle( @@ -659,50 +737,132 @@ DxilResourceProperties GetResourcePropsFromIntrinsicObjectArg( return RP; } -void AddOpcodeParamForIntrinsic( - HLModule &HLM, Function *F, unsigned opcode, llvm::Type *HandleTy, - DxilObjectProperties &objectProperties) { +void AddAnnotateWaveMatrix(HLModule &HLM, + DxilObjectProperties &objectProperties) { + for (auto it : objectProperties.waveMatMap) { + Value *V = it.first; + DxilWaveMatrixProperties &WMP = it.second; + // annotate Alloca, Param, or Global + if (AllocaInst *AI = dyn_cast(V)) { + // Insert annotation after alloca + IRBuilder<> Builder(AI->getNextNode()); + CreateAnnotateWaveMatrix(HLM, V, WMP, Builder); + } else if (GlobalVariable *GV = dyn_cast(V)) { + // Insert annotation in each function's entry block with users + SmallSetVector functions; + for (auto U : GV->users()) + if (Instruction *I = dyn_cast(U)) + functions.insert(I->getParent()->getParent()); + + for (auto F : functions) { + IRBuilder<> Builder(dxilutil::FindAllocaInsertionPt(F)); + CreateAnnotateWaveMatrix(HLM, V, WMP, Builder); + } + } else if (Argument *Arg = dyn_cast(V)) { + IRBuilder<> Builder(dxilutil::FindAllocaInsertionPt(Arg->getParent())); + CreateAnnotateWaveMatrix(HLM, V, WMP, Builder); + } else { + DXASSERT(false, "WaveMatrix value is unexpected type"); + } + } +} + +void AddOpcodeParamForIntrinsic(HLModule &HLM, Function *F, unsigned opcode, + DxilObjectProperties &objectProperties) { llvm::Module &M = *HLM.GetModule(); llvm::FunctionType *oldFuncTy = F->getFunctionType(); + llvm::Type *HandleTy = HLM.GetOP()->GetHandleType(); + llvm::Type *NodeRecordHandleTy = HLM.GetOP()->GetNodeRecordHandleType(); + llvm::Type *NodeOutputHandleTy = HLM.GetOP()->GetNodeHandleType(); + + HLOpcodeGroup group = 
hlsl::GetHLOpcodeGroup(F); SmallVector paramTyList; // Add the opcode param llvm::Type *opcodeTy = llvm::Type::getInt32Ty(M.getContext()); paramTyList.emplace_back(opcodeTy); - bool bRetHandle = false; + // Create a vector of the types of the original Intrinsic's + // parameters. In the case of an intrinsic returning a struct + bool bSRetHandle = false; + bool bNodeRecordRet = false; + bool bNodeOutputRet = false; + bool bNodeOutputMethod = false; + bool bNodeOutputArrayMethod = false; + unsigned bRetArgIndex = 0; for (unsigned i = 0; i < oldFuncTy->getNumParams(); i++) { llvm::Type *Ty = oldFuncTy->getParamType(i); + + if (i == 0 && Ty->isPointerTy() + && dxilutil::IsHLSLNodeOutputType(Ty->getPointerElementType())) + bNodeOutputMethod = true; + + if (i == 0 && Ty->isPointerTy() + && dxilutil::IsHLSLNodeOutputArrayType(Ty->getPointerElementType())) + bNodeOutputArrayMethod = true; + + if (i == 1 && bNodeOutputMethod && Ty->isPointerTy() + && dxilutil::IsHLSLNodeOutputRecordType(Ty->getPointerElementType())) { + // Skip for return type from a method. + // NodeOutput::GetGroupNodeOutputRecords + // NodeOutput::GetThreadNodeOutputRecords + bSRetHandle = true; + bNodeRecordRet = true; + bRetArgIndex = 1; + continue; + } + + if (i == 1 && bNodeOutputArrayMethod && Ty->isPointerTy() + && dxilutil::IsHLSLNodeOutputType(Ty->getPointerElementType())) { + // Skip for return type from a method + bSRetHandle = true; + bNodeOutputRet = true; + bRetArgIndex = 1; + continue; + } + if (Ty->isPointerTy()) { llvm::Type *PtrEltTy = Ty->getPointerElementType(); - if (dxilutil::IsHLSLResourceType(PtrEltTy)) { + + if (dxilutil::IsHLSLResourceType(PtrEltTy) || + dxilutil::IsHLSLNodeRecordType(PtrEltTy) || + dxilutil::IsHLSLNodeOutputType(PtrEltTy) || + dxilutil::IsHLSLNodeOutputArrayType(PtrEltTy)) { // Skip for return type. 
if (i == 0 && F->arg_begin()->hasStructRetAttr()) { - bRetHandle = true; + bSRetHandle = true; + if (dxilutil::IsHLSLNodeRecordType(PtrEltTy)) { + bNodeRecordRet = true; + } continue; } - // Use handle type for resource type. + // Use handle type for resource, Node(Input/Output) Record type. // This will make sure temp object variable only used by createHandle. - Ty = HandleTy; + if (dxilutil::IsHLSLResourceType(PtrEltTy)) + Ty = HandleTy; + else if (dxilutil::IsHLSLNodeRecordType(PtrEltTy)) + Ty = NodeRecordHandleTy; + else if (dxilutil::IsHLSLNodeOutputType(PtrEltTy)) + Ty = NodeOutputHandleTy; + else if (dxilutil::IsHLSLNodeOutputArrayType(PtrEltTy)) + Ty = NodeOutputHandleTy; } } paramTyList.emplace_back(Ty); } - HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(F); - if (group == HLOpcodeGroup::HLSubscript && opcode == static_cast(HLSubscriptOpcode::VectorSubscript)) { llvm::FunctionType *FT = F->getFunctionType(); llvm::Type *VecArgTy = FT->getParamType(0); llvm::VectorType *VType = - cast(VecArgTy->getPointerElementType()); + cast(VecArgTy->getPointerElementType()); llvm::Type *Ty = VType->getElementType(); DXASSERT(Ty->isIntegerTy(), "Only bool could use VectorSubscript"); llvm::IntegerType *ITy = cast(Ty); DXASSERT_LOCALVAR(ITy, ITy->getBitWidth() == 1, - "Only bool could use VectorSubscript"); + "Only bool could use VectorSubscript"); // The return type is i8*. // Replace all uses with i1*. 
@@ -711,14 +871,19 @@ void AddOpcodeParamForIntrinsic( } bool isDoubleSubscriptFunc = - group == HLOpcodeGroup::HLSubscript && - opcode == static_cast(HLSubscriptOpcode::DoubleSubscript); + group == HLOpcodeGroup::HLSubscript && + opcode == static_cast(HLSubscriptOpcode::DoubleSubscript); llvm::Type *RetTy = oldFuncTy->getReturnType(); - if (bRetHandle) { + if (bSRetHandle) { DXASSERT(RetTy->isVoidTy(), "else invalid return type"); RetTy = HandleTy; + if (bNodeRecordRet) + RetTy = NodeRecordHandleTy; + else if (bNodeOutputRet) + RetTy = NodeOutputHandleTy; } + if (isDoubleSubscriptFunc) { CallInst *doubleSub = cast(*F->user_begin()); @@ -729,7 +894,7 @@ void AddOpcodeParamForIntrinsic( unsigned coordIdx = HLOperandIndex::kSubscriptIndexOpIdx; // opcode operand not add yet, so the index need -1. if (GetHLOpcodeGroupByName(secSub->getCalledFunction()) == - HLOpcodeGroup::NotHL) + HLOpcodeGroup::NotHL) coordIdx -= 1; Value *coord = secSub->getArgOperand(coordIdx); @@ -741,7 +906,7 @@ void AddOpcodeParamForIntrinsic( // Change return type to be resource ret type. // opcode operand not add yet, so the index need -1. 
Value *objPtr = - doubleSub->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx - 1); + doubleSub->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx - 1); // Must be a GEP GEPOperator *objGEP = cast(objPtr); gep_type_iterator GEPIt = gep_type_begin(objGEP), E = gep_type_end(objGEP); @@ -762,7 +927,7 @@ void AddOpcodeParamForIntrinsic( } llvm::FunctionType *funcTy = - llvm::FunctionType::get(RetTy, paramTyList, oldFuncTy->isVarArg()); + llvm::FunctionType::get(RetTy, paramTyList, oldFuncTy->isVarArg()); Function *opFunc = CreateOpFunction(M, F, funcTy, group, opcode); StringRef lower = hlsl::GetHLLowerStrategy(F); @@ -779,14 +944,22 @@ void AddOpcodeParamForIntrinsic( Value *opcodeConst = Constant::getIntegerValue(opcodeTy, APInt(32, opcode)); opcodeParamList.emplace_back(opcodeConst); Value *retHandleArg = nullptr; - if (!bRetHandle) { + Value *nodeRecordArg = nullptr; + if (!bSRetHandle) { opcodeParamList.append(oldCI->arg_operands().begin(), - oldCI->arg_operands().end()); + oldCI->arg_operands().end()); } else { auto it = oldCI->arg_operands().begin(); + unsigned argIndex = 0; + auto start = it; + while (argIndex < bRetArgIndex) { + it++; + argIndex++; + } + opcodeParamList.append(start, it); retHandleArg = *(it++); opcodeParamList.append(it, - oldCI->arg_operands().end()); + oldCI->arg_operands().end()); } IRBuilder<> Builder(oldCI); @@ -799,13 +972,13 @@ void AddOpcodeParamForIntrinsic( Value *lastIndex = IndexList.back(); ConstantInt *constIndex = cast(lastIndex); DXASSERT_LOCALVAR(constIndex, constIndex->getLimitedValue() == 1, - "last index must 1"); + "last index must 1"); // Remove the last index. 
IndexList.pop_back(); objVal = objGEP->getPointerOperand(); DxilResourceProperties RP = GetResourcePropsFromIntrinsicObjectArg( - objVal, HLM, typeSys, objectProperties); + objVal, HLM, typeSys, objectProperties); if (IndexList.size() > 1) objVal = Builder.CreateInBoundsGEP(objVal, IndexList); @@ -824,7 +997,7 @@ void AddOpcodeParamForIntrinsic( CallInst *secSub = cast(user); unsigned idxOpIndex = HLOperandIndex::kSubscriptIndexOpIdx; if (GetHLOpcodeGroupByName(secSub->getCalledFunction()) == - HLOpcodeGroup::NotHL) + HLOpcodeGroup::NotHL) idxOpIndex--; Value *idx = secSub->getArgOperand(idxOpIndex); @@ -836,7 +1009,7 @@ void AddOpcodeParamForIntrinsic( // Insert new call before secSub to make sure idx is ready to use. Builder.SetInsertPoint(secSub); } - + unsigned recordSizeWAR = 0; for (unsigned i = 1; i < opcodeParamList.size(); i++) { Value *arg = opcodeParamList[i]; llvm::Type *Ty = arg->getType(); @@ -844,15 +1017,15 @@ void AddOpcodeParamForIntrinsic( Ty = Ty->getPointerElementType(); if (dxilutil::IsHLSLResourceType(Ty)) { DxilResourceProperties RP = GetResourcePropsFromIntrinsicObjectArg( - arg, HLM, typeSys, objectProperties); + arg, HLM, typeSys, objectProperties); // Use object type directly, not by pointer. // This will make sure temp object variable only used by ld/st. if (GEPOperator *argGEP = dyn_cast(arg)) { std::vector idxList(argGEP->idx_begin(), - argGEP->idx_end()); + argGEP->idx_end()); // Create instruction to avoid GEPOperator. 
GetElementPtrInst *GEP = GetElementPtrInst::CreateInBounds( - argGEP->getPointerOperand(), idxList); + argGEP->getPointerOperand(), idxList); Builder.Insert(GEP); arg = GEP; } @@ -863,16 +1036,111 @@ void AddOpcodeParamForIntrinsic( Handle = CreateAnnotateHandle(HLM, Handle, RP, ResTy, Builder); opcodeParamList[i] = Handle; } + if (dxilutil::IsHLSLNodeRecordType(Ty)) { + nodeRecordArg = arg; + Value *ldObj = Builder.CreateLoad(arg); + Value *Handle = EmitHLOperationCall( + HLM, Builder, HLOpcodeGroup::HLCast, + (unsigned)HLCastOpcode::NodeRecordToHandleCast, + NodeRecordHandleTy, {ldObj}, *HLM.GetModule()); + opcodeParamList[i] = Handle; + } + if (dxilutil::IsHLSLNodeOutputArrayType(Ty)) { + Value *ldObj = Builder.CreateLoad(arg); + Value *Handle = EmitHLOperationCall( + HLM, Builder, HLOpcodeGroup::HLCast, + (unsigned)HLCastOpcode::NodeOutputToHandleCast, + NodeOutputHandleTy, {ldObj}, *HLM.GetModule()); + opcodeParamList[i] = Handle; + // WAR for record size computation + if (!dxilutil::IsHLSLEmptyNodeOutputArrayType(Ty)) { + DxilStructAnnotation *pAnno = + HLM.GetTypeSystem().GetStructAnnotation( + dyn_cast(Ty)); + assert(pAnno != nullptr && pAnno->GetNumTemplateArgs() == 1 && + "otherwise the node template is not declared properly"); + llvm::Type *pRecType = const_cast( + pAnno->GetTemplateArgAnnotation(0).GetType()); + recordSizeWAR = M.getDataLayout().getTypeAllocSize(pRecType); + } + } + if (dxilutil::IsHLSLNodeOutputType(Ty)) { + Value *ldObj = Builder.CreateLoad(arg); + Value *Handle = EmitHLOperationCall( + HLM, Builder, HLOpcodeGroup::HLCast, + (unsigned)HLCastOpcode::NodeOutputToHandleCast, + NodeOutputHandleTy, {ldObj}, *HLM.GetModule()); + opcodeParamList[i] = Handle; + } } } Value *CI = Builder.CreateCall(opFunc, opcodeParamList); + if (group == HLOpcodeGroup::HLIntrinsic && + opcode == static_cast(IntrinsicOp::MOP_OutputComplete)) { + DXASSERT_NOMSG(nodeRecordArg->getType()->isPointerTy()); + Type *T = 
nodeRecordArg->getType()->getPointerElementType(); + Builder.CreateStore(Constant::getNullValue(T), nodeRecordArg); + } if (retHandleArg) { Type *ResTy = retHandleArg->getType()->getPointerElementType(); - Value *Res = HLM.EmitHLOperationCall( - Builder, HLOpcodeGroup::HLCast, - (unsigned)HLCastOpcode::HandleToResCast, ResTy, {CI}, M); - Builder.CreateStore(Res, retHandleArg); + if (dxilutil::IsHLSLNodeOutputRecordType(ResTy)) { + CallInst *GetOpRecordCall = cast(CI); + DXASSERT_NOMSG(group == HLOpcodeGroup::HLIntrinsic); + IntrinsicOp opcode = + static_cast(hlsl::GetHLOpcode(GetOpRecordCall)); + DXIL::NodeIOKind kind; + if (opcode == IntrinsicOp::MOP_GetGroupNodeOutputRecords) + kind = DXIL::NodeIOKind::GroupNodeOutputRecords; + else + kind = DXIL::NodeIOKind::ThreadNodeOutputRecords; + + // Get record size from the node output record template argument + + DxilStructAnnotation *pAnno = HLM.GetTypeSystem().GetStructAnnotation( + dyn_cast(ResTy)); + assert(pAnno != nullptr && pAnno->GetNumTemplateArgs() == 1 && + "otherwise the node record template is not declared properly"); + llvm::Type *pRecType = const_cast( + pAnno->GetTemplateArgAnnotation(0).GetType()); + unsigned recordSize = M.getDataLayout().getTypeAllocSize(pRecType); + NodeRecordInfo Info(kind, recordSize); + + CI = CreateAnnotateNodeRecordHandle(HLM, CI, Builder, Info); + Value *Res = HLM.EmitHLOperationCall( + Builder, HLOpcodeGroup::HLCast, + (unsigned)HLCastOpcode::HandleToNodeRecordCast, ResTy, {CI}, M); + Builder.CreateStore(Res, retHandleArg); + } else if (dxilutil::IsHLSLNodeOutputType(ResTy)) { + DXASSERT_NOMSG(group == HLOpcodeGroup::HLIndexNodeHandle); + DXIL::NodeIOKind kind = DXIL::NodeIOKind::NodeOutputArray; + unsigned recordSize; + if (dxilutil::IsHLSLEmptyNodeOutputType(ResTy)) { + kind = DXIL::NodeIOKind::EmptyOutputArray; + recordSize = 0; + } else { + // TODO FIX AddStructAnnotation + /*DxilStructAnnotation* pAnno = + HLM.GetTypeSystem().GetStructAnnotation(dyn_cast(ResTy)); + 
assert(pAnno != nullptr && pAnno->GetNumTemplateArgs() == 1 && + "otherwise the node template is not declared properly"); llvm::Type* + pRecType = (llvm::Type*)pAnno->GetTemplateArgAnnotation(0).GetType(); + recordSize = M.getDataLayout().getTypeAllocSize(pRecType);*/ + recordSize = recordSizeWAR; + } + NodeInfo Info(kind, recordSize); + + CI = CreateAnnotateNodeHandle(HLM, CI, Builder, Info); + Value *Res = HLM.EmitHLOperationCall( + Builder, HLOpcodeGroup::HLCast, + (unsigned)HLCastOpcode::HandleToNodeOutputCast, ResTy, {CI}, M); + Builder.CreateStore(Res, retHandleArg); + } else { + Value *Res = HLM.EmitHLOperationCall( + Builder, HLOpcodeGroup::HLCast, + (unsigned)HLCastOpcode::HandleToResCast, ResTy, {CI}, M); + Builder.CreateStore(Res, retHandleArg); + } oldCI->eraseFromParent(); continue; } @@ -898,7 +1166,7 @@ void AddOpcodeParamForIntrinsic( void AddOpcodeParamForIntrinsics( HLModule &HLM, std::vector> &intrinsicMap, DxilObjectProperties &objectProperties) { - llvm::Type *HandleTy = HLM.GetOP()->GetHandleType(); + for (auto mapIter : intrinsicMap) { Function *F = mapIter.first; if (F->user_empty()) { @@ -908,7 +1176,7 @@ void AddOpcodeParamForIntrinsics( } unsigned opcode = mapIter.second; - AddOpcodeParamForIntrinsic(HLM, F, opcode, HandleTy, objectProperties); + AddOpcodeParamForIntrinsic(HLM, F, opcode, objectProperties); } } @@ -2639,6 +2907,59 @@ void TranslateRayQueryConstructor(HLModule &HLM) { pConstructorFunc->eraseFromParent(); } } + +void TranslateInputNodeRecordArgToHandle( + hlsl::HLModule &HLM, + llvm::MapVector &NodeParams) { + + llvm::Module &Module = *HLM.GetModule(); + Type *HandleTy = HLM.GetOP()->GetNodeRecordHandleType(); + + for (auto it : NodeParams) { + NodeInputRecordProps Props = it.second; + Argument *NodeParam = it.first; + if (NodeParam->user_empty()) + continue; + + IRBuilder<> Builder( + NodeParam->getParent()->getEntryBlock().getFirstInsertionPt()); + Value *Handle = CreateNodeInputRecordHandle(NodeParam, HLM, HandleTy, + 
Builder, Props.MetadataIdx); + Handle = + CreateAnnotateNodeRecordHandle(HLM, Handle, Builder, Props.RecordInfo); + llvm::Type *RetTy = NodeParam->getType()->getPointerElementType(); + Value *Res = + HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCast, + (unsigned)HLCastOpcode::HandleToNodeRecordCast, + RetTy, {Handle}, Module); + Builder.CreateStore(Res, NodeParam); + } +} + +void TranslateNodeOutputParamToHandle( + hlsl::HLModule &HLM, + llvm::MapVector &NodeParams) { + Type *HandleTy = HLM.GetOP()->GetNodeHandleType(); + + for (auto it : NodeParams) { + NodeProps Props = it.second; + Argument *NodeParam = it.first; + if (NodeParam->user_empty()) + continue; + + IRBuilder<> Builder( + NodeParam->getParent()->getEntryBlock().getFirstInsertionPt()); + Value *Handle = + CreateNodeOutputHandle(HLM, HandleTy, Builder, Props.MetadataIdx); + Handle = CreateAnnotateNodeHandle(HLM, Handle, Builder, Props.Info); + Value *Res = + HLM.EmitHLOperationCall(Builder, HLOpcodeGroup::HLCast, + (unsigned)HLCastOpcode::HandleToNodeOutputCast, + NodeParam->getType()->getPointerElementType(), + {Handle}, *HLM.GetModule()); + Builder.CreateStore(Res, NodeParam); + } +} } // namespace CGHLSLMSHelper namespace { @@ -3233,6 +3554,10 @@ void FinishIntrinsics( LowerGetResourceFromHeap(HLM, intrinsicMap); // Lower bitcast use of CBV into cbSubscript. LowerDynamicCBVUseToHandle(HLM, objectProperties); + + // Add AnnotateWaveMatrix + AddAnnotateWaveMatrix(HLM, objectProperties); + // translate opcode into parameter for intrinsic functions // Do this before CloneShaderEntry and TranslateRayQueryConstructor to avoid // update valToResPropertiesMap for cloned inst. 
@@ -3744,4 +4069,27 @@ void DxilObjectProperties::updateGLC(llvm::Value *V) { it->second.Basic.IsGloballyCoherent ^= 1; } +bool DxilObjectProperties::AddWaveMatrix( + llvm::Value *V, const hlsl::DxilWaveMatrixProperties &WMP) { + if (WMP.isValid()) { + DXASSERT(!GetWaveMatrix(V).isValid() || GetWaveMatrix(V) == WMP, + "otherwise, property conflict"); + waveMatMap[V] = WMP; + return true; + } + return false; +} + +bool DxilObjectProperties::IsWaveMatrix(llvm::Value *V) { + return waveMatMap.count(V) != 0; +} + +hlsl::DxilWaveMatrixProperties +DxilObjectProperties::GetWaveMatrix(llvm::Value *V) { + auto it = waveMatMap.find(V); + if (it != waveMatMap.end()) + return it->second; + return DxilWaveMatrixProperties(); +} + } // namespace CGHLSLMSHelper diff --git a/tools/clang/lib/CodeGen/CGHLSLMSHelper.h b/tools/clang/lib/CodeGen/CGHLSLMSHelper.h index b2937434ea..fe7eb72d8f 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMSHelper.h +++ b/tools/clang/lib/CodeGen/CGHLSLMSHelper.h @@ -36,6 +36,7 @@ template class SmallVector; namespace hlsl { class HLModule; struct DxilResourceProperties; +struct DxilWaveMatrixProperties; struct DxilFunctionProps; class DxilFieldAnnotation; enum class IntrinsicOp; @@ -150,7 +151,7 @@ class ScopeInfo { clang::SourceLocation sourceLoc; }; -// Map from value to resource properties. +// Map from value to resource/wave matrix properties. // This only collect object variables(global/local/parameter), not object fields inside struct. // Object fields inside struct is saved by TypeAnnotation. struct DxilObjectProperties { @@ -159,8 +160,13 @@ struct DxilObjectProperties { hlsl::DxilResourceProperties GetResource(llvm::Value *V); void updateGLC(llvm::Value *V); + bool AddWaveMatrix(llvm::Value *V, const hlsl::DxilWaveMatrixProperties &WMP); + bool IsWaveMatrix(llvm::Value *V); + hlsl::DxilWaveMatrixProperties GetWaveMatrix(llvm::Value *V); + // MapVector for deterministic iteration order. 
llvm::MapVector resMap; + llvm::MapVector waveMatMap; }; void CopyAndAnnotateResourceArgument(llvm::Value *Src, llvm::Value *Dest, @@ -221,7 +227,10 @@ void CollectCtorFunctions(llvm::Module &M, llvm::StringRef globalName, clang::CodeGen::CodeGenModule &CGM); void TranslateRayQueryConstructor(hlsl::HLModule &HLM); - +void TranslateInputNodeRecordArgToHandle(hlsl::HLModule& HLM, llvm::MapVector& NodeParams); +void TranslateNodeOutputParamToHandle(hlsl::HLModule& HLM, llvm::MapVector& NodeParams); void UpdateLinkage( hlsl::HLModule &HLM, clang::CodeGen::CodeGenModule &CGM, hlsl::dxilutil::ExportMap &exportMap, diff --git a/tools/clang/lib/Parse/ParseDecl.cpp b/tools/clang/lib/Parse/ParseDecl.cpp index 6ada5f32b5..3122953d89 100644 --- a/tools/clang/lib/Parse/ParseDecl.cpp +++ b/tools/clang/lib/Parse/ParseDecl.cpp @@ -813,6 +813,17 @@ void Parser::ParseGNUAttributeArgs(IdentifierInfo *AttrName, case AttributeList::AT_HLSLNumThreads: case AttributeList::AT_HLSLShader: case AttributeList::AT_HLSLExperimental: + case AttributeList::AT_HLSLNodeLaunch: + case AttributeList::AT_HLSLNodeId: + case AttributeList::AT_HLSLNodeIsProgramEntry: + case AttributeList::AT_HLSLNodeLocalRootArgumentsTableIndex: + case AttributeList::AT_HLSLNodeShareInputOf: + case AttributeList::AT_HLSLNodeDispatchGrid: + case AttributeList::AT_HLSLNodeMaxDispatchGrid: + case AttributeList::AT_HLSLNodeMaxRecursionDepth: + case AttributeList::AT_HLSLMaxRecordsSharedWith: + case AttributeList::AT_HLSLMaxRecords: + case AttributeList::AT_HLSLNodeArraySize: case AttributeList::AT_HLSLRootSignature: case AttributeList::AT_HLSLOutputControlPoints: case AttributeList::AT_HLSLOutputTopology: @@ -846,6 +857,7 @@ void Parser::ParseGNUAttributeArgs(IdentifierInfo *AttrName, //case AttributeList::AT_HLSLVertices: //case AttributeList::AT_HLSLPrimitives: //case AttributeList::AT_HLSLPayload: + //case AttributeList::AT_HLSLAllowSparseNodes: goto GenericAttributeParse; default: Diag(AttrNameLoc, 
diag::warn_unknown_attribute_ignored) << AttrName; diff --git a/tools/clang/lib/Sema/SemaExpr.cpp b/tools/clang/lib/Sema/SemaExpr.cpp index 370c5efcd5..8197c29a05 100644 --- a/tools/clang/lib/Sema/SemaExpr.cpp +++ b/tools/clang/lib/Sema/SemaExpr.cpp @@ -9475,43 +9475,63 @@ static bool HLSLCheckForModifiableLValue( SourceLocation Loc, Sema &S ) { - assert(isa(E)); - const CXXOperatorCallExpr *expr = cast(E); - const Expr *LHS = expr->getArg(0); - QualType qt = LHS->getType(); - - // Check modifying const matrix with double subscript operator calls - if (isa(expr->getArg(0))) - return HLSLCheckForModifiableLValue(const_cast(expr->getArg(0)), Loc, S); - - if (qt.isConstQualified() && (hlsl::IsMatrixType(&S, qt) || hlsl::IsVectorType(&S, qt))) { - DiagnoseConstAssignment(S, LHS, Loc); + if (E->getType().isConstQualified()) { + DiagnoseConstAssignment(S, E, Loc); + return true; + } + if (!isa(E) && !E->isLValue()) { + S.Diag(Loc, diag::err_typecheck_expression_not_modifiable_lvalue); + return true; + } + if (auto OC = dyn_cast(E)) { + QualType qt = OC->getArg(0)->getType(); + if (hlsl::IsMatrixType(&S, qt) || hlsl::IsVectorType(&S, qt)) + return HLSLCheckForModifiableLValue(OC->getArg(0), Loc, S); + } + if (auto M = dyn_cast(E)) { + // If the return type of the expressin is const, we should respect the const + // qualification. 
+ if (E->getType().isConstQualified()) { + DiagnoseConstAssignment(S, E, Loc); return true; } - if (!LHS->isLValue()) { - S.Diag(Loc, diag::err_typecheck_expression_not_modifiable_lvalue); - return true; + if (auto MemberCall = dyn_cast(M->getBase())) { + CXXRecordDecl *RD = MemberCall->getRecordDecl(); + QualType Ty = QualType(RD->getTypeForDecl(), 0); + // NodeInputRecord and NodeInputRecordArray are not modifiable + if (hlsl::IsHLSLRONodeInputRecordType(Ty)) { + DiagnoseConstAssignment(S, E, Loc); + return true; + } } - return false; + return HLSLCheckForModifiableLValue(M->getBase(), Loc, S); + } + if (auto ICE = dyn_cast(E)) { + return HLSLCheckForModifiableLValue(ICE->getSubExpr(), Loc, S); + } + if (auto AS = dyn_cast(E)) { + return HLSLCheckForModifiableLValue(AS->getBase(), Loc, S); + } + if (auto SE = dyn_cast(E)) { + return HLSLCheckForModifiableLValue(SE->getBase(), Loc, S); + } + + return false; } /// CheckForModifiableLvalue - Verify that E is a modifiable lvalue. If not, /// emit an error and return true. If so, return false. 
bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) { // HLSL Change: export this function assert(!E->hasPlaceholderType(BuiltinType::PseudoObject)); - // HLSL Change Starts - check const for array subscript operator for HLSL vector/matrix - if (S.Context.getLangOpts().HLSL && E->getStmtClass() == Stmt::CXXOperatorCallExprClass) { - // check if it's a vector or matrix - const CXXOperatorCallExpr *expr = cast(E); - QualType qt = expr->getArg(0)->getType(); - if ((hlsl::IsMatrixType(&S, qt) || hlsl::IsVectorType(&S, qt))) - return HLSLCheckForModifiableLValue(E, Loc, S); - } + SourceLocation OrigLoc = Loc; + Expr::isModifiableLvalueResult IsLV = E->isModifiableLvalue(S.Context, &Loc); + + // HLSL Change Starts - HLSL has extra constraints to check + if (IsLV == Expr::MLV_Valid && S.Context.getLangOpts().HLSL && + HLSLCheckForModifiableLValue(E, Loc, S)) + return true; // HLSL Change Ends - SourceLocation OrigLoc = Loc; - Expr::isModifiableLvalueResult IsLV = E->isModifiableLvalue(S.Context, - &Loc); if (IsLV == Expr::MLV_ClassTemporary && IsReadonlyMessage(E, S)) IsLV = Expr::MLV_InvalidMessageExpression; if (IsLV == Expr::MLV_Valid) diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index bb46dcf093..8be6772135 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -45,6 +45,7 @@ #include "dxc/DXIL/DxilShaderModel.h" #include #include +#include #include enum ArBasicKind { @@ -222,6 +223,30 @@ enum ArBasicKind { AR_OBJECT_RWTEXTURE2DMS, AR_OBJECT_RWTEXTURE2DMS_ARRAY, + // WaveMatrix + AR_OBJECT_WAVE_MATRIX_LEFT, + AR_OBJECT_WAVE_MATRIX_RIGHT, + AR_OBJECT_WAVE_MATRIX_LEFT_COL_ACC, + AR_OBJECT_WAVE_MATRIX_RIGHT_ROW_ACC, + AR_OBJECT_WAVE_MATRIX_ACCUMULATOR, + + // Work Graphs + AR_OBJECT_EMPTY_NODE_INPUT, + AR_OBJECT_DISPATCH_NODE_INPUT_RECORD, + AR_OBJECT_RWDISPATCH_NODE_INPUT_RECORD, + AR_OBJECT_GROUP_NODE_INPUT_RECORDS, + AR_OBJECT_RWGROUP_NODE_INPUT_RECORDS, + 
AR_OBJECT_THREAD_NODE_INPUT_RECORD, + AR_OBJECT_RWTHREAD_NODE_INPUT_RECORD, + + AR_OBJECT_NODE_OUTPUT, + AR_OBJECT_EMPTY_NODE_OUTPUT, + AR_OBJECT_NODE_OUTPUT_ARRAY, + AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY, + + AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, + AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + AR_BASIC_MAXIMUM_COUNT }; @@ -257,6 +282,15 @@ enum ArBasicKind { case AR_OBJECT_DEPTHSTENCIL: \ case AR_OBJECT_STATEBLOCK +#define AR_BASIC_WAVE_MATRIX_INPUT_CASES \ + case AR_OBJECT_WAVE_MATRIX_LEFT: \ + case AR_OBJECT_WAVE_MATRIX_RIGHT + +#define AR_BASIC_WAVE_MATRIX_ACC_FRAG_CASES \ + case AR_OBJECT_WAVE_MATRIX_LEFT_COL_ACC: \ + case AR_OBJECT_WAVE_MATRIX_RIGHT_ROW_ACC: \ + case AR_OBJECT_WAVE_MATRIX_ACCUMULATOR + // // Properties of entries in the ArBasicKind enumeration. // These properties are intended to allow easy identification @@ -303,6 +337,8 @@ enum ArBasicKind { #define BPROP_ROVBUFFER 0x00400000 // Whether the type is a ROV object. #define BPROP_FEEDBACKTEXTURE 0x00800000 // Whether the type is a feedback texture. 
#define BPROP_ENUM 0x01000000 // Whether the type is a enum +#define BPROP_WAVE_MATRIX_INPUT 0x02000000 // Whether the type is a wave matrix input object (Left/Right) +#define BPROP_WAVE_MATRIX_ACC 0x04000000 // Whether the type is a wave matrix accum object (Accumulator/LeftColAcc/RightRowAcc) #define GET_BPROP_PRIM_KIND(_Props) \ ((_Props) & (BPROP_BOOLEAN | BPROP_INTEGER | BPROP_FLOATING)) @@ -351,6 +387,11 @@ enum ArBasicKind { #define IS_BPROP_ENUM(_Props) \ (((_Props) & BPROP_ENUM) != 0) +#define IS_BPROP_WAVE_MATRIX_INPUT(_Props) \ + (((_Props) & BPROP_WAVE_MATRIX_INPUT) != 0) +#define IS_BPROP_WAVE_MATRIX_ACC(_Props) \ + (((_Props) & BPROP_WAVE_MATRIX_ACC) != 0) + const UINT g_uBasicKindProps[] = { BPROP_PRIMITIVE | BPROP_BOOLEAN | BPROP_INTEGER | BPROP_NUMERIC | BPROP_BITS0, // AR_BASIC_BOOL @@ -511,13 +552,36 @@ const UINT g_uBasicKindProps[] = 0, //AR_OBJECT_PROCEDURAL_PRIMITIVE_HIT_GROUP, 0, //AR_OBJECT_RAYTRACING_PIPELINE_CONFIG1, - 0, //AR_OBJECT_RAY_QUERY, - 0, //AR_OBJECT_HEAP_RESOURCE, - 0, //AR_OBJECT_HEAP_SAMPLER, + BPROP_OBJECT, //AR_OBJECT_RAY_QUERY, + BPROP_OBJECT, //AR_OBJECT_HEAP_RESOURCE, + BPROP_OBJECT, //AR_OBJECT_HEAP_SAMPLER, BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_RWTEXTURE2DMS BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_RWTEXTURE2DMS_ARRAY + BPROP_OBJECT | BPROP_WAVE_MATRIX_INPUT, //AR_OBJECT_WAVE_MATRIX_LEFT + BPROP_OBJECT | BPROP_WAVE_MATRIX_INPUT, //AR_OBJECT_WAVE_MATRIX_RIGHT + BPROP_OBJECT | BPROP_WAVE_MATRIX_ACC, //AR_OBJECT_WAVE_MATRIX_LEFT_COL_ACC + BPROP_OBJECT | BPROP_WAVE_MATRIX_ACC, //AR_OBJECT_WAVE_MATRIX_RIGHT_ROW_ACC + BPROP_OBJECT | BPROP_WAVE_MATRIX_ACC, //AR_OBJECT_WAVE_MATRIX_ACCUMULATOR + + // WorkGraphs + BPROP_OBJECT, //AR_OBJECT_EMPTY_NODE_INPUT + BPROP_OBJECT, //AR_OBJECT_DISPATCH_NODE_INPUT_RECORD + BPROP_OBJECT | BPROP_RWBUFFER, //AR_OBJECT_RWDISPATCH_NODE_INPUT_RECORD + BPROP_OBJECT, //AR_OBJECT_GROUP_NODE_INPUT_RECORDS + BPROP_OBJECT | BPROP_RWBUFFER, //AR_OBJECT_RWGROUP_NODE_INPUT_RECORDS + 
BPROP_OBJECT, //AR_OBJECT_THREAD_NODE_INPUT_RECORD + BPROP_OBJECT | BPROP_RWBUFFER, //AR_OBJECT_RWTHREAD_NODE_INPUT_RECORD + + BPROP_OBJECT, //AR_OBJECT_NODE_OUTPUT + BPROP_OBJECT, //AR_OBJECT_EMPTY_NODE_OUTPUT + BPROP_OBJECT, //AR_OBJECT_NODE_OUTPUT_ARRAY + BPROP_OBJECT, //AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY + + BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, + BPROP_OBJECT | BPROP_RWBUFFER, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + // AR_BASIC_MAXIMUM_COUNT }; @@ -568,6 +632,13 @@ C_ASSERT(ARRAYSIZE(g_uBasicKindProps) == AR_BASIC_MAXIMUM_COUNT); #define IS_BASIC_ENUM(_Kind) \ IS_BPROP_ENUM(GetBasicKindProps(_Kind)) +#define IS_BASIC_WAVE_MATRIX_INPUT(_Kind) \ + IS_BPROP_WAVE_MATRIX_INPUT(GetBasicKindProps(_Kind)) +#define IS_BASIC_WAVE_MATRIX_ACC(_Kind) \ + IS_BPROP_WAVE_MATRIX_ACC(GetBasicKindProps(_Kind)) +#define IS_BASIC_WAVE_MATRIX(_Kind) \ + (IS_BASIC_WAVE_MATRIX_INPUT(_Kind) || IS_BASIC_WAVE_MATRIX_ACC(_Kind)) + #define BITWISE_ENUM_OPS(_Type) \ inline _Type operator|(_Type F1, _Type F2) \ { \ @@ -866,6 +937,61 @@ QualType GetOrCreateVectorSpecialization(ASTContext& context, Sema* sema, return vectorSpecializationType; } +// Gets component type, dimM, and dimN from WaveMatrix* instantiated type. +// Assumes wave matrix type, returns false if anything isn't as expected. 
+static bool GetWaveMatrixTemplateValues(QualType objType, QualType *compType, + unsigned *dimM, unsigned *dimN) { + const CXXRecordDecl *CXXRD = objType.getCanonicalType()->getAsCXXRecordDecl(); + if (const ClassTemplateSpecializationDecl *templateSpecializationDecl = + dyn_cast(CXXRD)) { + const clang::TemplateArgumentList &args = + templateSpecializationDecl->getTemplateInstantiationArgs(); + if (args.size() != 3) + return false; + if (args[0].getKind() != TemplateArgument::Type || + !args[0].getAsType()->isBuiltinType()) + return false; + if (args[1].getKind() != TemplateArgument::Integral || + args[2].getKind() != TemplateArgument::Integral) + return false; + if (compType) + *compType = args[0].getAsType(); + if (dimM) + *dimM = (unsigned)args[1].getAsIntegral().getExtValue(); + if (dimN) + *dimN = (unsigned)args[2].getAsIntegral().getExtValue(); + return true; + } + return false; +} + +/// Instantiates a new *NodeOutputRecords type specialization or gets +/// an existing one from the AST. +static QualType +GetOrCreateNodeOutputRecordSpecialization(ASTContext &context, Sema *sema, + _In_ ClassTemplateDecl *templateDecl, + QualType elementType) { + DXASSERT_NOMSG(sema); + DXASSERT_NOMSG(templateDecl); + + TemplateArgument templateArgs[1] = {TemplateArgument(elementType)}; + + QualType specializationType = GetOrCreateTemplateSpecialization( + context, *sema, templateDecl, ArrayRef(templateArgs)); + +#ifdef DBG + // Verify that we can read the field member from the template record. 
+ DXASSERT(specializationType->getAsCXXRecordDecl(), + "type of non-dependent specialization is not a RecordType"); + DeclContext::lookup_result lookupResult = + specializationType->getAsCXXRecordDecl()->lookup( + DeclarationName(&context.Idents.get(StringRef("h")))); + DXASSERT(!lookupResult.empty(), + "otherwise *NodeOutputRecords handle cannot be looked up"); +#endif + + return specializationType; +} // Decls.cpp constants start here - these should be refactored or, better, replaced with clang::Type-based constructs. @@ -909,6 +1035,12 @@ static const ArTypeObjectKind g_NullTT[] = AR_TOBJ_UNKNOWN }; +static const ArTypeObjectKind g_ArrayTT[] = +{ + AR_TOBJ_ARRAY, + AR_TOBJ_UNKNOWN +}; + const ArTypeObjectKind* g_LegalIntrinsicTemplates[] = { g_NullTT, @@ -917,6 +1049,7 @@ const ArTypeObjectKind* g_LegalIntrinsicTemplates[] = g_MatrixTT, g_AnyTT, g_ObjectTT, + g_ArrayTT, }; C_ASSERT(ARRAYSIZE(g_LegalIntrinsicTemplates) == LITEMPLATE_COUNT); @@ -1293,6 +1426,91 @@ static const ArBasicKind g_SInt16Or32OnlyCT[] = AR_BASIC_UNKNOWN }; +static const ArBasicKind g_ByteAddressBufferCT[] = +{ + AR_OBJECT_BYTEADDRESS_BUFFER, + AR_BASIC_UNKNOWN +}; + +static const ArBasicKind g_RWByteAddressBufferCT[] = +{ + AR_OBJECT_RWBYTEADDRESS_BUFFER, + AR_BASIC_UNKNOWN +}; + +static const ArBasicKind g_WaveMatrixLeftCT[] = +{ + AR_OBJECT_WAVE_MATRIX_LEFT, + AR_BASIC_UNKNOWN +}; + +static const ArBasicKind g_WaveMatrixRightCT[] = +{ + AR_OBJECT_WAVE_MATRIX_RIGHT, + AR_BASIC_UNKNOWN +}; + +static const ArBasicKind g_WaveMatrixLeftColAccCT[] = +{ + AR_OBJECT_WAVE_MATRIX_LEFT_COL_ACC, + AR_BASIC_UNKNOWN +}; + +static const ArBasicKind g_WaveMatrixRightRowAccCT[] = +{ + AR_OBJECT_WAVE_MATRIX_RIGHT_ROW_ACC, + AR_BASIC_UNKNOWN +}; + +static const ArBasicKind g_WaveMatrixAccumulatorCT[] = +{ + AR_OBJECT_WAVE_MATRIX_ACCUMULATOR, + AR_BASIC_UNKNOWN +}; + +static const ArBasicKind g_NodeRecordOrUAVCT[] = +{ + AR_OBJECT_DISPATCH_NODE_INPUT_RECORD, + AR_OBJECT_RWDISPATCH_NODE_INPUT_RECORD, + 
AR_OBJECT_GROUP_NODE_INPUT_RECORDS, + AR_OBJECT_RWGROUP_NODE_INPUT_RECORDS, + AR_OBJECT_THREAD_NODE_INPUT_RECORD, + AR_OBJECT_RWTHREAD_NODE_INPUT_RECORD, + AR_OBJECT_NODE_OUTPUT, + AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, + AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + + AR_OBJECT_RWBUFFER, + AR_OBJECT_RWTEXTURE1D, + AR_OBJECT_RWTEXTURE1D_ARRAY, + AR_OBJECT_RWTEXTURE2D, + AR_OBJECT_RWTEXTURE2D_ARRAY, + AR_OBJECT_RWTEXTURE3D, + AR_OBJECT_RWSTRUCTURED_BUFFER, + AR_OBJECT_RWBYTEADDRESS_BUFFER, + AR_OBJECT_APPEND_STRUCTURED_BUFFER, + AR_BASIC_UNKNOWN +}; + +static const ArBasicKind g_GroupNodeOutputRecordsCT[] = +{ + AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + AR_BASIC_UNKNOWN +}; + +static const ArBasicKind g_ThreadNodeOutputRecordsCT[] = +{ + AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, + AR_BASIC_UNKNOWN +}; + +static const ArBasicKind g_AnyOutputRecordCT[] = +{ + AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS, + AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, + AR_BASIC_UNKNOWN +}; + // Basic kinds, indexed by a LEGAL_INTRINSIC_COMPTYPES value. 
const ArBasicKind* g_LegalIntrinsicCompTypes[] = { @@ -1340,6 +1558,18 @@ const ArBasicKind* g_LegalIntrinsicCompTypes[] = g_UInt8_4PackedCT, // LICOMPTYPE_UINT8_4PACKED g_AnyInt16Or32CT, // LICOMPTYPE_ANY_INT16_OR_32 g_SInt16Or32OnlyCT, // LICOMPTYPE_SINT16_OR_32_ONLY + + g_ByteAddressBufferCT, // LICOMPTYPE_BYTEADDRESSBUFFER + g_RWByteAddressBufferCT, // LICOMPTYPE_RWBYTEADDRESSBUFFER + g_WaveMatrixLeftCT, // LICOMPTYPE_WAVE_MATRIX_LEFT + g_WaveMatrixRightCT, // LICOMPTYPE_WAVE_MATRIX_RIGHT + g_WaveMatrixLeftColAccCT, // LICOMPTYPE_WAVE_MATRIX_LEFT_COL_ACC + g_WaveMatrixRightRowAccCT, // LICOMPTYPE_WAVE_MATRIX_RIGHT_ROW_ACC + g_WaveMatrixAccumulatorCT, // LICOMPTYPE_WAVE_MATRIX_ACCUMULATOR + g_NodeRecordOrUAVCT, // LICOMPTYPE_NODE_RECORD_OR_UAV + g_AnyOutputRecordCT, // LICOMPTYPE_ANY_NODE_OUTPUT_RECORD + g_GroupNodeOutputRecordsCT, // LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS + g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS }; static_assert(ARRAYSIZE(g_LegalIntrinsicCompTypes) == LICOMPTYPE_COUNT, "Intrinsic comp type table must be updated when new enumerants are added."); @@ -1441,6 +1671,29 @@ const ArBasicKind g_ArBasicKindsAsTypes[] = AR_OBJECT_RWTEXTURE2DMS, // RWTexture2DMS AR_OBJECT_RWTEXTURE2DMS_ARRAY, // RWTexture2DMSArray + + AR_OBJECT_WAVE_MATRIX_LEFT, + AR_OBJECT_WAVE_MATRIX_RIGHT, + AR_OBJECT_WAVE_MATRIX_LEFT_COL_ACC, + AR_OBJECT_WAVE_MATRIX_RIGHT_ROW_ACC, + AR_OBJECT_WAVE_MATRIX_ACCUMULATOR, + + // Work Graphs + AR_OBJECT_EMPTY_NODE_INPUT, + AR_OBJECT_DISPATCH_NODE_INPUT_RECORD, + AR_OBJECT_RWDISPATCH_NODE_INPUT_RECORD, + AR_OBJECT_GROUP_NODE_INPUT_RECORDS, + AR_OBJECT_RWGROUP_NODE_INPUT_RECORDS, + AR_OBJECT_THREAD_NODE_INPUT_RECORD, + AR_OBJECT_RWTHREAD_NODE_INPUT_RECORD, + + AR_OBJECT_NODE_OUTPUT, + AR_OBJECT_EMPTY_NODE_OUTPUT, + AR_OBJECT_NODE_OUTPUT_ARRAY, + AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY, + + AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, + AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS }; // Count of template arguments for basic kind of 
objects that look like templates (one or more type arguments). @@ -1536,6 +1789,29 @@ const uint8_t g_ArBasicKindsTemplateCount[] = 2, // AR_OBJECT_RWTEXTURE2DMS 2, // AR_OBJECT_RWTEXTURE2DMS_ARRAY + + 3, // AR_OBJECT_WAVE_MATRIX_LEFT, + 3, // AR_OBJECT_WAVE_MATRIX_RIGHT, + 3, // AR_OBJECT_WAVE_MATRIX_LEFT_COL_ACC, + 3, // AR_OBJECT_WAVE_MATRIX_RIGHT_ROW_ACC, + 3, // AR_OBJECT_WAVE_MATRIX_ACCUMULATOR, + + // WorkGraphs + 0, //AR_OBJECT_EMPTY_NODE_INPUT, + 1, //AR_OBJECT_DISPATCH_NODE_INPUT_RECORD, + 1, //AR_OBJECT_RWDISPATCH_NODE_INPUT_RECORD, + 1, //AR_OBJECT_GROUP_NODE_INPUT_RECORDS, + 1, //AR_OBJECT_RWGROUP_NODE_INPUT_RECORDS, + 1, //AR_OBJECT_THREAD_NODE_INPUT_RECORD, + 1, //AR_OBJECT_RWTHREAD_NODE_INPUT_RECORD, + + 1, //AR_OBJECT_NODE_OUTPUT, + 0, //AR_OBJECT_EMPTY_NODE_OUTPUT, + 1, //AR_OBJECT_NODE_OUTPUT_ARRAY, + 0, //AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY, + + 1, //AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS, + 1, //AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS }; C_ASSERT(_countof(g_ArBasicKindsAsTypes) == _countof(g_ArBasicKindsTemplateCount)); @@ -1641,6 +1917,29 @@ const SubscriptOperatorRecord g_ArBasicKindsSubscripts[] = { 2, MipsFalse, SampleTrue }, // AR_OBJECT_RWTEXTURE2DMS (RWTexture2DMS) { 3, MipsFalse, SampleTrue }, // AR_OBJECT_RWTEXTURE2DMS_ARRAY (RWTexture2DMSArray) + + { 0, MipsFalse, SampleFalse }, // AR_OBJECT_WAVE_MATRIX_LEFT, + { 0, MipsFalse, SampleFalse }, // AR_OBJECT_WAVE_MATRIX_RIGHT, + { 0, MipsFalse, SampleFalse }, // AR_OBJECT_WAVE_MATRIX_LEFT_COL_ACC, + { 0, MipsFalse, SampleFalse }, // AR_OBJECT_WAVE_MATRIX_RIGHT_ROW_ACC, + { 0, MipsFalse, SampleFalse }, // AR_OBJECT_WAVE_MATRIX_ACCUMULATOR, + + // WorkGraphs + { 0, MipsFalse, SampleFalse }, // AR_OBJECT_EMPTY_NODE_INPUT + { 0, MipsFalse, SampleFalse }, // AR_OBJECT_DISPATCH_NODE_INPUT_RECORD + { 0, MipsFalse, SampleFalse }, // AR_OBJECT_RWDISPATCH_NODE_INPUT_RECORD + { 1, MipsFalse, SampleFalse }, // AR_OBJECT_GROUP_NODE_INPUT_RECORDS + { 1, MipsFalse, SampleFalse }, // 
AR_OBJECT_RWGROUP_NODE_INPUT_RECORDS + { 0, MipsFalse, SampleFalse }, // AR_OBJECT_THREAD_NODE_INPUT_RECORD + { 0, MipsFalse, SampleFalse }, // AR_OBJECT_RWTHREAD_NODE_INPUT_RECORD + + { 0, MipsFalse, SampleFalse }, // AR_OBJECT_NODE_OUTPUT + { 0, MipsFalse, SampleFalse }, // AR_OBJECT_EMPTY_NODE_OUTPUT + { 1, MipsFalse, SampleFalse }, // AR_OBJECT_NODE_OUTPUT_ARRAY + { 1, MipsFalse, SampleFalse }, // AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY + + { 1, MipsFalse, SampleFalse }, // AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS + { 1, MipsFalse, SampleFalse }, // AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS }; C_ASSERT(_countof(g_ArBasicKindsAsTypes) == _countof(g_ArBasicKindsSubscripts)); @@ -1769,6 +2068,29 @@ const char* g_ArBasicTypeNames[] = "RWTexture2DMS", "RWTexture2DMSArray", + + "WaveMatrixLeft", + "WaveMatrixRight", + "WaveMatrixLeftColAcc", + "WaveMatrixRightRowAcc", + "WaveMatrixAccumulator", + + //Workgraphs + "EmptyNodeInput", + "DispatchNodeInputRecord", + "RWDispatchNodeInputRecord", + "GroupNodeInputRecords", + "RWGroupNodeInputRecords", + "ThreadNodeInputRecord", + "RWThreadNodeInputRecord", + + "NodeOutput", + "EmptyNodeOutput", + "NodeOutputArray", + "EmptyNodeOutputArray", + + "ThreadNodeOutputRecords", + "GroupNodeOutputRecords" }; C_ASSERT(_countof(g_ArBasicTypeNames) == AR_BASIC_MAXIMUM_COUNT); @@ -1827,15 +2149,16 @@ static bool IsVariadicArgument(const HLSL_INTRINSIC_ARGUMENT &arg) { static hlsl::ParameterModifier ParamModsFromIntrinsicArg(const HLSL_INTRINSIC_ARGUMENT *pArg) { - if (pArg->qwUsage == AR_QUAL_IN_OUT) { + UINT64 qwUsage = pArg->qwUsage & AR_QUAL_IN_OUT; + if (qwUsage == AR_QUAL_IN_OUT) { return hlsl::ParameterModifier(hlsl::ParameterModifier::Kind::InOut); } - if (pArg->qwUsage == AR_QUAL_OUT) { + if (qwUsage == AR_QUAL_OUT) { return hlsl::ParameterModifier(hlsl::ParameterModifier::Kind::Out); } if (pArg->qwUsage == AR_QUAL_REF) return hlsl::ParameterModifier(hlsl::ParameterModifier::Kind::Ref); - DXASSERT(pArg->qwUsage & AR_QUAL_IN, "else usage is
incorrect"); + DXASSERT(qwUsage & AR_QUAL_IN, "else usage is incorrect"); return hlsl::ParameterModifier(hlsl::ParameterModifier::Kind::In); } @@ -2368,6 +2691,53 @@ void GetIntrinsicMethods(ArBasicKind kind, _Outptr_result_buffer_(*intrinsicCoun *intrinsics = g_RWTexture2DMSArrayMethods; *intrinsicCount = _countof(g_RWTexture2DMSArrayMethods); break; + case AR_OBJECT_WAVE_MATRIX_LEFT: + *intrinsics = g_WaveMatrixLeftMethods; + *intrinsicCount = _countof(g_WaveMatrixLeftMethods); + break; + case AR_OBJECT_WAVE_MATRIX_RIGHT: + *intrinsics = g_WaveMatrixRightMethods; + *intrinsicCount = _countof(g_WaveMatrixRightMethods); + break; + case AR_OBJECT_WAVE_MATRIX_LEFT_COL_ACC: + *intrinsics = g_WaveMatrixLeftColAccMethods; + *intrinsicCount = _countof(g_WaveMatrixLeftColAccMethods); + break; + case AR_OBJECT_WAVE_MATRIX_RIGHT_ROW_ACC: + *intrinsics = g_WaveMatrixRightRowAccMethods; + *intrinsicCount = _countof(g_WaveMatrixRightRowAccMethods); + break; + case AR_OBJECT_WAVE_MATRIX_ACCUMULATOR: + *intrinsics = g_WaveMatrixAccumulatorMethods; + *intrinsicCount = _countof(g_WaveMatrixAccumulatorMethods); + break; + case AR_OBJECT_EMPTY_NODE_INPUT: + *intrinsics = g_EmptyNodeInputMethods; + *intrinsicCount = _countof(g_EmptyNodeInputMethods); + break; + case AR_OBJECT_RWDISPATCH_NODE_INPUT_RECORD: + *intrinsics = g_RWDispatchNodeInputRecordMethods; + *intrinsicCount = _countof(g_RWDispatchNodeInputRecordMethods); + break; + case AR_OBJECT_GROUP_NODE_INPUT_RECORDS: + case AR_OBJECT_RWGROUP_NODE_INPUT_RECORDS: + *intrinsics = g_GroupNodeInputRecordsMethods; + *intrinsicCount = _countof(g_GroupNodeInputRecordsMethods); + break; + case AR_OBJECT_NODE_OUTPUT: + *intrinsics = g_NodeOutputMethods; + *intrinsicCount = _countof(g_NodeOutputMethods); + break; + case AR_OBJECT_EMPTY_NODE_OUTPUT: + *intrinsics = g_EmptyNodeOutputMethods; + *intrinsicCount = _countof(g_EmptyNodeOutputMethods); + break; + case AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS: + case 
AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: + *intrinsics = g_GroupOrThreadNodeOutputRecordsMethods; + *intrinsicCount = _countof(g_GroupOrThreadNodeOutputRecordsMethods); + break; + // SPIRV change starts #ifdef ENABLE_SPIRV_CODEGEN case AR_OBJECT_VK_SUBPASS_INPUT: @@ -3016,6 +3386,11 @@ class HLSLExternalSource : public ExternalSemaSource { // Declaration for matrix and vector templates. ClassTemplateDecl* m_matrixTemplateDecl; ClassTemplateDecl* m_vectorTemplateDecl; + + // Declarations for Work Graph Output Record types + ClassTemplateDecl* m_GroupNodeOutputRecordsTemplateDecl; + ClassTemplateDecl* m_ThreadNodeOutputRecordsTemplateDecl; + // Namespace decl for hlsl intrinsic functions NamespaceDecl* m_hlslNSDecl; @@ -3097,6 +3472,7 @@ class HLSLExternalSource : public ExternalSemaSource { !DoesLegalTemplateAcceptMultipleTypes(templateArg->uLegalTemplates) && componentRef >= 0 && componentRef != INTRIN_COMPTYPE_FROM_TYPE_ELT0 && + componentRef != INTRIN_COMPTYPE_FROM_NODEOUTPUT && componentArg->uComponentTypeId == 0 && !DoesComponentTypeAcceptMultipleTypes(componentArg->uLegalComponentTypes) && !IsRowOrColumnVariable(matrixArg->uCols) && @@ -3680,6 +4056,8 @@ class HLSLExternalSource : public ExternalSemaSource { unsigned effectKindIndex = 0; const auto *SM = hlsl::ShaderModel::GetByName(m_sema->getLangOpts().HLSLProfile.c_str()); + CXXRecordDecl *nodeOutputDecl = nullptr, *emptyNodeOutputDecl = nullptr; + for (unsigned i = 0; i < _countof(g_ArBasicKindsAsTypes); i++) { ArBasicKind kind = g_ArBasicKindsAsTypes[i]; @@ -3751,20 +4129,94 @@ class HLSLExternalSource : public ExternalSemaSource { m_context->getRecordType(recordDecl), *m_context); } + } else if (IsWaveMatrixBasicKind(kind)) { + recordDecl = DeclareWaveMatrixType(*m_context, + (DXIL::WaveMatrixKind)(kind - AR_OBJECT_WAVE_MATRIX_LEFT)); } else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D) { recordDecl = DeclareUIntTemplatedTypeWithHandle(*m_context, "FeedbackTexture2D", "kind"); } else if (kind == 
AR_OBJECT_FEEDBACKTEXTURE2D_ARRAY) { recordDecl = DeclareUIntTemplatedTypeWithHandle(*m_context, "FeedbackTexture2DArray", "kind"); + } else if (kind == AR_OBJECT_EMPTY_NODE_INPUT) { + recordDecl = DeclareNodeOrRecordType( + *m_context, "EmptyNodeInput", + /*IsRecordTypeTemplate*/ false, /*IsConst*/ true, + /*HasGetMethods*/ false, + /*IsArray*/ false, /*IsCompleteType*/ false); + } else if (kind == AR_OBJECT_DISPATCH_NODE_INPUT_RECORD) { + recordDecl = + DeclareNodeOrRecordType(*m_context, "DispatchNodeInputRecord", + /*IsRecordTypeTemplate*/ true, + /*IsConst*/ true, /*HasGetMethods*/ true, + /*IsArray*/ false, /*IsCompleteType*/ true); + } else if (kind == AR_OBJECT_RWDISPATCH_NODE_INPUT_RECORD) { + recordDecl = DeclareNodeOrRecordType( + *m_context, "RWDispatchNodeInputRecord", + /*IsRecordTypeTemplate*/ true, /*IsConst*/ false, + /*HasGetMethods*/ true, + /*IsArray*/ false, /*IsCompleteType*/ false); + } else if (kind == AR_OBJECT_GROUP_NODE_INPUT_RECORDS) { + recordDecl = + DeclareNodeOrRecordType(*m_context, "GroupNodeInputRecords", + /*IsRecordTypeTemplate*/ true, + /*IsConst*/ true, /*HasGetMethods*/ true, + /*IsArray*/ true, /*IsCompleteType*/ false); + } else if (kind == AR_OBJECT_RWGROUP_NODE_INPUT_RECORDS) { + recordDecl = + DeclareNodeOrRecordType(*m_context, "RWGroupNodeInputRecords", + /*IsRecordTypeTemplate*/ true, + /*IsConst*/ false, /*HasGetMethods*/ true, + /*IsArray*/ true, /*IsCompleteType*/ false); + } else if (kind == AR_OBJECT_THREAD_NODE_INPUT_RECORD) { + recordDecl = + DeclareNodeOrRecordType(*m_context, "ThreadNodeInputRecord", + /*IsRecordTypeTemplate*/ true, + /*IsConst*/ true, /*HasGetMethods*/ true, + /*IsArray*/ false, /*IsCompleteType*/ true); + } else if (kind == AR_OBJECT_RWTHREAD_NODE_INPUT_RECORD) { + recordDecl = + DeclareNodeOrRecordType(*m_context, "RWThreadNodeInputRecord", + /*IsRecordTypeTemplate*/ true, + /*IsConst*/ false, /*HasGetMethods*/ true, + /*IsArray*/ false, /*IsCompleteType*/ true); + } else if (kind == 
AR_OBJECT_NODE_OUTPUT) { + recordDecl = DeclareNodeOrRecordType( + *m_context, "NodeOutput", + /*IsRecordTypeTemplate*/ true, /*IsConst*/ true, + /*HasGetMethods*/ false, + /*IsArray*/ false, /*IsCompleteType*/ false); + nodeOutputDecl = recordDecl; + } else if (kind == AR_OBJECT_EMPTY_NODE_OUTPUT) { + recordDecl = DeclareNodeOrRecordType( + *m_context, "EmptyNodeOutput", + /*IsRecordTypeTemplate*/ false, /*IsConst*/ true, + /*HasGetMethods*/ false, + /*IsArray*/ false, /*IsCompleteType*/ false); + emptyNodeOutputDecl = recordDecl; + } else if (kind == AR_OBJECT_NODE_OUTPUT_ARRAY) { + assert(nodeOutputDecl != nullptr); + recordDecl = DeclareNodeOutputArray(*m_context, "NodeOutputArray", + /* ItemType */ nodeOutputDecl, + /*IsRecordTypeTemplate*/ true, + /*IsCompleteType*/ true); + } else if (kind == AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY) { + assert(emptyNodeOutputDecl != nullptr); + recordDecl = DeclareNodeOutputArray(*m_context, "EmptyNodeOutputArray", + /* ItemType */ emptyNodeOutputDecl, + /*IsRecordTypeTemplate*/ false, + /*IsCompleteType*/ true); + } else if (kind == AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS) { + recordDecl = m_GroupNodeOutputRecordsTemplateDecl->getTemplatedDecl(); + } else if (kind == AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS) { + recordDecl = m_ThreadNodeOutputRecordsTemplateDecl->getTemplatedDecl(); } #ifdef ENABLE_SPIRV_CODEGEN else if (kind == AR_OBJECT_VK_SPV_INTRINSIC_TYPE && m_vkNSDecl) { recordDecl = DeclareUIntTemplatedTypeWithHandleInDeclContext( *m_context, m_vkNSDecl, typeName, "id"); recordDecl->setImplicit(true); - } - else if (kind == AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID && m_vkNSDecl) { + } else if (kind == AR_OBJECT_VK_SPV_INTRINSIC_RESULT_ID && m_vkNSDecl) { recordDecl = DeclareTemplateTypeWithHandleInDeclContext(*m_context, m_vkNSDecl, typeName, 1, @@ -3773,7 +4225,8 @@ class HLSLExternalSource : public ExternalSemaSource { } #endif else if (templateArgCount == 0) { - recordDecl = DeclareRecordTypeWithHandle(*m_context, typeName); + 
recordDecl = DeclareRecordTypeWithHandle(*m_context, typeName, + /*isCompleteType*/ false); } else { DXASSERT(templateArgCount == 1 || templateArgCount == 2, "otherwise a new case has been added"); @@ -3991,6 +4444,14 @@ class HLSLExternalSource : public ExternalSemaSource { return IsRayQueryBasicKind(GetTypeElementKind(type)); } + + bool IsWaveMatrixBasicKind(ArBasicKind kind) { + return kind >= AR_OBJECT_WAVE_MATRIX_LEFT && kind <= AR_OBJECT_WAVE_MATRIX_ACCUMULATOR; + } + bool IsWaveMatrixType(QualType type) { + return IsWaveMatrixBasicKind(GetTypeElementKind(type)); + } + void WarnMinPrecision(QualType Type, SourceLocation Loc) { Type = Type->getCanonicalTypeUnqualified(); if (IsVectorType(m_sema, Type) || IsMatrixType(m_sema, Type)) { @@ -4366,7 +4827,7 @@ class HLSLExternalSource : public ExternalSemaSource { ArBasicKind kind = g_ArBasicKindsAsTypes[i]; const char *typeName = g_ArBasicTypeNames[kind]; uint8_t templateArgCount = g_ArBasicKindsTemplateCount[i]; - DXASSERT(templateArgCount <= 2, "otherwise a new case has been added"); + DXASSERT(templateArgCount <= 3, "otherwise a new case has been added"); int startDepth = (templateArgCount == 0) ? 
0 : 1; CXXRecordDecl *recordDecl = m_objectTypeDecls[i]; if (recordDecl == nullptr) { @@ -4518,6 +4979,27 @@ class HLSLExternalSource : public ExternalSemaSource { case AR_OBJECT_TRIANGLE_INTERSECTION_ATTRIBUTES: case AR_OBJECT_RWTEXTURE2DMS: case AR_OBJECT_RWTEXTURE2DMS_ARRAY: + + case AR_OBJECT_WAVE_MATRIX_LEFT: + case AR_OBJECT_WAVE_MATRIX_RIGHT: + case AR_OBJECT_WAVE_MATRIX_LEFT_COL_ACC: + case AR_OBJECT_WAVE_MATRIX_RIGHT_ROW_ACC: + case AR_OBJECT_WAVE_MATRIX_ACCUMULATOR: + + + case AR_OBJECT_EMPTY_NODE_INPUT: + case AR_OBJECT_DISPATCH_NODE_INPUT_RECORD: + case AR_OBJECT_RWDISPATCH_NODE_INPUT_RECORD: + case AR_OBJECT_GROUP_NODE_INPUT_RECORDS: + case AR_OBJECT_RWGROUP_NODE_INPUT_RECORDS: + case AR_OBJECT_THREAD_NODE_INPUT_RECORD: + case AR_OBJECT_RWTHREAD_NODE_INPUT_RECORD: + case AR_OBJECT_NODE_OUTPUT: + case AR_OBJECT_EMPTY_NODE_OUTPUT: + case AR_OBJECT_NODE_OUTPUT_ARRAY: + case AR_OBJECT_EMPTY_NODE_OUTPUT_ARRAY: + case AR_OBJECT_THREAD_NODE_OUTPUT_RECORDS: + case AR_OBJECT_GROUP_NODE_OUTPUT_RECORDS: { const ArBasicKind* match = std::find(g_ArBasicKindsAsTypes, &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], kind); DXASSERT(match != &g_ArBasicKindsAsTypes[_countof(g_ArBasicKindsAsTypes)], "otherwise can't find constant in basic kinds"); @@ -4626,6 +5108,7 @@ class HLSLExternalSource : public ExternalSemaSource { /// On success, argTypes includes the clang Types to use for the signature, with the first being the return type. 
bool MatchArguments( const IntrinsicDefIter &cursor, + _In_ QualType objectType, _In_ QualType objectElement, _In_ QualType functionTemplateTypeArg, _In_ ArrayRef Args, @@ -4739,7 +5222,7 @@ class HLSLExternalSource : public ExternalSemaSource { std::vector functionArgTypes; size_t badArgIdx; - bool argsMatch = MatchArguments(cursor, QualType(), QualType(), Args, &functionArgTypes, badArgIdx); + bool argsMatch = MatchArguments(cursor, QualType(), QualType(), QualType(), Args, &functionArgTypes, badArgIdx); if (!functionArgTypes.size()) return false; @@ -4802,6 +5285,12 @@ class HLSLExternalSource : public ExternalSemaSource { // Initializing built in integers for ray tracing AddRaytracingConstants(*m_context); AddSamplerFeedbackConstants(*m_context); + AddBarrierConstants(*m_context); + + AddHLSLNodeOutputRecordTemplate(*m_context, "GroupNodeOutputRecords", + &m_GroupNodeOutputRecordsTemplateDecl, /* isCompleteType */ false); + AddHLSLNodeOutputRecordTemplate(*m_context, "ThreadNodeOutputRecords", + &m_ThreadNodeOutputRecordsTemplateDecl, /* isCompleteType */ false); return true; } @@ -5042,12 +5531,51 @@ class HLSLExternalSource : public ExternalSemaSource { if (!recordType->getDecl()->isCompleteDefinition()) { m_sema->Diag(argSrcLoc, diag::err_typecheck_decl_incomplete_type) << argType; - return true; } } } return false; + + } else if (templateName == "DispatchNodeInputRecord" || + templateName == "RWDispatchNodeInputRecord" || + templateName == "GroupNodeInputRecords" || + templateName == "RWGroupNodeInputRecords" || + templateName == "ThreadNodeInputRecord" || + templateName == "RWThreadNodeInputRecord" || + templateName == "NodeOutput" || + templateName == "NodeOutputArray" || + templateName == "GroupNodeOutputRecords" || + templateName == "ThreadNodeOutputRecords") { + + DXASSERT(TemplateArgList.size() == 1, + "otherwise the template has not been declared properly"); + // The first argument must be a user defined struct type that does not + // contain any 
HLSL object + const TemplateArgumentLoc &argLoc = TemplateArgList[0]; + const TemplateArgument &arg = argLoc.getArgument(); + + if (arg.getKind() == TemplateArgument::ArgKind::Template) { + TemplateDecl *templateDecl = arg.getAsTemplate().getAsTemplateDecl(); + SourceLocation argSrcLoc = argLoc.getLocation(); + m_sema->Diag(argSrcLoc, + diag::err_hlsl_typeintemplateargument_requires_struct) + << templateDecl->getName(); + return true; + } + QualType argType = arg.getAsType(); + UINT count; + // We skip the empty struct case here as a more specific diagnostic for + // that case is generated later + if ((GetTypeObjectKind(argType) != AR_TOBJ_COMPOUND) || + (!IsTypeNumeric(argType, &count) && count > 0)) { + SourceLocation argSrcLoc = argLoc.getLocation(); + m_sema->Diag(argSrcLoc, + diag::err_hlsl_typeintemplateargument_requires_struct) + << argType; + return true; + } + return false; } bool isMatrix = Template->getCanonicalDecl() == @@ -5277,8 +5805,7 @@ class HLSLExternalSource : public ExternalSemaSource { int startDepth = 0; if (templateArgCount > 0) { - DXASSERT(templateArgCount == 1 || templateArgCount == 2, - "otherwise a new case has been added"); + DXASSERT(templateArgCount <= 3, "otherwise a new case has been added"); ClassTemplateDecl *typeDecl = recordDecl->getDescribedClassTemplate(); AddObjectSubscripts(kind, typeDecl, recordDecl, g_ArBasicKindsSubscripts[idx]); @@ -5907,9 +6434,50 @@ HLSLExternalSource::IsValidObjectElement(LPCSTR tableName, const IntrinsicOp op, } } +// Given component type of wave matrix object on which a method is called, +// and given the component type of an argument passed by the user, +// return either the user component type, or a valid component type, +// if the user component type is not valid. 
+static ArBasicKind GetValidWaveMatrixComponentTypeForArg( + ArBasicKind objKind, // wave matrix type for this + ArBasicKind objEltKind, // element type for this + ArBasicKind argKind, // wave matrix type for arg + ArBasicKind argEltKind) { // element type for arg + if (IS_BASIC_WAVE_MATRIX_ACC(objKind) && + IS_BASIC_WAVE_MATRIX_INPUT(argKind)) { + switch (objEltKind) { + case AR_BASIC_FLOAT32: + switch (argEltKind) { + case AR_BASIC_FLOAT32: + case AR_BASIC_FLOAT16: + return argEltKind; + default: + break; + } + // return a valid type (this will be used for error message) + return AR_BASIC_FLOAT32; + case AR_BASIC_INT32: + switch (argEltKind) { + case AR_BASIC_INT8_4PACKED: + case AR_BASIC_UINT8_4PACKED: + return argEltKind; + default: + break; + } + // return a valid type (this will be used for error message) + return AR_BASIC_INT8_4PACKED; + default: + break; + } + } + // In other cases, we return this element kind. + return objEltKind; +} + _Use_decl_annotations_ bool HLSLExternalSource::MatchArguments( const IntrinsicDefIter &cursor, + QualType objectType, QualType objectElement, QualType functionTemplateTypeArg, ArrayRef Args, @@ -6053,6 +6621,7 @@ bool HLSLExternalSource::MatchArguments( case AR_TOBJ_BASIC: case AR_TOBJ_OBJECT: case AR_TOBJ_STRING: + case AR_TOBJ_ARRAY: break; default: badArgIdx = std::min(badArgIdx, iArg); // no struct, arrays or void @@ -6084,6 +6653,12 @@ bool HLSLExternalSource::MatchArguments( } } + // Process component type from object element after loop + if (pIntrinsicArg->uComponentTypeId == INTRIN_COMPTYPE_FROM_TYPE_ELT0) { + ++iArg; + continue; + } + DXASSERT( pIntrinsicArg->uComponentTypeId < MaxIntrinsicArgs, "otherwise intrinsic table was modified and MaxIntrinsicArgs was not updated (or uComponentTypeId is out of bounds)"); @@ -6155,9 +6730,11 @@ bool HLSLExternalSource::MatchArguments( "otherwise the argument list wasn't fully processed"); // Default template and component type for return value - if 
(pIntrinsic->pArgs[0].qwUsage - && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_TYPE - && pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION) { + if (pIntrinsic->pArgs[0].qwUsage && + pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_TYPE && + pIntrinsic->pArgs[0].uTemplateId != INTRIN_TEMPLATE_FROM_FUNCTION && + pIntrinsic->pArgs[0].uComponentTypeId != + INTRIN_COMPTYPE_FROM_NODEOUTPUT) { CAB(pIntrinsic->pArgs[0].uTemplateId < MaxIntrinsicArgs, 0); if (AR_TOBJ_UNKNOWN == Template[pIntrinsic->pArgs[0].uTemplateId]) { Template[pIntrinsic->pArgs[0].uTemplateId] = @@ -6374,8 +6951,13 @@ bool HLSLExternalSource::MatchArguments( else if (pArgument->uLegalComponentTypes == LICOMPTYPE_TEXTURE2D || pArgument->uLegalComponentTypes == LICOMPTYPE_TEXTURE2DARRAY) { pNewType = Args[i - 1]->getType().getNonReferenceType(); - } - else { + } else if (pArgument->uLegalComponentTypes == + LICOMPTYPE_NODE_RECORD_OR_UAV) { + pNewType = Args[i - 1]->getType().getNonReferenceType(); + } else if (pArgument->uLegalComponentTypes == + LICOMPTYPE_ANY_NODE_OUTPUT_RECORD) { + pNewType = Args[i - 1]->getType().getNonReferenceType(); + } else { ArBasicKind pEltType; // ComponentType, if the Id is special then it gets the @@ -6393,8 +6975,29 @@ bool HLSLExternalSource::MatchArguments( badArgIdx = std::min(badArgIdx, i); return false; } - } - else { + } else if (pArgument->uComponentTypeId == + INTRIN_COMPTYPE_FROM_NODEOUTPUT) { + ClassTemplateDecl *templateDecl = nullptr; + if (pArgument->uLegalComponentTypes == + LICOMPTYPE_GROUP_NODE_OUTPUT_RECORDS) + templateDecl = m_GroupNodeOutputRecordsTemplateDecl; + else if (pArgument->uLegalComponentTypes == + LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS) + templateDecl = m_ThreadNodeOutputRecordsTemplateDecl; + else { + assert(false && "unexpected comp type"); + } + + CXXRecordDecl *recordDecl = templateDecl->getTemplatedDecl(); + if (!recordDecl->isCompleteDefinition()) { + CompleteType(recordDecl); + } + + pNewType = 
GetOrCreateNodeOutputRecordSpecialization( + *m_context, m_sema, templateDecl, objectElement); + argTypes[i] = QualType(pNewType.getTypePtr(), quals); + continue; + } else { pEltType = ComponentType[pArgument->uComponentTypeId]; DXASSERT_VALIDBASICKIND(pEltType); } @@ -6425,13 +7028,89 @@ bool HLSLExternalSource::MatchArguments( CAB(uCols > 0 && uCols <= MaxVectorSize && uRows > 0 && uRows <= MaxVectorSize, i); // Const - UINT64 qwQual = pArgument->qwUsage & (AR_QUAL_ROWMAJOR | AR_QUAL_COLMAJOR); + UINT64 qwQual = + pArgument->qwUsage & + (AR_QUAL_ROWMAJOR | AR_QUAL_COLMAJOR | AR_QUAL_GROUPSHARED); if ((0 == i) || !(pArgument->qwUsage & AR_QUAL_OUT)) qwQual |= AR_QUAL_CONST; - DXASSERT_VALIDBASICKIND(pEltType); - pNewType = NewSimpleAggregateType(Template[pArgument->uTemplateId], pEltType, qwQual, uRows, uCols); + // If the type is WaveMatrix, construct a template specialization based + // on the template arguments of this wave matrix object in a special way. + if (IsWaveMatrixBasicKind(pEltType)) { + CXXRecordDecl *templateRecordDecl = + GetBasicKindType(pEltType)->getAsCXXRecordDecl(); + if (!templateRecordDecl->isCompleteDefinition()) { + // If template definition is not completed, no instantiations exist, + // so we can assume this candiate does not apply. 
+ badArgIdx = std::min(badArgIdx, i); + return false; + } + + // read template args of objectType + ArTypeInfo objInfo; + CollectInfo(objectType, &objInfo); + ArTypeInfo argInfo; + CollectInfo(Args[i - 1]->getType(), &argInfo); + ArBasicKind eltKind = GetValidWaveMatrixComponentTypeForArg( + objInfo.ObjKind, objInfo.EltKind, argInfo.ObjKind, argInfo.EltKind); + QualType compType = GetBasicKindType(eltKind); + + // Now construct the expected argument specialization + TemplateArgument templateArgs[3] = { + TemplateArgument(compType), + TemplateArgument(*m_context, + llvm::APSInt(llvm::APInt(32, objInfo.uRows)), + m_context->UnsignedIntTy), + TemplateArgument(*m_context, + llvm::APSInt(llvm::APInt(32, objInfo.uCols)), + m_context->UnsignedIntTy)}; + pNewType = GetOrCreateTemplateSpecialization( + *m_context, *m_sema, + templateRecordDecl->getDescribedClassTemplate(), templateArgs); + } else { + DXASSERT_VALIDBASICKIND(pEltType); + pNewType = NewSimpleAggregateType(Template[pArgument->uTemplateId], + pEltType, qwQual, uRows, uCols); + + // If array type, wrap in the argument's array type. 
+ if (i > 0 && Template[pArgument->uTemplateId] == AR_TOBJ_ARRAY) { + QualType arrayElt = Args[i - 1]->getType(); + SmallVector sizes; + while (arrayElt->isArrayType()) { + UINT size = 0; + if (arrayElt->isConstantArrayType()) { + const ConstantArrayType *arrayType = + (const ConstantArrayType *)arrayElt->getAsArrayTypeUnsafe(); + size = arrayType->getSize().getLimitedValue(); + } + arrayElt = QualType(arrayElt->getAsArrayTypeUnsafe() + ->getArrayElementTypeNoTypeQual(), + 0); + sizes.push_back(size); + } + // Wrap element in matching array dimensions: + while (sizes.size()) { + uint64_t size = sizes.pop_back_val(); + if (size) { + pNewType = m_context->getConstantArrayType( + pNewType, llvm::APInt(32, size, false), + ArrayType::ArraySizeModifier::Normal, 0); + } else { + pNewType = m_context->getIncompleteArrayType( + pNewType, ArrayType::ArraySizeModifier::Normal, 0); + } + } + if (qwQual & AR_QUAL_CONST) + pNewType = QualType(pNewType.getTypePtr(), Qualifiers::Const); + + if (qwQual & AR_QUAL_GROUPSHARED) + pNewType = + m_context->getAddrSpaceQualType(pNewType, DXIL::kTGSMAddrSpace); + + pNewType = m_context->getLValueReferenceType(pNewType); + } + } } DXASSERT(!pNewType.isNull(), "otherwise there's a branch in this function that fails to assign this"); @@ -6731,10 +7410,19 @@ void HLSLExternalSource::CollectInfo(QualType type, ArTypeInfo* pTypeInfo) // Try to inline that here, making it cheaper to use this function // when retrieving multiple properties. 
pTypeInfo->ObjKind = GetTypeElementKind(type); - pTypeInfo->EltTy = GetTypeElementType(type)->getCanonicalTypeUnqualified()->getTypePtr(); - pTypeInfo->EltKind = pTypeInfo->ObjKind; pTypeInfo->ShapeKind = GetTypeObjectKind(type); - GetRowsAndColsForAny(type, pTypeInfo->uRows, pTypeInfo->uCols); + if (IsWaveMatrixBasicKind(pTypeInfo->ObjKind)) { + QualType elTy; + GetWaveMatrixTemplateValues(type, &elTy, &pTypeInfo->uRows, + &pTypeInfo->uCols); + pTypeInfo->EltKind = GetTypeElementKind(elTy); + pTypeInfo->EltTy = pTypeInfo->EltTy = GetStructuralForm(elTy).getTypePtr(); + } else { + GetRowsAndColsForAny(type, pTypeInfo->uRows, pTypeInfo->uCols); + pTypeInfo->EltKind = pTypeInfo->ObjKind; + pTypeInfo->EltTy = + GetTypeElementType(type)->getCanonicalTypeUnqualified()->getTypePtr(); + } pTypeInfo->uTotalElts = pTypeInfo->uRows * pTypeInfo->uCols; } @@ -8259,12 +8947,13 @@ ExprResult HLSLExternalSource::LookupArrayMemberExprForHLSL( ExprResult HLSLExternalSource::MaybeConvertMemberAccess(_In_ clang::Expr* E) { DXASSERT_NOMSG(E != nullptr); - if (IsHLSLBufferViewType(E->getType())) { - QualType targetType = - m_context->getConstType(hlsl::GetHLSLResourceResultType(E->getType())); + if (IsHLSLObjectWithImplicitMemberAccess(E->getType())) { + QualType targetType = hlsl::GetHLSLResourceResultType(E->getType()); + if (IsHLSLObjectWithImplicitROMemberAccess(E->getType())) + targetType = m_context->getConstType(targetType); return ImplicitCastExpr::Create(*m_context, targetType, - CastKind::CK_FlatConversion, E, nullptr, - E->getValueKind()); + CastKind::CK_FlatConversion, E, nullptr, + E->getValueKind()); } ArBasicKind basic = GetTypeElementKind(E->getType()); if (!IS_BASIC_PRIMITIVE(basic)) { @@ -9914,6 +10603,9 @@ Sema::TemplateDeductionResult HLSLExternalSource::DeduceTemplateArgumentsForHLSL DXASSERT(functionParentRecord != nullptr, "otherwise function is orphaned"); QualType objectElement = GetFirstElementTypeFromDecl(functionParentRecord); + // Preserve full object type 
for special cases in method matching + QualType objectType = m_context->getTagDeclType(functionParentRecord); + QualType functionTemplateTypeArg {}; if (ExplicitTemplateArgs != nullptr && ExplicitTemplateArgs->size() == 1) { const TemplateArgument &firstTemplateArg = (*ExplicitTemplateArgs)[0].getArgument(); @@ -9986,7 +10678,7 @@ Sema::TemplateDeductionResult HLSLExternalSource::DeduceTemplateArgumentsForHLSL while (cursor != end) { size_t badArgIdx; - if (!MatchArguments(cursor, objectElement, functionTemplateTypeArg, Args, &argTypes, badArgIdx)) + if (!MatchArguments(cursor, objectType, objectElement, functionTemplateTypeArg, Args, &argTypes, badArgIdx)) { ++cursor; continue; @@ -10172,6 +10864,8 @@ void GetUnsignedLimit(ArBasicKind basicKind, uint64_t* maxValue) case AR_BASIC_UINT16: *maxValue = UINT16_MAX; return; case AR_BASIC_UINT32: *maxValue = UINT32_MAX; return; case AR_BASIC_UINT64: *maxValue = UINT64_MAX; return; + case AR_BASIC_UINT8_4PACKED: + case AR_BASIC_INT8_4PACKED: *maxValue = UINT32_MAX; return; default: // No other unsigned int types. 
break; @@ -10622,6 +11316,158 @@ static NameLookup GetSingleFunctionDeclByName(clang::Sema *self, StringRef Name, return NameLookup{ pFoundDecl, nullptr }; } +// Work Graph checks: +// - intrinsics are only called by nodes with appropriate launch types +class WorkGraphVisitor : public RecursiveASTVisitor { +private: + Sema &S; + // Current Launch Node + StringRef nodeLaunchType; + StringRef funcName; + SourceLocation nodeLaunchLoc; + SourceLocation computeLoc; + SourceLocation nodeLoc; + +public: + WorkGraphVisitor(Sema &S) : S(S) {} + + void diagnose(TranslationUnitDecl *TU) { TraverseTranslationUnitDecl(TU); } + + bool VisitFunctionDecl(FunctionDecl *Decl) { + nodeLaunchType = StringRef(); + funcName = StringRef(); + nodeLaunchLoc = SourceLocation(); + computeLoc = SourceLocation(); + nodeLoc = SourceLocation(); + + // a function may be both compute and work-graph node + for (auto *pAttr : Decl->specific_attrs()) { + DXIL::ShaderKind shaderKind = + ShaderModel::KindFromFullName(pAttr->getStage()); + if (shaderKind == DXIL::ShaderKind::Node) { + nodeLoc = pAttr->getLocation(); + } else if (shaderKind == DXIL::ShaderKind::Compute) { + computeLoc = pAttr->getLocation(); + } + } + // if this isn't a work-graph node we can quit now + if (!nodeLoc.isValid()) + return false; + + // nodes will always have a name - we'll save it for use in diagnostics + funcName = Decl->getName(); + + // save NodeLaunch type for use later + if (auto NodeLaunch = Decl->getAttr()) { + nodeLaunchType = NodeLaunch->getLaunchType(); + nodeLaunchLoc = NodeLaunch->getLocation(); + } else { + nodeLaunchType = "Broadcasting"; + nodeLaunchLoc = SourceLocation(); + } + + // If this is both a compute shader and work-graph node, it may only have broadcasting launch mode + if (computeLoc.isValid() && !nodeLaunchType.equals_lower("broadcasting")) { + S.Diags.Report(nodeLaunchLoc, diag::err_hlsl_compute_compatibility) + << funcName << nodeLaunchType.lower() + " launch type"; + S.Diags.Report(computeLoc, 
diag::note_defined_here) << "compute"; + // ignore other compute incompatibilities (i.e. input/output records) + computeLoc = SourceLocation(); + } + + // Check that a Thread node has thread group size (1,1,1) + if (nodeLaunchType.equals_lower("thread")) { + if (auto NumThreads = Decl->getAttr()) { + if (NumThreads->getX() != 1 || NumThreads->getY() != 1 || + NumThreads->getZ() != 1) { + S.Diags.Report(NumThreads->getLocation(), + diag::err_hlsl_wg_thread_launch_group_size); + // Only output the note if the source location is valid + if (nodeLaunchLoc.isValid()) + S.Diags.Report(nodeLaunchLoc, diag::note_defined_here) + << "Launch type"; + } + } + } + + return true; + } + + bool NodeInputIsCompatible(StringRef& typeName, StringRef& launchName) { + return llvm::StringSwitch(typeName) + .Case("DispatchNodeInputRecord", launchName.equals_lower("broadcasting")) + .Case("RWDispatchNodeInputRecord", launchName.equals_lower("broadcasting")) + .Case("GroupNodeInputRecords", launchName.equals_lower("coalescing")) + .Case("RWGroupNodeInputRecords", launchName.equals_lower("coalescing")) + .Case("EmptyNodeInput", launchName.equals_lower("coalescing")) + .Case("ThreadNodeInputRecord", launchName.equals_lower("thread")) + .Case("RWThreadNodeInputRecord", launchName.equals_lower("thread")) + .Default(false); + } + + bool VisitParmVarDecl(ParmVarDecl *P) { + // compute is incompatible with node input/output + if (computeLoc.isValid() && hlsl::IsHLSLNodeType(P->getType())) { + S.Diags.Report(P->getLocation(), diag::err_hlsl_compute_compatibility) + << funcName << "node input/output" << P->getSourceRange(); + S.Diags.Report(computeLoc, diag::note_defined_here) << "compute"; + // ignore any other errors + return true; + } + // Check any node input is compatible with the node launch type + if (hlsl::IsHLSLNodeInputType(P->getType())) { + const RecordType* RT = P->getType()->getAs(); + StringRef typeName = RT->getDecl()->getName(); + if (!NodeInputIsCompatible(typeName, 
nodeLaunchType)) { + S.Diags.Report(P->getLocation(), diag::err_hlsl_wg_input_kind) << typeName + << nodeLaunchType.lower() << P->getSourceRange(); + if (nodeLaunchLoc.isValid()) { + S.Diags.Report(nodeLaunchLoc, diag::note_defined_here) << "Launch type"; + } + } + } + + return true; + } + + bool VisitCallExpr(CallExpr *C) { + + if (FunctionDecl *FD = C->getDirectCallee()) { + if (FD->hasAttr()) { + // this is a call to a HLSL intrinsic FinishedCrossGroupSharing + hlsl::IntrinsicOp opCode = + (IntrinsicOp)FD->getAttr()->getOpcode(); + if (opCode == hlsl::IntrinsicOp::MOP_FinishedCrossGroupSharing) { + const CXXMethodDecl *MD = cast(FD); + const CXXRecordDecl *NodeRecDecl = MD->getParent(); + // Node I/O records are templateTypes + const ClassTemplateSpecializationDecl *templateDecl = + cast(NodeRecDecl); + auto &TemplateArgs = templateDecl->getTemplateArgs(); + DXASSERT(TemplateArgs.size() == 1, + "Input record types need to have one template argument"); + auto &Rec = TemplateArgs.get(0); + clang::QualType RecType = Rec.getAsType(); + RecordDecl *RD = RecType->getAs()->getDecl(); + if (!RD->hasAttr()) + S.Diags.Report(C->getLocStart(), + diag::err_hlsl_wg_nodetrackrwinputsharing_missing); + } + } + } + return true; + } +}; + +namespace hlsl { + +void DiagnoseWorkGraphConstraints(clang::Sema &S, + clang::TranslationUnitDecl *TU) { + WorkGraphVisitor visitor(S); + visitor.diagnose(TU); +} +} // namespace hlsl + void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { DXASSERT_NOMSG(self != nullptr); @@ -10643,6 +11489,9 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { } } + // Check constraints for work graphs + DiagnoseWorkGraphConstraints(*self, self->getASTContext().getTranslationUnitDecl()); + // Don't check entry function for library. if (self->getLangOpts().IsHLSLLibrary) { // TODO: validate no recursion start from every function. 
@@ -11062,7 +11911,7 @@ bool hlsl::ShouldSkipNRVO(clang::Sema& sema, clang::QualType returnType, clang:: ArrayEltTy = AT->getElementType(); } // exclude resource for globallycoherent. - if (hlsl::IsHLSLResourceType(ArrayEltTy)) + if (hlsl::IsHLSLResourceType(ArrayEltTy) || hlsl::IsHLSLNodeType(ArrayEltTy)) return true; // exclude precise. if (VD->hasAttr()) { @@ -11794,7 +12643,8 @@ template static EnumType ValidateAttributeEnumArg(Sema &S, const AttributeList &Attr, EnumType defaultValue, - unsigned index = 0) { + unsigned index = 0, + bool isCaseSensitive = true) { EnumType value(defaultValue); StringRef Str = ""; SourceLocation ArgLoc; @@ -11803,7 +12653,9 @@ static EnumType ValidateAttributeEnumArg(Sema &S, const AttributeList &Attr, if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str, &ArgLoc)) return value; - if (!ConvertStrToEnumType(Str, value)) { + std::string str = isCaseSensitive ? Str.str() : Str.lower(); + + if (!ConvertStrToEnumType(str, value)) { S.Diag(Attr.getLoc(), diag::warn_attribute_type_not_supported) << Attr.getName() << Str << ArgLoc; } @@ -11998,8 +12850,10 @@ static void ValidateAttributeOnSwitchOrIf(Sema& S, Stmt* St, const AttributeList } } -static StringRef ValidateAttributeStringArg(Sema& S, const AttributeList &A, _In_opt_z_ const char* values, unsigned index = 0) -{ +static StringRef ValidateAttributeStringArg(Sema &S, const AttributeList &A, + _In_opt_z_ const char *values, + unsigned index = 0, + bool isCaseSensitive = true) { // values is an optional comma-separated list of potential values. if (A.getNumArgs() <= index) return StringRef(); @@ -12014,6 +12868,9 @@ static StringRef ValidateAttributeStringArg(Sema& S, const AttributeList &A, _In StringLiteral* sl = cast(E); StringRef result = sl->getString(); + std::string cmpstr = sl->getString(); + if (!isCaseSensitive) + cmpstr = sl->getString().lower(); // Return result with no additional validation. 
if (values == nullptr) @@ -12027,8 +12884,8 @@ static StringRef ValidateAttributeStringArg(Sema& S, const AttributeList &A, _In DXASSERT_NOMSG(*value != ','); // no leading commas in values // Look for a match. - const char* argData = result.data(); - size_t argDataLen = result.size(); + const char* argData = cmpstr.c_str(); + size_t argDataLen = cmpstr.size(); while (argDataLen != 0 && *argData == *value && *value) { @@ -12114,6 +12971,43 @@ void Sema::DiagnoseGloballyCoherentMismatch(const Expr *SrcExpr, } } +void ValidateDispatchGridValues(DiagnosticsEngine &Diags, const AttributeList &A, Attr *declAttr) { + unsigned x = 1, y = 1, z = 1; + if (HLSLNodeDispatchGridAttr *pA = dyn_cast(declAttr)) { + x = pA->getX(); + y = pA->getY(); + z = pA->getZ(); + } else if (HLSLNodeMaxDispatchGridAttr *pA = dyn_cast(declAttr)) { + x = pA->getX(); + y = pA->getY(); + z = pA->getZ(); + } else { + llvm_unreachable("ValidateDispatchGridValues() called for wrong attribute"); + } + static const unsigned MaxComponentValue = 65535; // 2^16 - 1 + static const unsigned MaxProductValue = 16777215; // 2^24 - 1 + // If a component is out of range, we reset it to 0 to avoid also generating + // a secondary error if the product would be out of range + if (x < 1 || x > MaxComponentValue) { + Diags.Report(A.getArgAsExpr(0)->getExprLoc(), diag::err_hlsl_dispatchgrid_component) + << A.getName() << "X" << A.getRange(); + x = 0; + } + if (y < 1 || y > MaxComponentValue) { + Diags.Report(A.getArgAsExpr(1)->getExprLoc(), diag::err_hlsl_dispatchgrid_component) + << A.getName() << "Y" << A.getRange(); + y = 0; + } + if(z < 1 || z > MaxComponentValue) { + Diags.Report(A.getArgAsExpr(2)->getExprLoc(), diag::err_hlsl_dispatchgrid_component) + << A.getName() << "Z" << A.getRange(); + z = 0; + } + if (x * y * z > MaxProductValue) + Diags.Report(A.getLoc(), diag::err_hlsl_dispatchgrid_product) + << A.getName() << A.getRange(); +} + void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const 
AttributeList &A, bool& Handled) { DXASSERT_NOMSG(D != nullptr); @@ -12235,6 +13129,44 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, declAttr = ::new (S.Context) HLSLRayPayloadAttr( A.getRange(), S.Context, A.getAttributeSpellingListIndex()); break; + case AttributeList::AT_HLSLMaxRecords: + declAttr = ::new (S.Context) HLSLMaxRecordsAttr( + A.getRange(), S.Context, ValidateAttributeIntArg(S, A), + A.getAttributeSpellingListIndex()); + break; + case AttributeList::AT_HLSLMaxRecordsSharedWith: { + if (A.isArgIdent(0)) { + IdentifierInfo *II = A.getArgAsIdent(0)->Ident; + declAttr = ::new (S.Context) HLSLMaxRecordsSharedWithAttr( + A.getRange(), S.Context, II, A.getAttributeSpellingListIndex()); + } else { + S.Diag(A.getLoc(), diag::err_attribute_argument_n_type) + << A.getName() << 1 << AANT_ArgumentIdentifier; + // We return here to avoid falling into the default failure case and + // asserting + return; + } + break; + } + case AttributeList::AT_HLSLNodeArraySize: { + declAttr = ::new (S.Context) HLSLNodeArraySizeAttr( + A.getRange(), S.Context, ValidateAttributeIntArg(S, A), + A.getAttributeSpellingListIndex()); + break; + } + case AttributeList::AT_HLSLAllowSparseNodes: + declAttr = ::new (S.Context) HLSLAllowSparseNodesAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + break; + case AttributeList::AT_HLSLNodeId: + declAttr = ::new (S.Context) HLSLNodeIdAttr( + A.getRange(), S.Context, ValidateAttributeStringArg(S, A, nullptr, 0), + ValidateAttributeIntArg(S, A, 1), A.getAttributeSpellingListIndex()); + break; + case AttributeList::AT_HLSLNodeTrackRWInputSharing: + declAttr = ::new (S.Context) HLSLNodeTrackRWInputSharingAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + break; // SPIRV Change Starts case AttributeList::AT_VKDecorateIdExt: { if (A.getNumArgs() == 0 || !A.getArg(0).is()) { @@ -12353,7 +13285,8 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList 
&A, ValidateAttributeStringArg( S, A, "compute,vertex,pixel,hull,domain,geometry,raygeneration," - "intersection,anyhit,closesthit,miss,callable,mesh,amplification"), + "intersection,anyhit,closesthit,miss,callable,mesh,amplification," + "node"), A.getAttributeSpellingListIndex()); break; case AttributeList::AT_HLSLMaxVertexCount: @@ -12381,6 +13314,50 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, case AttributeList::AT_HLSLWaveOpsIncludeHelperLanes: declAttr = ::new (S.Context) HLSLWaveOpsIncludeHelperLanesAttr(A.getRange(), S.Context, A.getAttributeSpellingListIndex()); break; + case AttributeList::AT_HLSLNodeLaunch: + declAttr = ::new (S.Context) HLSLNodeLaunchAttr( + A.getRange(), S.Context, + ValidateAttributeStringArg(S, A, "broadcasting,coalescing,thread", 0, + false /*isCaseSensitive*/), + A.getAttributeSpellingListIndex()); + break; + case AttributeList::AT_HLSLNodeIsProgramEntry: + declAttr = ::new (S.Context) HLSLNodeIsProgramEntryAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + break; + case AttributeList::AT_HLSLNodeTrackRWInputSharing: + declAttr = ::new (S.Context) HLSLNodeTrackRWInputSharingAttr( + A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + break; + case AttributeList::AT_HLSLNodeLocalRootArgumentsTableIndex: + declAttr = ::new (S.Context) HLSLNodeLocalRootArgumentsTableIndexAttr( + A.getRange(), S.Context, ValidateAttributeIntArg(S, A), + A.getAttributeSpellingListIndex()); + break; + case AttributeList::AT_HLSLNodeShareInputOf: + declAttr = ::new (S.Context) HLSLNodeShareInputOfAttr( + A.getRange(), S.Context, ValidateAttributeStringArg(S, A, nullptr, 0), + ValidateAttributeIntArg(S, A, 1), A.getAttributeSpellingListIndex()); + break; + case AttributeList::AT_HLSLNodeDispatchGrid: + declAttr = ::new (S.Context) HLSLNodeDispatchGridAttr( + A.getRange(), S.Context, ValidateAttributeIntArg(S, A), + ValidateAttributeIntArg(S, A, 1), ValidateAttributeIntArg(S, A, 2), + 
A.getAttributeSpellingListIndex()); + ValidateDispatchGridValues(S.Diags, A, declAttr); + break; + case AttributeList::AT_HLSLNodeMaxDispatchGrid: + declAttr = ::new (S.Context) HLSLNodeMaxDispatchGridAttr( + A.getRange(), S.Context, ValidateAttributeIntArg(S, A), + ValidateAttributeIntArg(S, A, 1), ValidateAttributeIntArg(S, A, 2), + A.getAttributeSpellingListIndex()); + ValidateDispatchGridValues(S.Diags, A, declAttr); + break; + case AttributeList::AT_HLSLNodeMaxRecursionDepth: + declAttr = ::new (S.Context) HLSLNodeMaxRecursionDepthAttr( + A.getRange(), S.Context, ValidateAttributeIntArg(S, A), + A.getAttributeSpellingListIndex()); + break; default: Handled = false; break; // SPIRV Change: was return; @@ -13017,7 +13994,9 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, *pCenter = nullptr, *pAnyLinear = nullptr, // first linear attribute found *pTopology = nullptr, - *pMeshModifier = nullptr; + *pMeshModifier = nullptr, + *pDispatchGrid = nullptr, + *pMaxDispatchGrid = nullptr; bool usageIn = false; bool usageOut = false; @@ -13186,6 +14165,7 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, if (!isFunction) { Diag(pAttr->getLoc(), diag::err_hlsl_varmodifierna) << pAttr->getName() << declarationType << pAttr->getRange(); + result = false; } if (isStatic) { @@ -13218,6 +14198,32 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, } pMeshModifier = pAttr; break; + case AttributeList::AT_HLSLNodeDispatchGrid: + if (pDispatchGrid) { + // TODO: it would be nice to diffentiate between an exact duplicate and + // conflicting values + Diag(pAttr->getLoc(), diag::warn_duplicate_attribute_exact) + << pAttr->getName() << pAttr->getRange(); + result = false; + } else { + // Note: the NodeDispatchGrid values are validated later in + // HandleDeclAttributeForHLSL() + pDispatchGrid = pAttr; + } + break; + case AttributeList::AT_HLSLNodeMaxDispatchGrid: + if (pMaxDispatchGrid) { + // TODO: it 
would be nice to diffentiate between an exact duplicate and + // conflicting values + Diag(pAttr->getLoc(), diag::warn_duplicate_attribute_exact) + << pAttr->getName() << pAttr->getRange(); + result = false; + } else { + // Note: the NodeMaxDispatchGrid values are validated later in + // HandleDeclAttributeForHLSL() + pMaxDispatchGrid = pAttr; + } + break; default: break; @@ -13838,6 +14844,118 @@ void hlsl::CustomPrintHLSLAttr(const clang::Attr *A, llvm::raw_ostream &Out, con Out << "payload "; break; + case clang::attr::HLSLNodeLaunch: { + Attr *noconst = const_cast(A); + HLSLNodeLaunchAttr *ACast = static_cast(noconst); + Indent(Indentation, Out); + Out << "[NodeLaunch(\"" << ACast->getLaunchType() << "\")]\n"; + break; + } + + case clang::attr::HLSLNodeIsProgramEntry: + Indent(Indentation, Out); + Out << "[NodeIsProgramEntry]\n"; + break; + + case clang::attr::HLSLNodeId: { + Attr *noconst = const_cast(A); + HLSLNodeIdAttr *ACast = static_cast(noconst); + Indent(Indentation, Out); + if (ACast->getArrayIndex() > 0) + Out << "[NodeId(\"" << ACast->getName() << "\"," << ACast->getArrayIndex() + << ")]\n"; + else + Out << "[NodeId(\"" << ACast->getName() << "\")]\n"; + break; + } + + case clang::attr::HLSLNodeLocalRootArgumentsTableIndex: { + Attr *noconst = const_cast(A); + HLSLNodeLocalRootArgumentsTableIndexAttr *ACast = + static_cast(noconst); + Indent(Indentation, Out); + Out << "[NodeLocalRootTableIndex(" << ACast->getIndex() << ")]\n"; + break; + } + + case clang::attr::HLSLNodeShareInputOf: { + Attr *noconst = const_cast(A); + HLSLNodeShareInputOfAttr *ACast = + static_cast(noconst); + Indent(Indentation, Out); + if (ACast->getArrayIndex() > 0) + Out << "[NodeShareInputOf(\"" << ACast->getName() << "\"," + << ACast->getArrayIndex() << ")]\n"; + else + Out << "[NodeShareInputOf(\"" << ACast->getName() << "\")]\n"; + break; + } + + case clang::attr::HLSLNodeTrackRWInputSharing: { + Indent(Indentation, Out); + Out << "[HLSLNodeTrackRWInputSharing]\n"; + break; 
+ } + + case clang::attr::HLSLNodeDispatchGrid: { + Attr *noconst = const_cast(A); + HLSLNodeDispatchGridAttr *ACast = + static_cast(noconst); + Indent(Indentation, Out); + Out << "[NodeDispatchGrid(" << ACast->getX() << ", " << ACast->getY() + << ", " << ACast->getZ() << ")]\n"; + break; + } + + case clang::attr::HLSLNodeMaxDispatchGrid: { + Attr *noconst = const_cast(A); + HLSLNodeMaxDispatchGridAttr *ACast = + static_cast(noconst); + Indent(Indentation, Out); + Out << "[NodeMaxDispatchGrid(" << ACast->getX() << ", " << ACast->getY() + << ", " << ACast->getZ() << ")]\n"; + break; + } + + case clang::attr::HLSLNodeMaxRecursionDepth: { + Attr *noconst = const_cast(A); + HLSLNodeMaxRecursionDepthAttr *ACast = + static_cast(noconst); + Indent(Indentation, Out); + Out << "[NodeMaxRecursionDepth(" << ACast->getCount() << ")]\n"; + break; + } + + case clang::attr::HLSLMaxRecords: { + Attr *noconst = const_cast(A); + auto *ACast = static_cast(noconst); + Indent(Indentation, Out); + Out << "[MaxRecords(" << ACast->getMaxCount() << ")]\n"; + break; + } + case clang::attr::HLSLNodeArraySize: { + Attr *noconst = const_cast(A); + auto *ACast = static_cast(noconst); + Indent(Indentation, Out); + Out << "[NodeArraySize(" << ACast->getCount() << ")]\n"; + break; + } + + case clang::attr::HLSLMaxRecordsSharedWith: { + Attr *noconst = const_cast(A); + HLSLMaxRecordsSharedWithAttr *ACast = + static_cast(noconst); + Indent(Indentation, Out); + Out << "[MaxRecordsSharedWith(\"" << ACast->getName() << "\")]\n"; + break; + } + + case clang::attr::HLSLAllowSparseNodes: { + Indent(Indentation, Out); + Out << "[AllowSparseNodes]\n"; + break; + } + default: A->printPretty(Out, Policy); break; @@ -13897,6 +15015,19 @@ bool hlsl::IsHLSLAttr(clang::attr::Kind AttrKind) { case clang::attr::HLSLExport: case clang::attr::HLSLWaveSensitive: case clang::attr::HLSLWaveSize: + case clang::attr::HLSLMaxRecordsSharedWith: + case clang::attr::HLSLMaxRecords: + case clang::attr::HLSLNodeArraySize: + 
case clang::attr::HLSLAllowSparseNodes: + case clang::attr::HLSLNodeDispatchGrid: + case clang::attr::HLSLNodeMaxDispatchGrid: + case clang::attr::HLSLNodeMaxRecursionDepth: + case clang::attr::HLSLNodeId: + case clang::attr::HLSLNodeIsProgramEntry: + case clang::attr::HLSLNodeLaunch: + case clang::attr::HLSLNodeLocalRootArgumentsTableIndex: + case clang::attr::HLSLNodeShareInputOf: + case clang::attr::HLSLNodeTrackRWInputSharing: case clang::attr::VKBinding: case clang::attr::VKBuiltIn: case clang::attr::VKConstantId: diff --git a/tools/clang/lib/Sema/SemaOverload.cpp b/tools/clang/lib/Sema/SemaOverload.cpp index 7ed4bd0746..c8349d22be 100644 --- a/tools/clang/lib/Sema/SemaOverload.cpp +++ b/tools/clang/lib/Sema/SemaOverload.cpp @@ -9050,6 +9050,14 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand, return; } } + + // HLSL Change Starts + // With some intrinsics with templated parameters we can end up here + // with the To and From types being the same - we'll return early to + // avoid a weird diagnostic. + if (ToTy == FromTy) + return; + // HLSL Change Ends // Emit the generic diagnostic and, optionally, add the hints to it. PartialDiagnostic FDiag = S.PDiag(diag::note_ovl_candidate_bad_conv); diff --git a/tools/clang/test/DXILValidation/compute_node_compatibility.hlsl b/tools/clang/test/DXILValidation/compute_node_compatibility.hlsl new file mode 100644 index 0000000000..c2a8eec337 --- /dev/null +++ b/tools/clang/test/DXILValidation/compute_node_compatibility.hlsl @@ -0,0 +1,36 @@ +// RUN: %dxc -T lib_6_8 %s +// ================================================================== +// Check that validation errors are generated when both compute and +// node are specified and node input or outputs are also present. +// The validation test will add compute to each of the following +// shaders in turn, and check for the expected error message. +// We also check that only Broadcasting nodes may be used with +// compute. 
+// ================================================================== + +struct RECORD { + uint a; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1,1,1)] +void node01(DispatchNodeInputRecord input) { } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1,1,1)] +void node02(RWDispatchNodeInputRecord input) { } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(3, 1, 1)] +[NumThreads(1,1,1)] +void node03(NodeOutput output) { } + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NumThreads(1,1,1)] +void node04() { } diff --git a/tools/clang/test/DXILValidation/node_input_compatibility.hlsl b/tools/clang/test/DXILValidation/node_input_compatibility.hlsl new file mode 100644 index 0000000000..d1e9ce9204 --- /dev/null +++ b/tools/clang/test/DXILValidation/node_input_compatibility.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -T lib_6_8 %s +// ================================================================== +// Check that validation errors are generated when an input record +// has a type that is invalid with the launch mode. +// The validation test replaces each of the (valid) input types in +// turn to provoke the error diagnostics. 
+// ================================================================== + +struct RECORD { + uint a; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1,1,1)] +void node01(DispatchNodeInputRecord input) { } + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NumThreads(1,1,1)] +void node02(GroupNodeInputRecords input) { } + +[Shader("node")] +[NodeLaunch("Thread")] +[NumThreads(1,1,1)] +void node03(ThreadNodeInputRecord input) { } diff --git a/tools/clang/test/HLSL/scalar-assignments-exact-precision.hlsl b/tools/clang/test/HLSL/scalar-assignments-exact-precision.hlsl index df085b22cd..1d5f0a1279 100644 --- a/tools/clang/test/HLSL/scalar-assignments-exact-precision.hlsl +++ b/tools/clang/test/HLSL/scalar-assignments-exact-precision.hlsl @@ -478,4 +478,151 @@ unorm min10float left1125; min10float right1125; left1125 = right1125; // expe unorm min10float left1126; snorm min10float right1126; left1126 = right1126; // expected-warning {{'min10float' is promoted to 'half'}} expected-warning {{'min10float' is promoted to 'half'}} fxc-pass {{}} // unorm min10float left1127; unorm min10float right1127; left1127 = right1127; // expected-warning {{'min10float' is promoted to 'half'}} expected-warning {{'min10float' is promoted to 'half'}} fxc-pass {{}} // -} + +// Test additional types + +/* +import re +rxComments = re.compile(r'(//.*|/\*.*?\*\/)') +def strip_comments(line): + line = rxComments.sub('', line) + return line.strip() +def save_error_comments(lines): + saved = {} + for line in lines: + key = strip_comments(line) + if key and line.strip() != key: + saved[key] = line + return saved +def restore_error_comments(saved, lines): + return [saved.get(line.strip(), line) for line in lines] +def modify(lines, newlines): + return restore_error_comments(save_error_comments(lines), newlines) +def gen_code(template, combos): + return [ + template.format(left = left, right = right) + for left, right in combos] +*/ + +/* +types1 = 
'uint16_t int16_t float16_t'.split() +types2 = 'uint64_t int8_t4_packed uint8_t4_packed'.split() +types = types1 + types2 +new_type_combos = [(left, right) for left in types1 for right in types2] +new_type_combos += [(left, right) for left in types2 for right in types] +*/ + +// modify(lines, gen_code('{{ {left} left; {right} right; left = right; }}', new_type_combos)) +// GENERATED_CODE:BEGIN +{ uint16_t left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'uint16_t', possible loss of data}} */ +{ uint16_t left; int8_t4_packed right; left = right; } /* expected-warning {{conversion from larger type 'int8_t4_packed' to smaller type 'uint16_t', possible loss of data}} */ +{ uint16_t left; uint8_t4_packed right; left = right; } /* expected-warning {{conversion from larger type 'uint8_t4_packed' to smaller type 'uint16_t', possible loss of data}} */ +{ int16_t left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'int16_t', possible loss of data}} */ +{ int16_t left; int8_t4_packed right; left = right; } /* expected-warning {{conversion from larger type 'int8_t4_packed' to smaller type 'int16_t', possible loss of data}} */ +{ int16_t left; uint8_t4_packed right; left = right; } /* expected-warning {{conversion from larger type 'uint8_t4_packed' to smaller type 'int16_t', possible loss of data}} */ +{ float16_t left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'float16_t', possible loss of data}} */ +{ float16_t left; int8_t4_packed right; left = right; } /* expected-warning {{conversion from larger type 'int8_t4_packed' to smaller type 'float16_t', possible loss of data}} */ +{ float16_t left; uint8_t4_packed right; left = right; } /* expected-warning {{conversion from larger type 'uint8_t4_packed' to smaller type 'float16_t', possible loss of data}} */ +{ uint64_t left; uint16_t 
right; left = right; } +{ uint64_t left; int16_t right; left = right; } +{ uint64_t left; float16_t right; left = right; } +{ uint64_t left; uint64_t right; left = right; } +{ uint64_t left; int8_t4_packed right; left = right; } +{ uint64_t left; uint8_t4_packed right; left = right; } +{ int8_t4_packed left; uint16_t right; left = right; } +{ int8_t4_packed left; int16_t right; left = right; } +{ int8_t4_packed left; float16_t right; left = right; } +{ int8_t4_packed left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'int8_t4_packed', possible loss of data}} */ +{ int8_t4_packed left; int8_t4_packed right; left = right; } +{ int8_t4_packed left; uint8_t4_packed right; left = right; } +{ uint8_t4_packed left; uint16_t right; left = right; } +{ uint8_t4_packed left; int16_t right; left = right; } +{ uint8_t4_packed left; float16_t right; left = right; } +{ uint8_t4_packed left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'uint8_t4_packed', possible loss of data}} */ +{ uint8_t4_packed left; int8_t4_packed right; left = right; } +{ uint8_t4_packed left; uint8_t4_packed right; left = right; } +// GENERATED_CODE:END + +// Constant assignments + +/* +constant_ints = '0 -1 2U 3L 4ULL 3000000000 -3000000000 10000000000'.split() +constant_floats = '0.5 -0.5F'.split() +constants = constant_ints + constant_floats +# types2 assignments already tested in scalar-assignments.hlsl +constant_assignment_combos = [(left, right) for left in types1 for right in constants] +*/ + +// Catch bugs with SemaHLSL GetUnsignedLimit/GetSignedLimit + +// modify(lines, gen_code('{{ {left} left = {right}; }}', constant_assignment_combos)) +// GENERATED_CODE:BEGIN +{ uint16_t left = 0; } +{ uint16_t left = -1; } +{ uint16_t left = 2U; } +{ uint16_t left = 3L; } +{ uint16_t left = 4ULL; } +{ uint16_t left = 3000000000; } /* expected-warning {{implicit conversion from 
'literal int' to 'uint16_t' changes value from 3000000000 to 24064}} */ +{ uint16_t left = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint16_t' changes value from -3000000000 to 41472}} */ +{ uint16_t left = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint16_t' changes value from 10000000000 to 58368}} */ +{ uint16_t left = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'uint16_t' changes value from 0.5 to 0}} */ +{ uint16_t left = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'uint16_t', possible loss of data}} expected-warning {{implicit conversion from 'float' to 'uint16_t' changes value from 0.5 to 0}} */ +{ int16_t left = 0; } +{ int16_t left = -1; } +{ int16_t left = 2U; } +{ int16_t left = 3L; } +{ int16_t left = 4ULL; } +{ int16_t left = 3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'int16_t' changes value from 3000000000 to 24064}} */ +{ int16_t left = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'int16_t' changes value from -3000000000 to -24064}} */ +{ int16_t left = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'int16_t' changes value from 10000000000 to -7168}} */ +{ int16_t left = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'int16_t' changes value from 0.5 to 0}} */ +{ int16_t left = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'int16_t', possible loss of data}} expected-warning {{implicit conversion from 'float' to 'int16_t' changes value from 0.5 to 0}} */ +{ float16_t left = 0; } +{ float16_t left = -1; } +{ float16_t left = 2U; } +{ float16_t left = 3L; } +{ float16_t left = 4ULL; } +{ float16_t left = 3000000000; } +{ float16_t left = -3000000000; } +{ float16_t left = 10000000000; } +{ float16_t left = 0.5; } +{ float16_t left = -0.5F; } /* 
expected-warning {{conversion from larger type 'float' to smaller type 'float16_t', possible loss of data}} */ +// GENERATED_CODE:END + +// Catch bugs with clang type ranges when adding custom types + +// modify(lines, gen_code('{{ {left} left[2]; left[1] = {right}; }}', constant_assignment_combos)) +// GENERATED_CODE:BEGIN +{ uint16_t left[2]; left[1] = 0; } +{ uint16_t left[2]; left[1] = -1; } +{ uint16_t left[2]; left[1] = 2U; } +{ uint16_t left[2]; left[1] = 3L; } +{ uint16_t left[2]; left[1] = 4ULL; } +{ uint16_t left[2]; left[1] = 3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint16_t' changes value from 3000000000 to 24064}} */ +{ uint16_t left[2]; left[1] = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint16_t' changes value from -3000000000 to 41472}} */ +{ uint16_t left[2]; left[1] = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint16_t' changes value from 10000000000 to 58368}} */ +{ uint16_t left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'uint16_t' changes value from 0.5 to 0}} */ +{ uint16_t left[2]; left[1] = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'uint16_t', possible loss of data}} expected-warning {{implicit conversion from 'float' to 'uint16_t' changes value from 0.5 to 0}} */ +{ int16_t left[2]; left[1] = 0; } +{ int16_t left[2]; left[1] = -1; } +{ int16_t left[2]; left[1] = 2U; } +{ int16_t left[2]; left[1] = 3L; } +{ int16_t left[2]; left[1] = 4ULL; } +{ int16_t left[2]; left[1] = 3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'int16_t' changes value from 3000000000 to 24064}} */ +{ int16_t left[2]; left[1] = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'int16_t' changes value from -3000000000 to -24064}} */ +{ int16_t left[2]; left[1] = 10000000000; } /* expected-warning {{implicit conversion 
from 'literal int' to 'int16_t' changes value from 10000000000 to -7168}} */ +{ int16_t left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'int16_t' changes value from 0.5 to 0}} */ +{ int16_t left[2]; left[1] = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'int16_t', possible loss of data}} expected-warning {{implicit conversion from 'float' to 'int16_t' changes value from 0.5 to 0}} */ +{ float16_t left[2]; left[1] = 0; } +{ float16_t left[2]; left[1] = -1; } +{ float16_t left[2]; left[1] = 2U; } +{ float16_t left[2]; left[1] = 3L; } +{ float16_t left[2]; left[1] = 4ULL; } +{ float16_t left[2]; left[1] = 3000000000; } +{ float16_t left[2]; left[1] = -3000000000; } +{ float16_t left[2]; left[1] = 10000000000; } +{ float16_t left[2]; left[1] = 0.5; } +{ float16_t left[2]; left[1] = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'float16_t', possible loss of data}} */ +// GENERATED_CODE:END + +} \ No newline at end of file diff --git a/tools/clang/test/HLSL/scalar-assignments.hlsl b/tools/clang/test/HLSL/scalar-assignments.hlsl index e8499059db..dc97513c95 100644 --- a/tools/clang/test/HLSL/scalar-assignments.hlsl +++ b/tools/clang/test/HLSL/scalar-assignments.hlsl @@ -321,4 +321,495 @@ unorm min10float left1125; min10float right1125; left1125 = right1125; // expe unorm min10float left1126; snorm min10float right1126; left1126 = right1126; // expected-warning {{'min10float' is promoted to 'min16float'}} expected-warning {{'min10float' is promoted to 'min16float'}} fxc-pass {{}} // unorm min10float left1127; unorm min10float right1127; left1127 = right1127; // expected-warning {{'min10float' is promoted to 'min16float'}} expected-warning {{'min10float' is promoted to 'min16float'}} fxc-pass {{}} // -} + +// Test additional types + +/* +import re +rxComments = re.compile(r'(//.*|/\*.*?\*\/)') +def strip_comments(line): + line = rxComments.sub('', line) + 
return line.strip() +def save_error_comments(lines): + saved = {} + for line in lines: + key = strip_comments(line) + if key and line.strip() != key: + saved[key] = line + return saved +def restore_error_comments(saved, lines): + return [saved.get(line.strip(), line) for line in lines] +def modify(lines, newlines): + return restore_error_comments(save_error_comments(lines), newlines) +def gen_code(template, combos): + return [ + template.format(left = left, right = right) + for left, right in combos] +*/ + +/* +types1 = 'bool int uint dword half float double min16float min10float min16int min12int min16uint'.split() +types2 = 'int64_t uint64_t int8_t4_packed uint8_t4_packed'.split() +types = types1 + types2 +new_type_combos = [(left, right) for left in types1 for right in types2] +new_type_combos += [(left, right) for left in types2 for right in types] +*/ + +// modify(lines, gen_code('{{ {left} left; {right} right; left = right; }}', new_type_combos)) +// GENERATED_CODE:BEGIN +{ bool left; int64_t right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ bool left; uint64_t right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ bool left; int8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ bool left; uint8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ int left; int64_t right; left = right; } /* expected-warning {{conversion from larger type 'int64_t' to smaller type 'int', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ int left; uint64_t right; left = right; } /* expected-warning {{conversion 
from larger type 'uint64_t' to smaller type 'int', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ int left; int8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ int left; uint8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint left; int64_t right; left = right; } /* expected-warning {{conversion from larger type 'int64_t' to smaller type 'uint', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ uint left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'uint', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint left; int8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ uint left; uint8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ dword left; int64_t right; left = right; } /* expected-warning {{conversion from larger type 'int64_t' to smaller type 'dword', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ dword left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'dword', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ dword left; int8_t4_packed right; left = right; } 
/* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ dword left; uint8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ half left; int64_t right; left = right; } /* expected-warning {{conversion from larger type 'int64_t' to smaller type 'half', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ half left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'half', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ half left; int8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ half left; uint8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ float left; int64_t right; left = right; } /* expected-warning {{conversion from larger type 'int64_t' to smaller type 'float', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ float left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'float', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ float left; int8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ float left; uint8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'right'}} 
fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ double left; int64_t right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ double left; uint64_t right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ double left; int8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ double left; uint8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ min16float left; int64_t right; left = right; } /* expected-warning {{conversion from larger type 'int64_t' to smaller type 'min16float', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ min16float left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'min16float', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ min16float left; int8_t4_packed right; left = right; } /* expected-warning {{conversion from larger type 'int8_t4_packed' to smaller type 'min16float', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ min16float left; uint8_t4_packed right; left = right; } /* expected-warning {{conversion from larger type 'uint8_t4_packed' to smaller type 'min16float', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ min10float left; int64_t right; left = right; } /* expected-warning {{'min10float' is promoted to 
'min16float'}} expected-warning {{conversion from larger type 'int64_t' to smaller type 'min10float', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ min10float left; uint64_t right; left = right; } /* expected-warning {{'min10float' is promoted to 'min16float'}} expected-warning {{conversion from larger type 'uint64_t' to smaller type 'min10float', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ min10float left; int8_t4_packed right; left = right; } /* expected-warning {{'min10float' is promoted to 'min16float'}} expected-warning {{conversion from larger type 'int8_t4_packed' to smaller type 'min10float', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ min10float left; uint8_t4_packed right; left = right; } /* expected-warning {{'min10float' is promoted to 'min16float'}} expected-warning {{conversion from larger type 'uint8_t4_packed' to smaller type 'min10float', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ min16int left; int64_t right; left = right; } /* expected-warning {{conversion from larger type 'int64_t' to smaller type 'min16int', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ min16int left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'min16int', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ min16int left; int8_t4_packed right; left = right; } /* expected-warning {{conversion from larger type 'int8_t4_packed' to smaller type 'min16int', 
possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ min16int left; uint8_t4_packed right; left = right; } /* expected-warning {{conversion from larger type 'uint8_t4_packed' to smaller type 'min16int', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ min12int left; int64_t right; left = right; } /* expected-warning {{'min12int' is promoted to 'min16int'}} expected-warning {{conversion from larger type 'int64_t' to smaller type 'min12int', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ min12int left; uint64_t right; left = right; } /* expected-warning {{'min12int' is promoted to 'min16int'}} expected-warning {{conversion from larger type 'uint64_t' to smaller type 'min12int', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ min12int left; int8_t4_packed right; left = right; } /* expected-warning {{'min12int' is promoted to 'min16int'}} expected-warning {{conversion from larger type 'int8_t4_packed' to smaller type 'min12int', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ min12int left; uint8_t4_packed right; left = right; } /* expected-warning {{'min12int' is promoted to 'min16int'}} expected-warning {{conversion from larger type 'uint8_t4_packed' to smaller type 'min12int', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ min16uint left; int64_t right; left = right; } /* expected-warning {{conversion from larger type 'int64_t' to smaller type 'min16uint', possible loss of data}} fxc-error {{X3000: 
unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ min16uint left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'min16uint', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ min16uint left; int8_t4_packed right; left = right; } /* expected-warning {{conversion from larger type 'int8_t4_packed' to smaller type 'min16uint', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'right'}} */ +{ min16uint left; uint8_t4_packed right; left = right; } /* expected-warning {{conversion from larger type 'uint8_t4_packed' to smaller type 'min16uint', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'right'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ int64_t left; bool right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; int right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; uint right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; dword right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; half right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; float right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; double right; left = right; } /* fxc-error {{X3000: unrecognized 
identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; min16float right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; min10float right; left = right; } /* expected-warning {{'min10float' is promoted to 'min16float'}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; min16int right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; min12int right; left = right; } /* expected-warning {{'min12int' is promoted to 'min16int'}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; min16uint right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; int64_t right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; uint64_t right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; int8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left; uint8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ uint64_t left; bool right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; int right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ 
uint64_t left; uint right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; dword right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; half right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; float right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; double right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; min16float right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; min10float right; left = right; } /* expected-warning {{'min10float' is promoted to 'min16float'}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; min16int right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; min12int right; left = right; } /* expected-warning {{'min12int' is promoted to 'min16int'}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; min16uint right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; int64_t right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; uint64_t right; left = right; } /* fxc-error {{X3000: unrecognized 
identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; int8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left; uint8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ int8_t4_packed left; bool right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; int right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; uint right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; dword right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; half right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; float right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; double right; left = right; } /* expected-warning {{conversion from larger type 'double' to smaller type 'int8_t4_packed', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; min16float right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; min10float right; left = right; } /* expected-warning 
{{'min10float' is promoted to 'min16float'}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; min16int right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; min12int right; left = right; } /* expected-warning {{'min12int' is promoted to 'min16int'}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; min16uint right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; int64_t right; left = right; } /* expected-warning {{conversion from larger type 'int64_t' to smaller type 'int8_t4_packed', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'int8_t4_packed', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; int8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left; uint8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ uint8_t4_packed left; bool right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; int right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: 
unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; uint right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; dword right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; half right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; float right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; double right; left = right; } /* expected-warning {{conversion from larger type 'double' to smaller type 'uint8_t4_packed', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; min16float right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; min10float right; left = right; } /* expected-warning {{'min10float' is promoted to 'min16float'}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; min16int right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; min12int right; left = right; } /* expected-warning {{'min12int' is promoted to 'min16int'}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; min16uint right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error 
{{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; int64_t right; left = right; } /* expected-warning {{conversion from larger type 'int64_t' to smaller type 'uint8_t4_packed', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; uint64_t right; left = right; } /* expected-warning {{conversion from larger type 'uint64_t' to smaller type 'uint8_t4_packed', possible loss of data}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; int8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left; uint8_t4_packed right; left = right; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +// GENERATED_CODE:END + +// Constant assignments + +/* +constant_ints = '0 -1 2U 3L 4ULL 3000000000 -3000000000 10000000000'.split() +constant_floats = '0.5 -0.5F'.split() +constants = constant_ints + constant_floats +constant_assignment_combos = [(left, right) for left in types for right in constants] +*/ + +// Catch bugs with SemaHLSL GetUnsignedLimit/GetSignedLimit + +// modify(lines, gen_code('{{ {left} left = {right}; }}', constant_assignment_combos)) +// GENERATED_CODE:BEGIN +{ bool left = 0; } +{ bool left = -1; } +{ bool left = 2U; } +{ bool left = 3L; } +{ bool left = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} */ +{ bool left = 3000000000; } +{ bool left = -3000000000; } +{ bool left = 10000000000; } /* fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ bool left = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'bool' changes value from 0.5 to false}} fxc-pass {{}} */ +{ bool left = -0.5F; } /* 
expected-warning {{implicit conversion from 'float' to 'bool' changes value from 0.5 to false}} fxc-pass {{}} */ +{ int left = 0; } +{ int left = -1; } +{ int left = 2U; } +{ int left = 3L; } +{ int left = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} */ +{ int left = 3000000000; } +{ int left = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'int' changes value from -3000000000 to 1294967296}} fxc-pass {{}} */ +{ int left = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'int' changes value from 10000000000 to 1410065408}} fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ int left = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'int' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ int left = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'int' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ uint left = 0; } +{ uint left = -1; } +{ uint left = 2U; } +{ uint left = 3L; } +{ uint left = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} */ +{ uint left = 3000000000; } +{ uint left = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint' changes value from -3000000000 to 1294967296}} fxc-pass {{}} */ +{ uint left = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint' changes value from 10000000000 to 1410065408}} fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ uint left = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'uint' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ uint left = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'uint' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ dword left = 0; } +{ dword left = -1; } +{ dword left = 2U; } +{ dword left = 3L; } +{ dword left = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} */ +{ dword left = 3000000000; } +{ 
dword left = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'dword' changes value from -3000000000 to 1294967296}} fxc-pass {{}} */ +{ dword left = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'dword' changes value from 10000000000 to 1410065408}} fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ dword left = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'dword' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ dword left = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'dword' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ half left = 0; } +{ half left = -1; } +{ half left = 2U; } +{ half left = 3L; } +{ half left = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} */ +{ half left = 3000000000; } +{ half left = -3000000000; } +{ half left = 10000000000; } /* fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ half left = 0.5; } +{ half left = -0.5F; } +{ float left = 0; } +{ float left = -1; } +{ float left = 2U; } +{ float left = 3L; } +{ float left = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} */ +{ float left = 3000000000; } +{ float left = -3000000000; } +{ float left = 10000000000; } /* fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ float left = 0.5; } +{ float left = -0.5F; } +{ double left = 0; } +{ double left = -1; } +{ double left = 2U; } +{ double left = 3L; } +{ double left = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} */ +{ double left = 3000000000; } +{ double left = -3000000000; } +{ double left = 10000000000; } /* fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ double left = 0.5; } +{ double left = -0.5F; } +{ min16float left = 0; } +{ min16float left = -1; } +{ min16float left = 2U; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16float left = 3L; } +{ min16float left = 
4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16float left = 3000000000; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16float left = -3000000000; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16float left = 10000000000; } /* fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ min16float left = 0.5; } +{ min16float left = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'min16float', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min10float left = 0; } /* expected-warning {{'min10float' is promoted to 'min16float'}} fxc-pass {{}} */ +{ min10float left = -1; } /* expected-warning {{'min10float' is promoted to 'min16float'}} fxc-pass {{}} */ +{ min10float left = 2U; } /* expected-warning {{'min10float' is promoted to 'min16float'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min10float left = 3L; } /* expected-warning {{'min10float' is promoted to 'min16float'}} fxc-pass {{}} */ +{ min10float left = 4ULL; } /* expected-warning {{'min10float' is promoted to 'min16float'}} fxc-error {{X3000: syntax error: unexpected token 'L'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min10float left = 3000000000; } /* expected-warning {{'min10float' is promoted to 'min16float'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min10float left = -3000000000; } /* expected-warning {{'min10float' is promoted to 'min16float'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min10float left = 10000000000; } /* expected-warning {{'min10float' is promoted to 'min16float'}} fxc-error 
{{X3000: syntax error: unexpected token ';'}} */ +{ min10float left = 0.5; } /* expected-warning {{'min10float' is promoted to 'min16float'}} fxc-pass {{}} */ +{ min10float left = -0.5F; } /* expected-warning {{'min10float' is promoted to 'min16float'}} expected-warning {{conversion from larger type 'float' to smaller type 'min10float', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16int left = 0; } +{ min16int left = -1; } +{ min16int left = 2U; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16int left = 3L; } +{ min16int left = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16int left = 3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min16int' changes value from 3000000000 to 24064}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16int left = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min16int' changes value from -3000000000 to -24064}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16int left = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min16int' changes value from 10000000000 to -7168}} fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ min16int left = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'min16int' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ min16int left = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'min16int', possible loss of data}} expected-warning {{implicit conversion from 'float' to 'min16int' changes value from 0.5 to 0}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min12int left = 0; } 
/* expected-warning {{'min12int' is promoted to 'min16int'}} fxc-pass {{}} */ +{ min12int left = -1; } /* expected-warning {{'min12int' is promoted to 'min16int'}} fxc-pass {{}} */ +{ min12int left = 2U; } /* expected-warning {{'min12int' is promoted to 'min16int'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min12int left = 3L; } /* expected-warning {{'min12int' is promoted to 'min16int'}} fxc-pass {{}} */ +{ min12int left = 4ULL; } /* expected-warning {{'min12int' is promoted to 'min16int'}} fxc-error {{X3000: syntax error: unexpected token 'L'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min12int left = 3000000000; } /* expected-warning {{'min12int' is promoted to 'min16int'}} expected-warning {{implicit conversion from 'literal int' to 'min12int' changes value from 3000000000 to 24064}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min12int left = -3000000000; } /* expected-warning {{'min12int' is promoted to 'min16int'}} expected-warning {{implicit conversion from 'literal int' to 'min12int' changes value from -3000000000 to -24064}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min12int left = 10000000000; } /* expected-warning {{'min12int' is promoted to 'min16int'}} expected-warning {{implicit conversion from 'literal int' to 'min12int' changes value from 10000000000 to -7168}} fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ min12int left = 0.5; } /* expected-warning {{'min12int' is promoted to 'min16int'}} expected-warning {{implicit conversion from 'literal float' to 'min12int' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ min12int left = -0.5F; } /* expected-warning {{'min12int' is promoted to 'min16int'}} expected-warning {{conversion from larger type 'float' to smaller type 'min12int', possible loss of data}} expected-warning {{implicit conversion from 
'float' to 'min12int' changes value from 0.5 to 0}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16uint left = 0; } +{ min16uint left = -1; } +{ min16uint left = 2U; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16uint left = 3L; } +{ min16uint left = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16uint left = 3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min16uint' changes value from 3000000000 to 24064}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16uint left = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min16uint' changes value from -3000000000 to 41472}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16uint left = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min16uint' changes value from 10000000000 to 58368}} fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ min16uint left = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'min16uint' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ min16uint left = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'min16uint', possible loss of data}} expected-warning {{implicit conversion from 'float' to 'min16uint' changes value from 0.5 to 0}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ int64_t left = 0; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left = -1; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left = 2U; } /* fxc-error {{X3000: 
unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left = 3L; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left = 4ULL; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left = 3000000000; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left = -3000000000; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left = 10000000000; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'int64_t' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'int64_t' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ uint64_t left = 0; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left = -1; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left = 2U; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left = 3L; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left = 4ULL; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left = 
3000000000; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left = -3000000000; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left = 10000000000; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'uint64_t' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'uint64_t' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ int8_t4_packed left = 0; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left = -1; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left = 2U; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left = 3L; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left = 4ULL; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left = 3000000000; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'int8_t4_packed' changes value from -3000000000 to 1294967296}} fxc-error 
{{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'int8_t4_packed' changes value from 10000000000 to 1410065408}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'int8_t4_packed' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'int8_t4_packed' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ uint8_t4_packed left = 0; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left = -1; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left = 2U; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left = 3L; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left = 4ULL; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left = 3000000000; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint8_t4_packed' changes value from -3000000000 to 
1294967296}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint8_t4_packed' changes value from 10000000000 to 1410065408}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'uint8_t4_packed' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'uint8_t4_packed' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +// GENERATED_CODE:END + +// Catch bugs with clang type ranges when adding custom types + +// modify(lines, gen_code('{{ {left} left[2]; left[1] = {right}; }}', constant_assignment_combos)) +// GENERATED_CODE:BEGIN +{ bool left[2]; left[1] = 0; } +{ bool left[2]; left[1] = -1; } +{ bool left[2]; left[1] = 2U; } +{ bool left[2]; left[1] = 3L; } +{ bool left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} */ +{ bool left[2]; left[1] = 3000000000; } +{ bool left[2]; left[1] = -3000000000; } +{ bool left[2]; left[1] = 10000000000; } /* fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ bool left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'bool' changes value from 0.5 to false}} fxc-pass {{}} */ +{ bool left[2]; left[1] = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'bool' changes value from 0.5 to false}} fxc-pass {{}} */ +{ int left[2]; left[1] = 0; } +{ int left[2]; left[1] = -1; } +{ int left[2]; left[1] = 2U; } +{ int left[2]; 
left[1] = 3L; } +{ int left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} */ +{ int left[2]; left[1] = 3000000000; } +{ int left[2]; left[1] = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'int' changes value from -3000000000 to 1294967296}} fxc-pass {{}} */ +{ int left[2]; left[1] = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'int' changes value from 10000000000 to 1410065408}} fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ int left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'int' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ int left[2]; left[1] = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'int' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ uint left[2]; left[1] = 0; } +{ uint left[2]; left[1] = -1; } +{ uint left[2]; left[1] = 2U; } +{ uint left[2]; left[1] = 3L; } +{ uint left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} */ +{ uint left[2]; left[1] = 3000000000; } +{ uint left[2]; left[1] = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint' changes value from -3000000000 to 1294967296}} fxc-pass {{}} */ +{ uint left[2]; left[1] = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint' changes value from 10000000000 to 1410065408}} fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ uint left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'uint' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ uint left[2]; left[1] = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'uint' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ dword left[2]; left[1] = 0; } +{ dword left[2]; left[1] = -1; } +{ dword left[2]; left[1] = 2U; } +{ dword left[2]; left[1] = 3L; } +{ dword left[2]; left[1] = 4ULL; } /* fxc-error 
{{X3000: syntax error: unexpected token 'L'}} */ +{ dword left[2]; left[1] = 3000000000; } +{ dword left[2]; left[1] = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'dword' changes value from -3000000000 to 1294967296}} fxc-pass {{}} */ +{ dword left[2]; left[1] = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'dword' changes value from 10000000000 to 1410065408}} fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ dword left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'dword' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ dword left[2]; left[1] = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'dword' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ half left[2]; left[1] = 0; } +{ half left[2]; left[1] = -1; } +{ half left[2]; left[1] = 2U; } +{ half left[2]; left[1] = 3L; } +{ half left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} */ +{ half left[2]; left[1] = 3000000000; } +{ half left[2]; left[1] = -3000000000; } +{ half left[2]; left[1] = 10000000000; } /* fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ half left[2]; left[1] = 0.5; } +{ half left[2]; left[1] = -0.5F; } +{ float left[2]; left[1] = 0; } +{ float left[2]; left[1] = -1; } +{ float left[2]; left[1] = 2U; } +{ float left[2]; left[1] = 3L; } +{ float left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} */ +{ float left[2]; left[1] = 3000000000; } +{ float left[2]; left[1] = -3000000000; } +{ float left[2]; left[1] = 10000000000; } /* fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ float left[2]; left[1] = 0.5; } +{ float left[2]; left[1] = -0.5F; } +{ double left[2]; left[1] = 0; } +{ double left[2]; left[1] = -1; } +{ double left[2]; left[1] = 2U; } +{ double left[2]; left[1] = 3L; } +{ double left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected 
token 'L'}} */ +{ double left[2]; left[1] = 3000000000; } +{ double left[2]; left[1] = -3000000000; } +{ double left[2]; left[1] = 10000000000; } /* fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ double left[2]; left[1] = 0.5; } +{ double left[2]; left[1] = -0.5F; } +{ min16float left[2]; left[1] = 0; } +{ min16float left[2]; left[1] = -1; } +{ min16float left[2]; left[1] = 2U; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16float left[2]; left[1] = 3L; } +{ min16float left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16float left[2]; left[1] = 3000000000; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16float left[2]; left[1] = -3000000000; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16float left[2]; left[1] = 10000000000; } /* fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ min16float left[2]; left[1] = 0.5; } +{ min16float left[2]; left[1] = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'min16float', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min10float left[2]; left[1] = 0; } +{ min10float left[2]; left[1] = -1; } +{ min10float left[2]; left[1] = 2U; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min10float left[2]; left[1] = 3L; } +{ min10float left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min10float left[2]; left[1] = 3000000000; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min10float left[2]; left[1] = 
-3000000000; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min10float left[2]; left[1] = 10000000000; } /* fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ min10float left[2]; left[1] = 0.5; } +{ min10float left[2]; left[1] = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'min10float', possible loss of data}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16int left[2]; left[1] = 0; } +{ min16int left[2]; left[1] = -1; } +{ min16int left[2]; left[1] = 2U; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16int left[2]; left[1] = 3L; } +{ min16int left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16int left[2]; left[1] = 3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min16int' changes value from 3000000000 to 24064}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16int left[2]; left[1] = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min16int' changes value from -3000000000 to -24064}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16int left[2]; left[1] = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min16int' changes value from 10000000000 to -7168}} fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ min16int left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'min16int' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ min16int left[2]; left[1] = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'min16int', possible loss of data}} expected-warning {{implicit conversion 
from 'float' to 'min16int' changes value from 0.5 to 0}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min12int left[2]; left[1] = 0; } +{ min12int left[2]; left[1] = -1; } +{ min12int left[2]; left[1] = 2U; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min12int left[2]; left[1] = 3L; } +{ min12int left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min12int left[2]; left[1] = 3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min12int' changes value from 3000000000 to 24064}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min12int left[2]; left[1] = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min12int' changes value from -3000000000 to -24064}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min12int left[2]; left[1] = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min12int' changes value from 10000000000 to -7168}} fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ min12int left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'min12int' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ min12int left[2]; left[1] = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'min12int', possible loss of data}} expected-warning {{implicit conversion from 'float' to 'min12int' changes value from 0.5 to 0}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16uint left[2]; left[1] = 0; } +{ min16uint left[2]; left[1] = -1; } +{ min16uint left[2]; left[1] = 2U; } /* fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ 
min16uint left[2]; left[1] = 3L; } +{ min16uint left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: syntax error: unexpected token 'L'}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16uint left[2]; left[1] = 3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min16uint' changes value from 3000000000 to 24064}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16uint left[2]; left[1] = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min16uint' changes value from -3000000000 to 41472}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ min16uint left[2]; left[1] = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'min16uint' changes value from 10000000000 to 58368}} fxc-error {{X3000: syntax error: unexpected token ';'}} */ +{ min16uint left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'min16uint' changes value from 0.5 to 0}} fxc-pass {{}} */ +{ min16uint left[2]; left[1] = -0.5F; } /* expected-warning {{conversion from larger type 'float' to smaller type 'min16uint', possible loss of data}} expected-warning {{implicit conversion from 'float' to 'min16uint' changes value from 0.5 to 0}} fxc-warning {{X3205: conversion from larger type to smaller, possible loss of data}} */ +{ int64_t left[2]; left[1] = 0; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left[2]; left[1] = -1; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left[2]; left[1] = 2U; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left[2]; left[1] = 3L; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} 
fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left[2]; left[1] = 3000000000; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left[2]; left[1] = -3000000000; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left[2]; left[1] = 10000000000; } /* fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'int64_t' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int64_t left[2]; left[1] = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'int64_t' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'int64_t'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ uint64_t left[2]; left[1] = 0; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left[2]; left[1] = -1; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left[2]; left[1] = 2U; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left[2]; left[1] = 3L; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left[2]; left[1] = 3000000000; } /* fxc-error 
{{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left[2]; left[1] = -3000000000; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left[2]; left[1] = 10000000000; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'uint64_t' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ uint64_t left[2]; left[1] = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'uint64_t' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint64_t'}} */ +{ int8_t4_packed left[2]; left[1] = 0; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left[2]; left[1] = -1; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left[2]; left[1] = 2U; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left[2]; left[1] = 3L; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left[2]; left[1] = 3000000000; } /* fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left[2]; left[1] = -3000000000; } /* expected-warning {{implicit 
conversion from 'literal int' to 'int8_t4_packed' changes value from -3000000000 to 1294967296}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left[2]; left[1] = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'int8_t4_packed' changes value from 10000000000 to 1410065408}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'int8_t4_packed' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ int8_t4_packed left[2]; left[1] = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'int8_t4_packed' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'int8_t4_packed'}} fxc-error {{X3000: unrecognized identifier 'left'}} */ +{ uint8_t4_packed left[2]; left[1] = 0; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left[2]; left[1] = -1; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left[2]; left[1] = 2U; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left[2]; left[1] = 3L; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left[2]; left[1] = 4ULL; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left[2]; left[1] = 3000000000; } /* fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error 
{{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left[2]; left[1] = -3000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint8_t4_packed' changes value from -3000000000 to 1294967296}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left[2]; left[1] = 10000000000; } /* expected-warning {{implicit conversion from 'literal int' to 'uint8_t4_packed' changes value from 10000000000 to 1410065408}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left[2]; left[1] = 0.5; } /* expected-warning {{implicit conversion from 'literal float' to 'uint8_t4_packed' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +{ uint8_t4_packed left[2]; left[1] = -0.5F; } /* expected-warning {{implicit conversion from 'float' to 'uint8_t4_packed' changes value from 0.5 to 0}} fxc-error {{X3000: unrecognized identifier 'left'}} fxc-error {{X3000: unrecognized identifier 'uint8_t4_packed'}} */ +// GENERATED_CODE:END + +} \ No newline at end of file diff --git a/tools/clang/test/HLSL/work-graphs.hlsl b/tools/clang/test/HLSL/work-graphs.hlsl new file mode 100644 index 0000000000..ed4f397d92 --- /dev/null +++ b/tools/clang/test/HLSL/work-graphs.hlsl @@ -0,0 +1,204 @@ +// RUN: %clang_cc1 -HV 2021 -verify %s + +struct RECORD +{ + uint a; + bool b; +}; + +struct RECORD2 +{ + uint a; + bool b; +}; + +struct BAD_RECORD +{ + uint a; + SamplerState s; +}; + +struct BAD_RECORD2 +{ + BAD_RECORD b; +}; + +//============================================================================== +// Check diagnostics for the various node input/output types + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node1_01(DispatchNodeInputRecord input) /* expected-error {{'int' cannot be used as a 
type parameter where a struct/class is required}} */ +{ } + +[Shader("node")] +[NodeLaunch("Coalescing")] +void node1_02(GroupNodeInputRecords input) /* expected-error {{'float' cannot be used as a type parameter where a struct/class is required}} */ +{ } + +[Shader("node")] +[NodeLaunch("Coalescing")] +void node1_03(GroupNodeInputRecords input) /* expected-error {{'SamplerState' cannot be used as a type parameter where a struct/class is required}} */ +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node1_04(RWDispatchNodeInputRecord input) /* expected-error {{'RECORD [2]' cannot be used as a type parameter where a struct/class is required}} */ +{ } + +[Shader("node")] +[NodeLaunch("Coalescing")] +void node1_05(RWGroupNodeInputRecords input) /* expected-error {{'float' cannot be used as a type parameter where a struct/class is required}} */ +{ } + +typedef matrix f2x2; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node1_06(RWDispatchNodeInputRecord input) /* expected-error {{too many template arguments for class template 'RWDispatchNodeInputRecord'}} */ +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node1_07(NodeOutput output) /* expected-error {{'bool' cannot be used as a type parameter where a struct/class is required}} */ +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node1_08(NodeOutput output) /* expected-error {{'RECORD [3]' cannot be used as a type parameter where a struct/class is required}} */ +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node1_09(NodeOutput output) /* expected-error {{'float4' cannot be used as a type parameter where a struct/class is required}} */ +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node1_10(DispatchNodeInputRecord input) /* expected-error {{'float3' cannot be used as a type parameter where a struct/class is required}} */ +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node1_11(DispatchNodeInputRecord input) /* expected-error 
{{'BAD_RECORD' cannot be used as a type parameter where a struct/class is required}} */ +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node1_12(RWDispatchNodeInputRecord input) /* expected-error {{'BAD_RECORD2' cannot be used as a type parameter where a struct/class is required}} */ +{ } + + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node1_16() +{ + GroupNodeOutputRecords outrec1; /* expected-error {{'int' cannot be used as a type parameter where a struct/class is required}} */ + + ThreadNodeOutputRecords outrec2; /* expected-error {{'bool' cannot be used as a type parameter where a struct/class is required}} */ + + GroupNodeOutputRecords outrec3; /* expected-error {{'float4' cannot be used as a type parameter where a struct/class is required}} */ + + ThreadNodeOutputRecords outrec4; /* expected-error {{'RECORD [4]' cannot be used as a type parameter where a struct/class is required}} */ +} + + +//============================================================================== +// Check Get[GroupShared]NodeOutput[Array]() intrinsics don't match with invalid +// parameter types. 
+ +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node2_01([MaxRecords(5)] EmptyNodeOutput output) +{ + // GetGroupNodeOutputRecords() is called on an EmptyNodeOutput + output.GetGroupNodeOutputRecords(1); /* expected-error {{no member named 'GetGroupNodeOutputRecords' in 'EmptyNodeOutput'}} */ +} + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node2_02([MaxRecords(5)] EmptyNodeOutput output) +{ + // GetThreadNodeOutputRecords() is called on an EmptyNodeOutput + output.GetThreadNodeOutputRecords(3); /* expected-error {{no member named 'GetThreadNodeOutputRecords' in 'EmptyNodeOutput'}} */ +} + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node2_05(DispatchNodeInputRecord input) +{ + // GetGroupNodeOutputRecords() is called on a DispatchNodeInputRecord<> + input.GetGroupNodeOutputRecords(1); /* expected-error {{no member named 'GetGroupNodeOutputRecords' in 'DispatchNodeInputRecord'}} */ +} + +template +struct FakeNodeOutput { + int h; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node2_06(FakeNodeOutput output) +{ + // GetGroupNodeOutputRecords() is called on a type that is like NodeOutput<> to check INTRIN_COMPTYPE_FROM_NODEOUTPUT isn't fooled. 
+ GroupNodeOutputRecords outrec = output.GetGroupNodeOutputRecords(1); /* expected-error {{no member named 'GetGroupNodeOutputRecords' in 'FakeNodeOutput'}} */ +} + +//============================================================================== +// Check invalid initialization of *NodeOutputRecords + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node3_01(NodeOutput output) +{ + // Initializing a GroupNodeOutputRecords from NodeOutput::GetGroupNodeOutputRecords() + GroupNodeOutputRecords outrec = output.GetGroupNodeOutputRecords(5); /* expected-error {{cannot initialize a variable of type 'GroupNodeOutputRecords' with an rvalue of type 'GroupNodeOutputRecords'}} */ +} + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node3_02(NodeOutput output) +{ + // Initializing a ThreadNodeOutputRecords from NodeOutput::GetThreadNodeOutputRecords() + ThreadNodeOutputRecords outrec = output.GetThreadNodeOutputRecords(5); /* expected-error {{cannot initialize a variable of type 'ThreadNodeOutputRecords' with an rvalue of type 'ThreadNodeOutputRecords'}} */ +} + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node3_03(NodeOutput output) +{ + // Initializing a ThreadNodeOutputRecords from GetGroupNodeOutputRecords() + ThreadNodeOutputRecords outrec = output.GetGroupNodeOutputRecords(1); /* expected-error {{cannot initialize a variable of type 'ThreadNodeOutputRecords' with an rvalue of type 'GroupNodeOutputRecords'}} */ +} + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node3_04(NodeOutput output) +{ + // Initializing a GroupNodeOutputRecords from GetThreadNodeOutputRecords() + GroupNodeOutputRecords outrec = output.GetThreadNodeOutputRecords(1); /* expected-error {{cannot initialize a variable of type 'GroupNodeOutputRecords' with an rvalue of type 'ThreadNodeOutputRecords'}} */ +} + +//============================================================================== +// Check invalid template arguments + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void 
node4_01(DispatchNodeInputRecord input) /* expected-error {{too many template arguments for class template 'DispatchNodeInputRecord'}} */ +{ } + +[Shader("node")] +[NodeLaunch("Thread")] +void node4_02(ThreadNodeInputRecord input) /* expected-error {{use of class template 'ThreadNodeInputRecord' requires template arguments}} */ +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node4_01(DispatchNodeInputRecord input) /* expected-error {{Texture2D cannot be used as a type parameter where a struct/class is required}} */ +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node4_01(DispatchNodeInputRecord input) /* expected-error {{'RaytracingAccelerationStructure' cannot be used as a type parameter where a struct/class is required}} */ +{ } + diff --git a/tools/clang/test/HLSL/workgraph/dispatchgrid_diags.hlsl b/tools/clang/test/HLSL/workgraph/dispatchgrid_diags.hlsl new file mode 100644 index 0000000000..6c8a841009 --- /dev/null +++ b/tools/clang/test/HLSL/workgraph/dispatchgrid_diags.hlsl @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s +// NodeDispatchGrid and NodeMaxDispatchGrid validation diagnostics: +// - the x, y, and z component values must be in the range 1 to 2^16 - 1 (65,535) inclusive +// - the product x * y * z must not exceed 2^24 - 1 (16,777,215) +// - a warning should be generated for 2nd and subsequent occurrences of these attributes + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(65535, 1, 1)] +[NodeMaxDispatchGrid(65535, 1, 1)] +[NumThreads(32, 1, 1)] +void node01() +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeMaxDispatchGrid(1, 65536, 1)] // expected-error {{'NodeMaxDispatchGrid' Y component value must be between 1 and 65,535 (2^16-1) inclusive}} +[NodeDispatchGrid(1, 65536, 1)] // expected-error {{'NodeDispatchGrid' Y component value must be between 1 and 65,535 (2^16-1) inclusive}} +[NumThreads(32, 1, 1)] +void node02() +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] 
+[NodeDispatchGrid(1, 1, 0)] // expected-error {{'NodeDispatchGrid' Z component value must be between 1 and 65,535 (2^16-1) inclusive}} +[NodeMaxDispatchGrid(1, 1, 0)] // expected-error {{'NodeMaxDispatchGrid' Z component value must be between 1 and 65,535 (2^16-1) inclusive}} +[NumThreads(32, 1, 1)] +void node03() +{ } + +static const int x = 1<<16; +static const uint y = 4; +static const int z = 0; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(x, 256, 256)] // expected-error {{'NodeDispatchGrid' X component value must be between 1 and 65,535 (2^16-1) inclusive}} +[NodeMaxDispatchGrid(1, y, z)] // expected-error {{'NodeMaxDispatchGrid' Z component value must be between 1 and 65,535 (2^16-1) inclusive}} +[NumThreads(32, 1, 1)] +void node04() +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(256, 256, 256)] // expected-error {{'NodeDispatchGrid' X * Y * Z product may not exceed 16,777,215 (2^24-1)}} +[NodeMaxDispatchGrid(1, 65535, 257)] // expected-error {{'NodeMaxDispatchGrid' X * Y * Z product may not exceed 16,777,215 (2^24-1)}} +[NumThreads(32, 1, 1)] +void node05() +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(64, 4, 2)] // expected-warning {{attribute 'NodeDispatchGrid' is already applied}} +[NodeMaxDispatchGrid(256, 8, 8)] // expected-warning {{attribute 'NodeMaxDispatchGrid' is already applied}} +[NodeDispatchGrid(64, 4, 2)] +[NodeMaxDispatchGrid(256, 8, 8)] +[NumThreads(32, 1, 1)] +void node06() +{ } diff --git a/tools/clang/test/HLSL/workgraph/member_write_diagnostics.hlsl b/tools/clang/test/HLSL/workgraph/member_write_diagnostics.hlsl new file mode 100644 index 0000000000..7d81ae67c0 --- /dev/null +++ b/tools/clang/test/HLSL/workgraph/member_write_diagnostics.hlsl @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s +// ================================================================== +// CASE120 (error) +// Errors are generated for writes to members of read-only records 
+// ================================================================== + +struct RECORD +{ + bool b; +}; + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node01(DispatchNodeInputRecord input1) +{ + input1.Get().b = false; //expected-error{{cannot assign to return value because function 'Get' returns a const value}} +} + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node02(RWDispatchNodeInputRecord input2) +{ + input2.b = true; //expected-error{{no member named 'b' in 'RWDispatchNodeInputRecord'}} + input2.Get().b = true; +} + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("coalescing")] +void node03([MaxRecords(3)] GroupNodeInputRecords input3) +{ + input3.Get().b = false; //expected-error{{cannot assign to return value because function 'Get' returns a const value}} + input3[0].b = false; //expected-error{{cannot assign to return value because function 'operator[]' returns a const value}} +} + +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node04([MaxRecords(4)] RWGroupNodeInputRecords input4) +{ + input4.Get().b = true; + input4.Get(0).b = true; + input4[0].b = true; +} + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node05(NodeOutput output5) +{ + output5.b = false; //expected-error{{no member named 'b' in 'NodeOutput'}} + output5.Get().b = false; //expected-error{{no member named 'Get' in 'NodeOutput'}} +} + +// expected-note@? +{{function 'Get' which returns const-qualified type 'const RECORD &' declared here}} +// expected-note@? 
+{{function 'operator[]' which returns const-qualified type 'const RECORD &' declared here}} + diff --git a/tools/clang/test/HLSL/workgraph/node_compute_compatibility.hlsl b/tools/clang/test/HLSL/workgraph/node_compute_compatibility.hlsl new file mode 100644 index 0000000000..621ef37a66 --- /dev/null +++ b/tools/clang/test/HLSL/workgraph/node_compute_compatibility.hlsl @@ -0,0 +1,61 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s +// ================================================================== +// Errors are generated for shaders with both "node" and "compute" +// specified when: +// - the launch type is not Broadcasting +// - the node has an input record and/or output records +// ================================================================== + +struct RECORD +{ + uint a; +}; + +[Shader("node")] +[Shader("compute")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node01() +{ /* compatible */ } + +[Shader("node")] +[Shader("compute")] // expected-note {{compute defined here}} +[NumThreads(128,1,1)] +[NodeLaunch("Coalescing")] // expected-error {{Node shader 'node02' with coalescing launch type is not compatible with compute}} +void node02(GroupNodeInputRecords input) +{ } + +[Shader("compute")] // expected-note {{compute defined here}} +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("Thread")] // expected-error {{Node shader 'node03' with thread launch type is not compatible with compute}} +void node03(ThreadNodeInputRecord input) +{ } + +[Shader("node")] +[NumThreads(1024,1,1)] +[Shader("compute")] // expected-note {{compute defined here}} +[NodeLaunch("Broadcasting")] +void node04(DispatchNodeInputRecord input) // expected-error {{Node shader 'node04' with node input/output is not compatible with compute}} +{ } + +[Shader("compute")] // expected-note {{compute defined here}} +[NumThreads(1024,1,1)] +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node05(RWDispatchNodeInputRecord input) // expected-error {{Node shader 'node05' with 
node input/output is not compatible with compute}} +{ } + +[NodeLaunch("Broadcasting")] +[Shader("node")] +[NumThreads(1024,1,1)] +[Shader("compute")] // expected-note {{compute defined here}} +void node06(NodeOutput output) // expected-error {{Node shader 'node06' with node input/output is not compatible with compute}} +{ } + +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +[Shader("node")] +[Shader("compute")] // expected-note {{compute defined here}} +void node07(EmptyNodeOutput output) // expected-error {{Node shader 'node07' with node input/output is not compatible with compute}} +{ } diff --git a/tools/clang/test/HLSL/workgraph/node_input_compatibility.hlsl b/tools/clang/test/HLSL/workgraph/node_input_compatibility.hlsl new file mode 100644 index 0000000000..9548263ea4 --- /dev/null +++ b/tools/clang/test/HLSL/workgraph/node_input_compatibility.hlsl @@ -0,0 +1,89 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s +// ================================================================== +// Errors are generated for node inputs that are not compatible with +// the launch type +// ================================================================== + +struct RECORD +{ + uint a; +}; + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("BrOaDcasting")] // expected-note {{Launch type defined here}} +void node01(GroupNodeInputRecords input) // expected-error {{GroupNodeInputRecords may not be used with broadcasting launch nodes}} +{ } + +[Shader("node")] +[NumThreads(1024,1,1)] +void node02(RWGroupNodeInputRecords input) // expected-error {{RWGroupNodeInputRecords may not be used with broadcasting launch nodes}} +{ } + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] // expected-note {{Launch type defined here}} +void node03(ThreadNodeInputRecord input) // expected-error {{ThreadNodeInputRecord may not be used with broadcasting launch nodes}} +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] // expected-note {{Launch type defined here}} 
+[NumThreads(1024,1,1)] +void node04(RWThreadNodeInputRecord input) // expected-error {{RWThreadNodeInputRecord may not be used with broadcasting launch nodes}} +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] // expected-note {{Launch type defined here}} +[NumThreads(1024,1,1)] +void node05(EmptyNodeInput input) // expected-error {{EmptyNodeInput may not be used with broadcasting launch nodes}} +{ } + +[Shader("node")] +[NumThreads(128,1,1)] +[NodeLaunch("COALESCING")] // expected-note {{Launch type defined here}} +void node06(DispatchNodeInputRecord input) // expected-error {{DispatchNodeInputRecord may not be used with coalescing launch nodes}} +{ } + +[NodeLaunch("Coalescing")] // expected-note {{Launch type defined here}} +[Shader("node")] +[NumThreads(128,1,1)] +void node07(RWDispatchNodeInputRecord input) // expected-error {{RWDispatchNodeInputRecord may not be used with coalescing launch nodes}} +{ } + +[Shader("node")] +[NumThreads(128,1,1)] +[NodeLaunch("Coalescing")] // expected-note {{Launch type defined here}} +void node08(ThreadNodeInputRecord input) // expected-error {{ThreadNodeInputRecord may not be used with coalescing launch nodes}} +{ } + +[NodeLaunch("Coalescing")] // expected-note {{Launch type defined here}} +[Shader("node")] +[NumThreads(128,1,1)] +void node09(RWThreadNodeInputRecord input) // expected-error {{RWThreadNodeInputRecord may not be used with coalescing launch nodes}} +{ } + +[Shader("node")] +[NodeLaunch("Thread")] // expected-note {{Launch type defined here}} +void node10(DispatchNodeInputRecord input) // expected-error {{DispatchNodeInputRecord may not be used with thread launch nodes}} +{ } + +[NodeLaunch("Thread")] // expected-note {{Launch type defined here}} +[Shader("node")] +void node11(RWDispatchNodeInputRecord input) // expected-error {{RWDispatchNodeInputRecord may not be used with thread launch nodes}} +{ } + +[Shader("node")] +[NodeLaunch("Thread")] // expected-note {{Launch type defined here}} +void 
node12(GroupNodeInputRecords input) // expected-error {{GroupNodeInputRecords may not be used with thread launch nodes}} +{ } + +[NodeLaunch("ThREAd")] // expected-note {{Launch type defined here}} +[Shader("node")] +void node13([MaxRecords(32)] + RWGroupNodeInputRecords input) // expected-error {{RWGroupNodeInputRecords may not be used with thread launch nodes}} +{ } + +[Shader("node")] +[NodeLaunch("Thread")] // expected-note {{Launch type defined here}} +void node14(EmptyNodeInput input) // expected-error {{EmptyNodeInput may not be used with thread launch nodes}} +{ } diff --git a/tools/clang/test/HLSL/workgraph/rwnodeinputrecord_sv_dispatchgrid.hlsl b/tools/clang/test/HLSL/workgraph/rwnodeinputrecord_sv_dispatchgrid.hlsl new file mode 100644 index 0000000000..4eaa392834 --- /dev/null +++ b/tools/clang/test/HLSL/workgraph/rwnodeinputrecord_sv_dispatchgrid.hlsl @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +// Check that a RWNodeInputRecord field that has the SV_DispatchGrid semantic +// is not assignable. 
+ +struct RECORD +{ + uint3 a; + uint3 b : SV_DispatchGrid; +}; + +//============================================================================== +// Check non-assignable fields + +[Shader("node")] +[NodeLaunch("Coalescing")] +void node01(RWGroupNodeInputRecords input) +{ + input.Get().a = 11; + input.Get().a.yz = 12; + input.Get().a[0] = 13; +} + +[Shader("node")] +[NodeLaunch("Coalescing")] +void node02(GroupNodeInputRecords input) +{ + input.Get().a = 21; //expected-error{{cannot assign to return value because function 'Get' returns a const value}} + input.Get().b = 22; //expected-error{{cannot assign to return value because function 'Get' returns a const value}} +} + +[Shader("node")] +[NodeLaunch("Coalescing")] +void node03([MaxRecords(4)] GroupNodeInputRecords input) +{ + input[1].a = 31; //expected-error{{cannot assign to return value because function 'operator[]' returns a const value}} + input[1].b = 32; //expected-error{{cannot assign to return value because function 'operator[]' returns a const value}} +} + +[Shader("node")] +[NodeLaunch("Coalescing")] +void node04(const RWGroupNodeInputRecords input) +{ + input.Get().a = 41; // expected-error{{cannot assign to return value because function 'Get' returns a const value}} + input.Get().b = 42; // expected-error{{cannot assign to return value because function 'Get' returns a const value}} +} + +// expected-note@? +{{function 'Get' which returns const-qualified type 'const RECORD &' declared here}} +// expected-note@? 
+{{function 'operator[]' which returns const-qualified type 'const RECORD &' declared here}} diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/amp-groupshared.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/amp-groupshared.hlsl index 0d13ae174a..ac44faeb6a 100644 --- a/tools/clang/test/HLSLFileCheck/d3dreflect/amp-groupshared.hlsl +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/amp-groupshared.hlsl @@ -1,9 +1,9 @@ // RUN: %dxc -T lib_6_7 %s | %D3DReflect %s | FileCheck %s -// CHECK:DxilRuntimeData (size = 180 bytes): +// CHECK:DxilRuntimeData (size = 240 bytes): // CHECK: StringBuffer (size = 24 bytes) -// CHECK: IndexTable (size = 8 bytes) +// CHECK: IndexTable (size = 24 bytes) // CHECK: RawBytes (size = 0 bytes) // CHECK: RecordTable (stride = 32 bytes) ResourceTable[1] = { // CHECK: <0:RuntimeDataResourceInfo> = { @@ -17,8 +17,8 @@ // CHECK: Flags: 0 (None) // CHECK: } // CHECK: } -// CHECK: RecordTable (stride = 44 bytes) FunctionTable[1] = { -// CHECK: <0:RuntimeDataFunctionInfo> = { +// CHECK: RecordTable (stride = 56 bytes) FunctionTable[1] = { +// CHECK: <0:RuntimeDataFunctionInfo3> = { // CHECK: Name: "amplification" // CHECK: UnmangledName: "amplification" // CHECK: Resources: <0:RecordArrayRef[1]> = { @@ -41,6 +41,21 @@ // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 16384 // CHECK: MinShaderTarget: 917605 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) +// CHECK: AS: <0:ASInfo> = { +// CHECK: NumThreads: <2:array[3]> = { 4, 1, 1 } +// CHECK: GroupSharedBytesUsed: 32 +// CHECK: PayloadSizeInBytes: 16 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 12 bytes) ASInfoTable[1] = { +// CHECK: <0:ASInfo> = { +// CHECK: NumThreads: <2:array[3]> = { 4, 1, 1 } +// CHECK: GroupSharedBytesUsed: 32 +// CHECK: PayloadSizeInBytes: 16 // CHECK: } // CHECK: } // CHECK:ID3D12LibraryReflection: diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/cbuf-usage-lib.hlsl 
b/tools/clang/test/HLSLFileCheck/d3dreflect/cbuf-usage-lib.hlsl index 3a7b3f5a4a..a3561185fa 100644 --- a/tools/clang/test/HLSLFileCheck/d3dreflect/cbuf-usage-lib.hlsl +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/cbuf-usage-lib.hlsl @@ -5,9 +5,9 @@ // CHECK-NOT: CBufUnused -// CHECK: DxilRuntimeData (size = 320 bytes): -// CHECK: StringBuffer (size = 44 bytes) -// CHECK: IndexTable (size = 20 bytes) +// CHECK: DxilRuntimeData (size = 448 bytes): +// CHECK: StringBuffer (size = 52 bytes) +// CHECK: IndexTable (size = 28 bytes) // CHECK: RawBytes (size = 0 bytes) // CHECK: RecordTable (stride = 32 bytes) ResourceTable[3] = { // CHECK: <0:RuntimeDataResourceInfo> = { @@ -41,8 +41,8 @@ // CHECK: Flags: 0 (None) // CHECK: } // CHECK: } -// CHECK: RecordTable (stride = 44 bytes) FunctionTable[2] = { -// CHECK: <0:RuntimeDataFunctionInfo> = { +// CHECK: RecordTable (stride = 56 bytes) FunctionTable[2] = { +// CHECK: <0:RuntimeDataFunctionInfo3> = { // CHECK: Name: "\01?foo{{[@$?.A-Za-z0-9_]+}}" // CHECK: UnmangledName: "foo" // CHECK: Resources: <0:RecordArrayRef[1]> = { @@ -56,8 +56,11 @@ // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 32767 // CHECK: MinShaderTarget: 393312 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) // CHECK: } -// CHECK: <1:RuntimeDataFunctionInfo> = { +// CHECK: <1:RuntimeDataFunctionInfo3> = { // CHECK: Name: "main" // CHECK: UnmangledName: "main" // CHECK: Resources: <2:RecordArrayRef[2]> = { @@ -72,6 +75,43 @@ // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 2 // CHECK: MinShaderTarget: 65632 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) +// CHECK: VS: <0:VSInfo> +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 16 bytes) SignatureElementTable[2] = { +// CHECK: <0:SignatureElement> = { +// CHECK: SemanticName: "IDX" +// CHECK: SemanticIndices: <5:array[1]> = { 0 } +// CHECK: 
SemanticKind: Arbitrary +// CHECK: ComponentType: I32 +// CHECK: InterpolationMode: Undefined +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 0 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: <1:SignatureElement> = { +// CHECK: SemanticName: "OUT" +// CHECK: SemanticIndices: <5:array[1]> = { 0 } +// CHECK: SemanticKind: Arbitrary +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: Linear +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 0 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 16 bytes) VSInfoTable[1] = { +// CHECK: <0:VSInfo> = { +// CHECK: SigInputElements: <5:RecordArrayRef[1]> = { +// CHECK: [0]: <0:SignatureElement> +// CHECK: } +// CHECK: SigOutputElements: <0:RecordArrayRef[1]> = { +// CHECK: [0]: <1:SignatureElement> +// CHECK: } +// CHECK: ViewIDOutputMask: <0:bytes[0]> // CHECK: } // CHECK: } diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/comp-groupshared.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/comp-groupshared.hlsl index cde917798b..b8aa2baa5d 100644 --- a/tools/clang/test/HLSLFileCheck/d3dreflect/comp-groupshared.hlsl +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/comp-groupshared.hlsl @@ -1,8 +1,8 @@ // RUN: %dxc -T lib_6_5 %s | %D3DReflect %s | FileCheck %s -// CHECK:DxilRuntimeData (size = 220 bytes): +// CHECK:DxilRuntimeData (size = 276 bytes): // CHECK: StringBuffer (size = 28 bytes) -// CHECK: IndexTable (size = 12 bytes) +// CHECK: IndexTable (size = 28 bytes) // CHECK: RawBytes (size = 0 bytes) // CHECK: RecordTable (stride = 32 bytes) ResourceTable[2] = { // CHECK: <0:RuntimeDataResourceInfo> = { @@ -26,13 +26,31 @@ // CHECK: Flags: 0 (None) // CHECK: } // CHECK: } -// CHECK: RecordTable (stride = 44 bytes) FunctionTable[1] = { -// CHECK: <0:RuntimeDataFunctionInfo> = { +// CHECK: RecordTable (stride = 56 bytes) FunctionTable[1] = { +// CHECK: <0:RuntimeDataFunctionInfo3> = { // CHECK: Name: "main" // CHECK: UnmangledName: "main" // CHECK: Resources: 
<0:RecordArrayRef[2]> = { -// CHECK: [0]: <0:RuntimeDataResourceInfo> -// CHECK: [1]: <1:RuntimeDataResourceInfo> +// CHECK: [0]: <0:RuntimeDataResourceInfo> = { +// CHECK: Class: SRV +// CHECK: Kind: TypedBuffer +// CHECK: ID: 0 +// CHECK: Space: 0 +// CHECK: LowerBound: 1 +// CHECK: UpperBound: 1 +// CHECK: Name: "inputs" +// CHECK: Flags: 0 (None) +// CHECK: } +// CHECK: [1]: <1:RuntimeDataResourceInfo> = { +// CHECK: Class: UAV +// CHECK: Kind: TypedBuffer +// CHECK: ID: 0 +// CHECK: Space: 0 +// CHECK: LowerBound: 1 +// CHECK: UpperBound: 1 +// CHECK: Name: "g_Intensities" +// CHECK: Flags: 0 (None) +// CHECK: } // CHECK: } // CHECK: FunctionDependencies: = {} // CHECK: ShaderKind: Compute @@ -42,6 +60,19 @@ // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 32 // CHECK: MinShaderTarget: 327776 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) +// CHECK: CS: <0:CSInfo> = { +// CHECK: NumThreads: <3:array[3]> = { 64, 2, 2 } +// CHECK: GroupSharedBytesUsed: 16 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 8 bytes) CSInfoTable[1] = { +// CHECK: <0:CSInfo> = { +// CHECK: NumThreads: <3:array[3]> = { 64, 2, 2 } +// CHECK: GroupSharedBytesUsed: 16 // CHECK: } // CHECK: } // CHECK:ID3D12LibraryReflection: diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/empty_broadcasting_nodes.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/empty_broadcasting_nodes.hlsl new file mode 100644 index 0000000000..f2c7c81265 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/empty_broadcasting_nodes.hlsl @@ -0,0 +1,307 @@ +// RUN: %dxc -T lib_6_8 %s | %D3DReflect %s | FileCheck %s + +// CHECK:DxilRuntimeData (size = 488 bytes): +// CHECK: StringBuffer (size = 56 bytes) +// CHECK: IndexTable (size = 92 bytes) +// CHECK: RawBytes (size = 0 bytes) +// CHECK: RecordTable (stride = 56 bytes) FunctionTable[1] = { +// CHECK: <0:RuntimeDataFunctionInfo3> = { +// CHECK: Name: 
"depth18part0_wg_63_nodes_seed_255" +// CHECK: UnmangledName: "depth18part0_wg_63_nodes_seed_255" +// CHECK: Resources: [0]> = {} +// CHECK: FunctionDependencies: = {} +// CHECK: ShaderKind: Node +// CHECK: PayloadSizeInBytes: 0 +// CHECK: AttributeSizeInBytes: 0 +// CHECK: FeatureInfo1: 0 +// CHECK: FeatureInfo2: 0 +// CHECK: ShaderStageFlag: 32768 +// CHECK: MinShaderTarget: 983136 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) +// CHECK: Node: <0:NodeShaderInfo> = { +// CHECK: LaunchType: Broadcasting +// CHECK: GroupSharedBytesUsed: 0 +// CHECK: Attribs: <8:RecordArrayRef[3]> = { +// CHECK: [0]: <0:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: ID +// CHECK: ID: <0:NodeID> = { +// CHECK: Name: "depth18part0_wg_63_nodes_seed_255" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: [1]: <1:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: NumThreads +// CHECK: NumThreads: <0:array[3]> = { 25, 4, 1 } +// CHECK: } +// CHECK: [2]: <2:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: DispatchGrid +// CHECK: DispatchGrid: <4:array[3]> = { 2, 8, 10 } +// CHECK: } +// CHECK: } +// CHECK: Outputs: <21:RecordArrayRef[1]> = { +// CHECK: [0]: <1:IONode> = { +// CHECK: IOFlagsAndKind: 6 +// CHECK: Attribs: <14:RecordArrayRef[6]> = { +// CHECK: [0]: <1:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputID +// CHECK: OutputID: <1:NodeID> = { +// CHECK: Name: "OutputyMcOutputFace" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: [1]: <2:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputArraySize +// CHECK: OutputArraySize: 2 +// CHECK: } +// CHECK: [2]: <3:NodeShaderIOAttrib> = { +// CHECK: AttribKind: MaxRecords +// CHECK: MaxRecords: 47 +// CHECK: } +// CHECK: [3]: <4:NodeShaderIOAttrib> = { +// CHECK: AttribKind: AllowSparseNodes +// CHECK: AllowSparseNodes: 1 +// CHECK: } +// CHECK: [4]: <5:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 
20 +// CHECK: } +// CHECK: [5]: <6:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordDispatchGrid +// CHECK: RecordDispatchGrid: +// CHECK: ByteOffset: 8 +// CHECK: ComponentNumAndType: 23 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: Inputs: <12:RecordArrayRef[1]> = { +// CHECK: [0]: <0:IONode> = { +// CHECK: IOFlagsAndKind: 97 +// CHECK: Attribs: <12:RecordArrayRef[1]> = { +// CHECK: [0]: <0:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 8 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 8 bytes) NodeIDTable[2] = { +// CHECK: <0:NodeID> = { +// CHECK: Name: "depth18part0_wg_63_nodes_seed_255" +// CHECK: Index: 0 +// CHECK: } +// CHECK: <1:NodeID> = { +// CHECK: Name: "OutputyMcOutputFace" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 8 bytes) NodeShaderFuncAttribTable[3] = { +// CHECK: <0:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: ID +// CHECK: ID: <0:NodeID> = { +// CHECK: Name: "depth18part0_wg_63_nodes_seed_255" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: <1:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: NumThreads +// CHECK: NumThreads: <0:array[3]> = { 25, 4, 1 } +// CHECK: } +// CHECK: <2:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: DispatchGrid +// CHECK: DispatchGrid: <4:array[3]> = { 2, 8, 10 } +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 8 bytes) NodeShaderIOAttribTable[7] = { +// CHECK: <0:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 8 +// CHECK: } +// CHECK: <1:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputID +// CHECK: OutputID: <1:NodeID> = { +// CHECK: Name: "OutputyMcOutputFace" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: <2:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputArraySize +// CHECK: OutputArraySize: 2 +// CHECK: } +// CHECK: <3:NodeShaderIOAttrib> = { 
+// CHECK: AttribKind: MaxRecords +// CHECK: MaxRecords: 47 +// CHECK: } +// CHECK: <4:NodeShaderIOAttrib> = { +// CHECK: AttribKind: AllowSparseNodes +// CHECK: AllowSparseNodes: 1 +// CHECK: } +// CHECK: <5:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 20 +// CHECK: } +// CHECK: <6:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordDispatchGrid +// CHECK: RecordDispatchGrid: +// CHECK: ByteOffset: 8 +// CHECK: ComponentNumAndType: 23 +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 8 bytes) IONodeTable[2] = { +// CHECK: <0:IONode> = { +// CHECK: IOFlagsAndKind: 97 +// CHECK: Attribs: <12:RecordArrayRef[1]> = { +// CHECK: [0]: <0:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 8 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: <1:IONode> = { +// CHECK: IOFlagsAndKind: 6 +// CHECK: Attribs: <14:RecordArrayRef[6]> = { +// CHECK: [0]: <1:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputID +// CHECK: OutputID: <1:NodeID> = { +// CHECK: Name: "OutputyMcOutputFace" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: [1]: <2:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputArraySize +// CHECK: OutputArraySize: 2 +// CHECK: } +// CHECK: [2]: <3:NodeShaderIOAttrib> = { +// CHECK: AttribKind: MaxRecords +// CHECK: MaxRecords: 47 +// CHECK: } +// CHECK: [3]: <4:NodeShaderIOAttrib> = { +// CHECK: AttribKind: AllowSparseNodes +// CHECK: AllowSparseNodes: 1 +// CHECK: } +// CHECK: [4]: <5:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 20 +// CHECK: } +// CHECK: [5]: <6:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordDispatchGrid +// CHECK: RecordDispatchGrid: +// CHECK: ByteOffset: 8 +// CHECK: ComponentNumAndType: 23 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 20 bytes) NodeShaderInfoTable[1] = { +// CHECK: <0:NodeShaderInfo> = { +// CHECK: LaunchType: Broadcasting 
+// CHECK: GroupSharedBytesUsed: 0 +// CHECK: Attribs: <8:RecordArrayRef[3]> = { +// CHECK: [0]: <0:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: ID +// CHECK: ID: <0:NodeID> = { +// CHECK: Name: "depth18part0_wg_63_nodes_seed_255" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: [1]: <1:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: NumThreads +// CHECK: NumThreads: <0:array[3]> = { 25, 4, 1 } +// CHECK: } +// CHECK: [2]: <2:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: DispatchGrid +// CHECK: DispatchGrid: <4:array[3]> = { 2, 8, 10 } +// CHECK: } +// CHECK: } +// CHECK: Outputs: <21:RecordArrayRef[1]> = { +// CHECK: [0]: <1:IONode> = { +// CHECK: IOFlagsAndKind: 6 +// CHECK: Attribs: <14:RecordArrayRef[6]> = { +// CHECK: [0]: <1:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputID +// CHECK: OutputID: <1:NodeID> = { +// CHECK: Name: "OutputyMcOutputFace" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: [1]: <2:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputArraySize +// CHECK: OutputArraySize: 2 +// CHECK: } +// CHECK: [2]: <3:NodeShaderIOAttrib> = { +// CHECK: AttribKind: MaxRecords +// CHECK: MaxRecords: 47 +// CHECK: } +// CHECK: [3]: <4:NodeShaderIOAttrib> = { +// CHECK: AttribKind: AllowSparseNodes +// CHECK: AllowSparseNodes: 1 +// CHECK: } +// CHECK: [4]: <5:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 20 +// CHECK: } +// CHECK: [5]: <6:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordDispatchGrid +// CHECK: RecordDispatchGrid: +// CHECK: ByteOffset: 8 +// CHECK: ComponentNumAndType: 23 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: Inputs: <12:RecordArrayRef[1]> = { +// CHECK: [0]: <0:IONode> = { +// CHECK: IOFlagsAndKind: 97 +// CHECK: Attribs: <12:RecordArrayRef[1]> = { +// CHECK: [0]: <0:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 8 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// 
CHECK: } +// CHECK:ID3D12LibraryReflection: +// CHECK: D3D12_LIBRARY_DESC: +// CHECK: Creator: +// CHECK: Flags: 0 +// CHECK: FunctionCount: 1 +// CHECK: ID3D12FunctionReflection: +// CHECK: D3D12_FUNCTION_DESC: Name: depth18part0_wg_63_nodes_seed_255 +// CHECK: Shader Version: 6.8 +// CHECK: Creator: +// CHECK: Flags: 0 +// CHECK: ConstantBuffers: 0 +// CHECK: BoundResources: 0 +// CHECK: FunctionParameterCount: 0 +// CHECK: HasReturn: FALSE + +struct rec0 +{ + int i0; + float f0; +}; + +struct rec1 +{ + float f1; + int i1; + uint3 dg : SV_DispatchGrid; +}; + + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(25, 4, 1)] +[NodeDispatchGrid(2, 8, 10)] +export void depth18part0_wg_63_nodes_seed_255( + DispatchNodeInputRecord InputyMcInputFace, + [MaxRecords(47)] [AllowSparseNodes] [NodeArraySize(2)] NodeOutput OutputyMcOutputFace[2]) +{ +} diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/empty_thread_nodes.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/empty_thread_nodes.hlsl new file mode 100644 index 0000000000..f6ba365102 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/empty_thread_nodes.hlsl @@ -0,0 +1,337 @@ +// RUN: %dxc -T lib_6_8 %s | %D3DReflect %s | FileCheck %s + + +// CHECK:DxilRuntimeData (size = 472 bytes): +// CHECK: StringBuffer (size = 32 bytes) +// CHECK: IndexTable (size = 92 bytes) +// CHECK: RawBytes (size = 0 bytes) +// CHECK: RecordTable (stride = 56 bytes) FunctionTable[1] = { +// CHECK: <0:RuntimeDataFunctionInfo3> = { +// CHECK: Name: "Input2Output" +// CHECK: UnmangledName: "Input2Output" +// CHECK: Resources: [0]> = {} +// CHECK: FunctionDependencies: = {} +// CHECK: ShaderKind: Node +// CHECK: PayloadSizeInBytes: 0 +// CHECK: AttributeSizeInBytes: 0 +// CHECK: FeatureInfo1: 0 +// CHECK: FeatureInfo2: 0 +// CHECK: ShaderStageFlag: 32768 +// CHECK: MinShaderTarget: 983136 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) +// CHECK: Node: 
<0:NodeShaderInfo> = { +// CHECK: LaunchType: Thread +// CHECK: GroupSharedBytesUsed: 0 +// CHECK: Attribs: <4:RecordArrayRef[3]> = { +// CHECK: [0]: <0:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: ID +// CHECK: ID: <0:NodeID> = { +// CHECK: Name: "Input2Output" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: [1]: <1:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: NumThreads +// CHECK: NumThreads: <0:array[3]> = { 1, 1, 1 } +// CHECK: } +// CHECK: [2]: <2:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: LocalRootArgumentsTableIndex +// CHECK: LocalRootArgumentsTableIndex: 2 +// CHECK: } +// CHECK: } +// CHECK: Outputs: <20:RecordArrayRef[2]> = { +// CHECK: [0]: <1:IONode> = { +// CHECK: IOFlagsAndKind: 262 +// CHECK: Attribs: <10:RecordArrayRef[4]> = { +// CHECK: [0]: <1:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputID +// CHECK: OutputID: <1:NodeID> = { +// CHECK: Name: "Output1" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: [1]: <2:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputArraySize +// CHECK: OutputArraySize: 0 +// CHECK: } +// CHECK: [2]: <3:NodeShaderIOAttrib> = { +// CHECK: AttribKind: MaxRecordsSharedWith +// CHECK: MaxRecordsSharedWith: 1 +// CHECK: } +// CHECK: [3]: <0:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 8 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: [1]: <2:IONode> = { +// CHECK: IOFlagsAndKind: 6 +// CHECK: Attribs: <15:RecordArrayRef[4]> = { +// CHECK: [0]: <4:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputID +// CHECK: OutputID: <2:NodeID> = { +// CHECK: Name: "Output2ID" +// CHECK: Index: 1 +// CHECK: } +// CHECK: } +// CHECK: [1]: <2:NodeShaderIOAttrib> +// CHECK: [2]: <5:NodeShaderIOAttrib> = { +// CHECK: AttribKind: MaxRecords +// CHECK: MaxRecords: 5 +// CHECK: } +// CHECK: [3]: <0:NodeShaderIOAttrib> +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: Inputs: <8:RecordArrayRef[1]> = { +// CHECK: [0]: <0:IONode> = { +// CHECK: 
IOFlagsAndKind: 37 +// CHECK: Attribs: <8:RecordArrayRef[1]> = { +// CHECK: [0]: <0:NodeShaderIOAttrib> +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 8 bytes) NodeIDTable[3] = { +// CHECK: <0:NodeID> = { +// CHECK: Name: "Input2Output" +// CHECK: Index: 0 +// CHECK: } +// CHECK: <1:NodeID> = { +// CHECK: Name: "Output1" +// CHECK: Index: 0 +// CHECK: } +// CHECK: <2:NodeID> = { +// CHECK: Name: "Output2ID" +// CHECK: Index: 1 +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 8 bytes) NodeShaderFuncAttribTable[3] = { +// CHECK: <0:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: ID +// CHECK: ID: <0:NodeID> = { +// CHECK: Name: "Input2Output" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: <1:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: NumThreads +// CHECK: NumThreads: <0:array[3]> = { 1, 1, 1 } +// CHECK: } +// CHECK: <2:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: LocalRootArgumentsTableIndex +// CHECK: LocalRootArgumentsTableIndex: 2 +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 8 bytes) NodeShaderIOAttribTable[6] = { +// CHECK: <0:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 8 +// CHECK: } +// CHECK: <1:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputID +// CHECK: OutputID: <1:NodeID> = { +// CHECK: Name: "Output1" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: <2:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputArraySize +// CHECK: OutputArraySize: 0 +// CHECK: } +// CHECK: <3:NodeShaderIOAttrib> = { +// CHECK: AttribKind: MaxRecordsSharedWith +// CHECK: MaxRecordsSharedWith: 1 +// CHECK: } +// CHECK: <4:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputID +// CHECK: OutputID: <2:NodeID> = { +// CHECK: Name: "Output2ID" +// CHECK: Index: 1 +// CHECK: } +// CHECK: } +// CHECK: <5:NodeShaderIOAttrib> = { +// CHECK: AttribKind: MaxRecords +// CHECK: MaxRecords: 5 +// CHECK: } +// CHECK: } 
+// CHECK: RecordTable (stride = 8 bytes) IONodeTable[3] = { +// CHECK: <0:IONode> = { +// CHECK: IOFlagsAndKind: 37 +// CHECK: Attribs: <8:RecordArrayRef[1]> = { +// CHECK: [0]: <0:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 8 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: <1:IONode> = { +// CHECK: IOFlagsAndKind: 262 +// CHECK: Attribs: <10:RecordArrayRef[4]> = { +// CHECK: [0]: <1:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputID +// CHECK: OutputID: <1:NodeID> = { +// CHECK: Name: "Output1" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: [1]: <2:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputArraySize +// CHECK: OutputArraySize: 0 +// CHECK: } +// CHECK: [2]: <3:NodeShaderIOAttrib> = { +// CHECK: AttribKind: MaxRecordsSharedWith +// CHECK: MaxRecordsSharedWith: 1 +// CHECK: } +// CHECK: [3]: <0:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 8 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: <2:IONode> = { +// CHECK: IOFlagsAndKind: 6 +// CHECK: Attribs: <15:RecordArrayRef[4]> = { +// CHECK: [0]: <4:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputID +// CHECK: OutputID: <2:NodeID> = { +// CHECK: Name: "Output2ID" +// CHECK: Index: 1 +// CHECK: } +// CHECK: } +// CHECK: [1]: <2:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputArraySize +// CHECK: OutputArraySize: 0 +// CHECK: } +// CHECK: [2]: <5:NodeShaderIOAttrib> = { +// CHECK: AttribKind: MaxRecords +// CHECK: MaxRecords: 5 +// CHECK: } +// CHECK: [3]: <0:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 8 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 20 bytes) NodeShaderInfoTable[1] = { +// CHECK: <0:NodeShaderInfo> = { +// CHECK: LaunchType: Thread +// CHECK: GroupSharedBytesUsed: 0 +// CHECK: Attribs: <4:RecordArrayRef[3]> = { +// CHECK: [0]: <0:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: 
ID +// CHECK: ID: <0:NodeID> = { +// CHECK: Name: "Input2Output" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: [1]: <1:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: NumThreads +// CHECK: NumThreads: <0:array[3]> = { 1, 1, 1 } +// CHECK: } +// CHECK: [2]: <2:NodeShaderFuncAttrib> = { +// CHECK: AttribKind: LocalRootArgumentsTableIndex +// CHECK: LocalRootArgumentsTableIndex: 2 +// CHECK: } +// CHECK: } +// CHECK: Outputs: <20:RecordArrayRef[2]> = { +// CHECK: [0]: <1:IONode> = { +// CHECK: IOFlagsAndKind: 262 +// CHECK: Attribs: <10:RecordArrayRef[4]> = { +// CHECK: [0]: <1:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputID +// CHECK: OutputID: <1:NodeID> = { +// CHECK: Name: "Output1" +// CHECK: Index: 0 +// CHECK: } +// CHECK: } +// CHECK: [1]: <2:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputArraySize +// CHECK: OutputArraySize: 0 +// CHECK: } +// CHECK: [2]: <3:NodeShaderIOAttrib> = { +// CHECK: AttribKind: MaxRecordsSharedWith +// CHECK: MaxRecordsSharedWith: 1 +// CHECK: } +// CHECK: [3]: <0:NodeShaderIOAttrib> = { +// CHECK: AttribKind: RecordSizeInBytes +// CHECK: RecordSizeInBytes: 8 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: [1]: <2:IONode> = { +// CHECK: IOFlagsAndKind: 6 +// CHECK: Attribs: <15:RecordArrayRef[4]> = { +// CHECK: [0]: <4:NodeShaderIOAttrib> = { +// CHECK: AttribKind: OutputID +// CHECK: OutputID: <2:NodeID> = { +// CHECK: Name: "Output2ID" +// CHECK: Index: 1 +// CHECK: } +// CHECK: } +// CHECK: [1]: <2:NodeShaderIOAttrib> +// CHECK: [2]: <5:NodeShaderIOAttrib> = { +// CHECK: AttribKind: MaxRecords +// CHECK: MaxRecords: 5 +// CHECK: } +// CHECK: [3]: <0:NodeShaderIOAttrib> +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: Inputs: <8:RecordArrayRef[1]> = { +// CHECK: [0]: <0:IONode> = { +// CHECK: IOFlagsAndKind: 37 +// CHECK: Attribs: <8:RecordArrayRef[1]> = { +// CHECK: [0]: <0:NodeShaderIOAttrib> +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK:ID3D12LibraryReflection: +// 
CHECK: D3D12_LIBRARY_DESC: +// FIXME: Creator: +// CHECK: Flags: 0 +// CHECK: FunctionCount: 1 +// CHECK: ID3D12FunctionReflection: +// CHECK: D3D12_FUNCTION_DESC: Name: Input2Output +// FIXME: Shader Version: 6.8 +// FIXME: Creator: +// CHECK: Flags: 0 +// CHECK: ConstantBuffers: 0 +// CHECK: BoundResources: 0 +// CHECK: FunctionParameterCount: 0 +// CHECK: HasReturn: FALSE + +struct rec0 +{ + int i0; + float f0; +}; + +struct rec1 +{ + float f1; + int i1; +}; + +struct [NodeTrackRWInputSharing] rec2 +{ + float f1; + int i1; +}; + +[Shader("node")] +[NodeLaunch("Thread")] +[NodeLocalRootArgumentsTableIndex(2)] +void Input2Output( + RWThreadNodeInputRecord Inputy, + [MaxRecordsSharedWith(Output2)] NodeOutput Output1, + [NodeID("Output2ID",1)] [MaxRecords(5)] NodeOutput Output2) +{ +} diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/lib_exports1.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/lib_exports1.hlsl index 69b2d9336b..de58376a61 100644 --- a/tools/clang/test/HLSLFileCheck/d3dreflect/lib_exports1.hlsl +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/lib_exports1.hlsl @@ -21,9 +21,9 @@ float4 PSMain(int idx : INDEX) : SV_Target { return T2[T0.Load(idx)].f; } -// CHECK: DxilRuntimeData (size = 444 bytes): -// CHECK: StringBuffer (size = 124 bytes) -// CHECK: IndexTable (size = 20 bytes) +// CHECK: DxilRuntimeData (size = 584 bytes): +// CHECK: StringBuffer (size = 140 bytes) +// CHECK: IndexTable (size = 28 bytes) // CHECK: RawBytes (size = 0 bytes) // CHECK: RecordTable (stride = 32 bytes) ResourceTable[3] = { // CHECK: <0:RuntimeDataResourceInfo> = { @@ -57,8 +57,8 @@ float4 PSMain(int idx : INDEX) : SV_Target { // CHECK: Flags: 0 (None) // CHECK: } // CHECK: } -// CHECK: RecordTable (stride = 44 bytes) FunctionTable[3] = { -// CHECK: <0:RuntimeDataFunctionInfo> = { +// CHECK: RecordTable (stride = 56 bytes) FunctionTable[3] = { +// CHECK: <0:RuntimeDataFunctionInfo3> = { // CHECK: Name: "\01?VS_RENAMED{{[@$?.A-Za-z0-9_]+}}" // CHECK: UnmangledName: 
"VS_RENAMED" // CHECK: Resources: <0:RecordArrayRef[1]> = { @@ -72,8 +72,11 @@ float4 PSMain(int idx : INDEX) : SV_Target { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 32767 // CHECK: MinShaderTarget: 393312 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) // CHECK: } -// CHECK: <1:RuntimeDataFunctionInfo> = { +// CHECK: <1:RuntimeDataFunctionInfo3> = { // CHECK: Name: "\01?PS_RENAMED{{[@$?.A-Za-z0-9_]+}}" // CHECK: UnmangledName: "PS_RENAMED" // CHECK: Resources: <2:RecordArrayRef[2]> = { @@ -88,8 +91,11 @@ float4 PSMain(int idx : INDEX) : SV_Target { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 32767 // CHECK: MinShaderTarget: 393312 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) // CHECK: } -// CHECK: <2:RuntimeDataFunctionInfo> = { +// CHECK: <2:RuntimeDataFunctionInfo3> = { // CHECK: Name: "PS_RENAMED" // CHECK: UnmangledName: "PS_RENAMED" // CHECK: Resources: <2:RecordArrayRef[2]> = { @@ -104,6 +110,42 @@ float4 PSMain(int idx : INDEX) : SV_Target { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 1 // CHECK: MinShaderTarget: 96 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) +// CHECK: PS: <0:PSInfo> +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 16 bytes) SignatureElementTable[2] = { +// CHECK: <0:SignatureElement> = { +// CHECK: SemanticName: "INDEX" +// CHECK: SemanticIndices: <5:array[1]> = { 0 } +// CHECK: SemanticKind: Arbitrary +// CHECK: ComponentType: I32 +// CHECK: InterpolationMode: Constant +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 0 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: <1:SignatureElement> = { +// CHECK: SemanticName: "SV_Target" +// CHECK: SemanticIndices: <5:array[1]> = { 0 } +// CHECK: SemanticKind: Target +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: 
Undefined +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 3 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 8 bytes) PSInfoTable[1] = { +// CHECK: <0:PSInfo> = { +// CHECK: SigInputElements: <5:RecordArrayRef[1]> = { +// CHECK: [0]: <0:SignatureElement> +// CHECK: } +// CHECK: SigOutputElements: <0:RecordArrayRef[1]> = { +// CHECK: [0]: <1:SignatureElement> +// CHECK: } // CHECK: } // CHECK: } diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/lib_exports2.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/lib_exports2.hlsl index 648018c0c7..9fd81eed1e 100644 --- a/tools/clang/test/HLSLFileCheck/d3dreflect/lib_exports2.hlsl +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/lib_exports2.hlsl @@ -26,8 +26,8 @@ void RayGen() { U0.Store(idx.y * dim.x * 4 + idx.x * 4, idx.x ^ idx.y); } -// CHECK: DxilRuntimeData (size = 456 bytes): -// CHECK: StringBuffer (size = 128 bytes) +// CHECK: DxilRuntimeData (size = 612 bytes): +// CHECK: StringBuffer (size = 148 bytes) // CHECK: IndexTable (size = 16 bytes) // CHECK: RawBytes (size = 0 bytes) // CHECK: RecordTable (stride = 32 bytes) ResourceTable[2] = { @@ -52,8 +52,8 @@ void RayGen() { // CHECK: Flags: 0 (None) // CHECK: } // CHECK: } -// CHECK: RecordTable (stride = 44 bytes) FunctionTable[4] = { -// CHECK: <0:RuntimeDataFunctionInfo> = { +// CHECK: RecordTable (stride = 56 bytes) FunctionTable[4] = { +// CHECK: <0:RuntimeDataFunctionInfo3> = { // CHECK: Name: "\01?RayGen1{{[@$?.A-Za-z0-9_]+}}" // CHECK: UnmangledName: "RayGen1" // CHECK: Resources: <0:RecordArrayRef[1]> = { @@ -67,8 +67,11 @@ void RayGen() { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 128 // CHECK: MinShaderTarget: 458851 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) // CHECK: } -// CHECK: <1:RuntimeDataFunctionInfo> = { +// CHECK: <1:RuntimeDataFunctionInfo3> = { // CHECK: Name: "\01?VS_RENAMED{{[@$?.A-Za-z0-9_]+}}" // CHECK: 
UnmangledName: "VS_RENAMED" // CHECK: Resources: <2:RecordArrayRef[1]> = { @@ -82,8 +85,11 @@ void RayGen() { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 32767 // CHECK: MinShaderTarget: 393312 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) // CHECK: } -// CHECK: <2:RuntimeDataFunctionInfo> = { +// CHECK: <2:RuntimeDataFunctionInfo3> = { // CHECK: Name: "\01?RayGen2{{[@$?.A-Za-z0-9_]+}}" // CHECK: UnmangledName: "RayGen2" // CHECK: Resources: <0:RecordArrayRef[1]> = { @@ -97,8 +103,11 @@ void RayGen() { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 128 // CHECK: MinShaderTarget: 458851 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) // CHECK: } -// CHECK: <3:RuntimeDataFunctionInfo> = { +// CHECK: <3:RuntimeDataFunctionInfo3> = { // CHECK: Name: "VSMain" // CHECK: UnmangledName: "VSMain" // CHECK: Resources: <2:RecordArrayRef[1]> = { @@ -112,6 +121,43 @@ void RayGen() { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 2 // CHECK: MinShaderTarget: 65632 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: (OutputPositionPresent) +// CHECK: VS: <0:VSInfo> +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 16 bytes) SignatureElementTable[2] = { +// CHECK: <0:SignatureElement> = { +// CHECK: SemanticName: "COORD" +// CHECK: SemanticIndices: <2:array[1]> = { 0 } +// CHECK: SemanticKind: Arbitrary +// CHECK: ComponentType: I32 +// CHECK: InterpolationMode: Undefined +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 2 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: <1:SignatureElement> = { +// CHECK: SemanticName: "SV_Position" +// CHECK: SemanticIndices: <2:array[1]> = { 0 } +// CHECK: SemanticKind: Position +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: LinearNoperspective +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 3 
+// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 16 bytes) VSInfoTable[1] = { +// CHECK: <0:VSInfo> = { +// CHECK: SigInputElements: <2:RecordArrayRef[1]> = { +// CHECK: [0]: <0:SignatureElement> +// CHECK: } +// CHECK: SigOutputElements: <0:RecordArrayRef[1]> = { +// CHECK: [0]: <1:SignatureElement> +// CHECK: } +// CHECK: ViewIDOutputMask: <0:bytes[0]> // CHECK: } // CHECK: } diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/lib_exports3.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/lib_exports3.hlsl index 3dce94c18b..480b9e72ae 100644 --- a/tools/clang/test/HLSLFileCheck/d3dreflect/lib_exports3.hlsl +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/lib_exports3.hlsl @@ -17,9 +17,9 @@ float4 PSMain(int idx : INDEX) : SV_Target { return T2[T0.Load(idx)].f; } -// CHECK: DxilRuntimeData (size = 572 bytes): -// CHECK: StringBuffer (size = 160 bytes) -// CHECK: IndexTable (size = 12 bytes) +// CHECK: DxilRuntimeData (size = 756 bytes): +// CHECK: StringBuffer (size = 176 bytes) +// CHECK: IndexTable (size = 28 bytes) // CHECK: RawBytes (size = 0 bytes) // CHECK: RecordTable (stride = 32 bytes) ResourceTable[2] = { // CHECK: <0:RuntimeDataResourceInfo> = { @@ -43,8 +43,8 @@ float4 PSMain(int idx : INDEX) : SV_Target { // CHECK: Flags: 0 (None) // CHECK: } // CHECK: } -// CHECK: RecordTable (stride = 44 bytes) FunctionTable[6] = { -// CHECK: <0:RuntimeDataFunctionInfo> = { +// CHECK: RecordTable (stride = 56 bytes) FunctionTable[6] = { +// CHECK: <0:RuntimeDataFunctionInfo3> = { // CHECK: Name: "\01?PSMain{{[@$?.A-Za-z0-9_]+}}" // CHECK: UnmangledName: "PSMain" // CHECK: Resources: <0:RecordArrayRef[2]> = { @@ -59,8 +59,11 @@ float4 PSMain(int idx : INDEX) : SV_Target { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 32767 // CHECK: MinShaderTarget: 393312 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) // CHECK: } -// CHECK: 
<1:RuntimeDataFunctionInfo> = { +// CHECK: <1:RuntimeDataFunctionInfo3> = { // CHECK: Name: "\01?PSMain_Clone1{{[@$?.A-Za-z0-9_]+}}" // CHECK: UnmangledName: "PSMain_Clone1" // CHECK: Resources: <0:RecordArrayRef[2]> = { @@ -75,8 +78,11 @@ float4 PSMain(int idx : INDEX) : SV_Target { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 32767 // CHECK: MinShaderTarget: 393312 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) // CHECK: } -// CHECK: <2:RuntimeDataFunctionInfo> = { +// CHECK: <2:RuntimeDataFunctionInfo3> = { // CHECK: Name: "\01?PSMain_Clone2{{[@$?.A-Za-z0-9_]+}}" // CHECK: UnmangledName: "PSMain_Clone2" // CHECK: Resources: <0:RecordArrayRef[2]> = { @@ -91,8 +97,11 @@ float4 PSMain(int idx : INDEX) : SV_Target { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 32767 // CHECK: MinShaderTarget: 393312 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) // CHECK: } -// CHECK: <3:RuntimeDataFunctionInfo> = { +// CHECK: <3:RuntimeDataFunctionInfo3> = { // CHECK: Name: "PSMain" // CHECK: UnmangledName: "PSMain" // CHECK: Resources: <0:RecordArrayRef[2]> = { @@ -107,8 +116,19 @@ float4 PSMain(int idx : INDEX) : SV_Target { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 1 // CHECK: MinShaderTarget: 96 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) +// CHECK: PS: <0:PSInfo> = { +// CHECK: SigInputElements: <3:RecordArrayRef[1]> = { +// CHECK: [0]: <0:SignatureElement> +// CHECK: } +// CHECK: SigOutputElements: <5:RecordArrayRef[1]> = { +// CHECK: [0]: <1:SignatureElement> +// CHECK: } +// CHECK: } // CHECK: } -// CHECK: <4:RuntimeDataFunctionInfo> = { +// CHECK: <4:RuntimeDataFunctionInfo3> = { // CHECK: Name: "PSMain_Clone1" // CHECK: UnmangledName: "PSMain_Clone1" // CHECK: Resources: <0:RecordArrayRef[2]> = { @@ -123,8 +143,12 @@ float4 
PSMain(int idx : INDEX) : SV_Target { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 1 // CHECK: MinShaderTarget: 96 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) +// CHECK: PS: <0:PSInfo> // CHECK: } -// CHECK: <5:RuntimeDataFunctionInfo> = { +// CHECK: <5:RuntimeDataFunctionInfo3> = { // CHECK: Name: "PSMain_Clone2" // CHECK: UnmangledName: "PSMain_Clone2" // CHECK: Resources: <0:RecordArrayRef[2]> = { @@ -139,6 +163,42 @@ float4 PSMain(int idx : INDEX) : SV_Target { // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 1 // CHECK: MinShaderTarget: 96 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) +// CHECK: PS: <0:PSInfo> +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 16 bytes) SignatureElementTable[2] = { +// CHECK: <0:SignatureElement> = { +// CHECK: SemanticName: "INDEX" +// CHECK: SemanticIndices: <3:array[1]> = { 0 } +// CHECK: SemanticKind: Arbitrary +// CHECK: ComponentType: I32 +// CHECK: InterpolationMode: Constant +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 0 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: <1:SignatureElement> = { +// CHECK: SemanticName: "SV_Target" +// CHECK: SemanticIndices: <3:array[1]> = { 0 } +// CHECK: SemanticKind: Target +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: Undefined +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 3 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 8 bytes) PSInfoTable[1] = { +// CHECK: <0:PSInfo> = { +// CHECK: SigInputElements: <3:RecordArrayRef[1]> = { +// CHECK: [0]: <0:SignatureElement> +// CHECK: } +// CHECK: SigOutputElements: <5:RecordArrayRef[1]> = { +// CHECK: [0]: <1:SignatureElement> +// CHECK: } // CHECK: } // CHECK: } diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/lib_hs_export2.hlsl 
b/tools/clang/test/HLSLFileCheck/d3dreflect/lib_hs_export2.hlsl index 14c79df2f2..2c96419772 100644 --- a/tools/clang/test/HLSLFileCheck/d3dreflect/lib_hs_export2.hlsl +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/lib_hs_export2.hlsl @@ -90,12 +90,12 @@ HSPerPatchData HSPerPatchFunc1() return d; } -// CHECK: DxilRuntimeData (size = 436 bytes): -// CHECK: StringBuffer (size = 176 bytes) -// CHECK: IndexTable (size = 0 bytes) +// CHECK: DxilRuntimeData (size = 824 bytes): +// CHECK: StringBuffer (size = 236 bytes) +// CHECK: IndexTable (size = 40 bytes) // CHECK: RawBytes (size = 0 bytes) -// CHECK: RecordTable (stride = 44 bytes) FunctionTable[5] = { -// CHECK: <0:RuntimeDataFunctionInfo> = { +// CHECK: RecordTable (stride = 56 bytes) FunctionTable[5] = { +// CHECK: <0:RuntimeDataFunctionInfo3> = { // CHECK: Name: "\01?HSMain1{{[@$?.A-Za-z0-9_]+}}" // CHECK: UnmangledName: "HSMain1" // CHECK: Resources: [0]> = {} @@ -107,8 +107,11 @@ HSPerPatchData HSPerPatchFunc1() // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 32767 // CHECK: MinShaderTarget: 393312 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) // CHECK: } -// CHECK: <1:RuntimeDataFunctionInfo> = { +// CHECK: <1:RuntimeDataFunctionInfo3> = { // CHECK: Name: "\01?HSMain3{{[@$?.A-Za-z0-9_]+}}" // CHECK: UnmangledName: "HSMain3" // CHECK: Resources: [0]> = {} @@ -120,8 +123,11 @@ HSPerPatchData HSPerPatchFunc1() // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 32767 // CHECK: MinShaderTarget: 393312 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) // CHECK: } -// CHECK: <2:RuntimeDataFunctionInfo> = { +// CHECK: <2:RuntimeDataFunctionInfo3> = { // CHECK: Name: "HSMain1" // CHECK: UnmangledName: "HSMain1" // CHECK: Resources: [0]> = {} @@ -133,8 +139,12 @@ HSPerPatchData HSPerPatchFunc1() // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 8 // CHECK: 
MinShaderTarget: 196704 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) +// CHECK: HS: <0:HSInfo> // CHECK: } -// CHECK: <3:RuntimeDataFunctionInfo> = { +// CHECK: <3:RuntimeDataFunctionInfo3> = { // CHECK: Name: "HSMain3" // CHECK: UnmangledName: "HSMain3" // CHECK: Resources: [0]> = {} @@ -146,8 +156,12 @@ HSPerPatchData HSPerPatchFunc1() // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 8 // CHECK: MinShaderTarget: 196704 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) +// CHECK: HS: <1:HSInfo> // CHECK: } -// CHECK: <4:RuntimeDataFunctionInfo> = { +// CHECK: <4:RuntimeDataFunctionInfo3> = { // CHECK: Name: "\01?HSPerPatchFunc1{{[@$?.A-Za-z0-9_]+}}" // CHECK: UnmangledName: "HSPerPatchFunc1" // CHECK: Resources: [0]> = {} @@ -159,6 +173,103 @@ HSPerPatchData HSPerPatchFunc1() // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 8 // CHECK: MinShaderTarget: 393312 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: 0 (None) +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 16 bytes) SignatureElementTable[5] = { +// CHECK: <0:SignatureElement> = { +// CHECK: SemanticName: "SV_Position" +// CHECK: SemanticIndices: <0:array[1]> = { 0 } +// CHECK: SemanticKind: Position +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: LinearNoperspective +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 3 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: <1:SignatureElement> = { +// CHECK: SemanticName: "TEXCOORD" +// CHECK: SemanticIndices: <0:array[1]> = { 0 } +// CHECK: SemanticKind: Arbitrary +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: Linear +// CHECK: StartRow: 1 +// CHECK: ColsAndStream: 1 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: <2:SignatureElement> = { +// CHECK: SemanticName: "NORMAL" +// CHECK: SemanticIndices: 
<0:array[1]> = { 0 } +// CHECK: SemanticKind: Arbitrary +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: Linear +// CHECK: StartRow: 2 +// CHECK: ColsAndStream: 2 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: <3:SignatureElement> = { +// CHECK: SemanticName: "SV_TessFactor" +// CHECK: SemanticIndices: <2:array[3]> = { 0, 1, 2 } +// CHECK: SemanticKind: TessFactor +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: Undefined +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 12 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: <4:SignatureElement> = { +// CHECK: SemanticName: "SV_InsideTessFactor" +// CHECK: SemanticIndices: <0:array[1]> = { 0 } +// CHECK: SemanticKind: InsideTessFactor +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: Undefined +// CHECK: StartRow: 3 +// CHECK: ColsAndStream: 0 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 48 bytes) HSInfoTable[2] = { +// CHECK: <0:HSInfo> = { +// CHECK: SigInputElements: <2:RecordArrayRef[3]> = { +// CHECK: [0]: <0:SignatureElement> +// CHECK: [1]: <1:SignatureElement> +// CHECK: [2]: <2:SignatureElement> +// CHECK: } +// CHECK: SigOutputElements: [0]> = {} +// CHECK: SigPatchConstOutputElements: <7:RecordArrayRef[2]> = { +// CHECK: [0]: <3:SignatureElement> +// CHECK: [1]: <4:SignatureElement> +// CHECK: } +// CHECK: ViewIDOutputMask: <0:bytes[0]> +// CHECK: ViewIDPatchConstOutputMask: <0:bytes[0]> +// CHECK: InputToOutputMasks: <0:bytes[0]> +// CHECK: InputToPatchConstOutputMasks: <0:bytes[0]> +// CHECK: InputControlPointCount: 3 +// CHECK: OutputControlPointCount: 3 +// CHECK: TessellatorDomain: 2 +// CHECK: TessellatorOutputPrimitive: 3 +// CHECK: } +// CHECK: <1:HSInfo> = { +// CHECK: SigInputElements: <2:RecordArrayRef[3]> = { +// CHECK: [0]: <0:SignatureElement> +// CHECK: [1]: <1:SignatureElement> +// CHECK: [2]: <2:SignatureElement> +// CHECK: } +// CHECK: SigOutputElements: [0]> = {} +// CHECK: 
SigPatchConstOutputElements: <7:RecordArrayRef[2]> = { +// CHECK: [0]: <3:SignatureElement> +// CHECK: [1]: <4:SignatureElement> +// CHECK: } +// CHECK: ViewIDOutputMask: <0:bytes[0]> +// CHECK: ViewIDPatchConstOutputMask: <0:bytes[0]> +// CHECK: InputToOutputMasks: <0:bytes[0]> +// CHECK: InputToPatchConstOutputMasks: <0:bytes[0]> +// CHECK: InputControlPointCount: 3 +// CHECK: OutputControlPointCount: 3 +// CHECK: TessellatorDomain: 2 +// CHECK: TessellatorOutputPrimitive: 4 // CHECK: } // CHECK: } diff --git a/tools/clang/test/HLSLFileCheck/d3dreflect/mesh-groupshared.hlsl b/tools/clang/test/HLSLFileCheck/d3dreflect/mesh-groupshared.hlsl index 75a5e221d6..1729dee440 100644 --- a/tools/clang/test/HLSLFileCheck/d3dreflect/mesh-groupshared.hlsl +++ b/tools/clang/test/HLSLFileCheck/d3dreflect/mesh-groupshared.hlsl @@ -1,11 +1,11 @@ // RUN: %dxc -T lib_6_7 %s | %D3DReflect %s | FileCheck %s -// CHECK:DxilRuntimeData (size = 92 bytes): -// CHECK: StringBuffer (size = 8 bytes) -// CHECK: IndexTable (size = 0 bytes) +// CHECK:DxilRuntimeData (size = 340 bytes): +// CHECK: StringBuffer (size = 32 bytes) +// CHECK: IndexTable (size = 64 bytes) // CHECK: RawBytes (size = 0 bytes) -// CHECK: RecordTable (stride = 44 bytes) FunctionTable[1] = { -// CHECK: <0:RuntimeDataFunctionInfo> = { +// CHECK: RecordTable (stride = 56 bytes) FunctionTable[1] = { +// CHECK: <0:RuntimeDataFunctionInfo3> = { // CHECK: Name: "main" // CHECK: UnmangledName: "main" // CHECK: Resources: [0]> = {} @@ -17,6 +17,133 @@ // CHECK: FeatureInfo2: 0 // CHECK: ShaderStageFlag: 8192 // CHECK: MinShaderTarget: 852069 +// CHECK: MinimumExpectedWaveLaneCount: 0 +// CHECK: MaximumExpectedWaveLaneCount: 0 +// CHECK: ShaderFlags: (OutputPositionPresent | UsesViewID) +// CHECK: MS: <0:MSInfo> = { +// CHECK: SigOutputElements: <7:RecordArrayRef[2]> = { +// CHECK: [0]: <0:SignatureElement> = { +// CHECK: SemanticName: "SV_Position" +// CHECK: SemanticIndices: <0:array[1]> = { 0 } +// CHECK: SemanticKind: Position 
+// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: LinearNoperspective +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 3 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: [1]: <1:SignatureElement> = { +// CHECK: SemanticName: "COLOR" +// CHECK: SemanticIndices: <2:array[4]> = { 0, 1, 2, 3 } +// CHECK: SemanticKind: Arbitrary +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: Linear +// CHECK: StartRow: 1 +// CHECK: ColsAndStream: 0 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: } +// CHECK: SigPrimOutputElements: <10:RecordArrayRef[1]> = { +// CHECK: [0]: <2:SignatureElement> = { +// CHECK: SemanticName: "NORMAL" +// CHECK: SemanticIndices: <0:array[1]> = { 0 } +// CHECK: SemanticKind: Arbitrary +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: Constant +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 0 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: } +// CHECK: ViewIDOutputMask: <0:bytes[0]> +// CHECK: ViewIDPrimOutputMask: <0:bytes[0]> +// CHECK: NumThreads: <12:array[3]> = { 32, 1, 1 } +// CHECK: GroupSharedBytesUsed: 64 +// CHECK: GroupSharedBytesDependentOnViewID: 0 +// CHECK: PayloadSizeInBytes: 36 +// CHECK: MaxOutputVertices: 32 +// CHECK: MaxOutputPrimitives: 16 +// CHECK: MeshOutputTopology: 2 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 16 bytes) SignatureElementTable[3] = { +// CHECK: <0:SignatureElement> = { +// CHECK: SemanticName: "SV_Position" +// CHECK: SemanticIndices: <0:array[1]> = { 0 } +// CHECK: SemanticKind: Position +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: LinearNoperspective +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 3 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: <1:SignatureElement> = { +// CHECK: SemanticName: "COLOR" +// CHECK: SemanticIndices: <2:array[4]> = { 0, 1, 2, 3 } +// CHECK: SemanticKind: Arbitrary +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: Linear +// CHECK: StartRow: 1 +// CHECK: 
ColsAndStream: 0 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: <2:SignatureElement> = { +// CHECK: SemanticName: "NORMAL" +// CHECK: SemanticIndices: <0:array[1]> = { 0 } +// CHECK: SemanticKind: Arbitrary +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: Constant +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 0 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: } +// CHECK: RecordTable (stride = 48 bytes) MSInfoTable[1] = { +// CHECK: <0:MSInfo> = { +// CHECK: SigOutputElements: <7:RecordArrayRef[2]> = { +// CHECK: [0]: <0:SignatureElement> = { +// CHECK: SemanticName: "SV_Position" +// CHECK: SemanticIndices: <0:array[1]> = { 0 } +// CHECK: SemanticKind: Position +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: LinearNoperspective +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 3 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: [1]: <1:SignatureElement> = { +// CHECK: SemanticName: "COLOR" +// CHECK: SemanticIndices: <2:array[4]> = { 0, 1, 2, 3 } +// CHECK: SemanticKind: Arbitrary +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: Linear +// CHECK: StartRow: 1 +// CHECK: ColsAndStream: 0 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: } +// CHECK: SigPrimOutputElements: <10:RecordArrayRef[1]> = { +// CHECK: [0]: <2:SignatureElement> = { +// CHECK: SemanticName: "NORMAL" +// CHECK: SemanticIndices: <0:array[1]> = { 0 } +// CHECK: SemanticKind: Arbitrary +// CHECK: ComponentType: F32 +// CHECK: InterpolationMode: Constant +// CHECK: StartRow: 0 +// CHECK: ColsAndStream: 0 +// CHECK: UsageAndDynIndexMasks: 0 +// CHECK: } +// CHECK: } +// CHECK: ViewIDOutputMask: <0:bytes[0]> +// CHECK: ViewIDPrimOutputMask: <0:bytes[0]> +// CHECK: NumThreads: <12:array[3]> = { 32, 1, 1 } +// CHECK: GroupSharedBytesUsed: 64 +// CHECK: GroupSharedBytesDependentOnViewID: 0 +// CHECK: PayloadSizeInBytes: 36 +// CHECK: MaxOutputVertices: 32 +// CHECK: MaxOutputPrimitives: 16 +// CHECK: MeshOutputTopology: 2 // 
CHECK: } // CHECK: } // CHECK:ID3D12LibraryReflection: diff --git a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_restypes.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_restypes.hlsl index 23b87e4f4e..50e23a0817 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_restypes.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/atomic/atomic_restypes.hlsl @@ -51,8 +51,8 @@ void main(uint ix : SV_GroupIndex) { // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier // CHECK: error: no matching function for call to 'InterlockedAdd' // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier - // CHKLOC: error: Atomic operation targets must be groupshared or UAV - // CHKLOC: error: Atomic operation targets must be groupshared or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV InterlockedAdd(DEST IDX, val); InterlockedAdd(DEST IDX, val, orig); @@ -61,8 +61,8 @@ void main(uint ix : SV_GroupIndex) { // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier // CHECK: error: no matching function for call to 'InterlockedMin' // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier - // CHKLOC: error: Atomic operation targets must be groupshared or UAV - // CHKLOC: error: Atomic operation targets must be groupshared or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV InterlockedMin(DEST IDX, val); InterlockedMin(DEST IDX, val, orig); @@ -71,8 +71,8 @@ void main(uint ix : SV_GroupIndex) { // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier // CHECK: error: no matching function for 
call to 'InterlockedMax' // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier - // CHKLOC: error: Atomic operation targets must be groupshared or UAV - // CHKLOC: error: Atomic operation targets must be groupshared or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV InterlockedMax(DEST IDX, val); InterlockedMax(DEST IDX, val, orig); @@ -81,8 +81,8 @@ void main(uint ix : SV_GroupIndex) { // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier // CHECK: error: no matching function for call to 'InterlockedAnd' // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier - // CHKLOC: error: Atomic operation targets must be groupshared or UAV - // CHKLOC: error: Atomic operation targets must be groupshared or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV InterlockedAnd(DEST IDX, val); InterlockedAnd(DEST IDX, val, orig); @@ -91,8 +91,8 @@ void main(uint ix : SV_GroupIndex) { // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier // CHECK: error: no matching function for call to 'InterlockedOr' // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier - // CHKLOC: error: Atomic operation targets must be groupshared or UAV - // CHKLOC: error: Atomic operation targets must be groupshared or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV InterlockedOr(DEST IDX, val); InterlockedOr(DEST IDX, val, orig); @@ -101,27 +101,27 @@ void main(uint ix : SV_GroupIndex) { // CHKRES: note: candidate function not viable: 1st 
argument {{.*}} would lose const qualifier // CHECK: error: no matching function for call to 'InterlockedXor' // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier - // CHKLOC: error: Atomic operation targets must be groupshared or UAV - // CHKLOC: error: Atomic operation targets must be groupshared or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV InterlockedXor(DEST IDX, val); InterlockedXor(DEST IDX, val, orig); // compareStore // CHECK: error: no matching function for call to 'InterlockedCompareStore' // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier - // CHKLOC: error: Atomic operation targets must be groupshared or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV InterlockedCompareStore(DEST IDX, comp, val); // exchange // CHECK: error: no matching function for call to 'InterlockedExchange' // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier - // CHKLOC: error: Atomic operation targets must be groupshared or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV InterlockedExchange(DEST IDX, val, orig); // compareExchange // CHECK: error: no matching function for call to 'InterlockedCompareExchange' // CHKRES: note: candidate function not viable: 1st argument {{.*}} would lose const qualifier - // CHKLOC: error: Atomic operation targets must be groupshared or UAV + // CHKLOC: error: Atomic operation targets must be groupshared, Node Record or UAV InterlockedCompareExchange(DEST IDX, comp, val, orig); output[ix] = (float)DEST IDX; diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Add-limited.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Add-limited.hlsl new file mode 100644 index 
0000000000..c3dfa336a3 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Add-limited.hlsl @@ -0,0 +1,56 @@ +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DADD_TY=WMLC %s | FileCheck %s -DADD_TY=2 -DCOMP=9 -DDIMM=16 -DDIMN=16 -check-prefix=CHKIR +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DADD_TY=WMRR %s | FileCheck %s -DADD_TY=3 -DCOMP=9 -DDIMM=16 -DDIMN=16 -check-prefix=CHKIR +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DADD_TY=WMA %s | FileCheck %s -DADD_TY=4 -DCOMP=9 -DDIMM=16 -DDIMN=16 -check-prefix=CHKIR +// RUN: %dxc -enable-16bit-types -T cs_6_8 -ast-dump -DADD_TY=WMLC %s | FileCheck %s -DADD_TY=WaveMatrixLeftColAcc -DCOMP=float -DDIMM=16 -DDIMN=16 -check-prefix=CHKAST +// RUN: %dxc -enable-16bit-types -T cs_6_8 -ast-dump -DADD_TY=WMRR %s | FileCheck %s -DADD_TY=WaveMatrixRightRowAcc -DCOMP=float -DDIMM=16 -DDIMN=16 -check-prefix=CHKAST +// RUN: %dxc -enable-16bit-types -T cs_6_8 -ast-dump -DADD_TY=WMA %s | FileCheck %s -DADD_TY=WaveMatrixAccumulator -DCOMP=float -DDIMM=16 -DDIMN=16 -check-prefix=CHKAST + +// CHECK: ; Note: shader requires additional functionality: +// CHECK: ; Wave level operations +// CHECK: ; Wave Matrix + +// CHECK: define void @main() + +#ifndef COMP +#define COMP float +#endif +#ifndef DIMM +#define DIMM 16 +#endif +#ifndef DIMN +#define DIMN 16 +#endif +#ifndef WAVESIZE +#define WAVESIZE +#endif +#ifndef NUMTHREADS +#define NUMTHREADS [NumThreads(64,1,1)] +#endif + +#define WMLC WaveMatrixLeftColAcc +#define WMRR WaveMatrixRightRowAcc +#define WMA WaveMatrixAccumulator + +WAVESIZE +NUMTHREADS +void main(uint3 gtid : SV_GroupThreadID, uint gidx : SV_GroupIndex) +{ +// CHKIR: %[[wma:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHKIR: %[[wma_add:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHKIR: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.waveMatProps { i8 4, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHKIR: call void 
@dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wma_add]], %dx.types.waveMatProps { i8 [[ADD_TY]], i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) + WMA acc; + ADD_TY wma_add; + +// CHKAST: CXXMemberCallExpr +// CHKAST-NEXT: MemberExpr +// CHKAST-SAME: .Add +// CHKAST-NEXT: DeclRefExpr +// CHKAST-SAME: 'acc' 'WaveMatrixAccumulator<[[COMP]], [[DIMM]], [[DIMN]]>' +// CHKAST-NEXT: +// CHKAST-NEXT: DeclRefExpr +// CHKAST-SAME: 'wma_add' '[[ADD_TY]]<[[COMP]], [[DIMM]], [[DIMN]]>' + +// CHKIR: call void @dx.op.waveMatrix_Accumulate(i32 237, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.waveMatrix* nonnull %[[wma_add]]) + acc.Add(wma_add); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Depth.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Depth.hlsl new file mode 100644 index 0000000000..50c6ab5e5e --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Depth.hlsl @@ -0,0 +1,51 @@ +// RUN: %dxc -E main -T cs_6_8 -DCOMP=float -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=16 +// RUN: %dxc -E main -T cs_6_8 -DCOMP=half -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=16 +// RUN: %dxc -E main -T cs_6_8 -DCOMP=int8_t4_packed -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=17 -DDIMM=16 -DDIMN=16 +// RUN: %dxc -E main -T cs_6_8 -DCOMP=uint8_t4_packed -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=18 -DDIMM=16 -DDIMN=16 +// RUN: %dxc -E main -T cs_6_8 -DCOMP=float -DDIMM=64 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=64 -DDIMN=16 +// RUN: %dxc -E main -T cs_6_8 -DCOMP=float -DDIMM=16 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=64 +// RUN: %dxc -E main -T cs_6_8 -DCOMP=float -DDIMM=64 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DDIMM=64 -DDIMN=64 + +// CHECK: ; Note: shader requires additional functionality: +// CHECK: ; Wave level operations +// CHECK: ; Wave Matrix + +// CHECK: define void @main() + +#ifndef COMP +#define COMP float 
+#endif +#ifndef DIMM +#define DIMM 16 +#endif +#ifndef DIMN +#define DIMN 16 +#endif +#ifndef WAVESIZE +#define WAVESIZE +#endif +#ifndef NUMTHREADS +#define NUMTHREADS [NumThreads(64,1,1)] +#endif + +#define WML WaveMatrixLeft +#define WMR WaveMatrixRight + +RWByteAddressBuffer rwbuf; + +WAVESIZE +NUMTHREADS +void main(uint3 gtid : SV_GroupThreadID, uint gidx : SV_GroupIndex) +{ +// CHECK: %[[wml:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wmr:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.waveMatProps { i8 0, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmr]], %dx.types.waveMatProps { i8 1, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) + WML left; + WMR right; + +// CHECK: call i32 @dx.op.waveMatrix_Depth(i32 227, %dx.types.waveMatProps { i8 0, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) + rwbuf.Store(0, left.MatrixDepth()); +// CHECK: call i32 @dx.op.waveMatrix_Depth(i32 227, %dx.types.waveMatProps { i8 1, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) + rwbuf.Store(4, right.MatrixDepth()); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Fill-acc.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Fill-acc.hlsl new file mode 100644 index 0000000000..f7f970d56e --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Fill-acc.hlsl @@ -0,0 +1,54 @@ +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float16_t -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=8 -DDIMM=16 -DDIMN=16 -DOLOAD=f16 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=int -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=4 -DDIMM=16 -DDIMN=16 -DOLOAD=i32 +// RUN: %dxc -enable-16bit-types -T 
cs_6_8 -DCOMP=float -DDIMM=64 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=64 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=16 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=64 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=64 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DDIMM=64 -DDIMN=64 -DOLOAD=f32 + +// CHECK: ; Note: shader requires additional functionality: +// CHECK: ; Wave level operations +// CHECK: ; Wave Matrix + +// CHECK: define void @main() + +#ifndef COMP +#define COMP float +#endif +#ifndef DIMM +#define DIMM 16 +#endif +#ifndef DIMN +#define DIMN 16 +#endif +#ifndef WAVESIZE +#define WAVESIZE +#endif +#ifndef NUMTHREADS +#define NUMTHREADS [NumThreads(64,1,1)] +#endif + +#define WMLC WaveMatrixLeftColAcc +#define WMRR WaveMatrixRightRowAcc +#define WMA WaveMatrixAccumulator + +WAVESIZE +NUMTHREADS +void main(uint3 gtid : SV_GroupThreadID, uint gidx : SV_GroupIndex) +{ +// CHECK: %[[wmlc:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wmrr:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wma:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmlc]], %dx.types.waveMatProps { i8 2, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmrr]], %dx.types.waveMatProps { i8 3, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.waveMatProps { i8 4, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) + WMLC leftcol; + WMRR rightrow; + WMA acc; + +// CHECK: call void @dx.op.waveMatrix_Fill.[[OLOAD]](i32 228, %dx.types.waveMatrix* nonnull %[[wmlc]] + leftcol.Fill(1); +// CHECK: call void @dx.op.waveMatrix_Fill.[[OLOAD]](i32 228, %dx.types.waveMatrix* nonnull %[[wmrr]] + rightrow.Fill(2); +// CHECK: call void 
@dx.op.waveMatrix_Fill.[[OLOAD]](i32 228, %dx.types.waveMatrix* nonnull %[[wma]] + acc.Fill(3); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Fill-in.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Fill-in.hlsl new file mode 100644 index 0000000000..baf0294d1c --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Fill-in.hlsl @@ -0,0 +1,49 @@ +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float16_t -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=8 -DDIMM=16 -DDIMN=16 -DOLOAD=f16 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=int8_t4_packed -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=17 -DDIMM=16 -DDIMN=16 -DOLOAD=i32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=uint8_t4_packed -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=18 -DDIMM=16 -DDIMN=16 -DOLOAD=i32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=64 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=64 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=16 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=64 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=64 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DDIMM=64 -DDIMN=64 -DOLOAD=f32 + +// CHECK: ; Note: shader requires additional functionality: +// CHECK: ; Wave level operations +// CHECK: ; Wave Matrix + +// CHECK: define void @main() + +#ifndef COMP +#define COMP float +#endif +#ifndef DIMM +#define DIMM 16 +#endif +#ifndef DIMN +#define DIMN 16 +#endif +#ifndef WAVESIZE +#define WAVESIZE +#endif +#ifndef NUMTHREADS +#define NUMTHREADS [NumThreads(64,1,1)] +#endif + +#define WML WaveMatrixLeft +#define WMR WaveMatrixRight + +WAVESIZE +NUMTHREADS +void main(uint3 gtid : SV_GroupThreadID, uint gidx : SV_GroupIndex) +{ +// CHECK: %[[wml:.*]] = alloca 
%dx.types.waveMatrix, align 4 +// CHECK: %[[wmr:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.waveMatProps { i8 0, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmr]], %dx.types.waveMatProps { i8 1, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) + WML left; + WMR right; + +// CHECK: call void @dx.op.waveMatrix_Fill.[[OLOAD]](i32 228, %dx.types.waveMatrix* nonnull %[[wml]] +// CHECK: call void @dx.op.waveMatrix_Fill.[[OLOAD]](i32 228, %dx.types.waveMatrix* nonnull %[[wmr]] + left.Fill(1); + right.Fill(2); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_LoadStore-acc.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_LoadStore-acc.hlsl new file mode 100644 index 0000000000..f40d3e6ad7 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_LoadStore-acc.hlsl @@ -0,0 +1,112 @@ +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float16_t -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=8 -DDIMM=16 -DDIMN=16 -DOLOAD=f16 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=int -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=4 -DDIMM=16 -DDIMN=16 -DOLOAD=i32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=64 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=64 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=16 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=64 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=64 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DDIMM=64 -DDIMN=64 -DOLOAD=f32 + +// CHECK: ; Note: shader requires additional functionality: +// CHECK: ; Wave level operations +// CHECK: ; Wave Matrix 
+ +// CHECK: define void @main() + +#ifndef COMP +#define COMP float +#endif +#ifndef DIMM +#define DIMM 16 +#endif +#ifndef DIMN +#define DIMN 16 +#endif +#ifndef STRIDE +#define STRIDE 64 +#endif +#ifndef WAVESIZE +#define WAVESIZE +#endif +#ifndef NUMTHREADS +#define NUMTHREADS [NumThreads(64,1,1)] +#endif + +#define WMLC WaveMatrixLeftColAcc +#define WMRR WaveMatrixRightRowAcc +#define WMA WaveMatrixAccumulator + +// Should be no addrspacecast from groupshared. +// CHECK-NOT: addrspacecast + +groupshared COMP ai512[512]; + +ByteAddressBuffer buf; +RWByteAddressBuffer rwbuf; + +WAVESIZE +NUMTHREADS +void main(uint3 gtid : SV_GroupThreadID, uint gidx : SV_GroupIndex) +{ +// CHECK: %[[wmlc:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wmrr:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wma:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmlc]], %dx.types.waveMatProps { i8 2, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmrr]], %dx.types.waveMatProps { i8 3, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.waveMatProps { i8 4, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) + WMLC leftcol; + WMRR rightrow; + WMA acc; + + uint n = 0; +#define IDX() (n++*1024) + +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wmlc]], %dx.types.Handle %{{[^,]+}}, i32 0, i32 64, i8 0, i1 false) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wmlc]], %dx.types.Handle %{{[^,]+}}, i32 1024, i32 64, i8 16, i1 false) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wmlc]], %dx.types.Handle %{{[^,]+}}, i32 2048, i32 64, i8 0, i1 false) +// CHECK: call void 
@dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wmlc]], %dx.types.Handle %{{[^,]+}}, i32 3072, i32 64, i8 16, i1 false) + leftcol.Load(buf, IDX(), STRIDE); + leftcol.Load(buf, IDX(), STRIDE, 16); + leftcol.Load(rwbuf, IDX(), STRIDE); + leftcol.Load(rwbuf, IDX(), STRIDE, 16); +// CHECK: call void @dx.op.waveMatrix_StoreRawBuf(i32 231, %dx.types.waveMatrix* nonnull %[[wmlc]], %dx.types.Handle %{{[^,]+}}, i32 4096, i32 64, i8 0, i1 false) +// CHECK: call void @dx.op.waveMatrix_StoreRawBuf(i32 231, %dx.types.waveMatrix* nonnull %[[wmlc]], %dx.types.Handle %{{[^,]+}}, i32 5120, i32 64, i8 16, i1 false) + leftcol.Store(rwbuf, IDX(), STRIDE); + leftcol.Store(rwbuf, IDX(), STRIDE, 16); +// CHECK: call void @dx.op.waveMatrix_LoadGroupShared.[[OLOAD]](i32 230, %dx.types.waveMatrix* nonnull %[[wmlc]], {{.+}} addrspace(3)* {{.+}}, i32 0, i32 16, i1 false) +// CHECK: call void @dx.op.waveMatrix_StoreGroupShared.[[OLOAD]](i32 232, %dx.types.waveMatrix* nonnull %[[wmlc]], {{.+}} addrspace(3)* {{.+}}, i32 32, i32 16, i1 false) + leftcol.Load(ai512, 0, 16); + leftcol.Store(ai512, 32, 16); + +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wmrr]], %dx.types.Handle %{{[^,]+}}, i32 6144, i32 64, i8 0, i1 false) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wmrr]], %dx.types.Handle %{{[^,]+}}, i32 7168, i32 64, i8 16, i1 false) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wmrr]], %dx.types.Handle %{{[^,]+}}, i32 8192, i32 64, i8 0, i1 false) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wmrr]], %dx.types.Handle %{{[^,]+}}, i32 9216, i32 64, i8 16, i1 false) + rightrow.Load(buf, IDX(), STRIDE); + rightrow.Load(buf, IDX(), STRIDE, 16); + rightrow.Load(rwbuf, IDX(), STRIDE); + rightrow.Load(rwbuf, IDX(), STRIDE, 16); +// CHECK: call void @dx.op.waveMatrix_StoreRawBuf(i32 231, 
%dx.types.waveMatrix* nonnull %[[wmrr]], %dx.types.Handle %{{[^,]+}}, i32 10240, i32 64, i8 0, i1 false) +// CHECK: call void @dx.op.waveMatrix_StoreRawBuf(i32 231, %dx.types.waveMatrix* nonnull %[[wmrr]], %dx.types.Handle %{{[^,]+}}, i32 11264, i32 64, i8 16, i1 false) + rightrow.Store(rwbuf, IDX(), STRIDE); + rightrow.Store(rwbuf, IDX(), STRIDE, 16); +// CHECK: call void @dx.op.waveMatrix_LoadGroupShared.[[OLOAD]](i32 230, %dx.types.waveMatrix* nonnull %[[wmrr]], {{.+}} addrspace(3)* {{.+}}, i32 48, i32 16, i1 false) +// CHECK: call void @dx.op.waveMatrix_StoreGroupShared.[[OLOAD]](i32 232, %dx.types.waveMatrix* nonnull %[[wmrr]], {{.+}} addrspace(3)* {{.+}}, i32 64, i32 16, i1 false) + rightrow.Load(ai512, 48, 16); + rightrow.Store(ai512, 64, 16); + +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.Handle %{{[^,]+}}, i32 12288, i32 64, i8 0, i1 false) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.Handle %{{[^,]+}}, i32 13312, i32 64, i8 16, i1 true) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.Handle %{{[^,]+}}, i32 14336, i32 64, i8 0, i1 true) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.Handle %{{[^,]+}}, i32 15360, i32 64, i8 16, i1 false) + acc.Load(buf, IDX(), STRIDE, false); + acc.Load(buf, IDX(), STRIDE, true, 16); + acc.Load(rwbuf, IDX(), STRIDE, true); + acc.Load(rwbuf, IDX(), STRIDE, false, 16); +// CHECK: call void @dx.op.waveMatrix_StoreRawBuf(i32 231, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.Handle %{{[^,]+}}, i32 16384, i32 64, i8 0, i1 false) +// CHECK: call void @dx.op.waveMatrix_StoreRawBuf(i32 231, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.Handle %{{[^,]+}}, i32 17408, i32 64, i8 16, i1 true) + acc.Store(rwbuf, IDX(), STRIDE, false); + acc.Store(rwbuf, IDX(), STRIDE, true, 16); +// 
CHECK: call void @dx.op.waveMatrix_LoadGroupShared.[[OLOAD]](i32 230, %dx.types.waveMatrix* nonnull %[[wma]], {{.+}} addrspace(3)* {{.+}}, i32 80, i32 16, i1 false) +// CHECK: call void @dx.op.waveMatrix_StoreGroupShared.[[OLOAD]](i32 232, %dx.types.waveMatrix* nonnull %[[wma]], {{.+}} addrspace(3)* {{.+}}, i32 96, i32 16, i1 true) + acc.Load(ai512, 80, 16, false); + acc.Store(ai512, 96, 16, true); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_LoadStore-in.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_LoadStore-in.hlsl new file mode 100644 index 0000000000..c83402646c --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_LoadStore-in.hlsl @@ -0,0 +1,92 @@ +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float16_t -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=8 -DDIMM=16 -DDIMN=16 -DOLOAD=f16 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=int8_t4_packed -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=17 -DDIMM=16 -DDIMN=16 -DOLOAD=i32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=uint8_t4_packed -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=18 -DDIMM=16 -DDIMN=16 -DOLOAD=i32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=64 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=64 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=16 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=64 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=64 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DDIMM=64 -DDIMN=64 -DOLOAD=f32 + +// CHECK: ; Note: shader requires additional functionality: +// CHECK: ; Wave level operations +// CHECK: ; Wave Matrix + +// CHECK: define void @main() + +#ifndef COMP +#define COMP float +#endif +#ifndef DIMM +#define DIMM 16 +#endif +#ifndef DIMN +#define 
DIMN 16 +#endif +#ifndef STRIDE +#define STRIDE 64 +#endif +#ifndef WAVESIZE +#define WAVESIZE +#endif +#ifndef NUMTHREADS +#define NUMTHREADS [NumThreads(64,1,1)] +#endif + +#define WML WaveMatrixLeft +#define WMR WaveMatrixRight + +// Should be no addrspacecast from groupshared. +// CHECK-NOT: addrspacecast + +groupshared COMP ai512[512]; + +ByteAddressBuffer buf; +RWByteAddressBuffer rwbuf; + +WAVESIZE +NUMTHREADS +void main(uint3 gtid : SV_GroupThreadID, uint gidx : SV_GroupIndex) +{ +// CHECK: %[[wml:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wmr:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.waveMatProps { i8 0, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmr]], %dx.types.waveMatProps { i8 1, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) + WML left; + WMR right; + + uint n = 0; +#define IDX() (n++*1024) + +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.Handle %{{[^,]+}}, i32 0, i32 64, i8 0, i1 false) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.Handle %{{[^,]+}}, i32 1024, i32 64, i8 16, i1 true) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.Handle %{{[^,]+}}, i32 2048, i32 64, i8 0, i1 true) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.Handle %{{[^,]+}}, i32 3072, i32 64, i8 16, i1 false) + left.Load(buf, IDX(), STRIDE, false); + left.Load(buf, IDX(), STRIDE, true, 16); + left.Load(rwbuf, IDX(), STRIDE, true); + left.Load(rwbuf, IDX(), STRIDE, false, 16); +// CHECK: call void @dx.op.waveMatrix_StoreRawBuf(i32 231, %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.Handle %{{[^,]+}}, i32 4096, i32 64, i8 0, i1 
false) +// CHECK: call void @dx.op.waveMatrix_StoreRawBuf(i32 231, %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.Handle %{{[^,]+}}, i32 5120, i32 64, i8 16, i1 true) + left.Store(rwbuf, IDX(), STRIDE, false); + left.Store(rwbuf, IDX(), STRIDE, true, 16); +// CHECK: call void @dx.op.waveMatrix_LoadGroupShared.[[OLOAD]](i32 230, %dx.types.waveMatrix* nonnull %[[wml]], {{.+}} addrspace(3)* {{.+}}, i32 0, i32 16, i1 false) +// CHECK: call void @dx.op.waveMatrix_StoreGroupShared.[[OLOAD]](i32 232, %dx.types.waveMatrix* nonnull %[[wml]], {{.+}} addrspace(3)* {{.+}}, i32 32, i32 16, i1 true) + left.Load(ai512, 0, 16, false); + left.Store(ai512, 32, 16, true); + +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wmr]], %dx.types.Handle %{{[^,]+}}, i32 6144, i32 64, i8 0, i1 false) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wmr]], %dx.types.Handle %{{[^,]+}}, i32 7168, i32 64, i8 16, i1 true) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wmr]], %dx.types.Handle %{{[^,]+}}, i32 8192, i32 64, i8 0, i1 true) +// CHECK: call void @dx.op.waveMatrix_LoadRawBuf(i32 229, %dx.types.waveMatrix* nonnull %[[wmr]], %dx.types.Handle %{{[^,]+}}, i32 9216, i32 64, i8 16, i1 false) + right.Load(buf, IDX(), STRIDE, false); + right.Load(buf, IDX(), STRIDE, true, 16); + right.Load(rwbuf, IDX(), STRIDE, true); + right.Load(rwbuf, IDX(), STRIDE, false, 16); +// CHECK: call void @dx.op.waveMatrix_StoreRawBuf(i32 231, %dx.types.waveMatrix* nonnull %[[wmr]], %dx.types.Handle %{{[^,]+}}, i32 10240, i32 64, i8 0, i1 false) +// CHECK: call void @dx.op.waveMatrix_StoreRawBuf(i32 231, %dx.types.waveMatrix* nonnull %[[wmr]], %dx.types.Handle %{{[^,]+}}, i32 11264, i32 64, i8 16, i1 true) + right.Store(rwbuf, IDX(), STRIDE, false); + right.Store(rwbuf, IDX(), STRIDE, true, 16); +// CHECK: call void @dx.op.waveMatrix_LoadGroupShared.[[OLOAD]](i32 230, 
%dx.types.waveMatrix* nonnull %[[wmr]], {{.+}} addrspace(3)* {{.+}}, i32 48, i32 16, i1 false) +// CHECK: call void @dx.op.waveMatrix_StoreGroupShared.[[OLOAD]](i32 232, %dx.types.waveMatrix* nonnull %[[wmr]], {{.+}} addrspace(3)* {{.+}}, i32 64, i32 16, i1 true) + right.Load(ai512, 48, 16, false); + right.Store(ai512, 64, 16, true); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Multiply-Add-acc.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Multiply-Add-acc.hlsl new file mode 100644 index 0000000000..1bcf2af3ae --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_Multiply-Add-acc.hlsl @@ -0,0 +1,81 @@ +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DCOMP_IN=float16_t -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DCOMP_IN=8 -DDIMM=16 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DCOMP_IN=float -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DCOMP_IN=9 -DDIMM=16 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float16_t -DCOMP_IN=float16_t -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=8 -DCOMP_IN=8 -DDIMM=16 -DDIMN=16 -DOLOAD=f16 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=int -DCOMP_IN=int8_t4_packed -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=4 -DCOMP_IN=17 -DDIMM=16 -DDIMN=16 -DOLOAD=i32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=int -DCOMP_IN=uint8_t4_packed -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=4 -DCOMP_IN=18 -DDIMM=16 -DDIMN=16 -DOLOAD=i32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DCOMP_IN=float -DDIMM=64 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DCOMP_IN=9 -DDIMM=64 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DCOMP_IN=float -DDIMM=16 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DCOMP_IN=9 -DDIMM=16 -DDIMN=64 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DCOMP_IN=float -DDIMM=64 -DDIMN=64 %s | FileCheck %s 
-DCOMP=9 -DCOMP_IN=9 -DDIMM=64 -DDIMN=64 -DOLOAD=f32 + +// CHECK: ; Note: shader requires additional functionality: +// CHECK: ; Wave level operations +// CHECK: ; Wave Matrix + +// CHECK: define void @main() + +#ifndef COMP +#define COMP float +#endif +#ifndef COMP_IN +#define COMP_IN float +#endif +#ifndef DIMM +#define DIMM 16 +#endif +#ifndef DIMN +#define DIMN 16 +#endif +#ifndef STRIDE +#define STRIDE 64 +#endif +#ifndef WAVESIZE +#define WAVESIZE +#endif +#ifndef NUMTHREADS +#define NUMTHREADS [NumThreads(64,1,1)] +#endif + +#define WML WaveMatrixLeft +#define WMR WaveMatrixRight +#define WMLC WaveMatrixLeftColAcc +#define WMRR WaveMatrixRightRowAcc +#define WMA WaveMatrixAccumulator + +WAVESIZE +NUMTHREADS +void main(uint3 gtid : SV_GroupThreadID, uint gidx : SV_GroupIndex) +{ +// CHECK: %[[wml:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wmr:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wmlc:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wmrr:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wma:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wma2:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.waveMatProps { i8 0, i8 [[COMP_IN]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmr]], %dx.types.waveMatProps { i8 1, i8 [[COMP_IN]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmlc]], %dx.types.waveMatProps { i8 2, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmrr]], %dx.types.waveMatProps { i8 3, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.waveMatProps { i8 4, i8
[[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wma2]], %dx.types.waveMatProps { i8 4, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) + WML left; + WMR right; + WMLC leftcol; + WMRR rightrow; + WMA acc; + WMA acc2; + +// CHECK: call void @dx.op.waveMatrix_Multiply(i32 233, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.waveMatrix* nonnull %[[wmr]]) +// CHECK: call void @dx.op.waveMatrix_Multiply(i32 234, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.waveMatrix* nonnull %[[wmr]]) + acc.Multiply(left, right); + acc.MultiplyAccumulate(left, right); + +// CHECK: call void @dx.op.waveMatrix_Accumulate(i32 237, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.waveMatrix* nonnull %[[wmlc]]) +// CHECK: call void @dx.op.waveMatrix_Accumulate(i32 237, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.waveMatrix* nonnull %[[wmrr]]) + acc.Add(leftcol); + acc.Add(rightrow); + +// CHECK: call void @dx.op.waveMatrix_Accumulate(i32 237, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.waveMatrix* nonnull %[[wma2]]) +// CHECK: call void @dx.op.waveMatrix_Accumulate(i32 237, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.waveMatrix* nonnull %[[wma]]) + acc.Add(acc2); + acc.Add(acc); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_ScalarOps-acc.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_ScalarOps-acc.hlsl new file mode 100644 index 0000000000..b2fe2b12d8 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_ScalarOps-acc.hlsl @@ -0,0 +1,83 @@ +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float16_t -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=8 -DDIMM=16 -DDIMN=16 -DOLOAD=f16 +// RUN: 
%dxc -enable-16bit-types -T cs_6_8 -DCOMP=int -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=4 -DDIMM=16 -DDIMN=16 -DOLOAD=i32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=64 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DDIMM=64 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=16 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DDIMM=16 -DDIMN=64 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DDIMM=64 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DDIMM=64 -DDIMN=64 -DOLOAD=f32 + +// CHECK: ; Note: shader requires additional functionality: +// CHECK: ; Wave level operations +// CHECK: ; Wave Matrix + +// CHECK: define void @main() + +#ifndef COMP +#define COMP float +#endif +#ifndef DIMM +#define DIMM 16 +#endif +#ifndef DIMN +#define DIMN 16 +#endif +#ifndef STRIDE +#define STRIDE 64 +#endif +#ifndef WAVESIZE +#define WAVESIZE +#endif +#ifndef NUMTHREADS +#define NUMTHREADS [NumThreads(64,1,1)] +#endif + +#define WMLC WaveMatrixLeftColAcc +#define WMRR WaveMatrixRightRowAcc +#define WMA WaveMatrixAccumulator + +ByteAddressBuffer buf; +RWByteAddressBuffer rwbuf; + +#define MAKE_TEST_SCALAR(typ) \ + void testScalar(typ mat) { \ + mat.ScalarMultiply(4); \ + mat.ScalarDivide(4); \ + mat.ScalarAdd(4); \ + mat.ScalarSubtract(4); \ + } + +MAKE_TEST_SCALAR(WMLC) +MAKE_TEST_SCALAR(WMRR) +MAKE_TEST_SCALAR(WMA) + +WAVESIZE +NUMTHREADS +void main(uint3 gtid : SV_GroupThreadID, uint gidx : SV_GroupIndex) +{ +// CHECK: %[[wmlc:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wmrr:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wma:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmlc]], %dx.types.waveMatProps { i8 2, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmrr]], %dx.types.waveMatProps { i8 3, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] 
}) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wma]], %dx.types.waveMatProps { i8 4, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) + WMLC leftcol; + WMRR rightrow; + WMA acc; + +// CHECK: call void @dx.op.waveMatrix_ScalarOp.[[OLOAD]](i32 235, %dx.types.waveMatrix* nonnull %[[wmlc]], i8 2, +// CHECK: call void @dx.op.waveMatrix_ScalarOp.[[OLOAD]](i32 235, %dx.types.waveMatrix* nonnull %[[wmlc]], i8 3, +// CHECK: call void @dx.op.waveMatrix_ScalarOp.[[OLOAD]](i32 235, %dx.types.waveMatrix* nonnull %[[wmlc]], i8 0, +// CHECK: call void @dx.op.waveMatrix_ScalarOp.[[OLOAD]](i32 235, %dx.types.waveMatrix* nonnull %[[wmlc]], i8 1, + testScalar(leftcol); + +// CHECK: call void @dx.op.waveMatrix_ScalarOp.[[OLOAD]](i32 235, %dx.types.waveMatrix* nonnull %[[wmrr]], i8 2, +// CHECK: call void @dx.op.waveMatrix_ScalarOp.[[OLOAD]](i32 235, %dx.types.waveMatrix* nonnull %[[wmrr]], i8 3, +// CHECK: call void @dx.op.waveMatrix_ScalarOp.[[OLOAD]](i32 235, %dx.types.waveMatrix* nonnull %[[wmrr]], i8 0, +// CHECK: call void @dx.op.waveMatrix_ScalarOp.[[OLOAD]](i32 235, %dx.types.waveMatrix* nonnull %[[wmrr]], i8 1, + testScalar(rightrow); + +// CHECK: call void @dx.op.waveMatrix_ScalarOp.[[OLOAD]](i32 235, %dx.types.waveMatrix* nonnull %[[wma]], i8 2, +// CHECK: call void @dx.op.waveMatrix_ScalarOp.[[OLOAD]](i32 235, %dx.types.waveMatrix* nonnull %[[wma]], i8 3, +// CHECK: call void @dx.op.waveMatrix_ScalarOp.[[OLOAD]](i32 235, %dx.types.waveMatrix* nonnull %[[wma]], i8 0, +// CHECK: call void @dx.op.waveMatrix_ScalarOp.[[OLOAD]](i32 235, %dx.types.waveMatrix* nonnull %[[wma]], i8 1, + testScalar(acc); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_SumAccumulate-acc.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_SumAccumulate-acc.hlsl new file mode 100644 index 0000000000..1c3f2f94a8 --- /dev/null +++ 
b/tools/clang/test/HLSLFileCheck/hlsl/objects/WaveMatrix/WaveMatrix_SumAccumulate-acc.hlsl @@ -0,0 +1,64 @@ +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DCOMP_IN=float -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DCOMP_IN=9 -DDIMM=16 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DCOMP_IN=float16_t -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DCOMP_IN=8 -DDIMM=16 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float16_t -DCOMP_IN=float16_t -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=8 -DCOMP_IN=8 -DDIMM=16 -DDIMN=16 -DOLOAD=f16 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=int -DCOMP_IN=int8_t4_packed -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=4 -DCOMP_IN=17 -DDIMM=16 -DDIMN=16 -DOLOAD=i32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=int -DCOMP_IN=uint8_t4_packed -DDIMM=16 -DDIMN=16 %s | FileCheck %s -DCOMP=4 -DCOMP_IN=18 -DDIMM=16 -DDIMN=16 -DOLOAD=i32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DCOMP_IN=float -DDIMM=64 -DDIMN=16 %s | FileCheck %s -DCOMP=9 -DCOMP_IN=9 -DDIMM=64 -DDIMN=16 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DCOMP_IN=float -DDIMM=16 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DCOMP_IN=9 -DDIMM=16 -DDIMN=64 -DOLOAD=f32 +// RUN: %dxc -enable-16bit-types -T cs_6_8 -DCOMP=float -DCOMP_IN=float -DDIMM=64 -DDIMN=64 %s | FileCheck %s -DCOMP=9 -DCOMP_IN=9 -DDIMM=64 -DDIMN=64 -DOLOAD=f32 + +// CHECK: ; Note: shader requires additional functionality: +// CHECK: ; Wave level operations +// CHECK: ; Wave Matrix + +// CHECK: define void @main() + +#ifndef COMP +#define COMP float +#endif +#ifndef COMP_IN +#define COMP_IN float +#endif +#ifndef DIMM +#define DIMM 16 +#endif +#ifndef DIMN +#define DIMN 16 +#endif +#ifndef STRIDE +#define STRIDE 64 +#endif +#ifndef WAVESIZE +#define WAVESIZE +#endif +#ifndef NUMTHREADS +#define NUMTHREADS [NumThreads(64,1,1)] +#endif + +#define WML WaveMatrixLeft +#define WMR WaveMatrixRight +#define
WMLC WaveMatrixLeftColAcc +#define WMRR WaveMatrixRightRowAcc + +WAVESIZE +NUMTHREADS +void main(uint3 gtid : SV_GroupThreadID, uint gidx : SV_GroupIndex) +{ +// CHECK: %[[wml:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wmr:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wmlc:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: %[[wmrr:.*]] = alloca %dx.types.waveMatrix, align 4 +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wml]], %dx.types.waveMatProps { i8 0, i8 [[COMP_IN]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmr]], %dx.types.waveMatProps { i8 1, i8 [[COMP_IN]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmlc]], %dx.types.waveMatProps { i8 2, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) +// CHECK: call void @dx.op.waveMatrix_Annotate(i32 226, %dx.types.waveMatrix* nonnull %[[wmrr]], %dx.types.waveMatProps { i8 3, i8 [[COMP]], i32 [[DIMM]], i32 [[DIMN]] }) + WML left; + WMR right; + WMLC leftcol; + WMRR rightrow; + +// CHECK: call void @dx.op.waveMatrix_Accumulate(i32 236, %dx.types.waveMatrix* nonnull %[[wmlc]], %dx.types.waveMatrix* nonnull %[[wml]]) +// CHECK: call void @dx.op.waveMatrix_Accumulate(i32 236, %dx.types.waveMatrix* nonnull %[[wmrr]], %dx.types.waveMatrix* nonnull %[[wmr]]) + leftcol.SumAccumulate(left); + rightrow.SumAccumulate(right); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/ast-nodeinput.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/ast-nodeinput.hlsl new file mode 100644 index 0000000000..a0f2630919 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/ast-nodeinput.hlsl @@ -0,0 +1,101 @@ +// RUN: %dxc -T lib_6_8 -ast-dump-implicit %s | FileCheck %s +// This test verifies the AST of the {Dispatch|Group|Thread}NodeInputRecord{s} types. 
The +// source doesn't matter except that it forces a use to ensure the AST is fully +// loaded by the external sema source. + +struct RECORD +{ + int X; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(1024,1,1)] +void node05(DispatchNodeInputRecord input) {} + +//CHECK: ClassTemplateDecl {{0x[0-9a-fA-F]+}} <> implicit DispatchNodeInputRecord +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordtype +//CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit struct DispatchNodeInputRecord definition +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const recordtype &() const' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: ClassTemplateSpecializationDecl {{0x[0-9a-fA-F]+}} <> struct DispatchNodeInputRecord definition +//CHECK-NEXT: TemplateArgument type 'RECORD' +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const RECORD &() const' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NumThreads(1024,1,1)] +void node06(GroupNodeInputRecords input) {} + +//CHECK: ClassTemplateDecl {{0x[0-9a-fA-F]+}} <> implicit GroupNodeInputRecords +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordtype +//CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit struct GroupNodeInputRecords +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 
'const recordtype &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' cinit +//CHECK-NEXT: IntegerLiteral {{0x[0-9a-fA-F]+}} <> 'unsigned int' 0 +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'const recordtype &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit + +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> operator[] +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordtype +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'const recordtype &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> index 'unsigned int' +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> Count +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class TResult +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> implicit Count 'TResult () const' + +//CHECK-NEXT: ClassTemplateSpecializationDecl {{0x[0-9a-fA-F]+}} <> struct GroupNodeInputRecords definition +//CHECK-NEXT: TemplateArgument type 'RECORD' +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const RECORD &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'const RECORD 
&(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit + +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> operator[] +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordtype +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'const recordtype &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> index 'unsigned int' +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> Count +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class TResult +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Count 'TResult () const' + +[Shader("node")] +[NodeLaunch("Thread")] +void node06(ThreadNodeInputRecord input) {} + +//CHECK: ClassTemplateDecl {{0x[0-9a-fA-F]+}} <> implicit ThreadNodeInputRecord +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordtype +//CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit struct ThreadNodeInputRecord definition +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const recordtype &() const' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: ClassTemplateSpecializationDecl {{0x[0-9a-fA-F]+}} <> struct ThreadNodeInputRecord definition +//CHECK-NEXT: TemplateArgument type 'RECORD' +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const RECORD &() const' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} 
<> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/ast-nodeoutput.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/ast-nodeoutput.hlsl new file mode 100644 index 0000000000..9bc7d056d3 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/ast-nodeoutput.hlsl @@ -0,0 +1,150 @@ +// RUN: %dxc -T lib_6_8 -ast-dump-implicit %s | FileCheck %s +// This test verifies the AST of the GroupNodeOutputRecords and +// ThreadNodeOutputRecords types. The source doesn't matter except +// that it forces a use to ensure the AST is fully loaded by the +// external sema source. + +struct RECORD +{ + int X; +}; + + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node02(NodeOutput output) +{ + GroupNodeOutputRecords outrec = output.GetGroupNodeOutputRecords(1); +} + +//CHECK: ClassTemplateDecl {{0x[0-9a-fA-F]+}} <> implicit GroupNodeOutputRecords +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordType +//CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit struct GroupNodeOutputRecords definition +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'recordType &(unsigned int)' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'const recordType &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: 
CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'recordType &(unsigned int)' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' cinit +//CHECK-NEXT: IntegerLiteral {{0x[0-9a-fA-F]+}} <> 'unsigned int' 0 +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const recordType &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' cinit +//CHECK-NEXT: IntegerLiteral {{0x[0-9a-fA-F]+}} <> 'unsigned int' 0 +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> operator[] +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordType +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'recordType &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> index 'unsigned int' +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> OutputComplete +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class TResult +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> implicit OutputComplete 'TResult () const' +//CHECK-NEXT: ClassTemplateSpecializationDecl {{0x[0-9a-fA-F]+}} <> struct GroupNodeOutputRecords definition +//CHECK-NEXT: TemplateArgument type 'RECORD' +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'RECORD &(unsigned int)' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} 
<> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'const RECORD &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'RECORD &(unsigned int)' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const RECORD &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> operator[] +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordType +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'recordType &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> index 'unsigned int' +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> OutputComplete +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class TResult +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> OutputComplete 'TResult () const' +//CHECK-NEXT: CXXDestructorDecl {{0x[0-9a-fA-F]+}} <> implicit referenced ~GroupNodeOutputRecords 'void () noexcept' inline + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node03(NodeOutput output) +{ + ThreadNodeOutputRecords outrec = output.GetThreadNodeOutputRecords(1); +} + +//CHECK: ClassTemplateDecl {{0x[0-9a-fA-F]+}} <> implicit 
ThreadNodeOutputRecords +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordType +//CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit struct ThreadNodeOutputRecords definition +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'recordType &(unsigned int)' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'const recordType &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'recordType &(unsigned int)' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' cinit +//CHECK-NEXT: IntegerLiteral {{0x[0-9a-fA-F]+}} <> 'unsigned int' 0 +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const recordType &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' cinit +//CHECK-NEXT: IntegerLiteral {{0x[0-9a-fA-F]+}} <> 'unsigned int' 0 +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> operator[] +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordType +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> 
operator[] 'recordType &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> index 'unsigned int' +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> OutputComplete +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class TResult +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> implicit OutputComplete 'TResult () const' +//CHECK-NEXT: ClassTemplateSpecializationDecl {{0x[0-9a-fA-F]+}} <> struct ThreadNodeOutputRecords definition +//CHECK-NEXT: TemplateArgument type 'RECORD' +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'RECORD &(unsigned int)' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'const RECORD &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'RECORD &(unsigned int)' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const RECORD &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr 
{{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> operator[] +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordType +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'recordType &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> index 'unsigned int' +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> OutputComplete +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class TResult +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> OutputComplete 'TResult () const' +//CHECK-NEXT: CXXDestructorDecl {{0x[0-9a-fA-F]+}} <> implicit referenced ~ThreadNodeOutputRecords 'void () noexcept' inline + diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/ast-rwnodeinput.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/ast-rwnodeinput.hlsl new file mode 100644 index 0000000000..ac408eae41 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/ast-rwnodeinput.hlsl @@ -0,0 +1,139 @@ +// RUN: %dxc -T lib_6_8 -ast-dump-implicit %s FileCheck %s +// This test verifies the AST of the RW{Dispatch|Group|Thread}NodeInputRecord{s} types. +// The source doesn't matter except that it forces a use to ensure the AST is fully +// loaded by the external sema source. 
+ +struct RECORD +{ + int X; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(1024,1,1)] +void node05(RWDispatchNodeInputRecord input) {} + +//CHECK: ClassTemplateDecl {{0x[0-9a-fA-F]+}} <> implicit RWDispatchNodeInputRecord +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordtype +//CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit struct RWDispatchNodeInputRecord definition +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'recordtype &()' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const recordtype &() const' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> FinishedCrossGroupSharing +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class TResult +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> implicit FinishedCrossGroupSharing 'TResult () const' +//CHECK-NEXT: ClassTemplateSpecializationDecl {{0x[0-9a-fA-F]+}} <> struct RWDispatchNodeInputRecord definition +//CHECK-NEXT: TemplateArgument type 'RECORD' +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'RECORD &()' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const RECORD &() const' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" 
"ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> FinishedCrossGroupSharing +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class TResult +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> FinishedCrossGroupSharing 'TResult () const' + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NumThreads(1024,1,1)] +void node06(RWGroupNodeInputRecords input) {} + +//CHECK: ClssTemplateDecl {{0x[0-9a-fA-F]+}} <> implicit RWGroupNodeInputRecords +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordtype +//CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit struct RWGroupNodeInputRecords +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'recordtype &(unsigned int)' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' cinit +//CHECK-NEXT: IntegerLiteral {{0x[0-9a-fA-F]+}} <> 'unsigned int' 0 +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const recordtype &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' cinit +//CHECK-NEXT: IntegerLiteral {{0x[0-9a-fA-F]+}} <> 'unsigned int' 0 +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'recordtype &(unsigned int)' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: 
CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'const recordtype &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> operator[] +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordtype +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'const recordtype &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> index 'unsigned int' +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> Count +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class TResult +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> implicit Count 'TResult () const' + +//CHECK-NEXT: ClassTemplateSpecializationDecl {{0x[0-9a-fA-F]+}} <> struct RWGroupNodeInputRecords definition +//CHECK-NEXT: TemplateArgument type 'RECORD' +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'recordtype &(unsigned int)' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' cinit +//CHECK-NEXT: IntegerLiteral {{0x[0-9a-fA-F]+}} <> 'unsigned int' 0 +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const RECORD &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'recordtype &(unsigned int)' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> 
Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'const RECORD &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> Index 'unsigned int' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> operator[] +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordtype +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> operator[] 'const recordtype &(unsigned int) const' +//CHECK-NEXT: ParmVarDecl {{0x[0-9a-fA-F]+}} <> index 'unsigned int' +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> Count +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class TResult +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Count 'TResult () const' + +[Shader("node")] +[NodeLaunch("Thread")] +void node06(RWThreadNodeInputRecord input) {} + +//CHECK: ClassTemplateDecl {{0x[0-9a-fA-F]+}} <> implicit RWThreadNodeInputRecord +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class recordtype +//CHECK-NEXT: CXXRecordDecl {{0x[0-9a-fA-F]+}} <> implicit struct RWThreadNodeInputRecord definition +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'recordtype &()' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const recordtype &() const' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr 
{{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> FinishedCrossGroupSharing +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class TResult +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> implicit FinishedCrossGroupSharing 'TResult () const' +//CHECK-NEXT: ClassTemplateSpecializationDecl {{0x[0-9a-fA-F]+}} <> struct RWThreadNodeInputRecord definition +//CHECK-NEXT: TemplateArgument type 'RECORD' +//CHECK-NEXT: FinalAttr {{0x[0-9a-fA-F]+}} <> Implicit final +//CHECK-NEXT: FieldDecl {{0x[0-9a-fA-F]+}} <> implicit h 'int' +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'RECORD &()' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> Get 'const RECORD &() const' +//CHECK-NEXT: HLSLIntrinsicAttr {{0x[0-9a-fA-F]+}} <> Implicit "op" "ExtractRecordStructFromArray" 18 +//CHECK-NEXT: HLSLCXXOverloadAttr {{0x[0-9a-fA-F]+}} <> Implicit +//CHECK-NEXT: FunctionTemplateDecl {{0x[0-9a-fA-F]+}} <> FinishedCrossGroupSharing +//CHECK-NEXT: TemplateTypeParmDecl {{0x[0-9a-fA-F]+}} <> class TResult +//CHECK-NEXT: CXXMethodDecl {{0x[0-9a-fA-F]+}} <> FinishedCrossGroupSharing 'TResult () const' diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/both_compute_and_node.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/both_compute_and_node.hlsl new file mode 100644 index 0000000000..7fc0a1b644 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/both_compute_and_node.hlsl @@ -0,0 +1,68 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// Test when permutations of compute and node are specified: +// - Check that Shader Kind is compute whenever compute is specified +// - Check that NodeId is present when and only when node is specified. 
+// - Check that NumThreads is present and correctly populated in all cases +// ================================================================== + +[Shader("compute")] +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(9,1,2)] +void compute_node() { } + +// Both compute then node specified: Shader Kind = compute, NodeId present +// CHECK: !{void ()* @compute_node, !"compute_node", null, null, [[ATTRS_1:![0-9]+]]} +// CHECK: [[ATTRS_1]] = !{i32 8, i32 5, i32 13, i32 1, +// CHECK-SAME: i32 15, [[NODE_ID_1:![0-9]+]], +// CHECK-SAME: i32 4, [[NUM_THREADS_1:![0-9]+]], +// CHECK: [[NODE_ID_1]] = !{!"compute_node", i32 0} +// CHECK: [[NUM_THREADS_1]] = !{i32 9, i32 1, i32 2} + +// ================================================================== + +[Shader("compute")] +[NumThreads(9,3,4)] +void compute_only() { } + +// Only compute specified: Shader Kind = compute, NodeId not present +// CHECK: !{void ()* @compute_only, !"compute_only", null, null, [[ATTRS_2:![0-9]+]]} +// CHECK: [[ATTRS_2]] = !{i32 8, i32 5, +// CHECK-NOT: i32 15, {{![0-9]+}}, +// CHECK-SAME: i32 4, [[NUM_THREADS_2:![0-9]+]], +// CHECK: [[NUM_THREADS_2]] = !{i32 9, i32 3, i32 4} + +// ================================================================== + +[Shader("node")] +[Shader("compute")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(9,5,6)] +void node_compute() { } + +// Both node then compute specified: Shader Kind = compute, NodeId present +// CHECK: !{void ()* @node_compute, !"node_compute", null, null, [[ATTRS_3:![0-9]+]]} +// CHECK: [[ATTRS_3]] = !{i32 8, i32 5, +// CHECK-SAME: i32 15, [[NODE_ID_3:![0-9]+]], +// CHECK-SAME: i32 4, [[NUM_THREADS_3:![0-9]+]], +// CHECK: [[NODE_ID_3]] = !{!"node_compute", i32 0} +// CHECK: [[NUM_THREADS_3]] = !{i32 9, i32 5, i32 6} + +// ================================================================== + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2, 1, 1)] 
+[NumThreads(9,7,8)] +void node_only() { } + +// Only node specified: Shader Kind = node, NodeId present +// CHECK: !{void ()* @node_only, !"node_only", null, null, [[ATTRS_4:![0-9]+]]} +// CHECK: [[ATTRS_4]] = !{i32 8, i32 15, +// CHECK-SAME: i32 15, [[NODE_ID_4:![0-9]+]], +// CHECK-SAME: i32 4, [[NUM_THREADS_4:![0-9]+]], +// CHECK: [[NODE_ID_4]] = !{!"node_only", i32 0} +// CHECK: [[NUM_THREADS_4]] = !{i32 9, i32 7, i32 8} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/called_function_arg_nodeoutput.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/called_function_arg_nodeoutput.hlsl new file mode 100644 index 0000000000..5dc6eeec28 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/called_function_arg_nodeoutput.hlsl @@ -0,0 +1,36 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// +// Verify that NodeOutput can be passed to a called function and used." + +// RUN: %dxc -fcgl -T lib_6_8 %s | FileCheck -check-prefix=CHECK_FCGL %s +// +// Verify that the correct parameter, the NodeOutputRecord parameter, gets the cast +// CHECK_FCGL: %[[outputNodeVal:[0-9]+]] = load %"struct.NodeOutput", %"struct.NodeOutput"* %outputNode, +// CHECK_FCGL: %[[outputNodeHandle:[0-9]+]] = call %dx.types.NodeHandle @"dx.hl.cast..%dx.types.NodeHandle (i32, %\22struct.NodeOutput\22)"(i32 10, %"struct.NodeOutput" %[[outputNodeVal]]) +// outputNode.GetGroupNodeOutputRecords(13): +// CHECK_FCGL: call %dx.types.NodeRecordHandle @"dx.hl.op..%dx.types.NodeRecordHandle (i32, %dx.types.NodeHandle, i32)"(i32 {{[0-9]+}}, %dx.types.NodeHandle %[[outputNodeHandle]], i32 13) + +struct loadStressRecord +{ + uint3 grid : SV_DispatchGrid; +}; + +void loadStressWorker( + NodeOutput outputNode) +{ + GroupNodeOutputRecords outRec = outputNode.GetGroupNodeOutputRecords(13); + // CHECK: store i32 39 + // CHECK: store i32 61 + // CHECK: store i32 71 + outRec.Get(5).grid = uint3(39, 61, 71); +} + +[Shader("node")] +[NodeMaxDispatchGrid(3, 1, 1)] +[NumThreads(16, 1, 1)] +void 
loadStress_16( + [MaxRecords(16)] NodeOutput loadStressChild +) +{ + loadStressWorker(loadStressChild); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/called_function_arg_record_object.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/called_function_arg_record_object.hlsl new file mode 100644 index 0000000000..2339cbbdfd --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/called_function_arg_record_object.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// +// Verify that NodeInputRecord can be passed to a called function and used." + +struct loadStressRecord +{ + uint data[29]; +}; + +void loadStressWorker( + inout DispatchNodeInputRecord inputData, + GroupNodeOutputRecords outRec) +{ + // CHECK: getelementptr %struct.loadStressRecord, %struct.loadStressRecord + uint val = inputData.Get().data[0]; // problem line + + outRec.Get().data[0] = val + 61; +} + +[Shader("node")] +[NodeMaxDispatchGrid(3, 1, 1)] +[NumThreads(16, 1, 1)] +void loadStress_16(DispatchNodeInputRecord inputData, + [MaxOutputRecords(16)] NodeOutput loadStressChild) +{ + loadStressWorker(inputData, loadStressChild.GetGroupNodeOutputRecords(1)); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case001_dispatchgrid_shader.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case001_dispatchgrid_shader.hlsl new file mode 100644 index 0000000000..56d87b2e66 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case001_dispatchgrid_shader.hlsl @@ -0,0 +1,87 @@ +// RUN: %dxc -T lib_6_8 -default-linkage external %s | FileCheck %s +// ================================================================== +// CASE001 +// Broadcasting launch node with dispatch grid defined in shader +// ================================================================== + +struct INPUT_NOGRID +{ + uint textureIndex; +}; + +// Shader function +// Arg #1: Opcode = +// Arg #2: Metadata ID +// ------------------------------------------------------------------ 
+// CHECK: define void @node001_dispatchgrid_shader() +// CHECK-SAME: { +// XCHECK: {{%[0-9]+}} = %dx.types.NodeRecordHandle CreateInputRecordHandle(i32 , 0) +// CHECK: ret void +// CHECK: } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2,3,2)] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node001_dispatchgrid_shader(DispatchNodeInputRecord input) +{ +} + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: !dx.entryPoints = !{ +// CHECK-SAME: } +// CHECK: = !{void ()* @node001_dispatchgrid_shader, !"node001_dispatchgrid_shader", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Broadcasting (1) +// Arg #5: NodeIsProgramEntry Tag (14) +// Arg #6: True (1) +// Arg #7: NodeId Tag (15) +// Arg #8: NodeId (NodeId metadata) +// Arg #9: NodeLocalRootArgumentsTableIndex Tag (16) +// Arg #10: Index (-1) +// Arg #11: NodeDispatchGrid Tag (18) +// Arg #12: NodeDispatchGrid (xyz metadata) +// Arg #13: NodeInputs Tag (20) +// Arg #14: NodeInputs (NodeInput metadata) +// Arg #15: NumThreads Tag (4) +// Arg #16: NumThreads (xyz metadata) +// ... 
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 1, i32 14, i1 true, i32 15, [[NODEID:![0-9]+]], i32 16, i32 -1, i32 18, [[DISPATCHGRID:![0-9]+]], i32 20, [[NODE_IN:![0-9]+]], i32 4, [[NUMTHREADS:![0-9]+]] +// CHECK-SAME: } + +// DispatchGrid +// Arg #1: 2 +// Arg #2: 3 +// Arg #3: 2 +// ------------------------------------------------------------------ +// CHECK: [[DISPATCHGRID]] = !{i32 2, i32 3, i32 2} + +// NodeInputs +// Arg #1: NodeIOKind Tag (1) +// Arg #2: InputRecord(97) +// Arg #3: NodeRecordType Tag (3) +// Arg #4: INPUT_NOGRID type +// ------------------------------------------------------------------ +// CHECK: [[NODE_IN]] = !{[[INPUT0:![0-9]+]]} +// CHECK: [[INPUT0]] = !{i32 1, i32 97, i32 2, [[INPUT_NOGRID:![0-9]+]]} + +// Metadata for input record struct +// Arg #1: Size Tag (0) +// Arg #2: 4 bytes +// ------------------------------------------------------------------ +// CHECK-DAG: [[INPUT_NOGRID]] = !{i32 0, i32 4} + +// NumThreads +// Arg #1: 1024 +// Arg #2: 1 +// Arg #3: 1 +// ------------------------------------------------------------------ +// CHECK: [[NUMTHREADS]] = !{i32 1024, i32 1, i32 1} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case006_broadcasting_numthreads_shader.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case006_broadcasting_numthreads_shader.hlsl new file mode 100644 index 0000000000..3ca760b3e2 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case006_broadcasting_numthreads_shader.hlsl @@ -0,0 +1,65 @@ +// RUN: %dxc -T lib_6_8 -default-linkage external %s | FileCheck %s +// ================================================================== +// CASE006 +// Broadcasting launch node with num threads defined in shader +// ================================================================== + +// Shader function +// ------------------------------------------------------------------ +// CHECK: define void 
@node006_broadcasting_numthreads_shader() +// CHECK-SAME: { +// CHECK: ret void +// CHECK: } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2,3,2)] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node006_broadcasting_numthreads_shader() +{ +} + +// Metadata for node +// CHECK: !dx.entryPoints = !{ +// CHECK-SAME: } +// CHECK: = !{void ()* @node006_broadcasting_numthreads_shader, !"node006_broadcasting_numthreads_shader", null, null, [[ATTRS:![0-9]+]]} +// ------------------------------------------------------------------ + +//Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Broadcasting (1) +// Arg #5: NodeIsProgramEntry Tag (14) +// Arg #6: True (1) +// Arg #7: NodeId Tag (15) +// Arg #8: NodeId (NodeId metadata) +// Arg #9: NodeLocalRootArgumentsTableIndex Tag (16) +// Arg #10: Index (-1) +// Arg #11: NodeDispatchGrid Tag (18) +// Arg #12: NodeDispatchGrid (xyz metadata) +// ... +// Arg #n1: NumThreads Tag (4) +// Arg #n2: NumThreads (xyz metadata) +// ... 
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 1, i32 14, i1 true, i32 15, [[NODEID:![0-9]+]], i32 16, i32 -1, i32 18, [[DISPATCHGRID:![0-9]+]], +// CHECK-SAME: i32 4, [[NUMTHREADS:![0-9]+]] + +// CHECK-SAME: } + +// DispatchGrid +// Arg #1: 2 +// Arg #2: 3 +// Arg #3: 2 +// ------------------------------------------------------------------ +// CHECK-DAG: [[DISPATCHGRID]] = !{i32 2, i32 3, i32 2} + +// NumThreads +// Arg #1: 1024 +// Arg #2: 1 +// Arg #3: 1 +// ------------------------------------------------------------------ +// CHECK-DAG: [[NUMTHREADS]] = !{i32 1024, i32 1, i32 1} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case007_broadcasting_numthreads_none.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case007_broadcasting_numthreads_none.hlsl new file mode 100644 index 0000000000..09535cfac0 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case007_broadcasting_numthreads_none.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE007 +// Broadcasting launch node with NumThreads not specified + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2,3,2)] +[NodeIsProgramEntry] +void node007_broadcasting_numthreads_none() +{ +} + +// CHECK: :10:6: error: NumThreads is required, but was not specified +// CHECK-NEXT: void node007_broadcasting_numthreads_none() +// CHECK-NEXT: ^ diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case008_coalescing_numthreads_shader.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case008_coalescing_numthreads_shader.hlsl new file mode 100644 index 0000000000..0e9aabff08 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case008_coalescing_numthreads_shader.hlsl @@ -0,0 +1,54 @@ +// RUN: %dxc -T lib_6_8 -default-linkage external %s | FileCheck %s +// 
================================================================== +// CASE008 +// Coalescing launch node with thread group defined in the shader +// ================================================================== + +// Shader function +// ------------------------------------------------------------------ +// CHECK: define void @node008_coalescing_numthreads_shader() +// CHECK-SAME: { +// CHECK: ret void +// CHECK: } + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node008_coalescing_numthreads_shader() +{ +} + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: !dx.entryPoints = !{ +// CHECK-SAME: } +// CHECK: = !{void ()* @node008_coalescing_numthreads_shader, !"node008_coalescing_numthreads_shader", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Coalescing (2) +// Arg #5: NodeIsProgramEntry Tag (14) +// Arg #6: True (1) +// Arg #7: NodeId Tag (15) +// Arg #8: NodeId (NodeId metadata) +// Arg #9: NodeLocalRootArgumentsTableIndex Tag (16) +// Arg #10: Index (-1) +// ... +// Arg #n1: NumThreads Tag (4) +// Arg #n2: NumThreads (xyz metadata) +// ... 
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 2, i32 14, i1 true, i32 15, [[NODEID:![0-9]+]], i32 16, i32 -1, +// CHECK-SAME: i32 4, [[NUMTHREADS:![0-9]+]] +// CHECK-SAME: } + +// NumThreads +// Arg #1: 1024 +// Arg #2: 1 +// Arg #3: 1 +// ------------------------------------------------------------------ +// CHECK: [[NUMTHREADS]] = !{i32 1024, i32 1, i32 1} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case009_coalescing_numthreads_none.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case009_coalescing_numthreads_none.hlsl new file mode 100644 index 0000000000..b72091023a --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case009_coalescing_numthreads_none.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE009 +// Coalescing launch node with NumThreads not specified + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2,3,2)] +[NodeIsProgramEntry] +void node009_coalescing_numthreads_none() +{ +} + +// CHECK: :10:6: error: NumThreads is required, but was not specified +// CHECK-NEXT: void node009_coalescing_numthreads_none() +// CHECK-NEXT: ^ diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case010_thread_numthreads_shader.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case010_thread_numthreads_shader.hlsl new file mode 100644 index 0000000000..4c85eb018a --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case010_thread_numthreads_shader.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE010 (pass) +// Thread launch node may define NumThreads(1,1,1) +// ================================================================== + +[Shader("node")] +[NodeLaunch("Thread")] +[NumThreads(1,1,1)] +[NodeIsProgramEntry] +void 
node010_thread_numthreads_shader() +{ +} + +// CHECK: !{void ()* @node010_thread_numthreads_shader, !"node010_thread_numthreads_shader", null, null, [[ATTR:![0-9]+]]} +// CHECK: [[ATTR]] = !{i32 8, i32 15, i32 13, i32 3, +// CHECK-SAME: i32 4, [[NUMTHREADS:![0-9]+]] +// CHECK: [[NUMTHREADS]] = !{i32 1, i32 1, i32 1} + diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case011_thread_numthreads_none.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case011_thread_numthreads_none.hlsl new file mode 100644 index 0000000000..2a100bc021 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case011_thread_numthreads_none.hlsl @@ -0,0 +1,38 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE011 +// Thread launch node without NumThreads specified should use a +// default of (1,1,1) +// ================================================================== + +// Shader function +// ------------------------------------------------------------------ +// CHECK: define void @node011_thread_numthreads_none() +// CHECK-SAME: { +// CHECK: ret void +// CHECK: } + +[Shader("node")] +[NodeLaunch("Thread")] +[NodeIsProgramEntry] +void node011_thread_numthreads_none() +{ +} + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: !dx.entryPoints = !{ +// CHECK-SAME: } +// CHECK: = !{void ()* @node011_thread_numthreads_none, !"node011_thread_numthreads_none", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Thread (3) +// ... 
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{i32 8, i32 15, i32 13, i32 3, +// CHECK-SAME: i32 4, [[NUMTHREADS:![0-9]+]] +// CHECK-SAME: } +// CHECK: [[NUMTHREADS]] = !{i32 1, i32 1, i32 1} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case012_thread_numthreads_wrongdimensions.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case012_thread_numthreads_wrongdimensions.hlsl new file mode 100644 index 0000000000..36b6b5ee73 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case012_thread_numthreads_wrongdimensions.hlsl @@ -0,0 +1,21 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE012 (fail) +// Thread launch node with incompatible thread group dimensions +// ================================================================== + +[Shader("node")] +[NodeLaunch("Thread")] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node012_thread_numthreads_wrongdimensions() +{ +} + +// CHECK: :9:2: error: Thread launch nodes must have a thread group size of (1,1,1) +// CHECK-NEXT: [NumThreads(1024,1,1)] +// CHECK-NEXT: ^ +// CHECK: :8:2: note: Launch type defined here +// CHECK-NEXT: [NodeLaunch("Thread")] +// CHECK-NEXT: ^ + diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case013_numthreads_1030.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case013_numthreads_1030.hlsl new file mode 100644 index 0000000000..3f549c3292 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case013_numthreads_1030.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE013 (fail) +// Broadcasting launch node with > 1024 threads in a group +// ================================================================== + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2,3,2)] +[NumThreads(1030,1,1)] 
+[NodeIsProgramEntry] +void node013_numthreads_1030() +{ +} + +// CHECK: case013_numthreads_1030.hlsl:10:2: error: Thread group size may not exceed 1024 +// CHECK: [NumThreads(1030,1,1)] +// CHECK: ^ + diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case014_getrecordcount_nodeinputarray.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case014_getrecordcount_nodeinputarray.hlsl new file mode 100644 index 0000000000..3ac0c919bc --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case014_getrecordcount_nodeinputarray.hlsl @@ -0,0 +1,85 @@ +// RUN: %dxc -T lib_6_8 external %s | FileCheck %s +// ================================================================== +// CASE014 +// GetInputRecordCount() called with NodeInputRecordArray +// ================================================================== + +RWBuffer buf0; + +struct INPUT_RECORD +{ + uint textureIndex; +}; + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node014_getrecordcount([MaxRecords(256)] GroupNodeInputRecords inputs) +{ + uint numRecords = inputs.Count(); + // Use Barrier as a way of preventing the unused numRecords being optimised away + buf0[0] = numRecords; +} + +// Shader function +// Arg #1: Opcode = +// Arg #2: Metadata ID +// ------------------------------------------------------------------ +// CHECK: define void @node014_getrecordcount() +// CHECK-SAME: { +// CHECK: [[INPUTS:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32 {{[0-9]+}}, i32 0) +// CHECK: [[ANN_IP:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[INPUTS]], %dx.types.NodeRecordInfo { i32 65, i32 4 }) +// CHECK: {{%[0-9]+}} = call i32 @dx.op.getInputRecordCount(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_IP]]) +// CHECK: ret void +// CHECK: } + +// Metadata for node +// ------------------------------------------------------------------ +// 
CHECK: !dx.entryPoints = !{ +// CHECK-SAME: } +// CHECK: = !{void ()* @node014_getrecordcount, !"node014_getrecordcount", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: Shader Kind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Coalescing (2) +// ... +// Arg #n: NodeInputs Tag (20) +// Arg #n+1: NodeInputs (metadata) +// ... +// Arg #m: NumThreads Tag (4) +// Arg #m+1: NumThreads (xyz metadata) +// ... +// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 2, +// CHECK-SAME: i32 20, [[NODE_IN:![0-9]+]] +// CHECK-SAME: i32 4, [[NUMTHREADS:![0-9]+]] +// CHECK-SAME: } + +// NodeInputs +// Arg #1: NodeIOKind Tag (1) +// Arg #2: InputRecord (65) +// Arg #3: NodeRecordType Tag (2) +// Arg #4: INPUT_RECORD Type +// Arg #5: NodeInputMaxArraySize Tag (3) +// Arg #6: 256 +// ------------------------------------------------------------------ +// CHECK-DAG: [[NODE_IN]] = !{[[INPUT0:![0-9]+]]} +// CHECK-DAG: [[INPUT0]] = !{i32 1, i32 65, i32 2, [[INPUT_RECORD:![0-9]+]], i32 3, i32 256} + + +// Metadata for input record struct +// Arg #1: Size Tag (0) +// Arg #2: 4 bytes +// ------------------------------------------------------------------ +// CHECK: [[INPUT_RECORD:![0-9]+]] = !{i32 0, i32 4} + +// NumThreads +// Arg #1: 1024 +// Arg #2: 1 +// Arg #3: 1 +// ------------------------------------------------------------------ +// CHECK-DAG: [[NUMTHREADS]] = !{i32 1024, i32 1, i32 1} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case017_renamed_node.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case017_renamed_node.hlsl new file mode 100644 index 0000000000..c3c30d95a8 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case017_renamed_node.hlsl @@ -0,0 +1,47 @@ +// RUN: %dxc -T lib_6_8 -default-linkage external %s | FileCheck %s +// ================================================================== +// 
CASE017 +// Renamed node, unnamed index defaults to 0 +// ================================================================== + +// Shader function +// ------------------------------------------------------------------ +// CHECK: define void @node017_renamed_node() +// CHECK-SAME: { +// CHECK: ret void +// CHECK: } + +[Shader("node")] +[NodeLaunch("Thread")] +[NodeID("new_node_name")] +[NodeIsProgramEntry] +void node017_renamed_node() +{ +} + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: !dx.entryPoints = !{ +// CHECK-SAME: } +// CHECK: = !{void ()* @node017_renamed_node, !"node017_renamed_node", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Thread (3) +// Arg #5: NodeIsProgramEntry Tag (14) +// Arg #6: True (1) +// Arg #7: NodeId Tag (15) +// Arg #8: NodeId (NodeId metadata) +// ... +// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 3, i32 14, i1 true, i32 15, [[NODEID:![0-9]+]] +// CHECK-SAME: } + +// NodeID +// Arg #1: NodeID = "new_node_name" +// Arg #2: Default Index (0) +// ------------------------------------------------------------------ +// CHECK: [[NODEID]] = !{!"new_node_name", i32 0} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case018_renamed_node_index.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case018_renamed_node_index.hlsl new file mode 100644 index 0000000000..f5b096e092 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case018_renamed_node_index.hlsl @@ -0,0 +1,48 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE018 +// Renamed node with named index +// ================================================================== + +// Shader function +// 
------------------------------------------------------------------ +// CHECK: define void @node018_renamed_node_index() +// CHECK-SAME: { +// CHECK: ret void +// CHECK: } + +[Shader("node")] +[NodeLaunch("Thread")] +[NodeID("new_node_name", 2)] +[NodeIsProgramEntry] +void node018_renamed_node_index() +{ +} + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: !dx.entryPoints = !{ +// CHECK-SAME: } +// CHECK: = !{void ()* @node018_renamed_node_index, !"node018_renamed_node_index", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Thread (3) +// Arg #5: NodeIsProgramEntry Tag (14) +// Arg #6: True (1) +// Arg #7: NodeId Tag (15) +// Arg #8: NodeId (NodeId metadata) +// ... +// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// XCHECK-SAME: i32 8, i32 15, i32 13, i32 2, i32 14, i32 1, +// CHECK-SAME: i32 8, i32 15, i32 13, i32 3, i32 14, i1 true, i32 15, [[NODEID:![0-9]+]] +// CHECK-SAME: } + +// NodeID +// Arg #1: NodeID = "new_node_name" +// Arg #2: NodeID index 2 +// ------------------------------------------------------------------ +// CHECK: [[NODEID]] = !{!"new_node_name", i32 2} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case028_groupincrementoutputcount.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case028_groupincrementoutputcount.hlsl new file mode 100644 index 0000000000..a16ecb2c61 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case028_groupincrementoutputcount.hlsl @@ -0,0 +1,59 @@ +// RUN: %dxc -T lib_6_8 -default-linkage external %s | FileCheck %s +// ================================================================== +// CASE028 +// Node with EmptyNodeOutput calls GroupIncrementOutputCount +// ================================================================== + + +[Shader("node")] 
+[NodeLaunch("Coalescing")] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node028_incrementoutputcount([MaxRecords(20)] EmptyNodeOutput empty) +{ + empty.GroupIncrementOutputCount(1); +} + +// CHECK: define void @node028_incrementoutputcount() +// CHECK-SAME: { +// CHECK: [[OP:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.createNodeOutputHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeOutputHandle(MetadataIdx) +// CHECK: [[ANN_OP:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[OP]], %dx.types.NodeInfo { i32 10, i32 0 }) +// CHECK: call void @dx.op.incrementOutputCount(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANN_OP]], i32 1, i1 false) ; IncrementOutputCount(output,count,perThread) +// CHECK: ret void +// CHECK: } + +// CHECK: = !{void ()* @node028_incrementoutputcount, !"node028_incrementoutputcount", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Coalescing (2) +// ... +// Arg #n: NodeOutputs Tag (21) +// Arg #n+1: NodeOutputs (NodeOutputs metadata) +// ... 
+// Arg #m: NumThreads Tag (4) +// Arg #m+1: NumThreads (xyz metadata) +// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 2, +// CHECK-SAME: i32 21, [[NODE_OUT:![0-9]+]] +// CHECK-SAME: i32 4, [[NUMTHREADS:![0-9]+]] +// CHECK-SAME: } + +// NodeOutputs +// Arg #1: NodeIOKind Tag (1) +// Arg #2: EmptyOutputRecord (10) +// Arg #3: MaxOutputRecords Tag (3) +// Arg #4: 20 Records Max +// ------------------------------------------------------------------ +// CHECK: [[NODE_OUT]] = !{[[OUTPUT0:![0-9]+]]} +// CHECK: [[OUTPUT0]] = !{i32 1, i32 10, i32 3, i32 20 + +// NumThreads +// Arg #1: 1024 +// Arg #2: 1 +// Arg #3: 1 +// ------------------------------------------------------------------ +// CHECK: [[NUMTHREADS]] = !{i32 1024, i32 1, i32 1} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case029_threadincrementoutputcount.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case029_threadincrementoutputcount.hlsl new file mode 100644 index 0000000000..7ccd5547fe --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case029_threadincrementoutputcount.hlsl @@ -0,0 +1,47 @@ +// RUN: %dxc -T lib_6_8 -default-linkage external %s | FileCheck %s +// ================================================================== +// CASE029 +// Node with EmptyNodeOutput calls ThreadIncrementOutputCount +// ================================================================== + + +[Shader("node")] +[NodeLaunch("Thread")] +[NodeIsProgramEntry] +void node028_incrementoutputcount([MaxRecords(20)] EmptyNodeOutput empty) +{ + empty.ThreadIncrementOutputCount(1); +} + +// CHECK: define void @node028_incrementoutputcount() +// CHECK-SAME: { +// CHECK: [[OP:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.createNodeOutputHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeOutputHandle(MetadataIdx) +// CHECK: [[ANN_OP:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, 
%dx.types.NodeHandle [[OP]], %dx.types.NodeInfo { i32 10, i32 0 }) +// CHECK: call void @dx.op.incrementOutputCount(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANN_OP]], i32 1, i1 true) ; IncrementOutputCount(output,count,perThread) +// CHECK: ret void +// CHECK: } + +// CHECK: = !{void ()* @node028_incrementoutputcount, !"node028_incrementoutputcount", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Thread (3) +// ... +// Arg #n: NodeOutputs Tag (21) +// Arg #n+1: NodeOutputs (NodeOutputs metadata) +// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 3, +// CHECK-SAME: i32 21, [[NODE_OUT:![0-9]+]] +// CHECK-SAME: } + +// NodeOutputs +// Arg #1: NodeIOKind Tag (1) +// Arg #2: EmptyOutputRecord (10) +// Arg #3: MaxOutputRecords Tag (3) +// Arg #4: 20 Records Max +// ------------------------------------------------------------------ +// CHECK: [[NODE_OUT]] = !{[[OUTPUT0:![0-9]+]]} +// CHECK: [[OUTPUT0]] = !{i32 1, i32 10, i32 3, i32 20 diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case030_outputcomplete_nodeoutput.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case030_outputcomplete_nodeoutput.hlsl new file mode 100644 index 0000000000..242c946dac --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case030_outputcomplete_nodeoutput.hlsl @@ -0,0 +1,69 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE030 +// OutputComplete() is called with NodeOutput +// ================================================================== + +struct OUTPUT_RECORD +{ + uint value; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1024,1,1)] +void node030_outputcomplete_nodeoutput(NodeOutput output) +{ + ThreadNodeOutputRecords outputrecords = 
output.GetThreadNodeOutputRecords(1); + // ... + outputrecords.OutputComplete(); +} + +// CHECK: define void @node030_outputcomplete_nodeoutput() +// CHECK: [[OHANDLE1:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.createNodeOutputHandle(i32 {{[0-9]+}}, i32 0) +// CHECK: [[OHANDLE:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[OHANDLE1]], %dx.types.NodeInfo { i32 6, i32 4 }) +// CHECK: [[RHANDLE1:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 {{[0-9]+}}, %dx.types.NodeHandle [[OHANDLE]], i32 1, i1 true) +// CHECK: [[RHANDLE:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[RHANDLE1]], %dx.types.NodeRecordInfo { i32 38, i32 4 }) +// CHECK: call void @dx.op.outputComplete(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[RHANDLE]]) +// CHECK: ret void +// CHECK: } + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: = !{void ()* @node030_outputcomplete_nodeoutput, !"node030_outputcomplete_nodeoutput", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Broadcasting (1) +// ... +// Arg #n: NodeOutput Tag (21) +// Arg #n+1: NodeOutput (metadata) +// ... 
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{i32 8, i32 15, i32 13, i32 1, +// CHECK-SAME: i32 21, [[NODEOUT:![0-9]+]] +// CHECK-SAME: } + +// NodeOutputs +// Arg #1: NodeIOKind Tag (1) +// Arg #2: RWOutputRecord (6) +// Arg #3: NodeRecordType Tag (2) +// Arg #4: OUTPUT_RECORD (output metadata) +// Arg #5: NodeOutputID (0) +// Arg #6: NodeID (metadata) +// ------------------------------------------------------------------ +// CHECK: [[NODEOUT]] = !{[[OUTPUT:![0-9]+]]} +// CHECK: [[OUTPUT]] = !{i32 1, i32 6, i32 2, [[OUTPUT_RECORD:![0-9]+]], i32 3, i32 0, i32 0, [[NODEID:![0-9]+]] + +// Metadata for output record struct +// Arg #1: Size Tag (0) +// Arg #2: 4 bytes +// ------------------------------------------------------------------ +// CHECK: [[OUTPUT_RECORD:![0-9]+]] = !{i32 0, i32 4} + +// NodeID +// ------------------------------------------------------------------ +// CHECK: [[NODEID]] = !{!"output", i32 0} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case034_finishedcrossgroupsharing_coalescing.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case034_finishedcrossgroupsharing_coalescing.hlsl new file mode 100644 index 0000000000..6e62304999 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case034_finishedcrossgroupsharing_coalescing.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE034 (fail) +// FinishedCrossGroupSharing() is called in a coalescing launch node +// ================================================================== + +struct [NodeTrackRWInputSharing] INPUT_RECORD { + uint value; +}; + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node034_finishedcrossgroupsharing_coalescing(RWGroupNodeInputRecords input) +{ + bool foo = input.FinishedCrossGroupSharing(); +} + +// CHECK: 17:20: error: no member named 
'FinishedCrossGroupSharing' in 'RWGroupNodeInputRecords' +// CHECK-NEXT: bool foo = input.FinishedCrossGroupSharing(); +// CHECK-NEXT: ~~~~~ ^ diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case035_finishedcrossgroupsharing_thread.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case035_finishedcrossgroupsharing_thread.hlsl new file mode 100644 index 0000000000..a342cced43 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case035_finishedcrossgroupsharing_thread.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE035 (fail) +// FinishedCrossGroupSharing() is called in a thread launch node +// ================================================================== + +struct [NodeTrackRWInputSharing] INPUT_RECORD { + uint value; +}; + +[Shader("node")] +[NodeLaunch("Thread")] +[NodeIsProgramEntry] +void node035_finishedcrossgroupsharing_thread(RWThreadNodeInputRecord input) +{ + bool foo = input.FinishedCrossGroupSharing(); +} + +// CHECK: 16:20: error: no member named 'FinishedCrossGroupSharing' in 'RWThreadNodeInputRecord' +// CHECK-NEXT: bool foo = input.FinishedCrossGroupSharing(); +// CHECK-NEXT: ~~~~~ ^ + diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case037_finishedcrossgroupsharing.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case037_finishedcrossgroupsharing.hlsl new file mode 100644 index 0000000000..1155f58e5b --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case037_finishedcrossgroupsharing.hlsl @@ -0,0 +1,94 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE037 +// FinishedCrossGroupSharing() is called with RWDispatchNodeInputRecord +// ================================================================== + +// Template for input record +// ------------------------------------------------------------------ +// CHECK: 
%dx.types.NodeRecordHandle = type { i8* } + +RWBuffer buf0; + +struct [NodeTrackRWInputSharing] INPUT_RECORD +{ + uint value; +}; + +// Shader function +// Arg #1: Opcode = +// Arg #2: Metadata ID +// ------------------------------------------------------------------ +// CHECK: define void @node037_finishedcrossgroupsharing() +// CHECK-SAME: { +// CHECK: [[INPUTS:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32 {{[0-9]+}}, i32 0) +// CHECK: [[ANN_INPUTS:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[INPUTS]], %dx.types.NodeRecordInfo { i32 357, i32 4 }) +// CHECK: {{%[0-9]+}} = call i1 @dx.op.finishedCrossGroupSharing(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_INPUTS]]) +// CHECK: ret void +// CHECK: } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1,1,1)] +void node037_finishedcrossgroupsharing(RWDispatchNodeInputRecord input) +{ + bool b = input.FinishedCrossGroupSharing(); + // the return value is used in a call to Barrier to avoid the call to + // FinishedCrossGroupSharing() being optimised away. + buf0[0] = 0 ? b : 1; +} + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: !dx.entryPoints = !{ +// CHECK-SAME: } +// CHECK: = !{void ()* @node037_finishedcrossgroupsharing, !"node037_finishedcrossgroupsharing", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Broadcasting (1) +// ... +// Arg #x: NodeDispatchGrid Tag (18) +// Arg #x+1: NodeDispatchGrid (xyz metadata) +// ... +// Arg #y: NodeInputs Tag (20) +// Arg #y+1: NodeInputs (metadata) +// ... +// Arg #z: NumThreads Tag (4) +// Arg #z+1: NumThreads (xyz metadata) +// ... 
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{i32 8, i32 15, i32 13, i32 1, i32 15, !{{[0-9]+}}, i32 16, i32 -1, i32 18, [[DISPATCHGRID:![0-9]+]], i32 20, [[NODE_IN:![0-9]+]], i32 4, [[NUMTHREADS:![0-9]+]] + +// DispatchGrid +// Arg #1: 256 +// Arg #2: 1 +// Arg #3: 1 +// ------------------------------------------------------------------ +// CHECK: [[DISPATCHGRID]] = !{i32 256, i32 1, i32 1} + +// NodeInputs +// Arg #1: NodeIOKind Tag (1) +// Arg #2: RWInputRecord(357) +// Arg #3: NodeRecordType Tag (2) +// Arg #4: INPUT_RECORD type +// ------------------------------------------------------------------ +// CHECK: [[NODE_IN]] = !{[[INPUT0:![0-9]+]]} +// CHECK: [[INPUT0]] = !{i32 1, i32 357, i32 2, [[INPUT_RECORD:![0-9]+]]} + +// Metadata for input record struct +// Arg #1: Size Tag (0) +// Arg #2: 4 bytes +// ------------------------------------------------------------------ +// CHECK: [[INPUT_RECORD]] = !{i32 0, i32 4} + +// NumThreads +// Arg #1: 1 +// Arg #2: 1 +// Arg #3: 1 +// ------------------------------------------------------------------ +// CHECK-DAG: [[NUMTHREADS]] = !{i32 1, i32 1, i32 1} + diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case038_trackrwinputsharing_missing.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case038_trackrwinputsharing_missing.hlsl new file mode 100644 index 0000000000..e72be44688 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case038_trackrwinputsharing_missing.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE038 (fail) +// FinishedCrossGroupSharing() is called without NodeTrackRWInputSharing +// ================================================================== + +struct INPUT_RECORD +{ + uint value; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1,1,1)] +[NodeIsProgramEntry] +void 
node038_trackrwinputsharing_missing(RWDispatchNodeInputRecord input) +{ + bool bar = input.FinishedCrossGroupSharing(); +} + +// CHECK: :19:14: error: Use of FinishedCrossGroupSharing() requires NodeTrackRWInputSharing attribute to be specified on the record struct type +// CHECK-NEXT: bool bar = input.FinishedCrossGroupSharing(); +// CHECK-NEXT: ^ diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case051_compute_attrs.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case051_compute_attrs.hlsl new file mode 100644 index 0000000000..162c880246 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case051_compute_attrs.hlsl @@ -0,0 +1,43 @@ +// RUN: %dxc -T lib_6_8 -default-linkage external %s | FileCheck %s +// ================================================================== +// CASE051 +// Compute shaders ignore graphnode attributes +// ================================================================== + +// Shader functions +// ------------------------------------------------------------------ +// CHECK: define void @node051_compute_attrs() +// CHECK-SAME: { +// CHECK: ret void +// CHECK: } + +[Shader("compute")] +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2,3,2)] +[NumThreads(1,1,1)] +[NodeIsProgramEntry] +void node051_compute_attrs() +{ +} + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: !dx.entryPoints = !{ +// CHECK-SAME: } +// CHECK: = !{void ()* @node051_compute_attrs, !"node051_compute_attrs", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: NumThreads Tag (4) +// Arg #2: NumThreads (metadata) +// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 4, [[NUMTHREADS:![0-9]+]] +// CHECK-SAME: } + +// NumThreads +// Arg #1: 1 +// Arg #2: 1 +// Arg #3: 1 +// ------------------------------------------------------------------ +// CHECK: [[NUMTHREADS]] = !{i32 1, i32 1, 
i32 1} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case052_nodelaunch_invalid.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case052_nodelaunch_invalid.hlsl new file mode 100644 index 0000000000..7046d2f6e2 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case052_nodelaunch_invalid.hlsl @@ -0,0 +1,20 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE052 (fail) +// Invalid NodeLaunch value +// ================================================================== + +struct INPUT_NOGRID +{ + uint textureIndex; +}; + +[Shader("node")] +[NodeLaunch("Other")] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node052_nodelaunch_invalid(DispatchNodeInputRecord input) +{ +} + +// CHECK: 13:13: error: attribute 'NodeLaunch' must have one of these values: broadcasting,coalescing,thread diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case055_nodearray_indices_not_contiguous.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case055_nodearray_indices_not_contiguous.hlsl new file mode 100644 index 0000000000..95b6857f03 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case055_nodearray_indices_not_contiguous.hlsl @@ -0,0 +1,133 @@ +// RUN: %dxc -T lib_6_8 -default-linkage external %s | FileCheck %s +// ================================================================== +// CASE055 +// Node array with non-contiguous indices +// ================================================================== + +// Shader functions +// ------------------------------------------------------------------ +// CHECK: define void @node055a_nodearray_indices_not_contiguous() +// CHECK-SAME: { +// CHECK: ret void +// CHECK: } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1024,1,1)] +[NodeID("node_array")] +[NodeIsProgramEntry] +void node055a_nodearray_indices_not_contiguous() +{ +} + +// CHECK: define void 
@node055b_nodearray_indices_not_contiguous() +// CHECK-SAME: { +// CHECK: ret void +// CHECK: } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1024,1,1)] +[NodeID("node_array", 5)] +[NodeIsProgramEntry] +void node055b_nodearray_indices_not_contiguous() +{ +} + +// Metadata for node 0 +// ------------------------------------------------------------------ +// CHECK: !dx.entryPoints = !{[[ENTRYX:![0-9]+]], [[ENTRY0:![0-9]+]], [[ENTRY1:![0-9]+]] +// CHECK-SAME: } +// CHECK: [[ENTRY0]] = !{void ()* @node055a_nodearray_indices_not_contiguous, !"node055a_nodearray_indices_not_contiguous", null, null, [[ATTRS0:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Broadcasting (1) +// Arg #5: NodeIsProgramEntry Tag (14) +// Arg #6: True (1) +// Arg #7: NodeId Tag (15) +// Arg #8: NodeId (NodeId metadata) +// Arg #9: NodeLocalRootArgumentsTableIndex Tag (16) +// Arg #10: Index (-1) +// Arg #11: NodeDispatchGrid Tag (18) +// Arg #12: NodeDispatchGrid (xyz metadata) +// Arg #13: NodeInputs Tag (20) +// Arg #14: NodeInputs (NodeInput metadata) +// Arg #15: NumThreads Tag (4) +// Arg #16: NumThreads (xyz metadata) +// ... 
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS0]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 1, i32 14, i1 true, i32 15, [[NODEID0:![0-9]+]], i32 16, i32 -1, i32 18, [[DISPATCHGRID:![0-9]+]], i32 20, !12, i32 4, [[NUMTHREADS:![0-9]+]] +// CHECK-SAME: } + +// NodeID +// Arg #1: NodeID = "node_array" +// Arg #2: Default Index (0) +// ------------------------------------------------------------------ +// CHECK-DAG: [[NODEID0]] = !{!"node_array", i32 0} + +// DispatchGrid +// Arg #1: 256 +// Arg #2: 1 +// Arg #3: 1 +// ------------------------------------------------------------------ +// CHECK-DAG: [[DISPATCHGRID]] = !{i32 256, i32 1, i32 1} + +// NumThreads +// Arg #1: 1024 +// Arg #2: 1 +// Arg #3: 1 +// ------------------------------------------------------------------ +// CHECK-DAG: [[NUMTHREADS]] = !{i32 1024, i32 1, i32 1} + +// Metadata for node 1 +// ------------------------------------------------------------------ +// CHECK: [[ENTRY1]] = !{void ()* @node055b_nodearray_indices_not_contiguous, !"node055b_nodearray_indices_not_contiguous", null, null, [[ATTRS1:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Broadcasting (1) +// Arg #5: NodeIsProgramEntry Tag (14) +// Arg #6: True (1) +// Arg #7: NodeId Tag (15) +// Arg #8: NodeId (NodeId metadata) +// Arg #9: NodeLocalRootArgumentsTableIndex Tag (16) +// Arg #10: Index (-1) +// Arg #11: NodeDispatchGrid Tag (18) +// Arg #12: NodeDispatchGrid (xyz metadata) +// Arg #13: NodeInputs Tag (20) +// Arg #14: NodeInputs (NodeInput metadata) +// Arg #15: NumThreads Tag (4) +// Arg #16: NumThreads (xyz metadata) +// ... 
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS1]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 1, i32 14, i1 true, i32 15, [[NODEID1:![0-9]+]], i32 16, i32 -1, i32 18, [[DISPATCHGRID:![0-9]+]], i32 20, !12, i32 4, [[NUMTHREADS]] +// CHECK-SAME: } + +// DispatchGrid +// Arg #1: 256 +// Arg #2: 1 +// Arg #3: 1 +// ------------------------------------------------------------------ +// XCHECK-DAG: [[DISPATCHGRID2]] = !{i32 256, i32 1, i32 1} + +// NumThreads +// Arg #1: 1024 +// Arg #2: 1 +// Arg #3: 1 +// ------------------------------------------------------------------ +// XCHECK-DAG: [[NUMTHREADS2]] = !{i32 1024, i32 1, i32 1} + +// NodeID +// Arg #1: NodeID = "node_array" +// Arg #2: Index = 5 +// ------------------------------------------------------------------ +// CHECK-DAG: [[NODEID1]] = !{!"node_array", i32 5} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case058_coalescing_dispatchgrid.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case058_coalescing_dispatchgrid.hlsl new file mode 100644 index 0000000000..f7bc2f1fa1 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case058_coalescing_dispatchgrid.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE058 (fail) +// Coalescing launch node declared dispatch grid +// ================================================================== + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NodeDispatchGrid(2,3,2)] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node058_coalescing_dispatchgrid() +{ +} + +// CHECK: error: NodeDispatchGrid may only be used with Broadcasting nodes +// CHECK-NEXT: [NodeDispatchGrid(2,3,2)] +// CHECK-NEXT: ^ diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case059_thread_dispatchgrid.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case059_thread_dispatchgrid.hlsl new file mode 100644 index 
0000000000..deb14fe2b4 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case059_thread_dispatchgrid.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE059 (fail) +// Thread launch node declared dispatch grid +// ================================================================== + +[Shader("node")] +[NodeLaunch("Thread")] +[NodeDispatchGrid(2,3,2)] +[NodeIsProgramEntry] +void node059_thread_dispatchgrid() +{ +} + +// CHECK: error: NodeDispatchGrid may only be used with Broadcasting nodes +// CHECK-NEXT: [NodeDispatchGrid(2,3,2)] +// CHECK-NEXT: ^ diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case061_coalescing_maxdispatchgrid.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case061_coalescing_maxdispatchgrid.hlsl new file mode 100644 index 0000000000..2fcc36527c --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case061_coalescing_maxdispatchgrid.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE061 (fail) +// Coalescing launch node declared max dispatch grid +// ================================================================== + +struct INPUT_RECORD +{ + uint3 DispatchGrid : SV_DipatchGrid; + uint foo; +}; + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NodeMaxDispatchGrid(2,3,2)] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node061_coalescing_maxdispatchgrid(DispatchNodeInputRecord input) +{ +} + +// CHECK: 15:2: error: NodeMaxDispatchGrid may only be used with Broadcasting nodes +// CHECK-NEXT: [NodeMaxDispatchGrid(2,3,2)] +// CHECK-NEXT: ^ \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case062_thread_maxdispatchgrid.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case062_thread_maxdispatchgrid.hlsl new file mode 100644 index 0000000000..b8e9a231ef --- /dev/null +++ 
b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case062_thread_maxdispatchgrid.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE062 (fail) +// Thread launch node declared max dispatch grid +// ================================================================== + +struct INPUT_RECORD +{ + uint3 DispatchGrid : SV_DipatchGrid; + uint foo; +}; + +[Shader("node")] +[NodeLaunch("Thread")] +[NodeMaxDispatchGrid(2,3,2)] +[NodeIsProgramEntry] +void node062_thread_maxdispatchgrid(DispatchNodeInputRecord input) +{ +} + +// CHECK: :15:2: error: NodeMaxDispatchGrid may only be used with Broadcasting nodes +// CHECK-NEXT: [NodeMaxDispatchGrid(2,3,2)] +// CHECK-NEXT: ^ + diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case067_maxrecursiondepth_toolarge.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case067_maxrecursiondepth_toolarge.hlsl new file mode 100644 index 0000000000..3febec17fe --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case067_maxrecursiondepth_toolarge.hlsl @@ -0,0 +1,37 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE067 (fail) +// recursive node with too many recursions max +// ================================================================== + +struct RECURSIVE_RECORD +{ + uint value; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1024,1,1)] +[NodeMaxRecursionDepth(35)] +[NodeID("recursive_node")] +[NodeIsProgramEntry] +void node067a_maxrecursiondepth_toolarge(DispatchNodeInputRecord input, + [MaxOutputRecords(2)][NodeID("recursive_node")] NodeOutput recursion, + [MaxOutputRecordsSharedWith(recursion)][NodeID("target_node")] NodeOutput output) +{ +} + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1024,1,1)] +[NodeID("target_node")] +void 
node067b_maxrecursiondepth_toolarge(DispatchNodeInputRecord input) +{ +} + +// CHECK: :16:2: error: NodeMaxRecursionDepth may not exceed 32 +// CHECK-NEXT: [NodeMaxRecursionDepth(35)] +// CHECK-NEXT: ^ + diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case070_noinput.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case070_noinput.hlsl new file mode 100644 index 0000000000..ecc85881ae --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case070_noinput.hlsl @@ -0,0 +1,69 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// If no input is specified then the NodeInputs metadata should not +// be present. +// ================================================================== + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(1,1,1)] +void node070_broadcasting() { } + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NumThreads(1,1,1)] +void node070_coalescing() { } + +[Shader("node")] +[NodeLaunch("Thread")] +[NumThreads(1,1,1)] +void node070_thread() { } + + +// Metadata for node070_broadcasting +// ------------------------------------------------------------------ +// CHECK: = !{void ()* @node070_broadcasting, !"node070_broadcasting", null, null, [[ATTRS_BROADCASTING:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Broadcasting (1) +// ... 
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS_BROADCASTING]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 1, +// CHECK-NOT: i32 20, i32 {{![-=9]+}} +// CHECK-SAME: } + +// Metadata for node070_coalescing +// ------------------------------------------------------------------ +// CHECK: = !{void ()* @node070_coalescing, !"node070_coalescing", null, null, [[ATTRS_COALESCING:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Coalescing (2) +// ... +// ------------------------------------------------------------------ +// CHECK: [[ATTRS_COALESCING]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 2, +// CHECK-NOT: i32 20, i32 {{![-=9]+}} +// CHECK-SAME: } + +// Metadata for node070_thread +// ------------------------------------------------------------------ +// CHECK: = !{void ()* @node070_thread, !"node070_thread", null, null, [[ATTRS_THREAD:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Thread (3) +// ... 
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS_THREAD]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 3, +// CHECK-NOT: i32 20, i32 {{![-=9]+}} +// CHECK-SAME: } diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case085_thread_emptynodeinput.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case085_thread_emptynodeinput.hlsl new file mode 100644 index 0000000000..f00373929b --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case085_thread_emptynodeinput.hlsl @@ -0,0 +1,54 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE085 +// Thread launch node declares EmptyNodeInput<1> +// ================================================================== + +RWBuffer buf0; + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NodeIsProgramEntry] +[NumThreads(2,1,1)] +void node085_thread_emptynodeinput(EmptyNodeInput input) +{ + // input.Count should always return 1 here, so there is + // an opportunity for an optimization. 
+ buf0[0] = input.Count(); +} + +// CHECK: define void @node085_thread_emptynodeinput() { +// CHECK: [[LOAD:%[0-9]+]] = load %dx.types.Handle, %dx.types.Handle* @"\01?buf0@@3V?$RWBuffer@I@@A", align 4 +// CHECK: [[HANDLE:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeInputRecordHandle(MetadataIdx) +// CHECK: [[ANN_HANDLE:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[HANDLE]], %dx.types.NodeRecordInfo { i32 9, i32 0 }) +// CHECK: [[COUNT:%[0-9]+]] = call i32 @dx.op.getInputRecordCount(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_HANDLE]]) ; GetInputRecordCount(input) +// CHECK: [[HANDLE_FOR_LIB:%[0-9]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle [[LOAD]]) ; CreateHandleForLib(Resource) +// CHECK: [[ANN_HANDLE2:%[0-9]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HANDLE_FOR_LIB]], %dx.types.ResourceProperties { i32 4106, i32 261 }) ; AnnotateHandle(res,props) resource: RWTypedBuffer +// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[ANN_HANDLE2]], i32 0, i32 undef, i32 [[COUNT]], i32 [[COUNT]], i32 [[COUNT]], i32 [[COUNT]], i8 15) ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask) + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: = !{void ()* @node085_thread_emptynodeinput, !"node085_thread_emptynodeinput", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Coalescing (2) +// ... 
+// Arg #n: NodeInputs Tag (20) +// Arg #n+1: NodeInputs (metadata) +// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 2, +// CHECK-SAME: i32 20, [[NODE_IN:![0-9]+]] +// CHECK-SAME: } + +// NodeInputs +// Arg #1: NodeIOKind Tag (1) +// Arg #2: EmptyNodeInput (9) +// Arg #3: NodeInputMaxRecordArraySize Tag (2) +// Arg #4: MaxRecordArraySize = 1 +// ------------------------------------------------------------------ +// CHECK: [[NODE_IN]] = !{[[INPUT0:![0-9]+]]} +// CHECK: [[INPUT0]] = !{i32 1, i32 9} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case092_maxoutputrecords_maxoutputrecordssharedwith.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case092_maxoutputrecords_maxoutputrecordssharedwith.hlsl new file mode 100644 index 0000000000..3a1f4487ab --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case092_maxoutputrecords_maxoutputrecordssharedwith.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE092 (fail) +// MaxRecords and MaxRecordsSharedWith are both declared +// ================================================================== + +struct INPUT_RECORD +{ + uint value; +}; + +struct OUTPUT_RECORD +{ + uint num; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1,1,1)] +[NodeIsProgramEntry] +void node092_maxoutputrecords_maxoutputrecordssharedwith(DispatchNodeInputRecord input, + [MaxRecords(5)] NodeOutput firstOut, + [MaxRecords(5)][MaxRecordsSharedWith(firstOut)] NodeOutput secondOut) +{ +} + +// CHECK: 24:132: error: Only one of MaxRecords or MaxRecordsSharedWith may be specified to the same parameter diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case099_nodelocalrootargumenttableindex.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case099_nodelocalrootargumenttableindex.hlsl new 
file mode 100644 index 0000000000..fcb8cdd5b7 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case099_nodelocalrootargumenttableindex.hlsl @@ -0,0 +1,40 @@ +// RUN: %dxc -T lib_6_8 -default-linkage external %s | FileCheck %s +// ================================================================== +// CASE099 +// NodeLocalRootArgumentsTableIndex is declared +// ================================================================== + +// Shader function +// Arg #1: Opcode = +// Arg #2: Metadata ID = 0 +// ------------------------------------------------------------------ +// CHECK: define void @node099_localrootargumentstableindex() +// CHECK-SAME: { +// CHECK: ret void +// CHECK: } + +[Shader("node")] +[NodeLaunch("Thread")] +[NodeLocalRootArgumentsTableIndex(5)] +[NodeIsProgramEntry] +void node099_localrootargumentstableindex() +{ +} + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: !dx.entryPoints = !{ +// CHECK-SAME: } +// CHECK: = !{void ()* @node099_localrootargumentstableindex, !"node099_localrootargumentstableindex", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: NodeLaunch Tag (13) +// Arg #2: Thread (3) +// Arg #3: NodeLocalRootArgumentsTableIndex Tag (16) +// Arg #4: Index 5 +// Arg #5: NodeIsProgramEntry Tag (14) +// Arg #6: True (1) +// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 13, i32 3, i32 14, i1 true, i32 15, !10, i32 16, i32 5 +// CHECK-SAME: } diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case114_multiple_svdispatchgrid.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case114_multiple_svdispatchgrid.hlsl new file mode 100644 index 0000000000..10da729386 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case114_multiple_svdispatchgrid.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// 
================================================================== +// CASE114 (fail) +// Input record with multiple fields with SV_DispatchGrid annotation +// ================================================================== + +struct INPUT_RECORD +{ + uint DispatchGrid1 : SV_DispatchGrid; + uint2 a; + uint3 DispatchGrid2 : SV_DispatchGrid; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeMaxDispatchGrid(256,1,1)] +[NumThreads(1024,1,1)] +void node114_multiple_svdispatchgrid(DispatchNodeInputRecord input) +{ +} + +// CHECK: :11:25: error: a field with SV_DispatchGrid has already been specified +// CHECK-NEXT: uint3 DispatchGrid2 : SV_DispatchGrid; +// CHECK-NEXT: ^ +// CHECK-NEXT: :9:24: note: previously defined here +// CHECK-NEXT: uint DispatchGrid1 : SV_DispatchGrid; +// CHECK-NEXT: ^ + diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case116_barrier_compute.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case116_barrier_compute.hlsl new file mode 100644 index 0000000000..fa8541e890 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case116_barrier_compute.hlsl @@ -0,0 +1,21 @@ +// RUN: %dxc -T lib_6_8 -default-linkage external %s | FileCheck %s +// ================================================================== +// CASE116 +// Barrier is called from a compute shader +// ================================================================== + +// Shader function +// ------------------------------------------------------------------ +// CHECK: define void @node116_barrier_compute() +// CHECK-SAME: { +// CHECK: call void @dx.op.barrierByMemoryType(i32 +// CHECK-SAME: , i32 1, i32 2, i32 1) +// CHECK: ret void +// CHECK: } + +[Shader("compute")] +[NumThreads(5,1,1)] +void node116_barrier_compute() +{ + Barrier(1, 2, 1); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case117_barrier_memoryarg.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case117_barrier_memoryarg.hlsl new file mode 100644 index 
0000000000..f5004cbe4e --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case117_barrier_memoryarg.hlsl @@ -0,0 +1,83 @@ +// RUN: %dxc -T lib_6_8 -enable-16bit-types %s | FileCheck %s +// ================================================================== +// CASE117 +// Barrier is called using a memory type argument +// ================================================================== + +static const int a = 7; +static const int b = 2; +static const int16_t c = 1; + +[Shader("node")] +[NodeLaunch("Thread")] +void node117_barrier_memoryarg() +{ + // literal integer flag values + Barrier(1, 2, 1); + + // static const integer flag values + Barrier(a, b, c); + + // AllMemoryBarrier() -> + Barrier(UAV_MEMORY|GROUP_SHARED_MEMORY|NODE_INPUT_MEMORY|NODE_OUTPUT_MEMORY, + DEVICE_VISIBLE, + 0); + + // AllMemoryBarrierWithGroupSync() -> + Barrier(UAV_MEMORY|GROUP_SHARED_MEMORY|NODE_INPUT_MEMORY|NODE_OUTPUT_MEMORY, + DEVICE_VISIBLE, + GROUP_SYNC); + + // DeviceMemoryBarrier() -> + Barrier(UAV_MEMORY, + DEVICE_VISIBLE, + 0); + + // DeviceMemoryBarrierWithGroupSync() -> + Barrier(UAV_MEMORY, + DEVICE_VISIBLE, + GROUP_SYNC); + + // GroupMemoryBarrier() -> + Barrier(GROUP_SHARED_MEMORY, + GROUP_VISIBLE, + 0); + + // GroupMemoryBarrierWithGroupSync() -> + Barrier(GROUP_SHARED_MEMORY, + GROUP_VISIBLE, + GROUP_SYNC); +} + +// Shader function +// ------------------------------------------------------------------ +// CHECK: define void @node117_barrier_memoryarg() +// CHECK-SAME: { +// CHECK: call void @dx.op.barrierByMemoryType(i32 {{[0-9]+}}, i32 1, i32 2, i32 1) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) +// CHECK: call void @dx.op.barrierByMemoryType(i32 {{[0-9]+}}, i32 7, i32 2, i32 1) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) +// CHECK: call void @dx.op.barrierByMemoryType(i32 {{[0-9]+}}, i32 15, i32 1, i32 0) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) +// CHECK: call void @dx.op.barrierByMemoryType(i32 
{{[0-9]+}}, i32 15, i32 1, i32 1) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) +// CHECK: call void @dx.op.barrierByMemoryType(i32 {{[0-9]+}}, i32 1, i32 1, i32 0) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) +// CHECK: call void @dx.op.barrierByMemoryType(i32 {{[0-9]+}}, i32 1, i32 1, i32 1) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) +// CHECK: call void @dx.op.barrierByMemoryType(i32 {{[0-9]+}}, i32 2, i32 2, i32 0) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) +// CHECK: call void @dx.op.barrierByMemoryType(i32 {{[0-9]+}}, i32 2, i32 2, i32 1) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) + +// CHECK: ret void +// CHECK: } + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: !dx.entryPoints = !{ +// CHECK-SAME: } +// CHECK: = !{void ()* @node117_barrier_memoryarg, !"node117_barrier_memoryarg", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Thread (3) +// ... 
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 3 +// CHECK-SAME: } diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case118_barrier_objectarg.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case118_barrier_objectarg.hlsl new file mode 100644 index 0000000000..9f25c9c636 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case118_barrier_objectarg.hlsl @@ -0,0 +1,243 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE118 +// Barrier is called with each node record and UAV type +// ================================================================== + +struct RECORD +{ + uint value; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(256,1,1)] +void node01(DispatchNodeInputRecord input) +{ + Barrier(input, 1, 1); +} +// CHECK: define void @node01() { +// CHECK: [[NODE01_A:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeInputRecordHandle(MetadataIdx) +// CHECK: [[ANN_NODE01_A:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[NODE01_A]], %dx.types.NodeRecordInfo { i32 97, i32 4 }) +// CHECK: call void @dx.op.barrierByNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_NODE01_A]], i32 1, i32 1) ; BarrierByNodeRecordHandle(object,AccessFlags,SyncFlags) + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NumThreads(256,1,1)] +void node02([MaxRecords(8)] GroupNodeInputRecords input) +{ + Barrier(input, 1, 1); +} +// CHECK: define void @node02() { +// CHECK: [[NODE02_A:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeInputRecordHandle(MetadataIdx) +// CHECK: [[ANN_NODE02_A:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 
{{[0-9]+}}, %dx.types.NodeRecordHandle [[NODE02_A]], %dx.types.NodeRecordInfo { i32 65, i32 4 }) +// CHECK: call void @dx.op.barrierByNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_NODE02_A]], i32 1, i32 1) ; BarrierByNodeRecordHandle(object,AccessFlags,SyncFlags) + +[Shader("node")] +[NodeLaunch("Thread")] +void node03(RWThreadNodeInputRecord input) +{ + Barrier(input, 1, 1); +} +// CHECK: define void @node03() { +// CHECK: [[NODE03_A:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeInputRecordHandle(MetadataIdx) +// CHECK: [[ANN_NODE03_A:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[NODE03_A]], %dx.types.NodeRecordInfo { i32 37, i32 4 }) +// CHECK: call void @dx.op.barrierByNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_NODE03_A]], i32 1, i32 1) ; BarrierByNodeRecordHandle(object,AccessFlags,SyncFlags) + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NumThreads(256,1,1)] +void node04([MaxRecords(6)] RWGroupNodeInputRecords input) +{ + Barrier(input, 1, 1); +} +// CHECK: define void @node04() { +// CHECK: [[NODE04_A:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeInputRecordHandle(MetadataIdx) +// CHECK: [[ANN_NODE04_A:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[NODE04_A]], %dx.types.NodeRecordInfo { i32 69, i32 4 }) +// CHECK: call void @dx.op.barrierByNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_NODE04_A]], i32 1, i32 1) ; BarrierByNodeRecordHandle(object,AccessFlags,SyncFlags) + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(256,1,1)] +void node05([MaxOutputRecords(5)] NodeOutput outputs) +{ + ThreadNodeOutputRecords outrec = outputs.GetThreadNodeOutputRecords(1); + Barrier(outrec, 1, 1); +} +// CHECK: define 
void @node05() { +// CHECK: [[NODE05_A:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.createNodeOutputHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeOutputHandle(MetadataIdx) +// CHECK: [[ANN_NODE05_A:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[NODE05_A]], %dx.types.NodeInfo { i32 6, i32 4 }) +// CHECK: [[NODE05_B:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANN_NODE05_A]], i32 1, i1 true) ; AllocateNodeOutputRecords(output,numRecords,perThread) +// CHECK: [[ANN_NODE05_B:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[NODE05_B]], %dx.types.NodeRecordInfo { i32 38, i32 4 }) +// CHECK: call void @dx.op.barrierByNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_NODE05_B]], i32 1, i32 1) ; BarrierByNodeRecordHandle(object,AccessFlags,SyncFlags) + +[Shader("node")] +[NodeLaunch("Thread")] +void node06([MaxOutputRecords(5)] NodeOutput outputs) +{ + ThreadNodeOutputRecords outrec = outputs.GetThreadNodeOutputRecords(3); + Barrier(outrec, 1, 1); +} +// CHECK: define void @node06() { +// CHECK: [[NODE06_A:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.createNodeOutputHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeOutputHandle(MetadataIdx) +// CHECK: [[ANN_NODE06_A:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[NODE06_A]], %dx.types.NodeInfo { i32 6, i32 4 }) +// CHECK: [[NODE06_B:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANN_NODE06_A]], i32 3, i1 true) ; AllocateNodeOutputRecords(output,numRecords,perThread) +// CHECK: [[ANN_NODE06_B:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[NODE06_B]], %dx.types.NodeRecordInfo { i32 38, i32 4 }) +// CHECK: call void 
@dx.op.barrierByNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_NODE06_B]], i32 1, i32 1) ; BarrierByNodeRecordHandle(object,AccessFlags,SyncFlags) + +[Shader("node")] +[NodeLaunch("Coalescing")] +[NumThreads(256,1,3)] +void node07([MaxOutputRecords(5)] NodeOutput outputs) +{ + GroupNodeOutputRecords outrec = outputs.GetGroupNodeOutputRecords(1); + Barrier(outrec, 1, 1); +} +// CHECK: define void @node07() { +// CHECK: [[NODE07_A:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.createNodeOutputHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeOutputHandle(MetadataIdx) +// CHECK: [[ANN_NODE07_A:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[NODE07_A]], %dx.types.NodeInfo { i32 6, i32 4 }) +// CHECK: [[NODE07_B:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANN_NODE07_A]], i32 1, i1 false) ; AllocateNodeOutputRecords(output,numRecords,perThread) +// CHECK: [[ANN_NODE07_B:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[NODE07_B]], %dx.types.NodeRecordInfo { i32 70, i32 4 }) +// CHECK: call void @dx.op.barrierByNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_NODE07_B]], i32 1, i32 1) ; BarrierByNodeRecordHandle(object,AccessFlags,SyncFlags) + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(256,1,4)] +void node08([MaxOutputRecords(5)] NodeOutput outputs) +{ + GroupNodeOutputRecords outrec = outputs.GetGroupNodeOutputRecords(4); + Barrier(outrec, 1, 1); +} +// CHECK: define void @node08() { +// CHECK: [[NODE08_A:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.createNodeOutputHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeOutputHandle(MetadataIdx) +// CHECK: [[ANN_NODE08_A:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[NODE08_A]], %dx.types.NodeInfo { i32 6, i32 4 }) +// CHECK: 
[[NODE08_B:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANN_NODE08_A]], i32 4, i1 false) ; AllocateNodeOutputRecords(output,numRecords,perThread) +// CHECK: [[ANN_NODE08_B:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[NODE08_B]], %dx.types.NodeRecordInfo { i32 70, i32 4 }) +// CHECK: call void @dx.op.barrierByNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_NODE08_B]], i32 1, i32 1) ; BarrierByNodeRecordHandle(object,AccessFlags,SyncFlags) + +RWBuffer obj09; +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(256,1,4)] +void node09() +{ + Barrier(obj09, 1, 1); +} +// CHECK: define void @node09() +// CHECK: [[NODE09_A:%[0-9]+]] = load %dx.types.Handle, %dx.types.Handle* @"\01?obj09@@3V?$RWBuffer@M@@A", align 4 +// CHECK: [[NODE09_B:%[0-9]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE09_A]]) ; CreateHandleForLib(Resource) +// CHECK: [[NODE09_C:%[0-9]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE09_B]], %dx.types.ResourceProperties { i32 4106, i32 265 }) ; AnnotateHandle(res,props) resource: RWTypedBuffer +// CHECK: call void @dx.op.barrierByMemoryHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE09_C]], i32 1, i32 1) ; BarrierByMemoryHandle(object,AccessFlags,SyncFlags) + + +RWTexture1D obj10; +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(256,1,4)] +void node10() +{ + Barrier(obj10, 1, 1); +} +// CHECK: define void @node10() +// CHECK: [[NODE10_A:%[0-9]+]] = load %dx.types.Handle, %dx.types.Handle* @"\01?obj10@@3V?$RWTexture1D@V?$vector@M$03@@@@A", align 4 +// CHECK: [[NODE10_B:%[0-9]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE10_A]]) ; CreateHandleForLib(Resource) +// CHECK: [[NODE10_C:%[0-9]+]] = call %dx.types.Handle 
@dx.op.annotateHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE10_B]], %dx.types.ResourceProperties { i32 4097, i32 1033 }) ; AnnotateHandle(res,props) resource: RWTexture1D<4xF32> +// CHECK: call void @dx.op.barrierByMemoryHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE10_C]], i32 1, i32 1) ; BarrierByMemoryHandle(object,AccessFlags,SyncFlags) + + +RWTexture1DArray obj11; +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(256,1,4)] +void node11() +{ + Barrier(obj11, 1, 1); +} +// CHECK: define void @node11() +// CHECK: [[NODE11_A:%[0-9]+]] = load %dx.types.Handle, %dx.types.Handle* @"\01?obj11@@3V?$RWTexture1DArray@V?$vector@M$03@@@@A", align 4 +// CHECK: [[NODE11_B:%[0-9]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE11_A]]) ; CreateHandleForLib(Resource) +// CHECK: [[NODE11_C:%[0-9]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE11_B]], %dx.types.ResourceProperties { i32 4102, i32 1033 }) ; AnnotateHandle(res,props) resource: RWTexture1DArray<4xF32> + +RWTexture2D obj12; +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(256,1,4)] +void node12() +{ + Barrier(obj12, 1, 1); +} +// CHECK: define void @node12() +// CHECK: [[NODE12_A:%[0-9]+]] = load %dx.types.Handle, %dx.types.Handle* @"\01?obj12@@3V?$RWTexture2D@M@@A", align 4 +// CHECK: [[NODE12_B:%[0-9]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE12_A]]) ; CreateHandleForLib(Resource) +// CHECK: [[NODE12_C:%[0-9]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE12_B]], %dx.types.ResourceProperties { i32 4098, i32 265 }) ; AnnotateHandle(res,props) resource: RWTexture2D +// CHECK: call void @dx.op.barrierByMemoryHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE12_C]], i32 1, i32 1) ; BarrierByMemoryHandle(object,AccessFlags,SyncFlags) + +RWTexture2DArray obj13; +[Shader("node")] 
+[NodeLaunch("Broadcasting")] +[NumThreads(256,1,4)] +void node13() +{ + Barrier(obj13, 1, 1); +} +// CHECK: define void @node13() +// CHECK: [[NODE13_A:%[0-9]+]] = load %dx.types.Handle, %dx.types.Handle* @"\01?obj13@@3V?$RWTexture2DArray@M@@A", align 4 +// CHECK: [[NODE13_B:%[0-9]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE13_A]]) ; CreateHandleForLib(Resource) +// CHECK: [[NODE13_C:%[0-9]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE13_B]], %dx.types.ResourceProperties { i32 4103, i32 265 }) ; AnnotateHandle(res,props) resource: RWTexture2DArray +// CHECK: call void @dx.op.barrierByMemoryHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE13_C]], i32 1, i32 1) ; BarrierByMemoryHandle(object,AccessFlags,SyncFlags) + +RWTexture3D obj14; +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(256,1,4)] +void node14() +{ + Barrier(obj14, 1, 1); +} +// CHECK: define void @node14() +// CHECK: [[NODE14_A:%[0-9]+]] = load %dx.types.Handle, %dx.types.Handle* @"\01?obj14@@3V?$RWTexture3D@M@@A", align 4 +// CHECK: [[NODE14_B:%[0-9]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE14_A]]) ; CreateHandleForLib(Resource) +// CHECK: [[NODE14_C:%[0-9]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE14_B]], %dx.types.ResourceProperties { i32 4100, i32 265 }) ; AnnotateHandle(res,props) resource: RWTexture3D +// CHECK: call void @dx.op.barrierByMemoryHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE14_C]], i32 1, i32 1) ; BarrierByMemoryHandle(object,AccessFlags,SyncFlags) + +RWStructuredBuffer obj15; +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(256,1,4)] +void node15() +{ + Barrier(obj15, 1, 1); +} +// CHECK: define void @node15() +// CHECK: [[NODE15_A:%[0-9]+]] = load %dx.types.Handle, %dx.types.Handle* @"\01?obj15@@3V?$RWStructuredBuffer@URECORD@@@@A", align 4 
+// CHECK: [[NODE15_B:%[0-9]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE15_A]]) ; CreateHandleForLib(Resource) +// CHECK: [[NODE15_C:%[0-9]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE15_B]], %dx.types.ResourceProperties { i32 4620, i32 4 }) ; AnnotateHandle(res,props) resource: RWStructuredBuffer +// CHECK: call void @dx.op.barrierByMemoryHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE15_C]], i32 1, i32 1) ; BarrierByMemoryHandle(object,AccessFlags,SyncFlags) + +RWByteAddressBuffer obj16; +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(256,1,4)] +void node16() +{ + Barrier(obj16, 1, 1); +} +// CHECK: define void @node16() +// CHECK: [[NODE16_A:%[0-9]+]] = load %dx.types.Handle, %dx.types.Handle* @"\01?obj16@@3URWByteAddressBuffer@@A", align 4 +// CHECK: [[NODE16_B:%[0-9]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE16_A]]) ; CreateHandleForLib(Resource) +// CHECK: [[NODE16_C:%[0-9]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE16_B]], %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer +// CHECK: call void @dx.op.barrierByMemoryHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE16_C]], i32 1, i32 1) ; BarrierByMemoryHandle(object,AccessFlags,SyncFlags) + +AppendStructuredBuffer obj17; +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(256,1,4)] +void node17() +{ + Barrier(obj17, 1, 1); +} +// CHECK: define void @node17() +// CHECK: [[NODE17_A:%[0-9]+]] = load %dx.types.Handle, %dx.types.Handle* @"\01?obj17@@3V?$AppendStructuredBuffer@URECORD@@@@A", align 4 +// CHECK: [[NODE17_B:%[0-9]+]] = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE17_A]]) ; CreateHandleForLib(Resource) +// CHECK: [[NODE17_C:%[0-9]+]] = call 
%dx.types.Handle @dx.op.annotateHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE17_B]], %dx.types.ResourceProperties { i32 4620, i32 4 }) ; AnnotateHandle(res,props) resource: RWStructuredBuffer +// CHECK: call void @dx.op.barrierByMemoryHandle(i32 {{[0-9]+}}, %dx.types.Handle [[NODE17_C]], i32 1, i32 1) ; BarrierByMemoryHandle(object,AccessFlags,SyncFlags) diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case119_member_read.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case119_member_read.hlsl new file mode 100644 index 0000000000..8eef995214 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case119_member_read.hlsl @@ -0,0 +1,69 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE119 +// Read access to members of input/output records +// ================================================================== + +RWBuffer buf0; + +struct RECORD +{ + uint a; + uint b; + uint c; +}; + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node01(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().a; +} + +// CHECK: define void @node01() { +// CHECK: [[NODE01_L:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32 {{[0-9]+}}, i32 0) +// CHECK: [[ANN_NODE01_L:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[NODE01_L]], %dx.types.NodeRecordInfo { i32 97, i32 12 }) +// CHECK: {{[0-9]+}} = call %struct.RECORD addrspace(6)* @dx.op.getNodeRecordPtr.struct.RECORD(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_NODE01_L]], i32 0) + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node02(RWDispatchNodeInputRecord input) +{ + buf0[0] = input.Get().b; +} + +// CHECK: define void @node02() { +// CHECK: [[NODE02_L:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32 {{[0-9]+}}, i32 0) +// CHECK: 
[[ANN_NODE02_L:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[NODE02_L]], %dx.types.NodeRecordInfo { i32 101, i32 12 }) +// CHECK: {{[0-9]+}} = call %struct.RECORD addrspace(6)* @dx.op.getNodeRecordPtr.struct.RECORD(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_NODE02_L]], i32 0) + +[Shader("node")] +[NumThreads(1024, 1, 1)] +[NodeLaunch("coalescing")] +void node03([MaxRecords(3)] GroupNodeInputRecords input) +{ + buf0[0] = input[1].c; +} + +// CHECK: define void @node03() { +// CHECK: [[NODE03_A:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeInputRecordHandle(MetadataIdx) +// CHECK: [[ANN_NODE03_A:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[NODE03_A]], %dx.types.NodeRecordInfo { i32 65, i32 12 }) +// CHECK: {{[0-9]+}} = call %struct.RECORD addrspace(6)* @dx.op.getNodeRecordPtr.struct.RECORD(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_NODE03_A]], i32 1) + + +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node04([MaxRecords(4)] RWGroupNodeInputRecords input) +{ + buf0[0] = input[2].c; +} + +// CHECK: define void @node04() { +// CHECK: [[NODE04_A:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeInputRecordHandle(MetadataIdx) +// CHECK: [[ANN_NODE04_A:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[NODE04_A]], %dx.types.NodeRecordInfo { i32 69, i32 12 }) +// CHECK: {{[0-9]+}} = call %struct.RECORD addrspace(6)* @dx.op.getNodeRecordPtr.struct.RECORD(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[ANN_NODE04_A]], i32 2) + +// TODO: add test cases for OutputRecord, OutputRecordArray, GroupSharedOutputRecord, and GroupSharedOutputRecordArray diff --git 
a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case124_member_read_types.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case124_member_read_types.hlsl new file mode 100644 index 0000000000..8db4dfb0f1 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case124_member_read_types.hlsl @@ -0,0 +1,110 @@ +// RUN: %dxc -T lib_6_8 -enable-16bit-types %s | FileCheck %s +// ================================================================== +// CASE124 +// Read access of members of input/output record with different type +// sizes - we check the function specializations generated +// ================================================================== + +RWBuffer buf0; + +struct RECORD +{ + half h; + float f; + double d; + bool b; + uint16_t i16; + int i; + int64_t i64; + uint64_t u64; + float3 f3; +}; + +// CHECK: %[[RECORD:struct\.RECORD.*]] = type { half, float, double, i32, i16, i32, i64, i64, [3 x float] } + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node01(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().h; +} +// CHECK: define void @node01() { +// CHECK: {{%[0-9]+}} = call %[[RECORD]] addrspace(6)* @dx.op.getNodeRecordPtr.[[RECORD]](i32 {{[0-9]+}}, %dx.types.NodeRecordHandle {{%[0-9]+}}, i32 0) + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node02(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().f; + +} +// CHECK: define void @node02() { +// CHECK: {{%[0-9]+}} = call %[[RECORD]] addrspace(6)* @dx.op.getNodeRecordPtr.[[RECORD]](i32 {{[0-9]+}}, %dx.types.NodeRecordHandle {{%[0-9]+}}, i32 0) + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node03(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().d; + +} +// CHECK: define void @node03() { +// CHECK: {{%[0-9]+}} = call %[[RECORD]] addrspace(6)* @dx.op.getNodeRecordPtr.[[RECORD]](i32 {{[0-9]+}}, %dx.types.NodeRecordHandle {{%[0-9]+}}, i32 0) + +[Shader("node")] 
+[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node04(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().b; + +} +// CHECK: define void @node04() { +// CHECK: {{%[0-9]+}} = call %[[RECORD]] addrspace(6)* @dx.op.getNodeRecordPtr.[[RECORD]](i32 {{[0-9]+}}, %dx.types.NodeRecordHandle {{%[0-9]+}}, i32 0) + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node05(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().i16; + +} +// CHECK: define void @node05() { +// CHECK: {{%[0-9]+}} = call %[[RECORD]] addrspace(6)* @dx.op.getNodeRecordPtr.[[RECORD]](i32 {{[0-9]+}}, %dx.types.NodeRecordHandle {{%[0-9]+}}, i32 0) + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node06(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().i; + +} +// CHECK: define void @node06() { +// CHECK: {{%[0-9]+}} = call %[[RECORD]] addrspace(6)* @dx.op.getNodeRecordPtr.[[RECORD]](i32 {{[0-9]+}}, %dx.types.NodeRecordHandle {{%[0-9]+}}, i32 0) + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node07(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().i64; + +} +// CHECK: define void @node07() { +// CHECK: {{%[0-9]+}} = call %[[RECORD]] addrspace(6)* @dx.op.getNodeRecordPtr.[[RECORD]](i32 {{[0-9]+}}, %dx.types.NodeRecordHandle {{%[0-9]+}}, i32 0) + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node08(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().u64; + +} +// CHECK: define void @node08() { +// CHECK: {{%[0-9]+}} = call %[[RECORD]] addrspace(6)* @dx.op.getNodeRecordPtr.[[RECORD]](i32 {{[0-9]+}}, %dx.types.NodeRecordHandle {{%[0-9]+}}, i32 0) diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case127_outputcomplete_errors.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case127_outputcomplete_errors.hlsl new file mode 100644 index 0000000000..060221e8bf --- /dev/null +++ 
b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case127_outputcomplete_errors.hlsl @@ -0,0 +1,121 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE127 (fail) +// OutputComplete() is called with unsupported node i/o types +// ================================================================== + +struct RECORD { + int i; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node127_a(DispatchNodeInputRecord nodeInputRecord) +{ + nodeInputRecord.OutputComplete(); +} + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node127_c(RWDispatchNodeInputRecord rwNodeInputRecord) +{ + rwNodeInputRecord.OutputComplete(); +} + +[Shader("node")] +[NodeLaunch("Coalescing")] +void node127_a(GroupNodeInputRecords nodeInputRecord) +{ + nodeInputRecord.OutputComplete(); +} + +[Shader("node")] +[NodeLaunch("Coalescing")] +void node127_c(RWGroupNodeInputRecords rwNodeInputRecord) +{ + rwNodeInputRecord.OutputComplete(); +} + +[Shader("node")] +[NodeLaunch("Thread")] +void node127_a(ThreadNodeInputRecord nodeInputRecord) +{ + nodeInputRecord.OutputComplete(); +} + +[Shader("node")] +[NodeLaunch("Thread")] +void node127_c(RWThreadNodeInputRecord rwNodeInputRecord) +{ + rwNodeInputRecord.OutputComplete(); +} + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node127_e([MaxRecords(5)] EmptyNodeInput emptyNodeInput) +{ + emptyNodeInput.OutputComplete(); +} + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node127_f(NodeOutput nodeOutput) +{ + nodeOutput.OutputComplete(); +} + +[Shader("node")] +[NodeLaunch("Broadcasting")] +void node127_g(EmptyNodeOutput emptyNodeOutput) +{ + emptyNodeOutput.OutputComplete(); +} + +// TODO: NODE OUTPUT ARRAY - not implemented yet +// [Shader("node")] +// [NodeLaunch("Broadcasting")] +// void node127_h(NodeOutputArray nodeOutput_array[8]) +// { +// nodeOutput_array[3].OutputComplete(); +// } + +// CHECK: 15:19: error: no member named 'OutputComplete' in 
'DispatchNodeInputRecord' +// CHECK: nodeInputRecord.OutputComplete(); +// CHECK: ~~~~~~~~~~~~~~~ ^ + +// CHECK: 22:21: error: no member named 'OutputComplete' in 'RWDispatchNodeInputRecord' +// CHECK: rwNodeInputRecord.OutputComplete(); +// CHECK: ~~~~~~~~~~~~~~~~~ ^ + +// CHECK: 29:19: error: no member named 'OutputComplete' in 'GroupNodeInputRecords' +// CHECK: nodeInputRecord.OutputComplete(); +// CHECK: ~~~~~~~~~~~~~~~ ^ + +// CHECK: 36:21: error: no member named 'OutputComplete' in 'RWGroupNodeInputRecords' +// CHECK: rwNodeInputRecord.OutputComplete(); +// CHECK: ~~~~~~~~~~~~~~~~~ ^ + +// CHECK: 43:19: error: no member named 'OutputComplete' in 'ThreadNodeInputRecord' +// CHECK: nodeInputRecord.OutputComplete(); +// CHECK: ~~~~~~~~~~~~~~~ ^ + +// CHECK: 50:21: error: no member named 'OutputComplete' in 'RWThreadNodeInputRecord' +// rwNodeInputRecord.OutputComplete(); +// CHECK: ~~~~~~~~~~~~~~~~~ ^ + +// CHECK: 57:18: error: no member named 'OutputComplete' in 'EmptyNodeInput' +// emptyNodeInput.OutputComplete(); +// ~~~~~~~~~~~~~~ ^ + +// CHECK: 64:14: error: no member named 'OutputComplete' in 'NodeOutput' +// nodeOutput.OutputComplete(); +// ~~~~~~~~~~ ^ + +// CHECK: 71:19: error: no member named 'OutputComplete' in 'EmptyNodeOutput' +// emptyNodeOutput.OutputComplete(); +// ~~~~~~~~~~~~~~~ ^ + +// TODO: NODE OUTPUT ARRAY - not implemented yet +// CHECK-not: 78:23: error: no member named 'OutputComplete' in 'NodeOutputArray' +// nodeOutput_array[3].OutputComplete(); +// ~~~~~~~~~~~~~~~~~~~ ^ diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case129_nodeoutputisvalid_nodeoutput.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case129_nodeoutputisvalid_nodeoutput.hlsl new file mode 100644 index 0000000000..0112155351 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case129_nodeoutputisvalid_nodeoutput.hlsl @@ -0,0 +1,69 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +//
================================================================== +// CASE129 +// NodeOutputIsValid() is called with NodeOutput +// ================================================================== + +RWBuffer buf0; + +struct RECORD +{ + uint value; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1,1,1)] +void node129_nodeoutputisvalid_nodeoutput(NodeOutput output) +{ + buf0[0] = output.IsValid(); +} + +// Shader function +// Arg #1: Opcode = +// Arg #2: Metadata ID +// ------------------------------------------------------------------ +// CHECK: define void @node129_nodeoutputisvalid_nodeoutput() +// CHECK-SAME: { +// CHECK: [[OUTPUT:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.createNodeOutputHandle(i32 {{[0-9]+}}, i32 {{[0-9]+}}) ; CreateNodeOutputHandle(MetadataIdx) +// CHECK: [[ANN_OUTPUT:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[OUTPUT]], %dx.types.NodeInfo { i32 6, i32 4 }) +// CHECK: {{%[0-9]+}} = call i1 @dx.op.nodeOutputIsValid(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANN_OUTPUT]]) ; NodeOutputIsValid(output) +// CHECK: ret void +// CHECK: } + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: = !{void ()* @node129_nodeoutputisvalid_nodeoutput, !"node129_nodeoutputisvalid_nodeoutput", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Broadcasting (1) +// ... +// Arg #x: NodeOutputs Tag (21) +// Arg #x+1: NodeOutputs (metadata) +// ... 
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 1, +// CHECK-SAME: i32 21, [[NODE_OUT:![0-9]+]], +// CHECK-SAME: } + +// NodeOutputs +// Arg #1: NodeIOKind Tag (1) +// Arg #2: RWOutputRecord (6) +// Arg #3: NodeRecordType Tag (2) +// Arg #4: RECORD type +// Arg #5: NodeMaxOutputRecords Tag (3) +// Arg #6: value (0) - TODO: shouldn't this be 1? +// Arg #7: NodeOutputID Tag (0) +// Arg #8: NodeID (metadata) +// ------------------------------------------------------------------ +// CHECK: [[NODE_OUT]] = !{[[OUTPUT:![0-9]+]]} +// CHECK: [[OUTPUT]] = !{i32 1, i32 6, i32 2, {{![0-9]+}}, i32 3, i32 0, i32 0, [[NODE_ID:![0-9]+]]} + +// NodeID +// ------------------------------------------------------------------ +// CHECK: !{!"output", i32 0} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case131_nodeoutputisvalid_emptynodeoutput.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case131_nodeoutputisvalid_emptynodeoutput.hlsl new file mode 100644 index 0000000000..483f025729 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case131_nodeoutputisvalid_emptynodeoutput.hlsl @@ -0,0 +1,60 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE131 +// NodeOutputIsValid() is called with EmptyNodeOutput +// ================================================================== + +RWBuffer buf0; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1,1,1)] +void node131_nodeoutputisvalid_emptynodeoutput(EmptyNodeOutput output) +{ + buf0[0] = output.IsValid(); +} + +// Shader function +// ------------------------------------------------------------------ +// CHECK: define void @node131_nodeoutputisvalid_emptynodeoutput() +// CHECK-SAME: { +// CHECK: [[OUTPUT:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.createNodeOutputHandle(i32 {{[0-9]+}}, i32
{{[0-9]+}}) ; CreateNodeOutputHandle(MetadataIdx) +// CHECK: [[ANN_OUTPUT:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[OUTPUT]], %dx.types.NodeInfo { i32 10, i32 0 }) +// CHECK: {{%[0-9]+}} = call i1 @dx.op.nodeOutputIsValid(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANN_OUTPUT]]) ; NodeOutputIsValid(output) +// CHECK: ret void +// CHECK: } + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: = !{void ()* @node131_nodeoutputisvalid_emptynodeoutput, !"node131_nodeoutputisvalid_emptynodeoutput", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: ShaderKind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Broadcasting (1) +// ... +// Arg #x: NodeOutputs Tag (21) +// Arg #x+1: NodeOutputs (metadata) +// ... +// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 1, +// CHECK-SAME: i32 21, [[NODE_OUT:![0-9]+]], +// CHECK-SAME: } + +// NodeOutputs +// Arg #1: NodeIOKind Tag (1) +// Arg #2: EmptyNodeOutput (10) +// Arg #3: NodeMaxOutputRecords Tag (3) +// Arg #4: value (0) +// Arg #5: NodeOutputID Tag (0) +// Arg #6: NodeID (metadata) +// ------------------------------------------------------------------ +// CHECK: [[NODE_OUT]] = !{[[OUTPUT:![0-9]+]]} +// CHECK: [[OUTPUT]] = !{i32 1, i32 10, i32 3, i32 0, i32 0, [[NODE_ID:![0-9]+]]} + +// NodeID +// ------------------------------------------------------------------ +// CHECK: !{!"output", i32 0} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case133_getremainingrecursionlevels.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case133_getremainingrecursionlevels.hlsl new file mode 100644 index 0000000000..4ef6a33124 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case133_getremainingrecursionlevels.hlsl @@ -0,0 +1,47 @@ +// RUN: %dxc -T lib_6_8 external
%s | FileCheck %s +// ================================================================== +// CASE133 +// GetRemainingRecursionLevels() called +// ================================================================== + +RWBuffer buf0; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxRecursionDepth(16)] +void node133_getremainingrecursionlevels() +{ + uint remaining = GetRemainingRecursionLevels(); + // Use resource as a way of preventing DCE + buf0[0] = remaining; +} + +// Shader function +// Arg #1: Opcode = +// Arg #2: Metadata ID +// ------------------------------------------------------------------ +// CHECK: define void @node133_getremainingrecursionlevels() +// CHECK-SAME: { +// CHECK: {{%[0-9]+}} = call i32 @dx.op.getRemainingRecursionLevels(i32 {{[0-9]+}}) ; GetRemainingRecursionLevels() +// CHECK: ret void +// CHECK: } + +// Metadata for node +// ------------------------------------------------------------------ +// CHECK: = !{void ()* @node133_getremainingrecursionlevels, !"node133_getremainingrecursionlevels", null, null, [[ATTRS:![0-9]+]]} + +// Metadata for node attributes +// Arg #1: Shader Kind Tag (8) +// Arg #2: Node (15) +// Arg #3: NodeLaunch Tag (13) +// Arg #4: Broadcasting (1) +// ... +// Arg #x: NodeMaxRecursionDepth Tag (19) +// Arg #x+1: value (16) +// ...
+// ------------------------------------------------------------------ +// CHECK: [[ATTRS]] = !{ +// CHECK-SAME: i32 8, i32 15, i32 13, i32 1, +// CHECK-SAME: i32 19, i32 16 +// CHECK-SAME: } diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case150_multiple_getnoderoutputrecord.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case150_multiple_getnoderoutputrecord.hlsl new file mode 100644 index 0000000000..c6dbc6b005 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/case150_multiple_getnoderoutputrecord.hlsl @@ -0,0 +1,52 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// CASE150 () +// Multiple calls to GetGroupNodeOutputRecords()/GetThreadNodeOutputRecords() +// ================================================================== + +struct RECORD { + int i; + float3 foo; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(1024, 1, 1)] +void node150_a(NodeOutput output) +{ + GroupNodeOutputRecords outRec1 = output.GetGroupNodeOutputRecords(1); + GroupNodeOutputRecords outRec2 = output.GetGroupNodeOutputRecords(4); + outRec1.OutputComplete(); + outRec2.OutputComplete(); +} +// CHECK: define void @node150_a() { +// CHECK: [[OP_HANDLE:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.createNodeOutputHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeOutputHandle(MetadataIdx) +// CHECK: [[ANN_OP_HANDLE:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[OP_HANDLE]], %dx.types.NodeInfo { i32 6, i32 16 }) +// CHECK: [[OP_REC_HANDLE1:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 238, %dx.types.NodeHandle [[ANN_OP_HANDLE]], i32 1, i1 false) ; AllocateNodeOutputRecords(output,numRecords,perThread) +// CHECK: [[ANN_OP_REC_HANDLE1:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[OP_REC_HANDLE1]], %dx.types.NodeRecordInfo { i32 70, i32 16 }) +// CHECK:
[[OP_REC_HANDLE2:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 238, %dx.types.NodeHandle [[ANN_OP_HANDLE]], i32 4, i1 false) ; AllocateNodeOutputRecords(output,numRecords,perThread) +// CHECK: [[ANN_OP_REC_HANDLE2:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[OP_REC_HANDLE2]], %dx.types.NodeRecordInfo { i32 70, i32 16 }) +// CHECK: call void @dx.op.outputComplete(i32 241, %dx.types.NodeRecordHandle [[ANN_OP_REC_HANDLE1]]) ; OutputComplete(output) +// CHECK: call void @dx.op.outputComplete(i32 241, %dx.types.NodeRecordHandle [[ANN_OP_REC_HANDLE2]]) ; OutputComplete(output) + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(1024, 1, 1)] +void node150_b(NodeOutput output) +{ + ThreadNodeOutputRecords outRec1 = output.GetThreadNodeOutputRecords(5); + ThreadNodeOutputRecords outRec2 = output.GetThreadNodeOutputRecords(1); + outRec1.OutputComplete(); + outRec1 = outRec2; + outRec1.OutputComplete(); + +} +// CHECK: define void @node150_b() { + // CHECK: [[OP_HANDLE:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.createNodeOutputHandle(i32 {{[0-9]+}}, i32 0) ; CreateNodeOutputHandle(MetadataIdx) + // CHECK: [[ANN_OP_HANDLE:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[OP_HANDLE]], %dx.types.NodeInfo { i32 6, i32 16 }) + // CHECK: [[OP_REC_HANDLE1:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANN_OP_HANDLE]], i32 5, i1 true) ; AllocateNodeOutputRecords(output,numRecords,perThread) + // CHECK: [[ANN_OP_REC_HANDLE1:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[OP_REC_HANDLE1]], %dx.types.NodeRecordInfo { i32 38, i32 16 }) + // CHECK: [[OP_REC_HANDLE2:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 {{[0-9]+}}, 
%dx.types.NodeHandle [[ANN_OP_HANDLE]], i32 1, i1 true) ; AllocateNodeOutputRecords(output,numRecords,perThread) + // CHECK: [[ANN_OP_REC_HANDLE2:%[0-9]+]] = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 {{[0-9]+}}, %dx.types.NodeRecordHandle [[OP_REC_HANDLE2]], %dx.types.NodeRecordInfo { i32 38, i32 16 }) +// CHECK: call void @dx.op.outputComplete(i32 241, %dx.types.NodeRecordHandle [[ANN_OP_REC_HANDLE1]]) ; OutputComplete(output) +// CHECK: call void @dx.op.outputComplete(i32 241, %dx.types.NodeRecordHandle [[ANN_OP_REC_HANDLE2]]) ; OutputComplete(output) \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/groupshared.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/groupshared.hlsl new file mode 100644 index 0000000000..aa1a137456 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/groupshared.hlsl @@ -0,0 +1,23 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// Check that Group Shared memory is allowed from a work graph node +// ================================================================== + +// CHECK-NOT: error +// CHECK: define void @firstNode() + +struct Record +{ + uint index; +}; + +groupshared uint testLds[512]; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1,1,1)] +void firstNode(DispatchNodeInputRecord inputData) +{ + testLds[inputData.Get().index] = 99; +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/groupshared_barrier.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/groupshared_barrier.hlsl new file mode 100644 index 0000000000..0452849990 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/groupshared_barrier.hlsl @@ -0,0 +1,20 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// Check that a barrier can be used on a groupshared object from a +// work graph 
node +// ================================================================== + +// CHECK-NOT: error +// CHECK: define void @firstNode() + +groupshared uint Test; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void firstNode() +{ + Test = 1; + AllMemoryBarrierWithGroupSync(); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/implicit_record_dispatchgrid.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/implicit_record_dispatchgrid.hlsl new file mode 100644 index 0000000000..55f407162b --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/implicit_record_dispatchgrid.hlsl @@ -0,0 +1,36 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// RUN: %dxc -T lib_6_8 %s | %D3DReflect %s | FileCheck -check-prefix=REFL %s + +// CHECK: = !{void ()* @cs_and_node, !"cs_and_node", null, null, [[ExtAttrs:![0-9]+]]} +// CHECK: [[ExtAttrs]] = +// CHECK-SAME: i32 20, [[InputNodes:![0-9]+]], +// CHECK: [[InputNodes]] = !{[[InputNode:![0-9]+]]} +// CHECK: [[InputNode]] = !{i32 1, i32 97, i32 2, [[RecordInfo:![0-9]+]]} +// CHECK: [[RecordInfo]] = !{i32 0, i32 12, i32 1, [[SVDispatchGrid:![0-9]+]]} +// CHECK: [[SVDispatchGrid]] = !{i32 0, i32 5, i32 3} + +// REFL: Inputs: <15:RecordArrayRef[1]> = { +// REFL: [0]: <0:IONode> = { +// REFL: IOFlagsAndKind: 97 +// REFL: Attribs: <12:RecordArrayRef[2]> = { +// REFL: [0]: <0:NodeShaderIOAttrib> = { +// REFL: AttribKind: RecordSizeInBytes +// REFL: RecordSizeInBytes: 12 +// REFL: } +// REFL: [1]: <1:NodeShaderIOAttrib> = { +// REFL: AttribKind: RecordDispatchGrid +// REFL: RecordDispatchGrid: +// REFL: ByteOffset: 0 +// REFL: ComponentNumAndType: 23 +// REFL: } +// REFL: } +// REFL: } +// REFL: } + +[Shader("compute")] +[Shader("node")] +[NodeMaxDispatchGrid(3, 1, 1)] +[NumThreads(16, 1, 1)] +void cs_and_node() +{ +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_1.ll b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_1.ll new 
file mode 100644 index 0000000000..509a1e75d5 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_1.ll @@ -0,0 +1,53 @@ +; RUN: %dxv %s | FileCheck %s +; + +; shader hash: 736cf97c50b38ecbe5281c2034e9b6c5 +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +; Function Attrs: nounwind +define void @node01() #0 { + + ; CHECK: Invalid memory type flag + call void @dx.op.barrierByMemoryType(i32 244, i32 16, i32 1, i32 1) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) + + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.typeAnnotations = !{!3} +!dx.entryPoints = !{!7, !8} + +!0 = !{!"dxc(private) 1.7.0.4846 (user/jbatista/validate_Barrier_args01, 618a7385b-dirty)"} +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{i32 1, void ()* @node01, !4} +!4 = !{!5} +!5 = !{i32 1, !6, !6} +!6 = !{} +!7 = !{null, !"", null, null, null} +!8 = !{void ()* @node01, !"node01", null, null, !9} +!9 = !{i32 8, i32 15, i32 13, i32 1, i32 15, !10, i32 16, i32 -1, i32 20, !11, i32 4, !13, i32 5, !14} +!10 = !{!"node01", i32 0} +!11 = !{!12} +!12 = !{i32 1, i32 9} +!13 = !{i32 1024, i32 1, i32 1} +!14 = !{i32 0} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_12.ll b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_12.ll new file mode 100644 index 0000000000..e64c2ac4c7 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_12.ll @@ -0,0 
+1,54 @@ +; RUN: %dxv %s | FileCheck %s +; + +; shader hash: 736cf97c50b38ecbe5281c2034e9b6c5 +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +; Function Attrs: nounwind +define void @node01() #0 { + + ; CHECK: Invalid memory type flag + ; CHECK: Invalid access flag + call void @dx.op.barrierByMemoryType(i32 244, i32 18, i32 5, i32 1) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) + + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.typeAnnotations = !{!3} +!dx.entryPoints = !{!7, !8} + +!0 = !{!"dxc(private) 1.7.0.4846 (user/jbatista/validate_Barrier_args01, 618a7385b-dirty)"} +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{i32 1, void ()* @node01, !4} +!4 = !{!5} +!5 = !{i32 1, !6, !6} +!6 = !{} +!7 = !{null, !"", null, null, null} +!8 = !{void ()* @node01, !"node01", null, null, !9} +!9 = !{i32 8, i32 15, i32 13, i32 1, i32 15, !10, i32 16, i32 -1, i32 20, !11, i32 4, !13, i32 5, !14} +!10 = !{!"node01", i32 0} +!11 = !{!12} +!12 = !{i32 1, i32 9} +!13 = !{i32 1024, i32 1, i32 1} +!14 = !{i32 0} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_123.ll b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_123.ll new file mode 100644 index 0000000000..0b69a35bb2 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_123.ll @@ -0,0 +1,55 @@ +; RUN: %dxv %s | FileCheck %s +; + +; shader hash: 736cf97c50b38ecbe5281c2034e9b6c5 +; +; Buffer 
Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +; Function Attrs: nounwind +define void @node01() #0 { + + ; CHECK: Invalid memory type flag + ; CHECK: Invalid access flag + ; CHECK: Invalid sync flag + call void @dx.op.barrierByMemoryType(i32 244, i32 18, i32 7, i32 5) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) + + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.typeAnnotations = !{!3} +!dx.entryPoints = !{!7, !8} + +!0 = !{!"dxc(private) 1.7.0.4846 (user/jbatista/validate_Barrier_args01, 618a7385b-dirty)"} +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{i32 1, void ()* @node01, !4} +!4 = !{!5} +!5 = !{i32 1, !6, !6} +!6 = !{} +!7 = !{null, !"", null, null, null} +!8 = !{void ()* @node01, !"node01", null, null, !9} +!9 = !{i32 8, i32 15, i32 13, i32 1, i32 15, !10, i32 16, i32 -1, i32 20, !11, i32 4, !13, i32 5, !14} +!10 = !{!"node01", i32 0} +!11 = !{!12} +!12 = !{i32 1, i32 9} +!13 = !{i32 1024, i32 1, i32 1} +!14 = !{i32 0} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_13.ll b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_13.ll new file mode 100644 index 0000000000..49f576b24b --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_13.ll @@ -0,0 +1,54 @@ +; RUN: %dxv %s | FileCheck %s +; + +; shader hash: 736cf97c50b38ecbe5281c2034e9b6c5 +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind 
Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +; Function Attrs: nounwind +define void @node01() #0 { + + ; CHECK: Invalid memory type flag + ; CHECK: Invalid sync flag + call void @dx.op.barrierByMemoryType(i32 244, i32 18, i32 2, i32 5) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) + + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.typeAnnotations = !{!3} +!dx.entryPoints = !{!7, !8} + +!0 = !{!"dxc(private) 1.7.0.4846 (user/jbatista/validate_Barrier_args01, 618a7385b-dirty)"} +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{i32 1, void ()* @node01, !4} +!4 = !{!5} +!5 = !{i32 1, !6, !6} +!6 = !{} +!7 = !{null, !"", null, null, null} +!8 = !{void ()* @node01, !"node01", null, null, !9} +!9 = !{i32 8, i32 15, i32 13, i32 1, i32 15, !10, i32 16, i32 -1, i32 20, !11, i32 4, !13, i32 5, !14} +!10 = !{!"node01", i32 0} +!11 = !{!12} +!12 = !{i32 1, i32 9} +!13 = !{i32 1024, i32 1, i32 1} +!14 = !{i32 0} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_2.ll b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_2.ll new file mode 100644 index 0000000000..ee03ecd072 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_2.ll @@ -0,0 +1,53 @@ +; RUN: %dxv %s | FileCheck %s +; + +; shader hash: 736cf97c50b38ecbe5281c2034e9b6c5 +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; +target 
datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +; Function Attrs: nounwind +define void @node01() #0 { + + ; CHECK: Invalid access flag + call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 4, i32 1) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) + + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.typeAnnotations = !{!3} +!dx.entryPoints = !{!7, !8} + +!0 = !{!"dxc(private) 1.7.0.4846 (user/jbatista/validate_Barrier_args01, 618a7385b-dirty)"} +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{i32 1, void ()* @node01, !4} +!4 = !{!5} +!5 = !{i32 1, !6, !6} +!6 = !{} +!7 = !{null, !"", null, null, null} +!8 = !{void ()* @node01, !"node01", null, null, !9} +!9 = !{i32 8, i32 15, i32 13, i32 1, i32 15, !10, i32 16, i32 -1, i32 20, !11, i32 4, !13, i32 5, !14} +!10 = !{!"node01", i32 0} +!11 = !{!12} +!12 = !{i32 1, i32 9} +!13 = !{i32 1024, i32 1, i32 1} +!14 = !{i32 0} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_23.ll b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_23.ll new file mode 100644 index 0000000000..b9c2c7ff13 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_23.ll @@ -0,0 +1,54 @@ +; RUN: %dxv %s | FileCheck %s +; + +; shader hash: 736cf97c50b38ecbe5281c2034e9b6c5 +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +; Function Attrs: 
nounwind +define void @node01() #0 { + + ; CHECK: Invalid access flag + ; CHECK: Invalid sync flag + call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 62, i32 5) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) + + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.typeAnnotations = !{!3} +!dx.entryPoints = !{!7, !8} + +!0 = !{!"dxc(private) 1.7.0.4846 (user/jbatista/validate_Barrier_args01, 618a7385b-dirty)"} +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{i32 1, void ()* @node01, !4} +!4 = !{!5} +!5 = !{i32 1, !6, !6} +!6 = !{} +!7 = !{null, !"", null, null, null} +!8 = !{void ()* @node01, !"node01", null, null, !9} +!9 = !{i32 8, i32 15, i32 13, i32 1, i32 15, !10, i32 16, i32 -1, i32 20, !11, i32 4, !13, i32 5, !14} +!10 = !{!"node01", i32 0} +!11 = !{!12} +!12 = !{i32 1, i32 9} +!13 = !{i32 1024, i32 1, i32 1} +!14 = !{i32 0} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_3.ll b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_3.ll new file mode 100644 index 0000000000..248c34e425 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_3.ll @@ -0,0 +1,53 @@ +; RUN: %dxv %s | FileCheck %s +; + +; shader hash: 736cf97c50b38ecbe5281c2034e9b6c5 +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +; Function Attrs: nounwind +define void @node01() #0 { + + ; CHECK: Invalid sync flag + call void @dx.op.barrierByMemoryType(i32 244, i32 1, 
i32 1, i32 2) ; BarrierByMemoryType(MemoryTypeFlags,AccessFlags,SyncFlags) + + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.typeAnnotations = !{!3} +!dx.entryPoints = !{!7, !8} + +!0 = !{!"dxc(private) 1.7.0.4846 (user/jbatista/validate_Barrier_args01, 618a7385b-dirty)"} +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{i32 1, void ()* @node01, !4} +!4 = !{!5} +!5 = !{i32 1, !6, !6} +!6 = !{} +!7 = !{null, !"", null, null, null} +!8 = !{void ()* @node01, !"node01", null, null, !9} +!9 = !{i32 8, i32 15, i32 13, i32 1, i32 15, !10, i32 16, i32 -1, i32 20, !11, i32 4, !13, i32 5, !14} +!10 = !{!"node01", i32 0} +!11 = !{!12} +!12 = !{i32 1, i32 9} +!13 = !{i32 1024, i32 1, i32 1} +!14 = !{i32 0} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_nodehandle.ll b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_nodehandle.ll new file mode 100644 index 0000000000..d1de66b18e --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/invalid_barrier_nodehandle.ll @@ -0,0 +1,125 @@ +; RUN: %dxv %s | FileCheck %s +; + +; +; Note: shader requires additional functionality: +; UAVs at every shader stage +; +; shader hash: 27900d88c63cf1e406c339e543e4ebc9 +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; buf0 UAV u32 buf U0u4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.NodeRecordHandle = type { i8* } 
+%dx.types.NodeRecordInfo = type { i32, i32 } +%"class.RWBuffer" = type { i32 } + +@"\01?buf0@@3V?$RWBuffer@I@@A" = external constant %dx.types.Handle, align 4 + +define void @node01() { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buf0@@3V?$RWBuffer@I@@A", align 4 + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4106, i32 261 }) ; AnnotateHandle(res,props) resource: RWTypedBuffer + ; CHECK: error: Invalid access flags on DXIL operation 'barrierByMemoryHandle' + call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %3, i32 9, i32 1) ; BarrierByMemoryHandle(object,AccessFlags,SyncFlags) + ret void +} + +define void @node02() { + %1 = call %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32 250, i32 0) ; CreateNodeInputRecordHandle(MetadataIdx) + %2 = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 251, %dx.types.NodeRecordHandle %1, %dx.types.NodeRecordInfo { i32 97, i32 12 }) ; AnnotateNodeRecordHandle(noderecord,props) + ; CHECK: error: Invalid access flags on DXIL operation 'barrierByNodeRecordHandle' + call void @dx.op.barrierByNodeRecordHandle(i32 246, %dx.types.NodeRecordHandle %2, i32 9, i32 1) ; BarrierByNodeRecordHandle(object,AccessFlags,SyncFlags) + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryHandle(i32, %dx.types.Handle, i32, i32) #0 + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByNodeRecordHandle(i32, %dx.types.NodeRecordHandle, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.NodeRecordHandle @dx.op.createNodeInputRecordHandle(i32, i32) #1 + +; Function Attrs: nounwind 
readnone +declare %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32, %dx.types.NodeRecordHandle, %dx.types.NodeRecordInfo) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 + +attributes #0 = { noduplicate nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.resources = !{!3} +!dx.typeAnnotations = !{!7} +!dx.entryPoints = !{!11, !13, !21} + +!0 = !{!"dxc(private) 1.7.0.5187 (user/jbatista/validate_Barrier_args01, c232ad072)"} +!1 = !{i32 1, i32 8} +!2 = !{!"lib", i32 6, i32 8} +!3 = !{null, !4, null, null} +!4 = !{!5} +!5 = !{i32 0, %"class.RWBuffer"* bitcast (%dx.types.Handle* @"\01?buf0@@3V?$RWBuffer@I@@A" to %"class.RWBuffer"*), !"buf0", i32 -1, i32 -1, i32 1, i32 10, i1 false, i1 false, i1 false, !6} +!6 = !{i32 0, i32 5} +!7 = !{i32 1, void ()* @node01, !8, void ()* @node02, !8} +!8 = !{!9} +!9 = !{i32 0, !10, !10} +!10 = !{} +!11 = !{null, !"", null, !3, !12} +!12 = !{i32 0, i64 8590000128} +!13 = !{void ()* @node01, !"node01", null, null, !14} +!14 = !{i32 8, i32 15, i32 13, i32 1, i32 15, !15, i32 16, i32 -1, i32 20, !16, i32 4, !19, i32 5, !20} +!15 = !{!"node01", i32 0} +!16 = !{!17} +!17 = !{i32 1, i32 97, i32 2, !18} +!18 = !{i32 0, i32 12} +!19 = !{i32 1024, i32 1, i32 1} +!20 = !{i32 0} +!21 = !{void ()* @node02, !"node02", null, null, !22} +!22 = !{i32 8, i32 15, i32 13, i32 1, i32 15, !23, i32 16, i32 -1, i32 20, !16, i32 4, !19, i32 5, !20} +!23 = !{!"node02", i32 0} + +; SOURCE HLSL: +;struct RECORD +;{ +; uint a; +; uint b; +; uint c; +;}; +;RWBuffer buf0; + +;[Shader("node")] +;[NumThreads(1024,1,1)] +;[NodeLaunch("Broadcasting")] +;void node01(DispatchNodeInputRecord input) +;{ +; Barrier(buf0,1,1); +;} + +;[Shader("node")] +;[NumThreads(1024,1,1)] +;[NodeLaunch("Broadcasting")] +;void node02(DispatchNodeInputRecord 
input) +;{ +; Barrier(input,1,1); +;} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/member_atomics.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/member_atomics.hlsl new file mode 100644 index 0000000000..9d28450948 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/member_atomics.hlsl @@ -0,0 +1,84 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// Test using atomics on node record members for cmpxchg and binops +// ================================================================== + +struct RECORD +{ + uint ival; + float fval; +}; + +// CHECK: define void @node01 +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node01(RWDispatchNodeInputRecord input1) +{ + // CHECK: getelementptr %struct.RECORD, %struct.RECORD addrspace(6)* + // CHECK: bitcast float addrspace(6)* %{{[0-9A-Za-z_]*}} to i32 addrspace(6)* + // CHECK: cmpxchg i32 addrspace(6)* %{{[0-9A-Za-z_]*}}, i32 0, i32 1123477094 + InterlockedCompareStoreFloatBitwise(input1.Get().fval, 0.0, 123.45); + + // CHECK: getelementptr %struct.RECORD, %struct.RECORD addrspace(6)* + // CHECK: cmpxchg i32 addrspace(6)* %{{[0-9A-Za-z_]*}}, i32 111, i32 222 + // CHECK: atomicrmw add i32 addrspace(6)* %{{[0-9A-Za-z_]*}}, i32 333 + InterlockedCompareStore(input1.Get().ival, 111, 222); + InterlockedAdd(input1.Get().ival, 333); +} + +// CHECK: define void @node02 +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node02([MaxRecords(4)]RWGroupNodeInputRecords input2) +{ + // CHECK: getelementptr %struct.RECORD, %struct.RECORD addrspace(6)* + // CHECK: bitcast float addrspace(6)* %{{[0-9A-Za-z_]*}} to i32 addrspace(6)* + // CHECK: cmpxchg i32 addrspace(6)* %{{[0-9A-Za-z_]*}}, i32 0, i32 1123477094 + InterlockedCompareStoreFloatBitwise(input2[0].fval, 0.0, 123.45); + + // CHECK: getelementptr %struct.RECORD, %struct.RECORD addrspace(6)* + // CHECK: 
cmpxchg i32 addrspace(6)* %{{[0-9A-Za-z_]*}}, i32 111, i32 222 + // CHECK: atomicrmw add i32 addrspace(6)* %{{[0-9A-Za-z_]*}}, i32 333 + InterlockedCompareStore(input2[1].ival, 111, 222); + InterlockedAdd(input2[2].ival, 333); +} + +// CHECK: define void @node03 +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node03(NodeOutput output3) +{ + GroupNodeOutputRecords outrec = output3.GetGroupNodeOutputRecords(1); + // CHECK: getelementptr %struct.RECORD, %struct.RECORD addrspace(6)* + // CHECK: bitcast float addrspace(6)* %{{[0-9A-Za-z_]*}} to i32 addrspace(6)* + // CHECK: cmpxchg i32 addrspace(6)* %{{[0-9A-Za-z_]*}}, i32 0, i32 1123477094 + InterlockedCompareStoreFloatBitwise(outrec.Get().fval, 0.0, 123.45); + + // CHECK: getelementptr %struct.RECORD, %struct.RECORD addrspace(6)* + // CHECK: cmpxchg i32 addrspace(6)* %{{[0-9A-Za-z_]*}}, i32 111, i32 222 + // CHECK: atomicrmw add i32 addrspace(6)* %{{[0-9A-Za-z_]*}}, i32 333 + InterlockedCompareStore(outrec.Get().ival, 111, 222); + InterlockedAdd(outrec.Get().ival, 333); +} + +// CHECK: define void @node04 +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Coalescing")] +void node04([MaxOutputRecords(5)] NodeOutput outputs4) +{ + ThreadNodeOutputRecords outrec = outputs4.GetThreadNodeOutputRecords(1); + // CHECK: getelementptr %struct.RECORD, %struct.RECORD addrspace(6)* + // CHECK: bitcast float addrspace(6)* %{{[0-9A-Za-z_]*}} to i32 addrspace(6)* + // CHECK: cmpxchg i32 addrspace(6)* %{{[0-9A-Za-z_]*}}, i32 0, i32 1123477094 + InterlockedCompareStoreFloatBitwise(outrec.Get().fval, 0.0, 123.45); + + // CHECK: getelementptr %struct.RECORD, %struct.RECORD addrspace(6)* + // CHECK: cmpxchg i32 addrspace(6)* %{{[0-9A-Za-z_]*}}, i32 111, i32 222 + // CHECK: atomicrmw add i32 addrspace(6)* %{{[0-9A-Za-z_]*}}, i32 333 + InterlockedCompareStore(outrec.Get().ival, 111, 222); + InterlockedAdd(outrec.Get().ival, 333); +} diff --git 
a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/member_matrix_write.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/member_matrix_write.hlsl new file mode 100644 index 0000000000..3d148bf810 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/member_matrix_write.hlsl @@ -0,0 +1,137 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// ================================================================== +// Test writing to matrix members of node records +// ================================================================== + +struct RECORD +{ + row_major float2x2 m0; + row_major float2x2 m1; + column_major float2x2 m2; +}; + +// CHECK: %[[RECORD:struct\.RECORD.*]] = type { [4 x float], [4 x float], [4 x float] } + +// CHECK: define void @node01 +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node01(RWDispatchNodeInputRecord input1) +{ + // CHECK: %[[p1_0:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 0 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_0]], align 4 + // CHECK: %[[p1_1:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 1 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_1]], align 4 + // CHECK: %[[p1_2:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 2 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_2]], align 4 + // CHECK: %[[p1_3:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 3 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_3]], align 4 + input1.Get().m1 = 111; + // CHECK: %[[p0_0:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 0, i32 0 + // CHECK: %[[v0_0:[^ ]+]] = load float, float addrspace(6)* %[[p0_0]], align 4 + // CHECK: %[[p0_1:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 0, i32 1 + // 
CHECK: %[[v0_1:[^ ]+]] = load float, float addrspace(6)* %[[p0_1]], align 4 + // CHECK: %[[p0_2:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 0, i32 2 + // CHECK: %[[v0_2:[^ ]+]] = load float, float addrspace(6)* %[[p0_2]], align 4 + // CHECK: %[[p0_3:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 0, i32 3 + // CHECK: %[[v0_3:[^ ]+]] = load float, float addrspace(6)* %[[p0_3]], align 4 + // Note: store transposed. + // CHECK: %[[p2_0:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 0 + // CHECK: store float %[[v0_0]], float addrspace(6)* %[[p2_0]], align 4 + // CHECK: %[[p2_1:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 1 + // CHECK: store float %[[v0_2]], float addrspace(6)* %[[p2_1]], align 4 + // CHECK: %[[p2_2:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 2 + // CHECK: store float %[[v0_1]], float addrspace(6)* %[[p2_2]], align 4 + // CHECK: %[[p2_3:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 3 + // CHECK: store float %[[v0_3]], float addrspace(6)* %[[p2_3]], align 4 + input1.Get().m2 = input1.Get().m0; +} + +// CHECK: define void @node02 +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node02([MaxRecords(4)] RWGroupNodeInputRecords input2) +{ + // CHECK: %[[p1_0:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 0 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_0]], align 4 + // CHECK: %[[p1_1:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 1 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_1]], align 4 + // CHECK: %[[p1_2:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 2 + // CHECK: store float 1.110000e+02, float 
addrspace(6)* %[[p1_2]], align 4 + // CHECK: %[[p1_3:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 3 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_3]], align 4 + input2[0].m1 = 111; + // CHECK: %[[p0_0:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 0, i32 0 + // CHECK: %[[v0_0:[^ ]+]] = load float, float addrspace(6)* %[[p0_0]], align 4 + // CHECK: %[[p0_1:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 0, i32 1 + // CHECK: %[[v0_1:[^ ]+]] = load float, float addrspace(6)* %[[p0_1]], align 4 + // CHECK: %[[p0_2:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 0, i32 2 + // CHECK: %[[v0_2:[^ ]+]] = load float, float addrspace(6)* %[[p0_2]], align 4 + // CHECK: %[[p0_3:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 0, i32 3 + // CHECK: %[[v0_3:[^ ]+]] = load float, float addrspace(6)* %[[p0_3]], align 4 + // Note: store transposed. 
+ // CHECK: %[[p2_0:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 0 + // CHECK: store float %[[v0_0]], float addrspace(6)* %[[p2_0]], align 4 + // CHECK: %[[p2_1:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 1 + // CHECK: store float %[[v0_2]], float addrspace(6)* %[[p2_1]], align 4 + // CHECK: %[[p2_2:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 2 + // CHECK: store float %[[v0_1]], float addrspace(6)* %[[p2_2]], align 4 + // CHECK: %[[p2_3:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 3 + // CHECK: store float %[[v0_3]], float addrspace(6)* %[[p2_3]], align 4 + input2[1].m2 = input2[1].m0; +} + +// CHECK: define void @node03 +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Broadcasting")] +void node03(NodeOutput output3) +{ + ThreadNodeOutputRecords outrec = output3.GetThreadNodeOutputRecords(1); + // CHECK: %[[p1_0:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 0 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_0]], align 4 + // CHECK: %[[p1_1:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 1 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_1]], align 4 + // CHECK: %[[p1_2:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 2 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_2]], align 4 + // CHECK: %[[p1_3:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 3 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_3]], align 4 + outrec.Get().m1 = 111; + // CHECK: %[[p2_0:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 0 + // CHECK: store float 2.220000e+02, float addrspace(6)* %[[p2_0]], align 4 + // 
CHECK: %[[p2_1:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 1 + // CHECK: store float 2.220000e+02, float addrspace(6)* %[[p2_1]], align 4 + // CHECK: %[[p2_2:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 2 + // CHECK: store float 2.220000e+02, float addrspace(6)* %[[p2_2]], align 4 + // CHECK: %[[p2_3:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 3 + // CHECK: store float 2.220000e+02, float addrspace(6)* %[[p2_3]], align 4 + outrec.Get().m2 = 222; +} + +// CHECK: define void @node04 +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("Coalescing")] +void node04([MaxOutputRecords(5)] NodeOutput outputs4) +{ + GroupNodeOutputRecords outrec = outputs4.GetGroupNodeOutputRecords(1); + // CHECK: %[[p1_0:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 0 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_0]], align 4 + // CHECK: %[[p1_1:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 1 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_1]], align 4 + // CHECK: %[[p1_2:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 2 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_2]], align 4 + // CHECK: %[[p1_3:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 1, i32 3 + // CHECK: store float 1.110000e+02, float addrspace(6)* %[[p1_3]], align 4 + outrec.Get().m1 = 111; + // CHECK: %[[p2_0:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 0 + // CHECK: store float 2.220000e+02, float addrspace(6)* %[[p2_0]], align 4 + // CHECK: %[[p2_1:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 1 + // CHECK: store float 2.220000e+02, float addrspace(6)* 
%[[p2_1]], align 4 + // CHECK: %[[p2_2:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 2 + // CHECK: store float 2.220000e+02, float addrspace(6)* %[[p2_2]], align 4 + // CHECK: %[[p2_3:[^ ]+]] = getelementptr %[[RECORD]], %[[RECORD]] addrspace(6)* %{{[^,]+}}, i32 0, i32 2, i32 3 + // CHECK: store float 2.220000e+02, float addrspace(6)* %[[p2_3]], align 4 + outrec.Get().m2 = 222; +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/missing_node_attribute_error_msg.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/missing_node_attribute_error_msg.hlsl new file mode 100644 index 0000000000..ff143756d6 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/missing_node_attribute_error_msg.hlsl @@ -0,0 +1,23 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// check that an error gets generated each time a "node" attribute gets used without the presence of +// the '[shader("node")]' attribute + +[Shader("compute")] +[NodeIsProgramEntry] +[NodeID("nodeName")] +[NodeLocalRootArgumentsTableIndex(3)] +[NodeShareInputOf("nodeName")] +[NodeMaxRecursionDepth(51)] +[NodeDispatchGrid(1, 1, 1)] +[NodeMaxDispatchGrid(2,2,2)] +[NumThreads(1, 1, 1)] +void secondNode() +{ + // CHECK-DAG: Attribute nodeisprogramentry only applies to node shaders (indicated with '[shader("node")]') + // CHECK-DAG: Attribute nodeid only applies to node shaders (indicated with '[shader("node")]') + // CHECK-DAG: Attribute nodelocalrootargumentstableindex only applies to node shaders (indicated with '[shader("node")]') + // CHECK-DAG: Attribute nodeshareinputof only applies to node shaders (indicated with '[shader("node")]') + // CHECK-DAG: Attribute nodemaxrecursiondepth only applies to node shaders (indicated with '[shader("node")]') + // CHECK-DAG: Attribute nodedispatchgrid only applies to node shaders (indicated with '[shader("node")]') + // CHECK-DAG: Attribute nodemaxdispatchgrid only applies to node shaders (indicated with 
'[shader("node")]') +} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/nodeoutputarray.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/nodeoutputarray.hlsl new file mode 100644 index 0000000000..e4823be8f3 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/nodeoutputarray.hlsl @@ -0,0 +1,101 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s +// Tests for NodeOutputArray/EmptyNodeOutputArray/IndexNodeHandle +struct RECORD1 +{ + uint value; + uint value2; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(128, 1, 1)] +void node_1_0( + [NodeArraySize(128)] [MaxRecords(64)] NodeOutputArray OutputArray +) +{ +} +// CHECK: define void @node_1_0() +// CHECK: ret void + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(128, 1, 1)] +void node_1_1( + [NodeArraySize(128)] [MaxRecords(64)] NodeOutputArray OutputArray +) +{ + ThreadNodeOutputRecords outRec = OutputArray[1].GetThreadNodeOutputRecords(2); + outRec.OutputComplete(); +} +// CHECK: define void @node_1_1() +// CHECK: [[IDXNH:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.indexNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle {{%[0-9]+}}, i32 1) ; IndexNodeHandle(NodeOutputHandle,ArrayIndex) +// CHECK: [[ANH:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[IDXNH]], %dx.types.NodeInfo { i32 22, i32 8 }) +// CHECK: call %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANH]], i32 2, i1 true) + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(128, 1, 1)] +void node_1_2( + [NodeArraySize(128)] [MaxRecords(64)] NodeOutputArray OutputArray +) +{ + GroupNodeOutputRecords outRec = OutputArray[1].GetGroupNodeOutputRecords(2); + outRec.OutputComplete(); +} +// CHECK: define void @node_1_2() +// CHECK: [[IDXNH:%[0-9]+]] = call %dx.types.NodeHandle 
@dx.op.indexNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle {{%[0-9]+}}, i32 1) ; IndexNodeHandle(NodeOutputHandle,ArrayIndex) +// CHECK: [[ANH:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[IDXNH]], %dx.types.NodeInfo { i32 22, i32 8 }) +// CHECK: call %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANH]], i32 2, i1 false) + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(128, 1, 1)] +void node_1_3( + [NodeArraySize(128)] [MaxRecords(64)] NodeOutputArray OutputArray +) +{ + bool b = OutputArray[1].IsValid(); + if (b) { + GroupNodeOutputRecords outRec = OutputArray[1].GetGroupNodeOutputRecords(2); + outRec.OutputComplete(); + } +} +// CHECK: define void @node_1_3() +// CHECK: [[IDXNH:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.indexNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle {{%[0-9]+}}, i32 1) +// CHECK: [[ANH:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[IDXNH]], %dx.types.NodeInfo { i32 22, i32 8 }) +// CHECK: call i1 @dx.op.nodeOutputIsValid(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANH]]) + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(128, 1, 1)] +void node_2_0( + [NodeArraySize(128)] [MaxRecords(64)] EmptyNodeOutputArray EmptyOutputArray +) +{ +} +// CHECK: define void @node_2_0() +// CHECK: ret void + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(128, 1, 1)] +void node_2_1( + [NodeArraySize(128)] [MaxRecords(64)] EmptyNodeOutputArray EmptyOutputArray +) +{ + bool b = EmptyOutputArray[1].IsValid(); + if (b) { + EmptyOutputArray[1].GroupIncrementOutputCount(10); + } +} +// CHECK: define void @node_2_1() +// CHECK: [[IDXNH:%[0-9]+]] = call %dx.types.NodeHandle @dx.op.indexNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle {{%[0-9]+}}, i32 1) +// CHECK: [[ANH:%[0-9]+]] = 
call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 {{[0-9]+}}, %dx.types.NodeHandle [[IDXNH]], %dx.types.NodeInfo { i32 26, i32 0 }) +// CHECK: call i1 @dx.op.nodeOutputIsValid(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANH]]) +// CHECK: call void @dx.op.incrementOutputCount(i32 {{[0-9]+}}, %dx.types.NodeHandle [[ANH]], i32 10, i1 false) \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/nodeshareinputof.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/nodeshareinputof.hlsl new file mode 100644 index 0000000000..1c5a8ba2e4 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/nodeshareinputof.hlsl @@ -0,0 +1,53 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s + +// Check that the NodeShareInputOf metadata entry is populated correctly + +struct entryRecord +{ + int data0; +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1, 1, 1)] +void firstNode(DispatchNodeInputRecord inputData) +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1, 1, 1)] +[NodeShareInputOf("firstNode")] +void secondNode(DispatchNodeInputRecord inputData) +{ } + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1, 1, 1)] +[NodeShareInputOf("firstNode", 3)] +void thirdNode(DispatchNodeInputRecord inputData) +{ } + +// CHECK: !{void ()* @firstNode, !"firstNode", null, null, [[FIRSTATTRS:![0-9]+]]} +// CHECK: [[FIRSTATTRS]] = !{i32 8, i32 15, i32 13, i32 1, i32 15, [[FIRSTNODE:![0-9]+]], +// NodeShareInputOf entry should not be present +// CHECK-NOT: i32 17, {{![0-9]+}} +// CHECK: [[FIRSTNODE]] = !{!"firstNode", i32 0} + +// CHECK: !{void ()* @secondNode, !"secondNode", null, null, [[SECONDATTRS:![0-9]+]]} +// CHECK: [[SECONDATTRS]] = !{i32 8, i32 15, i32 13, i32 1, i32 15, [[SECONDNODE:![0-9]+]], +// NodeShareInputOf entry should reference "firstNode" +// CHECK-SAME: i32 17, [[FIRSTNODE]] +// CHECK-SAME: } +// CHECK: 
[[SECONDNODE]] = !{!"secondNode", i32 0} + +// CHECK: !{void ()* @thirdNode, !"thirdNode", null, null, [[THIRDATTRS:![0-9]+]]} +// CHECK: [[THIRDATTRS]] = !{i32 8, i32 15, i32 13, i32 1, i32 15, [[THIRDNODE:![0-9]+]], +// NodeShareInputOf entry should reference "firstNode" index 3 +// CHECK-SAME: i32 17, [[FIRSTNODE_3:![0-9]+]] +// CHECK-SAME: } +// CHECK: [[THIRDNODE]] = !{!"thirdNode", i32 0} +// CHECK: [[FIRSTNODE_3]] = !{!"firstNode", i32 3} + diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/rwnodeinputrecord_sv_dispatchgrid_array.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/rwnodeinputrecord_sv_dispatchgrid_array.hlsl new file mode 100644 index 0000000000..e1710d0107 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/rwnodeinputrecord_sv_dispatchgrid_array.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s + +// Check that SV_DispatchGrid supports array + +// CHECK: !dx.entryPoints = !{!{{[0-9+]+}}, ![[node01:[0-9+]+]]} +// CHECK: ![[node01]] = !{void ()* @node01, !"node01", null, null, ![[tags:[0-9+]+]]} +// CHECK: ![[tags]] = !{i32 8, i32 15, i32 13, i32 2, i32 15, !{{[0-9+]+}}, i32 16, i32 -1, i32 20, ![[inputs:[0-9+]+]], i32 4, !{{[0-9+]+}}, i32 5, !{{[0-9+]+}}} +// CHECK: ![[inputs]] = !{![[input0:[0-9+]+]]} +// CHECK: ![[input0]] = !{ +// CHECK-SAME: i32 2, ![[recordty:[0-9+]+]] +// CHECK: ![[recordty]] = !{ +// CHECK-SAME: i32 1, ![[svdispatchgrid:[0-9+]+]] +// CHECK: ![[svdispatchgrid]] = !{i32 12, i32 5, i32 3} + +struct RECORD +{ + uint a[3]; + uint b[3] : SV_DispatchGrid; +}; + +[Shader("node")] +[NodeLaunch("Coalescing")] +[numthreads(4,4,4)] +void node01(RWGroupNodeInputRecords input) +{ + input.Get().a = input.Get().b; +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/test_increment_output_count.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/test_increment_output_count.hlsl new file mode 100644 index 0000000000..20c59c3c52 --- /dev/null +++ 
b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/test_increment_output_count.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s + +void loadStressEmptyRecWorker( +EmptyNodeOutput outputNode) +{ + // CHECK: call void @dx.op.incrementOutputCount + outputNode.GroupIncrementOutputCount(1); +} + +[Shader("node")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void loadStressEmptyRec_1( + [MaxOutputRecords(1)] EmptyNodeOutput loadStressChild +) +{ + loadStressEmptyRecWorker(loadStressChild); +} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/wavesize.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/wavesize.hlsl new file mode 100644 index 0000000000..8c2b86ea76 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/wavesize.hlsl @@ -0,0 +1,44 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s + +// Check the WaveSize attribute is accepted by work graph nodes +// and appears in the metadata + +[shader("compute")] +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(1,1,1)] +[WaveSize(4)] +void node01() { } + +// CHECK: !{void ()* @node01, !"node01", null, null, [[NODE01:![0-9]+]]} +// CHECK: [[NODE01]] = !{i32 8, i32 5, i32 13, i32 1, i32 11, [[NODE01_WS:![0-9]+]] +// CHECK: [[NODE01_WS]] = !{i32 4} + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(1,1,1)] +[WaveSize(8)] +void node02() { } + +// CHECK: !{void ()* @node02, !"node02", null, null, [[NODE02:![0-9]+]]} +// CHECK: [[NODE02]] = !{i32 8, i32 15, i32 13, i32 1, i32 11, [[NODE02_WS:![0-9]+]] +// CHECK: [[NODE02_WS]] = !{i32 8} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(1,1,1)] +[WaveSize(16)] +void node03() { } + +// CHECK: !{void ()* @node03, !"node03", null, null, [[NODE03:![0-9]+]]} +// CHECK: [[NODE03]] = !{i32 8, i32 15, i32 13, i32 2, i32 11, [[NODE03_WS:![0-9]+]] +// CHECK: [[NODE03_WS]] = !{i32 16} + +[Shader("node")] +[NodeLaunch("thread")] +[WaveSize(32)] +void node04() { } + +// CHECK: !{void ()* 
@node04, !"node04", null, null, [[NODE04:![0-9]+]]} +// CHECK: [[NODE04]] = !{i32 8, i32 15, i32 13, i32 3, i32 11, [[NODE04_WS:![0-9]+]] +// CHECK: [[NODE04_WS]] = !{i32 32} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/zero_sized_node_record.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/zero_sized_node_record.hlsl new file mode 100644 index 0000000000..f6ffb04a34 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/zero_sized_node_record.hlsl @@ -0,0 +1,25 @@ +// RUN: %dxc -T lib_6_8 -DTYPE=DispatchNodeInputRecord -DARGS= %s | FileCheck %s +// RUN: %dxc -T lib_6_8 -DTYPE=RWDispatchNodeInputRecord -DARGS= %s | FileCheck %s +// RUN: %dxc -T lib_6_8 -DTYPE=GroupNodeInputRecords -DARGS= %s | FileCheck %s +// RUN: %dxc -T lib_6_8 -DTYPE=RWGroupNodeInputRecords -DARGS= %s | FileCheck %s +// RUN: %dxc -T lib_6_8 -DTYPE=ThreadNodeInputRecord -DARGS= %s | FileCheck %s +// RUN: %dxc -T lib_6_8 -DTYPE=RWThreadNodeInputRecord -DARGS= %s | FileCheck %s +// RUN: %dxc -T lib_6_8 -DTYPE=NodeOutput -DARGS= %s | FileCheck %s +// ================================================================== +// zero-sized-node-record (expected error) +// An error diagnostic is generated for a zero sized record used in +// a node input/output declaration. +// N.B. 
We use multiple run lines as only the first CodeGen error is +// reported +// ================================================================== + +struct EMPTY { +}; + +[Shader("node")] +[NodeLaunch("Broadcasting")] +[NumThreads(1,1,1)] +void node0(TYPE ARGS a) { } + +// CHECK: 22:12: error: record used in {{DispatchNodeInputRecord|RWDispatchNodeInputRecord|GroupNodeInputRecords|RWGroupNodeInputRecords|ThreadNodeInputRecord|RWThreadNodeInputRecord|NodeOutput}} may not have zero size +// CHECK: 16:8: note: zero sized record defined here diff --git a/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/NodeInput_type.ll b/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/NodeInput_type.ll new file mode 100644 index 0000000000..69f75e1a91 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/passes/hl/sroa_hlsl/NodeInput_type.ll @@ -0,0 +1,194 @@ +; RUN: %opt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; This test ensures that NodeIO types don't get broken down to i32s by SROA. +; SROA would have reduced the %inputData variable to an i32, but this pass should now keep the type the same. +; Specifically, before the change associated with this new test, the CHECKs below would fail because +; SROA would replace struct.DispatchNodeInputRecord with i32. 
+; +; CHECK: alloca %"struct.DispatchNodeInputRecord" +; CHECK: load %"struct.DispatchNodeInputRecord", %"struct.DispatchNodeInputRecord"* + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%ConstantBuffer = type opaque +%"struct.DispatchNodeInputRecord" = type { i32 } +%"struct.NodeOutput" = type { i32 } +%dx.types.NodeHandle = type { i8* } +%dx.types.NodeInfo = type { i32, i32 } +%dx.types.NodeRecordHandle = type { i8* } +%dx.types.NodeRecordInfo = type { i32, i32 } +%"struct.GroupNodeOutputRecords" = type { i32 } +%struct.loadStressRecord = type { [29 x i32] } + +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +define void @loadStress_16(%"struct.DispatchNodeInputRecord"* %inputData, %"struct.NodeOutput"* %loadStressChild) #0 { +entry: + %val.i = alloca i32, align 4 + %0 = call %dx.types.NodeHandle @"dx.hl.createnodeoutputhandle..%dx.types.NodeHandle (i32, i32)"(i32 11, i32 0) + %1 = call %dx.types.NodeHandle @"dx.hl.annotatenodehandle..%dx.types.NodeHandle (i32, %dx.types.NodeHandle, %dx.types.NodeInfo)"(i32 16, %dx.types.NodeHandle %0, %dx.types.NodeInfo { i32 6, i32 116 }) + %2 = call %"struct.NodeOutput" @"dx.hl.cast..%\22struct.NodeOutput\22 (i32, %dx.types.NodeHandle)"(i32 9, %dx.types.NodeHandle %1) + store %"struct.NodeOutput" %2, %"struct.NodeOutput"* %loadStressChild + %3 = call %dx.types.NodeRecordHandle @"dx.hl.createnodeinputrecordhandle..%dx.types.NodeRecordHandle (i32, i32)"(i32 13, i32 0) + %4 = call %dx.types.NodeRecordHandle @"dx.hl.annotatenoderecordhandle..%dx.types.NodeRecordHandle (i32, %dx.types.NodeRecordHandle, %dx.types.NodeRecordInfo)"(i32 17, %dx.types.NodeRecordHandle %3, %dx.types.NodeRecordInfo { i32 97, i32 116 }) + %5 = call 
%"struct.DispatchNodeInputRecord" @"dx.hl.cast..%\22struct.DispatchNodeInputRecord\22 (i32, %dx.types.NodeRecordHandle)"(i32 11, %dx.types.NodeRecordHandle %4) + store %"struct.DispatchNodeInputRecord" %5, %"struct.DispatchNodeInputRecord"* %inputData + %6 = alloca %"struct.GroupNodeOutputRecords" + %7 = alloca %"struct.DispatchNodeInputRecord" + %agg.tmp = alloca %"struct.GroupNodeOutputRecords", align 4 + %8 = bitcast %"struct.DispatchNodeInputRecord"* %inputData to i8*, !dbg !23 ; line:26 col:5 + call void @llvm.lifetime.start(i64 4, i8* %8) #0, !dbg !23 ; line:26 col:5 + %9 = load %"struct.NodeOutput", %"struct.NodeOutput"* %loadStressChild, !dbg !27 ; line:26 col:33 + %10 = call %dx.types.NodeHandle @"dx.hl.cast..%dx.types.NodeHandle (i32, %\22struct.NodeOutput\22)"(i32 10, %"struct.NodeOutput" %9), !dbg !27 ; line:26 col:33 + %11 = call %dx.types.NodeRecordHandle @"dx.hl.op..%dx.types.NodeRecordHandle (i32, %dx.types.NodeHandle, i32)"(i32 335, %dx.types.NodeHandle %10, i32 1), !dbg !27 ; line:26 col:33 + %12 = call %dx.types.NodeRecordHandle @"dx.hl.annotatenoderecordhandle..%dx.types.NodeRecordHandle (i32, %dx.types.NodeRecordHandle, %dx.types.NodeRecordInfo)"(i32 17, %dx.types.NodeRecordHandle %11, %dx.types.NodeRecordInfo { i32 70, i32 116 }), !dbg !27 ; line:26 col:33 + %13 = call %"struct.GroupNodeOutputRecords" @"dx.hl.cast..%\22struct.GroupNodeOutputRecords\22 (i32, %dx.types.NodeRecordHandle)"(i32 11, %dx.types.NodeRecordHandle %12), !dbg !27 ; line:26 col:33 + store %"struct.GroupNodeOutputRecords" %13, %"struct.GroupNodeOutputRecords"* %agg.tmp, !dbg !27 ; line:26 col:33 + %14 = bitcast %"struct.GroupNodeOutputRecords"* %6 to i8*, !dbg !23 ; line:26 col:5 + call void @llvm.lifetime.start(i64 4, i8* %14) #0, !dbg !23 ; line:26 col:5 + %15 = bitcast %"struct.GroupNodeOutputRecords"* %6 to i8*, !dbg !23 ; line:26 col:5 + %16 = bitcast %"struct.GroupNodeOutputRecords"* %agg.tmp to i8*, !dbg !23 ; line:26 col:5 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* 
%15, i8* %16, i64 4, i32 1, i1 false), !dbg !23 ; line:26 col:5 + %17 = bitcast i32* %val.i to i8*, !dbg !28 ; line:15 col:5 + call void @llvm.lifetime.start(i64 4, i8* %17) #0, !dbg !28, !noalias !31 ; line:15 col:5 + %18 = load %"struct.DispatchNodeInputRecord", %"struct.DispatchNodeInputRecord"* %inputData, !dbg !34, !alias.scope !31 ; line:15 col:17 + %19 = call %dx.types.NodeRecordHandle @"dx.hl.cast..%dx.types.NodeRecordHandle (i32, %\22struct.DispatchNodeInputRecord\22)"(i32 12, %"struct.DispatchNodeInputRecord" %18) #0, !dbg !34 ; line:15 col:17 + %20 = call %struct.loadStressRecord* @"dx.hl.op..%struct.loadStressRecord* (i32, %dx.types.NodeRecordHandle)"(i32 18, %dx.types.NodeRecordHandle %19) #0, !dbg !34, !noalias !31 ; line:15 col:17 + %data.i = getelementptr inbounds %struct.loadStressRecord, %struct.loadStressRecord* %20, i32 0, i32 0, !dbg !35 ; line:15 col:33 + %arrayidx.i = getelementptr inbounds [29 x i32], [29 x i32]* %data.i, i32 0, i32 0, !dbg !34 ; line:15 col:17 + %21 = load i32, i32* %arrayidx.i, align 4, !dbg !34, !tbaa !36, !noalias !31 ; line:15 col:17 + store i32 %21, i32* %val.i, align 4, !dbg !40, !tbaa !36, !noalias !31 ; line:15 col:10 + %22 = load i32, i32* %val.i, align 4, !dbg !41, !tbaa !36, !noalias !31 ; line:17 col:28 + %add.i = add i32 %22, 61, !dbg !42 ; line:17 col:32 + %23 = load %"struct.GroupNodeOutputRecords", %"struct.GroupNodeOutputRecords"* %6, !dbg !43, !noalias !31 ; line:17 col:5 + %24 = call %dx.types.NodeRecordHandle @"dx.hl.cast..%dx.types.NodeRecordHandle (i32, %\22struct.GroupNodeOutputRecords\22)"(i32 12, %"struct.GroupNodeOutputRecords" %23) #0, !dbg !43 ; line:17 col:5 + %25 = call %struct.loadStressRecord* @"dx.hl.op..%struct.loadStressRecord* (i32, %dx.types.NodeRecordHandle, i32)"(i32 18, %dx.types.NodeRecordHandle %24, i32 0) #0, !dbg !43, !noalias !31 ; line:17 col:5 + %data2.i = getelementptr inbounds %struct.loadStressRecord, %struct.loadStressRecord* %25, i32 0, i32 0, !dbg !44 ; line:17 col:18 + 
%arrayidx3.i = getelementptr inbounds [29 x i32], [29 x i32]* %data2.i, i32 0, i32 0, !dbg !43 ; line:17 col:5 + store i32 %add.i, i32* %arrayidx3.i, align 4, !dbg !45, !tbaa !36, !noalias !31 ; line:17 col:26 + %26 = bitcast i32* %val.i to i8*, !dbg !46 ; line:18 col:1 + call void @llvm.lifetime.end(i64 4, i8* %26) #0, !dbg !46, !noalias !31 ; line:18 col:1 + %27 = bitcast %"struct.DispatchNodeInputRecord"* %7 to i8*, !dbg !23 ; line:26 col:5 + call void @llvm.lifetime.end(i64 4, i8* %27) #0, !dbg !23 ; line:26 col:5 + %28 = bitcast %"struct.GroupNodeOutputRecords"* %6 to i8*, !dbg !23 ; line:26 col:5 + call void @llvm.lifetime.end(i64 4, i8* %28) #0, !dbg !23 ; line:26 col:5 + ret void, !dbg !47 ; line:27 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 + +; Function Attrs: nounwind +declare %struct.loadStressRecord* @"dx.hl.op..%struct.loadStressRecord* (i32, %dx.types.NodeRecordHandle)"(i32, %dx.types.NodeRecordHandle) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.NodeRecordHandle @"dx.hl.cast..%dx.types.NodeRecordHandle (i32, %\22struct.DispatchNodeInputRecord\22)"(i32, %"struct.DispatchNodeInputRecord") #2 + +; Function Attrs: nounwind +declare %struct.loadStressRecord* @"dx.hl.op..%struct.loadStressRecord* (i32, %dx.types.NodeRecordHandle, i32)"(i32, %dx.types.NodeRecordHandle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.NodeRecordHandle @"dx.hl.cast..%dx.types.NodeRecordHandle (i32, %\22struct.GroupNodeOutputRecords\22)"(i32, %"struct.GroupNodeOutputRecords") #2 + +; Function Attrs: nounwind +declare %dx.types.NodeRecordHandle @"dx.hl.op..%dx.types.NodeRecordHandle (i32, %dx.types.NodeHandle, i32)"(i32, %dx.types.NodeHandle, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.NodeHandle @"dx.hl.cast..%dx.types.NodeHandle (i32, %\22struct.NodeOutput\22)"(i32, %"struct.NodeOutput") #2 + +; Function Attrs: nounwind +declare 
%dx.types.NodeRecordHandle @"dx.hl.annotatenoderecordhandle..%dx.types.NodeRecordHandle (i32, %dx.types.NodeRecordHandle, %dx.types.NodeRecordInfo)"(i32, %dx.types.NodeRecordHandle, %dx.types.NodeRecordInfo) #0 + +; Function Attrs: nounwind readnone +declare %"struct.GroupNodeOutputRecords" @"dx.hl.cast..%\22struct.GroupNodeOutputRecords\22 (i32, %dx.types.NodeRecordHandle)"(i32, %dx.types.NodeRecordHandle) #2 + +; Function Attrs: nounwind +declare %dx.types.NodeRecordHandle @"dx.hl.createnodeinputrecordhandle..%dx.types.NodeRecordHandle (i32, i32)"(i32, i32) #0 + +; Function Attrs: nounwind readnone +declare %"struct.DispatchNodeInputRecord" @"dx.hl.cast..%\22struct.DispatchNodeInputRecord\22 (i32, %dx.types.NodeRecordHandle)"(i32, %dx.types.NodeRecordHandle) #2 + +; Function Attrs: nounwind +declare %dx.types.NodeHandle @"dx.hl.createnodeoutputhandle..%dx.types.NodeHandle (i32, i32)"(i32, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.NodeHandle @"dx.hl.annotatenodehandle..%dx.types.NodeHandle (i32, %dx.types.NodeHandle, %dx.types.NodeInfo)"(i32, %dx.types.NodeHandle, %dx.types.NodeInfo) #0 + +; Function Attrs: nounwind readnone +declare %"struct.NodeOutput" @"dx.hl.cast..%\22struct.NodeOutput\22 (i32, %dx.types.NodeHandle)"(i32, %dx.types.NodeHandle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind "dx.hlls"="ExtractRecordStructFromArray" } +attributes #2 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !11} +!dx.entryPoints = !{!16} +!dx.fnprops = !{!20} +!dx.options = !{!21, !22} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.7.0.4428 (sm68-core, 61a3275d7)"} +!3 = !{i32 1, i32 8} +!4 = !{!"lib", i32 6, i32 8} +!5 = !{i32 0, %"struct.DispatchNodeInputRecord" undef, !6, %"struct.GroupNodeOutputRecords" undef, !6, 
%"struct.NodeOutput" undef, !6} +!6 = !{i32 4, !7, !8} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, %struct.loadStressRecord undef} +!11 = !{i32 1, void (%"struct.DispatchNodeInputRecord"*, %"struct.NodeOutput"*)* @loadStress_16, !12} +!12 = !{!13, !15, !15} +!13 = !{i32 1, !14, !14} +!14 = !{} +!15 = !{i32 14, !14, !14} +!16 = !{null, !"", null, !17, null} +!17 = !{null, null, !18, null} +!18 = !{!19} +!19 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!20 = !{void (%"struct.DispatchNodeInputRecord"*, %"struct.NodeOutput"*)* @loadStress_16, i32 15, i32 16, i32 1, i32 1, i32 1, i1 false, !"loadStress_16", i32 0, !"", i32 0, i32 -1, i32 0, i32 0, i32 0, i32 3, i32 1, i32 1, i32 0} +!21 = !{i32 144} +!22 = !{i32 -1} +!23 = !DILocation(line: 26, column: 5, scope: !24) +!24 = !DISubprogram(name: "loadStress_16", scope: !25, file: !25, line: 23, type: !26, isLocal: false, isDefinition: true, scopeLine: 25, flags: DIFlagPrototyped, isOptimized: false, function: void (%"struct.DispatchNodeInputRecord"*, %"struct.NodeOutput"*)* @loadStress_16) +!25 = !DIFile(filename: "C:\5Cgit\5Cdxc\5Csm68-core\5Ctools\5Cclang\5Ctest\5CHLSLFileCheck\5Chlsl\5Cworkgraph\5Ccalled_function_arg_record_object.hlsl", directory: "") +!26 = !DISubroutineType(types: !14) +!27 = !DILocation(line: 26, column: 33, scope: !24) +!28 = !DILocation(line: 15, column: 5, scope: !29, inlinedAt: !30) +!29 = !DISubprogram(name: "loadStressWorker", scope: !25, file: !25, line: 10, type: !26, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false) +!30 = distinct !DILocation(line: 26, column: 5, scope: !24) +!31 = !{!32} +!32 = distinct !{!32, !33, !"\01?loadStressWorker@@YAXU?$DispatchNodeInputRecord@UloadStressRecord@@@@U?$GroupNodeOutputRecords@UloadStressRecord@@@@@Z: %inputData"} +!33 = distinct !{!33, 
!"\01?loadStressWorker@@YAXU?$DispatchNodeInputRecord@UloadStressRecord@@@@U?$GroupNodeOutputRecords@UloadStressRecord@@@@@Z"} +!34 = !DILocation(line: 15, column: 17, scope: !29, inlinedAt: !30) +!35 = !DILocation(line: 15, column: 33, scope: !29, inlinedAt: !30) +!36 = !{!37, !37, i64 0} +!37 = !{!"int", !38, i64 0} +!38 = !{!"omnipotent char", !39, i64 0} +!39 = !{!"Simple C/C++ TBAA"} +!40 = !DILocation(line: 15, column: 10, scope: !29, inlinedAt: !30) +!41 = !DILocation(line: 17, column: 28, scope: !29, inlinedAt: !30) +!42 = !DILocation(line: 17, column: 32, scope: !29, inlinedAt: !30) +!43 = !DILocation(line: 17, column: 5, scope: !29, inlinedAt: !30) +!44 = !DILocation(line: 17, column: 18, scope: !29, inlinedAt: !30) +!45 = !DILocation(line: 17, column: 26, scope: !29, inlinedAt: !30) +!46 = !DILocation(line: 18, column: 1, scope: !29, inlinedAt: !30) +!47 = !DILocation(line: 27, column: 1, scope: !24) + +; The test was generated using the pass test generation script: +; python3 ExtractIRForPassTest.py -p scalarrepl-param-hlsl -o outtest_without_nodeiotypecheck.ll %HLSL_SRC_DIR%\tools\clang\test\HLSLFileCheck\hlsl\workgraph\called_function_arg_record_object.hlsl -- -T lib_6_8 + diff --git a/tools/clang/test/HLSLFileCheck/shader_targets/library/shader_cv_mismatch.hlsl b/tools/clang/test/HLSLFileCheck/shader_targets/library/shader_cv_mismatch.hlsl new file mode 100644 index 0000000000..d9aa232ede --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/shader_targets/library/shader_cv_mismatch.hlsl @@ -0,0 +1,10 @@ +// RUN: %dxc -T lib_6_3 %s | FileCheck %s + +// CHECK: Invalid shader stage attribute combination + +[shader("compute")] +[shader("vertex")] +[ numthreads( 64, 2, 2 ) ] +void CVMain() { +} + diff --git a/tools/clang/test/HLSLFileCheck/shader_targets/library/shader_mp_mismatch.hlsl b/tools/clang/test/HLSLFileCheck/shader_targets/library/shader_mp_mismatch.hlsl new file mode 100644 index 0000000000..309ab33039 --- /dev/null +++ 
b/tools/clang/test/HLSLFileCheck/shader_targets/library/shader_mp_mismatch.hlsl @@ -0,0 +1,9 @@ +// RUN: %dxc -T lib_6_5 %s | FileCheck %s + +// CHECK: Invalid shader stage attribute combination + +[shader("mesh")] +[shader("pixel")] +void MPMain() { +} + diff --git a/tools/clang/test/HLSLFileCheck/shader_targets/library/shader_nvp_mismatch.hlsl b/tools/clang/test/HLSLFileCheck/shader_targets/library/shader_nvp_mismatch.hlsl new file mode 100644 index 0000000000..a7192436aa --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/shader_targets/library/shader_nvp_mismatch.hlsl @@ -0,0 +1,10 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s + +// CHECK: Invalid shader stage attribute combination + +[shader("node")] +[shader("vertex")] +[shader("pixel")] +void NVPMain() { +} + diff --git a/tools/clang/test/HLSLFileCheck/shader_targets/nodes/NodeOutput.hlsl b/tools/clang/test/HLSLFileCheck/shader_targets/nodes/NodeOutput.hlsl new file mode 100644 index 0000000000..2eaeca314a --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/shader_targets/nodes/NodeOutput.hlsl @@ -0,0 +1,62 @@ +// RUN: %dxc -Tlib_6_8 %s -ast-dump | FileCheck %s + +// Make sure NodeOutput attribute works at AST level. 
+ +struct MY_INPUT_RECORD + { + float value; + uint data; + }; + + struct MY_RECORD + { + uint3 dispatchGrid : SV_DispatchGrid; + // shader arguments: + uint foo; + float bar; + }; + struct MY_MATERIAL_RECORD + { + uint textureIndex; + float3 normal; + }; + +// CHECK:FunctionDecl 0x{{.*}} myFancyNode 'void (DispatchNodeInputRecord, NodeOutput, NodeOutput, NodeOutput [63], EmptyNodeOutput)' +// CHECK-NEXT:ParmVarDecl 0x{{.*}} myInput 'DispatchNodeInputRecord':'DispatchNodeInputRecord' +// CHECK-NEXT: HLSLMaxRecordsAttr 0x{{.*}} 4 +// CHECK-NEXT: ParmVarDecl 0x{{.*}} myFascinatingNode 'NodeOutput':'NodeOutput' +// CHECK-NEXT: HLSLMaxRecordsAttr 0x{{.*}} 4 +// CHECK-NEXT: ParmVarDecl 0x{{.*}} myRecords 'NodeOutput':'NodeOutput' +// CHECK-NEXT: HLSLMaxRecordsAttr 0x{{.*}} 4 +// CHECK-NEXT: HLSLNodeIdAttr 0x{{.*}} "myNiftyNode" 3 +// CHECK-NEXT: ParmVarDecl 0x{{.*}} col:60 myMaterials 'NodeOutput [63]' +// CHECK-NEXT:HLSLNodeArraySizeAttr 0x{{.*}} 63 +// CHECK-NEXT:HLSLAllowSparseNodesAttr 0x{{.*}} +// CHECK-NEXT:HLSLMaxRecordsSharedWithAttr 0x{{.*}} myRecords +// CHECK-NEXT:ParmVarDecl 0x{{.*}} myProgressCounter 'EmptyNodeOutput' +// CHECK-NEXT: HLSLMaxRecordsAttr 0x{{.*}} 20 +// CHECK-NEXT: CompoundStmt 0x +// CHECK-NEXT: HLSLNumThreadsAttr 0x{{.*}} 4 5 6 +// CHECK-NEXT: HLSLNodeLaunchAttr 0x{{.*}} "Broadcasting" +// CHECK-NEXT: HLSLShaderAttr 0x{{.*}} "node" + [Shader("node")] + [NodeLaunch("Broadcasting")] + [NumThreads(4,5,6)] + void myFancyNode( + + [MaxRecords(4)] DispatchNodeInputRecord myInput, + + [MaxRecords(4)] NodeOutput myFascinatingNode, + + [NodeID("myNiftyNode",3)] [MaxRecords(4)] NodeOutput myRecords, + + // TODO: update to NodeOutputArray. 
+ [MaxRecordsSharedWith(myRecords)] + [AllowSparseNodes] + [NodeArraySize(63)] NodeOutput myMaterials[63], + + // an output that has empty record size + [MaxRecords(20)] EmptyNodeOutput myProgressCounter + ) + { + } \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_duplicate1.hlsl b/tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_duplicate1.hlsl new file mode 100644 index 0000000000..c8553a9222 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_duplicate1.hlsl @@ -0,0 +1,34 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s + +// Duplicate MaxRecords info with matching limits + + +// CHECK: 28:68: error: Only one of MaxRecords or MaxRecordsSharedWith may be specified to the same parameter. +// CHECK: 29:68: error: Only one of MaxRecords or MaxRecordsSharedWith may be specified to the same parameter. +// CHECK: 30:68: error: Only one of MaxRecords or MaxRecordsSharedWith may be specified to the same parameter. +// CHECK: 31:68: error: Only one of MaxRecords or MaxRecordsSharedWith may be specified to the same parameter. 
+ +struct rec0 +{ + int i0; + float f0; +}; + +struct rec1 +{ + float f1; + int i1; +}; + +[Shader("node")] +[NodeLaunch("Thread")] +void DuplicateMax1( + RWThreadNodeInputRecord InputyMcInputFace, + [MaxRecords(5)] NodeOutput Output1, + [MaxRecordsSharedWith(Output1)] [MaxRecords(5)] NodeOutput Output2, + [MaxRecords(5)] [MaxRecordsSharedWith(Output1)] NodeOutput Output3, + [MaxRecordsSharedWith(Output6)] [MaxRecords(7)] NodeOutput Output4, + [MaxRecords(7)] [MaxRecordsSharedWith(Output6)] NodeOutput Output5, + [MaxRecords(7)] NodeOutput Output6) +{ +} diff --git a/tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_duplicate2.hlsl b/tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_duplicate2.hlsl new file mode 100644 index 0000000000..eb6ee30ff4 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_duplicate2.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s + +// Duplicate MaxRecords info with mismatching limits + +// CHECK: 27:68: error: Only one of MaxRecords or MaxRecordsSharedWith may be specified to the same parameter. +// CHECK: 28:68: error: Only one of MaxRecords or MaxRecordsSharedWith may be specified to the same parameter. +// CHECK: 29:68: error: Only one of MaxRecords or MaxRecordsSharedWith may be specified to the same parameter. +// CHECK: 30:68: error: Only one of MaxRecords or MaxRecordsSharedWith may be specified to the same parameter. 
+ +struct rec0 +{ + int i0; + float f0; +}; + +struct rec1 +{ + float f1; + int i1; +}; + +[Shader("node")] +[NodeLaunch("Thread")] +void DuplicateMax1( + RWThreadNodeInputRecord InputyMcInputFace, + [MaxRecords(1)] NodeOutput Output1, + [MaxRecordsSharedWith(Output1)] [MaxRecords(2)] NodeOutput Output2, + [MaxRecords(3)] [MaxRecordsSharedWith(Output1)] NodeOutput Output3, + [MaxRecordsSharedWith(Output6)] [MaxRecords(4)] NodeOutput Output4, + [MaxRecords(5)] [MaxRecordsSharedWith(Output6)] NodeOutput Output5, + [MaxRecords(6)] NodeOutput Output6) +{ +} diff --git a/tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_invalidref.hlsl b/tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_invalidref.hlsl new file mode 100644 index 0000000000..e71ccae02c --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_invalidref.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s + +// Test maxoutputrecordssharedwith with invalid references + +// CHECK: 26:52: error: MaxRecordsSharedWith must reference a valid ouput parameter name. +// CHECK: 28:62: error: MaxRecordsSharedWith must reference a valid ouput parameter name. +// CHECK: 30:52: error: MaxRecordsSharedWith must not reference the same parameter it is applied to. 
+ +struct rec0 +{ + int i0; + float f0; +}; + +struct rec1 +{ + float f1; + int i1; +}; + +[Shader("node")] +[NodeLaunch("Thread")] +void InvalidRef( + RWThreadNodeInputRecord InputyMcInputFace, + // MaxRecordsSharedWith referencing non-existent parameter + [MaxRecordsSharedWith(Output7)] NodeOutput Output1, + // MaxRecordsSharedWith referencing an input parameter + [MaxRecordsSharedWith(InputyMcInputFace)] NodeOutput Output2, + // MaxRecordsSharedWith referencing its own parameter + [MaxRecordsSharedWith(Output3)] NodeOutput Output3, + [MaxRecords(5)] NodeOutput Output4) +{ +} diff --git a/tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_shared_with.hlsl b/tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_shared_with.hlsl new file mode 100644 index 0000000000..bc42ab55d4 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/shader_targets/nodes/max_output_records_shared_with.hlsl @@ -0,0 +1,36 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s + +// Test referencing params with MaxOutputRecordsSharedWith before and after + +// CHECK: define void {{.*}}BackwardRef +// CHECK: define void {{.*}}ForwardRef + +struct rec0 +{ + int i0; + float f0; +}; + +struct rec1 +{ + float f1; + int i1; +}; + +[Shader("node")] +[NodeLaunch("Thread")] +void BackwardRef( + RWThreadNodeInputRecord InputyMcInputFace, + [MaxRecords(5)] NodeOutput Output1, + [MaxRecordsSharedWith(Output1)] NodeOutput Output2) +{ +} + +[Shader("node")] +[NodeLaunch("Thread")] +void ForwardRef( + RWThreadNodeInputRecord InputyMcInputFace, + [MaxRecordsSharedWith(Output2)] NodeOutput Output1, + [MaxRecords(5)] NodeOutput Output2) +{ +} diff --git a/tools/clang/test/HLSLFileCheck/validation/completed-handle-all.ll b/tools/clang/test/HLSLFileCheck/validation/completed-handle-all.ll new file mode 100644 index 0000000000..6769f9cb67 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/validation/completed-handle-all.ll @@ -0,0 +1,121 @@ +; RUN: %dxv %s | FileCheck %s + +; Not all 
invalid uses are checked because the usage lists aren't guaranteed to be ordered. +; However, this also tests the validator won't crash with a more complex case. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.NodeHandle = type { i8* } +%dx.types.NodeInfo = type { i32, i32 } +%dx.types.NodeRecordHandle = type { i8* } +%dx.types.NodeRecordInfo = type { i32, i32 } +%struct.loadStressRecord.0 = type { [3 x i32], [3 x i32] } + +@"\01?loadStressTemp@@3PAIA" = external addrspace(3) global [128 x i32], align 4 + +define void @loadStress_16() { + %1 = call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96) + %2 = call %dx.types.NodeHandle @dx.op.createNodeOutputHandle(i32 247, i32 0) + %3 = call %dx.types.NodeHandle @dx.op.annotateNodeHandle(i32 249, %dx.types.NodeHandle %2, %dx.types.NodeInfo { i32 6, i32 24 }) + %4 = call %dx.types.NodeRecordHandle @dx.op.allocateNodeOutputRecords(i32 238, %dx.types.NodeHandle %3, i32 1, i1 true) + %5 = call %dx.types.NodeRecordHandle @dx.op.annotateNodeRecordHandle(i32 251, %dx.types.NodeRecordHandle %4, %dx.types.NodeRecordInfo { i32 38, i32 24 }) + %6 = urem i32 %1, 3 + %7 = add nuw nsw i32 %6, 1 + %8 = call %struct.loadStressRecord.0 addrspace(6)* @dx.op.getNodeRecordPtr.struct.loadStressRecord.0(i32 239, %dx.types.NodeRecordHandle %5, i32 0) + %9 = getelementptr %struct.loadStressRecord.0, %struct.loadStressRecord.0 addrspace(6)* %8, i32 0, i32 0, i32 0 + store i32 %7, i32 addrspace(6)* %9, align 4 + %10 = getelementptr %struct.loadStressRecord.0, %struct.loadStressRecord.0 addrspace(6)* %8, i32 0, i32 0, i32 1 + store i32 1, i32 addrspace(6)* %10, align 4 + %11 = getelementptr %struct.loadStressRecord.0, %struct.loadStressRecord.0 addrspace(6)* %8, i32 0, i32 0, i32 2 + store i32 1, i32 addrspace(6)* %11, align 4 + %12 = load i32, i32 addrspace(3)* getelementptr inbounds ([128 x i32], [128 x i32] addrspace(3)* 
@"\01?loadStressTemp@@3PAIA", i32 0, i32 0), align 4, !tbaa !21 + %13 = getelementptr %struct.loadStressRecord.0, %struct.loadStressRecord.0 addrspace(6)* %8, i32 0, i32 1, i32 0 + store i32 %12, i32 addrspace(6)* %13, align 4 + %14 = load i32, i32 addrspace(3)* getelementptr inbounds ([128 x i32], [128 x i32] addrspace(3)* @"\01?loadStressTemp@@3PAIA", i32 0, i32 1), align 4, !tbaa !21 + %15 = getelementptr %struct.loadStressRecord.0, %struct.loadStressRecord.0 addrspace(6)* %8, i32 0, i32 1, i32 1 + store i32 %14, i32 addrspace(6)* %15, align 4 + %16 = load i32, i32 addrspace(3)* getelementptr inbounds ([128 x i32], [128 x i32] addrspace(3)* @"\01?loadStressTemp@@3PAIA", i32 0, i32 2), align 4, !tbaa !21 + %17 = getelementptr %struct.loadStressRecord.0, %struct.loadStressRecord.0 addrspace(6)* %8, i32 0, i32 1, i32 2 + store i32 %16, i32 addrspace(6)* %17, align 4 + call void @dx.op.outputComplete(i32 241, %dx.types.NodeRecordHandle %5) + + ; test duplicate output complete call + ; CHECK: error: Invalid use of completed record handle. + ; CHECK: note: at 'call void @dx.op.outputComplete(i32 241, %dx.types.NodeRecordHandle %5) + call void @dx.op.outputComplete(i32 241, %dx.types.NodeRecordHandle %5) + + + %bad.idx.handle = call %struct.loadStressRecord.0 addrspace(6)* @dx.op.getNodeRecordPtr.struct.loadStressRecord.0(i32 239, %dx.types.NodeRecordHandle %5, i32 0) + + %18 = icmp eq i32 %6, 0 + br i1 %18, label %20, label %19 + +;