Set denormal-fp-math earlier in lower phase

Set it in SpirvLowerMath so that later optimization passes can detect the attributes and decide what to do.
amdrexu · Sep 22, 2023 · 646507c · 646507c
1 parent c27fb11
commit 646507c
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 11 deletions.
diff --git a/llpc/lower/llpcSpirvLowerMath.cpp b/llpc/lower/llpcSpirvLowerMath.cpp
@@ -69,6 +69,20 @@ SpirvLowerMath::SpirvLowerMath()
       m_fp16RoundToZero(false) {
 }
 
+// =====================================================================================================================
+// Translates an FP denormal mode into the matching denormal-fp-math function attribute ("ieee" or "preserve-sign").
+//
+// @param func : Function that receives the attribute
+// @param fp32 : True selects "denormal-fp-math-f32", false selects "denormal-fp-math"
+// @param denormMode : FP denormal mode to translate
+static void setFpMathAttribute(Function &func, bool fp32, FpDenormMode denormMode) {
+  const bool keepDenorms = denormMode == FpDenormMode::FlushNone || denormMode == FpDenormMode::FlushIn;
+  const bool flushOutput = denormMode == FpDenormMode::FlushOut || denormMode == FpDenormMode::FlushInOut;
+  // Any other mode gets "preserve-sign" only for FP32; otherwise the function is left without the attribute.
+  if (keepDenorms || flushOutput || fp32)
+    func.addFnAttr(fp32 ? "denormal-fp-math-f32" : "denormal-fp-math", keepDenorms ? "ieee" : "preserve-sign");
+}
+
 // =====================================================================================================================
 // Initialise transform class.
 //
@@ -80,14 +94,20 @@ void SpirvLowerMath::init(Module &module) {
   if (m_shaderStage == ShaderStageInvalid)
     return;
 
-  auto commonShaderMode = Pipeline::getCommonShaderMode(module, getLgcShaderStage(m_shaderStage));
-  m_fp16DenormFlush = commonShaderMode.fp16DenormMode == FpDenormMode::FlushOut ||
-                      commonShaderMode.fp16DenormMode == FpDenormMode::FlushInOut;
-  m_fp32DenormFlush = commonShaderMode.fp32DenormMode == FpDenormMode::FlushOut ||
-                      commonShaderMode.fp32DenormMode == FpDenormMode::FlushInOut;
-  m_fp64DenormFlush = commonShaderMode.fp64DenormMode == FpDenormMode::FlushOut ||
-                      commonShaderMode.fp64DenormMode == FpDenormMode::FlushInOut;
-  m_fp16RoundToZero = commonShaderMode.fp16RoundMode == FpRoundMode::Zero;
+  // NOTE: We try to set denormal-fp-math here because later optimization passes will detect the attributes and decide
+  // what to do. Such attributes will be set once again in LGC.
+  auto shaderMode = Pipeline::getCommonShaderMode(module, getLgcShaderStage(m_shaderStage));
+  setFpMathAttribute(*m_entryPoint, false, shaderMode.fp16DenormMode);
+  setFpMathAttribute(*m_entryPoint, true, shaderMode.fp32DenormMode);
+  setFpMathAttribute(*m_entryPoint, false, shaderMode.fp64DenormMode);
+
+  m_fp16DenormFlush =
+      shaderMode.fp16DenormMode == FpDenormMode::FlushOut || shaderMode.fp16DenormMode == FpDenormMode::FlushInOut;
+  m_fp32DenormFlush =
+      shaderMode.fp32DenormMode == FpDenormMode::FlushOut || shaderMode.fp32DenormMode == FpDenormMode::FlushInOut;
+  m_fp64DenormFlush =
+      shaderMode.fp64DenormMode == FpDenormMode::FlushOut || shaderMode.fp64DenormMode == FpDenormMode::FlushInOut;
+  m_fp16RoundToZero = shaderMode.fp16RoundMode == FpRoundMode::Zero;
 }
 
 // =====================================================================================================================

diff --git a/llpc/test/shaderdb/core/TestEnableImplicitInvariantExports.vert b/llpc/test/shaderdb/core/TestEnableImplicitInvariantExports.vert
@@ -24,7 +24,7 @@ void main()
 ; WITHOUT_IIE: %[[val:.*]] = extractvalue [4 x <4 x float>] %{{.*}}, 3
 ; WITHOUT_IIE: %[[mul:.*]] = fmul <4 x float> %[[val]], %{{.*}}
 ; WITHOUT_IIE: %[[arg:.*]] = fadd <4 x float> %{{.*}}, %[[mul]]
-; WITHOUT_IIE-NEXT: call void @lgc.output.export.builtin.Position.i32.v4f32(i32 0, <4 x float> %[[arg]]) #0
+; WITHOUT_IIE-NEXT: call void @lgc.output.export.builtin.Position.i32.v4f32(i32 0, <4 x float> %[[arg]])
 ; WITHOUT_IIE: AMDLLPC SUCCESS
 */
 // END_WITHOUT_IIE
@@ -36,7 +36,7 @@ void main()
 ; WITH_IIE: %[[val:.*]] = extractvalue [4 x <4 x float>] %{{.*}}, 3
 ; WITH_IIE: %[[mul:.*]] = fmul reassoc nnan nsz arcp contract afn <4 x float> %[[val]], %{{.*}}
 ; WITH_IIE: %[[arg:.*]] = fadd reassoc nnan nsz arcp contract afn <4 x float> %{{.*}}, %[[mul]]
-; WITH_IIE-NEXT: call void @lgc.output.export.builtin.Position.i32.v4f32(i32 0, <4 x float> %[[arg]]) #0
+; WITH_IIE-NEXT: call void @lgc.output.export.builtin.Position.i32.v4f32(i32 0, <4 x float> %[[arg]])
 ; WITH_IIE: AMDLLPC SUCCESS
 */
 // END_WITH_IIE
diff --git a/llpc/test/shaderdb/core/TestXfbStateMetadata.vert b/llpc/test/shaderdb/core/TestXfbStateMetadata.vert
@@ -28,8 +28,9 @@ void main()
 // CHECK-NEXT:    ret void
 //
 //.
-// CHECK: attributes #[[ATTR0]] = { nounwind }
+// CHECK: attributes #[[ATTR0]] = { nounwind "denormal-fp-math-f32"="preserve-sign" }
 // CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) }
+// CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind }
 //.
 // CHECK: [[META1]] = !{i32 1}
 // CHECK: [[META6]] = !{i32 0}