From bf53f3c528e5b28ac459e547562feaf4f2cfce04 Mon Sep 17 00:00:00 2001 From: "Gao, Xiang" Date: Wed, 30 Oct 2024 18:41:28 -0700 Subject: [PATCH] Uncomment T2 set(0) and the wgmma/proxy fence asm statements --- __tmp_kernel_none_f0_c0_r0_g0.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/__tmp_kernel_none_f0_c0_r0_g0.cu b/__tmp_kernel_none_f0_c0_r0_g0.cu index 9757c7a6540..cb31634db10 100644 --- a/__tmp_kernel_none_f0_c0_r0_g0.cu +++ b/__tmp_kernel_none_f0_c0_r0_g0.cu @@ -10918,9 +10918,9 @@ nvfuser_none_f0_c0_r0_g0(Tensor<__half, 3, 3> T0, Tensor<__half, 3, 3> T1, const nvfuser_index_t i19; i19 = ((((-T0.logical_size[1LL]) + (16 * i13)) + i14) + (64 * ((nvfuser_index_t)threadIdx.y))) + i8; float T2[128]; - // ((*reinterpret_cast*>(&T2[0]))).set(0); - // asm volatile("wgmma.fence.sync.aligned;\n"); - // asm volatile("fence.proxy.async;\n"); + ((*reinterpret_cast*>(&T2[0]))).set(0); + asm volatile("wgmma.fence.sync.aligned;\n"); + asm volatile("fence.proxy.async;\n"); uint64_t* T7 = reinterpret_cast(array + smem_offset + 0); if ((b17 && Hopper::electSync(4294967295U))) { #pragma unroll