Skip to content

Commit

Permalink
save
Browse files (browse the repository at this point in the history)
  • Loading branch information
zasdfgbnm committed Nov 5, 2024
1 parent 04cd944 commit d22e206
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions __tmp_kernel_none_f0_c0_r0_g0.cu
Original file line number | Diff line number | Diff line change
Expand Up @@ -10927,6 +10927,10 @@ nvfuser_none_f0_c0_r0_g0(Tensor<__half, 3, 3> T0, Tensor<__half, 3, 3> T1, const
for(nvfuser_index_t i20 = 0; i20 < 4; ++i20) {
mbarrier::init(toSmem((&T7[i20])), 1U);
}
#pragma unroll
for(nvfuser_index_t i20 = 0; i20 < 4; ++i20) {
mbarrier::init(toSmem((&T7[i20 + 4])), 128U);
}
}
__syncthreads();
#pragma unroll
Expand Down Expand Up @@ -11125,9 +11129,9 @@ nvfuser_none_f0_c0_r0_g0(Tensor<__half, 3, 3> T0, Tensor<__half, 3, 3> T1, const
"n"(1),
"n"(1)
);
__syncthreads();
asm volatile("wgmma.commit_group.sync.aligned;\n");
asm volatile("wgmma.wait_group.sync.aligned %0;\n"::"n"(0):"memory");
// mbarrier::arrive(toSmem((&T7[i32 + 4])));
}
#pragma unroll 3
for(nvfuser_index_t i35 = (i2 - 3); i35 < i2; ++i35) {
Expand Down Expand Up @@ -11283,7 +11287,7 @@ nvfuser_none_f0_c0_r0_g0(Tensor<__half, 3, 3> T0, Tensor<__half, 3, 3> T1, const
}
if ((b17 && Hopper::electSync(4294967295U))) {
#pragma unroll
for(nvfuser_index_t i39 = 0; i39 < 4; ++i39) {
for(nvfuser_index_t i39 = 0; i39 < 8; ++i39) {
mbarrier::inval(toSmem((&T7[i39])));
}
}
Expand Down

0 comments on commit d22e206

Please sign in to comment.