Commit 1f75d7a: missed one arg
jjsjann123 committed Nov 4, 2024 (1 parent: 65aa77d)
1 changed file with 9 additions and 3 deletions: csrc/codegen.cpp
@@ -402,7 +402,11 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
     }
   }
 
-  void generateVectorizedLdSt(Val* in, Val* out, CacheOp cache_op) {
+  void generateVectorizedLdSt(
+      Val* in,
+      Val* out,
+      CacheOp cache_op,
+      int64_t vector_word_size) {
     auto out_tv = out->as<kir::TensorIndex>()->view();
     auto in_tv = in->as<kir::TensorIndex>()->view();

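For context, the sketch below is a minimal, standalone illustration (not nvFuser's actual codegen.cpp) of why the vectorization width is threaded into a shared emitter like generateVectorizedLdSt as an explicit argument: the same helper is reused from several code paths, and the emitted copy needs the element count supplied at each call site. The loadGeneric name and the string-based emission are assumptions made for this example only.

// Hypothetical, self-contained sketch; the names below (loadGeneric, the
// string-based emitter) are illustrative and not taken from nvFuser.
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

// Emit a vectorized copy of `vector_word_size` elements from `in` to `out`.
// The width is an explicit parameter because the emitter has no way to
// recover it from the operand strings alone.
std::string generateVectorizedLdSt(
    const std::string& in,
    const std::string& out,
    const std::string& cache_op,
    int64_t vector_word_size) {
  std::ostringstream code;
  code << "loadGeneric<float, " << vector_word_size << ">(&" << out << ", &"
       << in << "); // cache_op=" << cache_op;
  return code.str();
}

int main() {
  // Two call sites, mirroring the two hunks below: both forward the
  // vector word size they already have in scope.
  std::cout << generateVectorizedLdSt("T2[i]", "T7[i]", "AllLevels", 4) << "\n";
  std::cout << generateVectorizedLdSt("T3[i]", "T8[i]", "Streaming", 8) << "\n";
  return 0;
}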
@@ -1067,7 +1071,8 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
       indent() << kTab << "? ";
 
       // TODO: should we have the option to specify cache level?
-      generateVectorizedLdSt(top->in2(), top->out(), CacheOp::AllLevels);
+      generateVectorizedLdSt(
+          top->in2(), top->out(), CacheOp::AllLevels, vector_word_size);
 
       if (out_tv->getMemoryType() == MemoryType::Local &&
           !out_tv->isCircularBuffered()) {
@@ -1426,7 +1431,8 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
           "Invalid input to unary op with tensor output, found: ",
           ldst->in()->toString());
 
-      generateVectorizedLdSt(ldst->in(), ldst->out(), ldst->cacheOp());
+      generateVectorizedLdSt(
+          ldst->in(), ldst->out(), ldst->cacheOp(), vector_word_size);
     }
     return;
   }
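Both call sites above assume a vector_word_size variable is already in scope in the surrounding handler. As a rough sketch of where such a value typically comes from, it would be the compile-time extent of the axis marked for vectorization; the FakeTensor type and vectorWordSizeOf helper below are stand-ins invented for illustration and do not mirror nvFuser's IR classes.

// Hypothetical sketch of deriving the word size at the call site; the types
// and helper here are illustrative stand-ins, not nvFuser IR.
#include <cstdint>
#include <iostream>
#include <vector>

struct FakeAxis {
  int64_t extent = 1;
  bool vectorized = false;
};

struct FakeTensor {
  std::vector<FakeAxis> axes;
};

// Returns the extent of the (single) vectorized axis, or 1 if none is marked.
int64_t vectorWordSizeOf(const FakeTensor& tv) {
  for (const FakeAxis& axis : tv.axes) {
    if (axis.vectorized) {
      return axis.extent;
    }
  }
  return 1;
}

int main() {
  FakeTensor t{{{128, false}, {4, true}}};
  // This value would then be forwarded into generateVectorizedLdSt(...).
  std::cout << "vector_word_size = " << vectorWordSizeOf(t) << "\n";
  return 0;
}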
