From 1f75d7ae5a4eade62bd2d064ea21282311c7d918 Mon Sep 17 00:00:00 2001
From: jjsjann123
Date: Mon, 4 Nov 2024 08:49:41 -0800
Subject: [PATCH] missed one arg

---
 csrc/codegen.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/csrc/codegen.cpp b/csrc/codegen.cpp
index 505fe34e3b1..57241598dbd 100644
--- a/csrc/codegen.cpp
+++ b/csrc/codegen.cpp
@@ -402,7 +402,11 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
     }
   }

-  void generateVectorizedLdSt(Val* in, Val* out, CacheOp cache_op) {
+  void generateVectorizedLdSt(
+      Val* in,
+      Val* out,
+      CacheOp cache_op,
+      int64_t vector_word_size) {
     auto out_tv = out->as<kir::TensorIndex>()->view();
     auto in_tv = in->as<kir::TensorIndex>()->view();

@@ -1067,7 +1071,8 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
       indent() << kTab << "? ";

       // TODO: should we have the option to specify cache level?
-      generateVectorizedLdSt(top->in2(), top->out(), CacheOp::AllLevels);
+      generateVectorizedLdSt(
+          top->in2(), top->out(), CacheOp::AllLevels, vector_word_size);

       if (out_tv->getMemoryType() == MemoryType::Local &&
           !out_tv->isCircularBuffered()) {
@@ -1426,7 +1431,8 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
             "Invalid input to unary op with tensor output, found: ",
             ldst->in()->toString());

-        generateVectorizedLdSt(ldst->in(), ldst->out(), ldst->cacheOp());
+        generateVectorizedLdSt(
+            ldst->in(), ldst->out(), ldst->cacheOp(), vector_word_size);
       }
       return;
     }