Minor cleanup in executor.cpp (#2750)
- Add NVF_THROW
- Add more information in bindInputs error
- Add IValue debug string function
- Clean up some stale code

Improves error handling when invalid static sizes are provided to nvFuser, specifically when incompatible sizes are found within `ExpressionEvaluator::propagateBoundValuesThroughExactMaps`.
csarofeen authored Sep 8, 2024 · parent 7c9876a · commit e9d27a2
Showing 10 changed files with 229 additions and 151 deletions.
17 changes: 10 additions & 7 deletions csrc/exceptions.h
@@ -253,17 +253,20 @@ inline const char* nvfCheckMsgImpl(const char* /*msg*/, const char* args) {
#define STRINGIZE_IMPL(x) #x
#define STRINGIZE(x) STRINGIZE_IMPL(x)

#define NVF_ERROR(cond, ...) \
if ((!(cond))) { \
nvfuser::nvfErrorFail( \
#define NVF_THROW(...) \
nvfuser::nvfErrorFail( \
__FUNCTION__, \
__FILE__, \
static_cast<uint32_t>(__LINE__), \
#cond " INTERNAL ASSERT FAILED at " \
STRINGIZE(__FILE__) ":" STRINGIZE(__LINE__) \
" INTERNAL ASSERT FAILED at " \
STRINGIZE(__FILE__) ":" STRINGIZE(__LINE__) \
", please report a bug with repro script to NVFuser at " \
"https://github.com/NVIDIA/Fuser/issues. ", \
nvfuser::to_str(__VA_ARGS__)); \
"https://github.com/NVIDIA/Fuser/issues. ", \
nvfuser::to_str(__VA_ARGS__));

#define NVF_ERROR(cond, ...) \
if ((!(cond))) { \
NVF_THROW(__VA_ARGS__) \
}

#define NVF_CHECK_MSG(cond, type, ...) \
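To illustrate the split: `NVF_ERROR` keeps its condition-checking contract and now delegates to `NVF_THROW`, which can also be used on its own for unconditional failures. A minimal sketch of the two call sites (the function and messages below are hypothetical, not from this commit):

```cpp
#include <cstdint>

// Hypothetical call sites for the refactored macros.
void validateRank(int64_t rank) {
  // Conditional form: expands to `if (!(cond)) { NVF_THROW(...) }`.
  NVF_ERROR(rank >= 0, "Expected a non-negative rank, got ", rank);

  if (rank > 8) {
    // Unconditional form: no dummy `false` condition is needed anymore.
    NVF_THROW("Ranks above 8 are unsupported, got ", rank);
  }
}
```

One visible side effect in this diff: the stringized condition (`#cond`) no longer appears in `NVF_ERROR` messages, since the shared `NVF_THROW` body has no condition to stringize.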
95 changes: 46 additions & 49 deletions csrc/executor.cpp
@@ -1348,35 +1348,38 @@ std::vector<FusionExecutor::GlobalBufferInfo> FusionExecutor::

namespace {

FusionExecutor::GlobalBufferInfo getBufferInfo(
ExpressionEvaluator& expr_eval,
DataType index_dtype,
TensorView* tv) {
FusionExecutor::GlobalBufferInfo info;
info.tv = tv;
std::tie(info.sizes, info.strides) = inferShapeOfOutput(info.tv, expr_eval);
auto dtype =
(info.tv->dtype() == DataType::Index ? index_dtype : info.tv->dtype());
info.type = data_type_to_aten(dtype);
return info;
}

//! Return information necessary for allocating output tensors. Input
//! and output tensors are allowed to alias each other, which is
//! specified by the list of int pairs of input and output indices
std::vector<FusionExecutor::GlobalBufferInfo> getOutputBufferInfo(
const KernelArgumentHolder& args,
std::vector<FusionExecutor::GlobalBufferInfo> getBufferInfos(
ExpressionEvaluator& expr_eval,
DataType index_dtype,
const Fusion* fusion) {
std::vector<FusionExecutor::GlobalBufferInfo> outputs;
outputs.reserve(fusion->outputs().size());
NVF_ERROR(
args.size() == fusion->inputs().size(),
"fusion arguments length does not match runtime arguments.");
for (const auto out_i : c10::irange(fusion->outputs().size())) {
auto out_val = fusion->outputs()[out_i];
const std::vector<Val*>& fusion_outputs) {
FUSER_PERF_SCOPE("FusionExecutor::getOutbufferInfo");
std::vector<FusionExecutor::GlobalBufferInfo> output_buffer_infos;
output_buffer_infos.reserve(fusion_outputs.size());
for (const auto out : fusion_outputs) {
NVF_ERROR(
out_val->isA<TensorView>(),
out->isA<TensorView>(),
"Cannot allocate outputs that are not tensors.");

FusionExecutor::GlobalBufferInfo info;
info.tv = out_val->as<TensorView>();
std::tie(info.sizes, info.strides) = inferShapeOfOutput(info.tv, expr_eval);
auto dtype =
(info.tv->dtype() == DataType::Index ? index_dtype : info.tv->dtype());
info.type = data_type_to_aten(dtype);

outputs.emplace_back(info);
output_buffer_infos.emplace_back(
getBufferInfo(expr_eval, index_dtype, out->as<TensorView>()));
}
return outputs;
return output_buffer_infos;
}

} // namespace
@@ -1389,7 +1392,7 @@ std::vector<at::Tensor> allocOutputSpace(
auto expr_eval = executor_utils::bindInputs(fusion_inputs, fusion);

auto output_info =
getOutputBufferInfo(fusion_inputs, expr_eval, PrimDataType::Int, fusion);
getBufferInfos(expr_eval, PrimDataType::Int, fusion->outputs());

return allocateOutputs(fusion, output_info, device, expr_eval);
}
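The per-tensor inference now lives in `getBufferInfo`, and `getBufferInfos` is a thin loop over any list of `Val*`, so fusion outputs and Host IR container outputs (used later in this diff) share one code path. A sketch of the call pattern, assuming `args`, `fusion`, and `host_ir_container` are in scope with the types used elsewhere in this diff:

```cpp
ExpressionEvaluator expr_eval = executor_utils::bindInputs(args, fusion);

// One helper, many Val* lists: fusion outputs...
auto fusion_out_info =
    getBufferInfos(expr_eval, PrimDataType::Int, fusion->outputs());

// ...or a Host IR container's outputs, with no duplicated
// shape/stride/dtype inference.
auto hic_out_info = getBufferInfos(
    expr_eval, PrimDataType::Int, host_ir_container->outputs());
```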
@@ -1421,8 +1424,8 @@ KernelArgumentHolder FusionExecutor::inferOutputSizes(

auto arg_index_type = args.getSmallestIndexTypeOfArguments();

KernelArgumentHolder ret;
ret.setDeviceIndex(args.getDeviceIndex());
KernelArgumentHolder output_tensor_proxies;
output_tensor_proxies.setDeviceIndex(args.getDeviceIndex());

for (Val* output : fusion->outputs()) {
NVF_ERROR(
Expand All @@ -1433,9 +1436,9 @@ KernelArgumentHolder FusionExecutor::inferOutputSizes(
const auto dtype = (output_tv->dtype() == DataType::Index)
? data_type_to_aten(arg_index_type)
: data_type_to_aten(output_tv->dtype());
ret.pushTensorProxy(sizes, strides, dtype);
output_tensor_proxies.pushTensorProxy(sizes, strides, dtype);
}
return ret;
return output_tensor_proxies;
}

namespace {
@@ -1547,15 +1550,6 @@ void dumpKernelArgs(
}
}

FusionExecutor::GlobalBufferInfo getGlobalBufferAllocationInfo(
const at::Tensor& at_tensor) {
FusionExecutor::GlobalBufferInfo info{
.sizes = at_tensor.sizes().vec(),
.strides = at_tensor.strides().vec(),
.type = at_tensor.scalar_type()};
return info;
}

} // namespace

void FusionExecutor::initializeExecutorEntry(
@@ -1587,13 +1581,16 @@ void FusionExecutor::initializeExecutorEntry(

if (outputs.empty()) {
output_info =
getOutputBufferInfo(args, expr_eval, index_type, lowered_->kernel());
getBufferInfos(expr_eval, index_type, lowered_->kernel()->outputs());
} else {
// Need to save the information necessary for allocations as
// future uses of this ExecutorEntry may not be provided with
// allocated outputs
for (const auto& output : outputs) {
output_info.emplace_back(getGlobalBufferAllocationInfo(output));
output_info.emplace_back(FusionExecutor::GlobalBufferInfo{
.sizes = output.sizes().vec(),
.strides = output.strides().vec(),
.type = output.scalar_type()});
}
}

@@ -1849,10 +1846,13 @@ void FusionExecutor::resetCompiledKernelProperties() {
}

std::vector<at::Tensor> FusionExecutor::evaluateFusionOutputs(
KernelArgumentHolder& args,
std::vector<at::Tensor> outputs,
ExpressionEvaluator& expr_eval) {
// TODO: Add relevant profiling code.
FUSER_PERF_SCOPE("FusionExecutor::runFusion::evaluateFusionOutputs");
NVF_ERROR(
outputs.empty(),
"Fusion executor is using expression evaluator,",
" and expects that the outputs are not populated, which they were.");
if (outputs.empty()) {
for (const auto& out_val : fusion()->outputs()) {
auto out_tensor =
@@ -1861,11 +1861,12 @@
outputs.emplace_back(out_tensor);
}
}
args.push(outputs);
return outputs;
}

namespace {
// Host IR specific function, returns the at::Tensor (ordered list) associated
// with the provided Fusion output tv
at::Tensor findBufferForFusionOutput(
const std::vector<at::Tensor>& out_tensors,
const Val* fusion_out,
@@ -1903,14 +1904,10 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
" provided number of outputs does not match fusion output");

// Bind fusion inputs
ExpressionEvaluator expr_eval;
const auto& inputs = fusion()->inputs();
for (const auto i : c10::irange(inputs.size())) {
expr_eval.bind(inputs[i], *args[i]);
}
auto expr_eval = executor_utils::bindInputs(args, fusion());

if (isExpressionEvaluated(fusion())) {
outputs = evaluateFusionOutputs(args, outputs, expr_eval);
outputs = evaluateFusionOutputs(outputs, expr_eval);
if (isProfilerEnabled()) {
auto& sprof = FusionProfiler::segment(group_id_);
sprof.stopKernel();
@@ -1921,8 +1918,8 @@

if (host_ir_container_ != nullptr) {
if (outputs.empty()) {
std::vector<GlobalBufferInfo> output_info = getOutputBufferInfo(
args, expr_eval, PrimDataType::Int, host_ir_container_.get());
std::vector<GlobalBufferInfo> output_info = getBufferInfos(
expr_eval, PrimDataType::Int, host_ir_container_->outputs());
outputs = allocateOutputs(
host_ir_container_.get(), output_info, options_.device, expr_eval);
}
@@ -2009,7 +2006,7 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
// Skip trivially forwarded outputs because they are just placeholders
continue;
}
expr_eval.bind(output, *args[inputs.size() + i]);
expr_eval.bind(output, *args[kernel()->inputs().size() + i]);
}

std::vector<at::Tensor> intermediates;
@@ -2062,7 +2059,7 @@ std::vector<at::Tensor> FusionExecutor::runFusion(
intermediates.push_back(intermediate_buffer);
expr_eval.bind(
kernel()->summary().global_allocations.at(i)->buffer(),
*args[inputs.size() + outputs.size() + i]);
*args[kernel()->inputs().size() + outputs.size() + i]);
if (buf_info.is_profile_buffer) {
profile_buffer = intermediate_buffer;
}
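The offsets in the hunks above (`kernel()->inputs().size() + i` and `kernel()->inputs().size() + outputs.size() + i`) follow from the argument holder's layout: inputs first, then outputs, then intermediate global buffers. A small illustrative sketch of that arithmetic (helper names are hypothetical, not nvFuser API):

```cpp
#include <cstddef>

// KernelArgumentHolder layout during runFusion: [inputs | outputs | buffers].
size_t outputArgIndex(size_t num_inputs, size_t i) {
  // matches *args[kernel()->inputs().size() + i]
  return num_inputs + i;
}

size_t intermediateArgIndex(size_t num_inputs, size_t num_outputs, size_t i) {
  // matches *args[kernel()->inputs().size() + outputs.size() + i]
  return num_inputs + num_outputs + i;
}
```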
8 changes: 6 additions & 2 deletions csrc/executor.h
@@ -65,7 +65,7 @@ class FusionExecutor : public NonCopyable {
//! Notes: 1. This API should ignore aliased outputs instead of
//! pushing scalar int 0 as a place-holder.
//! 2. This API does not allocate output in memory, but only returns the
//! inferred output sizes.
//! inferred output sizes. Used in kernel_cache.cpp.
KernelArgumentHolder inferOutputSizes(
Fusion* fusion,
const KernelArgumentHolder& args,
@@ -118,10 +118,14 @@

//! Computes fusion outputs through expression evaluator.
std::vector<at::Tensor> evaluateFusionOutputs(
KernelArgumentHolder& args,
std::vector<at::Tensor> outputs,
ExpressionEvaluator& expr_eval);

// TODO: args shouldn't come in as a reference here, because we will append
// the outputs to it in order to send it to the kernel. For now none of the
// callers re-consume the args, so it is okay. It isn't changed yet because
// taking args by value makes a call such as runFusion({}) ambiguous, and
// that form is used in some places in the codebase.
NVF_API std::vector<at::Tensor> runFusion(
KernelArgumentHolder& args,
const LaunchParams& launch_constraints = LaunchParams(),
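The ambiguity the TODO refers to is the standard braced-initializer overload problem: `{}` can be converted to more than one parameter type once nothing is taken by non-const reference. A standalone sketch (all names hypothetical):

```cpp
#include <vector>

struct Holder {}; // stand-in for KernelArgumentHolder

void run(Holder& args) {}                   // current signature: by reference
void run(const std::vector<int>& inputs) {} // convenience overload

void caller() {
  run({}); // OK today: {} cannot bind to the non-const Holder&, so the
           // vector overload is the only viable one.
  // If `run` took `Holder args` by value instead, `run({})` would be
  // ambiguous: {} could initialize either Holder or std::vector<int>.
}
```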
15 changes: 14 additions & 1 deletion csrc/executor_utils.cpp
@@ -716,7 +716,20 @@ ExpressionEvaluator bindInputs(
// NOTE: we bind all inputs here, including at::Tensors. This means that
// expr_eval will create a PolymorphicValue containing *args[i], which means
// that at::Tensor's lifetime will be at least as long as that of expr_eval.
expr_eval.bind(inputs[i], *args[i], true);
try {
expr_eval.bind(inputs[i], *args[i], true);
} catch (const nvfError& e) {
std::stringstream ss;
ss << "When trying to run the provided host program,"
<< " there was an error with the provided input " << i
<< ". Provided input was:\n ";
ss << PolymorphicValue_functions::toString(*args[i]);
ss << "\n Fusion input is:\n ";
ss << inputs[i]->toString();
ss << "\n Expr eval provided the error:\n\"\"\"";
ss << e.msg() << "\"\"\"\n";
NVF_THROW(ss.str());
}
}

return expr_eval;
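The new block wraps each failing `bind()` with per-input context before rethrowing via `NVF_THROW`. A generic, self-contained sketch of the same catch-enrich-rethrow pattern (standard-library types stand in for the nvFuser ones):

```cpp
#include <sstream>
#include <stdexcept>
#include <vector>

void bindAll(
    const std::vector<int>& provided,
    const std::vector<int>& expected) {
  for (size_t i = 0; i < provided.size(); ++i) {
    try {
      if (provided[i] != expected[i]) {
        throw std::runtime_error("bind failed: value mismatch");
      }
    } catch (const std::exception& e) {
      std::stringstream ss;
      ss << "There was an error with the provided input " << i
         << ". Provided input was:\n  " << provided[i]
         << "\n Expected input is:\n  " << expected[i]
         << "\n Underlying error:\n\"\"\"" << e.what() << "\"\"\"\n";
      // One rich error instead of a bare low-level one.
      throw std::runtime_error(ss.str());
    }
  }
}
```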
(6 more changed files not shown)