diff --git a/csrc/exceptions.h b/csrc/exceptions.h
index 8084a1191fb..b472b281b83 100644
--- a/csrc/exceptions.h
+++ b/csrc/exceptions.h
@@ -253,17 +253,20 @@ inline const char* nvfCheckMsgImpl(const char* /*msg*/, const char* args) {
 #define STRINGIZE_IMPL(x) #x
 #define STRINGIZE(x) STRINGIZE_IMPL(x)
 
-#define NVF_ERROR(cond, ...)                                           \
-  if ((!(cond))) {                                                     \
-    nvfuser::nvfErrorFail(                                             \
+#define NVF_THROW(...)                                                 \
+  nvfuser::nvfErrorFail(                                               \
       __FUNCTION__,                                                    \
       __FILE__,                                                        \
       static_cast<uint32_t>(__LINE__),                                 \
-      #cond " INTERNAL ASSERT FAILED at "                              \
-          STRINGIZE(__FILE__) ":" STRINGIZE(__LINE__)                  \
+      " INTERNAL ASSERT FAILED at "                                    \
+      STRINGIZE(__FILE__) ":" STRINGIZE(__LINE__)                      \
       ", please report a bug with repro script to NVFuser at "         \
-          "https://github.com/NVIDIA/Fuser/issues. ",                  \
-      nvfuser::to_str(__VA_ARGS__));                                   \
+      "https://github.com/NVIDIA/Fuser/issues. ",                      \
+      nvfuser::to_str(__VA_ARGS__));
+
+#define NVF_ERROR(cond, ...)                                           \
+  if ((!(cond))) {                                                     \
+    NVF_THROW(__VA_ARGS__)                                             \
   }
 
 #define NVF_CHECK_MSG(cond, type, ...) \
diff --git a/csrc/executor.cpp b/csrc/executor.cpp
index f9f8bf7ba5c..ca1d5403891 100644
--- a/csrc/executor.cpp
+++ b/csrc/executor.cpp
@@ -1348,35 +1348,38 @@ std::vector<FusionExecutor::GlobalBufferInfo> FusionExecutor::
 
 namespace {
 
+FusionExecutor::GlobalBufferInfo getBufferInfo(
+    ExpressionEvaluator& expr_eval,
+    DataType index_dtype,
+    TensorView* tv) {
+  FusionExecutor::GlobalBufferInfo info;
+  info.tv = tv;
+  std::tie(info.sizes, info.strides) = inferShapeOfOutput(info.tv, expr_eval);
+  auto dtype =
+      (info.tv->dtype() == DataType::Index ? index_dtype : info.tv->dtype());
+  info.type = data_type_to_aten(dtype);
+  return info;
+}
+
 //! Return information necessary for allocating output tensors. Input
 //! and output tensors are allowed to alias each other, which is
 //! specified by the list of int pairs of input and output indices
-std::vector<FusionExecutor::GlobalBufferInfo> getOutputBufferInfo(
-    const KernelArgumentHolder& args,
+std::vector<FusionExecutor::GlobalBufferInfo> getBufferInfos(
     ExpressionEvaluator& expr_eval,
     DataType index_dtype,
-    const Fusion* fusion) {
-  std::vector<FusionExecutor::GlobalBufferInfo> outputs;
-  outputs.reserve(fusion->outputs().size());
-  NVF_ERROR(
-      args.size() == fusion->inputs().size(),
-      "fusion arguments length does not match runtime arguments.");
-  for (const auto out_i : c10::irange(fusion->outputs().size())) {
-    auto out_val = fusion->outputs()[out_i];
+    const std::vector<Val*>& fusion_outputs) {
+  FUSER_PERF_SCOPE("FusionExecutor::getOutbufferInfo");
+  std::vector<FusionExecutor::GlobalBufferInfo> output_buffer_infos;
+  output_buffer_infos.reserve(fusion_outputs.size());
+  for (const auto out : fusion_outputs) {
     NVF_ERROR(
-        out_val->isA<TensorView>(),
+        out->isA<TensorView>(),
         "Cannot allocate outputs that are not tensors.");
-    FusionExecutor::GlobalBufferInfo info;
-    info.tv = out_val->as<TensorView>();
-    std::tie(info.sizes, info.strides) = inferShapeOfOutput(info.tv, expr_eval);
-    auto dtype =
-        (info.tv->dtype() == DataType::Index ? index_dtype : info.tv->dtype());
-    info.type = data_type_to_aten(dtype);
-
-    outputs.emplace_back(info);
+    output_buffer_infos.emplace_back(
+        getBufferInfo(expr_eval, index_dtype, out->as<TensorView>()));
   }
-  return outputs;
+  return output_buffer_infos;
 }
 
 } // namespace
 
@@ -1389,7 +1392,7 @@ std::vector<at::Tensor> allocOutputSpace(
   auto expr_eval = executor_utils::bindInputs(fusion_inputs, fusion);
 
   auto output_info =
-      getOutputBufferInfo(fusion_inputs, expr_eval, PrimDataType::Int, fusion);
+      getBufferInfos(expr_eval, PrimDataType::Int, fusion->outputs());
 
   return allocateOutputs(fusion, output_info, device, expr_eval);
 }
@@ -1421,8 +1424,8 @@ KernelArgumentHolder FusionExecutor::inferOutputSizes(
 
   auto arg_index_type = args.getSmallestIndexTypeOfArguments();
 
-  KernelArgumentHolder ret;
-  ret.setDeviceIndex(args.getDeviceIndex());
+  KernelArgumentHolder output_tensor_proxies;
+  output_tensor_proxies.setDeviceIndex(args.getDeviceIndex());
 
   for (Val* output : fusion->outputs()) {
     NVF_ERROR(
@@ -1433,9 +1436,9 @@ KernelArgumentHolder FusionExecutor::inferOutputSizes(
     const auto dtype = (output_tv->dtype() == DataType::Index)
         ? data_type_to_aten(arg_index_type)
         : data_type_to_aten(output_tv->dtype());
-    ret.pushTensorProxy(sizes, strides, dtype);
+    output_tensor_proxies.pushTensorProxy(sizes, strides, dtype);
   }
-  return ret;
+  return output_tensor_proxies;
 }
 
 namespace {
@@ -1547,15 +1550,6 @@ void dumpKernelArgs(
   }
 }
 
-FusionExecutor::GlobalBufferInfo getGlobalBufferAllocationInfo(
-    const at::Tensor& at_tensor) {
-  FusionExecutor::GlobalBufferInfo info{
-      .sizes = at_tensor.sizes().vec(),
-      .strides = at_tensor.strides().vec(),
-      .type = at_tensor.scalar_type()};
-  return info;
-}
-
 } // namespace
 
 void FusionExecutor::initializeExecutorEntry(
@@ -1587,13 +1581,16 @@ void FusionExecutor::initializeExecutorEntry(
   if (outputs.empty()) {
     output_info =
-        getOutputBufferInfo(args, expr_eval, index_type, lowered_->kernel());
+        getBufferInfos(expr_eval, index_type, lowered_->kernel()->outputs());
   } else {
     // Need to save the information necessary for allocations as
     // future uses of this ExecutorEntry may not be provided with
     // allocated outputs
     for (const auto& output : outputs) {
-      output_info.emplace_back(getGlobalBufferAllocationInfo(output));
+      output_info.emplace_back(FusionExecutor::GlobalBufferInfo{
+          .sizes = output.sizes().vec(),
+          .strides = output.strides().vec(),
+          .type = output.scalar_type()});
     }
   }
 
@@ -1849,10 +1846,13 @@ void FusionExecutor::resetCompiledKernelProperties() {
 }
 
 std::vector<at::Tensor> FusionExecutor::evaluateFusionOutputs(
-    KernelArgumentHolder& args,
     std::vector<at::Tensor> outputs,
     ExpressionEvaluator& expr_eval) {
-  // TODO: Add relevant profiling code.
+ FUSER_PERF_SCOPE("FusionExecutor::runFusion::evaluateFusionOutputs"); + NVF_ERROR( + outputs.empty(), + "Fusion executor is using expression evaluator,", + " and expects that the outputs are not populated, which they were."); if (outputs.empty()) { for (const auto& out_val : fusion()->outputs()) { auto out_tensor = @@ -1861,11 +1861,12 @@ std::vector FusionExecutor::evaluateFusionOutputs( outputs.emplace_back(out_tensor); } } - args.push(outputs); return outputs; } namespace { +// Host IR specific function, returns the at:Tensor (ordered list) associated +// with the provdied Fusion output tv at::Tensor findBufferForFusionOutput( const std::vector& out_tensors, const Val* fusion_out, @@ -1903,14 +1904,10 @@ std::vector FusionExecutor::runFusion( " provided number of outputs does not match fusion output"); // Bind fusion inputs - ExpressionEvaluator expr_eval; - const auto& inputs = fusion()->inputs(); - for (const auto i : c10::irange(inputs.size())) { - expr_eval.bind(inputs[i], *args[i]); - } + auto expr_eval = executor_utils::bindInputs(args, fusion()); if (isExpressionEvaluated(fusion())) { - outputs = evaluateFusionOutputs(args, outputs, expr_eval); + outputs = evaluateFusionOutputs(outputs, expr_eval); if (isProfilerEnabled()) { auto& sprof = FusionProfiler::segment(group_id_); sprof.stopKernel(); @@ -1921,8 +1918,8 @@ std::vector FusionExecutor::runFusion( if (host_ir_container_ != nullptr) { if (outputs.empty()) { - std::vector output_info = getOutputBufferInfo( - args, expr_eval, PrimDataType::Int, host_ir_container_.get()); + std::vector output_info = getBufferInfos( + expr_eval, PrimDataType::Int, host_ir_container_->outputs()); outputs = allocateOutputs( host_ir_container_.get(), output_info, options_.device, expr_eval); } @@ -2009,7 +2006,7 @@ std::vector FusionExecutor::runFusion( // Skip trivially forwarded outputs because they are just placeholders continue; } - expr_eval.bind(output, *args[inputs.size() + i]); + expr_eval.bind(output, *args[kernel()->inputs().size() + i]); } std::vector intermediates; @@ -2062,7 +2059,7 @@ std::vector FusionExecutor::runFusion( intermediates.push_back(intermediate_buffer); expr_eval.bind( kernel()->summary().global_allocations.at(i)->buffer(), - *args[inputs.size() + outputs.size() + i]); + *args[kernel()->inputs().size() + outputs.size() + i]); if (buf_info.is_profile_buffer) { profile_buffer = intermediate_buffer; } diff --git a/csrc/executor.h b/csrc/executor.h index 97710667553..4817c64bb92 100644 --- a/csrc/executor.h +++ b/csrc/executor.h @@ -65,7 +65,7 @@ class FusionExecutor : public NonCopyable { //! Notes: 1. This API should ignore aliased outputs instead of //! pushing scalar int 0 as a place-holder. //! 2. This API does not allocate output in memory, but only returns the - //! inferred output sizes. + //! inferred output sizes. Used in kernel_cache.cpp. KernelArgumentHolder inferOutputSizes( Fusion* fusion, const KernelArgumentHolder& args, @@ -118,10 +118,14 @@ class FusionExecutor : public NonCopyable { //! Computes fusion outputs through expression evaluator. std::vector evaluateFusionOutputs( - KernelArgumentHolder& args, std::vector outputs, ExpressionEvaluator& expr_eval); + // TODO: args shouldn't come in a reference here because we will append the + // outputs to be able to send it to the kernel. For now none of the users are + // reconsuming the args, so it is okay. 
+  // It isn't changed now because dropping the reference would make a call
+  // like runFusion({}) ambiguous, and such calls are used in some places in
+  // the codebase.
   NVF_API std::vector<at::Tensor> runFusion(
       KernelArgumentHolder& args,
       const LaunchParams& launch_constraints = LaunchParams(),
diff --git a/csrc/executor_utils.cpp b/csrc/executor_utils.cpp
index 740981972dc..cac3ba928c0 100644
--- a/csrc/executor_utils.cpp
+++ b/csrc/executor_utils.cpp
@@ -716,7 +716,20 @@ ExpressionEvaluator bindInputs(
     // NOTE: we bind all inputs here, including at::Tensors. This means that
     // expr_eval will create a PolymorphicValue containing *args[i], which means
     // that at::Tensor's lifetime will be at least as long as that of expr_eval.
-    expr_eval.bind(inputs[i], *args[i], true);
+    try {
+      expr_eval.bind(inputs[i], *args[i], true);
+    } catch (const nvfError& e) {
+      std::stringstream ss;
+      ss << "When trying to run the provided host program,"
+         << " there was an error with the provided input " << i
+         << ". Provided input was:\n  ";
+      ss << PolymorphicValue_functions::toString(*args[i]);
+      ss << "\n  Fusion input is:\n  ";
+      ss << inputs[i]->toString();
+      ss << "\n  Expr eval provided the error:\n\"\"\"";
+      ss << e.msg() << "\"\"\"\n";
+      NVF_THROW(ss.str());
+    }
   }
 
   return expr_eval;
diff --git a/csrc/expr_evaluator.cpp b/csrc/expr_evaluator.cpp
index 764ea83aa50..0fd62022098 100644
--- a/csrc/expr_evaluator.cpp
+++ b/csrc/expr_evaluator.cpp
@@ -352,6 +352,135 @@ void ExpressionEvaluator::print() const {
   debug() << "--------------------\n\n";
 }
 
+namespace {
+// Error handling for
+// ExpressionEvaluator::propagateBoundValuesThroughExactMaps(Fusion* fusion)
+void handlePropagateError(
+    Fusion* fusion,
+    ExpressionEvaluator* expr_eval,
+    const std::shared_ptr<VectorOfUniqueEntries<IterDomain*>>& id_set) {
+  std::unordered_map<const IterDomain*, int64_t> id_to_size;
+  std::set<int64_t> sizes;
+
+  for (const auto id : *id_set) {
+    auto eval_val = expr_eval->evaluate(id->extent());
+    if (eval_val.hasValue()) {
+      NVF_ERROR(
+          eval_val.is<int64_t>(),
+          "Invalid extent value found, while processing ID: ",
+          id);
+      id_to_size[id] = eval_val.as<int64_t>();
+      sizes.insert(eval_val.as<int64_t>());
+    }
+  }
+
+  std::stringstream err_msg;
+  err_msg
+      << "When trying to propagate constant tensor sizes through the graph a conflict was found with "
+      << sizes.size()
+      << " different sizes across dimensions that are expected to match.\n";
+
+  // Track which size is associated with which TV and IterDomain
+  std::unordered_map<
+      int64_t,
+      std::vector<std::pair<TensorView*, const IterDomain*>>>
+      size_to_info;
+
+  for (auto tv : fusion->allTvs()) {
+    for (const IterDomain* id : tv->domain()->allIDs()) {
+      if (auto it = id_to_size.find(id); it != id_to_size.end()) {
+        size_to_info[it->second].push_back({tv, id});
+      }
+    }
+  }
+
+  // Check which TensorViews mismatch and check if they're directly related.
+  // If so, the expression between them may be the problematic expression, and
+  // the error will point to those expressions. Don't bother to speed up this
+  // check as it only runs after an error is detected.
+  bool found_producer_consumer_issue = false;
+  // Assume producer/consumer relationship
+  for (auto [dim_size_1, tv_id_pairs_1] : size_to_info) {
+    for (auto [dim_size_2, tv_id_pairs_2] : size_to_info) {
+      if (dim_size_1 <= dim_size_2) {
+        // N^2 algorithm; only process each pair once (when the first size is
+        // greater than the second) to avoid duplicate entries.
+        continue;
+      }
+
+      for (const auto& [tv1, id1] : tv_id_pairs_1) {
+        for (const auto& [tv2, id2] : tv_id_pairs_2) {
+          bool tv1_is_consumer = false;
+
+          // Check for producer-consumer relationship
+          auto producer_tvs_of_tv1 = ir_utils::producerTvsOf(tv1);
+          auto producer_tvs_of_tv2 = ir_utils::producerTvsOf(tv2);
+          if (std::find(
+                  producer_tvs_of_tv1.begin(),
+                  producer_tvs_of_tv1.end(),
+                  tv2) != producer_tvs_of_tv1.end()) {
+            tv1_is_consumer = true;
+          } else if (
+              std::find(
+                  producer_tvs_of_tv2.begin(),
+                  producer_tvs_of_tv2.end(),
+                  tv1) == producer_tvs_of_tv2.end()) {
+            // No relationship found, skip
+            continue;
+          }
+
+          Expr* relationship =
+              tv1_is_consumer ? tv1->definition() : tv2->definition();
+
+          // Found at least one consumer/producer relationship with mismatched
+          // sizes.
+          found_producer_consumer_issue = true;
+
+          // Produce error message. Normally I'd just use swap but keys in an
+          // unordered map are const, so doing some juggling with the strings
+          // instead.
+          std::stringstream tv1_error;
+          std::stringstream tv2_error;
+          tv1_error << " TV: " << tv1 << " id: " << id1
+                    << " found size: " << dim_size_1 << "\n";
+          tv2_error << " TV: " << tv2 << " id: " << id2
+                    << " found size: " << dim_size_2 << "\n";
+          err_msg << " For Producer"
+                  << (tv1_is_consumer ? tv2_error.str() : tv1_error.str());
+          err_msg << " For Consumer"
+                  << (tv1_is_consumer ? tv1_error.str() : tv2_error.str());
+          err_msg
+              << " With producer-consumer relationship through the expression: "
+              << relationship << "\n";
+        }
+      }
+    }
+  }
+
+  if (found_producer_consumer_issue) {
+    NVF_THROW(err_msg.str());
+  }
+
+  if (size_to_info.size() > 1) {
+    for (const auto& [size, info_pairs] : size_to_info) {
+      err_msg << "Size " << size << " found for ID, in TV:\n";
+      for (auto info_pair : info_pairs) {
+        err_msg << " " << info_pair.second << ", " << info_pair.first << "\n";
+      }
+    }
+    err_msg << "These sizes should all match.\n";
+  } else {
+    err_msg
+        << "Something went wrong trying to detect what went wrong!"
+        << " There should have been IDs in TVs that should match, but don't."
+        << " Somehow IDs were registered with the exact graph that aren't used in the Fusion."
+        << std::endl;
+  }
+
+  NVF_THROW(err_msg.str());
+}
+} // namespace
+
 void ExpressionEvaluator::propagateBoundValuesThroughExactMaps(
     Fusion* fusion,
     ExactLogicalDomainMap* exact_map) {
@@ -373,15 +502,10 @@ void ExpressionEvaluator::propagateBoundValuesThroughExactMaps(
       if (eval_val.hasValue()) {
         NVF_ERROR(eval_val.is<int64_t>(), "Invalid extent value");
         int64_t this_size = eval_val.as<int64_t>();
-        if (known_size != -1) {
-          NVF_ERROR(
-              known_size == this_size,
-              "Conflicting sizes: ",
-              known_size,
-              ", ",
-              this_size);
-        } else {
+        if (known_size == -1) {
           known_size = this_size;
+        } else if (known_size != this_size) {
+          handlePropagateError(fusion, this, set);
         }
       } else {
         unknown_vals.push_back(id->extent());
diff --git a/csrc/kernel_cache.h b/csrc/kernel_cache.h
index 8f41fa9cade..e2fecaacbba 100644
--- a/csrc/kernel_cache.h
+++ b/csrc/kernel_cache.h
@@ -671,13 +671,6 @@ class FusionExecutorCache {
   //! Deserialize Fusion Executor Cache using flatbuffers
   void deserialize(const serde::FusionExecutorCache* buffer, int64_t fusion_id);
 
-  //! Allocate the outputs of the Fusion given inputs
-  //! TODO: re-implement
-  std::vector<at::Tensor> allocOutputSpace(
-      const at::ArrayRef<c10::IValue>& inputs) {
-    return runFusionWithInputs(inputs);
-  }
-
  private:
  //! evict cached short cut entry in `code_to_fe_lookup_` as well as cached
  //! entry in `FusionExecutor`
diff --git a/csrc/utils.cpp b/csrc/utils.cpp
index 2c372b95577..4dcf877c1e9 100644
--- a/csrc/utils.cpp
+++ b/csrc/utils.cpp
@@ -37,86 +37,33 @@ c10::ThreadPool* getThreadPool() {
   return &pool;
 }
 
-C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-function")
-void debugPrint(const c10::TensorTypePtr& type) {
-  std::stringstream sizes_s;
-  if (auto sizes = type->symbolic_sizes().sizes()) {
-    for (const auto& shape_symbol : *sizes) {
-      if (shape_symbol.is_static()) {
-        sizes_s << shape_symbol.static_size() << ", ";
-      } else {
-        sizes_s << "s(" << *reinterpret_cast<const int64_t*>(&shape_symbol)
-                << "), ";
-      }
-    }
-  } else {
-    sizes_s << "no size available";
-  }
-  debug() << "sizes:" << sizes_s.str() << std::endl;
-  if (const auto& stride_properties = type->stride_properties().sizes()) {
-    std::stringstream stride_s;
-    std::stringstream index_s;
-    std::stringstream contig_s;
-
-    for (const auto& stride_property : *stride_properties) {
-      if (stride_property.has_value() && stride_property->stride_.has_value()) {
-        stride_s << *stride_property->stride_ << ", ";
-      } else {
-        stride_s << "?, ";
-      }
-      if (stride_property.has_value() &&
-          stride_property->stride_index_.has_value()) {
-        index_s << *stride_property->stride_index_ << ", ";
-      } else {
-        index_s << "?, ";
-      }
-      if (stride_property.has_value() &&
-          stride_property->contiguous_.has_value()) {
-        contig_s << *stride_property->contiguous_ << ", ";
-      } else {
-        contig_s << "?, ";
-      }
-    }
-    debug() << "stride: " << stride_s.str() << std::endl;
-    debug() << "stride index: " << index_s.str() << std::endl;
-    debug() << "contiguous: " << contig_s.str() << std::endl;
-  } else {
-    debug() << "no stride properties available" << std::endl;
+std::string debug_str(const c10::IValue& val) {
+  if (val.isTensor()) {
+    return debug_str(val.toTensor());
   }
-}
-C10_DIAGNOSTIC_POP()
-bool is_zero_dim_tensor(const std::shared_ptr<c10::TensorType>& tensor_type) {
-  return tensor_type && tensor_type->dim().has_value() &&
-      tensor_type->dim().value() == 0;
+  std::stringstream out_s;
+  out_s << val.toScalar();
+  return out_s.str();
 }
 
-bool is_zero_sized_tensor(const std::shared_ptr<c10::TensorType>& tensor_type) {
-  auto opt_sizes = tensor_type->sizes().concrete_sizes();
-  if (opt_sizes.has_value()) {
-    auto sizes = opt_sizes.value();
-    for (const auto& size : sizes) {
-      if (size == 0) {
-        return true;
-      }
-    }
+std::string debug_str(const at::Tensor& tensor) {
+  std::stringstream ss;
+  ss << "Tensor:";
+  ss << " device: " << tensor.device();
+  ss << ", dtype: " << tensor.dtype();
+  ss << ", shape: " << tensor.sizes();
+
+  if (!tensor.is_contiguous()) {
+    ss << ", strides: " << tensor.strides();
   }
-  return false;
+  return ss.str();
 }
 
 bool is_cpu_scalar(const at::Tensor& tensor) {
   return tensor.device().is_cpu() && tensor.numel() == 1 && tensor.dim() == 0;
 }
 
-bool is_cpu_scalar(const c10::TensorType& tensor_type) {
-  auto opt_device = tensor_type.device();
-  auto opt_dim = tensor_type.dim();
-  auto opt_numel = tensor_type.numel();
-  return opt_device.has_value() && opt_device->is_cpu() &&
-      opt_dim.has_value() && opt_numel.has_value() && opt_dim.value() == 0 &&
-      opt_numel.value() == 1;
-}
-
 bool is_meta_scalar(const at::Tensor& tensor) {
   return tensor.device().is_meta() && tensor.numel() == 1 && tensor.dim() == 0;
 }
diff --git a/csrc/utils.h b/csrc/utils.h
index 43e71b2be83..97383645d71 100644
--- a/csrc/utils.h
+++ b/csrc/utils.h
@@ -52,13 +52,10 @@ namespace nvfuser {
 int getNumThreads();
 c10::ThreadPool* getThreadPool();
 
-void debugPrint(const c10::TensorTypePtr& type);
-
-bool is_zero_dim_tensor(const std::shared_ptr<c10::TensorType>& tensor_type);
-bool is_zero_sized_tensor(const std::shared_ptr<c10::TensorType>& tensor_type);
+std::string debug_str(const c10::IValue& val);
+std::string debug_str(const at::Tensor& tensor);
 
 bool is_cpu_scalar(const at::Tensor& tensor);
-bool is_cpu_scalar(const c10::TensorType& tensor_type);
 
 bool is_meta_scalar(const at::Tensor& tensor);
diff --git a/tests/cpp/test_gpu3.cpp b/tests/cpp/test_gpu3.cpp
index 29190520e44..725e70f0885 100644
--- a/tests/cpp/test_gpu3.cpp
+++ b/tests/cpp/test_gpu3.cpp
@@ -3839,8 +3839,8 @@ TEST_F(NVFuserTest, FusionCheckedSymbolicShape_CUDA) {
   {
     ASSERT_THAT(
         [&]() { matched_add(a, c); },
-        ::testing::ThrowsMessage<nvfuser::nvfError>(
-            ::testing::HasSubstr("Conflicting sizes")));
+        ::testing::ThrowsMessage<nvfuser::nvfError>(::testing::HasSubstr(
+            "When trying to propagate constant tensor sizes through the graph a conflict was found with 2 different sizes across dimensions that are expected to match.")));
     GTEST_SKIP() << "skipping tests on pre-AMPERE GPUs";
   }
 }
diff --git a/tests/python/opinfo_input_generators.py b/tests/python/opinfo_input_generators.py
index 7dc12e87956..e369c03f9e7 100644
--- a/tests/python/opinfo_input_generators.py
+++ b/tests/python/opinfo_input_generators.py
@@ -898,7 +898,7 @@ def pad_error_generator(
     delete_all_pad_width = [-3, 0, 0, 0]
     yield SampleInput(
         make_arg(input_shape), delete_all_pad_width, make_number(dtype)
-    ), RuntimeError, "extent_int >= 0"
+    ), RuntimeError, "Invalid resized domain extent"
 
     too_many_pad_width = [1, 1, 1, 1, 1, 1]
     yield SampleInput(