From 5d1d07e341ed687eb75b2daaf3da14e47e9f998d Mon Sep 17 00:00:00 2001
From: Naoya Maruyama
Date: Thu, 19 Dec 2024 12:54:09 -0800
Subject: [PATCH] Resolve conflicts by recomputation

---
 csrc/scheduler/resize.cpp             |  61 ++++++--
 csrc/scheduler/tools/resize_utils.cpp |  21 ++-
 csrc/scheduler/tools/resize_utils.h   |  19 ++-
 tests/cpp/test_resize.cpp             | 208 ++++++++++++++++++++------
 4 files changed, 243 insertions(+), 66 deletions(-)

diff --git a/csrc/scheduler/resize.cpp b/csrc/scheduler/resize.cpp
index 194087b90e8..dece9f79238 100644
--- a/csrc/scheduler/resize.cpp
+++ b/csrc/scheduler/resize.cpp
@@ -73,19 +73,6 @@ bool ResizeScheduler::canScheduleCompileTime(Fusion* fusion) {
 
   auto resize_based_tensor_ops = ir_utils::getOpsOfType<SliceOp, PadOp>(fusion);
 
-  if (auto non_exclusive_resizes = scheduler_tools::getNonExclusiveResizeInfo(
-          resize_based_tensor_ops, id_model.idGraph(IdMappingMode::EXACT));
-      !non_exclusive_resizes.empty()) {
-    std::stringstream msg;
-    msg << "Propagation of resizes would affect fusion outputs.";
-    for (const auto& [tv, resize_ids] : non_exclusive_resizes) {
-      msg << " Resize input tv: " << tv->toString()
-          << ", resize input ID groups: " << nvfuser::toString(resize_ids);
-    }
-    scheduler_debug_utils::canScheduleRejectReason(schedulerType(), msg.str());
-    return false;
-  }
-
   // Slicing of or to a broadcast ID is not allowed yet.
   for (auto tensor_op : resize_based_tensor_ops) {
     TensorView* out_tv = tensor_op->output(0)->as<TensorView>();
@@ -133,6 +120,30 @@ bool ResizeScheduler::canScheduleCompileTime(Fusion* fusion) {
     return false;
   }
 
+  for (auto out_tv : ir_utils::filterByType<TensorView>(fusion->outputs())) {
+    if (out_tv == ref_tv) {
+      continue;
+    }
+    auto exprs = ValGraphBFS::getExprGroupsBetween(
+                     broadcast_graph,
+                     broadcast_graph.toGroups(ref_tv->getLogicalDomain()),
+                     broadcast_graph.toGroups(out_tv->getLogicalDomain()),
+                     /*require_all_to_visited=*/false)
+                     .first;
+    for (const auto& [expr_g, dir] : exprs) {
+      if (expr_g->front()->isA<Resize>()) {
+        std::stringstream msg;
+        msg << "Resize between reference and output not allowed.";
+        msg << " Reference: " << ref_tv->toString()
+            << ". Output: " << out_tv->toString()
+            << ". Resize: " << expr_g->front()->toString();
+        scheduler_debug_utils::canScheduleRejectReason(
+            schedulerType(), msg.str());
+        return false;
+      }
+    }
+  }
+
   // Disable the scheduler if there's a squeeze op. The loop option
   // may also need to be enabled in that case, but that option is not
   // turned on automatically yet.
@@ -163,6 +174,21 @@ void ResizeScheduler::schedule(Fusion* fusion, const HeuristicParams* params) {
   scheduler_utils::cacheInputs(fusion, true);
   scheduler_utils::cacheAndForkOutputs(fusion, true);
 
+  auto resize_based_tensor_ops = ir_utils::getOpsOfType<SliceOp, PadOp>(fusion);
+
+  IdModel id_model(fusion, /*build_graphs=*/false);
+  const auto& exact_graph = id_model.buildExactGraph();
+
+  // Replicate resize inputs if necessary to avoid conflicting propagations
+  for (const auto& [out_tv, exclusivity_info] :
+       scheduler_tools::getNonExclusiveResizeInfo(
+           resize_based_tensor_ops, exact_graph)) {
+    auto resize_based_op = out_tv->definition();
+    auto inp_tv = resize_based_op->input(0)->as<TensorView>();
+    auto inp_tv_copy = RecomputeTv::recompute(inp_tv);
+    ir_utils::replaceValInExprInputs(resize_based_op, inp_tv, inp_tv_copy);
+  }
+
   for (auto expr : fusion->exprs()) {
     if (!expr->isOneOf<SliceOp, PadOp>()) {
       continue;
@@ -186,9 +212,14 @@ void ResizeScheduler::schedule(Fusion* fusion, const HeuristicParams* params) {
   ref_tv->axis(-1)->parallelize(ParallelType::TIDx);
   ref_tv->axis(-2)->parallelize(ParallelType::BIDx);
 
-  // Propagate the reference to the other tensors
+  // Propagate the reference to the other tensors. Note that the
+  // update flag is enabled so as to work around the resize propagation
+  // issue. This may not work if there's a tensor that is reshaped
+  // from the reference tensor, but that should not be the case as the
+  // reference is picked by the same routine used for the pointwise
+  // scheduler.
   scheduler_tools::scheduleLoopDomainsLike(
-      fusion->allTvs(), ref_tv->getLoopDomain());
+      fusion->allTvs(), ref_tv->getLoopDomain(), true);
 
   inlineMost();
 
diff --git a/csrc/scheduler/tools/resize_utils.cpp b/csrc/scheduler/tools/resize_utils.cpp
index f1206f99676..d7987b1b412 100644
--- a/csrc/scheduler/tools/resize_utils.cpp
+++ b/csrc/scheduler/tools/resize_utils.cpp
@@ -66,13 +66,14 @@ void propagateResizeToInputs(Expr* resize_tensor_op) {
   }
 }
 
-std::unordered_map<TensorView*, ValGroups> getNonExclusiveResizeInfo(
+std::unordered_map<TensorView*, ResizeExclusivityInfo> getNonExclusiveResizeInfo(
     const std::vector<Expr*>& ordered_resize_tensor_ops,
-    const ValGraph& exact_graph) {
+    const ValGraph& exact_graph,
+    bool ignore_fusion_inputs) {
   NVF_ERROR(!ordered_resize_tensor_ops.empty());
   Fusion* fusion = ordered_resize_tensor_ops[0]->fusion();
 
-  std::unordered_map<TensorView*, ValGroups> non_exclusive_resizes;
+  std::unordered_map<TensorView*, ResizeExclusivityInfo> non_exclusive_resizes;
 
   std::unordered_set<Val*> inputs{
       fusion->inputs().begin(), fusion->inputs().end()};
@@ -95,6 +96,8 @@ std::unordered_map<TensorView*, ValGroups> getNonExclusiveResizeInfo(
     auto inp_tv = dynamic_cast<TensorView*>(resize_tensor_op->inputs().at(0));
     auto out_tv = dynamic_cast<TensorView*>(resize_tensor_op->outputs().at(0));
 
+    ResizeExclusivityInfo info;
+
     ValGroups resize_inp_ids = get_root_to_logical_resizes(out_tv);
     NVF_ERROR(!resize_inp_ids.empty());
 
@@ -107,6 +110,10 @@ std::unordered_map<TensorView*, ValGroups> getNonExclusiveResizeInfo(
     // visible changes through the tensor, the resize is considered
     // non-exclusive.
     for (auto dep_tv : ir_utils::filterByType<TensorView>(dep_vals)) {
+      if (ignore_fusion_inputs && dep_tv->isFusionInput()) {
+        continue;
+      }
+
       bool maybe_non_exclusive = false;
 
       if (dep_tv->isFusionOutput()) {
@@ -159,10 +166,16 @@ std::unordered_map<TensorView*, ValGroups> getNonExclusiveResizeInfo(
       }
 
       // This resize input ID is not exclusively used
-      non_exclusive_resizes[inp_tv].pushBack(resize_inp_id);
+      // non_exclusive_resizes[inp_tv].first.pushBack(resize_inp_id);
+      info.shared_tvs.push_back(dep_tv);
+      info.resized_ids.pushBack(resize_inp_id);
     }
   }
 
+  if (!info.shared_tvs.empty()) {
+    NVF_ERROR(non_exclusive_resizes.emplace(out_tv, info).second);
+  }
+
   // Analysis of exclusiveness until in_tv is done. Following
   // resize-based tensor ops do not need to check the same section
   // of the fusion and can start from out_tv.
diff --git a/csrc/scheduler/tools/resize_utils.h b/csrc/scheduler/tools/resize_utils.h
index 1245f166c65..8b3667c5dd3 100644
--- a/csrc/scheduler/tools/resize_utils.h
+++ b/csrc/scheduler/tools/resize_utils.h
@@ -94,9 +94,24 @@ void propagateResizeToInputs(Expr* resize_op);
 // The function returns a map from tensors that are input to
 // non-exclusive ops to their resize input ID groups. This map will be
 // used to resolve the non-exclusiveness by replication.
-std::unordered_map<TensorView*, ValGroups> getNonExclusiveResizeInfo(
+struct ResizeExclusivityInfo {
+  std::vector<TensorView*> shared_tvs;
+  // std::unordered_map<TensorView*, ValGroups> resized_ids;
+  ValGroups resized_ids;
+
+  bool operator==(const ResizeExclusivityInfo& other) const {
+    return shared_tvs == other.shared_tvs && resized_ids == other.resized_ids;
+  }
+
+  bool operator!=(const ResizeExclusivityInfo& other) const {
+    return !(*this == other);
+  }
+};
+
+std::unordered_map<TensorView*, ResizeExclusivityInfo> getNonExclusiveResizeInfo(
     const std::vector<Expr*>& ordered_resize_tensor_ops,
-    const ValGraph& exact_graph);
+    const ValGraph& exact_graph,
+    bool ignore_fusion_inputs = false);
 
 } // namespace scheduler_tools
 } // namespace nvfuser
diff --git a/tests/cpp/test_resize.cpp b/tests/cpp/test_resize.cpp
index 1c3ca839baa..7d4864b0d55 100644
--- a/tests/cpp/test_resize.cpp
+++ b/tests/cpp/test_resize.cpp
@@ -4427,25 +4427,83 @@ TEST_F(ResizeSchedulerTest, PropagateMultipleSlicesToInputs2) {
   fusion.addOutput(tv3);
   fusion.addOutput(tv6);
 
-  IdModel id_model(&fusion, /*build_graphs=*/false);
-  const auto& exact_graph = id_model.buildExactGraph();
-
-  auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
-      ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
-
-  // tv1 is the input of the first slice, which is not exclusive as
-  // tv1 is also a producer of tv4.
-  EXPECT_EQ(non_exclusive_resize_info.count(tv1), 1);
-  EXPECT_EQ(
-      non_exclusive_resize_info.at(tv1),
-      exact_graph.toGroups(std::vector<IterDomain*>{tv1->axis(1)}));
+  {
+    IdModel id_model(&fusion, /*build_graphs=*/false);
+    const auto& exact_graph = id_model.buildExactGraph();
+    auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+        ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
+
+    EXPECT_EQ(non_exclusive_resize_info.size(), 2);
+
+    // tv2 is the output of the first slice, which is not exclusive as
+    // tv1 is also a producer of tv4.
+    EXPECT_EQ(non_exclusive_resize_info.count(tv2), 1);
+    scheduler_tools::ResizeExclusivityInfo tv2_info{
+        {tv1}, exact_graph.toGroups(std::vector<IterDomain*>{tv1->axis(1)})};
+    EXPECT_EQ(non_exclusive_resize_info.at(tv2), tv2_info);
+
+    // Similarly, tv5 is the output of the second slice, which is not exclusive
+    // as tv1 is also a producer of tv2.
+    EXPECT_EQ(non_exclusive_resize_info.count(tv5), 1);
+    scheduler_tools::ResizeExclusivityInfo tv5_info{
+        {tv1}, exact_graph.toGroups(std::vector<IterDomain*>{tv4->axis(1)})};
+    EXPECT_EQ(non_exclusive_resize_info.at(tv5), tv5_info);
+  }
 
-  // Similary, tv4 is the input of the second slice, which is not exclusive as
-  // tv1 is also a producer of tv2.
-  EXPECT_EQ(non_exclusive_resize_info.count(tv4), 1);
-  EXPECT_EQ(
-      non_exclusive_resize_info.at(tv4),
-      exact_graph.toGroups(std::vector<IterDomain*>{tv4->axis(1)}));
+  // Test replication-based mitigation of conflicts
+  {
+    Fusion fusion_copy = fusion;
+    FusionGuard fg(&fusion_copy);
+
+    auto tv0 = fusion_copy.inputs().at(0)->as<TensorView>();
+    auto tv2 =
+        fusion_copy.outputs().at(0)->definition()->input(0)->as<TensorView>();
+    auto slice = dynamic_cast<SliceOp*>(tv2->definition());
+    ASSERT_NE(slice, nullptr);
+    auto tv1 = slice->input(0)->as<TensorView>();
+    auto tv5 =
+        fusion_copy.outputs().at(1)->definition()->input(0)->as<TensorView>();
+    auto tv4 = tv5->definition()->input(0)->as<TensorView>();
+
+    // Replicate tv1 for tv2
+    auto private_copy = RecomputeTv::recompute(tv1);
+    ir_utils::replaceValInExprInputs(slice, tv1, private_copy);
+
+    // The two slices should still be reported as non-exclusive, but now
+    // they are both shared at the fusion input.
+    IdModel id_model(&fusion_copy, /*build_graphs=*/false);
+    const auto& exact_graph = id_model.buildExactGraph();
+    auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+        ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion_copy), exact_graph);
+    EXPECT_EQ(non_exclusive_resize_info.size(), 2);
+    EXPECT_EQ(non_exclusive_resize_info.count(tv2), 1);
+    scheduler_tools::ResizeExclusivityInfo tv2_info{
+        {tv0}, exact_graph.toGroups(std::vector<IterDomain*>{tv0->axis(1)})};
+    EXPECT_EQ(non_exclusive_resize_info.at(tv2), tv2_info);
+
+    EXPECT_EQ(non_exclusive_resize_info.count(tv5), 1);
+    scheduler_tools::ResizeExclusivityInfo tv5_info{
+        {tv0}, exact_graph.toGroups(std::vector<IterDomain*>{tv4->axis(1)})};
+    EXPECT_EQ(non_exclusive_resize_info.at(tv5), tv5_info);
+  }
+
+  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
+  auto t0 = at::randn({16, 100}, options);
+  std::vector<c10::IValue> inputs({t0});
+
+  FusionExecutorCache executor_cache(std::move(fusion_ptr));
+  auto out_tensors = executor_cache.runFusionWithInputs(inputs);
+  testValidate(
+      executor_cache.fusion(), out_tensors, inputs, __LINE__, __FILE__);
+  FusionKernelRuntime* runtime = executor_cache.getMostRecentKernelRuntime();
+  EXPECT_FALSE(runtime->isSegmented());
+  const auto& heuristic_param =
+      runtime->schedulerHeuristics()->heuristicsList().front();
+  EXPECT_EQ(heuristic_param->scheduler_type, SchedulerType::Resize);
+  Fusion* scheduled_fusion =
+      dynamic_cast<KernelExecutor*>(runtime->executors().at(0).get())->fusion();
+  checkLoopDomainEquivalence(
+      scheduled_fusion->outputs().at(0)->as<TensorView>());
 }
 
 // Non-exclusive slice due to a dependency to a fusion output
@@ -4486,12 +4544,57 @@ TEST_F(ResizeSchedulerTest, PropagateMultipleSlicesToInputs3) {
   auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
       ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
 
-  // tv3 is the input of the slice, which is not exclusive as
+  // tv4 is the output of the slice, which is not exclusive as
   // tv3 depends on tv2, which is a fusion output
-  EXPECT_EQ(non_exclusive_resize_info.count(tv3), 1);
-  EXPECT_EQ(
-      non_exclusive_resize_info.at(tv3),
-      exact_graph.toGroups(std::vector<IterDomain*>{tv3->axis(1)}));
+  EXPECT_EQ(non_exclusive_resize_info.count(tv4), 1);
+  scheduler_tools::ResizeExclusivityInfo tv4_info{
+      {tv2}, exact_graph.toGroups(std::vector<IterDomain*>{tv3->axis(1)})};
+  EXPECT_EQ(non_exclusive_resize_info.at(tv4), tv4_info);
+
+  // Test replication-based mitigation of conflicts
+  {
+    Fusion fusion_copy = fusion;
+    FusionGuard fg(&fusion_copy);
+
+    auto tv0 = fusion_copy.inputs().at(0)->as<TensorView>();
+    auto tv5 = fusion_copy.outputs().at(1)->as<TensorView>();
+    auto tv4 = tv5->definition()->input(0)->as<TensorView>();
+    auto tv3 = tv4->definition()->input(0)->as<TensorView>();
+
+    auto private_copy = RecomputeTv::recompute(tv3);
+    ir_utils::replaceValInExprInputs(tv4->definition(), tv3, private_copy);
+
+    IdModel id_model(&fusion_copy, /*build_graphs=*/false);
+    const auto& exact_graph = id_model.buildExactGraph();
+    auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+        ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion_copy), exact_graph);
+    EXPECT_EQ(non_exclusive_resize_info.size(), 1);
+    EXPECT_EQ(non_exclusive_resize_info.count(tv4), 1);
+    scheduler_tools::ResizeExclusivityInfo tv4_info{
+        {tv0}, exact_graph.toGroups(std::vector<IterDomain*>{tv0->axis(1)})};
+    EXPECT_EQ(non_exclusive_resize_info.at(tv4), tv4_info);
+  }
+
+  GTEST_SKIP() << "Scheduling not yet supported due to broadcast";
+
+  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
+  auto t0 = at::randn({16, 100}, options);
+  auto t1 = at::randn({16}, options);
+  std::vector<c10::IValue> inputs({t0, t1});
+
+  FusionExecutorCache executor_cache(std::move(fusion_ptr));
+  auto out_tensors = executor_cache.runFusionWithInputs(inputs);
+  testValidate(
+      executor_cache.fusion(), out_tensors, inputs, __LINE__, __FILE__);
+  FusionKernelRuntime* runtime = executor_cache.getMostRecentKernelRuntime();
+  EXPECT_FALSE(runtime->isSegmented());
+  const auto& heuristic_param =
+      runtime->schedulerHeuristics()->heuristicsList().front();
+  EXPECT_EQ(heuristic_param->scheduler_type, SchedulerType::Resize);
+  Fusion* scheduled_fusion =
+      dynamic_cast<KernelExecutor*>(runtime->executors().at(0).get())->fusion();
+  checkLoopDomainEquivalence(
+      scheduled_fusion->outputs().at(0)->as<TensorView>());
 }
 
 // Slice input tensor depends on a fusion output, but the slice is
@@ -4671,10 +4774,29 @@ TEST_F(ResizeSchedulerTest, PropagateMultipleSlicesToInputs6) {
   auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
       ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
   EXPECT_EQ(non_exclusive_resize_info.size(), 1);
-  EXPECT_EQ(non_exclusive_resize_info.count(tv1), 1);
-  EXPECT_EQ(
-      non_exclusive_resize_info.at(tv1),
-      exact_graph.toGroups(std::vector<IterDomain*>{tv1->axis(1)}));
+  EXPECT_EQ(non_exclusive_resize_info.count(tv2), 1);
+  scheduler_tools::ResizeExclusivityInfo tv2_info{
+      {tv1}, exact_graph.toGroups(std::vector<IterDomain*>{tv1->axis(1)})};
+  EXPECT_EQ(non_exclusive_resize_info.at(tv2), tv2_info);
+
+  // When scheduled, since the shape of tv4 is different from the
+  // shape of tv5, this fusion is segmented. One segment is a resize
+  // segment consisting of the tv2 and tv3 slices. The other is a
+  // pointwise segment for tv5.
+  FusionExecutorCache executor_cache(std::move(fusion_ptr));
+  auto out_tensors = executor_cache.runFusionWithInputs(inputs);
+  testValidate(
+      executor_cache.fusion(), out_tensors, inputs, __LINE__, __FILE__);
+  FusionKernelRuntime* runtime = executor_cache.getMostRecentKernelRuntime();
+  const auto& heuristic_list = runtime->schedulerHeuristics()->heuristicsList();
+  EXPECT_EQ(heuristic_list.size(), 2);
+  // They should be a combination of a resize scheduler and a pointwise
+  // scheduler
+  EXPECT_TRUE(
+      (heuristic_list[0]->scheduler_type == SchedulerType::PointWise &&
+       heuristic_list[1]->scheduler_type == SchedulerType::Resize) ||
+      (heuristic_list[0]->scheduler_type == SchedulerType::Resize &&
+       heuristic_list[1]->scheduler_type == SchedulerType::PointWise));
 }
 
 // RoPE-like rotation patten
@@ -4774,19 +4896,17 @@ TEST_P(ResizeSchedulerTest, SliceRotateCat) {
   const auto& exact_graph = id_model.buildExactGraph();
   auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
       ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
-  EXPECT_EQ(non_exclusive_resize_info.count(tv1), 1);
-  EXPECT_EQ(
-      non_exclusive_resize_info.at(tv1),
-      exact_graph.toGroups(std::vector<IterDomain*>{tv1->axis(1)}));
-  EXPECT_EQ(non_exclusive_resize_info.count(tv3), 1);
-  EXPECT_EQ(
-      non_exclusive_resize_info.at(tv3),
-      exact_graph.toGroups(std::vector<IterDomain*>{tv3->axis(1)}));
+  EXPECT_EQ(non_exclusive_resize_info.count(tv2), 1);
+  scheduler_tools::ResizeExclusivityInfo tv2_info{
+      {tv0}, exact_graph.toGroups(std::vector<IterDomain*>{tv1->axis(1)})};
+  EXPECT_EQ(non_exclusive_resize_info.at(tv2), tv2_info);
+  EXPECT_EQ(non_exclusive_resize_info.count(tv4), 1);
+  scheduler_tools::ResizeExclusivityInfo tv4_info{
+      {tv0}, exact_graph.toGroups(std::vector<IterDomain*>{tv3->axis(1)})};
+  EXPECT_EQ(non_exclusive_resize_info.at(tv4), tv4_info);
   // These two entries should be all the info map has.
   EXPECT_EQ(non_exclusive_resize_info.size(), 2);
 
-  GTEST_SKIP() << "Scheduling not yet supported";
-
   FusionExecutorCache executor_cache(std::move(fusion_ptr));
   auto out_tensors = executor_cache.runFusionWithInputs(inputs);
   testValidate(
@@ -4914,19 +5034,17 @@ TEST_P(ResizeSchedulerTest, SliceRotateCatResidual) {
   const auto& exact_graph = id_model.buildExactGraph();
   auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
       ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
-  EXPECT_EQ(non_exclusive_resize_info.count(tv1), 1);
-  EXPECT_EQ(
-      non_exclusive_resize_info.at(tv1),
-      exact_graph.toGroups(std::vector<IterDomain*>{tv1->axis(1)}));
-  EXPECT_EQ(non_exclusive_resize_info.count(tv3), 1);
-  EXPECT_EQ(
-      non_exclusive_resize_info.at(tv3),
-      exact_graph.toGroups(std::vector<IterDomain*>{tv3->axis(1)}));
+  EXPECT_EQ(non_exclusive_resize_info.count(tv2), 1);
+  scheduler_tools::ResizeExclusivityInfo tv2_info{
+      {tv0}, exact_graph.toGroups(std::vector<IterDomain*>{tv1->axis(1)})};
+  EXPECT_EQ(non_exclusive_resize_info.at(tv2), tv2_info);
+  EXPECT_EQ(non_exclusive_resize_info.count(tv4), 1);
+  scheduler_tools::ResizeExclusivityInfo tv4_info{
+      {tv0}, exact_graph.toGroups(std::vector<IterDomain*>{tv3->axis(1)})};
+  EXPECT_EQ(non_exclusive_resize_info.at(tv4), tv4_info);
  // These two entries should be all the info map has.
  EXPECT_EQ(non_exclusive_resize_info.size(), 2);
 
-  GTEST_SKIP() << "Scheduling not yet supported";
-
   FusionExecutorCache executor_cache(std::move(fusion_ptr));
   auto out_tensors = executor_cache.runFusionWithInputs(inputs);
   testValidate(
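
Usage note (supplementary; not part of the diff above): the recomputation
workflow that ResizeScheduler::schedule() now performs can also be read as a
standalone pass. The sketch below is a minimal illustration assembled only
from calls that appear in this patch (getNonExclusiveResizeInfo,
RecomputeTv::recompute, ir_utils::replaceValInExprInputs); the helper name
privatizeNonExclusiveResizeInputs is hypothetical.

  // Give every non-exclusive resize-based op a private copy of its input so
  // that propagating the resize to producers cannot affect the tensors
  // recorded in ResizeExclusivityInfo::shared_tvs.
  void privatizeNonExclusiveResizeInputs(Fusion* fusion) {
    auto resize_based_tensor_ops =
        ir_utils::getOpsOfType<SliceOp, PadOp>(fusion);
    if (resize_based_tensor_ops.empty()) {
      return;
    }
    IdModel id_model(fusion, /*build_graphs=*/false);
    const auto& exact_graph = id_model.buildExactGraph();
    // The info map is keyed by the resize output; its definition is the
    // SliceOp/PadOp whose input needs to be replicated.
    for (const auto& [out_tv, info] :
         scheduler_tools::getNonExclusiveResizeInfo(
             resize_based_tensor_ops, exact_graph)) {
      Expr* resize_based_op = out_tv->definition();
      auto* inp_tv = resize_based_op->input(0)->as<TensorView>();
      TensorView* inp_tv_copy = RecomputeTv::recompute(inp_tv);
      ir_utils::replaceValInExprInputs(resize_based_op, inp_tv, inp_tv_copy);
    }
  }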