Skip to content

Commit

Permalink
Add back the unit tests from #2405.
Browse files Browse the repository at this point in the history
  • Loading branch information
wujingyue committed Jul 26, 2024
1 parent 8bb3067 commit f5d57fe
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 14 deletions.
52 changes: 39 additions & 13 deletions tests/cpp/test_alias.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1269,24 +1269,15 @@ TEST_F(AliasTest, ReuseBuffer_AliasAcrossSegments) {
at::Tensor t1 = at::randn({65}, options);
at::Tensor t2 = at::randn({128, 65}, options);

FusionExecutorCache executor_cache(std::move(fusion));
FusionExecutorCache fec(std::move(fusion));
// Make a copy of `t0` because `t0` will be in-place updated.
at::Tensor original_t0 = t0.clone();
std::vector<at::Tensor> outputs =
executor_cache.runFusionWithInputs({t0, t1, t2});
std::vector<at::Tensor> outputs = fec.runFusionWithInputs({t0, t1, t2});
testValidate(
executor_cache.fusion(),
outputs,
{original_t0, t1, t2},
__LINE__,
__FILE__);
fec.fusion(), outputs, {original_t0, t1, t2}, __LINE__, __FILE__);

EXPECT_EQ(
executor_cache.getMostRecentKernelRuntime()
->fusionSegments()
->groups()
.size(),
2)
fec.getMostRecentKernelRuntime()->fusionSegments()->groups().size(), 2)
<< "segmentation didn't happen as expected";

auto t3 = original_t0.add(1.0);
Expand Down Expand Up @@ -1635,4 +1626,39 @@ TEST_F(AliasTest, QKVSplitBackprop) {
EXPECT_TRUE(out_tensors[2].is_alias_of(out_tensors[1]));
}

// Regression test for issue #2375. The fusion ends with a reshape back to
// the flat {N, C, H, W} layout; the segmenter is expected to peel that
// trailing op into a NoOp segment and schedule the normalization itself as
// a single inner-persistent kernel.
TEST_F(AliasTest, Bookend_Issue2375) {
  auto fusion = std::make_unique<Fusion>();
  FusionGuard fg(fusion.get());

  // Grouped layout {N, G, C/G, H, W} for the input, flat {N, C, H, W} for
  // the output.
  constexpr int64_t n = 2, c = 128, h = 16, w = 16, g = 32;
  const DataType dtype = DataType::Half;
  const std::vector<int64_t> input_shape = {n, g, c / g, h, w};
  const std::vector<int64_t> output_shape = {n, c, h, w};

  auto in_half = makeContigTensor(input_shape.size(), dtype);
  fusion->addInput(in_half);
  auto in_float = castOp(DataType::Float, in_half);
  auto staged = set(in_float);
  // Reduce the three innermost dims of each group, then normalize by the
  // broadcast sum.
  auto group_sum = sum(staged, {-1, -2, -3});
  auto group_sum_bcast = broadcast(group_sum, {false, false, true, true, true});
  auto normalized = div(staged, group_sum_bcast);
  auto out_half = castOp(dtype, normalized);
  // The trailing "bookend" reshape back to the flat layout.
  auto out = reshape(out_half, input_shape, output_shape);
  fusion->addOutput(out);

  auto options =
      at::TensorOptions().dtype(data_type_to_aten(dtype)).device(at::kCUDA, 0);
  auto t0 = at::randn(input_shape, options);

  FusionExecutorCache executor_cache(std::move(fusion));
  auto out_tensors = executor_cache.runFusionWithInputs({t0});
  testValidate(executor_cache.fusion(), out_tensors, {t0}, __LINE__, __FILE__);

  // Exactly two segments: the no-op reshape and the persistent
  // normalization kernel.
  EXPECT_THAT(
      executor_cache.getMostRecentKernelRuntime()->fusionSegments()->groups(),
      UnorderedElementsAre(
          HeuristicIs(ScheduleHeuristic::NoOp),
          HeuristicIs(ScheduleHeuristic::InnerPersistent)));
}

} // namespace nvfuser
59 changes: 58 additions & 1 deletion tests/cpp/test_gpu_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

namespace nvfuser {

using namespace at::indexing;
using testing::UnorderedElementsAre;

using GpuViewTest = NVFuserTest;

Expand Down Expand Up @@ -2380,6 +2380,63 @@ TEST_F(GpuViewTest, SplitMergePointwiseSplitMerge) {
testValidate(executor_cache.fusion(), {cg_outputs}, {t0}, __LINE__, __FILE__);
}

// Group norm written the "original" way: a reshape into groups, a
// normalization over each group, and a reshape back, wrapped by pointwise
// ops (cast in, scale/bias/cast out). Currently segmented into 2 kernels:
// pointwise and reduction.
TEST_F(GpuViewTest, GroupNormOriginal) {
  auto fusion = std::make_unique<Fusion>();
  FusionGuard fg(fusion.get());
  const int64_t N = 2, C = 128, H = 16, W = 16, G = 32;
  const std::vector<int64_t> input_shape = {N, C, H, W};
  const std::vector<int64_t> group_shape = {N, G, C / G, H, W};
  // Weight and bias are per-channel.
  const std::vector<int64_t> input_shape_wb = {C};
  const DataType dtype = DataType::Half;
  auto tv0 = makeContigTensor(input_shape.size(), dtype);
  auto tv1 = makeContigTensor(input_shape_wb.size(), DataType::Float);
  auto tv2 = makeContigTensor(input_shape_wb.size(), DataType::Float);
  fusion->addInput(tv0);
  fusion->addInput(tv1);
  fusion->addInput(tv2);
  // pointwise ops, e.g. cast
  auto tv3 = castOp(DataType::Float, tv0);
  // reshape from {N, C, H, W} to {N, G, C / G, H, W}
  auto tv4 = reshape(tv3, input_shape, group_shape);
  // normalization: reduce the {C / G, H, W} dims of each group and divide
  // by the broadcast sum
  auto tv5 = sum(tv4, {-1, -2, -3});
  auto tv6 = broadcast(tv5, {false, false, true, true, true});
  auto tv7 = div(tv4, tv6);
  // reshape back to {N, C, H, W}
  auto tv8 = reshape(tv7, group_shape, input_shape);
  // pointwise ops, e.g. scale, bias, cast
  auto tv9 = broadcast(tv1, {true, false, true, true});
  auto tv10 = broadcast(tv2, {true, false, true, true});
  auto tv11 = mul(tv8, tv9);
  auto tv12 = add(tv11, tv10);
  auto tv13 = castOp(dtype, tv12);
  fusion->addOutput(tv13);

  auto options =
      at::TensorOptions().dtype(data_type_to_aten(dtype)).device(at::kCUDA, 0);
  auto options_wb = at::TensorOptions()
                        .dtype(data_type_to_aten(DataType::Float))
                        .device(at::kCUDA, 0);
  auto t0 = at::randn(input_shape, options);
  auto tw = at::randn(input_shape_wb, options_wb);
  auto tb = at::randn(input_shape_wb, options_wb);

  FusionExecutorCache executor_cache(std::move(fusion));
  auto cg_outputs = executor_cache.runFusionWithInputs({t0, tw, tb});
  // should expect 1 after adding a pre-segment pass to move reshape to input
  // and output.
  EXPECT_THAT(
      executor_cache.getMostRecentKernelRuntime()->fusionSegments()->groups(),
      UnorderedElementsAre(
          HeuristicIs(ScheduleHeuristic::PointWise),
          HeuristicIs(ScheduleHeuristic::Reduction)));

  testValidate(
      executor_cache.fusion(), cg_outputs, {t0, tw, tb}, __LINE__, __FILE__);
}

using ReductionAxes = std::vector<int64_t>;
class ViewReductionTest : public NVFuserFixtureParamTest<ReductionAxes> {};

Expand Down

0 comments on commit f5d57fe

Please sign in to comment.