Remove unneeded branches in fmin/fmax helpers #329

Merged · 1 commit · May 11, 2023
runtime/helpers.cu: 6 additions & 14 deletions
@@ -92,9 +92,7 @@ __device__ double fmax(double a, double b) {
   // check and propagate NaN
   if (a != a) {
     return a;
-  } else if (b != b) {
-    return b;
-  } else {
+  } else { // If b is nan, it will be returned in the next line
     return a > b ? a : b;
   }
 }
@@ -103,9 +101,7 @@ __device__ float fmax(float a, float b) {
   // check and propagate NaN
   if (a != a) {
     return a;
-  } else if (b != b) {
-    return b;
-  } else {
+  } else { // If b is nan, it will be returned in the next line
     return a > b ? a : b;
   }
 }
@@ -128,22 +124,18 @@ __device__ constexpr int64_t min(int64_t a, int64_t b) {

 __device__ double fmin(double a, double b) {
   // check and propagate NaN
-  if (a != a) {
-    return a;
-  } else if (b != b) {
+  if (b != b) {
     return b;
-  } else {
+  } else { // If a is nan, it will be returned in the next line
     return a > b ? b : a;
   }
 }

 __device__ float fmin(float a, float b) {
   // check and propagate NaN
-  if (a != a) {
-    return a;
-  } else if (b != b) {
+  if (b != b) {
     return b;
-  } else {
+  } else { // If a is nan, it will be returned in the next line
     return a > b ? b : a;
   }
 }
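The removed else-if branches are redundant because of IEEE 754 comparison semantics: any ordered comparison involving NaN is false. In fmax, if b is NaN then a > b is false and the ternary already returns b; in fmin, if a is NaN then a > b is false and the ternary already returns a. A minimal host-side C++ sketch (not part of this PR; fmax_like and fmin_like are illustrative names) that exercises the same ternary logic:

#include <cassert>
#include <cmath>
#include <limits>

// Mirrors the simplified device helpers: one explicit NaN check per function;
// the other operand's NaN falls through the ternary, whose comparison is
// false whenever either operand is NaN.
double fmax_like(double a, double b) {
  if (a != a) { // a is NaN
    return a;
  } else { // if b is NaN, a > b is false, so b (NaN) is returned
    return a > b ? a : b;
  }
}

double fmin_like(double a, double b) {
  if (b != b) { // b is NaN
    return b;
  } else { // if a is NaN, a > b is false, so a (NaN) is returned
    return a > b ? b : a;
  }
}

int main() {
  const double nan = std::numeric_limits<double>::quiet_NaN();
  assert(std::isnan(fmax_like(1.0, nan)));
  assert(std::isnan(fmax_like(nan, 1.0)));
  assert(std::isnan(fmin_like(1.0, nan)));
  assert(std::isnan(fmin_like(nan, 1.0)));
  assert(fmax_like(1.0, 2.0) == 2.0);
  assert(fmin_like(1.0, 2.0) == 1.0);
  return 0;
}

Note that this NaN-propagating behavior differs from std::fmax and std::fmin, which per the C standard return the non-NaN operand when exactly one argument is NaN; the helpers propagate NaN so the fused reductions match the ATen reference used in the test below.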
test/test_gpu3.cpp: 45 additions & 0 deletions
@@ -8296,6 +8296,51 @@ TEST_F(NVFuserTest, FusionClearGmemBetweenSegments_CUDA) {
   testValidate(
       executor_cache.fusion(), outputs, {at_x}, {t4}, __LINE__, __FILE__);
 }
+
+// Test nan propagation during min/max with floats and doubles
+TEST_F(NVFuserTest, FusionMinMaxNanPropagation_CUDA) {
+  for (auto dtype : {DataType::Float, DataType::Double}) {
+    for (auto do_min : {true, false}) {
+      auto fusion = std::make_unique<Fusion>();
+      FusionGuard fg(fusion.get());
+
+      auto tv0 = makeSymbolicTensor(2, dtype);
+      fusion->addInput(tv0);
+      auto tv1 = do_min ? min(tv0, {1}) : max(tv0, {1});
+      fusion->addOutput(tv1);
+
+      FusionExecutorCache executor_cache(std::move(fusion));
+
+      auto options =
+          at::TensorOptions()
+              .dtype(dtype == DataType::Float ? at::kFloat : at::kDouble)
+              .device(at::kCUDA, 0);
+      // Test size 1 since it will have a single comparison, which checks
+      // missing propagation in one position even if it propagates properly in
+      // the other position
+      for (auto size : {1, 2, 5}) {
+        // To check nans in multiple positions along reduction axis create a 2D
+        // tensor that is ones except the diagonal, which are nans
+        auto at_x = at::eye(size, options);
+        at_x = (1 - at_x) / (1 - at_x);
+        std::vector<c10::IValue> inputs{at_x};
+
+        std::vector<at::Tensor> at_outputs(
+            {do_min ? at_x.amin(1) : at_x.amax(1)});
+        auto nvf_outputs = executor_cache.runFusionWithInputs(inputs);
+
+        testValidate(
+            executor_cache.fusion(),
+            nvf_outputs,
+            inputs,
+            at_outputs,
+            __LINE__,
+            __FILE__);
+      }
+    }
+  }
+}
+
 // Test file size should be up to 10K LoC. Create a new file for more tests.

 } // namespace nvfuser
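
The new test builds its NaN pattern from an identity matrix: (1 - eye) / (1 - eye) is 0/0 = NaN on the diagonal and 1/1 = 1 everywhere else, so each row of the reduction axis contains exactly one NaN in a different column, and size 1 additionally covers the degenerate single-comparison case. A standalone ATen sketch of the same construction (assuming a libtorch build with CUDA; make_nan_diagonal is an illustrative name, not part of the PR):

#include <ATen/ATen.h>
#include <iostream>

// size x size tensor that is 1 everywhere except the diagonal, which is NaN
// (0/0), matching the input used by FusionMinMaxNanPropagation_CUDA.
at::Tensor make_nan_diagonal(int64_t size, at::ScalarType dtype) {
  auto options = at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
  auto x = at::eye(size, options);
  return (1 - x) / (1 - x); // 0/0 -> NaN on the diagonal, 1/1 -> 1 elsewhere
}

int main() {
  auto t = make_nan_diagonal(3, at::kFloat);
  // The ATen reductions used as the reference propagate NaN, so every row
  // reduces to NaN; the fused kernel is expected to match.
  std::cout << t.amax(1) << std::endl; // nan nan nan
  std::cout << t.amin(1) << std::endl; // nan nan nan
  return 0;
}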