Skip to content

Commit

Permalink
rename executor to ke
Browse files: browse the repository at this point in the history
  • Loading branch information
naoyam committed Nov 5, 2024
1 parent 629b774 commit 14993fa
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 29 deletions.
24 changes: 12 additions & 12 deletions benchmarks/cpp/gelu_backward.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ static void NvFuserScheduler_GeluBackward_Compile(
&fusion, SchedulerType::PointWise, c10::ArrayRef<c10::IValue>(inputs));

for (auto _ : benchmark_state) {
KernelExecutor executor;
executor.compileFusion(&fusion, inputs, heuristic_params->lparams);
KernelExecutor ke;
ke.compileFusion(&fusion, inputs, heuristic_params->lparams);
}
}

Expand All @@ -187,13 +187,13 @@ static void NvFuserScheduler_GeluBackward_RunFusion(
auto heuristic_params = SchedulerEntry::scheduleWith(
&fusion, SchedulerType::PointWise, c10::ArrayRef<c10::IValue>(inputs));

KernelExecutor executor;
executor.compileFusion(&fusion, inputs, heuristic_params->lparams);
KernelExecutor ke;
ke.compileFusion(&fusion, inputs, heuristic_params->lparams);

C10_CUDA_CHECK(cudaDeviceSynchronize());

for (auto _ : benchmark_state) {
outputs = executor.runFusion(
outputs = ke.runFusion(
c10::ArrayRef<c10::IValue>(inputs), heuristic_params->lparams);
C10_CUDA_CHECK(cudaDeviceSynchronize());
clearL2Cache();
Expand All @@ -218,11 +218,11 @@ static void NvFuserScheduler_GeluBackward_RunFusion_GpuOnly(
auto heuristic_params = SchedulerEntry::scheduleWith(
&fusion, SchedulerType::PointWise, c10::ArrayRef<c10::IValue>(inputs));

KernelExecutor executor;
executor.compileFusion(&fusion, inputs, heuristic_params->lparams);
KernelExecutor ke;
ke.compileFusion(&fusion, inputs, heuristic_params->lparams);

runBenchmarkIterations(
benchmark_state, &executor, inputs, heuristic_params->lparams);
benchmark_state, &ke, inputs, heuristic_params->lparams);
}

BENCHMARK(NvFuserScheduler_GeluBackward_RunFusion_GpuOnly)
Expand All @@ -247,12 +247,12 @@ static void NvFuserScheduler_GeluBackward_RunFusion_CpuOnly(
auto heuristic_params = SchedulerEntry::scheduleWith(
&fusion, SchedulerType::PointWise, c10::ArrayRef<c10::IValue>(inputs));

KernelExecutor executor;
executor.setExecuteKernelFlag(false);
executor.compileFusion(&fusion, inputs, heuristic_params->lparams);
KernelExecutor ke;
ke.setExecuteKernelFlag(false);
ke.compileFusion(&fusion, inputs, heuristic_params->lparams);

for (auto _ : benchmark_state) {
outputs = executor.runFusion(
outputs = ke.runFusion(
c10::ArrayRef<c10::IValue>(inputs), heuristic_params->lparams);
}
}
Expand Down
10 changes: 5 additions & 5 deletions benchmarks/cpp/indexselect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ static void NvFuserScheduler_IndexSelect_Compile(
&fusion, SchedulerType::PointWise, c10::ArrayRef<c10::IValue>(inputs));

for (auto _ : benchmark_state) {
KernelExecutor executor;
executor.compileFusion(
KernelExecutor ke;
ke.compileFusion(
&fusion, c10::ArrayRef<c10::IValue>(inputs), heuristic_params->lparams);
}
}
Expand All @@ -155,16 +155,16 @@ static void NvFuserScheduler_IndexSelect_RunFusion(
auto heuristic_params = SchedulerEntry::scheduleWith(
&fusion, SchedulerType::PointWise, c10::ArrayRef<c10::IValue>(inputs));

KernelExecutor executor;
executor.compileFusion(
KernelExecutor ke;
ke.compileFusion(
&fusion, c10::ArrayRef<c10::IValue>(inputs), heuristic_params->lparams);

C10_CUDA_CHECK(cudaDeviceSynchronize());

at::Tensor output = at::empty_like(inputs[0].toTensor());

for (auto _ : benchmark_state) {
executor.runFusion(
ke.runFusion(
c10::ArrayRef<c10::IValue>(inputs),
{output},
heuristic_params->lparams);
Expand Down
24 changes: 12 additions & 12 deletions benchmarks/cpp/lstm_cell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,8 @@ static void NvFuserScheduler_LstmCell_Compile(
&fusion, SchedulerType::PointWise, c10::ArrayRef<c10::IValue>(inputs));

for (auto _ : benchmark_state) {
KernelExecutor executor;
executor.compileFusion(&fusion, inputs);
KernelExecutor ke;
ke.compileFusion(&fusion, inputs);
}
}

Expand All @@ -182,13 +182,13 @@ static void NvFuserScheduler_LstmCell_RunFusion(
auto heuristic_params = SchedulerEntry::scheduleWith(
&fusion, SchedulerType::PointWise, c10::ArrayRef<c10::IValue>(inputs));

KernelExecutor executor;
executor.compileFusion(&fusion, inputs);
KernelExecutor ke;
ke.compileFusion(&fusion, inputs);

C10_CUDA_CHECK(cudaDeviceSynchronize());

for (auto _ : benchmark_state) {
outputs = executor.runFusion(
outputs = ke.runFusion(
c10::ArrayRef<c10::IValue>(inputs), heuristic_params->lparams);
C10_CUDA_CHECK(cudaDeviceSynchronize());
}
Expand Down Expand Up @@ -220,11 +220,11 @@ static void NvFuserScheduler_LstmCell_RunFusion_GpuOnly(
auto heuristic_params = SchedulerEntry::scheduleWith(
&fusion, SchedulerType::PointWise, c10::ArrayRef<c10::IValue>(inputs));

KernelExecutor executor;
executor.compileFusion(&fusion, inputs);
KernelExecutor ke;
ke.compileFusion(&fusion, inputs);

runBenchmarkIterations(
benchmark_state, &executor, inputs, heuristic_params->lparams);
benchmark_state, &ke, inputs, heuristic_params->lparams);
}

BENCHMARK_CAPTURE(NvFuserScheduler_LstmCell_RunFusion_GpuOnly, Small, 512, 64)
Expand Down Expand Up @@ -259,12 +259,12 @@ static void NvFuserScheduler_LstmCell_RunFusion_CpuOnly(
auto heuristic_params = SchedulerEntry::scheduleWith(
&fusion, SchedulerType::PointWise, c10::ArrayRef<c10::IValue>(inputs));

KernelExecutor executor;
executor.setExecuteKernelFlag(false);
executor.compileFusion(&fusion, inputs);
KernelExecutor ke;
ke.setExecuteKernelFlag(false);
ke.compileFusion(&fusion, inputs);

for (auto _ : benchmark_state) {
outputs = executor.runFusion(
outputs = ke.runFusion(
c10::ArrayRef<c10::IValue>(inputs), heuristic_params->lparams);
}
}
Expand Down

0 comments on commit 14993fa

Please sign in to comment.