Skip to content

Commit

Permalink
[Feature](mluOpExecFFT): add perf border for cooleytuky and stockham …
Browse files Browse the repository at this point in the history
…kernel. (#1196)
  • Loading branch information
DanieeelLiu authored Jan 17, 2025
1 parent b2f930a commit 48638a4
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 14 deletions.
32 changes: 23 additions & 9 deletions kernels/fft/common/fft_basic_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@

#include "fft_basic_ops.h"

#ifndef FFT_STOCK_BATCH_LIMIT
#define FFT_STOCK_BATCH_LIMIT 512
#endif

bool fftIsIntDtype(const mluOpDataType_t dtype) {
if (dtype == MLUOP_DTYPE_INT8 || dtype == MLUOP_DTYPE_INT16 ||
dtype == MLUOP_DTYPE_INT31) {
Expand Down Expand Up @@ -956,16 +960,26 @@ static bool findCooleyTukey(mluOpHandle_t handle, int &L, int &m, int &s) {
}

// Find the most suitable parameters for Cooley-Tukey or Stockham algorithm.
int findFFTOptLimit(mluOpHandle_t handle, const int n, int &m, int &L, int &s,
int &L_sub, bool &find_stockham) {
int findFFTOptLimit(mluOpHandle_t handle, const int n, const int batch, int &m,
int &L, int &s, int &L_sub, bool &find_stockham) {
initBasicParam(n, L, m);

int flag;
flag = findStockham(handle, L, m, L_sub, find_stockham);
if (flag) {
return 0;
int flag = 0;
int flag_stockham;
int flag_cooley_tukey;
flag_stockham = findStockham(handle, L, m, L_sub, find_stockham);
if (flag_stockham && batch > FFT_STOCK_BATCH_LIMIT &&
L > 30 * std::pow(2, m)) {
// batch > FFT_STOCK_BATCH_LIMIT and L > 30 * 2^m: empirical
// thresholds derived from testing experience
flag_cooley_tukey = findCooleyTukey(handle, L, m, s);
// try Cooley-Tukey algo, which may have better performance
if (flag_cooley_tukey) {
flag = 1; // Cooley-Tukey algo has better performance
}
}
if (!flag_stockham) { // if Stockham algo cannot handle it, try Cooley-Tukey
// algo
flag = findCooleyTukey(handle, L, m, s);
}

flag = findCooleyTukey(handle, L, m, s);
return flag;
}
4 changes: 2 additions & 2 deletions kernels/fft/common/fft_basic_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,6 @@ mluOpStatus_t fftOptensor(mluOpHandle_t handle, int elem_num, void *in1_ptr,
cnnlOpTensorDesc_t op_type, void *workspace,
size_t workspace_size, const std::string api);

int findFFTOptLimit(mluOpHandle_t handle, const int n, int &m, int &L, int &s,
int &L_sub, bool &find_stockham);
int findFFTOptLimit(mluOpHandle_t handle, const int n, const int batch, int &m,
int &L, int &s, int &L_sub, bool &find_stockham);
#endif // KERNELS_FFT_COMMON_FFT_BASIC_OPS_H_
8 changes: 5 additions & 3 deletions kernels/fft/fft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,11 @@ mluOpStatus_t selectFFTStrategy(mluOpHandle_t handle, mluOpFFTPlan_t fft_plan,
// strategy_status: 0 means select MLUOP_FUNC_STOCKHAM, 1 means select
// COOLEY_TUKEY,
// -1 means still select CNFFT_FUNC_MATMUL.
int strategy_status =
findFFTOptLimit(handle, fft_plan->n[0], fft_plan->m, fft_plan->L,
fft_plan->s, fft_plan->L_sub, find_stockham);
VLOG(5) << "signal_length: " << fft_plan->n[0];
VLOG(5) << "batch: " << fft_plan->batch;
int strategy_status = findFFTOptLimit(
handle, fft_plan->n[0], fft_plan->batch, fft_plan->m, fft_plan->L,
fft_plan->s, fft_plan->L_sub, find_stockham);
if (strategy_status == 1) {
fft_plan->fft_strategy = CNFFT_FUNC_COOLEY_TUKEY;
} else if (strategy_status == 0) {
Expand Down

0 comments on commit 48638a4

Please sign in to comment.