forked from Themaister/GLFFT
-
Notifications
You must be signed in to change notification settings - Fork 3
/
glfft.hpp
240 lines (213 loc) · 12 KB
/
glfft.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
/* Copyright (C) 2015 Hans-Kristian Arntzen <[email protected]>
*
* Permission is hereby granted, free of charge,
* to any person obtaining a copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef GLFFT_HPP__
#define GLFFT_HPP__
#include "glfft_interface.hpp"
#include "glfft_common.hpp"
#include "glfft_wisdom.hpp"
#include <vector>
#include <unordered_map>
#include <limits>
/// GLFFT doesn't try to preserve GL state in any way.
/// E.g. SHADER_STORAGE_BUFFER bindings, programs bound, texture bindings, etc.
/// Applications calling this library must expect that some GL state will be modified.
/// No rendering state associated with graphics will be modified.
namespace GLFFT
{
/// @brief A GPU FFT plan: owns the passes, programs and temporary buffers needed to run one
///        transform configuration, and records the dispatches for it via process().
///
/// All sizing, target and type decisions are fixed at construction time; only the binding
/// ranges, samplers and texture addressing parameters can be changed afterwards through
/// the set_*() methods.
class FFT
{
public:
    /// @brief Creates a full FFT.
    ///
    /// All buffer allocation done by GLFFT will be done in constructor.
    /// Will throw if invalid parameters are passed.
    ///
    /// @param context The graphics context.
    /// @param Nx Number of samples in horizontal dimension.
    /// @param Ny Number of samples in vertical dimension.
    /// @param type The transform type.
    /// @param direction Forward, inverse or inverse with convolution.
    ///        For real-to-complex and complex-to-real transforms, the
    ///        transform type must match.
    /// @param input_target GL object type of input target. For real-to-complex with texture as input, ImageReal is used.
    /// @param output_target GL object type of output target. For complex-to-real with texture as output, ImageReal is used.
    /// @param cache A program cache for caching the GLFFT programs created.
    /// @param options FFT options such as performance related parameters and types.
    /// @param wisdom GLFFT wisdom which can override performance related options
    ///        (options.performance is used as a fallback).
    /// @param input_load_texture_code
    ///        Custom code for sampling the input texture can be inserted here.
    ///        This must only use a single line and must define a function with signature
    ///        "cfloat load_texture(uvec2 coord)" and can call "cfloat load_texture_inner(uvec2 coord)".
    /// @param reuse_preallocated_temporary_buffer0
    ///        For large FFTs also a large internal temporary buffer is required. To reduce memory consumption
    ///        you can provide a preallocated buffer here that can be shared with other parts of the program.
    ///        The buffer must have size at least Nx * Ny * (type == ComplexToComplexDual ? 4 : 2) * (options.type.fp16 ? 2 : 4).
    ///        The provided buffer must not be used while the FFT is in progress and will contain unpredictable garbage data afterwards.
    /// @param reuse_preallocated_temporary_buffer1
    ///        Same as reuse_preallocated_temporary_buffer0 and used only if the output is a texture.
    ///        May be aliased with the input if the input is not needed again after processing.
    FFT(Context *context, unsigned Nx, unsigned Ny,
        Type type, Direction direction, Target input_target, Target output_target,
        std::shared_ptr<ProgramCache> cache, const FFTOptions &options,
        const FFTWisdom &wisdom = FFTWisdom(),
        std::string input_load_texture_code = input_load_texture_code_default,
        std::unique_ptr<Buffer> reuse_preallocated_temporary_buffer0 = nullptr,
        std::unique_ptr<Buffer> reuse_preallocated_temporary_buffer1 = nullptr);

    /// @brief Creates a single stage FFT. Used mostly internally for benchmarking partial FFTs.
    ///
    /// All buffer allocation done by GLFFT will be done in constructor.
    /// Will throw if invalid parameters are passed.
    ///
    /// @param context The graphics context.
    /// @param Nx Number of samples in horizontal dimension.
    /// @param Ny Number of samples in vertical dimension.
    /// @param radix FFT radix to test.
    /// @param p Accumulated p factor. If 1, "first pass" mode is tested, otherwise, generic FFT stages.
    /// @param mode The transform mode.
    /// @param input_target GL object type of input target. For real-to-complex with texture as input, ImageReal is used.
    /// @param output_target GL object type of output target. For complex-to-real with texture as output, ImageReal is used.
    /// @param cache A program cache for caching the GLFFT programs created.
    /// @param options FFT options such as performance related parameters and types.
    FFT(Context *context, unsigned Nx, unsigned Ny, unsigned radix, unsigned p,
        Mode mode, Target input_target, Target output_target,
        std::shared_ptr<ProgramCache> cache, const FFTOptions &options);

    /// @brief Process the FFT.
    ///
    /// The type of object passed here must match what FFT was initialized with.
    ///
    /// @param cmd Command buffer for issuing dispatch commands.
    /// @param output Output buffer or image.
    ///        NOTE: For images, the texture must be using immutable storage, i.e. glTexStorage2D!
    /// @param input Input buffer or texture.
    /// @param input_aux If using convolution transform type,
    ///        the content of input and input_aux will be multiplied together.
    void process(CommandBuffer *cmd, Resource *output, Resource *input, Resource *input_aux = nullptr);

    /// @brief Run process() multiple times, timing the results.
    ///
    /// Mostly used internally by GLFFT wisdom, glfft_cli's bench, and so on.
    ///
    /// @param context The graphics context.
    /// @param output Output buffer or image.
    ///        NOTE: For images, the texture must be using immutable storage, i.e. glTexStorage2D!
    /// @param input Input buffer or texture.
    /// @param warmup_iterations Number of iterations to run to "warm" up GL, ensures we don't hit
    ///        recompilations or similar when benching.
    /// @param iterations Number of iterations to run the benchmark.
    ///        Each iteration will ensure timing with a glFinish() followed by timing.
    /// @param dispatches_per_iteration Number of calls to process() we should do per iteration.
    /// @param max_time The max time the benchmark should run. Will be checked after each iteration is complete.
    ///
    /// @returns Average GPU time per process() call.
    double bench(Context *context, Resource *output, Resource *input,
                 unsigned warmup_iterations, unsigned iterations, unsigned dispatches_per_iteration,
                 double max_time = std::numeric_limits<double>::max());

    /// @brief Returns cost for a process() call. Only used for debugging.
    double get_cost() const { return cost; }

    /// @brief Returns number of passes (glDispatchCompute) in a process() call.
    size_t get_num_passes() const { return passes.size(); }

    /// @brief Returns Nx.
    size_t get_dimension_x() const { return size_x; }

    /// @brief Returns Ny.
    size_t get_dimension_y() const { return size_y; }

    /// @brief Sets offset and scale parameters for normalized texel coordinates when sampling textures.
    ///
    /// By default, these values are 0.5 / size (samples in the center of texel (0, 0)).
    /// Scale is 1.0 / size, so it steps one texel for each coordinate in the FFT transform.
    /// Setting this to something custom is useful to get downsampling with GL_LINEAR -> FFT transform
    /// without having to downsample the texture first, then FFT.
    ///
    /// @param offset_x Horizontal offset, stored verbatim.
    /// @param offset_y Vertical offset, stored verbatim.
    /// @param scale_x Horizontal scale, stored verbatim.
    /// @param scale_y Vertical scale, stored verbatim.
    void set_texture_offset_scale(float offset_x, float offset_y, float scale_x, float scale_y)
    {
        texture.offset_x = offset_x;
        texture.offset_y = offset_y;
        texture.scale_x = scale_x;
        texture.scale_y = scale_y;
    }

    /// @brief Set binding range for input.
    ///
    /// If input is an SSBO, set a custom binding range to be passed to glBindBufferRange.
    /// By default, the entire buffer is bound.
    ///
    /// @param offset Start of the range.
    /// @param size Length of the range.
    void set_input_buffer_range(size_t offset, size_t size)
    {
        ssbo.input.offset = offset;
        ssbo.input.size = size;
    }

    /// @brief Set binding range for input_aux.
    ///
    /// If input_aux is an SSBO, set a custom binding range to be passed to glBindBufferRange.
    /// By default, the entire buffer is bound.
    ///
    /// @param offset Start of the range.
    /// @param size Length of the range.
    void set_input_aux_buffer_range(size_t offset, size_t size)
    {
        ssbo.input_aux.offset = offset;
        ssbo.input_aux.size = size;
    }

    /// @brief Set binding range for output.
    ///
    /// If output buffer is an SSBO, set a custom binding range to be passed to glBindBufferRange.
    /// By default, the entire buffer is bound.
    ///
    /// @param offset Start of the range.
    /// @param size Length of the range.
    void set_output_buffer_range(size_t offset, size_t size)
    {
        ssbo.output.offset = offset;
        ssbo.output.size = size;
    }

    /// @brief Set samplers for input textures.
    ///
    /// Set sampler objects to be used for input and input_aux if textures are used as input.
    /// By default, sampler object 0 will be used (inheriting sampler parameters from the texture object itself).
    ///
    /// @param sampler0 Sampler stored in slot 0.
    /// @param sampler1 Sampler stored in slot 1; defaults to nullptr.
    void set_samplers(Sampler *sampler0, Sampler *sampler1 = nullptr)
    {
        texture.samplers[0] = sampler0;
        texture.samplers[1] = sampler1;
    }

private:
    // Non-owning pointer to the graphics context. Defaulted defensively; the
    // constructors are expected to assign it (their definitions are not in this header).
    Context *context = nullptr;

    // One element of `passes`. Deliberately kept free of default member initializers
    // so it stays an aggregate under C++11 as well.
    struct Pass
    {
        Parameters parameters;
        unsigned workgroups_x;
        unsigned workgroups_y;
        unsigned uv_scale_x;
        unsigned stride;
        Program *program;
    };

    // Value reported by get_cost().
    double cost = 0.0;

    // Temporary buffers owned by this FFT.
    // NOTE(review): presumably these receive the reuse_preallocated_temporary_buffer0/1
    // constructor arguments when provided -- confirm in the implementation.
    std::unique_ptr<Buffer> temp_buffer;
    std::unique_ptr<Buffer> temp_buffer_image;

    // Pass list; get_num_passes() returns its size.
    std::vector<Pass> passes;

    // Shared program cache handed in through the constructors.
    std::shared_ptr<ProgramCache> cache;

    // Program creation / lookup helpers, defined out of line.
    std::unique_ptr<Program> build_program(const Parameters &params);
    static std::string load_shader_string(const char *path);
    static void store_shader_string(const char *path, const std::string &source);
    Program *get_program(const Parameters &params);

    // State written by set_texture_offset_scale() and set_samplers().
    // NOTE(review): the in-class defaults below differ from the per-size defaults described
    // on set_texture_offset_scale(); presumably the constructor overrides them -- confirm.
    struct
    {
        float offset_x = 0.0f, offset_y = 0.0f, scale_x = 1.0f, scale_y = 1.0f;
        Sampler *samplers[2] = { nullptr, nullptr };
    } texture;

    // Buffer binding ranges written by the set_*_buffer_range() methods.
    // NOTE(review): a zero size presumably means "bind the entire buffer" -- confirm.
    struct
    {
        struct
        {
            size_t offset = 0;
            size_t size = 0;
        } input, input_aux, output;
    } ssbo;

    // Transform dimensions returned by get_dimension_x() / get_dimension_y().
    // Defaulted defensively; the constructors are expected to assign them.
    unsigned size_x = 0, size_y = 0;
};
}
#endif