-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.cpp
395 lines (296 loc) · 10.9 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
// OpenCL ray tracing tutorial by Sam Lapere, 2016
// http://raytracey.blogspot.com
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <limits>
#include <random>
#include <string>
#include <vector>
#include "gl_interop.h"
#include "camera.h"
#include "geometry.h"
#include <CL/opencl.hpp>
#include "user_interaction.h"
#include <GL/glx.h>
// TODO
// cleanup()
// check for cl-gl interop
using namespace std;
using namespace cl;
// Number of Sphere slots in cpu_spheres and in the device-side sphere buffer.
const int sphere_count = 9;
// Host-side random number source: a Mersenne Twister seeded once from std::random_device.
std::random_device dev;
std::mt19937 rng(dev());
// NOTE(review): the upper bound below does not fit in 32 bits; this relies on
// std::mt19937::result_type (uint_fast32_t) being wider than 32 bits on the target -- confirm.
std::uniform_int_distribution<std::mt19937::result_type> dist(1,1000000000000); // uniform integers in [1, 1000000000000]
// OpenCL objects
Device device;
CommandQueue queue;
Kernel kernel;
Context context;
Program program;
// cl_output is only referenced from commented-out code in the visible part of this file.
Buffer cl_output;
// Device buffers for the sphere array, the render camera and the accumulated samples.
Buffer cl_spheres;
Buffer cl_camera;
Buffer cl_accumbuffer;
// OpenCL view of the OpenGL vertex buffer object, plus the list handed to
// enqueueAcquireGLObjects / enqueueReleaseGLObjects.
BufferGL cl_vbo;
cl::vector<Memory> cl_vbos;
// image buffer (not needed with real-time viewport)
cl_float4* cpu_output;
cl_int err;
// Frame counter: reset to 0 when the accumulation buffer is cleared, incremented on every render().
unsigned int framenumber = 0;
// Padding with dummy variables is required for memory alignment:
// float3 is treated as float4 by OpenCL.
// Alignment can also be enforced by using __attribute__ ((aligned (16)));
// see https://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/attributes-variables.html
// NOTE(review): presumably this refers to the host structs (Sphere, Camera) declared in the headers -- confirm.
// Host-side render camera; allocated in main() and copied into cl_camera.
Camera* hostRendercam = NULL;
// Host copy of the scene; uploaded into cl_spheres.
Sphere cpu_spheres[sphere_count];
void pickPlatform(Platform& platform, const cl::vector<Platform>& platforms){
if (platforms.size() == 1) platform = platforms[0];
else{
int input = 0;
cout << "\nChoose an OpenCL platform: ";
cin >> input;
// handle incorrect user input
while (input < 1 || input > platforms.size()){
cin.clear(); //clear errors/bad flags on cin
cin.ignore(cin.rdbuf()->in_avail(), '\n'); // ignores exact number of chars in cin buffer
cout << "No such option. Choose an OpenCL platform: ";
cin >> input;
}
platform = platforms[input - 1];
}
}
void pickDevice(Device& device, const cl::vector<Device>& devices){
if (devices.size() == 1) device = devices[0];
else{
int input = 0;
cout << "\nChoose an OpenCL device: ";
cin >> input;
// handle incorrect user input
while (input < 1 || input > devices.size()){
cin.clear(); //clear errors/bad flags on cin
cin.ignore(cin.rdbuf()->in_avail(), '\n'); // ignores exact number of chars in cin buffer
cout << "No such option. Choose an OpenCL device: ";
cin >> input;
}
device = devices[input - 1];
}
}
void printErrorLog(const Program& program, const Device& device){
// Get the error log and print to console
string buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
cerr << "Build log:" << std::endl << buildlog << std::endl;
// Print the error log to a file
FILE *log = fopen("errorlog.txt", "w");
fprintf(log, "%s\n", buildlog.c_str());
cout << "Error log saved in 'errorlog.txt'" << endl;
system("PAUSE");
exit(1);
}
void initOpenCL()
{
// Get all available OpenCL platforms (e.g. AMD OpenCL, Nvidia CUDA, Intel OpenCL)
cl::vector<Platform> platforms;
Platform::get(&platforms);
cout << "Available OpenCL platforms : " << endl << endl;
for (int i = 0; i < platforms.size(); i++)
cout << "\t" << i + 1 << ": " << platforms[i].getInfo<CL_PLATFORM_NAME>() << endl;
cout << endl << "WARNING: " << endl << endl;
cout << "OpenCL-OpenGL interoperability is only tested " << endl;
cout << "on discrete GPUs from Nvidia and AMD" << endl;
cout << "Other devices (such as Intel integrated GPUs) may fail" << endl << endl;
// Pick one platform
Platform platform;
pickPlatform(platform, platforms);
cout << "\nUsing OpenCL platform: \t" << platform.getInfo<CL_PLATFORM_NAME>() << endl;
// Get available OpenCL devices on platform
cl::vector<Device> devices;
platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
cout << "Available OpenCL devices on this platform: " << endl << endl;
for (int i = 0; i < devices.size(); i++){
cout << "\t" << i + 1 << ": " << devices[i].getInfo<CL_DEVICE_NAME>() << endl;
cout << "\t\tMax compute units: " << devices[i].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>() << endl;
cout << "\t\tMax work group size: " << devices[i].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() << endl << endl;
}
// Pick one device
//Device device;
pickDevice(device, devices);
cout << "\nUsing OpenCL device: \t" << device.getInfo<CL_DEVICE_NAME>() << endl;
// Create an OpenCL context on that device.
// Windows specific OpenCL-OpenGL interop
// cl_context_properties properties[] =
// {
// CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
// CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
// CL_CONTEXT_PLATFORM, (cl_context_properties)platform(),
// 0
// };
cl_context_properties properties_linux[] =
{
CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
CL_CONTEXT_PLATFORM, (cl_context_properties)platform(),
0
};
context = Context(device, properties_linux);
// Create a command queue
queue = CommandQueue(context, device);
// Convert the OpenCL source code to a string// Convert the OpenCL source code to a string
string source;
ifstream file("opencl_kernel.cl");
if (!file){
cout << "\nNo OpenCL file found!" << endl << "Exiting..." << endl;
system("PAUSE");
exit(1);
}
while (!file.eof()){
char line[256];
file.getline(line, 255);
source += line;
}
const char* kernel_source = source.c_str();
// Create an OpenCL program with source
program = Program(context, kernel_source);
// Build the program for the selected device
cl_int result = program.build({ device }); // "-cl-fast-relaxed-math"
if (result) cout << "Error during compilation OpenCL code!!!\n (" << result << ")" << endl;
if (result == CL_BUILD_PROGRAM_FAILURE) printErrorLog(program, device);
}
// Brace-initializer shorthand: float3(x, y, z) expands to {{x, y, z}}.
// NOTE(review): not used anywhere in the visible part of this file.
#define float3(x, y, z) {{x, y, z}} // macro to replace ugly initializer braces
// Fills the first four entries of `cpu_spheres` with the demo scene:
// a huge floor sphere, two small spheres and one emissive sphere acting as
// the light. Entries from index 4 onwards are not touched.
// Note that the parameter shadows the global array of the same name.
void initScene(Sphere* cpu_spheres){
    // [0] floor: a very large sphere whose top sits just below the small spheres
    Sphere& ground = cpu_spheres[0];
    ground.emission = Vector3Df(0.0f, 0.0f, 0.0f);
    ground.color = Vector3Df(0.9f, 0.3f, 0.0f);
    ground.position = Vector3Df(0.0f, -200.4f, 0.0f);
    ground.radius = 200.0f;

    // [1] small sphere on the left
    Sphere& left_ball = cpu_spheres[1];
    left_ball.emission = Vector3Df(0.0f, 0.0f, 0.0f);
    left_ball.color = Vector3Df(0.9f, 0.8f, 0.7f);
    left_ball.position = Vector3Df(-0.25f, -0.24f, -0.1f);
    left_ball.radius = 0.16f;

    // [2] small sphere on the right
    Sphere& right_ball = cpu_spheres[2];
    right_ball.emission = Vector3Df(0.0f, 0.0f, 0.0f);
    right_ball.color = Vector3Df(0.9f, 0.8f, 0.7f);
    right_ball.position = Vector3Df(0.25f, -0.24f, 0.1f);
    right_ball.radius = 0.16f;

    // [3] light source: black surface colour, non-zero emission
    Sphere& lamp = cpu_spheres[3];
    lamp.emission = Vector3Df(9.0f, 8.0f, 6.0f);
    lamp.color = Vector3Df(0.0f, 0.0f, 0.0f);
    lamp.position = Vector3Df(0.0f, 1.36f, 0.0f);
    lamp.radius = 1.0f;
}
// Integer hash: scrambles `a` through a fixed sequence of xor, shift, add and
// multiply steps and returns the mixed value. The result depends only on `a`.
unsigned int WangHash(unsigned int a) {
    unsigned int mixed = a;
    mixed = mixed ^ 61u ^ (mixed >> 16);
    mixed += mixed << 3;
    mixed ^= mixed >> 4;
    mixed *= 0x27d4eb2du;
    mixed ^= mixed >> 15;
    return mixed;
}
void initCLKernel(){
// pick a rendermode
unsigned int rendermode = 1;
// Create a kernel (entry point in the OpenCL source program)
kernel = Kernel(program, "render_kernel");
// specify OpenCL kernel arguments
//kernel.setArg(0, cl_output);
kernel.setArg(0, cl_spheres);
kernel.setArg(1, window_width);
kernel.setArg(2, window_height);
kernel.setArg(3, sphere_count);
kernel.setArg(4, cl_vbo);
kernel.setArg(5, framenumber);
kernel.setArg(6, cl_camera);
kernel.setArg(7, rand());
kernel.setArg(8, rand());
kernel.setArg(9, cl_accumbuffer);
kernel.setArg(10, WangHash(framenumber));
kernel.setArg(11, randomNumbers);
}
void runKernel(){
// every pixel in the image has its own thread or "work item",
// so the total amount of work items equals the number of pixels
std::size_t global_work_size = window_width * window_height;
std::size_t local_work_size = kernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(device);;
// Ensure the global work size is a multiple of local work size
if (global_work_size % local_work_size != 0)
global_work_size = (global_work_size / local_work_size + 1) * local_work_size;
//Make sure OpenGL is done using the VBOs
glFinish();
//this passes in the vector of VBO buffer objects
queue.enqueueAcquireGLObjects(&cl_vbos);
queue.finish();
// launch the kernel
queue.enqueueNDRangeKernel(kernel, NULL, global_work_size, local_work_size); // local_work_size
queue.finish();
//Release the VBOs so OpenGL can play with them
queue.enqueueReleaseGLObjects(&cl_vbos);
queue.finish();
}
// Renders one frame: uploads the scene and camera, refreshes the per-frame
// kernel arguments, runs the kernel and draws the result with OpenGL.
// When `buffer_reset` is set, the accumulation buffer is cleared and the frame
// counter restarts before this frame is rendered.
void render(){
    // upload the current sphere data (blocking write)
    queue.enqueueWriteBuffer(cl_spheres, CL_TRUE, 0, sphere_count * sizeof(Sphere), cpu_spheres);

    if (buffer_reset){
        // restart accumulation: zero-fill the buffer and reset the counter
        float arg = 0;
        queue.enqueueFillBuffer(cl_accumbuffer, arg, 0, window_width * window_height * sizeof(cl_float3));
        framenumber = 0;
    }
    buffer_reset = false;
    framenumber++;

    // build a new camera for each frame on the CPU
    interactiveCamera->buildRenderCamera(hostRendercam);

    // copy the host camera to a OpenCL camera
    queue.enqueueWriteBuffer(cl_camera, CL_TRUE, 0, sizeof(Camera), hostRendercam);
    queue.finish();

    // fresh random values for this frame
    randomNumbers= {dist(rng),dist(rng),dist(rng),dist(rng),dist(rng),dist(rng),dist(rng),dist(rng),dist(rng),dist(rng)};

    // Refresh every argument whose value changed since the last launch.
    // Argument values are captured when setArg is called, so each changed
    // value has to be set again.
    kernel.setArg(5, framenumber);
    kernel.setArg(6, cl_camera);
    // NOTE(review): initCLKernel() sets arguments 7 and 8 from rand() (int),
    // while dist(rng) yields std::mt19937::result_type -- confirm which type
    // the kernel actually declares.
    kernel.setArg(7, dist(rng));
    kernel.setArg(8, dist(rng));
    kernel.setArg(10, WangHash(framenumber));
    // Without this call the values regenerated above never reach the kernel
    // and it keeps using whatever was set once in initCLKernel().
    kernel.setArg(11, randomNumbers);

    runKernel();
    drawGL();
}
void cleanUp(){
// delete cpu_output;
}
// Replaces the global interactive camera with a freshly constructed one,
// sized to the current window and using a horizontal field of view of 45.
void initCamera()
{
    // dispose of any camera left over from an earlier call
    delete interactiveCamera;
    interactiveCamera = new InteractiveCamera();

    auto& fresh_camera = *interactiveCamera;
    fresh_camera.setResolution(window_width, window_height);
    fresh_camera.setFOVX(45);
}
int main(int argc, char** argv){
// initialise OpenGL (GLEW and GLUT window + callback functions)
initGL(argc, argv);
cout << "OpenGL initialized \n";
// initialise OpenCL
initOpenCL();
// create vertex buffer object
createVBO(&vbo);
// call Timer():
Timer(0);
//make sure OpenGL is finished before we proceed
glFinish();
// initialise scene
initScene(cpu_spheres);
cl_spheres = Buffer(context, CL_MEM_READ_ONLY, sphere_count * sizeof(Sphere));
queue.enqueueWriteBuffer(cl_spheres, CL_TRUE, 0, sphere_count * sizeof(Sphere), cpu_spheres);
// initialise an interactive camera on the CPU side
initCamera();
// create a CPU camera
hostRendercam = new Camera();
interactiveCamera->buildRenderCamera(hostRendercam);
cl_camera = Buffer(context, CL_MEM_READ_ONLY, sizeof(Camera));
queue.enqueueWriteBuffer(cl_camera, CL_TRUE, 0, sizeof(Camera), hostRendercam);
// create OpenCL buffer from OpenGL vertex buffer object
cl_vbo = BufferGL(context, CL_MEM_WRITE_ONLY, vbo);
cl_vbos.push_back(cl_vbo);
// reserve memory buffer on OpenCL device to hold image buffer for accumulated samples
cl_accumbuffer = Buffer(context, CL_MEM_WRITE_ONLY, window_width * window_height * sizeof(cl_float3));
// intitialise the kernel
initCLKernel();
// start rendering continuously
glutMainLoop();
// release memory
cleanUp();
system("PAUSE");
return 0;
}