Skip to content

Commit

Permalink
test ldst
Browse files Browse the repository at this point in the history
  • Loading branch information
clamchowder committed Dec 12, 2024
1 parent d1786ed commit 38b52d4
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 2 deletions.
28 changes: 28 additions & 0 deletions GpuMemLatency/kernels/ldst_bw_test.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#define ldst_bw_test_size 1024
// Index wrap mask; only valid because ldst_bw_test_size is a power of two.
#define ldst_bw_test_mask (ldst_bw_test_size - 1)

// Load bandwidth test over a small window of A that is re-read on every
// iteration; the window is meant to be small enough to fit in L1.
//
// A:     global buffer of floats. Only read by this kernel, never written.
// count: loop bound. The loop counter advances by 12 per iteration, matching
//        the 12 loads from A issued in each iteration.
// ret:   output buffer; each work-item stores one float at get_global_id(0).
//        Presumably the store exists so the loads cannot be optimized away.
//
// NOTE(review): indices are wrapped into [0, ldst_bw_test_size) but then read
// at offsets +0..+3, and the starting indices (up to localId + 2 * localSize)
// are used once before any wrapping is applied. A therefore needs at least
// max(ldst_bw_test_size + 3, 3 * get_local_size(0) + 3) floats -- confirm
// against the host-side allocation.
__kernel void ldst_bw_test(__global float* A, uint count, __global float* ret) {
    int threadId = get_global_id(0);
    int localId = get_local_id(0);
    int localSize = get_local_size(0);

    // Four independent accumulators so consecutive FMAs do not form a single
    // dependency chain. The 'f' suffix keeps the initializers single precision
    // instead of relying on an implicit double -> float conversion.
    float acc1 = 1.1f;
    float acc2 = 2.2f;
    float acc3 = 3.3f;
    float acc4 = 4.4f;

    // Three read streams, spaced one work-group width apart.
    int idx0 = localId;
    int idx1 = localId + localSize;
    int idx2 = localId + localSize * 2;

    // Unsigned counter matches the type of count (no signed/unsigned compare).
    for (uint i = 0; i < count; i += 12) {
        acc1 += A[idx0] * A[idx1] + A[idx2];
        acc2 += A[idx0 + 1] * A[idx1 + 1] + A[idx2 + 1];
        acc3 += A[idx0 + 2] * A[idx1 + 2] + A[idx2 + 2];
        acc4 += A[idx0 + 3] * A[idx1 + 3] + A[idx2 + 3];

        // Advance every stream by one work-group width and wrap to the window.
        idx0 = (idx0 + localSize) & ldst_bw_test_mask;
        idx1 = (idx1 + localSize) & ldst_bw_test_mask;
        idx2 = (idx2 + localSize) & ldst_bw_test_mask;
    }

    ret[threadId] = acc1 + acc2 + acc3 + acc4;
}
21 changes: 19 additions & 2 deletions GpuMemLatency/opencltest.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ enum TestType {
LocalMemChaseBandwidth,
LocalMem64Bandwidth,
LocalMemFloat4Bandwidth,
LoadStoreBandwidth,
TextureThroughput,
BufferBandwidth,
MemBandwidthWorkgroupScaling,
Expand Down Expand Up @@ -150,7 +151,7 @@ int main(int argc, char* argv[]) {
testType = VectorMemLatency;
fprintf(stderr, "Testing global memory latency, vector accesses\n");
}
if (_strnicmp(argv[argIdx], "scalarlatency", 13) == 0) {
else if (_strnicmp(argv[argIdx], "scalarlatency", 13) == 0) {
testType = ScalarMemLatency;
fprintf(stderr, "Testing global memory latency, scalar accesses\n");
}
Expand Down Expand Up @@ -221,6 +222,10 @@ int main(int argc, char* argv[]) {
testType = BufferBandwidth;
fprintf(stderr, "Testing buffer bandwidth\n");
}
else if (_strnicmp(argv[argIdx], "ldstbw", 6) == 0) {
testType = LoadStoreBandwidth;
fprintf(stderr, "Testing load/store bandwidth\n");
}
else if (_strnicmp(argv[argIdx], "scaling", 7) == 0)
{
testType = MemBandwidthWorkgroupScaling;
Expand Down Expand Up @@ -552,11 +557,12 @@ int main(int argc, char* argv[]) {
else if (testType == LocalMemBandwidth ||
testType == LocalMem64Bandwidth ||
testType == BufferBandwidth ||
testType == LoadStoreBandwidth ||
testType == TextureThroughput ||
testType == LocalMemFloat4Bandwidth)
{
cl_program prog;
cl_kernel local_bw_kernel = NULL, local_64_bw_kernel = NULL, local_float4_bw_kernel = NULL, buffer_bw_kernel = NULL, tex_bw_kernel = NULL;
cl_kernel local_bw_kernel = NULL, local_64_bw_kernel = NULL, local_float4_bw_kernel = NULL, buffer_bw_kernel = NULL, tex_bw_kernel = NULL, loadstore_bw_kernel = NULL;
if (testType == LocalMemBandwidth)
{
prog = build_program(context, "local_bw_test.cl", NULL);
Expand All @@ -578,6 +584,12 @@ int main(int argc, char* argv[]) {
buffer_bw_kernel = clCreateKernel(prog, "buffer_bw_test", &ret);
if (saveprogram) write_program(prog, "buffer_bw_test");
}
else if (testType == LoadStoreBandwidth)
{
prog = build_program(context, "ldst_bw_test.cl", NULL);
loadstore_bw_kernel = clCreateKernel(prog, "ldst_bw_test", &ret);
if (saveprogram) write_program(prog, "ldst_bw_test");
}
else { // tex throughput
prog = build_program(context, "tex_bw_test.cl", NULL);
tex_bw_kernel = clCreateKernel(prog, "tex_bw_test", &ret);
Expand Down Expand Up @@ -610,6 +622,11 @@ int main(int argc, char* argv[]) {
fprintf(stderr, "Testing buffer bw\n");
result = buffer_bw_test(context, command_queue, buffer_bw_kernel, thread_count, local_size, chase_iterations, &elapsed_ms);
}
else if (testType == LoadStoreBandwidth)
{
fprintf(stderr, "Testing global load bandwidth\n");
result = local_bw_test(context, command_queue, loadstore_bw_kernel, thread_count, local_size, chase_iterations, &elapsed_ms);
}
else if (testType == TextureThroughput)
{
fprintf(stderr, "Testing texture throughput\n");
Expand Down
3 changes: 3 additions & 0 deletions GpuMemLatency/opencltest.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,9 @@
<CopyFileToFolders Include="kernels\local_bw_test.cl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="kernels\ldst_bw_test.cl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="kernels\local_float4_bw_test.cl">
<FileType>Document</FileType>
</CopyFileToFolders>
Expand Down

0 comments on commit 38b52d4

Please sign in to comment.