Skip to content

Commit

Permalink
aaaaa
Browse files Browse the repository at this point in the history
  • Loading branch information
clamchowder committed Feb 24, 2024
1 parent 5a35bfc commit 76fc858
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 8 deletions.
23 changes: 18 additions & 5 deletions GpuMemLatency/instruction_rate.c
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ float run_divergence_rate_test(cl_context context,
{
size_t global_item_size = thread_count;
size_t local_item_size = local_size;
- uint32_t actual_threads = thread_count;
+ uint32_t active_threads = thread_count;
cl_int ret;
float totalOps, gOpsPerSec;
uint64_t time_diff_ms = 0;
Expand All @@ -348,8 +348,9 @@ float run_divergence_rate_test(cl_context context,
float* A = (float*)malloc(sizeof(float) * thread_count);
memset(result, 0, sizeof(float) * thread_count);

- if (partitionPattern != NULL) actual_threads = 0;
+ if (partitionPattern != NULL) active_threads = 0;

+ fprintf(stderr, "\n");
for (int i = 0; i < thread_count; i++)
{
if (partitionPattern == NULL) {
Expand All @@ -361,12 +362,24 @@ float run_divergence_rate_test(cl_context context,
{
if (partitionPattern[(i / wave)]) {
A[i] = 0.2f;
- actual_threads++;
+ fprintf(stderr, "a ");
+ active_threads++;
+ }
+ else
+ {
+ fprintf(stderr, "_ ");
+ A[i] = 1.2f;
+ }
+
+ if ((i + 1) % wave == 0)
+ {
+ fprintf(stderr, "\n");
  }
- else A[i] = 1.2f;
}
}

+ fprintf(stderr, "\nActive threads: %d\n", active_threads);
+
cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, thread_count * sizeof(float), NULL, &ret);
cl_mem result_obj = clCreateBuffer(context, CL_MEM_READ_WRITE, thread_count * sizeof(float), NULL, &ret);
ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0, thread_count * sizeof(float), A, 0, NULL, NULL);
Expand Down Expand Up @@ -397,7 +410,7 @@ float run_divergence_rate_test(cl_context context,

time_diff_ms = end_timing();

- totalOps = (float)chase_iterations * 8 * (float)actual_threads;
+ totalOps = (float)chase_iterations * 8 * (float)active_threads;
gOpsPerSec = ((float)totalOps / 1e9) / ((float)time_diff_ms / 1000);
//fprintf(stderr, "chase iterations: %d, thread count: %d\n", chase_iterations, thread_count);
//fprintf(stderr, "total ops: %f (%.2f G)\ntotal time: %llu ms\n", totalOps, totalOps / 1e9, time_diff_ms);
Expand Down
14 changes: 11 additions & 3 deletions GpuMemLatency/opencltest.c
Original file line number Diff line number Diff line change
Expand Up @@ -690,11 +690,19 @@ int main(int argc, char* argv[]) {
}
else if (testType == Partition)
{
- int pattern[] = { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0 };
+ int pattern4[] = { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0 };

// function and its associated kernel serve two purposes
- float result = run_divergence_rate_test(context, command_queue, thread_count, local_size, wave, pattern);
- printf("Throughput: %f\n", result);
+ float result = run_divergence_rate_test(context, command_queue, thread_count, local_size, wave, pattern4);
+ printf("Throughput (mod 4): %f\n", result);
+
+ int pattern2[] = { 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ result = run_divergence_rate_test(context, command_queue, thread_count, local_size, wave, pattern2);
+ printf("Throughput (mod 2): %f\n", result);
+
+ int consec_pattern[] = { 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ result = run_divergence_rate_test(context, command_queue, thread_count, local_size, wave, consec_pattern);
+ printf("Throughput (x4): %f\n", result);
}

//printf("If you didn't run this through cmd, now you can copy the results. And press ctrl+c to close");
Expand Down

0 comments on commit 76fc858

Please sign in to comment.