forked from emilk/ram_bench
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlist_traversal.c
104 lines (83 loc) · 2.95 KB
/
list_traversal.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
// Created by Emil Ernerfeldt on 2014-04-17.
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
typedef uint64_t Int;
typedef struct Node Node;
struct Node {
Int payload; // To make the node dirty
Node* next;
};
void random_shuffle(Node** list, Int N)
{
for (Int i=0; i<N-1; ++i) {
Int swap_ix = i + rand() % (N-i);
Node* tmp = list[swap_ix];
list[swap_ix] = list[i];
list[i] = tmp;
}
}
// Returns nanoseconds per element.
double bench(Int N, Int iters) {
// Allocate all the memory continuously so we aren't affected by the particulars of the system allocator:
Node* memory = (Node*)malloc(N * sizeof(Node));
// Initialize the node pointers:
Node** nodes = (Node**)malloc(N * sizeof(Node*));
for (Int i=0; i<N; ++i) {
nodes[i] = &memory[i];
}
// Randomize so emulate a list that has been shuffled around a bit.
// This is so that each memory acces is a *random* memory access.
// This is the worst case scenario for a linked list, which is exactly what we want to measure.
// Without the random_shuffle we get O(N) because it enables the pre-fetcher to do its job.
// Without a prefetcher, a random memory access in N bytes of RAM costs O(N^0.5) due to the Bekenstein bound.
// This means we get O(N^1.5) complexity of a linked list traversal.
random_shuffle(nodes, N);
// Link up the nodes:
for (Int i=0; i<N-1; ++i) {
nodes[i]->next = nodes[i+1];
}
nodes[N-1]->next = NULL;
Node* start_node = nodes[0];
free(nodes); // Free up unused memory before meassuring:
// Do the actual measurements:
Int start = clock();
for (Int it=0; it<iters; ++it) {
// Run through all the nodes:
Node* node = start_node;
while (node) {
node->payload = 300;
node = node->next;
}
}
Int dur = clock() - start;
double ns = 1e9 * dur / CLOCKS_PER_SEC;
return ns / (N * iters);
}
int main(int argc, const char * argv[])
{
// Outputs data in gnuplot friendly .data format
printf("#bytes ns/elem\n");
long long unsigned GB = 1024*1024*1024;
long long unsigned int RAM_SIZE = GB*4;
//printf("The max memory to be used is %llu", RAM_SIZE);
Int stopsPerFactor = 4; // For every power of 2, how many measurements do we do?
Int minElemensFactor = 6; // First measurement is 2^this number of elements.
Int maxElemsFactor = 30; // Last measurement is 2^this number of elements. 30 == 16GB of memory
//Int elemsPerMeasure = Int(1) << 28; // measure enough times to process this many elements (to get a good average)
Int min = stopsPerFactor * minElemensFactor;
Int max = stopsPerFactor * maxElemsFactor;
for (Int ei=min; ei<=max; ++ei) {
Int N = (Int)floor(pow(2.0, (double)ei / stopsPerFactor) + 0.5);
//Int reps = elemsPerMeasure / N;
Int reps = (Int)floor(1e9 / pow(N, 1.5) + 0.5);
if (reps<1) reps = 1;
if (N >= RAM_SIZE/ sizeof(Node)) {
continue;
}
double ans = bench(N, reps);
printf("%llu %f # (N=%llu, reps=%llu) %llu/%llu\n", N*sizeof(Node), ans, N, reps, ei-min+1, max-min+1);
}
}