-
Notifications
You must be signed in to change notification settings - Fork 7
/
radix7.cpp
96 lines (80 loc) · 2.89 KB
/
radix7.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#include "common.hpp"
#include "hedley.h"
#include <algorithm>
#include <memory>
#include <array>
#include <assert.h>
#include <string.h>
#include <assert.h>
#include <x86intrin.h>
const size_t RADIX_BITS = 8;
const size_t RADIX_SIZE = (size_t)1 << RADIX_BITS;
const size_t RADIX_LEVELS = (63 / RADIX_BITS) + 1;
const uint64_t RADIX_MASK = RADIX_SIZE - 1;
using freq_array_type = size_t [RADIX_LEVELS][RADIX_SIZE];
// never inline just to make it show up easily in profiles (inlining this lengthly function doesn't
// really help anyways)
HEDLEY_NEVER_INLINE
static void count_frequency(uint64_t *a, size_t count, freq_array_type freqs) {
for (size_t i = 0; i < count; i++) {
uint64_t value = a[i];
for (size_t pass = 0; pass < RADIX_LEVELS; pass++) {
freqs[pass][value & RADIX_MASK]++;
value >>= RADIX_BITS;
}
}
}
/**
* Determine if the frequencies for a given level are "trivial".
*
* Frequencies are trivial if only a single frequency has non-zero
* occurrences. In that case, the radix step just acts as a copy so we can
* skip it.
*/
static bool is_trivial(size_t freqs[RADIX_SIZE], size_t count) {
for (size_t i = 0; i < RADIX_SIZE; i++) {
auto freq = freqs[i];
if (freq != 0) {
return freq == count;
}
}
assert(count == 0); // we only get here if count was zero
return true;
}
void radix_sort7(uint64_t *a, size_t count)
{
std::unique_ptr<uint64_t[]> queue_area(new uint64_t[count]);
// huge_unique_ptr<uint64_t[]> queue_area = make_huge_array<uint64_t>(count, false);
freq_array_type freqs = {};
count_frequency(a, count, freqs);
uint64_t *from = a, *to = queue_area.get();
for (size_t pass = 0; pass < RADIX_LEVELS; pass++) {
if (is_trivial(freqs[pass], count)) {
// this pass would do nothing, just skip it
continue;
}
uint64_t shift = pass * RADIX_BITS;
// array of pointers to the current position in each queue, which we set up based on the
// known final sizes of each queue (i.e., "tighly packed")
uint64_t * queue_ptrs[RADIX_SIZE], * next = to;
for (size_t i = 0; i < RADIX_SIZE; i++) {
queue_ptrs[i] = next;
next += freqs[pass][i];
}
// copy each element into the appropriate queue based on the current RADIX_BITS sized
// "digit" within it
for (size_t i = 0; i < count; i++) {
size_t value = from[i];
size_t index = (value >> shift) & RADIX_MASK;
*queue_ptrs[index]++ = value;
__builtin_prefetch(queue_ptrs[index] + 1);
}
// swap from and to areas
std::swap(from, to);
}
// because of the last swap, the "from" area has the sorted payload: if it's
// not the original array "a", do a final copy
if (from != a) {
std::copy(from, from + count, a);
}
}