Skip to content

Commit

Permalink
Merge pull request #726 from rhpvorderman/fasterexpectederrors
Browse files Browse the repository at this point in the history
Speed up expected errors calculations using SSE2 instructions
  • Loading branch information
marcelm authored Sep 6, 2023
2 parents a5f1d69 + 6832374 commit b5c6d00
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 16 deletions.
163 changes: 163 additions & 0 deletions src/cutadapt/expected_errors.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
#include <stdint.h>
#ifdef __SSE2__
#include "emmintrin.h"
#endif

static const float SCORE_TO_ERROR_RATE[94] = {
1.0L, // 0
0.7943282347242815L, // 1
0.6309573444801932L, // 2
0.5011872336272722L, // 3
0.3981071705534972L, // 4
0.31622776601683794L, // 5
0.251188643150958L, // 6
0.19952623149688797L, // 7
0.15848931924611134L, // 8
0.12589254117941673L, // 9
0.1L, // 10
0.07943282347242814L, // 11
0.06309573444801933L, // 12
0.05011872336272722L, // 13
0.039810717055349734L, // 14
0.03162277660168379L, // 15
0.025118864315095794L, // 16
0.0199526231496888L, // 17
0.015848931924611134L, // 18
0.012589254117941675L, // 19
0.01L, // 20
0.007943282347242814L, // 21
0.00630957344480193L, // 22
0.005011872336272725L, // 23
0.003981071705534973L, // 24
0.0031622776601683794L, // 25
0.0025118864315095794L, // 26
0.001995262314968879L, // 27
0.001584893192461114L, // 28
0.0012589254117941675L, // 29
0.001L, // 30
0.0007943282347242813L, // 31
0.000630957344480193L, // 32
0.0005011872336272725L, // 33
0.00039810717055349735L, // 34
0.00031622776601683794L, // 35
0.00025118864315095795L, // 36
0.00019952623149688788L, // 37
0.00015848931924611142L, // 38
0.00012589254117941674L, // 39
0.0001L, // 40
7.943282347242822E-05L, // 41
6.309573444801929E-05L, // 42
5.011872336272725E-05L, // 43
3.9810717055349695E-05L, // 44
3.1622776601683795E-05L, // 45
2.5118864315095822E-05L, // 46
1.9952623149688786E-05L, // 47
1.584893192461114E-05L, // 48
1.2589254117941661E-05L, // 49
1E-05L, // 50
7.943282347242822E-06L, // 51
6.30957344480193E-06L, // 52
5.011872336272725E-06L, // 53
3.981071705534969E-06L, // 54
3.162277660168379E-06L, // 55
2.5118864315095823E-06L, // 56
1.9952623149688787E-06L, // 57
1.584893192461114E-06L, // 58
1.2589254117941661E-06L, // 59
1E-06L, // 60
7.943282347242822E-07L, // 61
6.30957344480193E-07L, // 62
5.011872336272725E-07L, // 63
3.981071705534969E-07L, // 64
3.162277660168379E-07L, // 65
2.5118864315095823E-07L, // 66
1.9952623149688787E-07L, // 67
1.584893192461114E-07L, // 68
1.2589254117941662E-07L, // 69
1E-07L, // 70
7.943282347242822E-08L, // 71
6.30957344480193E-08L, // 72
5.011872336272725E-08L, // 73
3.981071705534969E-08L, // 74
3.162277660168379E-08L, // 75
2.511886431509582E-08L, // 76
1.9952623149688786E-08L, // 77
1.5848931924611143E-08L, // 78
1.2589254117941661E-08L, // 79
1E-08L, // 80
7.943282347242822E-09L, // 81
6.309573444801943E-09L, // 82
5.011872336272715E-09L, // 83
3.981071705534969E-09L, // 84
3.1622776601683795E-09L, // 85
2.511886431509582E-09L, // 86
1.9952623149688828E-09L, // 87
1.584893192461111E-09L, // 88
1.2589254117941663E-09L, // 89
1E-09L, // 90
7.943282347242822E-10L, // 91
6.309573444801942E-10L, // 92
5.011872336272714E-10L, // 93
};

static inline float
expected_errors_from_phreds(const uint8_t *phreds, size_t phreds_length, uint8_t base) {
const uint8_t *end_ptr = phreds + phreds_length;
const uint8_t *cursor = phreds;
float expected_errors = 0.0;
uint8_t max_phred = 126 - base;
#ifdef __SSE2__
const uint8_t *vec_end_ptr = end_ptr - sizeof(__m128i);
__m128 accumulator = _mm_set1_ps(0.0);
while (cursor < vec_end_ptr) {
__m128i phred_array = _mm_loadu_si128((__m128i *)cursor);
__m128i illegal_phreds = _mm_cmpgt_epi8(phred_array, _mm_set1_epi8(126));
illegal_phreds = _mm_or_si128(
illegal_phreds, _mm_cmplt_epi8(phred_array, _mm_set1_epi8(base)));
if (_mm_movemask_epi8(illegal_phreds)) {
return -1.0;
}
__m128 loader = _mm_set_ps(
SCORE_TO_ERROR_RATE[cursor[0] - base],
SCORE_TO_ERROR_RATE[cursor[1] - base],
SCORE_TO_ERROR_RATE[cursor[2] - base],
SCORE_TO_ERROR_RATE[cursor[3] - base]
);
accumulator = _mm_add_ps(accumulator, loader);
loader = _mm_set_ps(
SCORE_TO_ERROR_RATE[cursor[4] - base],
SCORE_TO_ERROR_RATE[cursor[5] - base],
SCORE_TO_ERROR_RATE[cursor[6] - base],
SCORE_TO_ERROR_RATE[cursor[7] - base]
);
accumulator = _mm_add_ps(accumulator, loader);
loader = _mm_set_ps(
SCORE_TO_ERROR_RATE[cursor[8] - base],
SCORE_TO_ERROR_RATE[cursor[9] - base],
SCORE_TO_ERROR_RATE[cursor[10] - base],
SCORE_TO_ERROR_RATE[cursor[11] - base]
);
accumulator = _mm_add_ps(accumulator, loader);
loader = _mm_set_ps(
SCORE_TO_ERROR_RATE[cursor[12] - base],
SCORE_TO_ERROR_RATE[cursor[13] - base],
SCORE_TO_ERROR_RATE[cursor[14] - base],
SCORE_TO_ERROR_RATE[cursor[15] - base]
);
accumulator = _mm_add_ps(accumulator, loader);
cursor += sizeof(__m128i);
}
float float_store[4];
_mm_store_ps(float_store, accumulator);
expected_errors = float_store[0] + float_store[1] + float_store[2] + float_store[3];
#endif
while (cursor < end_ptr) {
uint8_t phred = *cursor - base;
if (phred > max_phred) {
return -1.0;
}
expected_errors += SCORE_TO_ERROR_RATE[phred];
cursor += 1;
}
return expected_errors;
}
23 changes: 8 additions & 15 deletions src/cutadapt/qualtrim.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ cdef extern from *:
int PyUnicode_KIND(object o)
int PyUnicode_1BYTE_KIND

cdef extern from "expected_errors.h":
float expected_errors_from_phreds(const uint8_t *phreds, size_t phreds_length, uint8_t base)

cdef class HasNoQualities(Exception):
pass
Expand Down Expand Up @@ -142,11 +144,6 @@ def poly_a_trim_index(str s):

return best_index


# A lookup table is faster than recalculating the very expensive exponent
# calculation every time.
cdef double[94] QUAL_TO_ERROR_RATE = [10 ** (-q / 10) for q in range(94)]

def expected_errors(str qualities, uint8_t base=33):
"""
Return the number of expected errors (as double) from a read’s
Expand All @@ -160,16 +157,12 @@ def expected_errors(str qualities, uint8_t base=33):
if not PyUnicode_IS_COMPACT_ASCII(qualities):
raise ValueError(f"Quality string contains non-ASCII values: {qualities}")
cdef:
size_t i
uint8_t phred, q
uint8_t *quals = <uint8_t *>PyUnicode_DATA(qualities)
size_t qual_length = PyUnicode_GET_LENGTH(qualities)
double e = 0.0

for i in range(qual_length):
phred = quals[i]
q = phred - base
if q > 93:
raise ValueError(f"Not a valid phred value {q} for character {ord(q)}")
e += QUAL_TO_ERROR_RATE[q]
float e = expected_errors_from_phreds(quals, qual_length, base)

if e < 0.0:
for q in qualities:
if ord(q) < base or ord(q) > 126:
raise ValueError(f"Not a valid phred value {ord(q)} for character {q}")
return e
2 changes: 1 addition & 1 deletion tests/test_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_invalid_pair_filter_mode():
(chr(43) * 3 + chr(33), 0.1, True), # 3 * 0.1 + 1
(chr(43) * 3 + chr(33), 0.33, False), # 3 * 0.1 + 1
(chr(43) * 3 + chr(33), 0.32, True), # 3 * 0.1 + 1
(chr(126) * 9 + chr(33), 0.1, True),
(chr(103) * 9 + chr(33), 0.1, True), # 9 * 10^-7 + 1
],
)
def test_too_high_average_error_rate(quals, rate, expected):
Expand Down

0 comments on commit b5c6d00

Please sign in to comment.