Skip to content

Commit

Permalink
Allow AVX2 and SSE42 to both be used if available (#45)
Browse files Browse the repository at this point in the history
* don't try sse42 if avx2 found a match

* adjust longer request bench to show benefit of sse42 combined with avx2

* allow runtime detection to use sse42 if not long enough for avx2
  • Loading branch information
seanmonstar authored Jul 6, 2018
1 parent 3aad4fa commit 7920414
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 11 deletions.
2 changes: 1 addition & 1 deletion benches/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Accept-Encoding: gzip,deflate\r\n\
Accept-Charset: Shift_JIS,utf-8;q=0.7,*;q=0.7\r\n\
Keep-Alive: 115\r\n\
Connection: keep-alive\r\n\
Cookie: wp_ozh_wsa_visits=2; wp_ozh_wsa_visit_lasttime=xxxxxxxxxx; __utma=xxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.x; __utmz=xxxxxxxxx.xxxxxxxxxx.x.x.utmccn=(referral)|utmcsr=reader.livedoor.com|utmcct=/reader/|utmcmd=referral\r\n\r\n";
Cookie: wp_ozh_wsa_visits=2; wp_ozh_wsa_visit_lasttime=xxxxxxxxxx; __utma=xxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.x; __utmz=xxxxxxxxx.xxxxxxxxxx.x.x.utmccn=(referral)|utmcsr=reader.livedoor.com|utmcct=/reader/|utmcmd=referral|padding=under256\r\n\r\n";


#[bench]
Expand Down
18 changes: 14 additions & 4 deletions src/simd/avx2.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
use ::iter::Bytes;

pub unsafe fn parse_uri_batch_32<'a>(bytes: &mut Bytes<'a>) {
/// Outcome reported by a batched scan over the input bytes.
///
/// Callers use this to decide whether a narrower scan should still be
/// attempted on whatever input remains.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Scan {
    /// Returned when an implementation finds a noteworthy token.
    Found,
    /// Returned when an implementation couldn't keep running because the input was too short.
    TooShort,
}


pub unsafe fn parse_uri_batch_32<'a>(bytes: &mut Bytes<'a>) -> Scan {
while bytes.as_ref().len() >= 32 {
let advance = match_url_char_32_avx(bytes.as_ref());
bytes.advance(advance);

if advance != 32 {
break;
return Scan::Found;
}
}
Scan::TooShort
}

#[cfg(target_arch = "x86_64")]
Expand Down Expand Up @@ -57,15 +66,16 @@ unsafe fn match_url_char_32_avx(_: &[u8]) -> usize {
unreachable!("AVX2 detection should be disabled for x86");
}

pub unsafe fn match_header_value_batch_32(bytes: &mut Bytes) {
pub unsafe fn match_header_value_batch_32(bytes: &mut Bytes) -> Scan {
while bytes.as_ref().len() >= 32 {
let advance = match_header_value_char_32_avx(bytes.as_ref());
bytes.advance(advance);

if advance != 32 {
break;
return Scan::Found;
}
}
Scan::TooShort
}

#[cfg(target_arch = "x86_64")]
Expand Down
34 changes: 28 additions & 6 deletions src/simd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,20 @@ mod runtime {
// States cached in `FEATURE` by `detect()`.

// `detect()` has not yet run its CPU feature checks.
const INIT: usize = 0;
// sse4.2 was detected, but the avx2 check did not pass.
const SSE_42: usize = 1;
// avx2 was detected without sse4.2.
const AVX_2: usize = 2;
// Both avx2 and sse4.2 were detected, so the sse4.2 path can handle input
// that is too short for the avx2 path.
const AVX_2_AND_SSE_42: usize = 3;
// NOTE(review): presumably stored when neither feature is detected; the code
// that stores it is not visible here, so confirm in `detect()`.
const NONE: usize = ::core::usize::MAX;

fn detect() -> usize {
let feat = FEATURE.load(Ordering::Relaxed);
if feat == INIT {
if cfg!(target_arch = "x86_64") && is_x86_feature_detected!("avx2") {
FEATURE.store(AVX_2, Ordering::Relaxed);
return AVX_2;
if is_x86_feature_detected!("sse4.2") {
FEATURE.store(AVX_2_AND_SSE_42, Ordering::Relaxed);
return AVX_2_AND_SSE_42;
} else {
FEATURE.store(AVX_2, Ordering::Relaxed);
return AVX_2;
}
} else if is_x86_feature_detected!("sse4.2") {
FEATURE.store(SSE_42, Ordering::Relaxed);
return SSE_42;
Expand All @@ -86,7 +92,13 @@ mod runtime {
unsafe {
match detect() {
SSE_42 => super::sse42::parse_uri_batch_16(bytes),
AVX_2 => super::avx2::parse_uri_batch_32(bytes),
AVX_2 => { super::avx2::parse_uri_batch_32(bytes); },
AVX_2_AND_SSE_42 => {
if let super::avx2::Scan::Found = super::avx2::parse_uri_batch_32(bytes) {
return;
}
super::sse42::parse_uri_batch_16(bytes)
},
_ => ()
}
}
Expand All @@ -98,7 +110,13 @@ mod runtime {
unsafe {
match detect() {
SSE_42 => super::sse42::match_header_value_batch_16(bytes),
AVX_2 => super::avx2::match_header_value_batch_32(bytes),
AVX_2 => { super::avx2::match_header_value_batch_32(bytes); },
AVX_2_AND_SSE_42 => {
if let super::avx2::Scan::Found = super::avx2::match_header_value_batch_32(bytes) {
return;
}
super::sse42::match_header_value_batch_16(bytes)
},
_ => ()
}
}
Expand Down Expand Up @@ -191,8 +209,12 @@ mod avx2_compile_time {
pub fn match_header_value_vectored(bytes: &mut ::Bytes) {
// do both, since avx2 only works when bytes.len() >= 32
if cfg!(target_arch = "x86_64") && is_x86_feature_detected!("avx2") {
unsafe {
super::avx2::match_header_value_batch_32(bytes);
let scanned = unsafe {
super::avx2::match_header_value_batch_32(bytes)
};

if let super::avx2::Scan::Found = scanned {
return;
}
}
if is_x86_feature_detected!("sse4.2") {
Expand Down

0 comments on commit 7920414

Please sign in to comment.