Skip to content

Commit

Permalink
Use multiversion to accelerate decoding in production
Browse files Browse the repository at this point in the history
AVX2 appears to be the most important feature; this also adds a more
complete list of Zen2-ish features as one additional target version.
  • Loading branch information
sk1p committed Jul 31, 2024
1 parent 64b33b4 commit 5529b49
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 1 deletion.
29 changes: 29 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ log = "0.4.21"
ndarray = { version = "0.15.6" }
zerocopy = "0.7.35"
num = "0.4.3"
multiversion = "0.7.4"

[dev-dependencies]
criterion = "0.5.1"
Expand Down
26 changes: 25 additions & 1 deletion common/src/generic_cam_client.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::fmt::Debug;

use ipc_test::{slab::SlabInitError, SharedSlabAllocator};
use multiversion::multiversion;
use ndarray::ArrayViewMut3;
use num::cast::AsPrimitive;

Expand Down Expand Up @@ -35,6 +36,29 @@ where
decoder: D,
}

/// Decode frames `start_idx..end_idx` of `input` into `dest` by forwarding
/// to `decoder.decode`, from a function annotated with `#[multiversion]`.
///
/// The attribute lists four x86_64 target feature sets, ordered from the
/// most complete one (a Zen2-like feature list) down to plain AVX. The
/// free function exists so that the generic decode call sits inside a
/// multiversioned body.
/// NOTE(review): how a version is chosen at runtime, and what happens on
/// CPUs matching none of the targets, is defined by the `multiversion`
/// crate; confirm against its documentation.
///
/// # Errors
///
/// Any error returned by `decoder.decode` is converted into a
/// `CamClientError` by the `?` operator and returned to the caller.
#[multiversion(targets(
    "x86_64+adx+aes+avx+avx2+bmi1+bmi2+cmpxchg16b+f16c+fma+fxsr+lzcnt+movbe+pclmulqdq+popcnt+rdrand+rdseed+sha+sse+sse2+sse3+sse4.1+sse4.2+ssse3+xsave+xsavec+xsaveopt+xsaves",
    "x86_64+avx+avx2+bmi1+bmi2+fma+sse+sse2+sse3+sse4.1+sse4.2+ssse3+popcnt",
    "x86_64+avx+avx2",
    "x86_64+avx",
))]
fn decode_multi_version<D, T>(
    decoder: &D,
    shm: &SharedSlabAllocator,
    input: &FrameStackHandle<D::FrameMeta>,
    dest: &mut ArrayViewMut3<'_, T>,
    start_idx: usize,
    end_idx: usize,
) -> Result<(), CamClientError>
where
    D: Decoder,
    T: DecoderTargetPixelType,
    u8: AsPrimitive<T>,
    u16: AsPrimitive<T>,
{
    // Propagate a decoder failure (converted via `?`) before reporting success.
    decoder.decode(shm, input, dest, start_idx, end_idx)?;
    Ok(())
}

/// Client for reading dense data from SHM. That means we get the data as stacks
/// of 2D frames, which either already are strided arrays, or can be decoded
/// into strided arrays.
Expand Down Expand Up @@ -132,7 +156,7 @@ where
u16: AsPrimitive<T>,
{
let shm = self.get_shm()?;
Ok(self.decoder.decode(shm, input, dest, start_idx, end_idx)?)
decode_multi_version(&self.decoder, shm, input, dest, start_idx, end_idx)
}

/// Free the given `FrameStackHandle`. When calling this, no Python objects
Expand Down

0 comments on commit 5529b49

Please sign in to comment.