Skip to content

Commit

Permalink
WIP: trying out some function multiversioning
Browse files Browse the repository at this point in the history
  • Loading branch information
sk1p committed Apr 30, 2024
1 parent bd9cc1c commit 77b3387
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 4 deletions.
4 changes: 2 additions & 2 deletions k2o/.cargo/config.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[build]
rustflags = ["-Ctarget-cpu=native"]
# [build]
# rustflags = ["-Ctarget-cpu=native"]
3 changes: 3 additions & 0 deletions k2o/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ dbus = "0.9.7"
libc = "0.2.147"
env_logger = "0.10.0"
partialdebug = "0.2.0"
tokio-uring = "0.4.0"
pulp = "0.18.6"
multiversion = "0.7.3"

[features]
hdf5 = ["dep:hdf5"]
Expand Down
37 changes: 37 additions & 0 deletions k2o/src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
//! output: BC FA DE
use std::convert::TryInto;

use multiversion::multiversion;
use num::cast::AsPrimitive;

pub const HEADER_SIZE: usize = 40;
Expand All @@ -39,6 +40,12 @@ pub const HEADER_SIZE: usize = 40;
/// * `bytes` - the bytes that should be decoded. Should be `PACKET_SIZE` long (`0x5758` or `0xc028`)
/// * `out` - the slice where the decoded integers should be written to. Should be `(PACKET_SIZE - HEADER_SIZE) * 2 / 3` long (`14880` for `0x5758`, `32768` for `0xc028`).
///
#[multiversion(targets(
"x86_64+adx+aes+avx+avx2+bmi1+bmi2+cmpxchg16b+f16c+fma+fxsr+lzcnt+movbe+pclmulqdq+popcnt+rdrand+rdseed+sha+sse+sse2+sse3+sse4.1+sse4.2+ssse3+xsave+xsavec+xsaveopt+xsaves",
"x86_64+avx+avx2+bmi1+bmi2+fma+sse+sse2+sse3+sse4.1+sse4.2+ssse3+popcnt",
"x86_64+avx+avx2",
"x86_64+avx",
))]
pub fn decode<const PACKET_SIZE: usize>(bytes: &[u8], out: &mut [u16]) {
// make sure input/output are bounded to PACKET_SIZE and DECODED_SIZE
//const DECODED_SIZE: usize = (PACKET_SIZE - HEADER_SIZE) * 2 / 3;
Expand Down Expand Up @@ -68,6 +75,12 @@ pub fn decode<const PACKET_SIZE: usize>(bytes: &[u8], out: &mut [u16]) {
/// * `bytes` - the bytes that should be decoded. Should be `PACKET_SIZE` long (`0x5758`)
/// * `out` - the slice where the decoded integers should be written to. Should be `DECODED_SIZE` long (`14880`).
///
#[multiversion(targets(
"x86_64+adx+aes+avx+avx2+bmi1+bmi2+cmpxchg16b+f16c+fma+fxsr+lzcnt+movbe+pclmulqdq+popcnt+rdrand+rdseed+sha+sse+sse2+sse3+sse4.1+sse4.2+ssse3+xsave+xsavec+xsaveopt+xsaves",
"x86_64+avx+avx2+bmi1+bmi2+fma+sse+sse2+sse3+sse4.1+sse4.2+ssse3+popcnt",
"x86_64+avx+avx2",
"x86_64+avx",
))]
pub fn decode_map<D, F, const PACKET_SIZE: usize, const DECODED_SIZE: usize>(
bytes: &[u8],
out: &mut [D],
Expand Down Expand Up @@ -102,6 +115,12 @@ pub fn decode_map<D, F, const PACKET_SIZE: usize, const DECODED_SIZE: usize>(
/// * `bytes` - the bytes that should be decoded. Should be `PACKET_SIZE` long (`0x5758`)
/// * `out` - the slice where the decoded integers should be written to. Should be `DECODED_SIZE` long (`14880`).
///
#[multiversion(targets(
"x86_64+adx+aes+avx+avx2+bmi1+bmi2+cmpxchg16b+f16c+fma+fxsr+lzcnt+movbe+pclmulqdq+popcnt+rdrand+rdseed+sha+sse+sse2+sse3+sse4.1+sse4.2+ssse3+xsave+xsavec+xsaveopt+xsaves",
"x86_64+avx+avx2+bmi1+bmi2+fma+sse+sse2+sse3+sse4.1+sse4.2+ssse3+popcnt",
"x86_64+avx+avx2",
"x86_64+avx",
))]
pub fn decode_converted<D, const PACKET_SIZE: usize, const DECODED_SIZE: usize>(
bytes: &[u8],
out: &mut [D],
Expand Down Expand Up @@ -137,6 +156,12 @@ pub fn decode_converted<D, const PACKET_SIZE: usize, const DECODED_SIZE: usize>(
/// * `bytes` - the bytes that should be decoded. Should be `PACKET_SIZE` long (`0x5758`)
/// * `out` - the slice where the decoded integers should be written to. Should be `DECODED_SIZE` long (`14880`).
///
#[multiversion(targets(
"x86_64+adx+aes+avx+avx2+bmi1+bmi2+cmpxchg16b+f16c+fma+fxsr+lzcnt+movbe+pclmulqdq+popcnt+rdrand+rdseed+sha+sse+sse2+sse3+sse4.1+sse4.2+ssse3+xsave+xsavec+xsaveopt+xsaves",
"x86_64+avx+avx2+bmi1+bmi2+fma+sse+sse2+sse3+sse4.1+sse4.2+ssse3+popcnt",
"x86_64+avx+avx2",
"x86_64+avx",
))]
pub fn decode_unrolled<const PACKET_SIZE: usize, const DECODED_SIZE: usize>(
bytes: &[u8],
out: &mut [u16],
Expand Down Expand Up @@ -187,6 +212,12 @@ pub fn decode_u16(bytes: &[u8]) -> u16 {
u16::from_be_bytes(bytes.try_into().unwrap())
}

#[multiversion(targets(
"x86_64+adx+aes+avx+avx2+bmi1+bmi2+cmpxchg16b+f16c+fma+fxsr+lzcnt+movbe+pclmulqdq+popcnt+rdrand+rdseed+sha+sse+sse2+sse3+sse4.1+sse4.2+ssse3+xsave+xsavec+xsaveopt+xsaves",
"x86_64+avx+avx2+bmi1+bmi2+fma+sse+sse2+sse3+sse4.1+sse4.2+ssse3+popcnt",
"x86_64+avx+avx2",
"x86_64+avx",
))]
pub fn decode_u16_vec<const PACKET_SIZE: usize>(bytes: &[u8], out: &mut [u16]) {
for i in 0..(PACKET_SIZE - HEADER_SIZE) / 2 {
let in_bytes = &bytes[HEADER_SIZE + i * 2..HEADER_SIZE + i * 2 + 2];
Expand All @@ -205,6 +236,12 @@ pub fn decode_packet_size(bytes: &[u8]) -> u32 {
/// * `inp` - The input integers
/// * `out` - A mutable byte slice where the encoded values will be written
///
#[multiversion(targets(
"x86_64+adx+aes+avx+avx2+bmi1+bmi2+cmpxchg16b+f16c+fma+fxsr+lzcnt+movbe+pclmulqdq+popcnt+rdrand+rdseed+sha+sse+sse2+sse3+sse4.1+sse4.2+ssse3+xsave+xsavec+xsaveopt+xsaves",
"x86_64+avx+avx2+bmi1+bmi2+fma+sse+sse2+sse3+sse4.1+sse4.2+ssse3+popcnt",
"x86_64+avx+avx2",
"x86_64+avx",
))]
pub fn encode(inp: &Vec<u16>, out: &mut [u8]) {
// pre-condition: out_chunks should have no remainder
assert_eq!(out.len() % 3, 0);
Expand Down
1 change: 1 addition & 0 deletions k2o/src/frame.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::{ops::Range, time::Instant};

use ipc_test::{SharedSlabAllocator, Slot, SlotInfo};
use multiversion::multiversion;
use ndarray::{s, ArrayView2, ArrayViewMut2};

use crate::{block::K2Block, events::Binning, helpers::Shape2};
Expand Down
4 changes: 2 additions & 2 deletions libertem_k2is/.cargo/config.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[build]
rustflags = ["-Ctarget-cpu=native"]
# [build]
# rustflags = ["-Ctarget-cpu=native"]

0 comments on commit 77b3387

Please sign in to comment.