Skip to content

Commit

Permalink
Added SSE2 protokernel to 4ic_deinterleave_8i_x2
Browse files Browse the repository at this point in the history
  • Loading branch information
dkozel committed Aug 4, 2020
1 parent 84b6fde commit 0bbdeb0
Showing 1 changed file with 46 additions and 0 deletions.
46 changes: 46 additions & 0 deletions kernels/volk/volk_4ic_deinterleave_8i_x2.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,50 @@ static inline void volk_4ic_deinterleave_8i_x2_generic(int8_t* iBuffer,
}

#endif /* LV_HAVE_GENERIC */

#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*
 * Unpack bytes that each hold two signed 4-bit values into two arrays of
 * sign-extended 8-bit values.
 *
 * For every input byte the high nibble (bits 7..4) is written to iBuffer and
 * the low nibble (bits 3..0) is written to qBuffer; both are sign-extended to
 * the range [-8, 7].
 *
 * iBuffer       - output, receives num_points values
 * qBuffer       - output, receives num_points values
 * complexVector - input, num_points packed bytes
 * num_points    - number of input bytes (and of values written per output)
 *
 * Only unaligned load/store intrinsics are used, so the pointers do not need
 * to be 16-byte aligned.
 *
 * NOTE(review): the "high nibble -> I, low nibble -> Q" assignment is taken
 * from the scalar expression `byte >> 4` used for I in this kernel's tail
 * handling; confirm that it matches the generic kernel.
 */
static inline void volk_4ic_deinterleave_8i_x2_a_sse2(int8_t* iBuffer,
                                                      int8_t* qBuffer,
                                                      const int8_t* complexVector,
                                                      unsigned int num_points)
{
    // SSE2 algorithm was written by Andrey Semashev, licensed as CC-BY-SA
    // https://stackoverflow.com/questions/63200053/deinterleve-vector-of-nibbles-using-simd
    const __m128i nibble_mask = _mm_set1_epi8(0x0F);
    const __m128i max_positive = _mm_set1_epi8(0x07);

    const unsigned int sixteenthPoints = num_points / 16;
    unsigned int number;

    for (number = 0; number < sixteenthPoints; number++) {
        // Load 16 packed bytes and split them into low and high nibbles.
        // The 32-bit shift drags bits across byte boundaries; the mask
        // applied afterwards discards them.
        const __m128i packed =
            _mm_loadu_si128(((const __m128i*)complexVector) + number);
        __m128i low_nibbles = _mm_and_si128(packed, nibble_mask);
        __m128i high_nibbles = _mm_and_si128(_mm_srli_epi32(packed, 4), nibble_mask);

        // A nibble above 7 is negative: the compare yields 0xFF for those
        // lanes, and and-not with the nibble mask keeps only the upper four
        // bits (0xF0), which are then OR-ed in as the sign extension.
        const __m128i low_sign =
            _mm_andnot_si128(nibble_mask, _mm_cmpgt_epi8(low_nibbles, max_positive));
        const __m128i high_sign =
            _mm_andnot_si128(nibble_mask, _mm_cmpgt_epi8(high_nibbles, max_positive));
        low_nibbles = _mm_or_si128(low_nibbles, low_sign);
        high_nibbles = _mm_or_si128(high_nibbles, high_sign);

        // High nibble -> I, low nibble -> Q, identical to the scalar tail.
        _mm_storeu_si128(((__m128i*)iBuffer) + number, high_nibbles);
        _mm_storeu_si128(((__m128i*)qBuffer) + number, low_nibbles);
    }

    // Scalar tail for the remaining (num_points % 16) bytes.
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        // Work on the unsigned byte so no negative value is ever shifted.
        const unsigned int packed_byte = (uint8_t)complexVector[number];
        // (n ^ 8) - 8 sign-extends a 4-bit two's-complement value portably.
        iBuffer[number] = (int8_t)((int)((packed_byte >> 4) ^ 8u) - 8);
        qBuffer[number] = (int8_t)((int)((packed_byte & 0x0Fu) ^ 8u) - 8);
    }
}

#endif /* LV_HAVE_SSE2 */
#endif /* INCLUDED_volk_4ic_deinterleave_8i_x2_u_H */

0 comments on commit 0bbdeb0

Please sign in to comment.