Formatting-only patch for src/libImaging/Bands.c and src/libImaging/ImagingSIMD.h.

Each removed/added pair in the hunks below differs only in whitespace, line
wrapping, preprocessor-directive indentation, or declaration-specifier order
("static __m128i inline" becomes "static inline __m128i").

NOTE(review): this text was recovered from a copy whose newlines were collapsed
and whose #include targets were stripped. Header names were restored from the
trailing comments on each #include line. Leading indentation and blank context
lines were rebuilt from the hunk line counts and 4-space nesting; confirm them
against the tree, or apply with whitespace tolerance.

diff --git a/src/libImaging/Bands.c b/src/libImaging/Bands.c
index 1cfeb04bf79..6b3f8c03af3 100644
--- a/src/libImaging/Bands.c
+++ b/src/libImaging/Bands.c
@@ -51,7 +51,22 @@ ImagingGetBand(Imaging imIn, int band) {
 
 #ifdef __SSE4__
     shuffle_mask = _mm_set_epi8(
-        -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 12+band,8+band,4+band,0+band);
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        12 + band,
+        8 + band,
+        4 + band,
+        0 + band);
 #endif
 
     /* Extract band from image */
@@ -62,8 +77,8 @@ ImagingGetBand(Imaging imIn, int band) {
         for (; x < imIn->xsize - 3; x += 4) {
 #ifdef __SSE4__
             __m128i source = _mm_loadu_si128((__m128i *)(in - band));
-            *((UINT32 *)(out + x)) = _mm_cvtsi128_si32(
-                _mm_shuffle_epi8(source, shuffle_mask));
+            *((UINT32 *)(out + x)) =
+                _mm_cvtsi128_si32(_mm_shuffle_epi8(source, shuffle_mask));
 #else
             UINT32 v = MAKE_UINT32(in[0], in[4], in[8], in[12]);
             memcpy(out + x, &v, sizeof(v));
@@ -115,8 +130,9 @@ ImagingSplit(Imaging imIn, Imaging bands[4]) {
             for (; x < imIn->xsize - 3; x += 4) {
 #ifdef __SSE4__
                 __m128i source = _mm_loadu_si128((__m128i *)in);
-                source = _mm_shuffle_epi8(source, _mm_set_epi8(
-                    15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0));
+                source = _mm_shuffle_epi8(
+                    source,
+                    _mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0));
                 *((UINT32 *)(out0 + x)) = _mm_cvtsi128_si32(source);
                 *((UINT32 *)(out1 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 12));
 #else
@@ -143,8 +159,9 @@ ImagingSplit(Imaging imIn, Imaging bands[4]) {
             for (; x < imIn->xsize - 3; x += 4) {
 #ifdef __SSE4__
                 __m128i source = _mm_loadu_si128((__m128i *)in);
-                source = _mm_shuffle_epi8(source, _mm_set_epi8(
-                    15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0));
+                source = _mm_shuffle_epi8(
+                    source,
+                    _mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0));
                 *((UINT32 *)(out0 + x)) = _mm_cvtsi128_si32(source);
                 *((UINT32 *)(out1 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 4));
                 *((UINT32 *)(out2 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 8));
@@ -176,8 +193,9 @@ ImagingSplit(Imaging imIn, Imaging bands[4]) {
             for (; x < imIn->xsize - 3; x += 4) {
 #ifdef __SSE4__
                 __m128i source = _mm_loadu_si128((__m128i *)in);
-                source = _mm_shuffle_epi8(source, _mm_set_epi8(
-                    15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0));
+                source = _mm_shuffle_epi8(
+                    source,
+                    _mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0));
                 *((UINT32 *)(out0 + x)) = _mm_cvtsi128_si32(source);
                 *((UINT32 *)(out1 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 4));
                 *((UINT32 *)(out2 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 8));
diff --git a/src/libImaging/ImagingSIMD.h b/src/libImaging/ImagingSIMD.h
index ed259cebea2..a63f3b0aa02 100644
--- a/src/libImaging/ImagingSIMD.h
+++ b/src/libImaging/ImagingSIMD.h
@@ -1,46 +1,46 @@
 /* Microsoft compiler doesn't limit intrinsics for an architecture.
    This macro is set only on x86 and means SSE2 and above including AVX2. */
 #if defined(_M_X64) || _M_IX86_FP == 2
-    #define __SSE2__
-    /* However, Microsoft compiler set __AVX2__ if /arch:AVX2 option is set */
-    #ifdef __AVX2__
-        #define __SSE4_2__
-    #endif
+#define __SSE2__
+/* However, Microsoft compiler set __AVX2__ if /arch:AVX2 option is set */
+#ifdef __AVX2__
+#define __SSE4_2__
+#endif
 #endif
 
 /* For better readability */
 #ifdef __SSE4_2__
-    #define __SSE4__
+#define __SSE4__
 #endif
 
 #ifdef __SSE2__
-    #include <mmintrin.h> // MMX
-    #include <xmmintrin.h> // SSE
-    #include <emmintrin.h> // SSE2
+#include <mmintrin.h>  // MMX
+#include <xmmintrin.h>  // SSE
+#include <emmintrin.h>  // SSE2
 #endif
 #ifdef __SSE4__
-    #include <pmmintrin.h> // SSE3
-    #include <tmmintrin.h> // SSSE3
-    #include <smmintrin.h> // SSE4.1
-    #include <nmmintrin.h> // SSE4.2
+#include <pmmintrin.h>  // SSE3
+#include <tmmintrin.h>  // SSSE3
+#include <smmintrin.h>  // SSE4.1
+#include <nmmintrin.h>  // SSE4.2
 #endif
 #ifdef __AVX2__
-    #include <immintrin.h> // AVX, AVX2
+#include <immintrin.h>  // AVX, AVX2
 #endif
 #ifdef __aarch64__
-    #include <arm_neon.h> // ARM NEON
+#include <arm_neon.h>  // ARM NEON
 #endif
 
 #ifdef __SSE4__
-static __m128i inline
+static inline __m128i
 mm_cvtepu8_epi32(void *ptr) {
-    return _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*(INT32 *) ptr));
+    return _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*(INT32 *)ptr));
 }
 #endif
 
 #ifdef __AVX2__
-static __m256i inline
+static inline __m256i
 mm256_cvtepu8_epi32(void *ptr) {
-    return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i *) ptr));
+    return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)ptr));
 }
 #endif