Skip to content

Commit

Permalink
clang-format
Browse files Browse the repository at this point in the history
  • Loading branch information
homm committed Aug 11, 2024
1 parent 2db9cd3 commit 5df34a2
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 28 deletions.
36 changes: 27 additions & 9 deletions src/libImaging/Bands.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,22 @@ ImagingGetBand(Imaging imIn, int band) {

#ifdef __SSE4__
shuffle_mask = _mm_set_epi8(
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 12+band,8+band,4+band,0+band);
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
12 + band,
8 + band,
4 + band,
0 + band);
#endif

/* Extract band from image */
Expand All @@ -62,8 +77,8 @@ ImagingGetBand(Imaging imIn, int band) {
for (; x < imIn->xsize - 3; x += 4) {
#ifdef __SSE4__
__m128i source = _mm_loadu_si128((__m128i *)(in - band));
*((UINT32 *)(out + x)) = _mm_cvtsi128_si32(
_mm_shuffle_epi8(source, shuffle_mask));
*((UINT32 *)(out + x)) =
_mm_cvtsi128_si32(_mm_shuffle_epi8(source, shuffle_mask));
#else
UINT32 v = MAKE_UINT32(in[0], in[4], in[8], in[12]);
memcpy(out + x, &v, sizeof(v));
Expand Down Expand Up @@ -115,8 +130,9 @@ ImagingSplit(Imaging imIn, Imaging bands[4]) {
for (; x < imIn->xsize - 3; x += 4) {
#ifdef __SSE4__
__m128i source = _mm_loadu_si128((__m128i *)in);
source = _mm_shuffle_epi8(source, _mm_set_epi8(
15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0));
source = _mm_shuffle_epi8(
source,
_mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0));
*((UINT32 *)(out0 + x)) = _mm_cvtsi128_si32(source);
*((UINT32 *)(out1 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 12));
#else
Expand All @@ -143,8 +159,9 @@ ImagingSplit(Imaging imIn, Imaging bands[4]) {
for (; x < imIn->xsize - 3; x += 4) {
#ifdef __SSE4__
__m128i source = _mm_loadu_si128((__m128i *)in);
source = _mm_shuffle_epi8(source, _mm_set_epi8(
15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0));
source = _mm_shuffle_epi8(
source,
_mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0));
*((UINT32 *)(out0 + x)) = _mm_cvtsi128_si32(source);
*((UINT32 *)(out1 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 4));
*((UINT32 *)(out2 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 8));
Expand Down Expand Up @@ -176,8 +193,9 @@ ImagingSplit(Imaging imIn, Imaging bands[4]) {
for (; x < imIn->xsize - 3; x += 4) {
#ifdef __SSE4__
__m128i source = _mm_loadu_si128((__m128i *)in);
source = _mm_shuffle_epi8(source, _mm_set_epi8(
15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0));
source = _mm_shuffle_epi8(
source,
_mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0));
*((UINT32 *)(out0 + x)) = _mm_cvtsi128_si32(source);
*((UINT32 *)(out1 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 4));
*((UINT32 *)(out2 + x)) = _mm_cvtsi128_si32(_mm_srli_si128(source, 8));
Expand Down
38 changes: 19 additions & 19 deletions src/libImaging/ImagingSIMD.h
Original file line number Diff line number Diff line change
@@ -1,46 +1,46 @@
/* The Microsoft compiler doesn't limit intrinsics for an architecture.
This macro is set only on x86 and means SSE2 and above, including AVX2. */
#if defined(_M_X64) || _M_IX86_FP == 2
#define __SSE2__
/* However, the Microsoft compiler sets __AVX2__ if the /arch:AVX2 option is set */
#ifdef __AVX2__
#define __SSE4_2__
#endif
#define __SSE2__
/* However, the Microsoft compiler sets __AVX2__ if the /arch:AVX2 option is set */
#ifdef __AVX2__
#define __SSE4_2__
#endif
#endif

/* For better readability */
#ifdef __SSE4_2__
#define __SSE4__
#define __SSE4__
#endif

#ifdef __SSE2__
#include <mmintrin.h> // MMX
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2
#include <mmintrin.h> // MMX
#include <xmmintrin.h> // SSE
#include <emmintrin.h> // SSE2
#endif
#ifdef __SSE4__
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h> // SSSE3
#include <smmintrin.h> // SSE4.1
#include <nmmintrin.h> // SSE4.2
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h> // SSSE3
#include <smmintrin.h> // SSE4.1
#include <nmmintrin.h> // SSE4.2
#endif
#ifdef __AVX2__
#include <immintrin.h> // AVX, AVX2
#include <immintrin.h> // AVX, AVX2
#endif
#ifdef __aarch64__
#include <arm_neon.h> // ARM NEON
#include <arm_neon.h> // ARM NEON
#endif

#ifdef __SSE4__
/* Read four bytes at ptr as a single 32-bit integer, place it in the low
   lane of a 128-bit register, and zero-extend each of those four bytes
   into its own 32-bit lane of the returned vector.
   NOTE(review): ptr is dereferenced through an INT32 pointer cast; this
   presumably relies on callers passing suitably aligned pixel data --
   confirm. */
static __m128i inline
static inline __m128i
mm_cvtepu8_epi32(void *ptr) {
return _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*(INT32 *) ptr));
return _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*(INT32 *)ptr));
}
#endif

#ifdef __AVX2__
/* Load eight bytes at ptr into the low 64 bits of a 128-bit register and
   zero-extend each of those eight bytes into its own 32-bit lane of the
   returned 256-bit vector. */
static __m256i inline
static inline __m256i
mm256_cvtepu8_epi32(void *ptr) {
return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i *) ptr));
return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i *)ptr));
}
#endif

0 comments on commit 5df34a2

Please sign in to comment.