From 04ed6bc96841e8a2bf026d1327397eff07dc6f13 Mon Sep 17 00:00:00 2001 From: Aleksandr Karpinskii Date: Fri, 11 Oct 2024 19:58:18 +0400 Subject: [PATCH] Update to latest versions from optimization branch --- src/encode.c | 62 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/src/encode.c b/src/encode.c index b218fb5..6e9d8a7 100644 --- a/src/encode.c +++ b/src/encode.c @@ -5,14 +5,8 @@ #include "common.h" -struct RGB { - float r; - float g; - float b; -}; - static void multiplyBasisFunction( - struct RGB *factors, int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow, + float factors[][4], int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow, float *cosX, float *cosY); static char *encode_int(int value, int length, char *destination); @@ -31,12 +25,15 @@ static void init_sRGBToLinear_cache() { } } -const char *blurHashForPixels(int xComponents, int yComponents, int width, int height, uint8_t *rgb, size_t bytesPerRow, char *destination) { +const char *blurHashForPixels(int xComponents, int yComponents, int width, int height, uint8_t *rgb, size_t bytesPerRow) { + static char buffer[2 + 4 + (9 * 9 - 1) * 2 + 1]; + if(xComponents < 1 || xComponents > 9) return NULL; if(yComponents < 1 || yComponents > 9) return NULL; - struct RGB factors[9 * 9] = {0}; + float factors[yComponents * xComponents][4]; int factorsCount = xComponents * yComponents; + memset(factors, 0, sizeof(factors)); init_sRGBToLinear_cache(); @@ -67,10 +64,10 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h free(cosX); free(cosY); - float *dc = (float *)factors; - float *ac = dc + 3; + float *dc = factors[0]; + float *ac = dc + 4; int acCount = factorsCount - 1; - char *ptr = destination; + char *ptr = buffer; int sizeFlag = (xComponents - 1) + (yComponents - 1) * 9; ptr = encode_int(sizeFlag, 1, ptr); @@ -78,7 +75,7 @@ const char *blurHashForPixels(int xComponents, 
int yComponents, int width, int h float maximumValue; if(acCount > 0) { float actualMaximumValue = 0; - for(int i = 0; i < acCount * 3; i++) { + for(int i = 0; i < acCount * 4; i++) { actualMaximumValue = fmaxf(fabsf(ac[i]), actualMaximumValue); } @@ -93,32 +90,49 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h ptr = encode_int(encodeDC(dc[0], dc[1], dc[2]), 4, ptr); for(int i = 0; i < acCount; i++) { - ptr = encode_int(encodeAC(ac[i * 3 + 0], ac[i * 3 + 1], ac[i * 3 + 2], maximumValue), 2, ptr); + ptr = encode_int(encodeAC(ac[i * 4 + 0], ac[i * 4 + 1], ac[i * 4 + 2], maximumValue), 2, ptr); } *ptr = 0; - return destination; + return buffer; } static void multiplyBasisFunction( - struct RGB *factors, int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow, + float factors[][4], int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow, float *cosX, float *cosY ) { for(int y = 0; y < height; y++) { uint8_t *src = rgb + y * bytesPerRow; float *cosYLocal = cosY + y * factorsCount; - for(int x = 0; x < width; x++) { - float pixel[3]; + int x = 0; + for(; x < width - 3; x += 4) { + float *cosXLocal = cosX + x * factorsCount; + float pixel0[4] = {sRGBToLinear_cache[src[3 * (x+0) + 0]], sRGBToLinear_cache[src[3 * (x+0) + 1]], sRGBToLinear_cache[src[3 * (x+0) + 2]]}; + float pixel1[4] = {sRGBToLinear_cache[src[3 * (x+1) + 0]], sRGBToLinear_cache[src[3 * (x+1) + 1]], sRGBToLinear_cache[src[3 * (x+1) + 2]]}; + float pixel2[4] = {sRGBToLinear_cache[src[3 * (x+2) + 0]], sRGBToLinear_cache[src[3 * (x+2) + 1]], sRGBToLinear_cache[src[3 * (x+2) + 2]]}; + float pixel3[4] = {sRGBToLinear_cache[src[3 * (x+3) + 0]], sRGBToLinear_cache[src[3 * (x+3) + 1]], sRGBToLinear_cache[src[3 * (x+3) + 2]]}; + for (int i = 0; i < factorsCount; i++) { + float basis0 = cosYLocal[i] * cosXLocal[i + 0 * factorsCount]; + float basis1 = cosYLocal[i] * cosXLocal[i + 1 * factorsCount]; + float basis2 = cosYLocal[i] * cosXLocal[i + 2 * 
factorsCount]; + float basis3 = cosYLocal[i] * cosXLocal[i + 3 * factorsCount]; + factors[i][0] += basis0 * pixel0[0] + basis1 * pixel1[0] + basis2 * pixel2[0] + basis3 * pixel3[0]; + factors[i][1] += basis0 * pixel0[1] + basis1 * pixel1[1] + basis2 * pixel2[1] + basis3 * pixel3[1]; + factors[i][2] += basis0 * pixel0[2] + basis1 * pixel1[2] + basis2 * pixel2[2] + basis3 * pixel3[2]; + } + } + for(; x < width; x++) { + float pixel[4]; float *cosXLocal = cosX + x * factorsCount; pixel[0] = sRGBToLinear_cache[src[3 * x + 0]]; pixel[1] = sRGBToLinear_cache[src[3 * x + 1]]; pixel[2] = sRGBToLinear_cache[src[3 * x + 2]]; for (int i = 0; i < factorsCount; i++) { float basis = cosYLocal[i] * cosXLocal[i]; - factors[i].r += basis * pixel[0]; - factors[i].g += basis * pixel[1]; - factors[i].b += basis * pixel[2]; + factors[i][0] += basis * pixel[0]; + factors[i][1] += basis * pixel[1]; + factors[i][2] += basis * pixel[2]; } } } @@ -126,9 +140,9 @@ static void multiplyBasisFunction( for (int i = 0; i < factorsCount; i++) { float normalisation = (i == 0) ? 1 : 2; float scale = normalisation / (width * height); - factors[i].r *= scale; - factors[i].g *= scale; - factors[i].b *= scale; + factors[i][0] *= scale; + factors[i][1] *= scale; + factors[i][2] *= scale; } }