Precompute the cosine basis tables and the sRGB <-> linear lookup tables in the
C BlurHash encoder/decoder, build through object files, and let the CLI encoder
accept up to 9 components.

Review notes for this revision of the patch:
* common.h: the M_PI fallback is kept. _USE_MATH_DEFINES only has an effect on
  MSVC, and only when <math.h> has not been included earlier, so strict
  -std=c99/c11 builds still need the fallback.
* decode.c / encode.c: every malloc result is checked. decodeToArray returns -1
  and blurHashForPixels returns NULL when a table cannot be allocated.
* decode.c: convertToCacheIdx clamps before the float -> int conversion
  (converting NaN or an out-of-range float to int is undefined behaviour).
  Results are unchanged for in-range input.
* The two lookup-table pointers are file-local (static).
* Header names on the #include lines (<math.h>, <string.h>) and the tab
  indentation were reconstructed; confirm against the tree, or apply with
  `git apply --ignore-whitespace`. The post-image hashes on the index lines of
  common.h, decode.c and encode.c predate these review edits.

diff --git a/C/Makefile b/C/Makefile
index 33dddaa4..cd02064e 100644
--- a/C/Makefile
+++ b/C/Makefile
@@ -1,12 +1,18 @@
 PROGRAM=blurhash_encoder
 DECODER=blurhash_decoder
 
-$(PROGRAM): encode_stb.c encode.c encode.h stb_image.h common.h
-	$(CC) -o $@ encode_stb.c encode.c -lm -Ofast
-$(DECODER): decode_stb.c decode.c decode.h stb_writer.h common.h
-	$(CC) -o $(DECODER) decode_stb.c decode.c -lm -Ofast
+encod%.o: encod%.c encode.h stb_image.h common.h
+	$(CC) -c $< -o $@ -Ofast -Wall
+$(PROGRAM): encode_stb.o encode.o
+	$(CC) -o $@ encode_stb.o encode.o -lm
+
+decod%.o: decod%.c decode.h stb_writer.h common.h
+	$(CC) -c $< -o $@ -Ofast -Wall
+$(DECODER): decode_stb.o decode.o
+	$(CC) -o $@ decode_stb.o decode.o -lm
 
 .PHONY: clean
 clean:
 	rm -f $(PROGRAM)
-	rm -f $(DECODER)
\ No newline at end of file
+	rm -f $(DECODER)
+	rm -f *.o
diff --git a/C/common.h b/C/common.h
index ce581442..029b9bee 100644
--- a/C/common.h
+++ b/C/common.h
@@ -1,6 +1,7 @@
 #ifndef __BLURHASH_COMMON_H__
 #define __BLURHASH_COMMON_H__
 
+#define _USE_MATH_DEFINES
 #include<math.h>
 
 #ifndef M_PI
diff --git a/C/decode.c b/C/decode.c
index a8cca05d..640a3112 100644
--- a/C/decode.c
+++ b/C/decode.c
@@ -3,10 +3,37 @@
 
 static char chars[83] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz#$%*+,-.:;=?@[]^_{|}~";
 
-static inline uint8_t clampToUByte(int * src) {
-	if( *src >= 0 && *src <= 255 )
-		return *src;
-	return (*src < 0) ? 0 : 255;
+#define CACHE_ACCURACY 8191
+
+// Maps a linear value onto a slot of the lookup table. The clamp is done on the
+// float: converting NaN or an out-of-range float to int is undefined.
+static inline int convertToCacheIdx(float src) {
+	if (!(src > 0)) return 0; // also true for NaN
+	if (src >= 1) return CACHE_ACCURACY;
+	return (int)(src * CACHE_ACCURACY + 0.5);
+}
+
+static uint8_t *linearTosRGB_cache = NULL;
+
+// Lazily builds the linear -> sRGB lookup table.
+// Returns 0 on success, -1 if the table cannot be allocated.
+static int init_linearTosRGB_cache(void) {
+	uint8_t *cache;
+	if (linearTosRGB_cache != NULL) {
+		return 0;
+	}
+	cache = malloc(sizeof(*cache) * (CACHE_ACCURACY + 1));
+	if (cache == NULL) {
+		return -1;
+	}
+	for (int x = 0; x <= CACHE_ACCURACY; x++) {
+		cache[x] = linearTosRGB((float)x / CACHE_ACCURACY);
+	}
+	// Publish the pointer only once the table is complete, so a reader never
+	// sees a half-filled table. This is not a memory barrier: concurrent
+	// first calls may still each build (and leak) a table.
+	linearTosRGB_cache = cache;
+	return 0;
 }
 
 static inline uint8_t * createByteArray(int size) {
@@ -98,30 +125,50 @@ int decodeToArray(const char * blurhash, int width, int height, int punch, int n
 
 	int bytesPerRow = width * nChannels;
 	int x = 0, y = 0, i = 0, j = 0;
-	int intR = 0, intG = 0, intB = 0;
+
+	if (init_linearTosRGB_cache() != 0) return -1;
+
+	// Cosine weights per column (cosX) and per row (cosY), expanded so that slot
+	// [pos * colors_size + idx] lines up with colors[idx].
+	float *cosX = malloc((size_t)width * colors_size * sizeof(float));
+	float *cosY = malloc((size_t)height * colors_size * sizeof(float));
+	if (cosX == NULL || cosY == NULL) {
+		free(cosX);
+		free(cosY);
+		return -1;
+	}
+	for(x = 0; x < width; x ++) {
+		for(i = 0; i < numX; i ++) {
+			float weight = cosf(M_PI * x * i / width);
+			for(j = 0; j < numY; j ++) {
+				cosX[x * colors_size + j * numX + i] = weight;
+			}
+		}
+	}
+	for(y = 0; y < height; y ++) {
+		for(j = 0; j < numY; j ++) {
+			float weight = cosf((M_PI * y * j) / height);
+			for(i = 0; i < numX; i ++) {
+				cosY[y * colors_size + j * numX + i] = weight;
+			}
+		}
+	}
 
 	for(y = 0; y < height; y ++) {
 		for(x = 0; x < width; x ++) {
 
 			float r = 0, g = 0, b = 0;
 
-			for(j = 0; j < numY; j ++) {
-				for(i = 0; i < numX; i ++) {
-					float basics = cos((M_PI * x * i) / width) * cos((M_PI * y * j) / height);
-					int idx = i + j * numX;
-					r += colors[idx][0] * basics;
-					g += colors[idx][1] * basics;
-					b += colors[idx][2] * basics;
-				}
+			for (int idx = 0; idx < colors_size; idx ++) {
+				float basics = cosX[x * colors_size + idx] * cosY[y * colors_size + idx];
+				r += colors[idx][0] * basics;
+				g += colors[idx][1] * basics;
+				b += colors[idx][2] * basics;
 			}
 
-			intR = linearTosRGB(r);
-			intG = linearTosRGB(g);
-			intB = linearTosRGB(b);
-
-			pixelArray[nChannels * x + 0 + y * bytesPerRow] = clampToUByte(&intR);
-			pixelArray[nChannels * x + 1 + y * bytesPerRow] = clampToUByte(&intG);
-			pixelArray[nChannels * x + 2 + y * bytesPerRow] = clampToUByte(&intB);
+			pixelArray[nChannels * x + 0 + y * bytesPerRow] = linearTosRGB_cache[convertToCacheIdx(r)];
+			pixelArray[nChannels * x + 1 + y * bytesPerRow] = linearTosRGB_cache[convertToCacheIdx(g)];
+			pixelArray[nChannels * x + 2 + y * bytesPerRow] = linearTosRGB_cache[convertToCacheIdx(b)];
 
 			if (nChannels == 4)
 				pixelArray[nChannels * x + 3 + y * bytesPerRow] = 255; // If nChannels=4, treat each pixel as RGBA instead of RGB
@@ -129,6 +176,9 @@ int decodeToArray(const char * blurhash, int width, int height, int punch, int n
 		}
 	}
 
+	free(cosX);
+	free(cosY);
+
 	return 0;
 }
 
diff --git a/C/encode.c b/C/encode.c
index c7a39da2..b9ca6b7c 100644
--- a/C/encode.c
+++ b/C/encode.c
@@ -3,33 +3,78 @@
 
 #include <string.h>
 
-static float *multiplyBasisFunction(int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow);
+static void multiplyBasisFunction(
+	float factors[][4], int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow,
+	float *cosX, float *cosY);
 static char *encode_int(int value, int length, char *destination);
 
 static int encodeDC(float r, float g, float b);
 static int encodeAC(float r, float g, float b, float maximumValue);
 
+static float *sRGBToLinear_cache = NULL;
+
+// Lazily builds the 256-entry sRGB -> linear lookup table.
+// Returns 0 on success, -1 if the table cannot be allocated.
+static int init_sRGBToLinear_cache(void) {
+	float *cache;
+	if (sRGBToLinear_cache != NULL) {
+		return 0;
+	}
+	cache = malloc(sizeof(*cache) * 256);
+	if (cache == NULL) {
+		return -1;
+	}
+	for (int x = 0; x < 256; x++) {
+		cache[x] = sRGBToLinear(x);
+	}
+	// Publish the pointer only once the table is complete, so a reader never
+	// sees a half-filled table. This is not a memory barrier: concurrent
+	// first calls may still each build (and leak) a table.
+	sRGBToLinear_cache = cache;
+	return 0;
+}
+
 const char *blurHashForPixels(int xComponents, int yComponents, int width, int height, uint8_t *rgb, size_t bytesPerRow) {
 	static char buffer[2 + 4 + (9 * 9 - 1) * 2 + 1];
 
 	if(xComponents < 1 || xComponents > 9) return NULL;
 	if(yComponents < 1 || yComponents > 9) return NULL;
 
-	float factors[yComponents][xComponents][3];
+	float factors[yComponents * xComponents][4];
+	int factorsCount = xComponents * yComponents;
 	memset(factors, 0, sizeof(factors));
 
-	for(int y = 0; y < yComponents; y++) {
+	if (init_sRGBToLinear_cache() != 0) return NULL;
+	float *cosX = (float *)malloc(sizeof(float) * width * factorsCount);
+	if (! cosX) return NULL;
+	float *cosY = (float *)malloc(sizeof(float) * height * factorsCount);
+	if (! cosY) {
+		free(cosX);
+		return NULL;
+	}
+	for(int i = 0; i < width; i++) {
 		for(int x = 0; x < xComponents; x++) {
-			float *factor = multiplyBasisFunction(x, y, width, height, rgb, bytesPerRow);
-			factors[y][x][0] = factor[0];
-			factors[y][x][1] = factor[1];
-			factors[y][x][2] = factor[2];
+			float weight = cosf(M_PI * x * i / width);
+			for(int y = 0; y < yComponents; y++) {
+				cosX[i * factorsCount + y * xComponents + x] = weight;
+			}
 		}
 	}
+	for(int i = 0; i < height; i++) {
+		for(int y = 0; y < yComponents; y++) {
+			float weight = cosf(M_PI * y * i / height);
+			for(int x = 0; x < xComponents; x++) {
+				cosY[i * factorsCount + y * xComponents + x] = weight;
+			}
+		}
+	}
+	multiplyBasisFunction(factors, factorsCount, width, height, rgb, bytesPerRow, cosX, cosY);
+	free(cosX);
+	free(cosY);
 
-	float *dc = factors[0][0];
-	float *ac = dc + 3;
-	int acCount = xComponents * yComponents - 1;
+	float *dc = factors[0];
+	float *ac = dc + 4;
+	int acCount = factorsCount - 1;
 	char *ptr = buffer;
 
 	int sizeFlag = (xComponents - 1) + (yComponents - 1) * 9;
@@ -38,7 +83,7 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h
 	float maximumValue;
 	if(acCount > 0) {
 		float actualMaximumValue = 0;
-		for(int i = 0; i < acCount * 3; i++) {
+		for(int i = 0; i < acCount * 4; i++) {
 			actualMaximumValue = fmaxf(fabsf(ac[i]), actualMaximumValue);
 		}
 
@@ -53,7 +98,7 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h
 	ptr = encode_int(encodeDC(dc[0], dc[1], dc[2]), 4, ptr);
 
 	for(int i = 0; i < acCount; i++) {
-		ptr = encode_int(encodeAC(ac[i * 3 + 0], ac[i * 3 + 1], ac[i * 3 + 2], maximumValue), 2, ptr);
+		ptr = encode_int(encodeAC(ac[i * 4 + 0], ac[i * 4 + 1], ac[i * 4 + 2], maximumValue), 2, ptr);
 	}
 
 	*ptr = 0;
@@ -61,27 +106,55 @@ const char *blurHashForPixels(int xComponents, int yComponents, int width, int h
 	return buffer;
 }
 
-static float *multiplyBasisFunction(int xComponent, int yComponent, int width, int height, uint8_t *rgb, size_t bytesPerRow) {
-	float r = 0, g = 0, b = 0;
-	float normalisation = (xComponent == 0 && yComponent == 0) ? 1 : 2;
+// Accumulates every basis factor in a single pass over the image, then scales them.
+// The caller must have built sRGBToLinear_cache and zeroed `factors` beforehand.
+static void multiplyBasisFunction(
+	float factors[][4], int factorsCount, int width, int height, uint8_t *rgb, size_t bytesPerRow,
+	float *cosX, float *cosY
+) {
 
 	for(int y = 0; y < height; y++) {
-		for(int x = 0; x < width; x++) {
-			float basis = cosf(M_PI * xComponent * x / width) * cosf(M_PI * yComponent * y / height);
-			r += basis * sRGBToLinear(rgb[3 * x + 0 + y * bytesPerRow]);
-			g += basis * sRGBToLinear(rgb[3 * x + 1 + y * bytesPerRow]);
-			b += basis * sRGBToLinear(rgb[3 * x + 2 + y * bytesPerRow]);
+		uint8_t *src = rgb + y * bytesPerRow;
+		float *cosYLocal = cosY + y * factorsCount;
+		int x = 0;
+		for(; x < width - 3; x += 4) {
+			float *cosXLocal = cosX + x * factorsCount;
+			float pixel0[4] = {sRGBToLinear_cache[src[3 * (x+0) + 0]], sRGBToLinear_cache[src[3 * (x+0) + 1]], sRGBToLinear_cache[src[3 * (x+0) + 2]]};
+			float pixel1[4] = {sRGBToLinear_cache[src[3 * (x+1) + 0]], sRGBToLinear_cache[src[3 * (x+1) + 1]], sRGBToLinear_cache[src[3 * (x+1) + 2]]};
+			float pixel2[4] = {sRGBToLinear_cache[src[3 * (x+2) + 0]], sRGBToLinear_cache[src[3 * (x+2) + 1]], sRGBToLinear_cache[src[3 * (x+2) + 2]]};
+			float pixel3[4] = {sRGBToLinear_cache[src[3 * (x+3) + 0]], sRGBToLinear_cache[src[3 * (x+3) + 1]], sRGBToLinear_cache[src[3 * (x+3) + 2]]};
+			for (int i = 0; i < factorsCount; i++) {
+				float basis0 = cosYLocal[i] * cosXLocal[i + 0 * factorsCount];
+				float basis1 = cosYLocal[i] * cosXLocal[i + 1 * factorsCount];
+				float basis2 = cosYLocal[i] * cosXLocal[i + 2 * factorsCount];
+				float basis3 = cosYLocal[i] * cosXLocal[i + 3 * factorsCount];
+				factors[i][0] += basis0 * pixel0[0] + basis1 * pixel1[0] + basis2 * pixel2[0] + basis3 * pixel3[0];
+				factors[i][1] += basis0 * pixel0[1] + basis1 * pixel1[1] + basis2 * pixel2[1] + basis3 * pixel3[1];
+				factors[i][2] += basis0 * pixel0[2] + basis1 * pixel1[2] + basis2 * pixel2[2] + basis3 * pixel3[2];
+			}
+		}
+		for(; x < width; x++) {
+			float pixel[4];
+			float *cosXLocal = cosX + x * factorsCount;
+			pixel[0] = sRGBToLinear_cache[src[3 * x + 0]];
+			pixel[1] = sRGBToLinear_cache[src[3 * x + 1]];
+			pixel[2] = sRGBToLinear_cache[src[3 * x + 2]];
+			for (int i = 0; i < factorsCount; i++) {
+				float basis = cosYLocal[i] * cosXLocal[i];
+				factors[i][0] += basis * pixel[0];
+				factors[i][1] += basis * pixel[1];
+				factors[i][2] += basis * pixel[2];
+			}
 		}
 	}
 
-	float scale = normalisation / (width * height);
-
-	static float result[3];
-	result[0] = r * scale;
-	result[1] = g * scale;
-	result[2] = b * scale;
-
-	return result;
+	for (int i = 0; i < factorsCount; i++) {
+		float normalisation = (i == 0) ? 1 : 2;
+		float scale = normalisation / (width * height);
+		factors[i][0] *= scale;
+		factors[i][1] *= scale;
+		factors[i][2] *= scale;
+	}
 }
 
 
diff --git a/C/encode_stb.c b/C/encode_stb.c
index cd3e461a..811ca000 100644
--- a/C/encode_stb.c
+++ b/C/encode_stb.c
@@ -15,8 +15,8 @@ int main(int argc, const char **argv) {
 
 	int xComponents = atoi(argv[1]);
 	int yComponents = atoi(argv[2]);
-	if(xComponents < 1 || xComponents > 8 || yComponents < 1 || yComponents > 8) {
-		fprintf(stderr, "Component counts must be between 1 and 8.\n");
+	if(xComponents < 1 || xComponents > 9 || yComponents < 1 || yComponents > 9) {
+		fprintf(stderr, "Component counts must be between 1 and 9.\n");
 		return 1;
 	}
 