From 3c74201d2f4952048add84a416f355aa3c08c9ae Mon Sep 17 00:00:00 2001 From: jhen Date: Tue, 14 Nov 2023 07:58:22 +0800 Subject: [PATCH] fix(cpp): rename quantize / dequantize to avoid redefinition with llama.rn --- cpp/ggml-quants.c | 116 +++++++++++++++++------------------ cpp/ggml-quants.h | 80 ++++++++++++------------ cpp/ggml.c | 142 +++++++++++++++++++++---------------------- cpp/ggml.h | 28 ++++----- scripts/bootstrap.sh | 4 ++ 5 files changed, 187 insertions(+), 183 deletions(-) diff --git a/cpp/ggml-quants.c b/cpp/ggml-quants.c index 7603f86..32f4170 100644 --- a/cpp/ggml-quants.c +++ b/cpp/ggml-quants.c @@ -425,7 +425,7 @@ static const uint64_t table_b2b_1[1 << 8] = { B8(10, 00) }; // (!b) << 4 #endif // reference implementation for deterministic creation of model files -void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k) { +void wsp_quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k) { static const int qk = QK4_0; assert(k % qk == 0); @@ -462,11 +462,11 @@ void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict } } -void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) { - quantize_row_q4_0_reference(x, y, k); +void wsp_quantize_row_q4_0(const float * restrict x, void * restrict y, int k) { + wsp_quantize_row_q4_0_reference(x, y, k); } -void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k) { +void wsp_quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k) { const int qk = QK4_1; assert(k % qk == 0); @@ -503,11 +503,11 @@ void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict } } -void quantize_row_q4_1(const float * restrict x, void * restrict y, int k) { - quantize_row_q4_1_reference(x, y, k); +void wsp_quantize_row_q4_1(const float * restrict x, void * restrict y, int k) { + wsp_quantize_row_q4_1_reference(x, y, k); } -void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k) { +void wsp_quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k) { static const int qk = QK5_0; assert(k % qk == 0); @@ -551,11 +551,11 @@ void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict } } -void quantize_row_q5_0(const float * restrict x, void * restrict y, int k) { - quantize_row_q5_0_reference(x, y, k); +void wsp_quantize_row_q5_0(const float * restrict x, void * restrict y, int k) { + wsp_quantize_row_q5_0_reference(x, y, k); } -void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k) { +void wsp_quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k) { const int qk = QK5_1; assert(k % qk == 0); @@ -599,12 +599,12 @@ void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict } } -void quantize_row_q5_1(const float * restrict x, void * restrict y, int k) { - quantize_row_q5_1_reference(x, y, k); +void wsp_quantize_row_q5_1(const float * restrict x, void * restrict y, int k) { + wsp_quantize_row_q5_1_reference(x, y, k); } // reference implementation for deterministic creation of model files -void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k) { +void wsp_quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k) { assert(k % QK8_0 == 0); const int nb = k / QK8_0; @@ -629,7 +629,7 @@ void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict } } -void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) { +void wsp_quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) { assert(QK8_0 == 32); assert(k % QK8_0 == 0); const int nb = k / QK8_0; @@ -813,12 +813,12 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) { #else WSP_GGML_UNUSED(nb); // scalar - quantize_row_q8_0_reference(x, y, k); + wsp_quantize_row_q8_0_reference(x, y, k); #endif } // reference implementation for deterministic creation of model files -void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k) { +void wsp_quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k) { assert(QK8_1 == 32); assert(k % QK8_1 == 0); const int nb = k / QK8_1; @@ -853,7 +853,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict } } -void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { +void wsp_quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { assert(k % QK8_1 == 0); const int nb = k / QK8_1; @@ -1067,11 +1067,11 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { #else WSP_GGML_UNUSED(nb); // scalar - quantize_row_q8_1_reference(x, y, k); + wsp_quantize_row_q8_1_reference(x, y, k); #endif } -void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k) { +void wsp_dewsp_quantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k) { static const int qk = QK4_0; assert(k % qk == 0); @@ -1091,7 +1091,7 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int } } -void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) { +void wsp_dewsp_quantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) { static const int qk = QK4_1; assert(k % qk == 0); @@ -1112,7 +1112,7 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int } } -void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k) { +void wsp_dewsp_quantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k) { static const int qk = QK5_0; assert(k % qk == 0); @@ -1138,7 +1138,7 @@ void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int } } -void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k) { +void wsp_dewsp_quantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k) { static const int qk = QK5_1; assert(k % qk == 0); @@ -1165,7 +1165,7 @@ void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int } } -void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k) { +void wsp_dewsp_quantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k) { static const int qk = QK8_0; assert(k % qk == 0); @@ -1450,7 +1450,7 @@ static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t * //========================- 2-bit (de)-quantization -void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k) { +void wsp_quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k) { assert(k % QK_K == 0); const int nb = k / QK_K; @@ -1527,7 +1527,7 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict } } -void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k) { +void wsp_dewsp_quantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k) { assert(k % QK_K == 0); const int nb = k / QK_K; @@ -1573,23 +1573,23 @@ void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int } } -void quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) { - quantize_row_q2_K_reference(x, vy, k); +void wsp_quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) { + wsp_quantize_row_q2_K_reference(x, vy, k); } -size_t wsp_ggml_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) { +size_t wsp_ggml_wsp_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) { (void)hist; // TODO: collect histograms for (int j = 0; j < n; j += k) { block_q2_K * restrict y = (block_q2_K *)dst + j/QK_K; - quantize_row_q2_K_reference(src + j, y, k); + wsp_quantize_row_q2_K_reference(src + j, y, k); } return (n/QK_K*sizeof(block_q2_K)); } //========================= 3-bit (de)-quantization -void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k) { +void wsp_quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k) { assert(k % QK_K == 0); const int nb = k / QK_K; @@ -1703,7 +1703,7 @@ void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict } #if QK_K == 256 -void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) { +void wsp_dewsp_quantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) { assert(k % QK_K == 0); const int nb = k / QK_K; @@ -1753,7 +1753,7 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int } } #else -void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) { +void wsp_dewsp_quantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) { assert(k % QK_K == 0); assert(QK_K == 64); const int nb = k / QK_K; @@ -1786,23 +1786,23 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int } #endif -void quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) { - quantize_row_q3_K_reference(x, vy, k); +void wsp_quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) { + wsp_quantize_row_q3_K_reference(x, vy, k); } -size_t wsp_ggml_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) { +size_t wsp_ggml_wsp_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) { (void)hist; // TODO: collect histograms for (int j = 0; j < n; j += k) { block_q3_K * restrict y = (block_q3_K *)dst + j/QK_K; - quantize_row_q3_K_reference(src + j, y, k); + wsp_quantize_row_q3_K_reference(src + j, y, k); } return (n/QK_K*sizeof(block_q3_K)); } // ====================== 4-bit (de)-quantization -void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k) { +void wsp_quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k) { assert(k % QK_K == 0); const int nb = k / QK_K; @@ -1909,7 +1909,7 @@ void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict } } -void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k) { +void wsp_dewsp_quantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k) { assert(k % QK_K == 0); const int nb = k / QK_K; @@ -1948,26 +1948,26 @@ void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int } } -void quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) { +void wsp_quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) { assert(k % QK_K == 0); block_q4_K * restrict y = vy; - quantize_row_q4_K_reference(x, y, k); + wsp_quantize_row_q4_K_reference(x, y, k); } -size_t wsp_ggml_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) { +size_t wsp_ggml_wsp_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) { assert(k % QK_K == 0); (void)hist; // TODO: collect histograms for (int j = 0; j < n; j += k) { block_q4_K * restrict y = (block_q4_K *)dst + j/QK_K; - quantize_row_q4_K_reference(src + j, y, k); + wsp_quantize_row_q4_K_reference(src + j, y, k); } return (n/QK_K*sizeof(block_q4_K)); } // ====================== 5-bit (de)-quantization -void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k) { +void wsp_quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k) { assert(k % QK_K == 0); const int nb = k / QK_K; @@ -2109,7 +2109,7 @@ void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict } } -void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k) { +void wsp_dewsp_quantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k) { assert(k % QK_K == 0); const int nb = k / QK_K; @@ -2154,26 +2154,26 @@ void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int } } -void quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) { +void wsp_quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) { assert(k % QK_K == 0); block_q5_K * restrict y = vy; - quantize_row_q5_K_reference(x, y, k); + wsp_quantize_row_q5_K_reference(x, y, k); } -size_t wsp_ggml_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) { +size_t wsp_ggml_wsp_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) { assert(k % QK_K == 0); (void)hist; // TODO: collect histograms for (int j = 0; j < n; j += k) { block_q5_K * restrict y = (block_q5_K *)dst + j/QK_K; - quantize_row_q5_K_reference(src + j, y, k); + wsp_quantize_row_q5_K_reference(src + j, y, k); } return (n/QK_K*sizeof(block_q5_K)); } // ====================== 6-bit (de)-quantization -void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k) { +void wsp_quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k) { assert(k % QK_K == 0); const int nb = k / QK_K; @@ -2255,7 +2255,7 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict } } -void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k) { +void wsp_dewsp_quantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k) { assert(k % QK_K == 0); const int nb = k / QK_K; @@ -2302,26 +2302,26 @@ void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int } } -void quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) { +void wsp_quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) { assert(k % QK_K == 0); block_q6_K * restrict y = vy; - quantize_row_q6_K_reference(x, y, k); + wsp_quantize_row_q6_K_reference(x, y, k); } -size_t wsp_ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) { +size_t wsp_ggml_wsp_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) { assert(k % QK_K == 0); (void)hist; // TODO: collect histograms for (int j = 0; j < n; j += k) { block_q6_K * restrict y = (block_q6_K *)dst + j/QK_K; - quantize_row_q6_K_reference(src + j, y, k); + wsp_quantize_row_q6_K_reference(src + j, y, k); } return (n/QK_K*sizeof(block_q6_K)); } //===================================== Q8_K ============================================== -void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k) { +void wsp_quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k) { assert(k % QK_K == 0); const int nb = k / QK_K; @@ -2358,7 +2358,7 @@ void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict } } -void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k) { +void wsp_dewsp_quantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k) { assert(k % QK_K == 0); const int nb = k / QK_K; @@ -2369,8 +2369,8 @@ void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int } } -void quantize_row_q8_K(const float * restrict x, void * restrict y, int k) { - quantize_row_q8_K_reference(x, y, k); +void wsp_quantize_row_q8_K(const float * restrict x, void * restrict y, int k) { + wsp_quantize_row_q8_K_reference(x, y, k); } //===================================== Dot ptoducts ================================= diff --git a/cpp/ggml-quants.h b/cpp/ggml-quants.h index 603f973..b0e60cf 100644 --- a/cpp/ggml-quants.h +++ b/cpp/ggml-quants.h @@ -167,48 +167,48 @@ static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_ // Quantization -void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k); -void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k); -void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k); -void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k); -void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k); -void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k); - -void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k); -void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k); -void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k); -void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k); -void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k); -void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k); - -void quantize_row_q4_0(const float * restrict x, void * restrict y, int k); -void quantize_row_q4_1(const float * restrict x, void * restrict y, int k); -void quantize_row_q5_0(const float * restrict x, void * restrict y, int k); -void quantize_row_q5_1(const float * restrict x, void * restrict y, int k); -void quantize_row_q8_0(const float * restrict x, void * restrict y, int k); -void quantize_row_q8_1(const float * restrict x, void * restrict y, int k); - -void quantize_row_q2_K(const float * restrict x, void * restrict y, int k); -void quantize_row_q3_K(const float * restrict x, void * restrict y, int k); -void quantize_row_q4_K(const float * restrict x, void * restrict y, int k); -void quantize_row_q5_K(const float * restrict x, void * restrict y, int k); -void quantize_row_q6_K(const float * restrict x, void * restrict y, int k); -void quantize_row_q8_K(const float * restrict x, void * restrict y, int k); +void wsp_quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k); +void wsp_quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k); +void wsp_quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k); +void wsp_quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k); +void wsp_quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k); +void wsp_quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k); + +void wsp_quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k); +void wsp_quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k); +void wsp_quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k); +void wsp_quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k); +void wsp_quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k); +void wsp_quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k); + +void wsp_quantize_row_q4_0(const float * restrict x, void * restrict y, int k); +void wsp_quantize_row_q4_1(const float * restrict x, void * restrict y, int k); +void wsp_quantize_row_q5_0(const float * restrict x, void * restrict y, int k); +void wsp_quantize_row_q5_1(const float * restrict x, void * restrict y, int k); +void wsp_quantize_row_q8_0(const float * restrict x, void * restrict y, int k); +void wsp_quantize_row_q8_1(const float * restrict x, void * restrict y, int k); + +void wsp_quantize_row_q2_K(const float * restrict x, void * restrict y, int k); +void wsp_quantize_row_q3_K(const float * restrict x, void * restrict y, int k); +void wsp_quantize_row_q4_K(const float * restrict x, void * restrict y, int k); +void wsp_quantize_row_q5_K(const float * restrict x, void * restrict y, int k); +void wsp_quantize_row_q6_K(const float * restrict x, void * restrict y, int k); +void wsp_quantize_row_q8_K(const float * restrict x, void * restrict y, int k); // Dequantization -void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k); -void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k); -void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k); -void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k); -void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k); -//void dequantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k); - -void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k); -void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k); -void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k); -void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k); -void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k); -void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k); +void wsp_dewsp_quantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k); +void wsp_dewsp_quantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k); +void wsp_dewsp_quantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k); +void wsp_dewsp_quantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k); +void wsp_dewsp_quantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k); +//void wsp_dewsp_quantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k); + +void wsp_dewsp_quantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k); +void wsp_dewsp_quantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k); +void wsp_dewsp_quantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k); +void wsp_dewsp_quantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k); +void wsp_dewsp_quantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k); +void wsp_dewsp_quantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k); // Dot product void wsp_ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy); diff --git a/cpp/ggml.c b/cpp/ggml.c index 9c079d8..ee1d795 100644 --- a/cpp/ggml.c +++ b/cpp/ggml.c @@ -455,9 +455,9 @@ static const wsp_ggml_type_traits_t type_traits[WSP_GGML_TYPE_COUNT] = { .blck_size = QK4_0, .type_size = sizeof(block_q4_0), .is_quantized = true, - .to_float = (wsp_ggml_to_float_t) dequantize_row_q4_0, - .from_float = quantize_row_q4_0, - .from_float_reference = (wsp_ggml_from_float_t) quantize_row_q4_0_reference, + .to_float = (wsp_ggml_to_float_t) wsp_dewsp_quantize_row_q4_0, + .from_float = wsp_quantize_row_q4_0, + .from_float_reference = (wsp_ggml_from_float_t) wsp_quantize_row_q4_0_reference, .vec_dot = wsp_ggml_vec_dot_q4_0_q8_0, .vec_dot_type = WSP_GGML_TYPE_Q8_0, }, @@ -466,9 +466,9 @@ static const wsp_ggml_type_traits_t type_traits[WSP_GGML_TYPE_COUNT] = { .blck_size = QK4_1, .type_size = sizeof(block_q4_1), .is_quantized = true, - .to_float = (wsp_ggml_to_float_t) dequantize_row_q4_1, - .from_float = quantize_row_q4_1, - .from_float_reference = (wsp_ggml_from_float_t) quantize_row_q4_1_reference, + .to_float = (wsp_ggml_to_float_t) wsp_dewsp_quantize_row_q4_1, + .from_float = wsp_quantize_row_q4_1, + .from_float_reference = (wsp_ggml_from_float_t) wsp_quantize_row_q4_1_reference, .vec_dot = wsp_ggml_vec_dot_q4_1_q8_1, .vec_dot_type = WSP_GGML_TYPE_Q8_1, }, @@ -499,9 +499,9 @@ static const wsp_ggml_type_traits_t type_traits[WSP_GGML_TYPE_COUNT] = { .blck_size = QK5_0, .type_size = sizeof(block_q5_0), .is_quantized = true, - .to_float = (wsp_ggml_to_float_t) dequantize_row_q5_0, - .from_float = quantize_row_q5_0, - .from_float_reference = (wsp_ggml_from_float_t) quantize_row_q5_0_reference, + .to_float = (wsp_ggml_to_float_t) wsp_dewsp_quantize_row_q5_0, + .from_float = wsp_quantize_row_q5_0, + .from_float_reference = (wsp_ggml_from_float_t) wsp_quantize_row_q5_0_reference, .vec_dot = wsp_ggml_vec_dot_q5_0_q8_0, .vec_dot_type = WSP_GGML_TYPE_Q8_0, }, @@ -510,9 +510,9 @@ static const wsp_ggml_type_traits_t type_traits[WSP_GGML_TYPE_COUNT] = { .blck_size = QK5_1, .type_size = sizeof(block_q5_1), .is_quantized = true, - .to_float = (wsp_ggml_to_float_t) dequantize_row_q5_1, - .from_float = quantize_row_q5_1, - .from_float_reference = (wsp_ggml_from_float_t) quantize_row_q5_1_reference, + .to_float = (wsp_ggml_to_float_t) wsp_dewsp_quantize_row_q5_1, + .from_float = wsp_quantize_row_q5_1, + .from_float_reference = (wsp_ggml_from_float_t) wsp_quantize_row_q5_1_reference, .vec_dot = wsp_ggml_vec_dot_q5_1_q8_1, .vec_dot_type = WSP_GGML_TYPE_Q8_1, }, @@ -521,9 +521,9 @@ static const wsp_ggml_type_traits_t type_traits[WSP_GGML_TYPE_COUNT] = { .blck_size = QK8_0, .type_size = sizeof(block_q8_0), .is_quantized = true, - .to_float = (wsp_ggml_to_float_t) dequantize_row_q8_0, - .from_float = quantize_row_q8_0, - .from_float_reference = (wsp_ggml_from_float_t) quantize_row_q8_0_reference, + .to_float = (wsp_ggml_to_float_t) wsp_dewsp_quantize_row_q8_0, + .from_float = wsp_quantize_row_q8_0, + .from_float_reference = (wsp_ggml_from_float_t) wsp_quantize_row_q8_0_reference, .vec_dot = wsp_ggml_vec_dot_q8_0_q8_0, .vec_dot_type = WSP_GGML_TYPE_Q8_0, }, @@ -532,8 +532,8 @@ static const wsp_ggml_type_traits_t type_traits[WSP_GGML_TYPE_COUNT] = { .blck_size = QK8_1, .type_size = sizeof(block_q8_1), .is_quantized = true, - .from_float = quantize_row_q8_1, - .from_float_reference = (wsp_ggml_from_float_t) quantize_row_q8_1_reference, + .from_float = wsp_quantize_row_q8_1, + .from_float_reference = (wsp_ggml_from_float_t) wsp_quantize_row_q8_1_reference, .vec_dot_type = WSP_GGML_TYPE_Q8_1, }, [WSP_GGML_TYPE_Q2_K] = { @@ -541,9 +541,9 @@ static const wsp_ggml_type_traits_t type_traits[WSP_GGML_TYPE_COUNT] = { .blck_size = QK_K, .type_size = sizeof(block_q2_K), .is_quantized = true, - .to_float = (wsp_ggml_to_float_t) dequantize_row_q2_K, - .from_float = quantize_row_q2_K, - .from_float_reference = (wsp_ggml_from_float_t) quantize_row_q2_K_reference, + .to_float = (wsp_ggml_to_float_t) wsp_dewsp_quantize_row_q2_K, + .from_float = wsp_quantize_row_q2_K, + .from_float_reference = (wsp_ggml_from_float_t) wsp_quantize_row_q2_K_reference, .vec_dot = wsp_ggml_vec_dot_q2_K_q8_K, .vec_dot_type = WSP_GGML_TYPE_Q8_K, }, @@ -552,9 +552,9 @@ static const wsp_ggml_type_traits_t type_traits[WSP_GGML_TYPE_COUNT] = { .blck_size = QK_K, .type_size = sizeof(block_q3_K), .is_quantized = true, - .to_float = (wsp_ggml_to_float_t) dequantize_row_q3_K, - .from_float = quantize_row_q3_K, - .from_float_reference = (wsp_ggml_from_float_t) quantize_row_q3_K_reference, + .to_float = (wsp_ggml_to_float_t) wsp_dewsp_quantize_row_q3_K, + .from_float = wsp_quantize_row_q3_K, + .from_float_reference = (wsp_ggml_from_float_t) wsp_quantize_row_q3_K_reference, .vec_dot = wsp_ggml_vec_dot_q3_K_q8_K, .vec_dot_type = WSP_GGML_TYPE_Q8_K, }, @@ -563,9 +563,9 @@ static const wsp_ggml_type_traits_t type_traits[WSP_GGML_TYPE_COUNT] = { .blck_size = QK_K, .type_size = sizeof(block_q4_K), .is_quantized = true, - .to_float = (wsp_ggml_to_float_t) dequantize_row_q4_K, - .from_float = quantize_row_q4_K, - .from_float_reference = (wsp_ggml_from_float_t) quantize_row_q4_K_reference, + .to_float = (wsp_ggml_to_float_t) wsp_dewsp_quantize_row_q4_K, + .from_float = wsp_quantize_row_q4_K, + .from_float_reference = (wsp_ggml_from_float_t) wsp_quantize_row_q4_K_reference, .vec_dot = wsp_ggml_vec_dot_q4_K_q8_K, .vec_dot_type = WSP_GGML_TYPE_Q8_K, }, @@ -574,9 +574,9 @@ static const wsp_ggml_type_traits_t type_traits[WSP_GGML_TYPE_COUNT] = { .blck_size = QK_K, .type_size = sizeof(block_q5_K), .is_quantized = true, - .to_float = (wsp_ggml_to_float_t) dequantize_row_q5_K, - .from_float = quantize_row_q5_K, - .from_float_reference = (wsp_ggml_from_float_t) quantize_row_q5_K_reference, + .to_float = (wsp_ggml_to_float_t) wsp_dewsp_quantize_row_q5_K, + .from_float = wsp_quantize_row_q5_K, + .from_float_reference = (wsp_ggml_from_float_t) wsp_quantize_row_q5_K_reference, .vec_dot = wsp_ggml_vec_dot_q5_K_q8_K, .vec_dot_type = WSP_GGML_TYPE_Q8_K, }, @@ -585,9 +585,9 @@ static const wsp_ggml_type_traits_t type_traits[WSP_GGML_TYPE_COUNT] = { .blck_size = QK_K, .type_size = sizeof(block_q6_K), .is_quantized = true, - .to_float = (wsp_ggml_to_float_t) dequantize_row_q6_K, - .from_float = quantize_row_q6_K, - .from_float_reference = (wsp_ggml_from_float_t) quantize_row_q6_K_reference, + .to_float = (wsp_ggml_to_float_t) wsp_dewsp_quantize_row_q6_K, + .from_float = wsp_quantize_row_q6_K, + .from_float_reference = (wsp_ggml_from_float_t) wsp_quantize_row_q6_K_reference, .vec_dot = wsp_ggml_vec_dot_q6_K_q8_K, .vec_dot_type = WSP_GGML_TYPE_Q8_K, }, @@ -596,7 +596,7 @@ static const wsp_ggml_type_traits_t type_traits[WSP_GGML_TYPE_COUNT] = { .blck_size = QK_K, .type_size = sizeof(block_q8_K), .is_quantized = true, - .from_float = quantize_row_q8_K, + .from_float = wsp_quantize_row_q8_K, } }; @@ -6472,7 +6472,7 @@ static void wsp_ggml_compute_forward_dup_f16( } } } else if (type_traits[dst->type].from_float) { - wsp_ggml_from_float_t const quantize_row_q = type_traits[dst->type].from_float; + wsp_ggml_from_float_t const wsp_quantize_row_q = type_traits[dst->type].from_float; float * src0_f32 = (float *) params->wdata + (ne00 + CACHE_LINE_SIZE_F32) * ith; size_t id = 0; @@ -6489,7 +6489,7 @@ static void wsp_ggml_compute_forward_dup_f16( src0_f32[i00] = WSP_GGML_FP16_TO_FP32(src0_ptr[i00]); } - quantize_row_q(src0_f32, dst_ptr + id, ne00); + wsp_quantize_row_q(src0_f32, dst_ptr + id, ne00); id += rs; } id += rs * (ne01 - ir1); @@ -6725,7 +6725,7 @@ static void wsp_ggml_compute_forward_dup_f32( } } } else if (type_traits[dst->type].from_float) { - wsp_ggml_from_float_t const quantize_row_q = type_traits[dst->type].from_float; + wsp_ggml_from_float_t const wsp_quantize_row_q = type_traits[dst->type].from_float; size_t id = 0; size_t rs = nb0 * (ne00 / wsp_ggml_blck_size(dst->type)); @@ -6736,7 +6736,7 @@ static void wsp_ggml_compute_forward_dup_f32( id += rs * ir0; for (int i01 = ir0; i01 < ir1; i01++) { const float * src0_ptr = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); - quantize_row_q(src0_ptr, dst_ptr + id, ne00); + wsp_quantize_row_q(src0_ptr, dst_ptr + id, ne00); id += rs; } id += rs * (ne01 - ir1); @@ -7158,8 +7158,8 @@ static void wsp_ggml_compute_forward_add_q_f32( const enum wsp_ggml_type type = src0->type; const enum wsp_ggml_type dtype = dst->type; - wsp_ggml_to_float_t const dequantize_row_q = type_traits[type].to_float; - wsp_ggml_from_float_t const quantize_row_q = type_traits[dtype].from_float; + wsp_ggml_to_float_t const wsp_dewsp_quantize_row_q = type_traits[type].to_float; + wsp_ggml_from_float_t const wsp_quantize_row_q = type_traits[dtype].from_float; // we don't support permuted src0 or src1 WSP_GGML_ASSERT(nb00 == wsp_ggml_type_size(type)); @@ -7204,12 +7204,12 @@ static void wsp_ggml_compute_forward_add_q_f32( assert(ne00 % 32 == 0); // unquantize row from src0 to temp buffer - dequantize_row_q(src0_row, wdata, ne00); + wsp_dewsp_quantize_row_q(src0_row, wdata, ne00); // add src1 wsp_ggml_vec_acc_f32(ne00, wdata, src1_row); // quantize row to dst - if (quantize_row_q != NULL) { - quantize_row_q(wdata, dst_row, ne00); + if (wsp_quantize_row_q != NULL) { + wsp_quantize_row_q(wdata, dst_row, ne00); } else { memcpy(dst_row, wdata, ne0*nb0); } @@ -7435,8 +7435,8 @@ static void wsp_ggml_compute_forward_add1_q_f32( WSP_GGML_TENSOR_UNARY_OP_LOCALS const enum wsp_ggml_type type = src0->type; - wsp_ggml_to_float_t const dequantize_row_q = type_traits[type].to_float; - wsp_ggml_from_float_t const quantize_row_q = type_traits[type].from_float; + wsp_ggml_to_float_t const wsp_dewsp_quantize_row_q = type_traits[type].to_float; + wsp_ggml_from_float_t const wsp_quantize_row_q = type_traits[type].from_float; // we don't support permuted src0 WSP_GGML_ASSERT(nb00 == wsp_ggml_type_size(type)); @@ -7471,11 +7471,11 @@ static void wsp_ggml_compute_forward_add1_q_f32( assert(ne0 % 32 == 0); // unquantize row from src0 to temp buffer - dequantize_row_q(src0_row, wdata, ne0); + wsp_dewsp_quantize_row_q(src0_row, wdata, ne0); // add src1 wsp_ggml_vec_acc1_f32(ne0, wdata, v); // quantize row to dst - quantize_row_q(wdata, dst_row, ne0); + wsp_quantize_row_q(wdata, dst_row, ne0); } } @@ -9880,7 +9880,7 @@ static void wsp_ggml_compute_forward_out_prod_q_f32( const int nth = params->nth; const enum wsp_ggml_type type = src0->type; - wsp_ggml_to_float_t const dequantize_row_q = type_traits[type].to_float; + wsp_ggml_to_float_t const wsp_dewsp_quantize_row_q = type_traits[type].to_float; WSP_GGML_ASSERT(ne02 == ne12); WSP_GGML_ASSERT(ne03 == ne13); @@ -9957,7 +9957,7 @@ static void wsp_ggml_compute_forward_out_prod_q_f32( float * s1 = (float *) ((char *) src1->data + (i1*nb10 + i11*nb11 + i12*nb12 + i13*nb13)); float * d = (float *) ((char *) dst->data + ( i1*nb1 + i2*nb2 + i3*nb3)); - dequantize_row_q(s0, wdata, ne0); + wsp_dewsp_quantize_row_q(s0, wdata, ne0); wsp_ggml_vec_mad_f32(ne0, d, wdata, *s1); } } @@ -10251,7 +10251,7 @@ static void wsp_ggml_compute_forward_get_rows_q( const int nc = src0->ne[0]; const int nr = wsp_ggml_nelements(src1); const enum wsp_ggml_type type = src0->type; - wsp_ggml_to_float_t const dequantize_row_q = type_traits[type].to_float; + wsp_ggml_to_float_t const wsp_dewsp_quantize_row_q = type_traits[type].to_float; assert( dst->ne[0] == nc); assert( dst->ne[1] == nr); @@ -10260,7 +10260,7 @@ static void wsp_ggml_compute_forward_get_rows_q( for (int i = 0; i < nr; ++i) { const int r = ((int32_t *) src1->data)[i]; - dequantize_row_q( + wsp_dewsp_quantize_row_q( (const void *) ((char *) src0->data + r*src0->nb[1]), (float *) ((char *) dst->data + i*dst->nb[1]), nc); } @@ -18719,14 +18719,14 @@ enum wsp_ggml_opt_result wsp_ggml_opt_resume_g( //////////////////////////////////////////////////////////////////////////////// -size_t wsp_ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist) { +size_t wsp_ggml_wsp_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist) { assert(k % QK4_0 == 0); const int nb = k / QK4_0; for (int b = 0; b < n; b += k) { block_q4_0 * restrict y = (block_q4_0 *) dst + b/QK4_0; - quantize_row_q4_0_reference(src + b, y, k); + wsp_quantize_row_q4_0_reference(src + b, y, k); for (int i = 0; i < nb; i++) { for (int j = 0; j < QK4_0; j += 2) { @@ -18742,14 +18742,14 @@ size_t wsp_ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64 return (n/QK4_0*sizeof(block_q4_0)); } -size_t wsp_ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist) { +size_t wsp_ggml_wsp_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist) { assert(k % QK4_1 == 0); const int nb = k / QK4_1; for (int b = 0; b < n; b += k) { block_q4_1 * restrict y = (block_q4_1 *) dst + b/QK4_1; - quantize_row_q4_1_reference(src + b, y, k); + wsp_quantize_row_q4_1_reference(src + b, y, k); for (int i = 0; i < nb; i++) { for (int j = 0; j < QK4_1; j += 2) { @@ -18765,14 +18765,14 @@ size_t wsp_ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64 return (n/QK4_1*sizeof(block_q4_1)); } -size_t wsp_ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist) { +size_t wsp_ggml_wsp_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist) { assert(k % QK5_0 == 0); const int nb = k / QK5_0; for (int b = 0; b < n; b += k) { block_q5_0 * restrict y = (block_q5_0 *)dst + b/QK5_0; - quantize_row_q5_0_reference(src + b, y, k); + wsp_quantize_row_q5_0_reference(src + b, y, k); for (int i = 0; i < nb; i++) { uint32_t qh; @@ -18795,14 +18795,14 @@ size_t wsp_ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64 return (n/QK5_0*sizeof(block_q5_0)); } -size_t wsp_ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist) { +size_t wsp_ggml_wsp_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist) { assert(k % QK5_1 == 0); const int nb = k / QK5_1; for (int b = 0; b < n; b += k) { block_q5_1 * restrict y = (block_q5_1 *)dst + b/QK5_1; - quantize_row_q5_1_reference(src + b, y, k); + wsp_quantize_row_q5_1_reference(src + b, y, k); for (int i = 0; i < nb; i++) { uint32_t qh; @@ -18825,14 +18825,14 @@ size_t wsp_ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64 return (n/QK5_1*sizeof(block_q5_1)); } -size_t wsp_ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist) { +size_t wsp_ggml_wsp_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist) { assert(k % QK8_0 == 0); const int nb = k / QK8_0; for (int b = 0; b < n; b += k) { block_q8_0 * restrict y = (block_q8_0 *)dst + b/QK8_0; - quantize_row_q8_0_reference(src + b, y, k); + wsp_quantize_row_q8_0_reference(src + b, y, k); for (int i = 0; i < nb; i++) { for (int j = 0; j < QK8_0; ++j) { @@ -18846,68 +18846,68 @@ size_t wsp_ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64 return (n/QK8_0*sizeof(block_q8_0)); } -size_t wsp_ggml_quantize_chunk(enum wsp_ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist) { +size_t wsp_ggml_wsp_quantize_chunk(enum wsp_ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist) { size_t result = 0; switch (type) { case WSP_GGML_TYPE_Q4_0: { WSP_GGML_ASSERT(start % QK4_0 == 0); block_q4_0 * block = (block_q4_0*)dst + start / QK4_0; - result = wsp_ggml_quantize_q4_0(src + start, block, n, n, hist); + result = wsp_ggml_wsp_quantize_q4_0(src + start, block, n, n, hist); } break; case WSP_GGML_TYPE_Q4_1: { WSP_GGML_ASSERT(start % QK4_1 == 0); block_q4_1 * block = (block_q4_1*)dst + start / QK4_1; - result = wsp_ggml_quantize_q4_1(src + start, block, n, n, hist); + result = wsp_ggml_wsp_quantize_q4_1(src + start, block, n, n, hist); } break; case WSP_GGML_TYPE_Q5_0: { WSP_GGML_ASSERT(start % QK5_0 == 0); block_q5_0 * block = (block_q5_0*)dst + start / QK5_0; - result = wsp_ggml_quantize_q5_0(src + start, block, n, n, hist); + result = wsp_ggml_wsp_quantize_q5_0(src + start, block, n, n, hist); } break; case WSP_GGML_TYPE_Q5_1: { WSP_GGML_ASSERT(start % QK5_1 == 0); block_q5_1 * block = (block_q5_1*)dst + start / QK5_1; - result = wsp_ggml_quantize_q5_1(src + start, block, n, n, hist); + result = wsp_ggml_wsp_quantize_q5_1(src + start, block, n, n, hist); } break; case WSP_GGML_TYPE_Q8_0: { WSP_GGML_ASSERT(start % QK8_0 == 0); block_q8_0 * block = (block_q8_0*)dst + start / QK8_0; - result = wsp_ggml_quantize_q8_0(src + start, block, n, n, hist); + result = wsp_ggml_wsp_quantize_q8_0(src + start, block, n, n, hist); } break; case WSP_GGML_TYPE_Q2_K: { WSP_GGML_ASSERT(start % QK_K == 0); block_q2_K * block = (block_q2_K*)dst + start / QK_K; - result = wsp_ggml_quantize_q2_K(src + start, block, n, n, hist); + result = wsp_ggml_wsp_quantize_q2_K(src + start, block, n, n, hist); } break; case WSP_GGML_TYPE_Q3_K: { WSP_GGML_ASSERT(start % QK_K == 0); block_q3_K * block = (block_q3_K*)dst + start / QK_K; - result = wsp_ggml_quantize_q3_K(src + start, block, n, n, hist); + result = wsp_ggml_wsp_quantize_q3_K(src + start, block, n, n, hist); } break; case WSP_GGML_TYPE_Q4_K: { WSP_GGML_ASSERT(start % QK_K == 0); block_q4_K * block = (block_q4_K*)dst + start / QK_K; - result = wsp_ggml_quantize_q4_K(src + start, block, n, n, hist); + result = wsp_ggml_wsp_quantize_q4_K(src + start, block, n, n, hist); } break; case WSP_GGML_TYPE_Q5_K: { WSP_GGML_ASSERT(start % QK_K == 0); block_q5_K * block = (block_q5_K*)dst + start / QK_K; - result = wsp_ggml_quantize_q5_K(src + start, block, n, n, hist); + result = wsp_ggml_wsp_quantize_q5_K(src + start, block, n, n, hist); } break; case WSP_GGML_TYPE_Q6_K: { WSP_GGML_ASSERT(start % QK_K == 0); block_q6_K * block = (block_q6_K*)dst + start / QK_K; - result = wsp_ggml_quantize_q6_K(src + start, block, n, n, hist); + result = wsp_ggml_wsp_quantize_q6_K(src + start, block, n, n, hist); } break; case WSP_GGML_TYPE_F16: { diff --git a/cpp/ggml.h b/cpp/ggml.h index bf1d729..36b0465 100644 --- a/cpp/ggml.h +++ b/cpp/ggml.h @@ -1955,20 +1955,20 @@ extern "C" { // quantization // - // TODO: these would probably get removed in favor of the more general wsp_ggml_quantize_chunk - WSP_GGML_API size_t wsp_ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist); - WSP_GGML_API size_t wsp_ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist); - WSP_GGML_API size_t wsp_ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist); - WSP_GGML_API size_t wsp_ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist); - WSP_GGML_API size_t wsp_ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist); - - WSP_GGML_API size_t wsp_ggml_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist); - WSP_GGML_API size_t wsp_ggml_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist); - WSP_GGML_API size_t wsp_ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist); - WSP_GGML_API size_t wsp_ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist); - WSP_GGML_API size_t wsp_ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist); - - WSP_GGML_API size_t wsp_ggml_quantize_chunk(enum wsp_ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist); + // TODO: these would probably get removed in favor of the more general wsp_ggml_wsp_quantize_chunk + WSP_GGML_API size_t wsp_ggml_wsp_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist); + WSP_GGML_API size_t wsp_ggml_wsp_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist); + WSP_GGML_API size_t wsp_ggml_wsp_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist); + WSP_GGML_API size_t wsp_ggml_wsp_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist); + WSP_GGML_API size_t wsp_ggml_wsp_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist); + + WSP_GGML_API size_t wsp_ggml_wsp_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist); + WSP_GGML_API size_t wsp_ggml_wsp_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist); + WSP_GGML_API size_t wsp_ggml_wsp_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist); + WSP_GGML_API size_t wsp_ggml_wsp_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist); + WSP_GGML_API size_t wsp_ggml_wsp_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist); + + WSP_GGML_API size_t wsp_ggml_wsp_quantize_chunk(enum wsp_ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist); // // gguf diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh index 4e80b62..db1add9 100755 --- a/scripts/bootstrap.sh +++ b/scripts/bootstrap.sh @@ -50,12 +50,16 @@ for file in "${files[@]}"; do sed -i '' 's/GGUF_/WSP_GGUF_/g' $file sed -i '' 's/gguf_/wsp_gguf_/g' $file sed -i '' 's/GGMLMetalClass/WSPGGMLMetalClass/g' $file + sed -i '' 's/dequantize_/wsp_dequantize_/g' $file + sed -i '' 's/quantize_/wsp_quantize_/g' $file else sed -i 's/GGML_/WSP_GGML_/g' $file sed -i 's/ggml_/wsp_ggml_/g' $file sed -i 's/GGUF_/WSP_GGUF_/g' $file sed -i 's/gguf_/wsp_gguf_/g' $file sed -i 's/GGMLMetalClass/WSPGGMLMetalClass/g' $file + sed -i 's/dequantize_/wsp_dequantize_/g' $file + sed -i 's/quantize_/wsp_quantize_/g' $file fi done