Commit: lint

li-plus committed Jun 20, 2024
1 parent 5825cc2 commit ffa0d77
Showing 4 changed files with 25 additions and 25 deletions.
chatglm.cpp (24 changes: 12 additions & 12 deletions)
@@ -1,11 +1,11 @@
 #include "chatglm.h"
-#include <ggml-quants.h>
 #include <algorithm>
 #include <codecvt>
 #include <cstring>
 #include <fcntl.h>
 #include <fstream>
 #include <functional>
+#include <ggml-quants.h>
 #include <google/protobuf/stubs/strutil.h>
 #include <iomanip>
 #include <iostream>
@@ -67,7 +67,7 @@ static std::string strides_to_string(ggml_tensor *tensor) {

 std::string to_string(ggml_tensor *tensor, bool with_data) {
     std::vector<char> buf(ggml_nbytes(tensor));
-    if (tensor->buffer ) {
+    if (tensor->buffer) {
         ggml_backend_tensor_get(tensor, buf.data(), 0, buf.size());
     } else {
         memcpy(buf.data(), tensor->data, buf.size());
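Note on the hunk above: beyond the whitespace fix, this branch is the standard way to read tensor bytes back to the host in ggml. Backend-allocated tensors carry a non-null `buffer` and must be read with `ggml_backend_tensor_get`; tensors whose data already lives in host memory can be copied directly. A minimal self-contained sketch of the pattern, assuming ggml's public API (the helper name `read_tensor_bytes` is ours):

    #include <cstring>
    #include <vector>
    #include <ggml.h>
    #include <ggml-backend.h>

    // Copy a tensor's raw bytes into host memory. Backend-allocated tensors
    // (e.g. on GPU) expose a non-null `buffer` and must go through
    // ggml_backend_tensor_get(); plain host tensors can be memcpy'd.
    static std::vector<char> read_tensor_bytes(const ggml_tensor *tensor) {
        std::vector<char> buf(ggml_nbytes(tensor));
        if (tensor->buffer) {
            ggml_backend_tensor_get(tensor, buf.data(), 0, buf.size());
        } else {
            std::memcpy(buf.data(), tensor->data, buf.size());
        }
        return buf;
    }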
@@ -80,25 +80,25 @@ std::string to_string(ggml_tensor *tensor, bool with_data) {
         memcpy(float_buf.data(), buf.data(), buf.size());
         break;
     case GGML_TYPE_F16:
-        ggml_fp16_to_fp32_row((const ggml_fp16_t*)buf.data(), float_buf.data(), ggml_nelements(tensor));
+        ggml_fp16_to_fp32_row((ggml_fp16_t *)buf.data(), float_buf.data(), ggml_nelements(tensor));
         break;
     case GGML_TYPE_Q4_0:
-        dequantize_row_q4_0((block_q4_0*)buf.data(), float_buf.data(), ggml_nelements(tensor));
+        dequantize_row_q4_0((block_q4_0 *)buf.data(), float_buf.data(), ggml_nelements(tensor));
         break;
     case GGML_TYPE_Q4_1:
-        dequantize_row_q4_1((block_q4_1*)buf.data(), float_buf.data(), ggml_nelements(tensor));
+        dequantize_row_q4_1((block_q4_1 *)buf.data(), float_buf.data(), ggml_nelements(tensor));
         break;
     case GGML_TYPE_Q5_0:
-        dequantize_row_q5_0((block_q5_0*)buf.data(), float_buf.data(), ggml_nelements(tensor));
+        dequantize_row_q5_0((block_q5_0 *)buf.data(), float_buf.data(), ggml_nelements(tensor));
         break;
     case GGML_TYPE_Q5_1:
-        dequantize_row_q5_1((block_q5_1*)buf.data(), float_buf.data(), ggml_nelements(tensor));
+        dequantize_row_q5_1((block_q5_1 *)buf.data(), float_buf.data(), ggml_nelements(tensor));
         break;
     case GGML_TYPE_Q8_0:
-        dequantize_row_q8_0((block_q8_0*)buf.data(), float_buf.data(), ggml_nelements(tensor));
+        dequantize_row_q8_0((block_q8_0 *)buf.data(), float_buf.data(), ggml_nelements(tensor));
         break;
     default:
-        CHATGLM_THROW << "Unsupported dtype " << tensor->type;
+        CHATGLM_THROW << "Unsupported dtype " << tensor->type;
     }

     std::ostringstream oss;
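Note: the switch above expands packed quantized blocks into a float buffer so values can be printed. Each `dequantize_row_*` function takes a pointer to the packed blocks, a float output array, and the total element count, which must be a multiple of the type's block size (32 weights per block for Q4_0). A minimal sketch for one type, assuming the internal ggml-quants.h API (the wrapper name `dequantize_q4_0` is ours):

    #include <cstdint>
    #include <vector>
    #include <ggml.h>
    #include <ggml-quants.h> // block_q4_0, dequantize_row_q4_0, QK4_0

    // Expand a Q4_0-packed buffer back into floats. `n` is the element
    // count and must be a multiple of the block size QK4_0 (= 32).
    static std::vector<float> dequantize_q4_0(const void *packed, int64_t n) {
        std::vector<float> out(n);
        dequantize_row_q4_0((const block_q4_0 *)packed, out.data(), n);
        return out;
    }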
@@ -118,7 +118,7 @@ std::string to_string(ggml_tensor *tensor, bool with_data) {
             oss << (i1 > 0 ? ",\n[" : "[");
             for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
                 oss << (i0 > 0 ? ", " : "");
-                const int i = ((i3 * tensor->ne[2] + i2 ) * tensor->ne[1] + i1) * tensor->ne[0] + i0;
+                const int i = ((i3 * tensor->ne[2] + i2) * tensor->ne[1] + i1) * tensor->ne[0] + i0;
                 oss << std::setw(7) << std::fixed << std::setprecision(4) << float_buf[i];
             }
             oss << "]";
@@ -548,8 +548,8 @@ static ggml_tensor *apply_rotary_emb_basic(ModelContext *mctx, ggml_tensor *laye
 }
 #endif
     const int head_size = layer->ne[0];
-    layer = ggml_rope_ext_inplace(ctx, layer, position_ids, nullptr, head_size, (int)rope_type, 0, rope_theta,
-                                  1.0f, 0.0f, 1.0f, 0.0f, 0.0f); // [s, #h, d]
+    layer = ggml_rope_ext_inplace(ctx, layer, position_ids, nullptr, head_size, (int)rope_type, 0, rope_theta, 1.0f,
+                                  0.0f, 1.0f, 0.0f, 0.0f); // [s, #h, d]
     return layer;
 }

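Note: the RoPE hunk only rewraps arguments; nothing semantic changes. For readability, here is the same call annotated against what we understand ggml_rope_ext_inplace's parameters to be in mid-2024 ggml (verify against your ggml.h; the comments are our interpretation, not part of the commit):

    layer = ggml_rope_ext_inplace(ctx, layer,     // graph context, tensor rotated in place
                                  position_ids,   // b: per-token positions
                                  nullptr,        // c: optional frequency factors, unused here
                                  head_size,      // n_dims: rotary dimensions per head
                                  (int)rope_type, // mode
                                  0,              // n_ctx_orig
                                  rope_theta,     // freq_base
                                  1.0f, 0.0f,     // freq_scale, ext_factor
                                  1.0f,           // attn_factor
                                  0.0f, 0.0f);    // beta_fast, beta_slow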
chatglm.h (3 changes: 2 additions & 1 deletion)
@@ -148,7 +148,8 @@ class ModelConfig {
                       num_virtual_tokens, rec.max_length, rec.bos_token_id, rec.eos_token_id, rec.pad_token_id,
                       rec.sep_token_id, {}) {}

-    ModelConfig(ModelType model_type, const ConfigRecordV1GQA &rec, float norm_eps, float rope_theta, int num_virtual_tokens)
+    ModelConfig(ModelType model_type, const ConfigRecordV1GQA &rec, float norm_eps, float rope_theta,
+                int num_virtual_tokens)
         : ModelConfig(model_type, rec.dtype, rec.vocab_size, rec.hidden_size, rec.num_attention_heads, rec.num_kv_heads,
                       rec.num_hidden_layers, rec.intermediate_size, norm_eps, rope_theta, num_virtual_tokens,
                       rec.max_length, rec.bos_token_id, rec.eos_token_id, rec.pad_token_id, rec.sep_token_id, {}) {}
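Note: the rewrapped overload is a C++11 delegating constructor: the GQA variant forwards everything to the full ModelConfig constructor rather than duplicating the member-init list. A minimal sketch of that pattern (toy type, not the real ModelConfig):

    struct Config {
        int heads, kv_heads;
        // Full constructor spells out every field.
        Config(int heads, int kv_heads) : heads(heads), kv_heads(kv_heads) {}
        // Convenience overload delegates (C++11): plain MHA means kv_heads == heads.
        explicit Config(int heads) : Config(heads, heads) {}
    };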
Expand Down
chatglm_test.cpp (17 changes: 8 additions & 9 deletions)
@@ -70,9 +70,10 @@ static inline void _fill(ggml_tensor *tensor, const std::vector<float> &values)
     case GGML_TYPE_Q4_1:
     case GGML_TYPE_Q5_0:
     case GGML_TYPE_Q5_1:
-    case GGML_TYPE_Q8_0: {
+    case GGML_TYPE_Q8_0: {
         std::vector<no_init<char>> q_buf(ggml_nbytes(tensor));
-        ggml_quantize_chunk(tensor->type, values.data(), q_buf.data(), 0, ggml_nelements(tensor) / tensor->ne[0], tensor->ne[0], nullptr);
+        ggml_quantize_chunk(tensor->type, values.data(), q_buf.data(), 0, ggml_nelements(tensor) / tensor->ne[0],
+                            tensor->ne[0], nullptr);
         ggml_backend_tensor_set(tensor, q_buf.data(), 0, ggml_nbytes(tensor));
     } break;
     default:
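Note: ggml quantizes row by row, which is why the rewrapped call derives a row count (nelements / ne[0]) and a row length (ne[0]) from the tensor shape; the trailing nullptr is the optional importance matrix. A self-contained sketch of the fill pattern, assuming ggml's mid-2024 ggml_quantize_chunk signature (the helper name `fill_quantized` is ours, and it uses a plain vector<char> instead of the test's no_init helper):

    #include <vector>
    #include <ggml.h>
    #include <ggml-backend.h>

    // Quantize float data into `tensor`'s dtype and upload the packed
    // bytes to the tensor's backend buffer.
    static void fill_quantized(ggml_tensor *tensor, const std::vector<float> &values) {
        std::vector<char> q_buf(ggml_nbytes(tensor));
        ggml_quantize_chunk(tensor->type, values.data(), q_buf.data(), /*start=*/0,
                            ggml_nelements(tensor) / tensor->ne[0], tensor->ne[0],
                            /*imatrix=*/nullptr);
        ggml_backend_tensor_set(tensor, q_buf.data(), 0, ggml_nbytes(tensor));
    }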
@@ -92,7 +93,7 @@ static inline void random_(ggml_tensor *tensor) {
     _fill(tensor, values);
 }

-static inline float randn() {
+static inline float randn() {
     thread_local std::random_device rd{};
     thread_local std::mt19937 gen{rd()};
     std::normal_distribution<float> d;
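Note: randn() keeps its engine thread_local so concurrent callers never share (or race on) mt19937 state. The hunk cuts off before the function's return; a sketch of the complete helper as we read it (the final line is our assumed completion):

    #include <random>

    // One sample from N(0, 1); thread-local RNG state avoids data races.
    static inline float randn() {
        thread_local std::random_device rd{};
        thread_local std::mt19937 gen{rd()};
        std::normal_distribution<float> d;
        return d(gen); // assumed completion; the diff hunk ends before this line
    }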
@@ -480,7 +481,8 @@ TEST_F(ChatGLMTest, Linear) {

 TEST_F(ChatGLMTest, BenchmarkLinear) {
     constexpr int M = 64, N = 1024, K = 1024 * 3;
-    std::vector<ggml_type> dtypes { GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0};
+    std::vector<ggml_type> dtypes{GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1,
+                                  GGML_TYPE_Q5_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0};
     for (ggml_type dtype : dtypes) {
         mctx_ = std::make_unique<ModelContext>(dtype);

@@ -496,11 +498,8 @@ TEST_F(ChatGLMTest, BenchmarkLinear) {
             randn_(tensor);
         }

-        std::cout << "[Benchmark] Linear " << ggml_type_name(mctx_->dtype) << " time: " << perf_graph_compute() << " ms\n";
-
-        // for (int i = ggml_nelements(y); i >= 0 ; i--) {
-        //     CHATGLM_CHECK(std::isfinite(((float *)y->data)[i])) << i;
-        // }
+        std::cout << "[Benchmark] Linear " << ggml_type_name(mctx_->dtype) << " time: " << perf_graph_compute()
+                  << " ms\n";
     }
 }

Expand Down
tests/test_convert.py (6 changes: 3 additions & 3 deletions)
@@ -190,9 +190,9 @@ def make_data_embedding():


 def make_data_linear():
-    w = torch.randn(16, 32)
-    b = torch.randn(16)
-    x = torch.randn(2, 32)
+    w = torch.randn(32, 64)
+    b = torch.randn(32)
+    x = torch.randn(2, 64)
     y = F.linear(x, w, b)

     vec_x = x[0]
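Note: with the new shapes, F.linear computes y = x @ w.T + b, where w holds (out_features, in_features) = (32, 64); so x of shape (2, 64) times w.T of shape (64, 32) yields y of shape (2, 32), with b of shape (32) broadcast across the batch. The old 16/32 sizes follow the same rule at half the width.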