Commit: lint

li-plus committed Jun 20, 2024
1 parent 5825cc2 commit ffa0d77
Showing 4 changed files with 25 additions and 25 deletions.
chatglm.cpp (24 changes: 12 additions & 12 deletions)
@@ -1,11 +1,11 @@
 #include "chatglm.h"
-#include <ggml-quants.h>
 #include <algorithm>
 #include <codecvt>
 #include <cstring>
 #include <fcntl.h>
 #include <fstream>
 #include <functional>
+#include <ggml-quants.h>
 #include <google/protobuf/stubs/strutil.h>
 #include <iomanip>
 #include <iostream>
@@ -67,7 +67,7 @@ static std::string strides_to_string(ggml_tensor *tensor) {

 std::string to_string(ggml_tensor *tensor, bool with_data) {
     std::vector<char> buf(ggml_nbytes(tensor));
-    if (tensor->buffer ) {
+    if (tensor->buffer) {
         ggml_backend_tensor_get(tensor, buf.data(), 0, buf.size());
     } else {
         memcpy(buf.data(), tensor->data, buf.size());
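Note on the hunk above: beyond the whitespace fix, this branch is the standard way to read tensor bytes back to the host in ggml. Backend-allocated tensors carry a non-null `buffer` and must be read with `ggml_backend_tensor_get`; tensors whose data already lives in host memory can be copied directly. A minimal self-contained sketch of the pattern, assuming ggml's public API (the helper name `read_tensor_bytes` is ours):

    #include <cstring>
    #include <vector>
    #include <ggml.h>
    #include <ggml-backend.h>

    // Copy a tensor's raw bytes into host memory. Backend-allocated tensors
    // (e.g. on GPU) expose a non-null `buffer` and must go through
    // ggml_backend_tensor_get(); plain host tensors can be memcpy'd.
    static std::vector<char> read_tensor_bytes(const ggml_tensor *tensor) {
        std::vector<char> buf(ggml_nbytes(tensor));
        if (tensor->buffer) {
            ggml_backend_tensor_get(tensor, buf.data(), 0, buf.size());
        } else {
            std::memcpy(buf.data(), tensor->data, buf.size());
        }
        return buf;
    }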
@@ -80,25 +80,25 @@ std::string to_string(ggml_tensor *tensor, bool with_data) {
         memcpy(float_buf.data(), buf.data(), buf.size());
         break;
     case GGML_TYPE_F16:
-        ggml_fp16_to_fp32_row((const ggml_fp16_t*)buf.data(), float_buf.data(), ggml_nelements(tensor));
+        ggml_fp16_to_fp32_row((ggml_fp16_t *)buf.data(), float_buf.data(), ggml_nelements(tensor));
         break;
     case GGML_TYPE_Q4_0:
-        dequantize_row_q4_0((block_q4_0*)buf.data(), float_buf.data(), ggml_nelements(tensor));
+        dequantize_row_q4_0((block_q4_0 *)buf.data(), float_buf.data(), ggml_nelements(tensor));
         break;
     case GGML_TYPE_Q4_1:
-        dequantize_row_q4_1((block_q4_1*)buf.data(), float_buf.data(), ggml_nelements(tensor));
+        dequantize_row_q4_1((block_q4_1 *)buf.data(), float_buf.data(), ggml_nelements(tensor));
         break;
     case GGML_TYPE_Q5_0:
-        dequantize_row_q5_0((block_q5_0*)buf.data(), float_buf.data(), ggml_nelements(tensor));
+        dequantize_row_q5_0((block_q5_0 *)buf.data(), float_buf.data(), ggml_nelements(tensor));
         break;
     case GGML_TYPE_Q5_1:
-        dequantize_row_q5_1((block_q5_1*)buf.data(), float_buf.data(), ggml_nelements(tensor));
+        dequantize_row_q5_1((block_q5_1 *)buf.data(), float_buf.data(), ggml_nelements(tensor));
         break;
     case GGML_TYPE_Q8_0:
-        dequantize_row_q8_0((block_q8_0*)buf.data(), float_buf.data(), ggml_nelements(tensor));
+        dequantize_row_q8_0((block_q8_0 *)buf.data(), float_buf.data(), ggml_nelements(tensor));
         break;
     default:
-        CHATGLM_THROW << "Unsupported dtype " << tensor->type;
+        CHATGLM_THROW << "Unsupported dtype " << tensor->type;
     }

     std::ostringstream oss;
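Note: the switch above expands packed quantized blocks into a float buffer so values can be printed. Each `dequantize_row_*` function takes a pointer to the packed blocks, a float output array, and the total element count, which must be a multiple of the type's block size (32 weights per block for Q4_0). A minimal sketch for one type, assuming the internal ggml-quants.h API (the wrapper name `dequantize_q4_0` is ours):

    #include <cstdint>
    #include <vector>
    #include <ggml.h>
    #include <ggml-quants.h> // block_q4_0, dequantize_row_q4_0, QK4_0

    // Expand a Q4_0-packed buffer back into floats. `n` is the element
    // count and must be a multiple of the block size QK4_0 (= 32).
    static std::vector<float> dequantize_q4_0(const void *packed, int64_t n) {
        std::vector<float> out(n);
        dequantize_row_q4_0((const block_q4_0 *)packed, out.data(), n);
        return out;
    }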
@@ -118,7 +118,7 @@ std::string to_string(ggml_tensor *tensor, bool with_data) {
             oss << (i1 > 0 ? ",\n[" : "[");
             for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
                 oss << (i0 > 0 ? ", " : "");
-                const int i = ((i3 * tensor->ne[2] + i2 ) * tensor->ne[1] + i1) * tensor->ne[0] + i0;
+                const int i = ((i3 * tensor->ne[2] + i2) * tensor->ne[1] + i1) * tensor->ne[0] + i0;
                 oss << std::setw(7) << std::fixed << std::setprecision(4) << float_buf[i];
             }
             oss << "]";
@@ -548,8 +548,8 @@ static ggml_tensor *apply_rotary_emb_basic(ModelContext *mctx, ggml_tensor *laye
 }
 #endif
     const int head_size = layer->ne[0];
-    layer = ggml_rope_ext_inplace(ctx, layer, position_ids, nullptr, head_size, (int)rope_type, 0, rope_theta,
-                                  1.0f, 0.0f, 1.0f, 0.0f, 0.0f); // [s, #h, d]
+    layer = ggml_rope_ext_inplace(ctx, layer, position_ids, nullptr, head_size, (int)rope_type, 0, rope_theta, 1.0f,
+                                  0.0f, 1.0f, 0.0f, 0.0f); // [s, #h, d]
     return layer;
 }

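Note: the RoPE hunk only rewraps arguments; nothing semantic changes. For readability, here is the same call annotated against what we understand ggml_rope_ext_inplace's parameters to be in mid-2024 ggml (verify against your ggml.h; the comments are our interpretation, not part of the commit):

    layer = ggml_rope_ext_inplace(ctx, layer,     // graph context, tensor rotated in place
                                  position_ids,   // b: per-token positions
                                  nullptr,        // c: optional frequency factors, unused here
                                  head_size,      // n_dims: rotary dimensions per head
                                  (int)rope_type, // mode
                                  0,              // n_ctx_orig
                                  rope_theta,     // freq_base
                                  1.0f, 0.0f,     // freq_scale, ext_factor
                                  1.0f,           // attn_factor
                                  0.0f, 0.0f);    // beta_fast, beta_slow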
chatglm.h (3 changes: 2 additions & 1 deletion)
@@ -148,7 +148,8 @@ class ModelConfig {
                       num_virtual_tokens, rec.max_length, rec.bos_token_id, rec.eos_token_id, rec.pad_token_id,
                       rec.sep_token_id, {}) {}

-    ModelConfig(ModelType model_type, const ConfigRecordV1GQA &rec, float norm_eps, float rope_theta, int num_virtual_tokens)
+    ModelConfig(ModelType model_type, const ConfigRecordV1GQA &rec, float norm_eps, float rope_theta,
+                int num_virtual_tokens)
         : ModelConfig(model_type, rec.dtype, rec.vocab_size, rec.hidden_size, rec.num_attention_heads, rec.num_kv_heads,
                       rec.num_hidden_layers, rec.intermediate_size, norm_eps, rope_theta, num_virtual_tokens,
                       rec.max_length, rec.bos_token_id, rec.eos_token_id, rec.pad_token_id, rec.sep_token_id, {}) {}
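Note: the rewrapped overload is a C++11 delegating constructor: the GQA variant forwards everything to the full ModelConfig constructor rather than duplicating the member-init list. A minimal sketch of that pattern (toy type, not the real ModelConfig):

    struct Config {
        int heads, kv_heads;
        // Full constructor spells out every field.
        Config(int heads, int kv_heads) : heads(heads), kv_heads(kv_heads) {}
        // Convenience overload delegates (C++11): plain MHA means kv_heads == heads.
        explicit Config(int heads) : Config(heads, heads) {}
    };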
Expand Down
chatglm_test.cpp (17 changes: 8 additions & 9 deletions)
@@ -70,9 +70,10 @@ static inline void _fill(ggml_tensor *tensor, const std::vector<float> &values)
     case GGML_TYPE_Q4_1:
     case GGML_TYPE_Q5_0:
     case GGML_TYPE_Q5_1:
-    case GGML_TYPE_Q8_0: {
+    case GGML_TYPE_Q8_0: {
         std::vector<no_init<char>> q_buf(ggml_nbytes(tensor));
-        ggml_quantize_chunk(tensor->type, values.data(), q_buf.data(), 0, ggml_nelements(tensor) / tensor->ne[0], tensor->ne[0], nullptr);
+        ggml_quantize_chunk(tensor->type, values.data(), q_buf.data(), 0, ggml_nelements(tensor) / tensor->ne[0],
+                            tensor->ne[0], nullptr);
         ggml_backend_tensor_set(tensor, q_buf.data(), 0, ggml_nbytes(tensor));
     } break;
     default:
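Note: ggml quantizes row by row, which is why the rewrapped call derives a row count (nelements / ne[0]) and a row length (ne[0]) from the tensor shape; the trailing nullptr is the optional importance matrix. A self-contained sketch of the fill pattern, assuming ggml's mid-2024 ggml_quantize_chunk signature (the helper name `fill_quantized` is ours, and it uses a plain vector<char> instead of the test's no_init helper):

    #include <vector>
    #include <ggml.h>
    #include <ggml-backend.h>

    // Quantize float data into `tensor`'s dtype and upload the packed
    // bytes to the tensor's backend buffer.
    static void fill_quantized(ggml_tensor *tensor, const std::vector<float> &values) {
        std::vector<char> q_buf(ggml_nbytes(tensor));
        ggml_quantize_chunk(tensor->type, values.data(), q_buf.data(), /*start=*/0,
                            ggml_nelements(tensor) / tensor->ne[0], tensor->ne[0],
                            /*imatrix=*/nullptr);
        ggml_backend_tensor_set(tensor, q_buf.data(), 0, ggml_nbytes(tensor));
    }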
@@ -92,7 +93,7 @@ static inline void random_(ggml_tensor *tensor) {
     _fill(tensor, values);
 }

-static inline float randn() {
+static inline float randn() {
     thread_local std::random_device rd{};
     thread_local std::mt19937 gen{rd()};
     std::normal_distribution<float> d;
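Note: randn() keeps its engine thread_local so concurrent callers never share (or race on) mt19937 state. The hunk cuts off before the function's return; a sketch of the complete helper as we read it (the final line is our assumed completion):

    #include <random>

    // One sample from N(0, 1); thread-local RNG state avoids data races.
    static inline float randn() {
        thread_local std::random_device rd{};
        thread_local std::mt19937 gen{rd()};
        std::normal_distribution<float> d;
        return d(gen); // assumed completion; the diff hunk ends before this line
    }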
@@ -480,7 +481,8 @@ TEST_F(ChatGLMTest, Linear) {

 TEST_F(ChatGLMTest, BenchmarkLinear) {
     constexpr int M = 64, N = 1024, K = 1024 * 3;
-    std::vector<ggml_type> dtypes { GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0};
+    std::vector<ggml_type> dtypes{GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1,
+                                  GGML_TYPE_Q5_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0};
     for (ggml_type dtype : dtypes) {
         mctx_ = std::make_unique<ModelContext>(dtype);

@@ -496,11 +498,8 @@ TEST_F(ChatGLMTest, BenchmarkLinear) {
             randn_(tensor);
         }

-        std::cout << "[Benchmark] Linear " << ggml_type_name(mctx_->dtype) << " time: " << perf_graph_compute() << " ms\n";
-
-        // for (int i = ggml_nelements(y); i >= 0 ; i--) {
-        //     CHATGLM_CHECK(std::isfinite(((float *)y->data)[i])) << i;
-        // }
+        std::cout << "[Benchmark] Linear " << ggml_type_name(mctx_->dtype) << " time: " << perf_graph_compute()
+                  << " ms\n";
     }
 }

Expand Down
tests/test_convert.py (6 changes: 3 additions & 3 deletions)
@@ -190,9 +190,9 @@ def make_data_embedding():


 def make_data_linear():
-    w = torch.randn(16, 32)
-    b = torch.randn(16)
-    x = torch.randn(2, 32)
+    w = torch.randn(32, 64)
+    b = torch.randn(32)
+    x = torch.randn(2, 64)
     y = F.linear(x, w, b)

     vec_x = x[0]
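Note: with the new shapes, F.linear computes y = x @ w.T + b, where w holds (out_features, in_features) = (32, 64); so x of shape (2, 64) times w.T of shape (64, 32) yields y of shape (2, 32), with b of shape (32) broadcast across the batch. The old 16/32 sizes follow the same rule at half the width.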