delete model file for converting ckpt
lzy-dev committed Dec 27, 2024
1 parent efab98e commit 126a754
Showing 6 changed files with 6 additions and 1,635 deletions.
4 changes: 2 additions & 2 deletions megatron/megatron/core/transformer/attention.py
@@ -521,7 +521,7 @@ def __init__(
             # nums_head_cur_rank = divide(self.config.num_attention_heads, tp_world_size)
             self.q_layernorm = build_module(
                 submodules.q_layernorm,
-                hidden_size=self.config.num_attention_heads * self.hidden_size_per_attention_head,
+                hidden_size=self.query_projection_size,
                 config=self.config,
                 eps=self.config.layernorm_epsilon,
             )
@@ -542,7 +542,7 @@ def __init__(
             # nums_head_cur_rank = divide(self.config.num_attention_heads, tp_world_size)
             self.k_layernorm = build_module(
                 submodules.k_layernorm,
-                hidden_size=self.config.num_query_groups * self.hidden_size_per_attention_head,
+                hidden_size=self.kv_projection_size,
                 config=self.config,
                 eps=self.config.layernorm_epsilon,
             )
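
Note on the attention.py hunks above: in upstream Megatron-LM core, Attention.__init__ precomputes query_projection_size = kv_channels * num_attention_heads and kv_projection_size = kv_channels * num_query_groups, and derives hidden_size_per_attention_head from the former. Assuming this repository's vendored Megatron keeps those upstream definitions, the new hidden_size arguments are numerically equal to the old products. The sketch below is illustrative only and not code from this commit; the divide helper and AttentionSizes class are stand-ins for the corresponding upstream bookkeeping.

# Illustrative sketch (not part of this commit): checks that the old and new
# hidden_size expressions agree under the assumed upstream Megatron definitions.

def divide(numerator, denominator):
    # Mirrors the exact-division helper used by Megatron core (assumption).
    assert numerator % denominator == 0
    return numerator // denominator

class AttentionSizes:
    """Only the size bookkeeping assumed to happen in Attention.__init__."""
    def __init__(self, kv_channels, num_attention_heads, num_query_groups):
        self.query_projection_size = kv_channels * num_attention_heads
        self.kv_projection_size = kv_channels * num_query_groups
        # Equals kv_channels when the division is exact.
        self.hidden_size_per_attention_head = divide(
            self.query_projection_size, num_attention_heads
        )

sizes = AttentionSizes(kv_channels=128, num_attention_heads=32, num_query_groups=8)
# q_layernorm: old expression == new expression
assert 32 * sizes.hidden_size_per_attention_head == sizes.query_projection_size
# k_layernorm: old expression == new expression
assert 8 * sizes.hidden_size_per_attention_head == sizes.kv_projection_size
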
Empty file.
33 changes: 0 additions & 33 deletions tools/checkpoint/aquila/llama_model/config.json

This file was deleted.

187 changes: 0 additions & 187 deletions tools/checkpoint/aquila/llama_model/configuration_llama.py

This file was deleted.
