From aa4d664d58eda0f510fd0b1452eee5014d4f2145 Mon Sep 17 00:00:00 2001
From: Jannik Brinkmann
Date: Sat, 27 Apr 2024 23:37:17 +0200
Subject: [PATCH] fix problem with attention_size

---
 configs/stories/llama2/1m.json   | 2 +-
 configs/stories/llama2/2.5m.json | 4 ++--
 configs/stories/llama2/250k.json | 4 ++--
 configs/stories/llama2/500k.json | 4 ++--
 configs/stories/llama2/50k.json  | 4 ++--
 configs/stories/llama2/5m.json   | 4 ++--
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/configs/stories/llama2/1m.json b/configs/stories/llama2/1m.json
index 52f1c893..4d3d7cbb 100644
--- a/configs/stories/llama2/1m.json
+++ b/configs/stories/llama2/1m.json
@@ -1,6 +1,6 @@
 {
   "model_config": {
-    "hidden_size": 84,
+    "hidden_size": 82,
     "intermediate_size": 256,
     "num_attention_heads": 8,
     "num_hidden_layers": 4,
diff --git a/configs/stories/llama2/2.5m.json b/configs/stories/llama2/2.5m.json
index 4d55904c..0be616ec 100644
--- a/configs/stories/llama2/2.5m.json
+++ b/configs/stories/llama2/2.5m.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 168,
-    "intermediate_size": 384,
+    "hidden_size": 176,
+    "intermediate_size": 352,
     "num_attention_heads": 8,
     "num_hidden_layers": 4,
     "num_key_value_heads": 4
diff --git a/configs/stories/llama2/250k.json b/configs/stories/llama2/250k.json
index 7a4ed066..baa81f2e 100644
--- a/configs/stories/llama2/250k.json
+++ b/configs/stories/llama2/250k.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 28,
-    "intermediate_size": 96,
+    "hidden_size": 30,
+    "intermediate_size": 68,
     "num_attention_heads": 4,
     "num_hidden_layers": 2,
     "num_key_value_heads": 2
diff --git a/configs/stories/llama2/500k.json b/configs/stories/llama2/500k.json
index c4e0ec8e..060062d8 100644
--- a/configs/stories/llama2/500k.json
+++ b/configs/stories/llama2/500k.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 52,
-    "intermediate_size": 184,
+    "hidden_size": 54,
+    "intermediate_size": 144,
     "num_attention_heads": 4,
     "num_hidden_layers": 2,
     "num_key_value_heads": 2
diff --git a/configs/stories/llama2/50k.json b/configs/stories/llama2/50k.json
index 53afb500..73060f6f 100644
--- a/configs/stories/llama2/50k.json
+++ b/configs/stories/llama2/50k.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 6,
-    "intermediate_size": 24,
+    "hidden_size": 8,
+    "intermediate_size": 16,
     "num_attention_heads": 2,
     "num_hidden_layers": 1,
     "num_key_value_heads": 1
diff --git a/configs/stories/llama2/5m.json b/configs/stories/llama2/5m.json
index 839221f6..795cb768 100644
--- a/configs/stories/llama2/5m.json
+++ b/configs/stories/llama2/5m.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 232,
-    "intermediate_size": 512,
+    "hidden_size": 240,
+    "intermediate_size": 480,
     "num_attention_heads": 12,
     "num_hidden_layers": 6,
     "num_key_value_heads": 6
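
Note on the new values: each file's hidden_size and intermediate_size appear to be re-balanced so the model stays near the parameter budget its filename suggests while the per-head width changes. Below is a minimal sketch for checking this, assuming a Hugging Face style Llama with untied input/output embeddings, no biases, RMSNorm, and a 4096-token vocabulary; none of these assumptions appear in the diff, and the path used is just one of the patched files.

    # Rough parameter count for a Llama-style model described by one of the
    # patched configs. Assumptions (not taken from the diff): untied input and
    # output embeddings, no biases, RMSNorm, vocab_size of 4096.
    import json

    def estimate_params(cfg: dict, vocab_size: int = 4096) -> int:
        h = cfg["hidden_size"]
        inter = cfg["intermediate_size"]
        n_heads = cfg["num_attention_heads"]
        n_kv = cfg.get("num_key_value_heads", n_heads)
        head_dim = h // n_heads                  # per-head width, floor division
        attn = h * n_heads * head_dim            # q projection
        attn += 2 * h * n_kv * head_dim          # k and v projections (GQA)
        attn += n_heads * head_dim * h           # o projection
        mlp = 3 * h * inter                      # gate, up, and down projections
        norms = 2 * h                            # two RMSNorms per layer
        per_layer = attn + mlp + norms
        embeddings = 2 * vocab_size * h          # input embedding + lm_head
        return embeddings + cfg["num_hidden_layers"] * per_layer + h  # + final norm

    with open("configs/stories/llama2/1m.json") as f:
        cfg = json.load(f)["model_config"]

    print("head_dim ~", cfg["hidden_size"] // cfg["num_attention_heads"])
    print("params   ~", estimate_params(cfg))   # ~1.0M for the patched 1m.json

The floor division for head_dim mirrors implementations that derive the per-head width as hidden_size // num_attention_heads; under the vocabulary assumption above, the patched 1m.json lands at roughly one million parameters.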