diff --git a/configs/stories/llama2/100k.json b/configs/stories/llama2/100k.json
index 601c3809..57eecf05 100644
--- a/configs/stories/llama2/100k.json
+++ b/configs/stories/llama2/100k.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
     "hidden_size": 12,
-    "intermediate_size": 48,
+    "intermediate_size": 32,
     "num_attention_heads": 2,
     "num_hidden_layers": 1,
     "num_key_value_heads": 1
diff --git a/configs/stories/llama2/10m.json b/configs/stories/llama2/10m.json
index 224b4674..953029e0 100644
--- a/configs/stories/llama2/10m.json
+++ b/configs/stories/llama2/10m.json
@@ -1,9 +1,9 @@
 {
   "model_config": {
-    "hidden_size": 332,
-    "intermediate_size": 896,
-    "num_attention_heads": 12,
+    "hidden_size": 340,
+    "intermediate_size": 906,
+    "num_attention_heads": 10,
     "num_hidden_layers": 6,
-    "num_key_value_heads": 6
+    "num_key_value_heads": 5
   }
 }
\ No newline at end of file
diff --git a/configs/stories/llama2/1m.json b/configs/stories/llama2/1m.json
index 4d3d7cbb..c83c064c 100644
--- a/configs/stories/llama2/1m.json
+++ b/configs/stories/llama2/1m.json
@@ -1,9 +1,9 @@
 {
   "model_config": {
-    "hidden_size": 82,
-    "intermediate_size": 256,
-    "num_attention_heads": 8,
+    "hidden_size": 84,
+    "intermediate_size": 244,
+    "num_attention_heads": 6,
     "num_hidden_layers": 4,
-    "num_key_value_heads": 4
+    "num_key_value_heads": 3
   }
 }
\ No newline at end of file
diff --git a/configs/stories/llama2/2.5m.json b/configs/stories/llama2/2.5m.json
index 0be616ec..cd7f5cd9 100644
--- a/configs/stories/llama2/2.5m.json
+++ b/configs/stories/llama2/2.5m.json
@@ -1,9 +1,9 @@
 {
   "model_config": {
-    "hidden_size": 176,
-    "intermediate_size": 352,
+    "hidden_size": 160,
+    "intermediate_size": 426,
     "num_attention_heads": 8,
-    "num_hidden_layers": 4,
+    "num_hidden_layers": 5,
     "num_key_value_heads": 4
   }
 }
\ No newline at end of file
diff --git a/configs/stories/llama2/250k.json b/configs/stories/llama2/250k.json
index baa81f2e..b10f7e87 100644
--- a/configs/stories/llama2/250k.json
+++ b/configs/stories/llama2/250k.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
     "hidden_size": 30,
-    "intermediate_size": 68,
+    "intermediate_size": 80,
     "num_attention_heads": 4,
     "num_hidden_layers": 2,
     "num_key_value_heads": 2
diff --git a/configs/stories/llama2/25m.json b/configs/stories/llama2/25m.json
index 813d2b63..5e22a658 100644
--- a/configs/stories/llama2/25m.json
+++ b/configs/stories/llama2/25m.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 484,
-    "intermediate_size": 1332,
+    "hidden_size": 512,
+    "intermediate_size": 1365,
     "num_attention_heads": 16,
     "num_hidden_layers": 8,
     "num_key_value_heads": 8
diff --git a/configs/stories/llama2/50k.json b/configs/stories/llama2/50k.json
index 73060f6f..8a02b571 100644
--- a/configs/stories/llama2/50k.json
+++ b/configs/stories/llama2/50k.json
@@ -1,8 +1,8 @@
 {
   "model_config": {
-    "hidden_size": 8,
+    "hidden_size": 6,
     "intermediate_size": 16,
-    "num_attention_heads": 2,
+    "num_attention_heads": 3,
     "num_hidden_layers": 1,
     "num_key_value_heads": 1
   }
diff --git a/configs/stories/llama2/50m.json b/configs/stories/llama2/50m.json
index 3fa95022..4baa3610 100644
--- a/configs/stories/llama2/50m.json
+++ b/configs/stories/llama2/50m.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 708,
-    "intermediate_size": 1896,
+    "hidden_size": 736,
+    "intermediate_size": 1962,
     "num_attention_heads": 16,
     "num_hidden_layers": 8,
     "num_key_value_heads": 8
diff --git a/configs/stories/llama2/5m.json b/configs/stories/llama2/5m.json
index 795cb768..15ffdc0a 100644
--- a/configs/stories/llama2/5m.json
+++ b/configs/stories/llama2/5m.json
@@ -1,9 +1,9 @@
 {
   "model_config": {
     "hidden_size": 240,
-    "intermediate_size": 480,
-    "num_attention_heads": 12,
-    "num_hidden_layers": 6,
-    "num_key_value_heads": 6
+    "intermediate_size": 640,
+    "num_attention_heads": 10,
+    "num_hidden_layers": 5,
+    "num_key_value_heads": 5
   }
 }
\ No newline at end of file