Skip to content

Commit

Permalink
reset yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
lzy-dev committed Dec 26, 2024
1 parent 535c304 commit 9ffba0f
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 22 deletions.
4 changes: 2 additions & 2 deletions examples/aquila/conf/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ experiment:
type: train
backend: megatron
entrypoint: ./flagscale/train/train_aquila.py
cmds:
before_start: source /root/miniconda3/bin/activate flagscale
# cmds:
# before_start: source /root/miniconda3/bin/activate flagscale
runner:
backend: torchrun
nnodes: 1
Expand Down
30 changes: 14 additions & 16 deletions examples/aquila/conf/train/train_aquila_3b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,19 @@ system:
ckpt_format: torch
save_interval: 2385

hetero:
enable_hetero: True
hetero_use_cpu_communication: False
use_partional_reduce_for_shared_embedding: False
# mesh format [tp1,cp1,ep1,dp1,pp1,(tp2,cp2...)]
# hetero:
# enable_hetero: True
# hetero_use_cpu_communication: False
# use_partional_reduce_for_shared_embedding: True
# # mesh format [tp1,cp1,ep1,dp1,pp1,(tp2,cp2...)]

hetero_pipeline_layer_split: [12,12]
hetero_process_meshes: [1,1,1,4,1, 1,1,1,4,1]
hetero_device_types: ["A800", "A800"]
# hetero_pipeline_layer_split: [12,12]
# hetero_process_meshes: [1,1,1,4,1, 1,1,1,4,1]
# hetero_device_types: ["A800", "A800"]

standalone_embedding_stage: False
hetero_current_device_type: "A800"
# standalone_embedding_stage: False
# hetero_current_device_type: "A800"
model:
# use_mcore_models: True # deprecated
transformer_impl: transformer_engine
num_layers: 24
hidden_size: 1024
Expand All @@ -63,10 +62,10 @@ model:
hidden_dropout: 0.0
weight_decay: 0.1
clip_grad: 1.0
train_samples: 6400 #120B tokens
train_samples: 29297664 #120B tokens
eval_iters: 0
micro_batch_size: 4
global_batch_size: 64
micro_batch_size: 2
global_batch_size: 1024
seed: 42

optimizer:
Expand All @@ -80,8 +79,7 @@ model:
lr_decay_style: cosine

data:
# data_path: [40,/mnt/share/hetero_data/datasets/fineweb-edu-CC-3_5_text_document,28,/mnt/share/hetero_data/datasets/dclm-baseline-1.0-top_pct5_text_document,3,/mnt/share/hetero_data/datasets/k73_edu_qwen_text_document,5,/mnt/share/hetero_data/datasets/wxb_edu_qwen_text_document,20,/mnt/share/hetero_data/datasets/cosmopedia-v2-full_text_document,4,/mnt/share/hetero_data/datasets/infinst-kg-0712_text_document]
data_path: build/data/pile_wikipedia_demo
data_path: ${data_path:??}
split: 1
no_mmap_bin_files: true
tokenizer:
Expand Down
4 changes: 0 additions & 4 deletions examples/aquila/conf/train/train_aquila_7b.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
system:
tensor_model_parallel_size: 1
pipeline_model_parallel_size: 1
context_parallel_size: 1
ulysses_sp_parallel_size: 4
disable_bias_linear: True
use_flash_attn: True
use_distributed_optimizer: True
Expand All @@ -26,7 +24,6 @@ model:
num_attention_heads: 32
seq_length: 2048
max_position_embeddings: 2048
max_position_embeddings: 2048
norm_epsilon: 1e-5
use_rotary_position_embeddings: true
no_position_embedding: true
Expand Down Expand Up @@ -58,7 +55,6 @@ model:

data:
data_path: ${data_path:??}
data_path: ./build/data/pile_wikipedia_demo
split: 1
tokenizer:
tokenizer_type: AquilaTokenizerFS
Expand Down

0 comments on commit 9ffba0f

Please sign in to comment.