From 1f6c0365824c91dbede4c823bc72065d693167f7 Mon Sep 17 00:00:00 2001 From: wenxindongwork Date: Thu, 19 Dec 2024 11:19:09 -0800 Subject: [PATCH] Update 70b to 7b --- .../trillium/{llama2_70b_4096.sh => llama2_7b_4096.sh} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename MaxText/configs/trillium/{llama2_70b_4096.sh => llama2_7b_4096.sh} (89%) diff --git a/MaxText/configs/trillium/llama2_70b_4096.sh b/MaxText/configs/trillium/llama2_7b_4096.sh similarity index 89% rename from MaxText/configs/trillium/llama2_70b_4096.sh rename to MaxText/configs/trillium/llama2_7b_4096.sh index 339e9fd51..fea2b3d1a 100644 --- a/MaxText/configs/trillium/llama2_70b_4096.sh +++ b/MaxText/configs/trillium/llama2_7b_4096.sh @@ -1,4 +1,4 @@ -# Llama2-70b model. +# Llama2-7b model. # This config will work out of the box for any number of trillium-256 slices. # # Command Flags: @@ -7,7 +7,7 @@ # RUN_NAME (Required, unless run_name is already set in base.yml or running with XPK/GKE) # # Example to invoke this script: -# bash MaxText/configs/trillium/llama2_70b_4096.sh RUN_NAME="" OUTPUT_PATH="gs://" DATASET_PATH="gs://" +# bash MaxText/configs/trillium/llama2_7b_4096.sh RUN_NAME="" OUTPUT_PATH="gs://" DATASET_PATH="gs://" # @@ -39,7 +39,7 @@ fi # Train export LIBTPU_INIT_ARGS="--xla_tpu_scoped_vmem_limit_kib=98304 --xla_enable_async_all_gather=true --xla_tpu_overlap_compute_collective_tc=true --xla_tpu_enable_async_collective_fusion_multiple_steps=true --xla_tpu_enable_async_collective_fusion=true --xla_tpu_enable_async_collective_fusion_fuse_all_gather=true" -python3 MaxText/$EXECUTABLE MaxText/configs/base.yml model_name=llama2-70b\ +python3 MaxText/$EXECUTABLE MaxText/configs/base.yml model_name=llama2-7b\ steps=15 per_device_batch_size=12 enable_checkpointing=false\ remat_policy=full ici_fsdp_parallelism=-1\ max_target_length=4096 base_output_directory=$OUTPUT_PATH\