Update 70b to 7b

AI-Hypercomputer · Dec 19, 2024 · 1f6c036
1 parent b065ced
commit 1f6c036
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/MaxText/configs/trillium/llama2_70b_4096.sh b/MaxText/configs/trillium/llama2_7b_4096.sh
@@ -1,4 +1,4 @@
-# Llama2-70b model.
+# Llama2-7b model.
 # This config will work out of the box for any number of trillium-256 slices.
 #
 # Command Flags:
@@ -7,7 +7,7 @@
 # RUN_NAME (Required, unless run_name is already set in base.yml or running with XPK/GKE)
 #
 # Example to invoke this script:
-# bash MaxText/configs/trillium/llama2_70b_4096.sh RUN_NAME="<your_run_name>" OUTPUT_PATH="gs://<your_output_path>" DATASET_PATH="gs://<your_dataset_path>"
+# bash MaxText/configs/trillium/llama2_7b_4096.sh RUN_NAME="<your_run_name>" OUTPUT_PATH="gs://<your_output_path>" DATASET_PATH="gs://<your_dataset_path>"
 #
 
 
@@ -39,7 +39,7 @@ fi
 # Train
 export LIBTPU_INIT_ARGS="--xla_tpu_scoped_vmem_limit_kib=98304 --xla_enable_async_all_gather=true --xla_tpu_overlap_compute_collective_tc=true --xla_tpu_enable_async_collective_fusion_multiple_steps=true --xla_tpu_enable_async_collective_fusion=true --xla_tpu_enable_async_collective_fusion_fuse_all_gather=true"
 
-python3 MaxText/$EXECUTABLE MaxText/configs/base.yml model_name=llama2-70b\
+python3 MaxText/$EXECUTABLE MaxText/configs/base.yml model_name=llama2-7b\
   steps=15 per_device_batch_size=12 enable_checkpointing=false\
   remat_policy=full ici_fsdp_parallelism=-1\
   max_target_length=4096 base_output_directory=$OUTPUT_PATH\