Revert "Fix llm with torchtune v0.3"
This reverts commit fd99d8a.
pierre.delaunay committed Sep 30, 2024
1 parent 6a1c120 commit 770ca62
Showing 14 changed files with 711 additions and 1,328 deletions.
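
Every file touched below is a torchtune YAML config, and each _component_: entry is a dotted import path that the recipe resolves and calls at runtime, passing the sibling keys as constructor arguments. A minimal sketch of that mechanism, assuming the torchtune <= 0.2 layout these configs use (where helpers such as metric_logging.DiskLogger sit under torchtune.utils rather than torchtune.training):

# Sketch of torchtune's "_component_" resolution; assumes torchtune <= 0.2,
# where DiskLogger is exposed as torchtune.utils.metric_logging.DiskLogger.
from omegaconf import OmegaConf
from torchtune import config

cfg = OmegaConf.create(
    """
    metric_logger:
      _component_: torchtune.utils.metric_logging.DiskLogger
      log_dir: /tmp/alpaca-llama3-finetune
    """
)

# instantiate() imports the dotted path and calls it with the remaining keys.
logger = config.instantiate(cfg.metric_logger)
logger.log("loss", 1.23, step=0)  # writes a line to a log file under log_dir
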
11 changes: 5 additions & 6 deletions benchmarks/llm/configs/llama3_70B_full.yaml
@@ -20,7 +20,6 @@
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3.1-70B-Instruct/original/tokenizer.model
-max_seq_len: null

# Dataset
dataset:
@@ -34,7 +33,7 @@ model:

safetensors: true
checkpointer:
-_component_: torchtune.training.FullModelHFCheckpointer
+_component_: torchtune.utils.FullModelHFCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
checkpoint_files: [
model-00001-of-00030.safetensors,
@@ -86,7 +85,7 @@ optimizer:
fused: True

loss:
-_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+_component_: torch.nn.CrossEntropyLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1

@@ -96,16 +95,16 @@ device: cuda

# Memory management
enable_activation_checkpointing: True
-custom_sharded_layers: ['tok_embeddings', 'output']
+memory_efficient_fsdp_wrap: True
fsdp_cpu_offload: True

# Reduced precision
dtype: bf16

# Logging
metric_logger:
-_component_: torchtune.training.metric_logging.DiskLogger
+_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
-output_dir: /tmp/full-llama3_1-finetune
+output_dir: /tmp/alpaca-llama3-finetune
log_every_n_steps: 1
log_peak_memory_stats: False
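
The loss entry above goes back to plain torch.nn.CrossEntropyLoss, which the recipes can apply directly to flattened logits and labels. A small self-contained sketch of that usage (toy tensor sizes, not the real recipe code):

import torch
from torch import nn

# CrossEntropyLoss expects [N, vocab] logits and [N] integer labels,
# so the (batch, seq) dimensions are flattened before the call.
loss_fn = nn.CrossEntropyLoss(ignore_index=-100)  # -100 is the usual masked-label value

batch, seq, vocab = 2, 8, 128  # toy sizes only
logits = torch.randn(batch, seq, vocab)
labels = torch.randint(0, vocab, (batch, seq))

loss = loss_fn(logits.reshape(-1, vocab), labels.reshape(-1))
print(loss.item())
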
10 changes: 4 additions & 6 deletions benchmarks/llm/configs/llama3_70B_lora.yaml
@@ -16,16 +16,14 @@ model:
apply_lora_to_output: False
lora_rank: 16
lora_alpha: 32
-lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3.1-70B-Instruct/original/tokenizer.model
-max_seq_len: null

safetensors: true
checkpointer:
-_component_: torchtune.training.FullModelHFCheckpointer
+_component_: torchtune.utils.FullModelHFCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
checkpoint_files: [
model-00001-of-00030.safetensors,
@@ -82,17 +80,17 @@ lr_scheduler:
num_warmup_steps: 100

loss:
-_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+_component_: torch.nn.CrossEntropyLoss

# Training
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 1

# Logging
-output_dir: /tmp/lora-llama3_1-finetune-output
+output_dir: /tmp/lora_finetune_output
metric_logger:
-_component_: torchtune.training.metric_logging.DiskLogger
+_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
log_every_n_steps: 1
log_peak_memory_stats: False
9 changes: 3 additions & 6 deletions benchmarks/llm/configs/llama3_8B_lora.yaml
@@ -21,7 +21,6 @@
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
max_seq_len: null

# Model Arguments
model:
@@ -31,10 +30,9 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
-lora_dropout: 0.0

checkpointer:
-_component_: torchtune.training.FullModelMetaCheckpointer
+_component_: torchtune.utils.FullModelMetaCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpoint_files: [
consolidated.00.pth
@@ -43,7 +41,6 @@ checkpointer:
output_dir: /tmp/Meta-Llama-3-8B-Instruct/
model_type: LLAMA3
resume_from_checkpoint: False
-save_adapter_weights_only: False

# Dataset and Sampler
dataset:
@@ -62,7 +59,7 @@ lr_scheduler:
num_warmup_steps: 100

loss:
-_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+_component_: torch.nn.CrossEntropyLoss

# Training
epochs: 1
@@ -72,7 +69,7 @@ gradient_accumulation_steps: 32
# Logging
output_dir: /tmp/lora_finetune_output
metric_logger:
-_component_: torchtune.training.metric_logging.DiskLogger
+_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
log_every_n_steps: 1
log_peak_memory_stats: False
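
The LoRA hyperparameters above (lora_rank: 8, lora_alpha: 16, apply_lora_to_output: False) are handed to the model builder named in the config's model section. A hedged sketch of building such a model directly, assuming torchtune.models.llama3.lora_llama3_8b keeps these keyword names; the lora_attn_modules list is not visible in the hunk above, so the q/v choice here is only illustrative:

# Sketch: constructing the LoRA-wrapped Llama-3 8B model described above.
# Note: this allocates the full 8B-parameter model on CPU.
from torchtune.models.llama3 import lora_llama3_8b

model = lora_llama3_8b(
    lora_attn_modules=["q_proj", "v_proj"],  # assumed; not shown in the diff hunk
    apply_lora_to_mlp=False,
    apply_lora_to_output=False,
    lora_rank=8,
    lora_alpha=16,
)

# The recipes then mark only the adapter parameters as trainable; here we just
# confirm the model builds and report its size.
n_params = sum(p.numel() for p in model.parameters())
print(f"total parameters: {n_params:,}")
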
11 changes: 4 additions & 7 deletions benchmarks/llm/configs/llama3_8B_lora_single_device.yaml
@@ -24,16 +24,14 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
-lora_dropout: 0.0

# Tokenizer
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
-max_seq_len: null

checkpointer:
-_component_: torchtune.training.FullModelMetaCheckpointer
+_component_: torchtune.utils.FullModelMetaCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpoint_files: [
consolidated.00.pth
@@ -42,7 +40,6 @@ checkpointer:
output_dir: /tmp/Meta-Llama-3-8B-Instruct/
model_type: LLAMA3
resume_from_checkpoint: False
-save_adapter_weights_only: False

# Dataset and Sampler
dataset:
@@ -61,7 +58,7 @@ lr_scheduler:
num_warmup_steps: 100

loss:
-_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+_component_: torch.nn.CrossEntropyLoss

# Training
epochs: 1
@@ -72,7 +69,7 @@ compile: False
# Logging
output_dir: /tmp/lora_finetune_output
metric_logger:
-_component_: torchtune.training.metric_logging.DiskLogger
+_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
log_every_n_steps: 1
log_peak_memory_stats: False
@@ -84,7 +81,7 @@ enable_activation_checkpointing: True

# Profiler (disabled)
profiler:
-_component_: torchtune.training.setup_torch_profiler
+_component_: torchtune.utils.setup_torch_profiler
enabled: False

#Output directory of trace artifacts
9 changes: 4 additions & 5 deletions benchmarks/llm/configs/llama3_8B_qat_full.yaml
@@ -17,7 +17,6 @@
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
max_seq_len: null

# Dataset
dataset:
@@ -30,7 +29,7 @@ model:
_component_: torchtune.models.llama3_1.llama3_1_8b

checkpointer:
-_component_: torchtune.training.FullModelMetaCheckpointer
+_component_: torchtune.utils.FullModelMetaCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpoint_files: [
consolidated.00.pth
@@ -46,7 +45,7 @@ epochs: 3

# QAT arguments
quantizer:
-_component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
+_component_: torchtune.utils.quantization.Int8DynActInt4WeightQATQuantizer
groupsize: 256

optimizer:
@@ -55,7 +54,7 @@ optimizer:
foreach: False

loss:
-_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+_component_: torch.nn.CrossEntropyLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1

@@ -71,7 +70,7 @@ dtype: bf16

# Logging
metric_logger:
-_component_: torchtune.training.metric_logging.DiskLogger
+_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
output_dir: /tmp/alpaca-llama3-finetune
log_every_n_steps: 1
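
The quantizer entry in the QAT config above points at Int8DynActInt4WeightQATQuantizer with groupsize: 256. A hedged sketch of the usual QAT prepare/train/convert flow for that kind of quantizer, assuming the torchao-style prepare()/convert() methods; the real config builds llama3_1_8b rather than the toy model used here:

import torch
from torch import nn
from torchtune.utils.quantization import Int8DynActInt4WeightQATQuantizer  # torchtune <= 0.2 path

# Toy stand-in model; the groupsize must divide the Linear in_features (256 % 256 == 0).
model = nn.Sequential(nn.Linear(256, 256), nn.ReLU(), nn.Linear(256, 256))

quantizer = Int8DynActInt4WeightQATQuantizer(groupsize=256)
model = quantizer.prepare(model)  # assumed API: inserts fake-quant ops into the Linear layers

# ... fine-tune `model` here so the weights adapt to the quantization noise ...

model = quantizer.convert(model)  # assumed API: swaps in real int8-activation / int4-weight modules
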
14 changes: 4 additions & 10 deletions benchmarks/llm/configs/llama3_8B_qlora_single_device.yaml
@@ -23,16 +23,14 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
-lora_dropout: 0.0

# Tokenizer
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
-max_seq_len: null

checkpointer:
-_component_: torchtune.training.FullModelMetaCheckpointer
+_component_: torchtune.utils.FullModelMetaCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpoint_files: [
consolidated.00.pth
@@ -41,7 +39,6 @@ checkpointer:
output_dir: /tmp/Meta-Llama-3-8B-Instruct/
model_type: LLAMA3
resume_from_checkpoint: False
-save_adapter_weights_only: False

# Dataset and Sampler
dataset:
@@ -60,7 +57,7 @@ lr_scheduler:
num_warmup_steps: 100

loss:
-_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+_component_: torch.nn.CrossEntropyLoss

# Training
epochs: 1
@@ -71,7 +68,7 @@ compile: False
# Logging
output_dir: /tmp/qlora_finetune_output/
metric_logger:
-_component_: torchtune.training.metric_logging.DiskLogger
+_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
log_every_n_steps: 1
log_peak_memory_stats: False
@@ -83,7 +80,7 @@ enable_activation_checkpointing: True

# Profiler (disabled)
profiler:
-_component_: torchtune.training.setup_torch_profiler
+_component_: torchtune.utils.setup_torch_profiler
enabled: False

#Output directory of trace artifacts
@@ -105,6 +102,3 @@ profiler:
warmup_steps: 5
active_steps: 2
num_cycles: 1

-# For colab use True
-low_cpu_ram: False
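
The profiler schedule fields shown above (warmup_steps: 5, active_steps: 2, num_cycles: 1) map naturally onto a torch.profiler schedule when profiling is enabled. A sketch using plain torch.profiler rather than torchtune's setup_torch_profiler wrapper (wait assumed to be 0, num_cycles read as the repeat count, output path hypothetical):

import torch
from torch.profiler import ProfilerActivity, profile, schedule

# warmup_steps / active_steps / num_cycles from the config; wait is assumed to be 0.
sched = schedule(wait=0, warmup=5, active=2, repeat=1)

with profile(
    activities=[ProfilerActivity.CPU],  # add ProfilerActivity.CUDA on a GPU machine
    schedule=sched,
    on_trace_ready=torch.profiler.tensorboard_trace_handler("/tmp/qlora_finetune_output/profiler"),  # hypothetical path
) as prof:
    for _ in range(8):
        torch.randn(64, 64) @ torch.randn(64, 64)  # stand-in for one training step
        prof.step()
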