Revert "Fix llm with torchtune v0.3"
This reverts commit fd99d8a.
pierre.delaunay committed Sep 30, 2024
1 parent 6a1c120 commit 770ca62
Showing 14 changed files with 711 additions and 1,328 deletions.
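
Every file touched below is a torchtune YAML config, and each _component_: entry is a dotted import path that the recipe resolves and calls at runtime, passing the sibling keys as constructor arguments. A minimal sketch of that mechanism, assuming the torchtune <= 0.2 layout these configs use (where helpers such as metric_logging.DiskLogger sit under torchtune.utils rather than torchtune.training):

# Sketch of torchtune's "_component_" resolution; assumes torchtune <= 0.2,
# where DiskLogger is exposed as torchtune.utils.metric_logging.DiskLogger.
from omegaconf import OmegaConf
from torchtune import config

cfg = OmegaConf.create(
    """
    metric_logger:
      _component_: torchtune.utils.metric_logging.DiskLogger
      log_dir: /tmp/alpaca-llama3-finetune
    """
)

# instantiate() imports the dotted path and calls it with the remaining keys.
logger = config.instantiate(cfg.metric_logger)
logger.log("loss", 1.23, step=0)  # writes a line to a log file under log_dir
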
11 changes: 5 additions & 6 deletions benchmarks/llm/configs/llama3_70B_full.yaml
@@ -20,7 +20,6 @@
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3.1-70B-Instruct/original/tokenizer.model
-max_seq_len: null

# Dataset
dataset:
@@ -34,7 +33,7 @@ model:

safetensors: true
checkpointer:
-_component_: torchtune.training.FullModelHFCheckpointer
+_component_: torchtune.utils.FullModelHFCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
checkpoint_files: [
model-00001-of-00030.safetensors,
@@ -86,7 +85,7 @@ optimizer:
fused: True

loss:
-_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+_component_: torch.nn.CrossEntropyLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1

@@ -96,16 +95,16 @@ device: cuda

# Memory management
enable_activation_checkpointing: True
-custom_sharded_layers: ['tok_embeddings', 'output']
+memory_efficient_fsdp_wrap: True
fsdp_cpu_offload: True

# Reduced precision
dtype: bf16

# Logging
metric_logger:
-_component_: torchtune.training.metric_logging.DiskLogger
+_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
-output_dir: /tmp/full-llama3_1-finetune
+output_dir: /tmp/alpaca-llama3-finetune
log_every_n_steps: 1
log_peak_memory_stats: False
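
The loss entry above goes back to plain torch.nn.CrossEntropyLoss, which the recipes can apply directly to flattened logits and labels. A small self-contained sketch of that usage (toy tensor sizes, not the real recipe code):

import torch
from torch import nn

# CrossEntropyLoss expects [N, vocab] logits and [N] integer labels,
# so the (batch, seq) dimensions are flattened before the call.
loss_fn = nn.CrossEntropyLoss(ignore_index=-100)  # -100 is the usual masked-label value

batch, seq, vocab = 2, 8, 128  # toy sizes only
logits = torch.randn(batch, seq, vocab)
labels = torch.randint(0, vocab, (batch, seq))

loss = loss_fn(logits.reshape(-1, vocab), labels.reshape(-1))
print(loss.item())
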
10 changes: 4 additions & 6 deletions benchmarks/llm/configs/llama3_70B_lora.yaml
@@ -16,16 +16,14 @@ model:
apply_lora_to_output: False
lora_rank: 16
lora_alpha: 32
-lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3.1-70B-Instruct/original/tokenizer.model
-max_seq_len: null

safetensors: true
checkpointer:
-_component_: torchtune.training.FullModelHFCheckpointer
+_component_: torchtune.utils.FullModelHFCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
checkpoint_files: [
model-00001-of-00030.safetensors,
@@ -82,17 +80,17 @@ lr_scheduler:
num_warmup_steps: 100

loss:
-_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+_component_: torch.nn.CrossEntropyLoss

# Training
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 1

# Logging
-output_dir: /tmp/lora-llama3_1-finetune-output
+output_dir: /tmp/lora_finetune_output
metric_logger:
-_component_: torchtune.training.metric_logging.DiskLogger
+_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
log_every_n_steps: 1
log_peak_memory_stats: False
9 changes: 3 additions & 6 deletions benchmarks/llm/configs/llama3_8B_lora.yaml
@@ -21,7 +21,6 @@
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
max_seq_len: null

# Model Arguments
model:
@@ -31,10 +30,9 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
-lora_dropout: 0.0

checkpointer:
-_component_: torchtune.training.FullModelMetaCheckpointer
+_component_: torchtune.utils.FullModelMetaCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpoint_files: [
consolidated.00.pth
@@ -43,7 +41,6 @@ checkpointer:
output_dir: /tmp/Meta-Llama-3-8B-Instruct/
model_type: LLAMA3
resume_from_checkpoint: False
-save_adapter_weights_only: False

# Dataset and Sampler
dataset:
@@ -62,7 +59,7 @@ lr_scheduler:
num_warmup_steps: 100

loss:
-_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+_component_: torch.nn.CrossEntropyLoss

# Training
epochs: 1
@@ -72,7 +69,7 @@ gradient_accumulation_steps: 32
# Logging
output_dir: /tmp/lora_finetune_output
metric_logger:
-_component_: torchtune.training.metric_logging.DiskLogger
+_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
log_every_n_steps: 1
log_peak_memory_stats: False
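
The LoRA hyperparameters above (lora_rank: 8, lora_alpha: 16, apply_lora_to_output: False) are handed to the model builder named in the config's model section. A hedged sketch of building such a model directly, assuming torchtune.models.llama3.lora_llama3_8b keeps these keyword names; the lora_attn_modules list is not visible in the hunk above, so the q/v choice here is only illustrative:

# Sketch: constructing the LoRA-wrapped Llama-3 8B model described above.
# Note: this allocates the full 8B-parameter model on CPU.
from torchtune.models.llama3 import lora_llama3_8b

model = lora_llama3_8b(
    lora_attn_modules=["q_proj", "v_proj"],  # assumed; not shown in the diff hunk
    apply_lora_to_mlp=False,
    apply_lora_to_output=False,
    lora_rank=8,
    lora_alpha=16,
)

# The recipes then mark only the adapter parameters as trainable; here we just
# confirm the model builds and report its size.
n_params = sum(p.numel() for p in model.parameters())
print(f"total parameters: {n_params:,}")
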
11 changes: 4 additions & 7 deletions benchmarks/llm/configs/llama3_8B_lora_single_device.yaml
@@ -24,16 +24,14 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
-lora_dropout: 0.0

# Tokenizer
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
-max_seq_len: null

checkpointer:
-_component_: torchtune.training.FullModelMetaCheckpointer
+_component_: torchtune.utils.FullModelMetaCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpoint_files: [
consolidated.00.pth
@@ -42,7 +40,6 @@ checkpointer:
output_dir: /tmp/Meta-Llama-3-8B-Instruct/
model_type: LLAMA3
resume_from_checkpoint: False
-save_adapter_weights_only: False

# Dataset and Sampler
dataset:
@@ -61,7 +58,7 @@ lr_scheduler:
num_warmup_steps: 100

loss:
-_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+_component_: torch.nn.CrossEntropyLoss

# Training
epochs: 1
@@ -72,7 +69,7 @@ compile: False
# Logging
output_dir: /tmp/lora_finetune_output
metric_logger:
-_component_: torchtune.training.metric_logging.DiskLogger
+_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
log_every_n_steps: 1
log_peak_memory_stats: False
@@ -84,7 +81,7 @@ enable_activation_checkpointing: True

# Profiler (disabled)
profiler:
-_component_: torchtune.training.setup_torch_profiler
+_component_: torchtune.utils.setup_torch_profiler
enabled: False

#Output directory of trace artifacts
9 changes: 4 additions & 5 deletions benchmarks/llm/configs/llama3_8B_qat_full.yaml
@@ -17,7 +17,6 @@
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
max_seq_len: null

# Dataset
dataset:
@@ -30,7 +29,7 @@ model:
_component_: torchtune.models.llama3_1.llama3_1_8b

checkpointer:
-_component_: torchtune.training.FullModelMetaCheckpointer
+_component_: torchtune.utils.FullModelMetaCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpoint_files: [
consolidated.00.pth
@@ -46,7 +45,7 @@ epochs: 3

# QAT arguments
quantizer:
-_component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
+_component_: torchtune.utils.quantization.Int8DynActInt4WeightQATQuantizer
groupsize: 256

optimizer:
@@ -55,7 +54,7 @@ optimizer:
foreach: False

loss:
-_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+_component_: torch.nn.CrossEntropyLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1

@@ -71,7 +70,7 @@ dtype: bf16

# Logging
metric_logger:
-_component_: torchtune.training.metric_logging.DiskLogger
+_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
output_dir: /tmp/alpaca-llama3-finetune
log_every_n_steps: 1
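
The quantizer entry in the QAT config above points at Int8DynActInt4WeightQATQuantizer with groupsize: 256. A hedged sketch of the usual QAT prepare/train/convert flow for that kind of quantizer, assuming the torchao-style prepare()/convert() methods; the real config builds llama3_1_8b rather than the toy model used here:

import torch
from torch import nn
from torchtune.utils.quantization import Int8DynActInt4WeightQATQuantizer  # torchtune <= 0.2 path

# Toy stand-in model; the groupsize must divide the Linear in_features (256 % 256 == 0).
model = nn.Sequential(nn.Linear(256, 256), nn.ReLU(), nn.Linear(256, 256))

quantizer = Int8DynActInt4WeightQATQuantizer(groupsize=256)
model = quantizer.prepare(model)  # assumed API: inserts fake-quant ops into the Linear layers

# ... fine-tune `model` here so the weights adapt to the quantization noise ...

model = quantizer.convert(model)  # assumed API: swaps in real int8-activation / int4-weight modules
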
14 changes: 4 additions & 10 deletions benchmarks/llm/configs/llama3_8B_qlora_single_device.yaml
@@ -23,16 +23,14 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
-lora_dropout: 0.0

# Tokenizer
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
-max_seq_len: null

checkpointer:
-_component_: torchtune.training.FullModelMetaCheckpointer
+_component_: torchtune.utils.FullModelMetaCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpoint_files: [
consolidated.00.pth
@@ -41,7 +39,6 @@ checkpointer:
output_dir: /tmp/Meta-Llama-3-8B-Instruct/
model_type: LLAMA3
resume_from_checkpoint: False
-save_adapter_weights_only: False

# Dataset and Sampler
dataset:
@@ -60,7 +57,7 @@ lr_scheduler:
num_warmup_steps: 100

loss:
-_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
+_component_: torch.nn.CrossEntropyLoss

# Training
epochs: 1
@@ -71,7 +68,7 @@ compile: False
# Logging
output_dir: /tmp/qlora_finetune_output/
metric_logger:
-_component_: torchtune.training.metric_logging.DiskLogger
+_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
log_every_n_steps: 1
log_peak_memory_stats: False
@@ -83,7 +80,7 @@ enable_activation_checkpointing: True

# Profiler (disabled)
profiler:
-_component_: torchtune.training.setup_torch_profiler
+_component_: torchtune.utils.setup_torch_profiler
enabled: False

#Output directory of trace artifacts
@@ -105,6 +102,3 @@ profiler:
warmup_steps: 5
active_steps: 2
num_cycles: 1

-# For colab use True
-low_cpu_ram: False
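
The profiler schedule fields shown above (warmup_steps: 5, active_steps: 2, num_cycles: 1) map naturally onto a torch.profiler schedule when profiling is enabled. A sketch using plain torch.profiler rather than torchtune's setup_torch_profiler wrapper (wait assumed to be 0, num_cycles read as the repeat count, output path hypothetical):

import torch
from torch.profiler import ProfilerActivity, profile, schedule

# warmup_steps / active_steps / num_cycles from the config; wait is assumed to be 0.
sched = schedule(wait=0, warmup=5, active=2, repeat=1)

with profile(
    activities=[ProfilerActivity.CPU],  # add ProfilerActivity.CUDA on a GPU machine
    schedule=sched,
    on_trace_ready=torch.profiler.tensorboard_trace_handler("/tmp/qlora_finetune_output/profiler"),  # hypothetical path
) as prof:
    for _ in range(8):
        torch.randn(64, 64) @ torch.randn(64, 64)  # stand-in for one training step
        prof.step()
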