Fix llm with torchtune v0.3 #289

Merged
merged 1 commit into from Oct 2, 2024
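In short, this commit updates the benchmark configs for torchtune v0.3: checkpointer, metric-logging, profiler, and quantization components move from the torchtune.utils namespace to torchtune.training, the loss switches from torch.nn.CrossEntropyLoss to torchtune.modules.loss.CEWithChunkedOutputLoss, and several options are added (max_seq_len: null on the tokenizers, lora_dropout: 0.0 on the LoRA models, save_adapter_weights_only: False, custom_sharded_layers for the 70B full fine-tune, and low_cpu_ram for the QLoRA single-device config).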
11 changes: 6 additions & 5 deletions benchmarks/llm/configs/llama3_70B_full.yaml
@@ -20,6 +20,7 @@
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
   path: /tmp/Meta-Llama-3.1-70B-Instruct/original/tokenizer.model
+  max_seq_len: null

 # Dataset
 dataset:
@@ -33,7 +34,7 @@ model:

 safetensors: true
 checkpointer:
-  _component_: torchtune.utils.FullModelHFCheckpointer
+  _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
   checkpoint_files: [
     model-00001-of-00030.safetensors,
@@ -85,7 +86,7 @@ optimizer:
   fused: True

 loss:
-  _component_: torch.nn.CrossEntropyLoss
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1

@@ -95,16 +96,16 @@ device: cuda

 # Memory management
 enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: True
+custom_sharded_layers: ['tok_embeddings', 'output']
 fsdp_cpu_offload: True

 # Reduced precision
 dtype: bf16

 # Logging
 metric_logger:
-  _component_: torchtune.utils.metric_logging.DiskLogger
+  _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: ${output_dir}
-output_dir: /tmp/alpaca-llama3-finetune
+output_dir: /tmp/full-llama3_1-finetune
 log_every_n_steps: 1
 log_peak_memory_stats: False
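The _component_ keys above are resolved by torchtune's config system when a recipe starts. As a minimal sketch (not part of this PR, and assuming torchtune v0.3 with omegaconf installed), the renamed loss component can be instantiated directly to confirm the new path resolves:

    from omegaconf import OmegaConf
    from torchtune import config

    # Load the benchmark config and build the loss the same way a recipe would.
    cfg = OmegaConf.load("benchmarks/llm/configs/llama3_70B_full.yaml")
    loss_fn = config.instantiate(cfg.loss)
    print(type(loss_fn))  # torchtune.modules.loss.CEWithChunkedOutputLoss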
10 changes: 6 additions & 4 deletions benchmarks/llm/configs/llama3_70B_lora.yaml
@@ -16,14 +16,16 @@ model:
   apply_lora_to_output: False
   lora_rank: 16
   lora_alpha: 32
+  lora_dropout: 0.0

 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
   path: /tmp/Meta-Llama-3.1-70B-Instruct/original/tokenizer.model
+  max_seq_len: null

 safetensors: true
 checkpointer:
-  _component_: torchtune.utils.FullModelHFCheckpointer
+  _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
   checkpoint_files: [
     model-00001-of-00030.safetensors,
@@ -80,17 +82,17 @@ lr_scheduler:
   num_warmup_steps: 100

 loss:
-  _component_: torch.nn.CrossEntropyLoss
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

 # Training
 epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1

 # Logging
-output_dir: /tmp/lora_finetune_output
+output_dir: /tmp/lora-llama3_1-finetune-output
 metric_logger:
-  _component_: torchtune.utils.metric_logging.DiskLogger
+  _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: ${output_dir}
 log_every_n_steps: 1
 log_peak_memory_stats: False
9 changes: 6 additions & 3 deletions benchmarks/llm/configs/llama3_8B_lora.yaml
@@ -21,6 +21,7 @@
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
   path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
+  max_seq_len: null

 # Model Arguments
 model:
@@ -30,9 +31,10 @@ model:
   apply_lora_to_output: False
   lora_rank: 8
   lora_alpha: 16
+  lora_dropout: 0.0

 checkpointer:
-  _component_: torchtune.utils.FullModelMetaCheckpointer
+  _component_: torchtune.training.FullModelMetaCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
   checkpoint_files: [
     consolidated.00.pth
@@ -41,6 +43,7 @@ checkpointer:
   output_dir: /tmp/Meta-Llama-3-8B-Instruct/
   model_type: LLAMA3
 resume_from_checkpoint: False
+save_adapter_weights_only: False

 # Dataset and Sampler
 dataset:
@@ -59,7 +62,7 @@ lr_scheduler:
   num_warmup_steps: 100

 loss:
-  _component_: torch.nn.CrossEntropyLoss
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

 # Training
 epochs: 1
@@ -69,7 +72,7 @@ gradient_accumulation_steps: 32
 # Logging
 output_dir: /tmp/lora_finetune_output
 metric_logger:
-  _component_: torchtune.utils.metric_logging.DiskLogger
+  _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: ${output_dir}
 log_every_n_steps: 1
 log_peak_memory_stats: False
11 changes: 7 additions & 4 deletions benchmarks/llm/configs/llama3_8B_lora_single_device.yaml
@@ -24,14 +24,16 @@ model:
   apply_lora_to_output: False
   lora_rank: 8
   lora_alpha: 16
+  lora_dropout: 0.0

 # Tokenizer
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
   path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
+  max_seq_len: null

 checkpointer:
-  _component_: torchtune.utils.FullModelMetaCheckpointer
+  _component_: torchtune.training.FullModelMetaCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
   checkpoint_files: [
     consolidated.00.pth
@@ -40,6 +42,7 @@ checkpointer:
   output_dir: /tmp/Meta-Llama-3-8B-Instruct/
   model_type: LLAMA3
 resume_from_checkpoint: False
+save_adapter_weights_only: False

 # Dataset and Sampler
 dataset:
@@ -58,7 +61,7 @@ lr_scheduler:
   num_warmup_steps: 100

 loss:
-  _component_: torch.nn.CrossEntropyLoss
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

 # Training
 epochs: 1
@@ -69,7 +72,7 @@ compile: False
 # Logging
 output_dir: /tmp/lora_finetune_output
 metric_logger:
-  _component_: torchtune.utils.metric_logging.DiskLogger
+  _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: ${output_dir}
 log_every_n_steps: 1
 log_peak_memory_stats: False
@@ -81,7 +84,7 @@ enable_activation_checkpointing: True

 # Profiler (disabled)
 profiler:
-  _component_: torchtune.utils.setup_torch_profiler
+  _component_: torchtune.training.setup_torch_profiler
   enabled: False

 #Output directory of trace artifacts
9 changes: 5 additions & 4 deletions benchmarks/llm/configs/llama3_8B_qat_full.yaml
@@ -17,6 +17,7 @@
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
   path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
+  max_seq_len: null

 # Dataset
 dataset:
@@ -29,7 +30,7 @@ model:
   _component_: torchtune.models.llama3_1.llama3_1_8b

 checkpointer:
-  _component_: torchtune.utils.FullModelMetaCheckpointer
+  _component_: torchtune.training.FullModelMetaCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
   checkpoint_files: [
     consolidated.00.pth
@@ -45,7 +46,7 @@ epochs: 3

 # QAT arguments
 quantizer:
-  _component_: torchtune.utils.quantization.Int8DynActInt4WeightQATQuantizer
+  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
   groupsize: 256

 optimizer:
@@ -54,7 +55,7 @@ optimizer:
   foreach: False

 loss:
-  _component_: torch.nn.CrossEntropyLoss
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1

@@ -70,7 +71,7 @@ dtype: bf16

 # Logging
 metric_logger:
-  _component_: torchtune.utils.metric_logging.DiskLogger
+  _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: ${output_dir}
 output_dir: /tmp/alpaca-llama3-finetune
 log_every_n_steps: 1
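The quantizer component for QAT now lives under torchtune.training.quantization. A rough sketch of how such a quantizer is typically used (prepare before training, convert after); this assumes the prepare()/convert() interface and is illustrative rather than code from this PR or the recipe:

    from torchtune.models.llama3_1 import llama3_1_8b
    from torchtune.training.quantization import Int8DynActInt4WeightQATQuantizer

    model = llama3_1_8b()
    quantizer = Int8DynActInt4WeightQATQuantizer(groupsize=256)
    model = quantizer.prepare(model)   # swap in fake-quantized linears for QAT
    # ... fine-tuning loop runs here ...
    model = quantizer.convert(model)   # materialize the quantized weights after training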
14 changes: 10 additions & 4 deletions benchmarks/llm/configs/llama3_8B_qlora_single_device.yaml
@@ -23,14 +23,16 @@ model:
   apply_lora_to_output: False
   lora_rank: 8
   lora_alpha: 16
+  lora_dropout: 0.0

 # Tokenizer
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
   path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
+  max_seq_len: null

 checkpointer:
-  _component_: torchtune.utils.FullModelMetaCheckpointer
+  _component_: torchtune.training.FullModelMetaCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
   checkpoint_files: [
     consolidated.00.pth
@@ -39,6 +41,7 @@ checkpointer:
   output_dir: /tmp/Meta-Llama-3-8B-Instruct/
   model_type: LLAMA3
 resume_from_checkpoint: False
+save_adapter_weights_only: False

 # Dataset and Sampler
 dataset:
@@ -57,7 +60,7 @@ lr_scheduler:
   num_warmup_steps: 100

 loss:
-  _component_: torch.nn.CrossEntropyLoss
+  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

 # Training
 epochs: 1
@@ -68,7 +71,7 @@ compile: False
 # Logging
 output_dir: /tmp/qlora_finetune_output/
 metric_logger:
-  _component_: torchtune.utils.metric_logging.DiskLogger
+  _component_: torchtune.training.metric_logging.DiskLogger
   log_dir: ${output_dir}
 log_every_n_steps: 1
 log_peak_memory_stats: False
@@ -80,7 +83,7 @@ enable_activation_checkpointing: True

 # Profiler (disabled)
 profiler:
-  _component_: torchtune.utils.setup_torch_profiler
+  _component_: torchtune.training.setup_torch_profiler
   enabled: False

 #Output directory of trace artifacts
@@ -102,3 +105,6 @@ profiler:
   warmup_steps: 5
   active_steps: 2
   num_cycles: 1
+
+# For colab use True
+low_cpu_ram: False
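Outside the benchmark harness these are ordinary torchtune recipe configs; the single-device ones, for example, would normally be launched with something like tune run lora_finetune_single_device --config benchmarks/llm/configs/llama3_8B_qlora_single_device.yaml (assuming the standard torchtune CLI; the harness in this repository may invoke the recipes differently).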