Sync #313
Merged, 36 commits, Nov 22, 2024

Commits (changes shown from 1 commit)
dbebc7c
Batch resizing (#286)
Delaunay Sep 19, 2024
1c265f9
Force exactly one monitor tag
satyaog Sep 20, 2024
fd99d8a
Fix llm with torchtune v0.3
satyaog Sep 23, 2024
6caac29
Fix rlhf on trl v0.11.0
satyaog Sep 23, 2024
34f56e7
Merge branch 'hotfix/fix_llm' of https://github.com/satyaog/milabench…
Delaunay Sep 25, 2024
4639c19
Merge branch 'hotfix/monitor_tags' of https://github.com/satyaog/mila…
Delaunay Sep 25, 2024
6a1c120
Add missing monitor tag
Sep 26, 2024
770ca62
Revert "Fix llm with torchtune v0.3"
Sep 30, 2024
6d1e114
Revert "Fix rlhf on trl v0.11.0"
Sep 30, 2024
93015e5
Add latex output as an option
Oct 2, 2024
558c278
Merge pull request #291 from mila-iqia/staging
Delaunay Oct 2, 2024
3b87cb4
Add container recipe
Delaunay Oct 9, 2024
4fb687c
Tweaks
Oct 30, 2024
a849fc2
-
Oct 30, 2024
dafcbf9
Ignore prepare & install runs for reports
Oct 30, 2024
226e455
explore scaling
satyaog Nov 7, 2024
a60a3aa
Revert "explore scaling"
satyaog Nov 7, 2024
783a13b
Update report.py
Delaunay Nov 16, 2024
ecd9214
Consolidate metric pushing logic
Nov 21, 2024
8148e53
Tweaks
Nov 21, 2024
3d27180
Revert bad commit
Nov 21, 2024
3d7d5f1
Tweaks
Nov 21, 2024
08eebc1
Merge pull request #310 from mila-iqia/handle-score==0
Delaunay Nov 21, 2024
8498689
Merge branch 'staging' into H100
Delaunay Nov 21, 2024
4fdf736
Merge pull request #309 from mila-iqia/H100
Delaunay Nov 21, 2024
798a186
Hpu (#292)
Delaunay Nov 21, 2024
0b93d59
Rocm (#293)
Delaunay Nov 21, 2024
491505f
Multirun system (#308)
Delaunay Nov 21, 2024
a66519e
Merge branch 'master' into staging
Delaunay Nov 21, 2024
deb271a
Add monitor tag to templates
Nov 21, 2024
a8415d3
Do not rely on DNS to resolve nodes
Nov 21, 2024
d5cbbf5
Update README
Nov 21, 2024
9a29f06
undo gaudi2 config for llm
Nov 21, 2024
6c15d81
Merge pull request #311 from mila-iqia/staging
Delaunay Nov 22, 2024
06fa1be
Update README.md
Delaunay Nov 22, 2024
ab6b412
Add missing tags to tests config (#312)
Delaunay Nov 22, 2024
Fix llm with torchtune v0.3
satyaog committed Sep 23, 2024
commit fd99d8ab642519da5d4ff223c7a2de535f4d41bc
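
The recurring change in the six config files below is the torchtune v0.3 API migration: components that lived under torchtune.utils now live under torchtune.training, and the plain torch.nn.CrossEntropyLoss is swapped for torchtune's chunked-output loss. A minimal before/after sketch in YAML, using only component paths taken from the diffs below (unrelated keys omitted):

checkpointer:
  # before (torchtune < 0.3): torchtune.utils.FullModelHFCheckpointer
  _component_: torchtune.training.FullModelHFCheckpointer

loss:
  # before: torch.nn.CrossEntropyLoss
  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

metric_logger:
  # before: torchtune.utils.metric_logging.DiskLogger
  _component_: torchtune.training.metric_logging.DiskLogger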
11 changes: 6 additions & 5 deletions benchmarks/llm/configs/llama3_70B_full.yaml
@@ -20,6 +20,7 @@
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3.1-70B-Instruct/original/tokenizer.model
+ max_seq_len: null

# Dataset
dataset:
@@ -33,7 +34,7 @@ model:

safetensors: true
checkpointer:
- _component_: torchtune.utils.FullModelHFCheckpointer
+ _component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
checkpoint_files: [
model-00001-of-00030.safetensors,
@@ -85,7 +86,7 @@ optimizer:
fused: True

loss:
- _component_: torch.nn.CrossEntropyLoss
+ _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1

@@ -95,16 +96,16 @@ device: cuda

# Memory management
enable_activation_checkpointing: True
- memory_efficient_fsdp_wrap: True
+ custom_sharded_layers: ['tok_embeddings', 'output']
fsdp_cpu_offload: True

# Reduced precision
dtype: bf16

# Logging
metric_logger:
- _component_: torchtune.utils.metric_logging.DiskLogger
+ _component_: torchtune.training.metric_logging.DiskLogger
log_dir: ${output_dir}
- output_dir: /tmp/alpaca-llama3-finetune
+ output_dir: /tmp/full-llama3_1-finetune
log_every_n_steps: 1
log_peak_memory_stats: False
10 changes: 6 additions & 4 deletions benchmarks/llm/configs/llama3_70B_lora.yaml
@@ -16,14 +16,16 @@ model:
apply_lora_to_output: False
lora_rank: 16
lora_alpha: 32
+ lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3.1-70B-Instruct/original/tokenizer.model
+ max_seq_len: null

safetensors: true
checkpointer:
- _component_: torchtune.utils.FullModelHFCheckpointer
+ _component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
checkpoint_files: [
model-00001-of-00030.safetensors,
@@ -80,17 +82,17 @@ lr_scheduler:
num_warmup_steps: 100

loss:
- _component_: torch.nn.CrossEntropyLoss
+ _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

# Training
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 1

# Logging
- output_dir: /tmp/lora_finetune_output
+ output_dir: /tmp/lora-llama3_1-finetune-output
metric_logger:
- _component_: torchtune.utils.metric_logging.DiskLogger
+ _component_: torchtune.training.metric_logging.DiskLogger
log_dir: ${output_dir}
log_every_n_steps: 1
log_peak_memory_stats: False
9 changes: 6 additions & 3 deletions benchmarks/llm/configs/llama3_8B_lora.yaml
@@ -21,6 +21,7 @@
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
+ max_seq_len: null

# Model Arguments
model:
@@ -30,9 +31,10 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
+ lora_dropout: 0.0

checkpointer:
- _component_: torchtune.utils.FullModelMetaCheckpointer
+ _component_: torchtune.training.FullModelMetaCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpoint_files: [
consolidated.00.pth
@@ -41,6 +43,7 @@ checkpointer:
output_dir: /tmp/Meta-Llama-3-8B-Instruct/
model_type: LLAMA3
resume_from_checkpoint: False
+ save_adapter_weights_only: False

# Dataset and Sampler
dataset:
@@ -59,7 +62,7 @@ lr_scheduler:
num_warmup_steps: 100

loss:
- _component_: torch.nn.CrossEntropyLoss
+ _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

# Training
epochs: 1
@@ -69,7 +72,7 @@ gradient_accumulation_steps: 32
# Logging
output_dir: /tmp/lora_finetune_output
metric_logger:
- _component_: torchtune.utils.metric_logging.DiskLogger
+ _component_: torchtune.training.metric_logging.DiskLogger
log_dir: ${output_dir}
log_every_n_steps: 1
log_peak_memory_stats: False
11 changes: 7 additions & 4 deletions benchmarks/llm/configs/llama3_8B_lora_single_device.yaml
@@ -24,14 +24,16 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
+ lora_dropout: 0.0

# Tokenizer
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
+ max_seq_len: null

checkpointer:
- _component_: torchtune.utils.FullModelMetaCheckpointer
+ _component_: torchtune.training.FullModelMetaCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpoint_files: [
consolidated.00.pth
@@ -40,6 +42,7 @@ checkpointer:
output_dir: /tmp/Meta-Llama-3-8B-Instruct/
model_type: LLAMA3
resume_from_checkpoint: False
+ save_adapter_weights_only: False

# Dataset and Sampler
dataset:
@@ -58,7 +61,7 @@ lr_scheduler:
num_warmup_steps: 100

loss:
- _component_: torch.nn.CrossEntropyLoss
+ _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

# Training
epochs: 1
@@ -69,7 +72,7 @@ compile: False
# Logging
output_dir: /tmp/lora_finetune_output
metric_logger:
- _component_: torchtune.utils.metric_logging.DiskLogger
+ _component_: torchtune.training.metric_logging.DiskLogger
log_dir: ${output_dir}
log_every_n_steps: 1
log_peak_memory_stats: False
@@ -81,7 +84,7 @@ enable_activation_checkpointing: True

# Profiler (disabled)
profiler:
- _component_: torchtune.utils.setup_torch_profiler
+ _component_: torchtune.training.setup_torch_profiler
enabled: False

#Output directory of trace artifacts
9 changes: 5 additions & 4 deletions benchmarks/llm/configs/llama3_8B_qat_full.yaml
@@ -17,6 +17,7 @@
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
+ max_seq_len: null

# Dataset
dataset:
@@ -29,7 +30,7 @@ model:
_component_: torchtune.models.llama3_1.llama3_1_8b

checkpointer:
- _component_: torchtune.utils.FullModelMetaCheckpointer
+ _component_: torchtune.training.FullModelMetaCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpoint_files: [
consolidated.00.pth
@@ -45,7 +46,7 @@ epochs: 3

# QAT arguments
quantizer:
- _component_: torchtune.utils.quantization.Int8DynActInt4WeightQATQuantizer
+ _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
groupsize: 256

optimizer:
@@ -54,7 +55,7 @@ optimizer:
foreach: False

loss:
- _component_: torch.nn.CrossEntropyLoss
+ _component_: torchtune.modules.loss.CEWithChunkedOutputLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1

@@ -70,7 +71,7 @@ dtype: bf16

# Logging
metric_logger:
- _component_: torchtune.utils.metric_logging.DiskLogger
+ _component_: torchtune.training.metric_logging.DiskLogger
log_dir: ${output_dir}
output_dir: /tmp/alpaca-llama3-finetune
log_every_n_steps: 1
14 changes: 10 additions & 4 deletions benchmarks/llm/configs/llama3_8B_qlora_single_device.yaml
@@ -23,14 +23,16 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
+ lora_dropout: 0.0

# Tokenizer
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
+ max_seq_len: null

checkpointer:
- _component_: torchtune.utils.FullModelMetaCheckpointer
+ _component_: torchtune.training.FullModelMetaCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
checkpoint_files: [
consolidated.00.pth
@@ -39,6 +41,7 @@ checkpointer:
output_dir: /tmp/Meta-Llama-3-8B-Instruct/
model_type: LLAMA3
resume_from_checkpoint: False
+ save_adapter_weights_only: False

# Dataset and Sampler
dataset:
@@ -57,7 +60,7 @@ lr_scheduler:
num_warmup_steps: 100

loss:
- _component_: torch.nn.CrossEntropyLoss
+ _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

# Training
epochs: 1
@@ -68,7 +71,7 @@ compile: False
# Logging
output_dir: /tmp/qlora_finetune_output/
metric_logger:
- _component_: torchtune.utils.metric_logging.DiskLogger
+ _component_: torchtune.training.metric_logging.DiskLogger
log_dir: ${output_dir}
log_every_n_steps: 1
log_peak_memory_stats: False
@@ -80,7 +83,7 @@ enable_activation_checkpointing: True

# Profiler (disabled)
profiler:
- _component_: torchtune.utils.setup_torch_profiler
+ _component_: torchtune.training.setup_torch_profiler
enabled: False

#Output directory of trace artifacts
@@ -102,3 +105,6 @@ profiler:
warmup_steps: 5
active_steps: 2
num_cycles: 1

+ # For colab use True
+ low_cpu_ram: False
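
Beyond the namespace changes, the configs in this commit also gain a few new keys; a condensed recap in YAML, with values copied verbatim from the diffs above (flat layout here is only illustrative, the actual files keep these keys inside their tokenizer, model, and checkpointing sections):

max_seq_len: null                  # tokenizer section, added in every config
lora_dropout: 0.0                  # model section of the LoRA/QLoRA configs
save_adapter_weights_only: False   # 8B LoRA and QLoRA configs
# For colab use True
low_cpu_ram: False                 # QLoRA single-device config only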