Skip to content

Commit

Permalink
change default params: use tinyshakespeare and decrease LR
Browse files (browse the repository at this point in the history)
  • Loading branch information
karpathy committed Sep 13, 2024
1 parent bd457aa commit bd8c604
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions train_llama3.py
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,7 @@ def print0(*args, **kwargs):
# Command-line options for model loading and file I/O, registered on an
# argparse-style `parser` object that is created outside this excerpt.
# Checkpoint and tokenizer locations; per their help text these are needed
# when use_hf=0. Both default to None.
parser.add_argument("--ckpt_dir", type=str, default=None, help="path to llama3 model checkpoint (needed if use_hf=0)")
parser.add_argument("--tokenizer_path", type=str, default=None, help="path to llama3 tokenizer (needed if use_hf=0)")
# file system input / output
# NOTE(review): the next two lines both register --input_bin. This excerpt
# looks like a diff rendering whose -/+ markers were stripped: the tinystories
# default is presumably the removed line and the tinyshakespeare default the
# added one (consistent with the commit title "use tinyshakespeare"). Only one
# of them can exist in the real file -- with argparse's default conflict
# handling, registering the same option string twice raises an error.
parser.add_argument("--input_bin", type=str, default="dev/data/tinystories/TinyStories_val.bin", help="input .bin to train on")
parser.add_argument("--input_bin", type=str, default="dev/data/tinyshakespeare/tiny_shakespeare_val.bin", help="input .bin to train on")
# Validation input and output directory both default to the empty string;
# how an empty value is interpreted is not visible in this excerpt.
parser.add_argument("--input_val_bin", type=str, default="", help="input .bin to eval validation loss on")
parser.add_argument("--output_dir", type=str, default="", help="output directory to which to write logs and checkpoints")
# Model selector; the default is the string "meta-llama/Meta-Llama-3.1-8B".
parser.add_argument("--model", type=str, default="meta-llama/Meta-Llama-3.1-8B", help="chose the llama model")
Expand All @@ -955,7 +955,7 @@ def print0(*args, **kwargs):
# Command-line options for run length and optimization, registered on an
# argparse-style `parser` object that is created outside this excerpt.
# Number of iterations (default 10) and an integer inference-only switch
# (default 0).
parser.add_argument("--num_iterations", type=int, default=10, help="number of iterations to run")
parser.add_argument("--inference_only", type=int, default=0, help="only run inference")
# optimization
# NOTE(review): the next two lines both register --learning_rate. As this
# excerpt looks like a diff rendering with the -/+ markers stripped, the 1e-4
# default is presumably the removed line and the 1e-5 default the added one
# (consistent with the commit title "decrease LR"). Only one of them can exist
# in the real file.
# NOTE(review): the help text of --learning_rate and of
# --learning_rate_decay_frac is identical to that of --warmup_iters
# ("learning rate warmup iterations"); it looks copy-pasted and does not
# describe those two flags.
parser.add_argument("--learning_rate", type=float, default=1e-4, help="learning rate warmup iterations")
parser.add_argument("--learning_rate", type=float, default=1e-5, help="learning rate warmup iterations")
# Warmup length defaults to 0 iterations.
parser.add_argument("--warmup_iters", type=int, default=0, help="learning rate warmup iterations")
# Decay fraction defaults to 1.0; its exact meaning in the schedule is not
# visible in this excerpt.
parser.add_argument("--learning_rate_decay_frac", type=float, default=1.0, help="learning rate warmup iterations")
# Weight decay defaults to 0.0.
parser.add_argument("--weight_decay", type=float, default=0.0, help="weight decay")
Expand Down

0 comments on commit bd8c604

Please sign in to comment.