# configs/config.yaml

# Path to the model weights directory (here a Llama-2-7b-chat HF checkpoint).
model: /model-weights/Llama-2-7b-chat-hf/
# Toggles Weights & Biases logging. Presumably `wandb_config` is only consulted
# when this is true -- confirm against the training script.
enable_wandb_logging: true

# Weights & Biases identification: the run name and the project it belongs to.
# NOTE(review): presumably forwarded to the W&B initialisation call -- confirm.
wandb_config:
  name: 'Llama-2-7B-chat'
  project: 'MedGPT'

# Settings for the training run: output location, memory options, optimizer,
# learning-rate schedule and checkpointing.
train_parameters:
  # Placeholder -- replace with the directory that should receive run outputs.
  output_dir: your/output/dir
  # Maximum sequence length (presumably in tokens -- TODO confirm).
  max_seq_len: 4096
  epochs: 1
  seed: 11

  # Sharding strategy
  # NOTE(review): the value looks like a PyTorch FSDP `ShardingStrategy` member
  # name -- confirm which values the trainer accepts.
  sharding_strategy: FULL_SHARD

  # Memory
  # NOTE(review): `use_mp` presumably stands for mixed precision -- confirm.
  use_mp: true
  use_activation_checkpointing: true
  use_flash_attention: true
  low_cpu_mem_usage: true

  # LoRA config: uncomment the block below to enable LoRA.
  # NOTE(review): the fields look like Hugging Face PEFT `LoraConfig`
  # arguments -- confirm against the trainer.
  #
  # lora_peft_config:
  #   task_type: CAUSAL_LM
  #   inference_mode: false
  #   r: 8
  #   lora_alpha: 32
  #   lora_dropout: 0.1

  # Gradient norm clipping
  max_grad_norm: 1

  # Gradient accumulation
  gradient_accumulation_steps: 4

  # Optimizer
  # Keep the decimal point in exponent notation (2.0e-5, not 2e-5): YAML 1.1
  # loaders such as PyYAML read `2e-5` as a string rather than a float.
  # NOTE(review): betas/eps/weight_decay look like Adam-family arguments --
  # confirm which optimizer the trainer builds.
  optimizer:
    lr: 2.0e-5
    weight_decay: 0.1
    betas: [0.9, 0.95]
    eps: 1.0e-5

  # Scheduler
  lr_scheduler_type: cosine
  # NOTE(review): presumably the fraction of total steps spent warming up --
  # confirm.
  warmup_ratio: 0.05

  # Checkpointing
  checkpointing_enabled: true
  logging_steps: 500
  # NOTE(review): fractional value -- presumably the fraction of an epoch
  # between checkpoints; confirm the expected range with the trainer.
  save_frequency: 0.25

# Dataset locations and batch sizes for the train and eval splits.
dataset:
  # Placeholders -- replace with the real dataset locations.
  train_ds: 'your/train/ds'
  eval_ds: 'your/eval/ds'
  train_bs: 8
  eval_bs: 8
  # NOTE(review): presumably the label value excluded from the loss; the same
  # value appears under `dataset_preprocess` -- confirm they must match.
  ignore_index: -100

# Options for preprocessing a raw dataset: where it is loaded from, how it is
# transformed, and where the processed result is saved.
dataset_preprocess:
  # NOTE(review): same value as `dataset.ignore_index` -- presumably the two
  # must stay in sync; confirm.
  ignore_index: -100
  dataset_format: hf
  # Name of the dataset field to read (presumably the raw text column).
  data_field: text
  # NOTE(review): accepted values are not visible here -- confirm with the
  # preprocessing script.
  packing_type: partial
  add_bos_eos_tokens: true
  from_disk: true
  # Placeholder -- replace with the location of the unprocessed dataset.
  load_path: your/unprocessed/dataset
  split: train
  # Placeholder -- replace with the directory for the processed dataset.
  save_path: dir/to/save/processed/dataset
  truncate: false