From 3beba227465558d91b490a4da02733d49055f615 Mon Sep 17 00:00:00 2001 From: Youssef Mohamed <91757835+youssefkhalil320@users.noreply.github.com> Date: Thu, 8 Aug 2024 20:48:02 +0300 Subject: [PATCH] Add qwen2 config file to mntp --- train_configs/mntp/qwen2.json | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 train_configs/mntp/qwen2.json diff --git a/train_configs/mntp/qwen2.json b/train_configs/mntp/qwen2.json new file mode 100644 index 0000000..a4427e6 --- /dev/null +++ b/train_configs/mntp/qwen2.json @@ -0,0 +1,26 @@ +{ + "model_name_or_path": "Qwen/Qwen2-0.5B-Instruct", + "dataset_name": "wikitext", + "dataset_config_name": "wikitext-103-raw-v1", + "per_device_train_batch_size": 32, + "per_device_eval_batch_size": 32, + "gradient_accumulation_steps": 1, + "do_train": true, + "do_eval": true, + "max_seq_length": 512, + "mask_token_type": "blank", + "data_collator_type": "default", + "mlm_probability": 0.2, + "overwrite_output_dir": true, + "output_dir": "output/mntp/Qwen2-0.5B", + "evaluation_strategy": "steps", + "eval_steps": 100, + "save_steps": 200, + "stop_after_n_steps": 1000, + "lora_r": 16, + "gradient_checkpointing": true, + "torch_dtype": "float16", + "attn_implementation": "eager", + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 2 +}