QLoRA (max_length 8196): 23366MiB / 81251MiB of GPU memory used.
config = LoraConfig(
    r=8,
    lora_alpha=32,
    inference_mode=False,
    target_modules=["q_proj", "v_proj", "down_proj", "up_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
Test code:
import os
import torch
import transformers
from transformers import (LlamaTokenizer, LlamaConfig, LlamaForCausalLM,
                          BitsAndBytesConfig)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import Dataset
# LOMOLoRATrainer, DataCollatorForCauselLM and EvalDataCollatorForCauselLM
# come from the LOMO repository (import path omitted here).

# Tokenizer
tokenizer = LlamaTokenizer.from_pretrained(model_id)
tokenizer.padding_side = "right"
tokenizer.bos_token_id = 1
tokenizer.eos_token_id = 2
tokenizer.pad_token_id = 0
tokenizer.unk_token_id = 0

# Initialize the model (4-bit NF4 quantization, bf16 compute)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
config = LlamaConfig.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
    load_in_4bit=True,
    use_safetensors=True,
    config=config,
    device_map={"": int(os.environ.get("LOCAL_RANK") or 0)}
)
model.bos_token_id = 1
model.eos_token_id = 2
model.pad_token_id = 0
model.unk_token_id = 0

model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

# Record the parameters that are trainable before LoRA is attached
peft_params = []
non_peft_names = []
non_peft_params = []
for name, param in model.named_parameters():
    if param.requires_grad is False:
        continue
    non_peft_names.append(name)
    non_peft_params.append(param)

config = LoraConfig(
    r=8,
    lora_alpha=32,
    inference_mode=False,
    target_modules=["q_proj", "v_proj", "down_proj", "up_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, config)
print_trainable_parameters(model)

# NOTE: training_args is only assigned further below in this snippet.
for name, param in model.named_parameters():
    if name.split('base_model.model.')[1] in non_peft_names:
        if not training_args.lora_only:
            param.requires_grad = True
    if "lora_" in name:
        peft_params.append(param)

torch.cuda.empty_cache()

# Load the data
train_data = Dataset.from_list(ff)  # omitted
valid_data = None

def tokenize(item, cutoff_len=cutoff_len):
    result = {}
    input_ids, labels, conversation = _addrole_masklabel_tokenize(item)  # omitted
    attention_mask = [1] * len(input_ids)
    result['input_ids'] = input_ids[:cutoff_len]
    result['attention_mask'] = attention_mask[:cutoff_len]
    result['labels'] = labels[:cutoff_len]
    return result

train_data = train_data.map(tokenize)

# ========== Initialize our Trainer. ==========
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=bs,
    gradient_accumulation_steps=8,
    warmup_steps=1000,
    optim="paged_adamw_32bit",
    learning_rate=1e-5,
    num_train_epochs=2,
    fp16=True,
    logging_steps=10,
    save_strategy="steps",
    save_steps=100,
    output_dir=output_dir,
    save_total_limit=1,
    load_best_model_at_end=False,
    ddp_find_unused_parameters=False,  # if ddp else None,
    deepspeed="ds.config"
    # group_by_length=True
)
training_args.lora_only = True
training_args.do_train = True
training_args.hf_weight_decay = 0.1
training_args.hf_lr_scheduler_type = "cosine"
training_args.clip_loss_value = 20.0
training_args.gradient_clipping = 10.0

trainer = LOMOLoRATrainer(
    model=model,
    training_args=training_args,
    data_collator={'train': DataCollatorForCauselLM(tokenizer, max_length=8196, padding_side='right'),
                   'eval': EvalDataCollatorForCauselLM(tokenizer, max_length=8196, padding_side='right')},
    train_dataset=train_data,
    eval_dataset=valid_data,
    tokenizer=tokenizer,
    compute_metrics=None,
    optimizers={'model_parameters': peft_params},
)
trainer.train()
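The snippet above calls a print_trainable_parameters helper whose definition is not included in the issue; a minimal sketch of such a helper (the implementation below is an assumption, it simply reports trainable vs. total parameter counts) would be:

# Hypothetical helper, not from the original issue: counts the parameters
# that will actually receive gradients after get_peft_model().
def print_trainable_parameters(model):
    trainable, total = 0, 0
    for _, param in model.named_parameters():
        total += param.numel()
        if param.requires_grad:
            trainable += param.numel()
    print(f"trainable params: {trainable} || all params: {total} || "
          f"trainable%: {100 * trainable / total:.4f}")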
PS: With DeepSpeed ZeRO-2, QLoRA uses roughly 13432MiB / 81251MiB.
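The deepspeed="ds.config" argument in the snippet points to a DeepSpeed config file that is not shown in the issue. As a rough sketch (the values below are assumptions, not the reporter's actual settings), a ZeRO-2 config can also be passed to transformers.TrainingArguments as a plain dict; whether LOMOLoRATrainer consumes it the same way as the stock HF Trainer is not confirmed here:

import transformers

# Hypothetical ZeRO-2 configuration; the real ds.config is not shown in the issue.
ds_zero2_config = {
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
    "fp16": {"enabled": "auto"},
    "zero_optimization": {
        "stage": 2,
        "overlap_comm": True,
        "contiguous_gradients": True,
    },
}

# TrainingArguments accepts either a path to a JSON file or a dict here:
# training_args = transformers.TrainingArguments(..., deepspeed=ds_zero2_config)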
It looks like quantization is not actually being applied? I haven't tested training combined with quantization yet.
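One way to check whether 4-bit quantization actually took effect (a sketch, assuming a recent bitsandbytes is installed) is to look for Linear4bit modules and at the weight-only memory footprint of the loaded model:

import bitsandbytes as bnb

# If this prints 0, the quantization_config / load_in_4bit path did not take
# effect and the weights are still held in bf16.
quantized = [name for name, module in model.named_modules()
             if isinstance(module, bnb.nn.Linear4bit)]
print(f"{len(quantized)} Linear4bit modules found")
print(f"{model.get_memory_footprint() / 1024**2:.0f} MiB of weights")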