Hello. I'm having an issue with this part of your code [train_lora]:
trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=100,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=20,
        output_dir="lora-alpaca",
        save_total_limit=3,
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train(resume_from_checkpoint=False)
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ in <module>:1                                                                                     │
│ │
│ ❱ 1 trainer = transformers.Trainer( │
│ 2 │ model=model, │
│ 3 │ train_dataset=data["train"], │
│ 4 │ args=transformers.TrainingArguments( │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/transformers/trainer.py:476 in __init__          │
│ │
│ 473 │ │ self.tokenizer = tokenizer │
│ 474 │ │ │
│ 475 │ │ if self.place_model_on_device and not getattr(model, "is_loaded_in_8bit", False) │
│ ❱ 476 │ │ │ self._move_model_to_device(model, args.device) │
│ 477 │ │ │
│ 478 │ │ # Force n_gpu to 1 to avoid DataParallel as MP will manage the GPUs │
│ 479 │ │ if self.is_model_parallel: │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/transformers/trainer.py:715 in │
│ _move_model_to_device │
│ │
│ 712 │ │ self.callback_handler.remove_callback(callback) │
│ 713 │ │
│ 714 │ def _move_model_to_device(self, model, device): │
│ ❱ 715 │ │ model = model.to(device) │
│ 716 │ │ # Moving a model to an XLA device disconnects the tied weights, so we have to re │
│ 717 │ │ if self.args.parallel_mode == ParallelMode.TPU and hasattr(model, "tie_weights") │
│ 718 │ │ │ model.tie_weights() │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/transformers/modeling_utils.py:1811 in to │
│ │
│ 1808 │ │ │ │ " model has already been set to the correct devices and casted to the co │
│ 1809 │ │ │ ) │
│ 1810 │ │ else: │
│ ❱ 1811 │ │ │ return super().to(*args, **kwargs) │
│ 1812 │ │
│ 1813 │ def half(self, *args): │
│ 1814 │ │ # Checks if the model has been loaded in 8-bit │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1145 in to │
│ │
│ 1142 │ │ │ │ │ │ │ non_blocking, memory_format=convert_to_format) │
│ 1143 │ │ │ return t.to(device, dtype if t.is_floating_point() or t.is_complex() else No │
│ 1144 │ │ │
│ ❱ 1145 │ │ return self._apply(convert) │
│ 1146 │ │
│ 1147 │ def register_full_backward_pre_hook( │
│ 1148 │ │ self, │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:797 in _apply │
│ │
│ 794 │ │
│ 795 │ def _apply(self, fn): │
│ 796 │ │ for module in self.children(): │
│ ❱ 797 │ │ │ module._apply(fn) │
│ 798 │ │ │
│ 799 │ │ def compute_should_use_set_data(tensor, tensor_applied): │
│ 800 │ │ │ if torch._has_compatible_shallow_copy_type(tensor, tensor_applied): │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:820 in _apply │
│ │
│ 817 │ │ │ # track autograd history of `param_applied`, so we have to use │
│ 818 │ │ │ # `with torch.no_grad():` │
│ 819 │ │ │ with torch.no_grad(): │
│ ❱ 820 │ │ │ │ param_applied = fn(param) │
│ 821 │ │ │ should_use_set_data = compute_should_use_set_data(param, param_applied) │
│ 822 │ │ │ if should_use_set_data: │
│ 823 │ │ │ │ param.data = param_applied │
│ │
│ /home/user/anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py:1143 in │
│ convert │
│ │
│ 1140 │ │ │ if convert_to_format is not None and t.dim() in (4, 5): │
│ 1141 │ │ │ │ return t.to(device, dtype if t.is_floating_point() or t.is_complex() els │
│ 1142 │ │ │ │ │ │ │ non_blocking, memory_format=convert_to_format) │
│ ❱ 1143 │ │ │ return t.to(device, dtype if t.is_floating_point() or t.is_complex() else No │
│ 1144 │ │ │
│ 1145 │ │ return self._apply(convert) │
│ 1146 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
NotImplementedError: Cannot copy out of meta tensor; no data!
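
In case it helps with debugging: my understanding is that this error means some of the model's parameters are still on the "meta" device (shape and dtype only, no actual data), so the model.to(device) call inside Trainer.__init__ has nothing to copy. A minimal snippet that reproduces the same message, assuming that is indeed the cause:

import torch

# A "meta" tensor carries shape and dtype but no storage,
# so it cannot be copied to a real device.
t = torch.empty(2, 2, device="meta")
t.to("cpu")  # raises NotImplementedError: Cannot copy out of meta tensor; no data!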
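
For reference, the workaround I have seen suggested for this error (untested on my side) is to pin all of the 8-bit weights to a single GPU when loading the base model, so that device_map="auto" doesn't leave any shards on the meta device or offloaded to disk. A rough sketch, with a placeholder checkpoint name rather than the exact model used by train_lora:

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",  # placeholder: use the same base model as train_lora
    load_in_8bit=True,                # requires bitsandbytes
    device_map={"": 0},               # pin every layer to GPU 0 instead of device_map="auto"
)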