use faster norm check
caic99 authored and iProzd committed Nov 27, 2024
1 parent 6b5f959 commit 107dbe7
Showing 1 changed file with 1 addition and 4 deletions.
5 changes: 1 addition & 4 deletions deepmd/pt/train/training.py
@@ -690,11 +690,8 @@ def step(_step_id, task_key="Default") -> None:
     loss.backward()
     if self.gradient_max_norm > 0.0:
         grad_norm = torch.nn.utils.clip_grad_norm_(
-            self.wrapper.parameters(), self.gradient_max_norm
+            self.wrapper.parameters(), self.gradient_max_norm, error_if_nonfinite=True
         )
-        if not torch.isfinite(grad_norm).all():
-            # check local gradnorm single GPU case, trigger NanDetector
-            raise FloatingPointError("gradients are Nan/Inf")
     with torch.device("cpu"):
         self.optimizer.step()
         self.scheduler.step()
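
For context: `torch.nn.utils.clip_grad_norm_` accepts an `error_if_nonfinite` flag (added in PyTorch 1.9) that raises a `RuntimeError` when the total gradient norm is NaN/Inf. Passing it folds the finiteness check into the clipping call itself, so the separate `torch.isfinite(grad_norm).all()` test becomes redundant. A minimal sketch of the new behavior; the toy model and variable names below are illustrative, not taken from the repository:

    import torch

    # Toy model standing in for self.wrapper (illustrative only).
    model = torch.nn.Linear(4, 1)
    loss = model(torch.randn(8, 4)).sum()
    loss.backward()

    # Simulate a divergent step by corrupting one gradient entry.
    model.weight.grad[0, 0] = float("nan")

    try:
        torch.nn.utils.clip_grad_norm_(
            model.parameters(), max_norm=1.0, error_if_nonfinite=True
        )
    except RuntimeError as err:
        # clip_grad_norm_ raises RuntimeError here, not the
        # FloatingPointError the removed hand-written check used to raise.
        print(f"non-finite gradients detected: {err}")

One behavioral note: any caller that previously caught the hand-raised `FloatingPointError` would now need to catch `RuntimeError`, since that is what `clip_grad_norm_` raises when the check fails.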
