diff --git a/deepmd/pd/utils/auto_batch_size.py b/deepmd/pd/utils/auto_batch_size.py index 8cdb5ddea2..0eb5e46d5f 100644 --- a/deepmd/pd/utils/auto_batch_size.py +++ b/deepmd/pd/utils/auto_batch_size.py @@ -49,12 +49,8 @@ def is_oom_error(self, e: Exception) -> bool: # several sources think CUSOLVER_STATUS_INTERNAL_ERROR is another out-of-memory error, # such as https://github.com/JuliaGPU/CUDA.jl/issues/1924 # (the meaningless error message should be considered as a bug in cusolver) - if isinstance(e, RuntimeError) and ( - "CUDA out of memory." in e.args[0] - or "CUDA driver error: out of memory" in e.args[0] - or "cusolver error: CUSOLVER_STATUS_INTERNAL_ERROR" in e.args[0] - ): + if isinstance(e, MemoryError) and ("ResourceExhaustedError" in e.args[0]): # Release all unoccupied cached memory - # paddle.device.cuda.empty_cache() + paddle.device.cuda.empty_cache() return True return False