diff --git a/deepspeed/runtime/zero/stage3.py b/deepspeed/runtime/zero/stage3.py index 4dfb91676525..c0fd9d6625c7 100644 --- a/deepspeed/runtime/zero/stage3.py +++ b/deepspeed/runtime/zero/stage3.py @@ -1376,8 +1376,6 @@ def partition_grads(self, params_to_release: List[Parameter], grad_partitions: L # offload the gradient partition if applicable if self.offload_optimizer: i, dest_offset, _ = self.grad_position[self.get_param_id(param)] - offload_fp32_gradients = {} - offload_fp32_offsets = {} if self.is_gradient_accumulation_boundary: self.norm_for_param_grads[self.get_param_id(param)] = self._constant_buffered_norm2(grad_buffer)