diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 478ea75472717..bb34d9c1873ac 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -5316,7 +5316,13 @@ def _accumulate_gradients_with_fuse(self, main_block, fp16, fused_size):
                 "copy_data": False,
                 "use_align": True,
                 "dtype": grads[0].dtype,
-                self._op_role_key: self._op_role.Backward
+                self._op_role_key: self._op_role.Backward,
+                # On NPU, the nan/inf check logic differs from that on GPU.
+                # If the fused var contains uninitialized sections and the
+                # values in those sections happen to be nan/inf, the check is
+                # triggered spuriously. To avoid this, set_constant is needed on NPU.
+                "set_constant": core.is_compiled_with_npu(),
+                "constant": float(0.0),
             })
         offset += 1
         # For the gradient_merged_fused_var, given a init value during the coalesce op
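As background for the change, here is a minimal sketch (plain NumPy, not Paddle's implementation; the buffer layout, padding rule, and the `nan_inf_check` helper are illustrative assumptions) of why alignment padding in a fused buffer can trip a whole-buffer nan/inf check, and how zero-filling the buffer up front, which is what `set_constant` with `constant=0.0` requests from the `coalesce_tensor` op, avoids the false positive:

```python
import numpy as np

def fused_buffer(grad_sizes, align=4, zero_fill=False):
    """Emulate coalescing several grads into one fused buffer.

    Each grad is padded up to a multiple of `align` elements. The padding
    gaps are never written, so they keep whatever the memory held before;
    here we simulate that "uninitialized" garbage with nan.
    """
    padded = [((s + align - 1) // align) * align for s in grad_sizes]
    buf = np.full(sum(padded), np.nan, dtype=np.float32)
    if zero_fill:
        # The behavior requested by set_constant=True, constant=0.0.
        buf[:] = 0.0
    offset = 0
    for size, padded_size in zip(grad_sizes, padded):
        buf[offset:offset + size] = 1.0  # real gradient values land here
        offset += padded_size            # padding gap is left untouched
    return buf

def nan_inf_check(buf):
    # A whole-buffer finiteness check, as a device runtime might apply
    # to the fused var (illustrative stand-in for the NPU-side check).
    return bool(np.isfinite(buf).all())

print(nan_inf_check(fused_buffer([3, 5])))                  # False: padding trips the check
print(nan_inf_check(fused_buffer([3, 5], zero_fill=True)))  # True: zero fill avoids it
```

In the diff, `set_constant` is gated on `core.is_compiled_with_npu()`, so the extra zero-fill only runs on the NPU path where the stricter check applies.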