diff --git a/deepspeed/runtime/constants.py b/deepspeed/runtime/constants.py
index 9e73bad73376..d9f3ba1639e4 100755
--- a/deepspeed/runtime/constants.py
+++ b/deepspeed/runtime/constants.py
@@ -251,7 +251,7 @@
 "gradient_clipping": 1.0
 '''
 GRADIENT_CLIPPING = 'gradient_clipping'
-GRADIENT_CLIPPING_DEFAULT = 0.
+GRADIENT_CLIPPING_DEFAULT = 1.0
 
 #########################################
 # Capture graph for short kernels sequences