From 5e7b06096c49ba51ca0c779fa2169211cdef9031 Mon Sep 17 00:00:00 2001
From: UnglvKitDe <100289696+UnglvKitDe@users.noreply.github.com>
Date: Mon, 1 Aug 2022 12:08:46 +0200
Subject: [PATCH] Add tensor hooks and 10.0 gradient clipping (#8598)

* Add tensor hooks and gradient clipping

https://github.com/ultralytics/yolov5/issues/8578

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Remove retain_grad(), because it's not necessary

* Update train.py

* Simplify

* Update train.py

* Update train.py

* Update train.py

* Update train.py

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Glenn Jocher
---
 train.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/train.py b/train.py
index dc93c22d621a..6ada2a2f121b 100644
--- a/train.py
+++ b/train.py
@@ -131,6 +131,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
     freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
     for k, v in model.named_parameters():
         v.requires_grad = True  # train all layers
+        v.register_hook(lambda x: torch.nan_to_num(x))  # NaN to 0.0
         if any(x in k for x in freeze):
             LOGGER.info(f'freezing {k}')
             v.requires_grad = False
@@ -334,8 +335,10 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
             # Backward
             scaler.scale(loss).backward()

-            # Optimize
+            # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
             if ni - last_opt_step >= accumulate:
+                scaler.unscale_(optimizer)  # unscale gradients
+                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
                 scaler.step(optimizer)  # optimizer.step
                 scaler.update()
                 optimizer.zero_grad()
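
Below is a minimal, standalone sketch of the two techniques this patch adds to train.py: a per-parameter tensor hook that maps NaN gradients to 0.0, and AMP-safe gradient clipping where gradients are unscaled before clip_grad_norm_ so the max_norm=10.0 threshold applies to true gradient magnitudes. The hook, the unscale/clip/step ordering, and the 10.0 value come from the patch; the toy model, SGD optimizer, and random data are illustrative assumptions, not YOLOv5 code.

    # Sketch only: a toy model standing in for the YOLOv5 model in train.py.
    import torch
    import torch.nn as nn

    use_amp = torch.cuda.is_available()
    device = 'cuda' if use_amp else 'cpu'

    model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 1)).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    # Tensor hook: called with each parameter's gradient during backward();
    # returning a tensor replaces that gradient, so NaNs become 0.0.
    for v in model.parameters():
        v.requires_grad = True
        v.register_hook(lambda grad: torch.nan_to_num(grad))  # NaN to 0.0

    x = torch.randn(32, 8, device=device)
    y = torch.randn(32, 1, device=device)

    for step in range(3):
        with torch.cuda.amp.autocast(enabled=use_amp):
            loss = nn.functional.mse_loss(model(x), y)

        scaler.scale(loss).backward()

        # Clipping must see unscaled gradients, hence unscale_ first
        # (see https://pytorch.org/docs/master/notes/amp_examples.html).
        scaler.unscale_(optimizer)  # unscale gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
        scaler.step(optimizer)  # skips the step if inf/NaN gradients were detected
        scaler.update()
        optimizer.zero_grad()

The ordering matters: once unscale_() has been called for an optimizer, scaler.step() will not unscale again, so clipping between the two operates on real gradient values while the scaler still skips the step if overflow was detected.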