NVIDIA · tkornuta-nvidia · Feb 1, 2020 · Jan 29, 2020 · Jan 31, 2020
diff --git a/examples/start_here/simplest_example.py b/examples/start_here/simplest_example.py
@@ -2,6 +2,7 @@
 import nemo
 
 nf = nemo.core.NeuralModuleFactory()
+
 # To use CPU-only do:
 # from nemo.core import DeviceType
 # nf = nemo.core.NeuralModuleFactory(placement=DeviceType.CPU)

diff --git a/nemo/backends/pytorch/actions.py b/nemo/backends/pytorch/actions.py
@@ -1334,7 +1334,11 @@ def train(
                         final_loss.backward(bps_scale.to(final_loss.get_device()))
                     # single device (CPU or GPU)
                     else:
-                        final_loss.backward(bps_scale.to(final_loss.get_device()))
+                        # Workaround enabling gradient backpropagation on CPU, where get_device() returns a negative index.
+                        if final_loss.get_device() < 0:
+                            final_loss.backward(bps_scale.to(final_loss))
+                        else:
+                            final_loss.backward(bps_scale.to(final_loss.get_device()))
 
                 batch_counter += 1