Fix gather when collecting 'num_input_tokens_seen' (#31974)
* Move token count to device before gathering

* Run 'make style; make quality'
CodeCreator authored Jul 16, 2024
1 parent c22efa6 commit e391706
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions src/transformers/trainer.py
@@ -2245,12 +2245,17 @@ def _inner_training_loop(
                             "a `main_input_name` attribute to the model class you are using."
                         )
                     else:
-                        input_device = inputs[main_input_name].device
-                        self.state.num_input_tokens_seen += torch.sum(
-                            self.accelerator.gather(
-                                torch.tensor(inputs[main_input_name].numel(), device=input_device, dtype=torch.int64)
+                        self.state.num_input_tokens_seen += (
+                            torch.sum(
+                                self.accelerator.gather(
+                                    torch.tensor(
+                                        inputs[main_input_name].numel(), device=self.args.device, dtype=torch.int64
+                                    )
+                                )
                             )
-                        ).item()
+                            .cpu()
+                            .item()
+                        )
                 if rng_to_sync:
                     self._load_rng_state(resume_from_checkpoint)
                     rng_to_sync = False
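The change keeps the per-process token count on `self.args.device` so that `accelerator.gather` can collect one value from every process, and only moves the summed result to CPU before calling `.item()`. Below is a minimal sketch of that pattern outside the Trainer, assuming an `accelerate` setup; the `input_ids` batch and variable names are illustrative and not taken from the commit.

import torch
from accelerate import Accelerator

accelerator = Accelerator()

# Illustrative per-process batch; in the Trainer this would be inputs[main_input_name].
input_ids = torch.randint(0, 32_000, (8, 128))

# Put the local token count on the accelerator's device as a 0-dim int64 tensor,
# so gather() can collect one value from each process.
local_tokens = torch.tensor(input_ids.numel(), device=accelerator.device, dtype=torch.int64)

# Sum the gathered per-process counts, then move to CPU before .item(),
# mirroring the order of operations in this commit.
global_tokens = torch.sum(accelerator.gather(local_tokens)).cpu().item()

if accelerator.is_main_process:
    print(f"Tokens seen this step across all processes: {global_tokens}")

Run under a single process, `accelerator.gather` simply returns the local tensor, so the same code works both with and without distributed launch.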
