Lightning-AI · williamFalcon · Jun 27, 2020 · Jun 26, 2020 · Jun 26, 2020 · Jun 27, 2020
@@ -8,6 +8,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Added
 
+- Added TorchText support for moving data to GPU ([#2379](https://github.com/PyTorchLightning/pytorch-lightning/pull/2379))
+
 ### Changed
 
 - Changed epoch indexing from 0 instead of 1 ([#2289](https://github.com/PyTorchLightning/pytorch-lightning/pull/2289))

@@ -208,7 +208,7 @@ def transfer_batch_to_device(self, batch: Any, device: torch.device) -> Any:
         - :class:`list`
         - :class:`dict`
         - :class:`tuple`
-        - ``torchtext.data.Batch`` (COMING SOON)
+        - :class:`torchtext.data.Batch`
 
         For anything else, you need to define how the data is moved to the target device (CPU, GPU, TPU, ...).
 

@@ -3,6 +3,8 @@
 from typing import Any, Callable, Union
 
 import torch
+from torchtext.data import Batch
+from copy import copy
 
 
 def apply_to_collection(data: Any, dtype: Union[type, tuple], function: Callable, *args, **kwargs) -> Any:
@@ -34,6 +36,12 @@ def apply_to_collection(data: Any, dtype: Union[type, tuple], function: Callable
         return elem_type(*(apply_to_collection(d, dtype, function, *args, **kwargs) for d in data))
     elif isinstance(data, Sequence) and not isinstance(data, str):
         return elem_type([apply_to_collection(d, dtype, function, *args, **kwargs) for d in data])
+    elif isinstance(data, Batch):
+        new_batch = copy(data)  # shallow copy suffices: fields are reassigned on the copy, so the caller's batch is left unmodified
+        for field in data.fields:
+            new_data = apply_to_collection(getattr(data, field), dtype, function, *args, **kwargs)
+            setattr(new_batch, field, new_data)
+        return new_batch
 
     # data is neither of dtype, nor a collection
     return data

@@ -12,3 +12,4 @@ horovod>=0.19.1
 omegaconf>=2.0.0
 # scipy>=0.13.3
 scikit-learn>=0.20.0
+torchtext>=0.3.1
@@ -9,6 +9,7 @@
 from pytorch_lightning.trainer.distrib_parts import _parse_gpu_ids, determine_root_gpu_device
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.base import EvalModelTemplate
+from torchtext.data import Batch, Dataset, Example, Field, LabelField
 
 PRETEND_N_OF_GPUS = 16
 
@@ -298,3 +299,32 @@ def to(self, *args, **kwargs):
 
     batch = trainer.transfer_batch_to_gpu(CustomBatchType())
     assert batch.a.type() == 'torch.cuda.FloatTensor'
+
+    # torchtext.data.Batch
+    samples = [
+        {'text': 'PyTorch Lightning is awesome!', 'label': 0},
+        {'text': 'Please make it work with torchtext', 'label': 1}
+    ]
+
+    text_field = Field()
+    label_field = LabelField()
+    fields = {
+        'text': ('text', text_field),
+        'label': ('label', label_field)
+    }
+
+    examples = [Example.fromdict(sample, fields) for sample in samples]
+    dataset = Dataset(
+        examples=examples,
+        fields=fields.values()
+    )
+
+    # Batch runs field.process(), which numericalizes tokens, so each field's vocabulary must be built first
+    text_field.build_vocab(dataset)
+    label_field.build_vocab(dataset)
+
+    batch = Batch(data=examples, dataset=dataset)
+    batch = trainer.transfer_batch_to_gpu(batch, 0)
+
+    assert batch.text.type() == 'torch.cuda.LongTensor'
+    assert batch.label.type() == 'torch.cuda.LongTensor'