refactor: Drops support of np.float16 (#627)

* refactor: Removed support of fp16 in datasets
* test: Updated datasets tests
* refactor: Removed support of fp16 from preprocessors
* refactor: Dropped support of np.float16
* refactor: Removed unused imports
* test: Fixed unittests of preprocessor
* refactor: Removed fp16 from SVT
* refactor: Removed unused kwargs
mindee · Nov 16, 2021 · fc22516 · fc22516
1 parent eebd44a
commit fc22516
Show file tree

Hide file tree

Showing 19 changed files with 24 additions and 111 deletions.
diff --git a/doctr/datasets/cord.py b/doctr/datasets/cord.py
@@ -52,7 +52,6 @@ def __init__(
         # # List images
         tmp_root = os.path.join(self.root, 'image')
         self.data: List[Tuple[str, Dict[str, Any]]] = []
-        np_dtype = np.float16 if self.fp16 else np.float32
         self.train = train
         self.sample_transforms = sample_transforms
         for img_path in os.listdir(tmp_root):
@@ -74,7 +73,7 @@ def __init__(
                                     [x[1], y[1]],
                                     [x[2], y[2]],
                                     [x[3], y[3]],
-                                ], dtype=np_dtype)))
+                                ], dtype=np.float32)))
                             else:
                                 # Reduce 8 coords to 4
                                 box = [min(x), min(y), max(x), max(y)]

diff --git a/doctr/datasets/datasets/base.py b/doctr/datasets/datasets/base.py
@@ -20,14 +20,12 @@ class _AbstractDataset:
     def __init__(
         self,
         root: Union[str, Path],
-        fp16: bool = False,
     ) -> None:
 
         if not Path(root).is_dir():
             raise ValueError(f'expected a path to a reachable folder: {root}')
 
         self.root = root
-        self.fp16 = fp16
 
     def __len__(self) -> int:
         return len(self.data)
@@ -70,7 +68,6 @@ class _VisionDataset(_AbstractDataset):
         extract_archive: whether the downloaded file is an archive to be extracted
         download: whether the dataset should be downloaded if not present on disk
         overwrite: whether the archive should be re-extracted
-        fp16: should FP precision be switched to FP16
     """
 
     def __init__(
@@ -81,7 +78,6 @@ def __init__(
         extract_archive: bool = False,
         download: bool = False,
         overwrite: bool = False,
-        fp16: bool = False,
     ) -> None:
 
         dataset_cache = os.path.join(os.path.expanduser('~'), '.cache', 'doctr', 'datasets')
@@ -102,4 +98,4 @@ def __init__(
             if not dataset_path.is_dir() or overwrite:
                 shutil.unpack_archive(archive_path, dataset_path)
 
-        super().__init__(dataset_path if extract_archive else archive_path, fp16)
+        super().__init__(dataset_path if extract_archive else archive_path)
diff --git a/doctr/datasets/datasets/pytorch.py b/doctr/datasets/datasets/pytorch.py
@@ -24,7 +24,7 @@ def _get_img_shape(img: Any) -> Tuple[int, int]:
     def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]:
         img_name, target = self.data[index]
         # Read image
-        img = read_img_as_tensor(os.path.join(self.root, img_name), dtype=torch.float16 if self.fp16 else torch.float32)
+        img = read_img_as_tensor(os.path.join(self.root, img_name), dtype=torch.float32)
 
         return img, target
 

diff --git a/doctr/datasets/datasets/tensorflow.py b/doctr/datasets/datasets/tensorflow.py
@@ -24,7 +24,7 @@ def _get_img_shape(img: Any) -> Tuple[int, int]:
     def _read_sample(self, index: int) -> Tuple[tf.Tensor, Any]:
         img_name, target = self.data[index]
         # Read image
-        img = read_img_as_tensor(os.path.join(self.root, img_name), dtype=tf.float16 if self.fp16 else tf.float32)
+        img = read_img_as_tensor(os.path.join(self.root, img_name), dtype=tf.float32)
 
         return img, target
 

diff --git a/doctr/datasets/detection.py b/doctr/datasets/detection.py
@@ -37,9 +37,8 @@ def __init__(
         label_path: str,
         sample_transforms: Optional[Callable[[Any], Any]] = None,
         rotated_bbox: bool = False,
-        **kwargs: Any,
     ) -> None:
-        super().__init__(img_folder, **kwargs)
+        super().__init__(img_folder)
         self.sample_transforms = sample_transforms
 
         # File existence check
@@ -49,7 +48,6 @@ def __init__(
             labels = json.load(f)
 
         self.data: List[Tuple[str, np.ndarray]] = []
-        np_dtype = np.float16 if self.fp16 else np.float32
         for img_name, label in labels.items():
             polygons = np.asarray(label['polygons'])
             if rotated_bbox:
@@ -59,7 +57,7 @@ def __init__(
                 # Switch to xmin, ymin, xmax, ymax
                 boxes = np.concatenate((polygons.min(axis=1), polygons.max(axis=1)), axis=1)
 
-            self.data.append((img_name, np.asarray(boxes, dtype=np_dtype)))
+            self.data.append((img_name, np.asarray(boxes, dtype=np.float32)))
 
     def __getitem__(
         self,

diff --git a/doctr/datasets/doc_artefacts.py b/doctr/datasets/doc_artefacts.py
@@ -56,7 +56,7 @@ def __init__(
         img_list = os.listdir(tmp_root)
         if len(labels) != len(img_list):
             raise AssertionError('the number of images and labels do not match')
-        np_dtype = np.float16 if self.fp16 else np.float32
+        np_dtype = np.float32
         for img_name, label in labels.items():
             # File existence check
             if not os.path.exists(os.path.join(tmp_root, img_name)):
@@ -70,7 +70,7 @@ def __init__(
                     boxes[:, [1, 3]].mean(axis=1),
                     boxes[:, 2] - boxes[:, 0],
                     boxes[:, 3] - boxes[:, 1],
-                    np.zeros(boxes.shape[0], dtype=np.dtype),
+                    np.zeros(boxes.shape[0], dtype=np_dtype),
                 ), axis=1)
             self.data.append((img_name, dict(boxes=boxes, labels=classes)))
         self.root = tmp_root

diff --git a/doctr/datasets/iiit5k.py b/doctr/datasets/iiit5k.py
@@ -55,7 +55,7 @@ def __init__(
         mat_data = sio.loadmat(os.path.join(tmp_root, f'{mat_file}.mat'))[mat_file][0]
 
         self.data: List[Tuple[Path, Dict[str, Any]]] = []
-        np_dtype = np.float16 if self.fp16 else np.float32
+        np_dtype = np.float32
 
         for img_path, label, box_targets in mat_data:
             _raw_path = img_path[0]

diff --git a/doctr/datasets/ocr.py b/doctr/datasets/ocr.py
@@ -22,22 +22,20 @@ class OCRDataset(AbstractDataset):
         img_folder: local path to image folder (all jpg at the root)
         label_file: local path to the label file
         sample_transforms: composable transformations that will be applied to each image
-        **kwargs: keyword arguments from `VisionDataset`.
     """
 
     def __init__(
         self,
         img_folder: str,
         label_file: str,
         sample_transforms: Optional[Callable[[Any], Any]] = None,
-        **kwargs: Any,
     ) -> None:
-        super().__init__(img_folder, **kwargs)
+        super().__init__(img_folder)
         self.sample_transforms = sample_transforms
 
         # List images
         self.data: List[Tuple[str, Dict[str, Any]]] = []
-        np_dtype = np.float16 if self.fp16 else np.float32
+        np_dtype = np.float32
         with open(label_file, 'rb') as f:
             data = json.load(f)
 

diff --git a/doctr/datasets/recognition.py b/doctr/datasets/recognition.py
@@ -32,9 +32,8 @@ def __init__(
         img_folder: str,
         labels_path: str,
         sample_transforms: Optional[Callable[[Any], Any]] = None,
-        **kwargs: Any,
     ) -> None:
-        super().__init__(img_folder, **kwargs)
+        super().__init__(img_folder)
         self.sample_transforms = (lambda x: x) if sample_transforms is None else sample_transforms
 
         self.data: List[Tuple[str, str]] = []

diff --git a/doctr/datasets/sroie.py b/doctr/datasets/sroie.py
@@ -55,7 +55,7 @@ def __init__(
         # # List images
         tmp_root = os.path.join(self.root, 'images')
         self.data: List[Tuple[str, Dict[str, Any]]] = []
-        np_dtype = np.float16 if self.fp16 else np.float32
+        np_dtype = np.float32
         for img_path in os.listdir(tmp_root):
             # File existence check
             if not os.path.exists(os.path.join(tmp_root, img_path)):

diff --git a/doctr/datasets/svt.py b/doctr/datasets/svt.py
@@ -45,7 +45,7 @@ def __init__(
         self.sample_transforms = sample_transforms
         self.train = train
         self.data: List[Tuple[str, Dict[str, Any]]] = []
-        np_dtype = np.float16 if self.fp16 else np.float32
+        np_dtype = np.float32
 
         # Load xml data
         tmp_root = os.path.join(self.root, 'svt1')

diff --git a/doctr/models/detection/differentiable_binarization/base.py b/doctr/models/detection/differentiable_binarization/base.py
@@ -256,8 +256,8 @@ def compute_target(
         output_shape: Tuple[int, int, int],
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
 
-        if any(t.dtype not in (np.float32, np.float16) for t in target):
-            raise AssertionError("the expected dtype of target 'boxes' entry is either 'np.float32' or 'np.float16'.")
+        if any(t.dtype != np.float32 for t in target):
+            raise AssertionError("the expected dtype of target 'boxes' entry is 'np.float32'.")
         if any(np.any((t[:, :4] > 1) | (t[:, :4] < 0)) for t in target):
             raise ValueError("the 'boxes' entry of the target is expected to take values between 0 & 1.")
 

diff --git a/doctr/models/detection/linknet/base.py b/doctr/models/detection/linknet/base.py
@@ -111,8 +111,8 @@ def compute_target(
         output_shape: Tuple[int, int, int],
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
 
-        if any(t.dtype not in (np.float32, np.float16) for t in target):
-            raise AssertionError("the expected dtype of target 'boxes' entry is either 'np.float32' or 'np.float16'.")
+        if any(t.dtype != np.float32 for t in target):
+            raise AssertionError("the expected dtype of target 'boxes' entry is 'np.float32'.")
         if any(np.any((t[:, :4] > 1) | (t[:, :4] < 0)) for t in target):
             raise ValueError("the 'boxes' entry of the target is expected to take values between 0 & 1.")
 

diff --git a/doctr/models/preprocessor/pytorch.py b/doctr/models/preprocessor/pytorch.py
@@ -26,7 +26,6 @@ class PreProcessor(nn.Module):
         batch_size: the size of page batches
         mean: mean value of the training distribution by channel
         std: standard deviation of the training distribution by channel
-        fp16: whether returned batches should be in FP16
     """
 
     def __init__(
@@ -43,7 +42,6 @@ def __init__(
         self.resize: T.Resize = Resize(output_size, **kwargs)
         # Perform the division by 255 at the same time
         self.normalize = T.Normalize(mean, std)
-        self.fp16 = fp16
 
     def batch_inputs(
         self,
@@ -70,7 +68,7 @@ def sample_transforms(self, x: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
         if x.ndim != 3:
             raise AssertionError("expected list of 3D Tensors")
         if isinstance(x, np.ndarray):
-            if x.dtype not in (np.uint8, np.float16, np.float32):
+            if x.dtype not in (np.uint8, np.float32):
                 raise TypeError("unsupported data type for numpy.ndarray")
             x = torch.from_numpy(x.copy()).permute(2, 0, 1)
         elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
@@ -80,7 +78,7 @@ def sample_transforms(self, x: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
         # Data type
         if x.dtype == torch.uint8:
             x = x.to(dtype=torch.float32).div(255).clip(0, 1)
-        x = x.to(dtype=torch.float16 if self.fp16 else torch.float32)
+        x = x.to(dtype=torch.float32)
 
         return x
 
@@ -101,7 +99,7 @@ def __call__(
             if x.ndim != 4:
                 raise AssertionError("expected 4D Tensor")
             if isinstance(x, np.ndarray):
-                if x.dtype not in (np.uint8, np.float16, np.float32):
+                if x.dtype not in (np.uint8, np.float32):
                     raise TypeError("unsupported data type for numpy.ndarray")
                 x = torch.from_numpy(x.copy()).permute(0, 3, 1, 2)
             elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
@@ -112,7 +110,7 @@ def __call__(
             # Data type
             if x.dtype == torch.uint8:
                 x = x.to(dtype=torch.float32).div(255).clip(0, 1)
-            x = x.to(dtype=torch.float16 if self.fp16 else torch.float32)
+            x = x.to(dtype=torch.float32)
             batches = [x]
 
         elif isinstance(x, list) and all(isinstance(sample, (np.ndarray, torch.Tensor)) for sample in x):

diff --git a/doctr/models/preprocessor/tensorflow.py b/doctr/models/preprocessor/tensorflow.py
@@ -24,7 +24,6 @@ class PreProcessor(NestedObject):
         batch_size: the size of page batches
         mean: mean value of the training distribution by channel
         std: standard deviation of the training distribution by channel
-        fp16: whether returned batches should be in FP16
     """
 
     _children_names: List[str] = ['resize', 'normalize']
@@ -43,7 +42,6 @@ def __init__(
         self.resize = Resize(output_size, **kwargs)
         # Perform the division by 255 at the same time
         self.normalize = Normalize(mean, std)
-        self.fp16 = fp16
 
     def batch_inputs(
         self,
@@ -70,7 +68,7 @@ def sample_transforms(self, x: Union[np.ndarray, tf.Tensor]) -> tf.Tensor:
         if x.ndim != 3:
             raise AssertionError("expected list of 3D Tensors")
         if isinstance(x, np.ndarray):
-            if x.dtype not in (np.uint8, np.float16, np.float32):
+            if x.dtype not in (np.uint8, np.float32):
                 raise TypeError("unsupported data type for numpy.ndarray")
             x = tf.convert_to_tensor(x)
         elif x.dtype not in (tf.uint8, tf.float16, tf.float32):
@@ -100,7 +98,7 @@ def __call__(
             if x.ndim != 4:
                 raise AssertionError("expected 4D Tensor")
             if isinstance(x, np.ndarray):
-                if x.dtype not in (np.uint8, np.float16, np.float32):
+                if x.dtype not in (np.uint8, np.float32):
                     raise TypeError("unsupported data type for numpy.ndarray")
                 x = tf.convert_to_tensor(x)
             elif x.dtype not in (tf.uint8, tf.float16, tf.float32):
@@ -126,8 +124,4 @@ def __call__(
         # Batch transforms (normalize)
         batches = multithread_exec(self.normalize, batches)  # type: ignore[assignment]
 
-        # Resize outputs tf.float32
-        if self.fp16:
-            batches = [tf.cast(b, dtype=tf.float16) for b in batches]
-
         return batches
diff --git a/tests/pytorch/test_datasets_pt.py b/tests/pytorch/test_datasets_pt.py
@@ -1,5 +1,3 @@
-from copy import deepcopy
-
 import numpy as np
 import pytest
 import torch
@@ -58,11 +56,6 @@ def test_dataset(dataset_name, train, input_size, size, rotate):
     assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size)
     assert isinstance(targets, list) and all(isinstance(elt, dict) for elt in targets)
 
-    # FP16 checks
-    ds = datasets.__dict__[dataset_name](train=train, download=True, fp16=True)
-    img, target = ds[0]
-    assert img.dtype == torch.float16
-
 
 def test_detection_dataset(mock_image_folder, mock_detection_label):
 
@@ -99,13 +92,6 @@ def test_detection_dataset(mock_image_folder, mock_detection_label):
     _, r_target = rotated_ds[0]
     assert r_target.shape[1] == 5
 
-    # FP16
-    ds = datasets.DetectionDataset(img_folder=mock_image_folder, label_path=mock_detection_label, fp16=True)
-    img, target = ds[0]
-    assert img.dtype == torch.float16
-    # Bounding boxes
-    assert target.dtype == np.float16
-
 
 def test_recognition_dataset(mock_image_folder, mock_recognition_label):
     input_size = (32, 128)
@@ -126,14 +112,6 @@ def test_recognition_dataset(mock_image_folder, mock_recognition_label):
     assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size)
     assert isinstance(labels, list) and all(isinstance(elt, str) for elt in labels)
 
-    # FP16
-    ds = datasets.RecognitionDataset(img_folder=mock_image_folder, labels_path=mock_recognition_label, fp16=True)
-    image, label = ds[0]
-    assert image.dtype == torch.float16
-    ds2, ds3 = deepcopy(ds), deepcopy(ds)
-    ds2.merge_dataset(ds3)
-    assert len(ds2) == 2 * len(ds)
-
 
 def test_ocrdataset(mock_ocrdataset):
 
@@ -163,13 +141,6 @@ def test_ocrdataset(mock_ocrdataset):
     assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size)
     assert isinstance(targets, list) and all(isinstance(elt, dict) for elt in targets)
 
-    # FP16
-    ds = datasets.OCRDataset(*mock_ocrdataset, fp16=True)
-    img, target = ds[0]
-    assert img.dtype == torch.float16
-    # Bounding boxes
-    assert target['boxes'].dtype == np.float16
-
 
 def test_charactergenerator():
 

diff --git a/tests/pytorch/test_models_preprocessor_pt.py b/tests/pytorch/test_models_preprocessor_pt.py
@@ -10,13 +10,11 @@
     [
         [2, (128, 128), np.full((3, 256, 128, 3), 255, dtype=np.uint8), 1, .5],  # numpy uint8
         [2, (128, 128), np.ones((3, 256, 128, 3), dtype=np.float32), 1, .5],  # numpy fp32
-        [2, (128, 128), np.ones((3, 256, 128, 3), dtype=np.float16), 1, .5],  # numpy fp16
         [2, (128, 128), torch.full((3, 3, 256, 128), 255, dtype=torch.uint8), 1, .5],  # torch uint8
         [2, (128, 128), torch.ones((3, 3, 256, 128), dtype=torch.float32), 1, .5],  # torch fp32
         [2, (128, 128), torch.ones((3, 3, 256, 128), dtype=torch.float16), 1, .5],  # torch fp16
         [2, (128, 128), [np.full((256, 128, 3), 255, dtype=np.uint8)] * 3, 2, .5],  # list of numpy uint8
         [2, (128, 128), [np.ones((256, 128, 3), dtype=np.float32)] * 3, 2, .5],  # list of numpy fp32
-        [2, (128, 128), [np.ones((256, 128, 3), dtype=np.float16)] * 3, 2, .5],  # list of numpy fp16
         [2, (128, 128), [torch.full((3, 256, 128), 255, dtype=torch.uint8)] * 3, 2, .5],  # list of torch uint8
         [2, (128, 128), [torch.ones((3, 256, 128), dtype=torch.float32)] * 3, 2, .5],  # list of torch fp32
         [2, (128, 128), [torch.ones((3, 256, 128), dtype=torch.float16)] * 3, 2, .5],  # list of torch fp16
@@ -48,9 +46,3 @@ def test_preprocessor(batch_size, output_size, input_tensor, expected_batches, e
     assert all(b.shape[-2:] == output_size for b in out)
     assert all(torch.all(b == expected_value) for b in out)
     assert len(repr(processor).split('\n')) == 4
-
-    # Check FP16
-    processor = PreProcessor(output_size, batch_size, fp16=True)
-    with torch.no_grad():
-        out = processor(input_tensor)
-    assert all(b.dtype == torch.float16 for b in out)