Skip to content

Commit

Permalink
refactor: Drops support of np.float16 (#627)
Browse files Browse the repository at this point in the history
* refactor: Removed support of fp16 in datasets

* test: Updated datasets tests

* refactor: Removed support of fp16 from preprocessors

* refactor: Dropped support of np.float16

* refactor: Removed unused imports

* test: Fixed unittests of preprocessor

* refactor: Removed fp16 from SVT

* refactor: Removed unused kwargs
  • Loading branch information
fg-mindee authored Nov 16, 2021
1 parent eebd44a commit fc22516
Show file tree
Hide file tree
Showing 19 changed files with 24 additions and 111 deletions.
3 changes: 1 addition & 2 deletions doctr/datasets/cord.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def __init__(
# # List images
tmp_root = os.path.join(self.root, 'image')
self.data: List[Tuple[str, Dict[str, Any]]] = []
np_dtype = np.float16 if self.fp16 else np.float32
self.train = train
self.sample_transforms = sample_transforms
for img_path in os.listdir(tmp_root):
Expand All @@ -74,7 +73,7 @@ def __init__(
[x[1], y[1]],
[x[2], y[2]],
[x[3], y[3]],
], dtype=np_dtype)))
], dtype=np.float32)))
else:
# Reduce 8 coords to 4
box = [min(x), min(y), max(x), max(y)]
Expand Down
6 changes: 1 addition & 5 deletions doctr/datasets/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,12 @@ class _AbstractDataset:
def __init__(
self,
root: Union[str, Path],
fp16: bool = False,
) -> None:

if not Path(root).is_dir():
raise ValueError(f'expected a path to a reachable folder: {root}')

self.root = root
self.fp16 = fp16

def __len__(self) -> int:
return len(self.data)
Expand Down Expand Up @@ -70,7 +68,6 @@ class _VisionDataset(_AbstractDataset):
extract_archive: whether the downloaded file is an archive to be extracted
download: whether the dataset should be downloaded if not present on disk
overwrite: whether the archive should be re-extracted
fp16: should FP precision be switched to FP16
"""

def __init__(
Expand All @@ -81,7 +78,6 @@ def __init__(
extract_archive: bool = False,
download: bool = False,
overwrite: bool = False,
fp16: bool = False,
) -> None:

dataset_cache = os.path.join(os.path.expanduser('~'), '.cache', 'doctr', 'datasets')
Expand All @@ -102,4 +98,4 @@ def __init__(
if not dataset_path.is_dir() or overwrite:
shutil.unpack_archive(archive_path, dataset_path)

super().__init__(dataset_path if extract_archive else archive_path, fp16)
super().__init__(dataset_path if extract_archive else archive_path)
2 changes: 1 addition & 1 deletion doctr/datasets/datasets/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def _get_img_shape(img: Any) -> Tuple[int, int]:
def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]:
img_name, target = self.data[index]
# Read image
img = read_img_as_tensor(os.path.join(self.root, img_name), dtype=torch.float16 if self.fp16 else torch.float32)
img = read_img_as_tensor(os.path.join(self.root, img_name), dtype=torch.float32)

return img, target

Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/datasets/tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def _get_img_shape(img: Any) -> Tuple[int, int]:
def _read_sample(self, index: int) -> Tuple[tf.Tensor, Any]:
img_name, target = self.data[index]
# Read image
img = read_img_as_tensor(os.path.join(self.root, img_name), dtype=tf.float16 if self.fp16 else tf.float32)
img = read_img_as_tensor(os.path.join(self.root, img_name), dtype=tf.float32)

return img, target

Expand Down
6 changes: 2 additions & 4 deletions doctr/datasets/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,8 @@ def __init__(
label_path: str,
sample_transforms: Optional[Callable[[Any], Any]] = None,
rotated_bbox: bool = False,
**kwargs: Any,
) -> None:
super().__init__(img_folder, **kwargs)
super().__init__(img_folder)
self.sample_transforms = sample_transforms

# File existence check
Expand All @@ -49,7 +48,6 @@ def __init__(
labels = json.load(f)

self.data: List[Tuple[str, np.ndarray]] = []
np_dtype = np.float16 if self.fp16 else np.float32
for img_name, label in labels.items():
polygons = np.asarray(label['polygons'])
if rotated_bbox:
Expand All @@ -59,7 +57,7 @@ def __init__(
# Switch to xmin, ymin, xmax, ymax
boxes = np.concatenate((polygons.min(axis=1), polygons.max(axis=1)), axis=1)

self.data.append((img_name, np.asarray(boxes, dtype=np_dtype)))
self.data.append((img_name, np.asarray(boxes, dtype=np.float32)))

def __getitem__(
self,
Expand Down
4 changes: 2 additions & 2 deletions doctr/datasets/doc_artefacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def __init__(
img_list = os.listdir(tmp_root)
if len(labels) != len(img_list):
raise AssertionError('the number of images and labels do not match')
np_dtype = np.float16 if self.fp16 else np.float32
np_dtype = np.float32
for img_name, label in labels.items():
# File existence check
if not os.path.exists(os.path.join(tmp_root, img_name)):
Expand All @@ -70,7 +70,7 @@ def __init__(
boxes[:, [1, 3]].mean(axis=1),
boxes[:, 2] - boxes[:, 0],
boxes[:, 3] - boxes[:, 1],
np.zeros(boxes.shape[0], dtype=np.dtype),
np.zeros(boxes.shape[0], dtype=np_dtype),
), axis=1)
self.data.append((img_name, dict(boxes=boxes, labels=classes)))
self.root = tmp_root
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/iiit5k.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def __init__(
mat_data = sio.loadmat(os.path.join(tmp_root, f'{mat_file}.mat'))[mat_file][0]

self.data: List[Tuple[Path, Dict[str, Any]]] = []
np_dtype = np.float16 if self.fp16 else np.float32
np_dtype = np.float32

for img_path, label, box_targets in mat_data:
_raw_path = img_path[0]
Expand Down
6 changes: 2 additions & 4 deletions doctr/datasets/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,20 @@ class OCRDataset(AbstractDataset):
img_folder: local path to image folder (all jpg at the root)
label_file: local path to the label file
sample_transforms: composable transformations that will be applied to each image
**kwargs: keyword arguments from `VisionDataset`.
"""

def __init__(
self,
img_folder: str,
label_file: str,
sample_transforms: Optional[Callable[[Any], Any]] = None,
**kwargs: Any,
) -> None:
super().__init__(img_folder, **kwargs)
super().__init__(img_folder)
self.sample_transforms = sample_transforms

# List images
self.data: List[Tuple[str, Dict[str, Any]]] = []
np_dtype = np.float16 if self.fp16 else np.float32
np_dtype = np.float32
with open(label_file, 'rb') as f:
data = json.load(f)

Expand Down
3 changes: 1 addition & 2 deletions doctr/datasets/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@ def __init__(
img_folder: str,
labels_path: str,
sample_transforms: Optional[Callable[[Any], Any]] = None,
**kwargs: Any,
) -> None:
super().__init__(img_folder, **kwargs)
super().__init__(img_folder)
self.sample_transforms = (lambda x: x) if sample_transforms is None else sample_transforms

self.data: List[Tuple[str, str]] = []
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/sroie.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def __init__(
# # List images
tmp_root = os.path.join(self.root, 'images')
self.data: List[Tuple[str, Dict[str, Any]]] = []
np_dtype = np.float16 if self.fp16 else np.float32
np_dtype = np.float32
for img_path in os.listdir(tmp_root):
# File existence check
if not os.path.exists(os.path.join(tmp_root, img_path)):
Expand Down
2 changes: 1 addition & 1 deletion doctr/datasets/svt.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def __init__(
self.sample_transforms = sample_transforms
self.train = train
self.data: List[Tuple[str, Dict[str, Any]]] = []
np_dtype = np.float16 if self.fp16 else np.float32
np_dtype = np.float32

# Load xml data
tmp_root = os.path.join(self.root, 'svt1')
Expand Down
4 changes: 2 additions & 2 deletions doctr/models/detection/differentiable_binarization/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,8 @@ def compute_target(
output_shape: Tuple[int, int, int],
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:

if any(t.dtype not in (np.float32, np.float16) for t in target):
raise AssertionError("the expected dtype of target 'boxes' entry is either 'np.float32' or 'np.float16'.")
if any(t.dtype != np.float32 for t in target):
raise AssertionError("the expected dtype of target 'boxes' entry is 'np.float32'.")
if any(np.any((t[:, :4] > 1) | (t[:, :4] < 0)) for t in target):
raise ValueError("the 'boxes' entry of the target is expected to take values between 0 & 1.")

Expand Down
4 changes: 2 additions & 2 deletions doctr/models/detection/linknet/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ def compute_target(
output_shape: Tuple[int, int, int],
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:

if any(t.dtype not in (np.float32, np.float16) for t in target):
raise AssertionError("the expected dtype of target 'boxes' entry is either 'np.float32' or 'np.float16'.")
if any(t.dtype != np.float32 for t in target):
raise AssertionError("the expected dtype of target 'boxes' entry is 'np.float32'.")
if any(np.any((t[:, :4] > 1) | (t[:, :4] < 0)) for t in target):
raise ValueError("the 'boxes' entry of the target is expected to take values between 0 & 1.")

Expand Down
10 changes: 4 additions & 6 deletions doctr/models/preprocessor/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ class PreProcessor(nn.Module):
batch_size: the size of page batches
mean: mean value of the training distribution by channel
std: standard deviation of the training distribution by channel
fp16: whether returned batches should be in FP16
"""

def __init__(
Expand All @@ -43,7 +42,6 @@ def __init__(
self.resize: T.Resize = Resize(output_size, **kwargs)
# Perform the division by 255 at the same time
self.normalize = T.Normalize(mean, std)
self.fp16 = fp16

def batch_inputs(
self,
Expand All @@ -70,7 +68,7 @@ def sample_transforms(self, x: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
if x.ndim != 3:
raise AssertionError("expected list of 3D Tensors")
if isinstance(x, np.ndarray):
if x.dtype not in (np.uint8, np.float16, np.float32):
if x.dtype not in (np.uint8, np.float32):
raise TypeError("unsupported data type for numpy.ndarray")
x = torch.from_numpy(x.copy()).permute(2, 0, 1)
elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
Expand All @@ -80,7 +78,7 @@ def sample_transforms(self, x: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
# Data type
if x.dtype == torch.uint8:
x = x.to(dtype=torch.float32).div(255).clip(0, 1)
x = x.to(dtype=torch.float16 if self.fp16 else torch.float32)
x = x.to(dtype=torch.float32)

return x

Expand All @@ -101,7 +99,7 @@ def __call__(
if x.ndim != 4:
raise AssertionError("expected 4D Tensor")
if isinstance(x, np.ndarray):
if x.dtype not in (np.uint8, np.float16, np.float32):
if x.dtype not in (np.uint8, np.float32):
raise TypeError("unsupported data type for numpy.ndarray")
x = torch.from_numpy(x.copy()).permute(0, 3, 1, 2)
elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
Expand All @@ -112,7 +110,7 @@ def __call__(
# Data type
if x.dtype == torch.uint8:
x = x.to(dtype=torch.float32).div(255).clip(0, 1)
x = x.to(dtype=torch.float16 if self.fp16 else torch.float32)
x = x.to(dtype=torch.float32)
batches = [x]

elif isinstance(x, list) and all(isinstance(sample, (np.ndarray, torch.Tensor)) for sample in x):
Expand Down
10 changes: 2 additions & 8 deletions doctr/models/preprocessor/tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ class PreProcessor(NestedObject):
batch_size: the size of page batches
mean: mean value of the training distribution by channel
std: standard deviation of the training distribution by channel
fp16: whether returned batches should be in FP16
"""

_children_names: List[str] = ['resize', 'normalize']
Expand All @@ -43,7 +42,6 @@ def __init__(
self.resize = Resize(output_size, **kwargs)
# Perform the division by 255 at the same time
self.normalize = Normalize(mean, std)
self.fp16 = fp16

def batch_inputs(
self,
Expand All @@ -70,7 +68,7 @@ def sample_transforms(self, x: Union[np.ndarray, tf.Tensor]) -> tf.Tensor:
if x.ndim != 3:
raise AssertionError("expected list of 3D Tensors")
if isinstance(x, np.ndarray):
if x.dtype not in (np.uint8, np.float16, np.float32):
if x.dtype not in (np.uint8, np.float32):
raise TypeError("unsupported data type for numpy.ndarray")
x = tf.convert_to_tensor(x)
elif x.dtype not in (tf.uint8, tf.float16, tf.float32):
Expand Down Expand Up @@ -100,7 +98,7 @@ def __call__(
if x.ndim != 4:
raise AssertionError("expected 4D Tensor")
if isinstance(x, np.ndarray):
if x.dtype not in (np.uint8, np.float16, np.float32):
if x.dtype not in (np.uint8, np.float32):
raise TypeError("unsupported data type for numpy.ndarray")
x = tf.convert_to_tensor(x)
elif x.dtype not in (tf.uint8, tf.float16, tf.float32):
Expand All @@ -126,8 +124,4 @@ def __call__(
# Batch transforms (normalize)
batches = multithread_exec(self.normalize, batches) # type: ignore[assignment]

# Resize outputs tf.float32
if self.fp16:
batches = [tf.cast(b, dtype=tf.float16) for b in batches]

return batches
29 changes: 0 additions & 29 deletions tests/pytorch/test_datasets_pt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from copy import deepcopy

import numpy as np
import pytest
import torch
Expand Down Expand Up @@ -58,11 +56,6 @@ def test_dataset(dataset_name, train, input_size, size, rotate):
assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size)
assert isinstance(targets, list) and all(isinstance(elt, dict) for elt in targets)

# FP16 checks
ds = datasets.__dict__[dataset_name](train=train, download=True, fp16=True)
img, target = ds[0]
assert img.dtype == torch.float16


def test_detection_dataset(mock_image_folder, mock_detection_label):

Expand Down Expand Up @@ -99,13 +92,6 @@ def test_detection_dataset(mock_image_folder, mock_detection_label):
_, r_target = rotated_ds[0]
assert r_target.shape[1] == 5

# FP16
ds = datasets.DetectionDataset(img_folder=mock_image_folder, label_path=mock_detection_label, fp16=True)
img, target = ds[0]
assert img.dtype == torch.float16
# Bounding boxes
assert target.dtype == np.float16


def test_recognition_dataset(mock_image_folder, mock_recognition_label):
input_size = (32, 128)
Expand All @@ -126,14 +112,6 @@ def test_recognition_dataset(mock_image_folder, mock_recognition_label):
assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size)
assert isinstance(labels, list) and all(isinstance(elt, str) for elt in labels)

# FP16
ds = datasets.RecognitionDataset(img_folder=mock_image_folder, labels_path=mock_recognition_label, fp16=True)
image, label = ds[0]
assert image.dtype == torch.float16
ds2, ds3 = deepcopy(ds), deepcopy(ds)
ds2.merge_dataset(ds3)
assert len(ds2) == 2 * len(ds)


def test_ocrdataset(mock_ocrdataset):

Expand Down Expand Up @@ -163,13 +141,6 @@ def test_ocrdataset(mock_ocrdataset):
assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size)
assert isinstance(targets, list) and all(isinstance(elt, dict) for elt in targets)

# FP16
ds = datasets.OCRDataset(*mock_ocrdataset, fp16=True)
img, target = ds[0]
assert img.dtype == torch.float16
# Bounding boxes
assert target['boxes'].dtype == np.float16


def test_charactergenerator():

Expand Down
8 changes: 0 additions & 8 deletions tests/pytorch/test_models_preprocessor_pt.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@
[
[2, (128, 128), np.full((3, 256, 128, 3), 255, dtype=np.uint8), 1, .5], # numpy uint8
[2, (128, 128), np.ones((3, 256, 128, 3), dtype=np.float32), 1, .5], # numpy fp32
[2, (128, 128), np.ones((3, 256, 128, 3), dtype=np.float16), 1, .5], # numpy fp16
[2, (128, 128), torch.full((3, 3, 256, 128), 255, dtype=torch.uint8), 1, .5], # torch uint8
[2, (128, 128), torch.ones((3, 3, 256, 128), dtype=torch.float32), 1, .5], # torch fp32
[2, (128, 128), torch.ones((3, 3, 256, 128), dtype=torch.float16), 1, .5], # torch fp16
[2, (128, 128), [np.full((256, 128, 3), 255, dtype=np.uint8)] * 3, 2, .5], # list of numpy uint8
[2, (128, 128), [np.ones((256, 128, 3), dtype=np.float32)] * 3, 2, .5], # list of numpy fp32
[2, (128, 128), [np.ones((256, 128, 3), dtype=np.float16)] * 3, 2, .5], # list of numpy fp16
[2, (128, 128), [torch.full((3, 256, 128), 255, dtype=torch.uint8)] * 3, 2, .5], # list of torch uint8
[2, (128, 128), [torch.ones((3, 256, 128), dtype=torch.float32)] * 3, 2, .5], # list of torch fp32
[2, (128, 128), [torch.ones((3, 256, 128), dtype=torch.float16)] * 3, 2, .5], # list of torch fp16
Expand Down Expand Up @@ -48,9 +46,3 @@ def test_preprocessor(batch_size, output_size, input_tensor, expected_batches, e
assert all(b.shape[-2:] == output_size for b in out)
assert all(torch.all(b == expected_value) for b in out)
assert len(repr(processor).split('\n')) == 4

# Check FP16
processor = PreProcessor(output_size, batch_size, fp16=True)
with torch.no_grad():
out = processor(input_tensor)
assert all(b.dtype == torch.float16 for b in out)
Loading

0 comments on commit fc22516

Please sign in to comment.