diff --git a/utils/datasets.py b/utils/datasets.py index 4f9bd0f05d09..091d65336fb1 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -375,7 +375,7 @@ def img2label_paths(img_paths): class LoadImagesAndLabels(Dataset): # YOLOv5 train_loader/val_loader, loads images and labels for training and validation - cache_version = 0.5 # dataset labels *.cache version + cache_version = 0.6 # dataset labels *.cache version def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''): @@ -897,7 +897,7 @@ def verify_image_label(args): f.seek(-2, 2) if f.read() != b'\xff\xd9': # corrupt JPEG Image.open(im_file).save(im_file, format='JPEG', subsampling=0, quality=100) # re-save image - msg = f'{prefix}WARNING: corrupt JPEG restored and saved {im_file}' + msg = f'{prefix}WARNING: {im_file}: corrupt JPEG restored and saved' # verify labels if os.path.isfile(lb_file): @@ -909,11 +909,15 @@ def verify_image_label(args): segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...) l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) l = np.array(l, dtype=np.float32) - if len(l): - assert l.shape[1] == 5, 'labels require 5 columns each' - assert (l >= 0).all(), 'negative labels' - assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels' - assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels' + nl = len(l) + if nl: + assert l.shape[1] == 5, f'labels require 5 columns, {l.shape[1]} columns detected' + assert (l >= 0).all(), f'negative label values {l[l < 0]}' + assert (l[:, 1:] <= 1).all(), f'non-normalized or out of bounds coordinates {l[:, 1:][l[:, 1:] > 1]}' + l = np.unique(l, axis=0) # remove duplicate rows + if len(l) < nl: + segments = np.unique(segments, axis=0) + msg = f'{prefix}WARNING: {im_file}: {nl - len(l)} duplicate labels removed' else: ne = 1 # label empty l = np.zeros((0, 5), dtype=np.float32) @@ -923,7 +927,7 @@ def verify_image_label(args): return im_file, l, shape, segments, nm, nf, ne, nc, msg except Exception as e: nc = 1 - msg = f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}' + msg = f'{prefix}WARNING: {im_file}: ignoring corrupt image/label: {e}' return [None, None, None, None, nm, nf, ne, nc, msg]