From 0a418cb9fc7a0a91156170eff20700f3baee41df Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 21 Apr 2021 21:14:01 +0200 Subject: [PATCH 1/5] VisDrone Dataset Auto-Download --- data/argoverse_hd.yaml | 2 +- data/coco.yaml | 2 +- data/coco128.yaml | 2 +- data/voc.yaml | 2 +- utils/general.py | 33 +++++++++++++++++++++++++++++---- 5 files changed, 33 insertions(+), 8 deletions(-) diff --git a/data/argoverse_hd.yaml b/data/argoverse_hd.yaml index df7a9361e769..0ba314d82ce1 100644 --- a/data/argoverse_hd.yaml +++ b/data/argoverse_hd.yaml @@ -1,6 +1,6 @@ # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ # Train command: python train.py --data argoverse_hd.yaml -# Default dataset location is next to /yolov5: +# Default dataset location is next to YOLOv5: # /parent_folder # /argoverse # /yolov5 diff --git a/data/coco.yaml b/data/coco.yaml index fa33a1210004..f818a49ff0fa 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -1,6 +1,6 @@ # COCO 2017 dataset http://cocodataset.org # Train command: python train.py --data coco.yaml -# Default dataset location is next to /yolov5: +# Default dataset location is next to YOLOv5: # /parent_folder # /coco # /yolov5 diff --git a/data/coco128.yaml b/data/coco128.yaml index c41bccf2b8d5..83fbc29d3404 100644 --- a/data/coco128.yaml +++ b/data/coco128.yaml @@ -1,6 +1,6 @@ # COCO 2017 dataset http://cocodataset.org - first 128 training images # Train command: python train.py --data coco128.yaml -# Default dataset location is next to /yolov5: +# Default dataset location is next to YOLOv5: # /parent_folder # /coco128 # /yolov5 diff --git a/data/voc.yaml b/data/voc.yaml index 851a9e0b060c..ca293c4f091f 100644 --- a/data/voc.yaml +++ b/data/voc.yaml @@ -1,6 +1,6 @@ # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/ # Train command: python train.py --data voc.yaml -# Default dataset location is next to /yolov5: +# Default dataset location is next to YOLOv5: # /parent_folder # /VOC 
def download(url, dir='.', multi_thread=False):
    """Download one or more files into ``dir`` and unzip any ``.zip`` archives.

    Args:
        url: A single URL string, or an iterable of URL strings.
        dir: Destination directory (str or Path); created with parents if missing.
        multi_thread: If True, fetch with a pool of 8 threads. The call still
            blocks until every download has finished, and the first worker
            exception (if any) is re-raised in the caller.
    """

    def download_one(url, dir):
        # Download 1 file
        f = dir / Path(url).name  # filename
        print(f'Downloading {url} to {f}...')
        torch.hub.download_url_to_file(url, f, progress=True)  # download
        if f.suffix == '.zip':
            os.system(f'unzip -qo {f} -d {dir} && rm {f}')  # unzip -quiet -overwrite

    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)  # make directory

    # A bare string must be wrapped, not iterated: tuple('http…') yields single characters.
    urls = [url] if isinstance(url, str) else list(url)

    if multi_thread:
        # starmap blocks until all tasks are done and propagates worker errors;
        # a lazily-consumed imap on an unreferenced pool could be torn down early.
        with ThreadPool(8) as pool:  # 8 threads
            pool.starmap(download_one, zip(urls, repeat(dir)))
    else:
        for u in urls:
            download_one(u, dir)
def visdrone2yolo(dir):
    """Convert the VisDrone annotations under ``dir`` to YOLO label files.

    Reads every ``dir/annotations/*.txt``, looks up the matching
    ``dir/images/<stem>.jpg`` for its pixel size, and writes
    ``dir/labels/<name>.txt`` with one ``cls x_center y_center w h`` row per
    kept box (coordinates normalised by image width/height, 6 decimals).

    Args:
        dir: pathlib.Path of one dataset split (e.g. .../VisDrone2019-DET-train).
    """
    from PIL import Image
    from tqdm import tqdm

    def convert_box(size, box):
        # Convert VisDrone box (left, top, width, height in pixels) to YOLO xywh box
        dw = 1. / size[0]
        dh = 1. / size[1]
        return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh

    labels_dir = dir / 'labels'
    labels_dir.mkdir(parents=True, exist_ok=True)  # make labels directory
    pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
    for f in pbar:
        # Context manager closes the image handle; otherwise one descriptor leaks per image.
        with Image.open((dir / 'images' / f.name).with_suffix('.jpg')) as im:
            img_size = im.size

        lines = []
        with open(f, 'r') as file:  # read annotation.txt
            for row in (x.split(',') for x in file.read().strip().splitlines()):
                if row[4] == '0':  # VisDrone 'ignored regions' class 0
                    continue
                cls = int(row[5]) - 1  # shift category ids down by one
                box = convert_box(img_size, tuple(map(int, row[:4])))
                lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")

        # Build the label path with pathlib rather than string-replacing path separators.
        with open(labels_dir / f.name, 'w') as fl:
            fl.writelines(lines)  # write label.txt
# Download
dir = Path('../VisDrone')  # dataset directory
# One URL per element: without the separating comma, adjacent string literals are
# silently concatenated into a single invalid URL.
urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
download(urls, dir=dir)

# Convert
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
    visdrone2yolo(dir / d)  # convert VisDrone annotations to YOLO labels
cleanup VOC --- data/scripts/get_argoverse_hd.sh | 2 +- data/scripts/get_coco.sh | 2 +- data/scripts/get_voc.sh | 113 ++++++++++++------------------- 3 files changed, 47 insertions(+), 70 deletions(-) diff --git a/data/scripts/get_argoverse_hd.sh b/data/scripts/get_argoverse_hd.sh index caec61efed78..18131a6764d6 100644 --- a/data/scripts/get_argoverse_hd.sh +++ b/data/scripts/get_argoverse_hd.sh @@ -2,7 +2,7 @@ # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ # Download command: bash data/scripts/get_argoverse_hd.sh # Train command: python train.py --data argoverse_hd.yaml -# Default dataset location is next to /yolov5: +# Default dataset location is next to YOLOv5: # /parent_folder # /argoverse # /yolov5 diff --git a/data/scripts/get_coco.sh b/data/scripts/get_coco.sh index bbb1e9291d5b..caae37504780 100755 --- a/data/scripts/get_coco.sh +++ b/data/scripts/get_coco.sh @@ -2,7 +2,7 @@ # COCO 2017 dataset http://cocodataset.org # Download command: bash data/scripts/get_coco.sh # Train command: python train.py --data coco.yaml -# Default dataset location is next to /yolov5: +# Default dataset location is next to YOLOv5: # /parent_folder # /coco # /yolov5 diff --git a/data/scripts/get_voc.sh b/data/scripts/get_voc.sh index 13b83c28d706..4c04aaa95a29 100644 --- a/data/scripts/get_voc.sh +++ b/data/scripts/get_voc.sh @@ -2,7 +2,7 @@ # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/ # Download command: bash data/scripts/get_voc.sh # Train command: python train.py --data voc.yaml -# Default dataset location is next to /yolov5: +# Default dataset location is next to YOLOv5: # /parent_folder # /VOC # /yolov5 @@ -29,34 +29,27 @@ echo "Completed in" $runtime "seconds" echo "Splitting dataset..." python3 - "$@" <train.txt cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt -python3 - "$@" <