Skip to content

Commit

Permalink
Multi-threaded VisDrone and VOC downloads (#7108)
Browse files (browse the repository at this point in the history)
* Multi-threaded VOC download

* Update VOC.yaml

* Update

* Update general.py

* Update general.py
  • Loading branch information
glenn-jocher committed Mar 23, 2022
1 parent ecc2c7b commit c3ae4e4
Show file tree
Hide file tree
Showing 7 changed files with 13 additions and 6 deletions.
1 change: 1 addition & 0 deletions data/GlobalWheat2020.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ names: ['wheat_head'] # class names
download: |
from utils.general import download, Path
# Download
dir = Path(yaml['path']) # dataset root dir
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
Expand Down
1 change: 1 addition & 0 deletions data/Objects365.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ download: |
from utils.general import Path, download, np, xyxy2xywhn
# Make Directories
dir = Path(yaml['path']) # dataset root dir
for p in 'images', 'labels':
Expand Down
1 change: 1 addition & 0 deletions data/SKU-110K.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ download: |
from tqdm import tqdm
from utils.general import np, pd, Path, download, xyxy2xywh
# Download
dir = Path(yaml['path']) # dataset root dir
parent = Path(dir.parent) # download dir
Expand Down
2 changes: 1 addition & 1 deletion data/VOC.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ download: |
urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
- download(urls, dir=dir / 'images', delete=False)
+ download(urls, dir=dir / 'images', delete=False, threads=3)
# Convert
path = dir / f'images/VOCdevkit'
Expand Down
2 changes: 1 addition & 1 deletion data/VisDrone.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ download: |
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
- download(urls, dir=dir)
+ download(urls, dir=dir, threads=4)
# Convert
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
Expand Down
1 change: 1 addition & 0 deletions data/coco.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't
download: |
from utils.general import download, Path
# Download labels
segments = False # segment or box labels
dir = Path(yaml['path']) # dataset root dir
Expand Down
11 changes: 7 additions & 4 deletions utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,8 +449,9 @@ def check_dataset(data, autodownload=True):
if val:
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
if not all(x.exists() for x in val):
- LOGGER.info('\nDataset not found, missing paths: %s' % [str(x) for x in val if not x.exists()])
+ LOGGER.info(emojis('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()]))
  if s and autodownload: # download script
+ t = time.time()
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
if s.startswith('http') and s.endswith('.zip'): # URL
f = Path(s).name # filename
Expand All @@ -465,9 +466,11 @@ def check_dataset(data, autodownload=True):
r = os.system(s)
else: # python script
r = exec(s, {'yaml': data}) # return None
- LOGGER.info(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}\n")
+ dt = f'({round(time.time() - t, 1)}s)'
+ s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt} ❌"
+ LOGGER.info(emojis(f"Dataset download {s}"))
  else:
- raise Exception('Dataset not found.')
+ raise Exception(emojis('Dataset not found ❌'))

return data # dictionary

Expand All @@ -491,7 +494,7 @@ def download_one(url, dir):
if curl:
os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -") # curl download, retry and resume on fail
else:
- torch.hub.download_url_to_file(url, f, progress=True) # torch download
+ torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download
if unzip and f.suffix in ('.zip', '.gz'):
LOGGER.info(f'Unzipping {f}...')
if f.suffix == '.zip':
Expand Down

0 comments on commit c3ae4e4

Please sign in to comment.