diff --git a/data/Argoverse_HD.yaml b/data/Argoverse_HD.yaml new file mode 100644 index 000000000000..ad1a52254d74 --- /dev/null +++ b/data/Argoverse_HD.yaml @@ -0,0 +1,66 @@ +# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ +# Train command: python train.py --data Argoverse_HD.yaml +# Default dataset location is next to YOLOv5: +# /parent +# /datasets/Argoverse +# /yolov5 + + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +path: ../datasets/Argoverse # dataset root dir +train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images +val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images +test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview + +# Classes +nc: 8 # number of classes +names: [ 'person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign' ] # class names + + +# Download script/URL (optional) --------------------------------------------------------------------------------------- +download: | + import json + + from tqdm import tqdm + from utils.general import download, Path + + + def argoverse2yolo(set): + labels = {} + a = json.load(open(set, "rb")) + for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."): + img_id = annot['image_id'] + img_name = a['images'][img_id]['name'] + img_label_name = img_name[:-3] + "txt" + + cls = annot['category_id'] # instance class id + x_center, y_center, width, height = annot['bbox'] + x_center = (x_center + width / 2) / 1920.0 # offset and scale + y_center = (y_center + height / 2) / 1200.0 # offset and scale + width /= 1920.0 # scale + height /= 1200.0 # scale + + img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']] + if not img_dir.exists(): + img_dir.mkdir(parents=True, exist_ok=True) + + k = str(img_dir / img_label_name) + if k not in labels: + labels[k] = [] + labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n") + + for k in labels: + with open(k, "w") as f: + f.writelines(labels[k]) + + + # Download + dir = Path('../datasets/Argoverse') # dataset root dir + urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip'] + download(urls, dir=dir, delete=False) + + # Convert + annotations_dir = 'Argoverse-HD/annotations/' + (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images' + for d in "train.json", "val.json": + argoverse2yolo(dir / annotations_dir / d) # convert VisDrone annotations to YOLO labels diff --git a/data/GlobalWheat2020.yaml b/data/GlobalWheat2020.yaml index f45182b43e25..b77534944ed7 100644 --- a/data/GlobalWheat2020.yaml +++ b/data/GlobalWheat2020.yaml @@ -1,43 +1,40 @@ # Global Wheat 2020 dataset http://www.global-wheat.com/ # Train command: python train.py --data GlobalWheat2020.yaml # Default dataset location is next to YOLOv5: -# /parent_folder +# /parent # /datasets/GlobalWheat2020 # /yolov5 -# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] -train: # 3422 images - - ../datasets/GlobalWheat2020/images/arvalis_1 - - ../datasets/GlobalWheat2020/images/arvalis_2 - - ../datasets/GlobalWheat2020/images/arvalis_3 - - ../datasets/GlobalWheat2020/images/ethz_1 - - ../datasets/GlobalWheat2020/images/rres_1 - - ../datasets/GlobalWheat2020/images/inrae_1 - - ../datasets/GlobalWheat2020/images/usask_1 - -val: # 748 images (WARNING: train set contains ethz_1) - - ../datasets/GlobalWheat2020/images/ethz_1 - -test: # 1276 images - - ../datasets/GlobalWheat2020/images/utokyo_1 - - ../datasets/GlobalWheat2020/images/utokyo_2 - - ../datasets/GlobalWheat2020/images/nau_1 - - ../datasets/GlobalWheat2020/images/uq_1 - -# number of classes -nc: 1 - -# class names -names: [ 'wheat_head' ] - - -# download command/URL (optional) -------------------------------------------------------------------------------------- +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +path: ../datasets/GlobalWheat2020 # dataset root dir +train: # train images (relative to 'path') 3422 images + - images/arvalis_1 + - images/arvalis_2 + - images/arvalis_3 + - images/ethz_1 + - images/rres_1 + - images/inrae_1 + - images/usask_1 +val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1) + - images/ethz_1 +test: # test images (optional) 1276 images + - images/utokyo_1 + - images/utokyo_2 + - images/nau_1 + - images/uq_1 + +# Classes +nc: 1 # number of classes +names: [ 'wheat_head' ] # class names + + +# Download script/URL (optional) --------------------------------------------------------------------------------------- download: | from utils.general import download, Path # Download - dir = Path('../datasets/GlobalWheat2020') # dataset directory + dir = Path(yaml['path']) # dataset root dir urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip', 'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip'] download(urls, dir=dir) diff --git a/data/objects365.yaml b/data/Objects365.yaml similarity index 92% rename from data/objects365.yaml rename to data/Objects365.yaml index eb99995903cf..e365c82cab08 100644 --- a/data/objects365.yaml +++ b/data/Objects365.yaml @@ -1,18 +1,19 @@ # Objects365 dataset https://www.objects365.org/ -# Train command: python train.py --data objects365.yaml +# Train command: python train.py --data Objects365.yaml # Default dataset location is next to YOLOv5: -# /parent_folder -# /datasets/objects365 +# /parent +# /datasets/Objects365 # /yolov5 -# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] -train: ../datasets/objects365/images/train # 1742289 images -val: ../datasets/objects365/images/val # 5570 images -# number of classes -nc: 365 +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +path: ../datasets/Objects365 # dataset root dir +train: images/train # train images (relative to 'path') 1742289 images +val: images/val # val images (relative to 'path') 5570 images +test: # test images (optional) -# class names +# Classes +nc: 365 # number of classes names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup', 'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book', 'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag', @@ -56,7 +57,7 @@ names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Gl 'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis' ] -# download command/URL (optional) -------------------------------------------------------------------------------------- +# Download script/URL (optional) --------------------------------------------------------------------------------------- download: | from pycocotools.coco import COCO from tqdm import tqdm @@ -64,7 +65,7 @@ download: | from utils.general import download, Path # Make Directories - dir = Path('../datasets/objects365') # dataset directory + dir = Path(yaml['path']) # dataset root dir for p in 'images', 'labels': (dir / p).mkdir(parents=True, exist_ok=True) for q in 'train', 'val': diff --git a/data/SKU-110K.yaml b/data/SKU-110K.yaml index a8c1f25b385a..7087bb9c2893 100644 --- a/data/SKU-110K.yaml +++ b/data/SKU-110K.yaml @@ -1,39 +1,38 @@ # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 # Train command: python train.py --data SKU-110K.yaml # Default dataset location is next to YOLOv5: -# /parent_folder +# /parent # /datasets/SKU-110K # /yolov5 -# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] -train: ../datasets/SKU-110K/train.txt # 8219 images -val: ../datasets/SKU-110K/val.txt # 588 images -test: ../datasets/SKU-110K/test.txt # 2936 images +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +path: ../datasets/SKU-110K # dataset root dir +train: train.txt # train images (relative to 'path') 8219 images +val: val.txt # val images (relative to 'path') 588 images +test: test.txt # test images (optional) 2936 images -# number of classes -nc: 1 +# Classes +nc: 1 # number of classes +names: [ 'object' ] # class names -# class names -names: [ 'object' ] - -# download command/URL (optional) -------------------------------------------------------------------------------------- +# Download script/URL (optional) --------------------------------------------------------------------------------------- download: | import shutil from tqdm import tqdm from utils.general import np, pd, Path, download, xyxy2xywh # Download - datasets = Path('../datasets') # download directory + dir = Path(yaml['path']) # dataset root dir + parent = Path(dir.parent) # download dir urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz'] - download(urls, dir=datasets, delete=False) + download(urls, dir=parent, delete=False) # Rename directories - dir = (datasets / 'SKU-110K') if dir.exists(): shutil.rmtree(dir) - (datasets / 'SKU110K_fixed').rename(dir) # rename dir + (parent / 'SKU110K_fixed').rename(dir) # rename dir (dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir # Convert labels diff --git a/data/VOC.yaml b/data/VOC.yaml new file mode 100644 index 000000000000..3d878fa67a60 --- /dev/null +++ b/data/VOC.yaml @@ -0,0 +1,79 @@ +# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/ +# Train command: python train.py --data VOC.yaml +# Default dataset location is next to YOLOv5: +# /parent +# /datasets/VOC +# /yolov5 + + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +path: ../datasets/VOC +train: # train images (relative to 'path') 16551 images + - images/train2012 + - images/train2007 + - images/val2012 + - images/val2007 +val: # val images (relative to 'path') 4952 images + - images/test2007 +test: # test images (optional) + - images/test2007 + +# Classes +nc: 20 # number of classes +names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', + 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ] # class names + + +# Download script/URL (optional) --------------------------------------------------------------------------------------- +download: | + import xml.etree.ElementTree as ET + + from tqdm import tqdm + from utils.general import download, Path + + + def convert_label(path, lb_path, year, image_id): + def convert_box(size, box): + dw, dh = 1. / size[0], 1. / size[1] + x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2] + return x * dw, y * dh, w * dw, h * dh + + in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml') + out_file = open(lb_path, 'w') + tree = ET.parse(in_file) + root = tree.getroot() + size = root.find('size') + w = int(size.find('width').text) + h = int(size.find('height').text) + + for obj in root.iter('object'): + cls = obj.find('name').text + if cls in yaml['names'] and not int(obj.find('difficult').text) == 1: + xmlbox = obj.find('bndbox') + bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')]) + cls_id = yaml['names'].index(cls) # class id + out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n') + + + # Download + dir = Path(yaml['path']) # dataset root dir + url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' + urls = [url + 'VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images + url + 'VOCtest_06-Nov-2007.zip', # 438MB, 4953 images + url + 'VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images + download(urls, dir=dir / 'images', delete=False) + + # Convert + path = dir / f'images/VOCdevkit' + for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'): + imgs_path = dir / 'images' / f'{image_set}{year}' + lbs_path = dir / 'labels' / f'{image_set}{year}' + imgs_path.mkdir(exist_ok=True, parents=True) + lbs_path.mkdir(exist_ok=True, parents=True) + + image_ids = open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt').read().strip().split() + for id in tqdm(image_ids, desc=f'{image_set}{year}'): + f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path + lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path + f.rename(imgs_path / f.name) # move image + convert_label(path, lb_path, year, id) # convert labels to YOLO format diff --git a/data/VisDrone.yaml b/data/VisDrone.yaml index c4603b200132..c1cd38d1e10f 100644 --- a/data/VisDrone.yaml +++ b/data/VisDrone.yaml @@ -1,24 +1,23 @@ # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset # Train command: python train.py --data VisDrone.yaml # Default dataset location is next to YOLOv5: -# /parent_folder -# /VisDrone +# /parent +# /datasets/VisDrone # /yolov5 -# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] -train: ../VisDrone/VisDrone2019-DET-train/images # 6471 images -val: ../VisDrone/VisDrone2019-DET-val/images # 548 images -test: ../VisDrone/VisDrone2019-DET-test-dev/images # 1610 images +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +path: ../datasets/VisDrone # dataset root dir +train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images +val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images +test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images -# number of classes -nc: 10 - -# class names +# Classes +nc: 10 # number of classes names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ] -# download command/URL (optional) -------------------------------------------------------------------------------------- +# Download script/URL (optional) --------------------------------------------------------------------------------------- download: | from utils.general import download, os, Path @@ -49,7 +48,7 @@ download: | # Download - dir = Path('../VisDrone') # dataset directory + dir = Path(yaml['path']) # dataset root dir urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip', 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip', 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip', diff --git a/data/argoverse_hd.yaml b/data/argoverse_hd.yaml deleted file mode 100644 index 0ba314d82ce1..000000000000 --- a/data/argoverse_hd.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ -# Train command: python train.py --data argoverse_hd.yaml -# Default dataset location is next to YOLOv5: -# /parent_folder -# /argoverse -# /yolov5 - - -# download command/URL (optional) -download: bash data/scripts/get_argoverse_hd.sh - -# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] -train: ../argoverse/Argoverse-1.1/images/train/ # 39384 images -val: ../argoverse/Argoverse-1.1/images/val/ # 15062 iamges -test: ../argoverse/Argoverse-1.1/images/test/ # Submit to: https://eval.ai/web/challenges/challenge-page/800/overview - -# number of classes -nc: 8 - -# class names -names: [ 'person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign' ] diff --git a/data/coco.yaml b/data/coco.yaml index f818a49ff0fa..c6053c984bc0 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -1,23 +1,19 @@ # COCO 2017 dataset http://cocodataset.org # Train command: python train.py --data coco.yaml # Default dataset location is next to YOLOv5: -# /parent_folder -# /coco +# /parent +# /datasets/coco # /yolov5 -# download command/URL (optional) -download: bash data/scripts/get_coco.sh +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +path: ../datasets/coco # dataset root dir +train: train2017.txt # train images (relative to 'path') 118287 images +val: val2017.txt # train images (relative to 'path') 5000 images +test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 -# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] -train: ../coco/train2017.txt # 118287 images -val: ../coco/val2017.txt # 5000 images -test: ../coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 - -# number of classes -nc: 80 - -# class names +# Classes +nc: 80 # number of classes names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', @@ -26,10 +22,22 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', ' 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', - 'hair drier', 'toothbrush' ] + 'hair drier', 'toothbrush' ] # class names + + +# Download script/URL (optional) +download: | + from utils.general import download, Path + + # Download labels + segments = False # segment or box labels + dir = Path(yaml['path']) # dataset root dir + url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' + urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels + download(urls, dir=dir.parent) -# Print classes -# with open('data/coco.yaml') as f: -# d = yaml.safe_load(f) # dict -# for i, x in enumerate(d['names']): -# print(i, x) + # Download data + urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images + 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images + 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional) + download(urls, dir=dir / 'images', threads=3) diff --git a/data/coco128.yaml b/data/coco128.yaml index 83fbc29d3404..e70ad687dd88 100644 --- a/data/coco128.yaml +++ b/data/coco128.yaml @@ -1,22 +1,19 @@ # COCO 2017 dataset http://cocodataset.org - first 128 training images # Train command: python train.py --data coco128.yaml # Default dataset location is next to YOLOv5: -# /parent_folder -# /coco128 +# /parent +# /datasets/coco128 # /yolov5 -# download command/URL (optional) -download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +path: ../datasets/coco128 # dataset root dir +train: images/train2017 # train images (relative to 'path') 128 images +val: images/train2017 # val images (relative to 'path') 128 images +test: # test images (optional) -# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] -train: ../coco128/images/train2017/ # 128 images -val: ../coco128/images/train2017/ # 128 images - -# number of classes -nc: 80 - -# class names +# Classes +nc: 80 # number of classes names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', @@ -25,4 +22,8 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', ' 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', - 'hair drier', 'toothbrush' ] + 'hair drier', 'toothbrush' ] # class names + + +# Download script/URL (optional) +download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip \ No newline at end of file diff --git a/data/hyps/hyp.finetune.yaml b/data/hyps/hyp.finetune.yaml index 1b84cff95c2c..a77597741356 100644 --- a/data/hyps/hyp.finetune.yaml +++ b/data/hyps/hyp.finetune.yaml @@ -1,5 +1,5 @@ # Hyperparameters for VOC finetuning -# python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50 +# python train.py --batch 64 --weights yolov5m.pt --data VOC.yaml --img 512 --epochs 50 # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials diff --git a/data/scripts/get_argoverse_hd.sh b/data/scripts/get_argoverse_hd.sh deleted file mode 100644 index 331509914568..000000000000 --- a/data/scripts/get_argoverse_hd.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/bash -# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ -# Download command: bash data/scripts/get_argoverse_hd.sh -# Train command: python train.py --data argoverse_hd.yaml -# Default dataset location is next to YOLOv5: -# /parent_folder -# /argoverse -# /yolov5 - -# Download/unzip images -d='../argoverse/' # unzip directory -mkdir $d -url=https://argoverse-hd.s3.us-east-2.amazonaws.com/ -f=Argoverse-HD-Full.zip -curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &# download, unzip, remove in background -wait # finish background tasks - -cd ../argoverse/Argoverse-1.1/ -ln -s tracking images - -cd ../Argoverse-HD/annotations/ - -python3 - "$@" <train.txt -cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt - -mkdir ../VOC ../VOC/images ../VOC/images/train ../VOC/images/val -mkdir ../VOC/labels ../VOC/labels/train ../VOC/labels/val - -python3 - "$@" < 1: # model = nn.DataParallel(model) + # Data + with open(data) as f: + data = yaml.safe_load(f) + check_dataset(data) # check + # Half half &= device.type != 'cpu' # half precision only supported on CUDA if half: @@ -83,10 +88,6 @@ def run(data, # Configure model.eval() - if isinstance(data, str): - with open(data) as f: - data = yaml.safe_load(f) - check_dataset(data) # check is_coco = type(data['val']) is str and data['val'].endswith('coco/val2017.txt') # COCO dataset nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 diff --git a/train.py b/train.py index ba84b432f660..6b04e8ff3a6a 100644 --- a/train.py +++ b/train.py @@ -453,7 +453,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary if not evolve: if is_coco: # COCO dataset for m in [last, best] if best.exists() else [last]: # speed, mAP tests - results, _, _ = test.run(data, + results, _, _ = test.run(data_dict, batch_size=batch_size // WORLD_SIZE * 2, imgsz=imgsz_test, conf_thres=0.001, diff --git a/tutorial.ipynb b/tutorial.ipynb index bcdbc014dfb4..d136803659fb 100644 --- a/tutorial.ipynb +++ b/tutorial.ipynb @@ -1255,7 +1255,7 @@ "source": [ "# VOC\n", "for b, m in zip([64, 48, 32, 16], ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']): # zip(batch_size, model)\n", - " !python train.py --batch {b} --weights {m}.pt --data voc.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}" + " !python train.py --batch {b} --weights {m}.pt --data VOC.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}" ], "execution_count": null, "outputs": [] diff --git a/utils/general.py b/utils/general.py index e39f2ac09ca3..555975f07c5d 100755 --- a/utils/general.py +++ b/utils/general.py @@ -222,9 +222,14 @@ def check_file(file): def check_dataset(data, autodownload=True): # Download dataset if not found locally - val, s = data.get('val'), data.get('download') + path = Path(data.get('path', '')) # optional 'path' field + if path: + for k in 'train', 'val', 'test': + if data.get(k): # prepend path + data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]] + + train, val, test, s = [data.get(x) for x in ('train', 'val', 'test', 'download')] if val: - root = Path(val).parts[0] + os.sep # unzip directory i.e. '../' val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path if not all(x.exists() for x in val): print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()]) @@ -233,12 +238,14 @@ def check_dataset(data, autodownload=True): f = Path(s).name # filename print(f'Downloading {s} ...') torch.hub.download_url_to_file(s, f) + root = path.parent if 'path' in data else '..' # unzip directory i.e. '../' + Path(root).mkdir(parents=True, exist_ok=True) # create root r = os.system(f'unzip -q {f} -d {root} && rm {f}') # unzip elif s.startswith('bash '): # bash script print(f'Running {s} ...') r = os.system(s) else: # python script - r = exec(s) # return None + r = exec(s, {'yaml': data}) # return None print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result else: raise Exception('Dataset not found.') @@ -258,7 +265,7 @@ def download_one(url, dir): if unzip and f.suffix in ('.zip', '.gz'): print(f'Unzipping {f}...') if f.suffix == '.zip': - s = f'unzip -qo {f} -d {dir} && rm {f}' # unzip -quiet -overwrite + s = f'unzip -qo {f} -d {dir}' # unzip -quiet -overwrite elif f.suffix == '.gz': s = f'tar xfz {f} --directory {f.parent}' # unzip if delete: # delete zip file after unzip