From 8f94873e2e33527d624eb678ec4afc5d43c3b621 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 00:44:06 +0200
Subject: [PATCH 01/28] Add optional dataset.yaml `path` attribute

@KalenMike
---
 utils/general.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/utils/general.py b/utils/general.py
index e39f2ac09ca3..374226508a83 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -222,9 +222,15 @@ def check_file(file):
 
 def check_dataset(data, autodownload=True):
     # Download dataset if not found locally
-    val, s = data.get('val'), data.get('download')
+    path = Path(data.get('path', ''))  # optional 'path' field
+    if path:
+        for k in 'train', 'val', 'test':
+            if k in data:
+                data[k] = str(path / Path(data.get(k, '')))  # prepend path
+
+    train, val, test, s = [data.get(x) for x in ('train', 'val', 'test', 'download')]
     if val:
-        root = Path(val).parts[0] + os.sep  # unzip directory i.e. '../'
+        root = path.parent if 'path' in data else '..'  # unzip directory i.e. '../'
         val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
         if not all(x.exists() for x in val):
             print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])

From d0367dca7a5b054594af3e1d0c4c5db3be8ed0c6 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 13:14:29 +0200
Subject: [PATCH 02/28] pass dataset dict to python download scripts as `yaml`

---
 data/coco128.yaml | 5 +++--
 utils/general.py  | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/data/coco128.yaml b/data/coco128.yaml
index 83fbc29d3404..96faf3201778 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -10,8 +10,9 @@
 download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
 
 # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../coco128/images/train2017/  # 128 images
-val: ../coco128/images/train2017/  # 128 images
+path: ../coco128  # dataset dir (relative or absolute)
+train: images/train2017  # 128 images
+val: images/train2017  # 128 images
 
 # number of classes
 nc: 80
diff --git a/utils/general.py b/utils/general.py
index 374226508a83..ada0cbd6bf86 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -244,7 +244,7 @@ def check_dataset(data, autodownload=True):
                     print(f'Running {s} ...')
                     r = os.system(s)
                 else:  # python script
-                    r = exec(s)  # return None
+                    r = exec(s, {'yaml': data})  # return None
                 print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure'))  # print result
             else:
                 raise Exception('Dataset not found.')

From dc541360408acceda86815097c47e7517f9249d0 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 13:28:43 +0200
Subject: [PATCH 03/28] handle lists

---
 utils/general.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/general.py b/utils/general.py
index ada0cbd6bf86..1c4eb848ae0f 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -225,8 +225,8 @@ def check_dataset(data, autodownload=True):
     path = Path(data.get('path', ''))  # optional 'path' field
     if path:
         for k in 'train', 'val', 'test':
-            if k in data:
-                data[k] = str(path / Path(data.get(k, '')))  # prepend path
+            if k in data:  # prepend path
+                data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
 
     train, val, test, s = [data.get(x) for x in ('train', 'val', 'test', 'download')]
     if val:

From 637056566e75fed8e7e873aea2f4c1e4d257493b Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 13:45:36 +0200
Subject: [PATCH 04/28] update coco128.yaml

---
 data/coco128.yaml | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/data/coco128.yaml b/data/coco128.yaml
index 96faf3201778..857dc89eda9d 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -1,7 +1,7 @@
 # COCO 2017 dataset http://cocodataset.org - first 128 training images
 # Train command: python train.py --data coco128.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
+#   /parent
 #     /coco128
 #     /yolov5
 
@@ -9,15 +9,14 @@
 # download command/URL (optional)
 download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-path: ../coco128  # dataset dir (relative or absolute)
-train: images/train2017  # 128 images
-val: images/train2017  # 128 images
+# train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../coco128  # dataset root dir
+train: images/train2017  # train images (relative to 'path') 128 images
+val: images/train2017  # val images (relative to 'path') 128 images
+test:  # test images (optional)
 
-# number of classes
-nc: 80
-
-# class names
+# classes
+nc: 80  # number of classes
 names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
          'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
          'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
@@ -26,4 +25,4 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
          'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
          'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
          'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
-         'hair drier', 'toothbrush' ]
+         'hair drier', 'toothbrush' ]  # names of classes

From cd52628e213f587ae4030122d46872f3db175f10 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 13:53:10 +0200
Subject: [PATCH 05/28] Capitalize first letter

---
 data/GlobalWheat2020.yaml | 43 ++++++++++++++++++---------------------
 data/SKU-110K.yaml        | 14 ++++++-------
 data/VisDrone.yaml        | 12 +++++------
 data/argoverse_hd.yaml    | 14 ++++++-------
 data/coco.yaml            | 20 ++++++------------
 data/coco128.yaml         |  9 ++++----
 data/objects365.yaml      | 12 +++++------
 data/voc.yaml             | 14 ++++++-------
 8 files changed, 59 insertions(+), 79 deletions(-)

diff --git a/data/GlobalWheat2020.yaml b/data/GlobalWheat2020.yaml
index f45182b43e25..8f5ba79db923 100644
--- a/data/GlobalWheat2020.yaml
+++ b/data/GlobalWheat2020.yaml
@@ -1,43 +1,40 @@
 # Global Wheat 2020 dataset http://www.global-wheat.com/
 # Train command: python train.py --data GlobalWheat2020.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
+#   /parent
 #     /datasets/GlobalWheat2020
 #     /yolov5
 
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/GlobalWheat2020  # dataset directory (relative or absolute)
 train: # 3422 images
-  - ../datasets/GlobalWheat2020/images/arvalis_1
-  - ../datasets/GlobalWheat2020/images/arvalis_2
-  - ../datasets/GlobalWheat2020/images/arvalis_3
-  - ../datasets/GlobalWheat2020/images/ethz_1
-  - ../datasets/GlobalWheat2020/images/rres_1
-  - ../datasets/GlobalWheat2020/images/inrae_1
-  - ../datasets/GlobalWheat2020/images/usask_1
-
+  - images/arvalis_1
+  - images/arvalis_2
+  - images/arvalis_3
+  - images/ethz_1
+  - images/rres_1
+  - images/inrae_1
+  - images/usask_1
 val: # 748 images (WARNING: train set contains ethz_1)
-  - ../datasets/GlobalWheat2020/images/ethz_1
-
+  - images/ethz_1
 test: # 1276 images
-  - ../datasets/GlobalWheat2020/images/utokyo_1
-  - ../datasets/GlobalWheat2020/images/utokyo_2
-  - ../datasets/GlobalWheat2020/images/nau_1
-  - ../datasets/GlobalWheat2020/images/uq_1
-
-# number of classes
-nc: 1
+  - images/utokyo_1
+  - images/utokyo_2
+  - images/nau_1
+  - images/uq_1
 
-# class names
-names: [ 'wheat_head' ]
+# Classes
+nc: 1  # number of classes
+names: [ 'wheat_head' ]  # class names
 
 
-# download command/URL (optional) --------------------------------------------------------------------------------------
+# Download command/URL (optional) --------------------------------------------------------------------------------------
 download: |
   from utils.general import download, Path
 
   # Download
-  dir = Path('../datasets/GlobalWheat2020')  # dataset directory
+  dir = Path(yaml['path'])  # dataset directory
   urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
   download(urls, dir=dir)
diff --git a/data/SKU-110K.yaml b/data/SKU-110K.yaml
index a8c1f25b385a..73b09513c635 100644
--- a/data/SKU-110K.yaml
+++ b/data/SKU-110K.yaml
@@ -1,24 +1,22 @@
 # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19
 # Train command: python train.py --data SKU-110K.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
+#   /parent
 #     /datasets/SKU-110K
 #     /yolov5
 
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 train: ../datasets/SKU-110K/train.txt  # 8219 images
 val: ../datasets/SKU-110K/val.txt  # 588 images
 test: ../datasets/SKU-110K/test.txt  # 2936 images
 
-# number of classes
-nc: 1
+# Classes
+nc: 1  # number of classes
+names: [ 'object' ]  # class names
 
-# class names
-names: [ 'object' ]
 
-
-# download command/URL (optional) --------------------------------------------------------------------------------------
+# Download command/URL (optional) --------------------------------------------------------------------------------------
 download: |
   import shutil
   from tqdm import tqdm
diff --git a/data/VisDrone.yaml b/data/VisDrone.yaml
index c4603b200132..f0deb273a363 100644
--- a/data/VisDrone.yaml
+++ b/data/VisDrone.yaml
@@ -1,24 +1,22 @@
 # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
 # Train command: python train.py --data VisDrone.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
+#   /parent
 #     /VisDrone
 #     /yolov5
 
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 train: ../VisDrone/VisDrone2019-DET-train/images  # 6471 images
 val: ../VisDrone/VisDrone2019-DET-val/images  # 548 images
 test: ../VisDrone/VisDrone2019-DET-test-dev/images  # 1610 images
 
-# number of classes
-nc: 10
-
-# class names
+# Classes
+nc: 10  # number of classes
 names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ]
 
 
-# download command/URL (optional) --------------------------------------------------------------------------------------
+# Download command/URL (optional) --------------------------------------------------------------------------------------
 download: |
   from utils.general import download, os, Path
 
diff --git a/data/argoverse_hd.yaml b/data/argoverse_hd.yaml
index 0ba314d82ce1..0eea43e62b4d 100644
--- a/data/argoverse_hd.yaml
+++ b/data/argoverse_hd.yaml
@@ -1,21 +1,19 @@
 # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
 # Train command: python train.py --data argoverse_hd.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
+#   /parent
 #     /argoverse
 #     /yolov5
 
 
-# download command/URL (optional)
+# Download command/URL (optional)
 download: bash data/scripts/get_argoverse_hd.sh
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 train: ../argoverse/Argoverse-1.1/images/train/  # 39384 images
 val: ../argoverse/Argoverse-1.1/images/val/  # 15062 iamges
 test: ../argoverse/Argoverse-1.1/images/test/  # Submit to: https://eval.ai/web/challenges/challenge-page/800/overview
 
-# number of classes
-nc: 8
-
-# class names
-names: [ 'person',  'bicycle',  'car',  'motorcycle',  'bus',  'truck',  'traffic_light',  'stop_sign' ]
+# Classes
+nc: 8  # number of classes
+names: [ 'person',  'bicycle',  'car',  'motorcycle',  'bus',  'truck',  'traffic_light',  'stop_sign' ]  # class names
diff --git a/data/coco.yaml b/data/coco.yaml
index f818a49ff0fa..ed46fc1efd6e 100644
--- a/data/coco.yaml
+++ b/data/coco.yaml
@@ -1,23 +1,21 @@
 # COCO 2017 dataset http://cocodataset.org
 # Train command: python train.py --data coco.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
+#   /parent
 #     /coco
 #     /yolov5
 
 
-# download command/URL (optional)
+# Download command/URL (optional)
 download: bash data/scripts/get_coco.sh
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 train: ../coco/train2017.txt  # 118287 images
 val: ../coco/val2017.txt  # 5000 images
 test: ../coco/test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
 
-# number of classes
-nc: 80
-
-# class names
+# Classes
+nc: 80  # number of classes
 names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
          'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
          'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
@@ -26,10 +24,4 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
          'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
          'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
          'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
-         'hair drier', 'toothbrush' ]
-
-# Print classes
-# with open('data/coco.yaml') as f:
-#   d = yaml.safe_load(f)  # dict
-#   for i, x in enumerate(d['names']):
-#     print(i, x)
+         'hair drier', 'toothbrush' ]  # class names
diff --git a/data/coco128.yaml b/data/coco128.yaml
index 857dc89eda9d..8964f5f333f9 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -6,16 +6,16 @@
 #     /yolov5
 
 
-# download command/URL (optional)
+# Download command/URL (optional)
 download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
 
-# train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../coco128  # dataset root dir
 train: images/train2017  # train images (relative to 'path') 128 images
 val: images/train2017  # val images (relative to 'path') 128 images
 test:  # test images (optional)
 
-# classes
+# Classes
 nc: 80  # number of classes
 names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
          'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
@@ -25,4 +25,5 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
          'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
          'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
          'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
-         'hair drier', 'toothbrush' ]  # names of classes
+         'hair drier', 'toothbrush' ]  # class names
+
diff --git a/data/objects365.yaml b/data/objects365.yaml
index eb99995903cf..8e710f4ddc50 100644
--- a/data/objects365.yaml
+++ b/data/objects365.yaml
@@ -1,18 +1,16 @@
 # Objects365 dataset https://www.objects365.org/
 # Train command: python train.py --data objects365.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
+#   /parent
 #     /datasets/objects365
 #     /yolov5
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 train: ../datasets/objects365/images/train  # 1742289 images
 val: ../datasets/objects365/images/val # 5570 images
 
-# number of classes
-nc: 365
-
-# class names
+# Classes
+nc: 365  # number of classes
 names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup',
          'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book',
          'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
@@ -56,7 +54,7 @@ names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Gl
          'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis' ]
 
 
-# download command/URL (optional) --------------------------------------------------------------------------------------
+# Download command/URL (optional) --------------------------------------------------------------------------------------
 download: |
   from pycocotools.coco import COCO
   from tqdm import tqdm
diff --git a/data/voc.yaml b/data/voc.yaml
index ca293c4f091f..2c3baa49eae1 100644
--- a/data/voc.yaml
+++ b/data/voc.yaml
@@ -1,21 +1,19 @@
 # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
 # Train command: python train.py --data voc.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
+#   /parent
 #     /VOC
 #     /yolov5
 
 
-# download command/URL (optional)
+# Download command/URL (optional)
 download: bash data/scripts/get_voc.sh
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 train: ../VOC/images/train/  # 16551 images
 val: ../VOC/images/val/  # 4952 images
 
-# number of classes
-nc: 20
-
-# class names
+# Classes
+nc: 20  # number of classes
 names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
-         'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]
+         'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]  # class names

From a0815a11e5fe4a3cbd2dd2a4977841d14a112ce8 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 13:55:21 +0200
Subject: [PATCH 06/28] add test key

---
 data/GlobalWheat2020.yaml | 2 +-
 data/objects365.yaml      | 2 ++
 data/voc.yaml             | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/data/GlobalWheat2020.yaml b/data/GlobalWheat2020.yaml
index 8f5ba79db923..30c95eec3f73 100644
--- a/data/GlobalWheat2020.yaml
+++ b/data/GlobalWheat2020.yaml
@@ -7,7 +7,7 @@
 
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/GlobalWheat2020  # dataset directory (relative or absolute)
+path: ../datasets/GlobalWheat2020  # dataset root dir
 train: # 3422 images
   - images/arvalis_1
   - images/arvalis_2
diff --git a/data/objects365.yaml b/data/objects365.yaml
index 8e710f4ddc50..1b37c5050b46 100644
--- a/data/objects365.yaml
+++ b/data/objects365.yaml
@@ -5,9 +5,11 @@
 #     /datasets/objects365
 #     /yolov5
 
+
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 train: ../datasets/objects365/images/train  # 1742289 images
 val: ../datasets/objects365/images/val # 5570 images
+test:  # test images (optional)
 
 # Classes
 nc: 365  # number of classes
diff --git a/data/voc.yaml b/data/voc.yaml
index 2c3baa49eae1..5df90e6021a1 100644
--- a/data/voc.yaml
+++ b/data/voc.yaml
@@ -12,6 +12,7 @@ download: bash data/scripts/get_voc.sh
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 train: ../VOC/images/train/  # 16551 images
 val: ../VOC/images/val/  # 4952 images
+test:  # test images (optional)
 
 # Classes
 nc: 20  # number of classes

From 451fe11d73a3f04062033371049c83d818a1b67b Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 13:57:19 +0200
Subject: [PATCH 07/28] finalize GlobalWheat2020.yaml

---
 data/GlobalWheat2020.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/data/GlobalWheat2020.yaml b/data/GlobalWheat2020.yaml
index 30c95eec3f73..dfb65f804b22 100644
--- a/data/GlobalWheat2020.yaml
+++ b/data/GlobalWheat2020.yaml
@@ -8,7 +8,7 @@
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/GlobalWheat2020  # dataset root dir
-train: # 3422 images
+train: # train images (relative to 'path') 3422 images
   - images/arvalis_1
   - images/arvalis_2
   - images/arvalis_3
@@ -16,9 +16,9 @@ train: # 3422 images
   - images/rres_1
   - images/inrae_1
   - images/usask_1
-val: # 748 images (WARNING: train set contains ethz_1)
+val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
   - images/ethz_1
-test: # 1276 images
+test: # test images (optional) 1276 images
   - images/utokyo_1
   - images/utokyo_2
   - images/nau_1

From ac22a568e0f91e515a56fa4eaf8d700dd9310b32 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 13:59:56 +0200
Subject: [PATCH 08/28] finalize objects365.yaml

---
 data/GlobalWheat2020.yaml | 2 +-
 data/objects365.yaml      | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/data/GlobalWheat2020.yaml b/data/GlobalWheat2020.yaml
index dfb65f804b22..6dbc0f45c14d 100644
--- a/data/GlobalWheat2020.yaml
+++ b/data/GlobalWheat2020.yaml
@@ -34,7 +34,7 @@ download: |
   from utils.general import download, Path
 
   # Download
-  dir = Path(yaml['path'])  # dataset directory
+  dir = Path(yaml['path'])  # dataset root dir
   urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
   download(urls, dir=dir)
diff --git a/data/objects365.yaml b/data/objects365.yaml
index 1b37c5050b46..5eef8c02cac4 100644
--- a/data/objects365.yaml
+++ b/data/objects365.yaml
@@ -7,8 +7,9 @@
 
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-train: ../datasets/objects365/images/train  # 1742289 images
-val: ../datasets/objects365/images/val # 5570 images
+path: ../datasets/objects365  # dataset root dir
+train: images/train  # train images (relative to 'path') 1742289 images
+val: images/val # val images (relative to 'path') 5570 images
 test:  # test images (optional)
 
 # Classes
@@ -64,7 +65,7 @@ download: |
   from utils.general import download, Path
 
   # Make Directories
-  dir = Path('../datasets/objects365')  # dataset directory
+  dir = Path(yaml['path'])  # dataset root dir
   for p in 'images', 'labels':
       (dir / p).mkdir(parents=True, exist_ok=True)
       for q in 'train', 'val':

From 1ad61569ccdea05123d3a50b0ec45a670682665c Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 14:04:57 +0200
Subject: [PATCH 09/28] finalize SKU-110K.yaml

---
 data/SKU-110K.yaml | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/data/SKU-110K.yaml b/data/SKU-110K.yaml
index 73b09513c635..ee79590d2746 100644
--- a/data/SKU-110K.yaml
+++ b/data/SKU-110K.yaml
@@ -7,9 +7,10 @@
 
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-train: ../datasets/SKU-110K/train.txt  # 8219 images
-val: ../datasets/SKU-110K/val.txt  # 588 images
-test: ../datasets/SKU-110K/test.txt  # 2936 images
+path: ../datasets/SKU-110K  # dataset root dir
+train: train.txt  # 8219 images
+val: val.txt  # 588 images
+test: test.txt  # 2936 images
 
 # Classes
 nc: 1  # number of classes
@@ -23,15 +24,15 @@ download: |
   from utils.general import np, pd, Path, download, xyxy2xywh
 
   # Download
-  datasets = Path('../datasets')  # download directory
+  dir = Path(yaml['path'])  # dataset root dir
+  parent = Path(dir.parent)  # download dir
   urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
-  download(urls, dir=datasets, delete=False)
+  download(urls, dir=parent, delete=False)
 
   # Rename directories
-  dir = (datasets / 'SKU-110K')
   if dir.exists():
       shutil.rmtree(dir)
-  (datasets / 'SKU110K_fixed').rename(dir)  # rename dir
+  (parent / 'SKU110K_fixed').rename(dir)  # rename dir
   (dir / 'labels').mkdir(parents=True, exist_ok=True)  # create labels dir
 
   # Convert labels

From ec16dd38663cb3897439017b736732d852ebb73f Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 14:05:49 +0200
Subject: [PATCH 10/28] finalize SKU-110K.yaml

---
 data/SKU-110K.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/data/SKU-110K.yaml b/data/SKU-110K.yaml
index ee79590d2746..0d010de4ee3c 100644
--- a/data/SKU-110K.yaml
+++ b/data/SKU-110K.yaml
@@ -8,9 +8,9 @@
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/SKU-110K  # dataset root dir
-train: train.txt  # 8219 images
-val: val.txt  # 588 images
-test: test.txt  # 2936 images
+train: train.txt  # train images (relative to 'path')  8219 images
+val: val.txt  # val images (relative to 'path')  588 images
+test: test.txt  # test images (optional)  2936 images
 
 # Classes
 nc: 1  # number of classes

From 7a989dd5dd9944afa7fad3269578fa87c1a58f36 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 14:10:11 +0200
Subject: [PATCH 11/28] finalize VisDrone.yaml

---
 data/VisDrone.yaml | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/data/VisDrone.yaml b/data/VisDrone.yaml
index f0deb273a363..af82eea91e43 100644
--- a/data/VisDrone.yaml
+++ b/data/VisDrone.yaml
@@ -2,14 +2,15 @@
 # Train command: python train.py --data VisDrone.yaml
 # Default dataset location is next to YOLOv5:
 #   /parent
-#     /VisDrone
+#     /datasets/VisDrone
 #     /yolov5
 
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-train: ../VisDrone/VisDrone2019-DET-train/images  # 6471 images
-val: ../VisDrone/VisDrone2019-DET-val/images  # 548 images
-test: ../VisDrone/VisDrone2019-DET-test-dev/images  # 1610 images
+path: ../datasets/VisDrone  # dataset root dir
+train: VisDrone2019-DET-train/images  # train images (relative to 'path')  6471 images
+val: VisDrone2019-DET-val/images  # val images (relative to 'path')  548 images
+test: VisDrone2019-DET-test-dev/images  # test images (optional)  1610 images
 
 # Classes
 nc: 10  # number of classes
@@ -47,7 +48,7 @@ download: |
 
 
   # Download
-  dir = Path('../VisDrone')  # dataset directory
+  dir = Path(yaml['path'])  # dataset root dir
   urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',

From 48f51a037ee2ce43227b168f362346b44f9e1a20 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 14:11:52 +0200
Subject: [PATCH 12/28] NoneType fix

---
 utils/general.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/general.py b/utils/general.py
index 1c4eb848ae0f..a486055f2378 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -225,7 +225,7 @@ def check_dataset(data, autodownload=True):
     path = Path(data.get('path', ''))  # optional 'path' field
     if path:
         for k in 'train', 'val', 'test':
-            if k in data:  # prepend path
+            if data.get(k):  # prepend path
                 data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
 
     train, val, test, s = [data.get(x) for x in ('train', 'val', 'test', 'download')]

From 131a0b96bd69368d19bab27c7eb219702213da60 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 17:51:32 +0200
Subject: [PATCH 13/28] update download comment

---
 data/GlobalWheat2020.yaml |  4 ++--
 data/SKU-110K.yaml        |  2 +-
 data/VisDrone.yaml        |  2 +-
 data/argoverse_hd.yaml    | 17 ++++++++++-------
 data/coco.yaml            | 27 +++++++++++++++++++++------
 data/coco128.yaml         |  6 +++---
 data/objects365.yaml      |  2 +-
 data/voc.yaml             |  7 ++++---
 8 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/data/GlobalWheat2020.yaml b/data/GlobalWheat2020.yaml
index 6dbc0f45c14d..b25112f31901 100644
--- a/data/GlobalWheat2020.yaml
+++ b/data/GlobalWheat2020.yaml
@@ -29,7 +29,7 @@ nc: 1  # number of classes
 names: [ 'wheat_head' ]  # class names
 
 
-# Download command/URL (optional) --------------------------------------------------------------------------------------
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   from utils.general import download, Path
 
@@ -37,7 +37,7 @@ download: |
   dir = Path(yaml['path'])  # dataset root dir
   urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
-  download(urls, dir=dir)
+  download(urls, dir=dir.parent)
 
   # Make Directories
   for p in 'annotations', 'images', 'labels':
diff --git a/data/SKU-110K.yaml b/data/SKU-110K.yaml
index 0d010de4ee3c..7087bb9c2893 100644
--- a/data/SKU-110K.yaml
+++ b/data/SKU-110K.yaml
@@ -17,7 +17,7 @@ nc: 1  # number of classes
 names: [ 'object' ]  # class names
 
 
-# Download command/URL (optional) --------------------------------------------------------------------------------------
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   import shutil
   from tqdm import tqdm
diff --git a/data/VisDrone.yaml b/data/VisDrone.yaml
index af82eea91e43..c1cd38d1e10f 100644
--- a/data/VisDrone.yaml
+++ b/data/VisDrone.yaml
@@ -17,7 +17,7 @@ nc: 10  # number of classes
 names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ]
 
 
-# Download command/URL (optional) --------------------------------------------------------------------------------------
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   from utils.general import download, os, Path
 
diff --git a/data/argoverse_hd.yaml b/data/argoverse_hd.yaml
index 0eea43e62b4d..6eaea9e48b84 100644
--- a/data/argoverse_hd.yaml
+++ b/data/argoverse_hd.yaml
@@ -2,18 +2,21 @@
 # Train command: python train.py --data argoverse_hd.yaml
 # Default dataset location is next to YOLOv5:
 #   /parent
-#     /argoverse
+#     /datasets/argoverse
 #     /yolov5
 
 
-# Download command/URL (optional)
-download: bash data/scripts/get_argoverse_hd.sh
-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-train: ../argoverse/Argoverse-1.1/images/train/  # 39384 images
-val: ../argoverse/Argoverse-1.1/images/val/  # 15062 iamges
-test: ../argoverse/Argoverse-1.1/images/test/  # Submit to: https://eval.ai/web/challenges/challenge-page/800/overview
+path: ../datasets/argoverse  # dataset root dir
+train: Argoverse-1.1/images/train/  # train images (relative to 'path') 39384 images
+val: Argoverse-1.1/images/val/  # val images (relative to 'path') 15062 images
+test: Argoverse-1.1/images/test/  # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
 
 # Classes
 nc: 8  # number of classes
 names: [ 'person',  'bicycle',  'car',  'motorcycle',  'bus',  'truck',  'traffic_light',  'stop_sign' ]  # class names
+
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: |
+  pass
diff --git a/data/coco.yaml b/data/coco.yaml
index ed46fc1efd6e..fa217f8e4c28 100644
--- a/data/coco.yaml
+++ b/data/coco.yaml
@@ -6,13 +6,11 @@
 #     /yolov5
 
 
-# Download command/URL (optional)
-download: bash data/scripts/get_coco.sh
-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-train: ../coco/train2017.txt  # 118287 images
-val: ../coco/val2017.txt  # 5000 images
-test: ../coco/test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+path: ../coco  # dataset root dir
+train: train2017.txt  # train images (relative to 'path') 118287 images
+val: val2017.txt  # train images (relative to 'path') 5000 images
+test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
 
 # Classes
 nc: 80  # number of classes
@@ -25,3 +23,20 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
          'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
          'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
          'hair drier', 'toothbrush' ]  # class names
+
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: |
+  from utils.general import download, Path
+
+  # Download labels
+  dir = Path(yaml['path'])  # dataset root dir
+  urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels.zip']  # box labels
+  # urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip'] # segment labels
+  download(urls, dir=dir.parent)
+
+  # Download data
+  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
+          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
+          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
+  download(urls, dir=dir / 'images', threads=3)
diff --git a/data/coco128.yaml b/data/coco128.yaml
index 8964f5f333f9..476148435bd4 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -6,9 +6,6 @@
 #     /yolov5
 
 
-# Download command/URL (optional)
-download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../coco128  # dataset root dir
 train: images/train2017  # train images (relative to 'path') 128 images
@@ -27,3 +24,6 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
          'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
          'hair drier', 'toothbrush' ]  # class names
 
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
\ No newline at end of file
diff --git a/data/objects365.yaml b/data/objects365.yaml
index 5eef8c02cac4..2ded71ca5476 100644
--- a/data/objects365.yaml
+++ b/data/objects365.yaml
@@ -57,7 +57,7 @@ names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Gl
          'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis' ]
 
 
-# Download command/URL (optional) --------------------------------------------------------------------------------------
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   from pycocotools.coco import COCO
   from tqdm import tqdm
diff --git a/data/voc.yaml b/data/voc.yaml
index 5df90e6021a1..4ae178b845dd 100644
--- a/data/voc.yaml
+++ b/data/voc.yaml
@@ -6,9 +6,6 @@
 #     /yolov5
 
 
-# Download command/URL (optional)
-download: bash data/scripts/get_voc.sh
-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 train: ../VOC/images/train/  # 16551 images
 val: ../VOC/images/val/  # 4952 images
@@ -18,3 +15,7 @@ test:  # test images (optional)
 nc: 20  # number of classes
 names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
          'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]  # class names
+
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
+download: bash data/scripts/get_voc.sh
\ No newline at end of file

From c26062290b3dfbf37fd2382215bcef6314edd1fd Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 17:54:23 +0200
Subject: [PATCH 14/28] voc to VOC

---
 data/{voc.yaml => VOC.yaml} | 2 +-
 data/hyps/hyp.finetune.yaml | 2 +-
 data/scripts/get_voc.sh     | 2 +-
 tutorial.ipynb              | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
 rename data/{voc.yaml => VOC.yaml} (94%)

diff --git a/data/voc.yaml b/data/VOC.yaml
similarity index 94%
rename from data/voc.yaml
rename to data/VOC.yaml
index 4ae178b845dd..667561c6a47a 100644
--- a/data/voc.yaml
+++ b/data/VOC.yaml
@@ -1,5 +1,5 @@
 # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
-# Train command: python train.py --data voc.yaml
+# Train command: python train.py --data VOC.yaml
 # Default dataset location is next to YOLOv5:
 #   /parent
 #     /VOC
diff --git a/data/hyps/hyp.finetune.yaml b/data/hyps/hyp.finetune.yaml
index 1b84cff95c2c..a77597741356 100644
--- a/data/hyps/hyp.finetune.yaml
+++ b/data/hyps/hyp.finetune.yaml
@@ -1,5 +1,5 @@
 # Hyperparameters for VOC finetuning
-# python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50
+# python train.py --batch 64 --weights yolov5m.pt --data VOC.yaml --img 512 --epochs 50
 # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
 
 
diff --git a/data/scripts/get_voc.sh b/data/scripts/get_voc.sh
index 4c04aaa95a29..7aba8c1fc691 100644
--- a/data/scripts/get_voc.sh
+++ b/data/scripts/get_voc.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
 # Download command: bash data/scripts/get_voc.sh
-# Train command: python train.py --data voc.yaml
+# Train command: python train.py --data VOC.yaml
 # Default dataset location is next to YOLOv5:
 #   /parent_folder
 #     /VOC
diff --git a/tutorial.ipynb b/tutorial.ipynb
index bcdbc014dfb4..d136803659fb 100644
--- a/tutorial.ipynb
+++ b/tutorial.ipynb
@@ -1255,7 +1255,7 @@
       "source": [
         "# VOC\n",
         "for b, m in zip([64, 48, 32, 16], ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']):  # zip(batch_size, model)\n",
-        "  !python train.py --batch {b} --weights {m}.pt --data voc.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}"
+        "  !python train.py --batch {b} --weights {m}.pt --data VOC.yaml --epochs 50 --cache --img 512 --nosave --hyp hyp.finetune.yaml --project VOC --name {m}"
       ],
       "execution_count": null,
       "outputs": []

From eeb64dc01faf7423a2ba383e6afb8f5a708c7e90 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 20:05:04 +0200
Subject: [PATCH 15/28] update

---
 data/GlobalWheat2020.yaml |  2 +-
 data/VOC.yaml             | 16 ++++++++++++----
 data/coco.yaml            |  5 +++--
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/data/GlobalWheat2020.yaml b/data/GlobalWheat2020.yaml
index b25112f31901..b77534944ed7 100644
--- a/data/GlobalWheat2020.yaml
+++ b/data/GlobalWheat2020.yaml
@@ -37,7 +37,7 @@ download: |
   dir = Path(yaml['path'])  # dataset root dir
   urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
-  download(urls, dir=dir.parent)
+  download(urls, dir=dir)
 
   # Make Directories
   for p in 'annotations', 'images', 'labels':
diff --git a/data/VOC.yaml b/data/VOC.yaml
index 667561c6a47a..7eeb7e24c135 100644
--- a/data/VOC.yaml
+++ b/data/VOC.yaml
@@ -2,13 +2,14 @@
 # Train command: python train.py --data VOC.yaml
 # Default dataset location is next to YOLOv5:
 #   /parent
-#     /VOC
+#     /datasets/VOC
 #     /yolov5
 
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-train: ../VOC/images/train/  # 16551 images
-val: ../VOC/images/val/  # 4952 images
+path: ../datasets/VOC
+train: images/train  # train images (relative to 'path')  16551 images
+val: images/val  # val images (relative to 'path')  4952 images
 test:  # test images (optional)
 
 # Classes
@@ -18,4 +19,11 @@ names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
 
 
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
-download: bash data/scripts/get_voc.sh
\ No newline at end of file
+download: |
+  # Download
+  dir = Path(yaml['path'])  # dataset root dir
+  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+  urls = [url + 'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
+          url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
+          url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
+  download(urls, dir=dir / 'images', threads=3, delete=False)
diff --git a/data/coco.yaml b/data/coco.yaml
index fa217f8e4c28..ccac351dfcd7 100644
--- a/data/coco.yaml
+++ b/data/coco.yaml
@@ -30,9 +30,10 @@ download: |
   from utils.general import download, Path
 
   # Download labels
+  segments = False  # segment or box labels
   dir = Path(yaml['path'])  # dataset root dir
-  urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels.zip']  # box labels
-  # urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip'] # segment labels
+  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # segment or box labels
   download(urls, dir=dir.parent)
 
   # Download data

From 8e676d32dfefd2954e61049f6a70b54025f2004f Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 20:38:40 +0200
Subject: [PATCH 16/28] update VOC.yaml

---
 data/VOC.yaml | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/data/VOC.yaml b/data/VOC.yaml
index 7eeb7e24c135..a554271fcfe5 100644
--- a/data/VOC.yaml
+++ b/data/VOC.yaml
@@ -8,9 +8,15 @@
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/VOC
-train: images/train  # train images (relative to 'path')  16551 images
-val: images/val  # val images (relative to 'path')  4952 images
-test:  # test images (optional)
+train: # train images (relative to 'path')  16551 images
+  - images/train2012
+  - images/train2007
+  - images/val2012
+  - images/val2007
+val: # val images (relative to 'path')  4952 images
+  - images/test2007
+test: # test images (optional)
+  - images/test2007
 
 # Classes
 nc: 20  # number of classes

From 083e0af265d3b12c6c0e94e350bbfcb51c59ad18 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 21:25:10 +0200
Subject: [PATCH 17/28] update VOC.yaml

---
 data/VOC.yaml    | 44 ++++++++++++++++++++++++++++++++++++++++++++
 utils/general.py |  2 +-
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/data/VOC.yaml b/data/VOC.yaml
index a554271fcfe5..ffb3a7c7c2e9 100644
--- a/data/VOC.yaml
+++ b/data/VOC.yaml
@@ -26,6 +26,35 @@ names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
 
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
+  import xml.etree.ElementTree as ET
+
+  from tqdm import tqdm
+  from utils.general import download, Path
+
+
+  def convert_label(path, lb_path, year, image_id):
+      """Write a YOLO-format label file at lb_path from the VOC XML annotation of image_id."""
+      def convert_box(size, box):
+          dw, dh = 1. / size[0], 1. / size[1]
+          x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
+          return x * dw, y * dh, w * dw, h * dh  # centre and size divided by image width/height
+
+      # ET.parse opens and closes the annotation file itself when given a path
+      root = ET.parse(path / f'VOC{year}/Annotations/{image_id}.xml').getroot()
+      size = root.find('size')
+      w = int(size.find('width').text)
+      h = int(size.find('height').text)
+
+      with open(lb_path, 'w') as out_file:  # closed (and flushed) even if a malformed object raises
+          for obj in root.iter('object'):
+              cls = obj.find('name').text
+              if cls in yaml['names'] and not int(obj.find('difficult').text) == 1:  # skip unknown/difficult
+                  xmlbox = obj.find('bndbox')
+                  bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
+                  cls_id = yaml['names'].index(cls)  # class id
+                  out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
+
+
   # Download
   dir = Path(yaml['path'])  # dataset root dir
   url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
@@ -33,3 +62,18 @@ download: |
           url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
           url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
   download(urls, dir=dir / 'images', threads=3, delete=False)
+
+  # Convert
+  path = dir / f'images/VOCdevkit'
+  for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
+      imgs_path = dir / 'images' / f'{image_set}{year}'
+      lbs_path = dir / 'labels' / f'{image_set}{year}'
+      imgs_path.mkdir(exist_ok=True, parents=True)
+      lbs_path.mkdir(exist_ok=True, parents=True)
+
+      image_ids = open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt').read().strip().split()
+      for id in tqdm(image_ids, desc=f'{image_set}{year}'):
+          f = path / f'VOC{year}/JPEGImages/{id}.jpg'  # old img path
+          lb_path = (lbs_path / f.name).with_suffix('.txt')  # new label path
+          f.rename(imgs_path / f.name)  # move image
+          convert_label(path, lb_path, year, id)  # convert labels to YOLO format
diff --git a/utils/general.py b/utils/general.py
index a486055f2378..3139898fa872 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -264,7 +264,7 @@ def download_one(url, dir):
         if unzip and f.suffix in ('.zip', '.gz'):
             print(f'Unzipping {f}...')
             if f.suffix == '.zip':
-                s = f'unzip -qo {f} -d {dir} && rm {f}'  # unzip -quiet -overwrite
+                s = f'unzip -qo {f} -d {dir}'  # unzip -quiet -overwrite
             elif f.suffix == '.gz':
                 s = f'tar xfz {f} --directory {f.parent}'  # unzip
             if delete:  # delete zip file after unzip

From 2467ab4ce4a5b29d53bdfcf1979c021683414c6d Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 21:26:00 +0200
Subject: [PATCH 18/28] remove dashes

---
 data/coco.yaml    | 2 +-
 data/coco128.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/data/coco.yaml b/data/coco.yaml
index ccac351dfcd7..f896948fe0f1 100644
--- a/data/coco.yaml
+++ b/data/coco.yaml
@@ -25,7 +25,7 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
          'hair drier', 'toothbrush' ]  # class names
 
 
-# Download script/URL (optional) ---------------------------------------------------------------------------------------
+# Download script/URL (optional)
 download: |
   from utils.general import download, Path
 
diff --git a/data/coco128.yaml b/data/coco128.yaml
index 476148435bd4..8d834e418dc5 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -25,5 +25,5 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
          'hair drier', 'toothbrush' ]  # class names
 
 
-# Download script/URL (optional) ---------------------------------------------------------------------------------------
+# Download script/URL (optional)
 download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
\ No newline at end of file

From 7faca1df942776682c7ff48bd18b87a264ffa48a Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 21:31:09 +0200
Subject: [PATCH 19/28] delete get_voc.sh

---
 data/scripts/get_voc.sh | 116 ----------------------------------------
 1 file changed, 116 deletions(-)
 delete mode 100644 data/scripts/get_voc.sh

diff --git a/data/scripts/get_voc.sh b/data/scripts/get_voc.sh
deleted file mode 100644
index 7aba8c1fc691..000000000000
--- a/data/scripts/get_voc.sh
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/bin/bash
-# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
-# Download command: bash data/scripts/get_voc.sh
-# Train command: python train.py --data VOC.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /VOC
-#     /yolov5
-
-start=$(date +%s)
-mkdir -p ../tmp
-cd ../tmp/
-
-# Download/unzip images and labels
-d='.' # unzip directory
-url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
-f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
-f2=VOCtest_06-Nov-2007.zip     # 438MB, 4953 images
-f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
-for f in $f3 $f2 $f1; do
-  echo 'Downloading' $url$f '...' 
-  curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background
-done
-wait # finish background tasks
-
-end=$(date +%s)
-runtime=$((end - start))
-echo "Completed in" $runtime "seconds"
-
-echo "Splitting dataset..."
-python3 - "$@" <<END
-import os
-import xml.etree.ElementTree as ET
-from os import getcwd
-
-sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
-
-classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
-           "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
-
-
-def convert_box(size, box):
-    dw = 1. / (size[0])
-    dh = 1. / (size[1])
-    x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
-    return x * dw, y * dh, w * dw, h * dh
-
-
-def convert_annotation(year, image_id):
-    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
-    out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')
-    tree = ET.parse(in_file)
-    root = tree.getroot()
-    size = root.find('size')
-    w = int(size.find('width').text)
-    h = int(size.find('height').text)
-
-    for obj in root.iter('object'):
-        difficult = obj.find('difficult').text
-        cls = obj.find('name').text
-        if cls not in classes or int(difficult) == 1:
-            continue
-        cls_id = classes.index(cls)
-        xmlbox = obj.find('bndbox')
-        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
-             float(xmlbox.find('ymax').text))
-        bb = convert_box((w, h), b)
-        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
-
-
-cwd = getcwd()
-for year, image_set in sets:
-    if not os.path.exists('VOCdevkit/VOC%s/labels/' % year):
-        os.makedirs('VOCdevkit/VOC%s/labels/' % year)
-    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()
-    list_file = open('%s_%s.txt' % (year, image_set), 'w')
-    for image_id in image_ids:
-        list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (cwd, year, image_id))
-        convert_annotation(year, image_id)
-    list_file.close()
-END
-
-cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt
-cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt
-
-mkdir ../VOC ../VOC/images ../VOC/images/train ../VOC/images/val
-mkdir ../VOC/labels ../VOC/labels/train ../VOC/labels/val
-
-python3 - "$@" <<END
-import os
-
-print(os.path.exists('../tmp/train.txt'))
-with open('../tmp/train.txt', 'r') as f:
-    for line in f.readlines():
-        line = "/".join(line.split('/')[-5:]).strip()
-        if os.path.exists("../" + line):
-            os.system("cp ../" + line + " ../VOC/images/train")
-
-        line = line.replace('JPEGImages', 'labels').replace('jpg', 'txt')
-        if os.path.exists("../" + line):
-            os.system("cp ../" + line + " ../VOC/labels/train")
-
-print(os.path.exists('../tmp/2007_test.txt'))
-with open('../tmp/2007_test.txt', 'r') as f:
-    for line in f.readlines():
-        line = "/".join(line.split('/')[-5:]).strip()
-        if os.path.exists("../" + line):
-            os.system("cp ../" + line + " ../VOC/images/val")
-
-        line = line.replace('JPEGImages', 'labels').replace('jpg', 'txt')
-        if os.path.exists("../" + line):
-            os.system("cp ../" + line + " ../VOC/labels/val")
-END
-
-rm -rf ../tmp # remove temporary directory
-echo "VOC download done."

From 57adf71abfead87451a0e59bf15616ecb32cf7e1 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 21:33:37 +0200
Subject: [PATCH 20/28] force coco and coco128 to ../datasets

---
 data/coco.yaml    | 4 ++--
 data/coco128.yaml | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/data/coco.yaml b/data/coco.yaml
index f896948fe0f1..fa24620d461c 100644
--- a/data/coco.yaml
+++ b/data/coco.yaml
@@ -2,12 +2,12 @@
 # Train command: python train.py --data coco.yaml
 # Default dataset location is next to YOLOv5:
 #   /parent
-#     /coco
+#     /datasets/coco
 #     /yolov5
 
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../coco  # dataset root dir
+path: ../datasets/coco  # dataset root dir
 train: train2017.txt  # train images (relative to 'path') 118287 images
 val: val2017.txt  # train images (relative to 'path') 5000 images
 test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
diff --git a/data/coco128.yaml b/data/coco128.yaml
index 8d834e418dc5..e70ad687dd88 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -2,12 +2,12 @@
 # Train command: python train.py --data coco128.yaml
 # Default dataset location is next to YOLOv5:
 #   /parent
-#     /coco128
+#     /datasets/coco128
 #     /yolov5
 
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../coco128  # dataset root dir
+path: ../datasets/coco128  # dataset root dir
 train: images/train2017  # train images (relative to 'path') 128 images
 val: images/train2017  # val images (relative to 'path') 128 images
 test:  # test images (optional)

From 9f2d7640e27cb27920f2817a0c9a64b640cc0503 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 21:36:01 +0200
Subject: [PATCH 21/28] Capitalize Argoverse_HD.yaml

---
 data/{argoverse_hd.yaml => Argoverse_HD.yaml} | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
 rename data/{argoverse_hd.yaml => Argoverse_HD.yaml} (86%)

diff --git a/data/argoverse_hd.yaml b/data/Argoverse_HD.yaml
similarity index 86%
rename from data/argoverse_hd.yaml
rename to data/Argoverse_HD.yaml
index 6eaea9e48b84..335dc48ee3ae 100644
--- a/data/argoverse_hd.yaml
+++ b/data/Argoverse_HD.yaml
@@ -1,13 +1,13 @@
 # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
-# Train command: python train.py --data argoverse_hd.yaml
+# Train command: python train.py --data Argoverse_HD.yaml
 # Default dataset location is next to YOLOv5:
 #   /parent
-#     /datasets/argoverse
+#     /datasets/Argoverse
 #     /yolov5
 
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/argoverse  # dataset root dir
+path: ../datasets/Argoverse  # dataset root dir
 train: Argoverse-1.1/images/train/  # train images (relative to 'path') 39384 images
 val: Argoverse-1.1/images/val/  # val images (relative to 'path') 15062 images
 test: Argoverse-1.1/images/test/  # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview

From 313f04fc4ae2380dab558d8e14fe383a54c0099f Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 21:38:53 +0200
Subject: [PATCH 22/28] Capitalize Objects365.yaml

---
 data/{objects365.yaml => Objects365.yaml} | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
 rename data/{objects365.yaml => Objects365.yaml} (98%)

diff --git a/data/objects365.yaml b/data/Objects365.yaml
similarity index 98%
rename from data/objects365.yaml
rename to data/Objects365.yaml
index 2ded71ca5476..e365c82cab08 100644
--- a/data/objects365.yaml
+++ b/data/Objects365.yaml
@@ -1,13 +1,13 @@
 # Objects365 dataset https://www.objects365.org/
-# Train command: python train.py --data objects365.yaml
+# Train command: python train.py --data Objects365.yaml
 # Default dataset location is next to YOLOv5:
 #   /parent
-#     /datasets/objects365
+#     /datasets/Objects365
 #     /yolov5
 
 
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/objects365  # dataset root dir
+path: ../datasets/Objects365  # dataset root dir
 train: images/train  # train images (relative to 'path') 1742289 images
 val: images/val # val images (relative to 'path') 5570 images
 test:  # test images (optional)

From d86521923a448fb98f3bd7aefc3bafa1e723b5f5 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 22:57:49 +0200
Subject: [PATCH 23/28] update Argoverse_HD.yaml

---
 data/Argoverse_HD.yaml           | 45 ++++++++++++++++++++++-
 data/scripts/get_argoverse_hd.sh | 61 --------------------------------
 2 files changed, 44 insertions(+), 62 deletions(-)
 delete mode 100644 data/scripts/get_argoverse_hd.sh

diff --git a/data/Argoverse_HD.yaml b/data/Argoverse_HD.yaml
index 335dc48ee3ae..31a26d9840ab 100644
--- a/data/Argoverse_HD.yaml
+++ b/data/Argoverse_HD.yaml
@@ -19,4 +19,47 @@ names: [ 'person',  'bicycle',  'car',  'motorcycle',  'bus',  'truck',  'traffi
 
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
-  pass
+  import json
+  from utils.general import download, os, Path
+
+
+  def argoverse2yolo(set):
+      """Convert the Argoverse-HD annotation JSON at path `set` into YOLO-format label files."""
+      print(f"Converting {set} to YOLOv5 format...")  # f-string so the file being converted is shown
+      with open(set, "rb") as f:  # close the annotation file once parsed
+          a = json.load(f)
+      label_dict = {}  # label file path -> list of label lines for that image
+      for annot in a['annotations']:
+          img_id = annot['image_id']
+          img_name = a['images'][img_id]['name']
+          img_label_name = img_name[:-3] + "txt"  # swap the last 3 characters of the image name for 'txt'
+
+          cls = annot['category_id']  # instance class id
+          x_center, y_center, width, height = annot['bbox']
+          x_center = (x_center + width / 2) / 1920.  # offset and scale
+          y_center = (y_center + height / 2) / 1200.  # offset and scale
+          width /= 1920.  # scale
+          height /= 1200.  # scale
+          # NOTE(review): './labels/' resolves against the current working directory - confirm this is intended
+          img_dir = "./labels/" + a['seq_dirs'][a['images'][annot['image_id']]['sid']]
+          Path(img_dir).mkdir(parents=True, exist_ok=True)
+          if img_dir + "/" + img_label_name not in label_dict:
+              label_dict[img_dir + "/" + img_label_name] = []
+
+          label_dict[img_dir + "/" + img_label_name].append(f"{cls} {x_center} {y_center} {width} {height}\n")
+
+      for filename in label_dict:
+          with open(filename, "w") as file:
+              for string in label_dict[filename]:
+                  file.write(string)
+
+
+  # Download
+  dir = Path('../datasets/Argoverse')  # dataset root dir
+  urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
+  download(urls, dir=dir)
+
+  # Convert
+  annotations_dir = 'Argoverse-1.1/Argoverse-HD/annotations/'
+  for d in "train.json", "val.json":
+      argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels
diff --git a/data/scripts/get_argoverse_hd.sh b/data/scripts/get_argoverse_hd.sh
deleted file mode 100644
index 331509914568..000000000000
--- a/data/scripts/get_argoverse_hd.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/bash
-# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
-# Download command: bash data/scripts/get_argoverse_hd.sh
-# Train command: python train.py --data argoverse_hd.yaml
-# Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /argoverse
-#     /yolov5
-
-# Download/unzip images
-d='../argoverse/' # unzip directory
-mkdir $d
-url=https://argoverse-hd.s3.us-east-2.amazonaws.com/
-f=Argoverse-HD-Full.zip
-curl -L $url$f -o $f && unzip -q $f -d $d && rm $f &# download, unzip, remove in background
-wait                                              # finish background tasks
-
-cd ../argoverse/Argoverse-1.1/
-ln -s tracking images
-
-cd ../Argoverse-HD/annotations/
-
-python3 - "$@" <<END
-import json
-from pathlib import Path
-
-annotation_files = ["train.json", "val.json"]
-print("Converting annotations to YOLOv5 format...")
-
-for val in annotation_files:
-    a = json.load(open(val, "rb"))
-
-    label_dict = {}
-    for annot in a['annotations']:
-        img_id = annot['image_id']
-        img_name = a['images'][img_id]['name']
-        img_label_name = img_name[:-3] + "txt"
-
-        cls = annot['category_id']  # instance class id
-        x_center, y_center, width, height = annot['bbox']
-        x_center = (x_center + width / 2) / 1920.  # offset and scale
-        y_center = (y_center + height / 2) / 1200.  # offset and scale
-        width /= 1920.  # scale
-        height /= 1200.  # scale
-
-        img_dir = "./labels/" + a['seq_dirs'][a['images'][annot['image_id']]['sid']]
-
-        Path(img_dir).mkdir(parents=True, exist_ok=True)
-        if img_dir + "/" + img_label_name not in label_dict:
-            label_dict[img_dir + "/" + img_label_name] = []
-
-        label_dict[img_dir + "/" + img_label_name].append(f"{cls} {x_center} {y_center} {width} {height}\n")
-
-    for filename in label_dict:
-        with open(filename, "w") as file:
-            for string in label_dict[filename]:
-                file.write(string)
-
-END
-
-mv ./labels ../../Argoverse-1.1/

From 5a9b5b1b1bee60492bd995cf1f78588ab25bbeaa Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Thu, 24 Jun 2021 23:12:05 +0200
Subject: [PATCH 24/28] coco segments fix

---
 data/coco.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/coco.yaml b/data/coco.yaml
index fa24620d461c..c6053c984bc0 100644
--- a/data/coco.yaml
+++ b/data/coco.yaml
@@ -33,7 +33,7 @@ download: |
   segments = False  # segment or box labels
   dir = Path(yaml['path'])  # dataset root dir
   url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
-  urls = [url + 'coco2017labels-segments.zip' if segments else ''coco2017labels.zip']  # labels
+  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
   download(urls, dir=dir.parent)
 
   # Download data

From 9bfc09a7a3ad008938783237d55648b770dcf3af Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Fri, 25 Jun 2021 00:01:21 +0200
Subject: [PATCH 25/28] VOC single-thread

---
 data/VOC.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/VOC.yaml b/data/VOC.yaml
index ffb3a7c7c2e9..3d878fa67a60 100644
--- a/data/VOC.yaml
+++ b/data/VOC.yaml
@@ -61,7 +61,7 @@ download: |
   urls = [url + 'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
           url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
           url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
-  download(urls, dir=dir / 'images', threads=3, delete=False)
+  download(urls, dir=dir / 'images', delete=False)
 
   # Convert
   path = dir / f'images/VOCdevkit'

From f75ff30a10210fd6cb02fecfdb2deae0b849276a Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Fri, 25 Jun 2021 00:06:42 +0200
Subject: [PATCH 26/28] update Argoverse_HD.yaml

---
 data/Argoverse_HD.yaml | 43 +++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/data/Argoverse_HD.yaml b/data/Argoverse_HD.yaml
index 31a26d9840ab..ad1a52254d74 100644
--- a/data/Argoverse_HD.yaml
+++ b/data/Argoverse_HD.yaml
@@ -20,46 +20,47 @@ names: [ 'person',  'bicycle',  'car',  'motorcycle',  'bus',  'truck',  'traffi
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   import json
-  from utils.general import download, os, Path
+
+  from tqdm import tqdm
+  from utils.general import download, Path
 
 
   def argoverse2yolo(set):
-      print("Converting {set} to YOLOv5 format...")
+      labels = {}
       a = json.load(open(set, "rb"))
-
-      label_dict = {}
-      for annot in a['annotations']:
+      for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
           img_id = annot['image_id']
           img_name = a['images'][img_id]['name']
           img_label_name = img_name[:-3] + "txt"
 
           cls = annot['category_id']  # instance class id
           x_center, y_center, width, height = annot['bbox']
-          x_center = (x_center + width / 2) / 1920.  # offset and scale
-          y_center = (y_center + height / 2) / 1200.  # offset and scale
-          width /= 1920.  # scale
-          height /= 1200.  # scale
-
-          img_dir = "./labels/" + a['seq_dirs'][a['images'][annot['image_id']]['sid']]
+          x_center = (x_center + width / 2) / 1920.0  # offset and scale
+          y_center = (y_center + height / 2) / 1200.0  # offset and scale
+          width /= 1920.0  # scale
+          height /= 1200.0  # scale
 
-          Path(img_dir).mkdir(parents=True, exist_ok=True)
-          if img_dir + "/" + img_label_name not in label_dict:
-              label_dict[img_dir + "/" + img_label_name] = []
+          img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
+          if not img_dir.exists():
+              img_dir.mkdir(parents=True, exist_ok=True)
 
-          label_dict[img_dir + "/" + img_label_name].append(f"{cls} {x_center} {y_center} {width} {height}\n")
+          k = str(img_dir / img_label_name)
+          if k not in labels:
+              labels[k] = []
+          labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
 
-      for filename in label_dict:
-          with open(filename, "w") as file:
-              for string in label_dict[filename]:
-                  file.write(string)
+      for k in labels:
+          with open(k, "w") as f:
+              f.writelines(labels[k])
 
 
   # Download
   dir = Path('../datasets/Argoverse')  # dataset root dir
   urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
-  download(urls, dir=dir)
+  download(urls, dir=dir, delete=False)
 
   # Convert
-  annotations_dir = 'Argoverse-1.1/Argoverse-HD/annotations/'
+  annotations_dir = 'Argoverse-HD/annotations/'
+  (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images')  # rename 'tracking' to 'images'
   for d in "train.json", "val.json":
-      argoverse2yolo(dir / annotations_dir / d)  # convert VisDrone annotations to YOLO labels
+      argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels

From 173cca27ac064693d87ec0163b801cefe4b0eacf Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Fri, 25 Jun 2021 01:02:43 +0200
Subject: [PATCH 27/28] update data_dict in test handling

---
 test.py  | 9 +++++----
 train.py | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/test.py b/test.py
index 0e0f01efa531..31d57221a3d5 100644
--- a/test.py
+++ b/test.py
@@ -76,6 +76,11 @@ def run(data,
         # if device.type != 'cpu' and torch.cuda.device_count() > 1:
         #     model = nn.DataParallel(model)
 
+        # Data
+        with open(data) as f:
+            data = yaml.safe_load(f)
+        check_dataset(data)  # check
+
     # Half
     half &= device.type != 'cpu'  # half precision only supported on CUDA
     if half:
@@ -83,10 +88,6 @@ def run(data,
 
     # Configure
     model.eval()
-    if isinstance(data, str):
-        with open(data) as f:
-            data = yaml.safe_load(f)
-    check_dataset(data)  # check
     is_coco = type(data['val']) is str and data['val'].endswith('coco/val2017.txt')  # COCO dataset
     nc = 1 if single_cls else int(data['nc'])  # number of classes
     iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
diff --git a/train.py b/train.py
index ba84b432f660..6b04e8ff3a6a 100644
--- a/train.py
+++ b/train.py
@@ -453,7 +453,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
         if not evolve:
             if is_coco:  # COCO dataset
                 for m in [last, best] if best.exists() else [last]:  # speed, mAP tests
-                    results, _, _ = test.run(data,
+                    results, _, _ = test.run(data_dict,
                                              batch_size=batch_size // WORLD_SIZE * 2,
                                              imgsz=imgsz_test,
                                              conf_thres=0.001,

From 7bcac6bf9a89d5707246964cea5ac4cf117e58da Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Fri, 25 Jun 2021 01:09:45 +0200
Subject: [PATCH 28/28] create root

---
 utils/general.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/utils/general.py b/utils/general.py
index 3139898fa872..555975f07c5d 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -230,7 +230,6 @@ def check_dataset(data, autodownload=True):
 
     train, val, test, s = [data.get(x) for x in ('train', 'val', 'test', 'download')]
     if val:
-        root = path.parent if 'path' in data else '..'  # unzip directory i.e. '../'
         val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
         if not all(x.exists() for x in val):
             print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
@@ -239,6 +238,8 @@ def check_dataset(data, autodownload=True):
                     f = Path(s).name  # filename
                     print(f'Downloading {s} ...')
                     torch.hub.download_url_to_file(s, f)
+                    root = path.parent if 'path' in data else '..'  # unzip directory i.e. '../'
+                    Path(root).mkdir(parents=True, exist_ok=True)  # create root
                     r = os.system(f'unzip -q {f} -d {root} && rm {f}')  # unzip
                 elif s.startswith('bash '):  # bash script
                     print(f'Running {s} ...')