Add optional dataset.yaml path attribute (ultralytics#3753)
* Add optional dataset.yaml `path` attribute

@kalenmike

* pass locals to python scripts

* handle lists

* update coco128.yaml

* Capitalize first letter

* add test key

* finalize GlobalWheat2020.yaml

* finalize objects365.yaml

* finalize SKU-110K.yaml

* finalize SKU-110K.yaml

* finalize VisDrone.yaml

* NoneType fix

* update download comment

* voc to VOC

* update

* update VOC.yaml

* update VOC.yaml

* remove dashes

* delete get_voc.sh

* force coco and coco128 to ../datasets

* Capitalize Argoverse_HD.yaml

* Capitalize Objects365.yaml

* update Argoverse_HD.yaml

* coco segments fix

* VOC single-thread

* update Argoverse_HD.yaml

* update data_dict in test handling

* create root
glenn-jocher committed Jun 24, 2021
1 parent f7f7f4e commit a5f7e25
Showing 17 changed files with 268 additions and 329 deletions.
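The core of the change is the new optional `path` key: `train`, `val` and `test` entries (plain strings or lists of directories/files) are now interpreted relative to a single dataset root, and an empty key (e.g. `test:`) stays `None`. Below is a minimal sketch of that resolution logic, assuming PyYAML; the `resolve_paths` helper and the inline YAML are illustrative only, not the code added by this commit.

# Minimal sketch: prepend an optional 'path' root to train/val/test entries.
# 'resolve_paths' and the inline YAML are illustrative, not yolov5 code.
from pathlib import Path

import yaml  # PyYAML

EXAMPLE = """
path: ../datasets/GlobalWheat2020  # dataset root dir
train:  # list form is handled too
  - images/arvalis_1
  - images/ethz_1
val: images/ethz_1
test:  # optional key, may be left empty
"""

def resolve_paths(data: dict) -> dict:
    """Prefix string or list train/val/test entries with the optional 'path' root."""
    root = Path(data.get('path', ''))  # a missing 'path' keeps the old behaviour
    for k in ('train', 'val', 'test'):
        v = data.get(k)
        if isinstance(v, str):
            data[k] = str(root / v)
        elif isinstance(v, list):
            data[k] = [str(root / x) for x in v]
        # v is None for an empty key: leave it untouched (the 'NoneType fix' bullet)
    return data

print(resolve_paths(yaml.safe_load(EXAMPLE)))
# train/val entries now carry the '../datasets/GlobalWheat2020' prefix; 'test' stays None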
66 changes: 66 additions & 0 deletions data/Argoverse_HD.yaml
@@ -0,0 +1,66 @@
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/
# Train command: python train.py --data Argoverse_HD.yaml
# Default dataset location is next to YOLOv5:
#   /parent
#     /datasets/Argoverse
#     /yolov5


# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Argoverse  # dataset root dir
train: Argoverse-1.1/images/train/  # train images (relative to 'path') 39384 images
val: Argoverse-1.1/images/val/  # val images (relative to 'path') 15062 images
test: Argoverse-1.1/images/test/  # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview

# Classes
nc: 8  # number of classes
names: [ 'person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'traffic_light', 'stop_sign' ]  # class names


# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import json
  from tqdm import tqdm
  from utils.general import download, Path

  def argoverse2yolo(set):
      labels = {}
      a = json.load(open(set, "rb"))
      for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
          img_id = annot['image_id']
          img_name = a['images'][img_id]['name']
          img_label_name = img_name[:-3] + "txt"
          cls = annot['category_id']  # instance class id
          x_center, y_center, width, height = annot['bbox']
          x_center = (x_center + width / 2) / 1920.0  # offset and scale
          y_center = (y_center + height / 2) / 1200.0  # offset and scale
          width /= 1920.0  # scale
          height /= 1200.0  # scale
          img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
          if not img_dir.exists():
              img_dir.mkdir(parents=True, exist_ok=True)
          k = str(img_dir / img_label_name)
          if k not in labels:
              labels[k] = []
          labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
      for k in labels:
          with open(k, "w") as f:
              f.writelines(labels[k])

  # Download
  dir = Path('../datasets/Argoverse')  # dataset root dir
  urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
  download(urls, dir=dir, delete=False)

  # Convert
  annotations_dir = 'Argoverse-HD/annotations/'
  (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images')  # rename 'tracking' to 'images'
  for d in "train.json", "val.json":
      argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels
55 changes: 26 additions & 29 deletions data/GlobalWheat2020.yaml
@@ -1,43 +1,40 @@
 # Global Wheat 2020 dataset http://www.global-wheat.com/
 # Train command: python train.py --data GlobalWheat2020.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
+#   /parent
 #     /datasets/GlobalWheat2020
 #     /yolov5
 
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: # 3422 images
-  - ../datasets/GlobalWheat2020/images/arvalis_1
-  - ../datasets/GlobalWheat2020/images/arvalis_2
-  - ../datasets/GlobalWheat2020/images/arvalis_3
-  - ../datasets/GlobalWheat2020/images/ethz_1
-  - ../datasets/GlobalWheat2020/images/rres_1
-  - ../datasets/GlobalWheat2020/images/inrae_1
-  - ../datasets/GlobalWheat2020/images/usask_1
-
-val: # 748 images (WARNING: train set contains ethz_1)
-  - ../datasets/GlobalWheat2020/images/ethz_1
-
-test: # 1276 images
-  - ../datasets/GlobalWheat2020/images/utokyo_1
-  - ../datasets/GlobalWheat2020/images/utokyo_2
-  - ../datasets/GlobalWheat2020/images/nau_1
-  - ../datasets/GlobalWheat2020/images/uq_1
-
-# number of classes
-nc: 1
-
-# class names
-names: [ 'wheat_head' ]
-
-
-# download command/URL (optional) --------------------------------------------------------------------------------------
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/GlobalWheat2020  # dataset root dir
+train: # train images (relative to 'path') 3422 images
+  - images/arvalis_1
+  - images/arvalis_2
+  - images/arvalis_3
+  - images/ethz_1
+  - images/rres_1
+  - images/inrae_1
+  - images/usask_1
+val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
+  - images/ethz_1
+test: # test images (optional) 1276 images
+  - images/utokyo_1
+  - images/utokyo_2
+  - images/nau_1
+  - images/uq_1
+
+# Classes
+nc: 1  # number of classes
+names: [ 'wheat_head' ]  # class names
+
+
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   from utils.general import download, Path
 
   # Download
-  dir = Path('../datasets/GlobalWheat2020')  # dataset directory
+  dir = Path(yaml['path'])  # dataset root dir
   urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
   download(urls, dir=dir)
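Each `download:` block now builds its target from `Path(yaml['path'])` instead of a hard-coded directory, which only works because the loader runs the script with the parsed dataset dict in scope (the "pass locals to python scripts" bullet) and creates the root directory first (the "create root" bullet). Below is a hedged sketch of that mechanism; the function name, the fallback root and the exact `exec` call are assumptions, not the repository's implementation.

# Illustrative sketch: execute a dataset YAML's 'download:' field with the parsed
# dict exposed as 'yaml', so expressions like Path(yaml['path']) resolve.
# run_download_script and its details are assumptions, not the exact yolov5 code.
from pathlib import Path

import yaml as pyyaml  # PyYAML; aliased so the name 'yaml' stays free for the dict

def run_download_script(yaml_file: str) -> None:
    with open(yaml_file, errors='ignore') as f:
        data = pyyaml.safe_load(f)  # dict with path, train, val, nc, names, download, ...
    script = data.get('download')
    if not script:
        return  # nothing to do
    root = Path(data.get('path', '../datasets'))  # assumed fallback root
    root.mkdir(parents=True, exist_ok=True)  # make sure the dataset root exists
    if script.startswith('http'):  # a plain URL rather than Python source
        print(f'Would fetch {script} into {root}')
    else:
        exec(script, {'yaml': data})  # the script sees the dataset dict as 'yaml'

# run_download_script('data/GlobalWheat2020.yaml')  # requires the yolov5 repo on sys.path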
23 changes: 12 additions & 11 deletions data/objects365.yaml → data/Objects365.yaml
@@ -1,18 +1,19 @@
 # Objects365 dataset https://www.objects365.org/
-# Train command: python train.py --data objects365.yaml
+# Train command: python train.py --data Objects365.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /datasets/objects365
+#   /parent
+#     /datasets/Objects365
 #     /yolov5
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../datasets/objects365/images/train  # 1742289 images
-val: ../datasets/objects365/images/val  # 5570 images
-
-# number of classes
-nc: 365
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/Objects365  # dataset root dir
+train: images/train  # train images (relative to 'path') 1742289 images
+val: images/val  # val images (relative to 'path') 5570 images
+test:  # test images (optional)
 
-# class names
+# Classes
+nc: 365  # number of classes
 names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Glasses', 'Bottle', 'Desk', 'Cup',
          'Street Lights', 'Cabinet/shelf', 'Handbag/Satchel', 'Bracelet', 'Plate', 'Picture/Frame', 'Helmet', 'Book',
          'Gloves', 'Storage box', 'Boat', 'Leather Shoes', 'Flower', 'Bench', 'Potted Plant', 'Bowl/Basin', 'Flag',
@@ -56,15 +57,15 @@ names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Gl
          'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis' ]
 
 
-# download command/URL (optional) --------------------------------------------------------------------------------------
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   from pycocotools.coco import COCO
   from tqdm import tqdm
   from utils.general import download, Path
 
   # Make Directories
-  dir = Path('../datasets/objects365')  # dataset directory
+  dir = Path(yaml['path'])  # dataset root dir
   for p in 'images', 'labels':
       (dir / p).mkdir(parents=True, exist_ok=True)
       for q in 'train', 'val':
29 changes: 14 additions & 15 deletions data/SKU-110K.yaml
@@ -1,39 +1,38 @@
 # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19
 # Train command: python train.py --data SKU-110K.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
+#   /parent
 #     /datasets/SKU-110K
 #     /yolov5
 
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../datasets/SKU-110K/train.txt  # 8219 images
-val: ../datasets/SKU-110K/val.txt  # 588 images
-test: ../datasets/SKU-110K/test.txt  # 2936 images
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/SKU-110K  # dataset root dir
+train: train.txt  # train images (relative to 'path') 8219 images
+val: val.txt  # val images (relative to 'path') 588 images
+test: test.txt  # test images (optional) 2936 images
 
-# number of classes
-nc: 1
-
-# class names
-names: [ 'object' ]
+# Classes
+nc: 1  # number of classes
+names: [ 'object' ]  # class names
 
 
-# download command/URL (optional) --------------------------------------------------------------------------------------
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   import shutil
   from tqdm import tqdm
   from utils.general import np, pd, Path, download, xyxy2xywh
 
   # Download
-  datasets = Path('../datasets')  # download directory
+  dir = Path(yaml['path'])  # dataset root dir
+  parent = Path(dir.parent)  # download dir
   urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
-  download(urls, dir=datasets, delete=False)
+  download(urls, dir=parent, delete=False)
 
   # Rename directories
-  dir = (datasets / 'SKU-110K')
   if dir.exists():
       shutil.rmtree(dir)
-  (datasets / 'SKU110K_fixed').rename(dir)  # rename dir
+  (parent / 'SKU110K_fixed').rename(dir)  # rename dir
   (dir / 'labels').mkdir(parents=True, exist_ok=True)  # create labels dir
 
   # Convert labels
79 changes: 79 additions & 0 deletions data/VOC.yaml
@@ -0,0 +1,79 @@
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
# Train command: python train.py --data VOC.yaml
# Default dataset location is next to YOLOv5:
#   /parent
#     /datasets/VOC
#     /yolov5


# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC
train: # train images (relative to 'path') 16551 images
  - images/train2012
  - images/train2007
  - images/val2012
  - images/val2007
val: # val images (relative to 'path') 4952 images
  - images/test2007
test: # test images (optional)
  - images/test2007

# Classes
nc: 20  # number of classes
names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
         'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]  # class names


# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import xml.etree.ElementTree as ET
  from tqdm import tqdm
  from utils.general import download, Path

  def convert_label(path, lb_path, year, image_id):
      def convert_box(size, box):
          dw, dh = 1. / size[0], 1. / size[1]
          x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
          return x * dw, y * dh, w * dw, h * dh

      in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
      out_file = open(lb_path, 'w')
      tree = ET.parse(in_file)
      root = tree.getroot()
      size = root.find('size')
      w = int(size.find('width').text)
      h = int(size.find('height').text)

      for obj in root.iter('object'):
          cls = obj.find('name').text
          if cls in yaml['names'] and not int(obj.find('difficult').text) == 1:
              xmlbox = obj.find('bndbox')
              bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
              cls_id = yaml['names'].index(cls)  # class id
              out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')

  # Download
  dir = Path(yaml['path'])  # dataset root dir
  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  urls = [url + 'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
          url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
          url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
  download(urls, dir=dir / 'images', delete=False)

  # Convert
  path = dir / f'images/VOCdevkit'
  for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
      imgs_path = dir / 'images' / f'{image_set}{year}'
      lbs_path = dir / 'labels' / f'{image_set}{year}'
      imgs_path.mkdir(exist_ok=True, parents=True)
      lbs_path.mkdir(exist_ok=True, parents=True)

      image_ids = open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt').read().strip().split()
      for id in tqdm(image_ids, desc=f'{image_set}{year}'):
          f = path / f'VOC{year}/JPEGImages/{id}.jpg'  # old img path
          lb_path = (lbs_path / f.name).with_suffix('.txt')  # new label path
          f.rename(imgs_path / f.name)  # move image
          convert_label(path, lb_path, year, id)  # convert labels to YOLO format
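The nested `convert_box` helper above maps a VOC box given in pixels as (xmin, xmax, ymin, ymax) to YOLO's normalized (x_center, y_center, width, height). Below is a small standalone check of that arithmetic; the function body is copied from the script above, and the image size and box values are made up for illustration.

# Standalone check of the VOC -> YOLO box conversion used in VOC.yaml's download script.
def convert_box(size, box):
    # size = (width, height) in pixels, box = (xmin, xmax, ymin, ymax) in pixels
    dw, dh = 1. / size[0], 1. / size[1]
    x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
    return x * dw, y * dh, w * dw, h * dh

# 500x375 image with a box spanning x = 100..300 and y = 75..225 (made-up numbers)
print(convert_box((500, 375), (100., 300., 75., 225.)))
# -> (0.398, 0.3973..., 0.4, 0.4): centre ~(199, 149) px, 200x150 px box, all normalized to [0, 1]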
23 changes: 11 additions & 12 deletions data/VisDrone.yaml
@@ -1,24 +1,23 @@
 # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset
 # Train command: python train.py --data VisDrone.yaml
 # Default dataset location is next to YOLOv5:
-#   /parent_folder
-#     /VisDrone
+#   /parent
+#     /datasets/VisDrone
 #     /yolov5
 
 
-# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
-train: ../VisDrone/VisDrone2019-DET-train/images  # 6471 images
-val: ../VisDrone/VisDrone2019-DET-val/images  # 548 images
-test: ../VisDrone/VisDrone2019-DET-test-dev/images  # 1610 images
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/VisDrone  # dataset root dir
+train: VisDrone2019-DET-train/images  # train images (relative to 'path') 6471 images
+val: VisDrone2019-DET-val/images  # val images (relative to 'path') 548 images
+test: VisDrone2019-DET-test-dev/images  # test images (optional) 1610 images
 
-# number of classes
-nc: 10
-
-# class names
+# Classes
+nc: 10  # number of classes
 names: [ 'pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor' ]
 
 
-# download command/URL (optional) --------------------------------------------------------------------------------------
+# Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
   from utils.general import download, os, Path
@@ -49,7 +48,7 @@ download: |
   # Download
-  dir = Path('../VisDrone')  # dataset directory
+  dir = Path(yaml['path'])  # dataset root dir
   urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
21 changes: 0 additions & 21 deletions data/argoverse_hd.yaml

This file was deleted.
