From 7223e5ef18a7e6e1406ec6f80838487751fc7bcc Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 9 Aug 2020 20:52:57 -0700 Subject: [PATCH] Dataset autodownload feature addition (#685) * initial commit * move download scripts into data/scripts * new check_dataset() function in general.py * move check_dataset() out of with context * Update general.py * DDP update * Update general.py --- data/coco.yaml | 4 +- data/coco128.yaml | 4 +- data/get_coco2017.sh | 30 ------------- data/scripts/get_coco.sh | 21 +++++++++ data/{ => scripts}/get_voc.sh | 81 +++++++++++++++++------------------ data/voc.yaml | 4 +- test.py | 3 +- train.py | 8 ++-- utils/general.py | 19 ++++++++ 9 files changed, 95 insertions(+), 79 deletions(-) delete mode 100755 data/get_coco2017.sh create mode 100755 data/scripts/get_coco.sh rename data/{ => scripts}/get_voc.sh (84%) diff --git a/data/coco.yaml b/data/coco.yaml index e346b9395125..09f3a7890373 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -1,5 +1,4 @@ # COCO 2017 dataset http://cocodataset.org -# Download command: bash yolov5/data/get_coco2017.sh # Train command: python train.py --data coco.yaml # Default dataset location is next to /yolov5: # /parent_folder @@ -7,6 +6,9 @@ # /yolov5 +# download command/URL (optional) +download: bash data/scripts/get_coco.sh + # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] train: ../coco/train2017.txt # 118287 images val: ../coco/val2017.txt # 5000 images diff --git a/data/coco128.yaml b/data/coco128.yaml index 9f47382547d0..12e1d799718d 100644 --- a/data/coco128.yaml +++ b/data/coco128.yaml @@ -1,5 +1,4 @@ # COCO 2017 dataset http://cocodataset.org - first 128 training images -# Download command: python -c "from yolov5.utils.google_utils import *; gdrive_download('1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', 'coco128.zip')" # Train command: python train.py --data coco128.yaml # Default dataset location is next to /yolov5: # /parent_folder @@ -7,6 +6,9 @@ # /yolov5 +# download command/URL (optional) +download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip + # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] train: ../coco128/images/train2017/ # 128 images val: ../coco128/images/train2017/ # 128 images diff --git a/data/get_coco2017.sh b/data/get_coco2017.sh deleted file mode 100755 index aa031dfb6a4e..000000000000 --- a/data/get_coco2017.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# COCO 2017 dataset http://cocodataset.org -# Download command: bash yolov5/data/get_coco2017.sh -# Train command: python train.py --data coco.yaml -# Default dataset location is next to /yolov5: -# /parent_folder -# /coco -# /yolov5 - - -# Download labels from Google Drive, accepting presented query -filename="coco2017labels.zip" -fileid="1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L" -curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null -curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} -rm ./cookie - -# Unzip labels -unzip -q ${filename} # for coco.zip -# tar -xzf ${filename} # for coco.tar.gz -rm ${filename} - -# Download and unzip images -cd coco/images -f="train2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f # 19G, 118k images -f="val2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f # 1G, 5k images -# f="test2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f # 7G, 41k images - -# cd out -cd ../.. diff --git a/data/scripts/get_coco.sh b/data/scripts/get_coco.sh new file mode 100755 index 000000000000..7f86377070a5 --- /dev/null +++ b/data/scripts/get_coco.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# COCO 2017 dataset http://cocodataset.org +# Download command: bash data/scripts/get_coco.sh +# Train command: python train.py --data coco.yaml +# Default dataset location is next to /yolov5: +# /parent_folder +# /coco +# /yolov5 + +# Download/unzip labels +echo 'Downloading COCO 2017 labels ...' +d='../' # unzip directory +f='coco2017labels.zip' && curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f +unzip -q $f -d $d && rm $f + +# Download/unzip images +echo 'Downloading COCO 2017 images ...' +d='../coco/images' # unzip directory +f='train2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 19G, 118k images +f='val2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 1G, 5k images +# f='test2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 7G, 41k images diff --git a/data/get_voc.sh b/data/scripts/get_voc.sh similarity index 84% rename from data/get_voc.sh rename to data/scripts/get_voc.sh index 3eaad6b56efb..94843a64532f 100644 --- a/data/get_voc.sh +++ b/data/scripts/get_voc.sh @@ -1,33 +1,32 @@ +#!/bin/bash # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/ -# Download command: bash ./data/get_voc.sh +# Download command: bash data/scripts/get_voc.sh # Train command: python train.py --data voc.yaml # Default dataset location is next to /yolov5: # /parent_folder # /VOC # /yolov5 - -start=`date +%s` +start=$(date +%s) # handle optional download dir -if [ -z "$1" ] - then - # navigate to ~/tmp - echo "navigating to ../tmp/ ..." - mkdir -p ../tmp - cd ../tmp/ - else - # check if is valid directory - if [ ! -d $1 ]; then - echo $1 "is not a valid directory" - exit 0 - fi - echo "navigating to" $1 "..." - cd $1 +if [ -z "$1" ]; then + # navigate to ~/tmp + echo "navigating to ../tmp/ ..." + mkdir -p ../tmp + cd ../tmp/ +else + # check if is valid directory + if [ ! -d $1 ]; then + echo $1 "is not a valid directory" + exit 0 + fi + echo "navigating to" $1 "..." + cd $1 fi echo "Downloading VOC2007 trainval ..." -# Download the data. +# Download data curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar echo "Downloading VOC2007 test data ..." curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar @@ -42,44 +41,42 @@ echo "removing tars ..." rm VOCtrainval_06-Nov-2007.tar rm VOCtest_06-Nov-2007.tar -end=`date +%s` -runtime=$((end-start)) +end=$(date +%s) +runtime=$((end - start)) echo "Completed in" $runtime "seconds" -start=`date +%s` +start=$(date +%s) # handle optional download dir -if [ -z "$1" ] - then - # navigate to ~/tmp - echo "navigating to ../tmp/ ..." - mkdir -p ../tmp - cd ../tmp/ - else - # check if is valid directory - if [ ! -d $1 ]; then - echo $1 "is not a valid directory" - exit 0 - fi - echo "navigating to" $1 "..." - cd $1 +if [ -z "$1" ]; then + # navigate to ~/tmp + echo "navigating to ../tmp/ ..." + mkdir -p ../tmp + cd ../tmp/ +else + # check if is valid directory + if [ ! -d $1 ]; then + echo $1 "is not a valid directory" + exit 0 + fi + echo "navigating to" $1 "..." + cd $1 fi echo "Downloading VOC2012 trainval ..." -# Download the data. +# Download data curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar echo "Done downloading." - # Extract data echo "Extracting trainval ..." tar -xf VOCtrainval_11-May-2012.tar echo "removing tar ..." rm VOCtrainval_11-May-2012.tar -end=`date +%s` -runtime=$((end-start)) +end=$(date +%s) +runtime=$((end - start)) echo "Completed in" $runtime "seconds" @@ -144,8 +141,8 @@ for year, image_set in sets: END -cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt -cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt +cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt +cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt python3 - "$@" <