From 9c513ca62942ba34306b9f5be3d0f0110b4435ec Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Tue, 8 Feb 2022 22:20:44 +0100 Subject: [PATCH] Add `DATASETS_DIR` global in general.py (#6578) --- utils/datasets.py | 12 ++++++------ utils/general.py | 5 +++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/utils/datasets.py b/utils/datasets.py index 1026731f3a41..b3e0dbb3523e 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -27,7 +27,7 @@ from tqdm import tqdm from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective -from utils.general import (LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str, +from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn) from utils.torch_utils import torch_distributed_zero_first @@ -817,15 +817,15 @@ def create_folder(path='./new'): os.makedirs(path) # make new output folder -def flatten_recursive(path='../datasets/coco128'): +def flatten_recursive(path=DATASETS_DIR / 'coco128'): # Flatten a recursive directory by bringing all files to top level - new_path = Path(path + '_flat') + new_path = Path(str(path) + '_flat') create_folder(new_path) for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)): shutil.copyfile(file, new_path / Path(file).name) -def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *; extract_boxes() +def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.datasets import *; extract_boxes() # Convert detection dataset into classification dataset, with one directory per class path = Path(path) # images dir shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing @@ -859,7 +859,7 @@ def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *; assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}' -def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False): +def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False): """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files Usage: from utils.datasets import *; autosplit() Arguments @@ -939,7 +939,7 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profil """ Return dataset statistics dictionary with images and instances counts per split per class To run in parent directory: export PYTHONPATH="$PWD/yolov5" Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True) - Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip') + Usage2: from utils.datasets import *; dataset_stats('path/to/coco128_with_yaml.zip') Arguments path: Path to data.yaml or data.zip (with data.yaml inside data.zip) autodownload: Attempt to download dataset if not found locally diff --git a/utils/general.py b/utils/general.py index 030539d76704..d4ca6e2736d9 100755 --- a/utils/general.py +++ b/utils/general.py @@ -35,6 +35,7 @@ # Settings FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory +DATASETS_DIR = ROOT.parent / 'datasets' # YOLOv5 datasets directory NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf @@ -398,8 +399,8 @@ def check_dataset(data, autodownload=True): # Download (optional) extract_dir = '' if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip - download(data, dir='../datasets', unzip=True, delete=False, curl=False, threads=1) - data = next((Path('../datasets') / Path(data).stem).rglob('*.yaml')) + download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False, threads=1) + data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml')) extract_dir, autodownload = data.parent, False # Read yaml (optional)