Skip to content

Commit

Permalink
Update dataset_stats() for HUB (ultralytics#3536)
Browse files (browse the repository at this point in the history)
* Update `dataset_stats()` for HUB 

Cleanup of b6fdd2e

* autodownload flag

* Update general.py

* cleanup
  • Loading branch information
glenn-jocher authored Jun 9, 2021
1 parent b6fdd2e commit 1b5edb6
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 9 deletions.
11 changes: 5 additions & 6 deletions utils/datasets.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1086,18 +1086,17 @@ def verify_image_label(params):
return [None] * 4 + [nm, nf, ne, nc]


def dataset_stats(path='data/coco128.yaml', verbose=False):
def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False):
""" Return dataset statistics dictionary with images and instances counts per split per class
Usage: from utils.datasets import *; dataset_stats('data/coco128.yaml')
Usage: from utils.datasets import *; dataset_stats('coco128.yaml', verbose=True)
Arguments
path: Path to data.yaml
autodownload: Attempt to download dataset if not found locally
verbose: Print stats dictionary
"""
path = check_file(Path(path))
with open(path) as f:
with open(check_file(Path(path))) as f:
data = yaml.safe_load(f) # data dict
check_dataset(data) # download dataset if missing

check_dataset(data, autodownload) # download dataset if missing
nc = data['nc'] # number of classes
stats = {'nc': nc, 'names': data['names']} # statistics dictionary
for split in 'train', 'val', 'test':
Expand Down
6 changes: 3 additions & 3 deletions utils/general.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -220,14 +220,14 @@ def check_file(file):
return files[0] # return file


def check_dataset(dict):
def check_dataset(data, autodownload=True):
# Download dataset if not found locally
val, s = dict.get('val'), dict.get('download')
val, s = data.get('val'), data.get('download')
if val and len(val):
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
if not all(x.exists() for x in val):
print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
if s and len(s): # download script
if s and len(s) and autodownload: # download script
if s.startswith('http') and s.endswith('.zip'): # URL
f = Path(s).name # filename
print(f'Downloading {s} ...')
Expand Down

0 comments on commit 1b5edb6

Please sign in to comment.