Skip to content

Commit

Permalink
Update dataset_stats() to list of dicts (#3657)
Browse files Browse the repository at this point in the history
* Update `dataset_stats()` to list of dicts

@kalenmike

* Update datasets.py
  • Loading branch information
glenn-jocher committed Jun 17, 2021
1 parent df7706d commit 9b6dba6
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions utils/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,11 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False):
autodownload: Attempt to download dataset if not found locally
verbose: Print stats dictionary
"""

def round_labels(labels, ndigits=6):
    """Return label rows with an integer class and rounded float values.

    Each row is unpacked as ``class, *values``: the first element is cast
    to ``int`` and every remaining element is rounded to ``ndigits``
    decimal places. This keeps the serialized stats compact.

    Args:
        labels: Iterable of rows; each row must contain at least one
            element (the class), followed by zero or more numeric values.
        ndigits: Number of decimal places kept for the values after the
            class. Defaults to 6, the precision that was hard-coded before.

    Returns:
        A new list of lists; the input rows are not modified.
    """
    return [[int(c), *(round(x, ndigits) for x in points)] for c, *points in labels]

with open(check_file(path)) as f:
data = yaml.safe_load(f) # data dict
check_dataset(data, autodownload) # download dataset if missing
Expand All @@ -1118,12 +1123,13 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False):
stats[split] = {'instance_stats': {'total': int(x.sum()), 'per_class': x.sum(0).tolist()},
'image_stats': {'total': dataset.n, 'unlabelled': int(np.all(x == 0, 1).sum()),
'per_class': (x > 0).sum(0).tolist()},
'labels': {str(Path(k).name): v.tolist() for k, v in zip(dataset.img_files, dataset.labels)}}
'labels': [{str(Path(k).name): round_labels(v.tolist())} for k, v in
zip(dataset.img_files, dataset.labels)]}

# Save, print and return
with open(cache_path.with_suffix('.json'), 'w') as f:
json.dump(stats, f) # save stats *.json
if verbose:
print(yaml.dump([stats], sort_keys=False, default_flow_style=False))
# print(json.dumps(stats, indent=2, sort_keys=False))
print(json.dumps(stats, indent=2, sort_keys=False))
# print(yaml.dump([stats], sort_keys=False, default_flow_style=False))
return stats

0 comments on commit 9b6dba6

Please sign in to comment.