Skip to content

Commit

Permalink
Sorted datasets update to cache_labels() (ultralytics#4845)
Browse files Browse the repository at this point in the history
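This PR makes cache_labels() produce datasets sorted alphabetically by filename by replacing the unordered pool.imap_unordered() call with the order-preserving pool.imap(). The cache version is incremented to 0.5.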

Note: because the cache version changes, this will force a one-time re-caching of existing datasets on first use.
  • Loading branch information
glenn-jocher authored and CesarBazanAV committed Sep 29, 2021
1 parent 708b150 commit 706c672
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions utils/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages
desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
with Pool(NUM_THREADS) as pool:
pbar = tqdm(pool.imap_unordered(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
desc=desc, total=len(self.img_files))
for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
nm += nm_f
Expand All @@ -508,7 +508,7 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
x['hash'] = get_hash(self.label_files + self.img_files)
x['results'] = nf, nm, ne, nc, len(self.img_files)
x['msgs'] = msgs # warnings
x['version'] = 0.4 # cache version
x['version'] = 0.5 # cache version
try:
np.save(path, x) # save cache for next time
path.with_suffix('.cache.npy').rename(path) # remove .npy suffix
Expand Down

0 comments on commit 706c672

Please sign in to comment.