From 14bd649c1476b9a76089ff2bf8e71f8d2a2ee666 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Fri, 17 Sep 2021 17:27:58 +0200
Subject: [PATCH] Sorted datasets update to `cache_labels()`

PR should produce datasets sorted alphabetically by filename.
Cache version incremented to 0.5.

Note: will force a one-time re-caching of existing datasets on first-use.
---
 utils/datasets.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/datasets.py b/utils/datasets.py
index 4a4b187da345..adcdafe69df7 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -487,7 +487,7 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
         nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
         desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
         with Pool(NUM_THREADS) as pool:
-            pbar = tqdm(pool.imap_unordered(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
+            pbar = tqdm(pool.imap(verify_image_label, zip(self.img_files, self.label_files, repeat(prefix))),
                         desc=desc, total=len(self.img_files))
             for im_file, l, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                 nm += nm_f
@@ -508,7 +508,7 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
         x['hash'] = get_hash(self.label_files + self.img_files)
         x['results'] = nf, nm, ne, nc, len(self.img_files)
         x['msgs'] = msgs  # warnings
-        x['version'] = 0.4  # cache version
+        x['version'] = 0.5  # cache version
         try:
             np.save(path, x)  # save cache for next time
             path.with_suffix('.cache.npy').rename(path)  # remove .npy suffix