From 6847e1aa59e22703183e6dae1604b29b1ea184b2 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Wed, 26 May 2021 14:26:52 +0200
Subject: [PATCH] Updated cache v0.2 with `hashlib` (#3350)

* Update cache v0.2 to include parent hash

Possible fix for https://github.com/ultralytics/yolov5/issues/3349

* Update datasets.py
---
 utils/datasets.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/utils/datasets.py b/utils/datasets.py
index 36416b14e138..882c7764c4ab 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -1,6 +1,7 @@
 # Dataset utils and dataloaders
 
 import glob
+import hashlib
 import logging
 import math
 import os
@@ -36,9 +37,12 @@
         break
 
 
-def get_hash(files):
-    # Returns a single hash value of a list of files
-    return sum(os.path.getsize(f) for f in files if os.path.isfile(f))
+def get_hash(paths):
+    # Returns a single hash value of a list of paths (files or dirs)
+    size = sum(os.path.getsize(p) for p in paths if os.path.exists(p))  # sizes
+    h = hashlib.md5(str(size).encode())  # hash sizes
+    h.update(''.join(paths).encode())  # hash paths
+    return h.hexdigest()  # return hash
 
 
 def exif_size(img):
@@ -383,7 +387,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
         cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')  # cached labels
         if cache_path.is_file():
             cache, exists = torch.load(cache_path), True  # load
-            if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache:  # changed
+            if cache['hash'] != get_hash(self.label_files + self.img_files):  # changed
                 cache, exists = self.cache_labels(cache_path, prefix), False  # re-cache
         else:
             cache, exists = self.cache_labels(cache_path, prefix), False  # cache
@@ -501,9 +505,9 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
 
         x['hash'] = get_hash(self.label_files + self.img_files)
         x['results'] = nf, nm, ne, nc, i + 1
-        x['version'] = 0.1  # cache version
+        x['version'] = 0.2  # cache version
         try:
-            torch.save(x, path)  # save for next time
+            torch.save(x, path)  # save cache for next time
             logging.info(f'{prefix}New cache created: {path}')
         except Exception as e:
             logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # path not writeable