Skip to content

Commit

Permalink
Updated cache v0.2 with hashlib (ultralytics#3350)
Browse files Browse the repository at this point in the history
* Update cache v0.2 to include parent hash

Possible fix for ultralytics#3349

* Update datasets.py
  • Loading branch information
glenn-jocher committed May 26, 2021
1 parent 1f8d716 commit c6b5bfc
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions utils/datasets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Dataset utils and dataloaders

import glob
import hashlib
import logging
import math
import os
Expand Down Expand Up @@ -36,9 +37,12 @@
break


def get_hash(files):
    # Returns one integer for a list of files: the summed byte size of every entry
    # that is an existing regular file (missing entries and directories are ignored)
    total = 0
    for entry in files:
        if os.path.isfile(entry):
            total += os.path.getsize(entry)
    return total
def get_hash(paths):
    """Return a single MD5 hex digest for a collection of paths (files or dirs).

    The digest is built from two inputs: the summed ``os.path.getsize`` of every
    path that currently exists, and the path strings themselves concatenated in
    order. A change in the total size or in the path strings therefore yields a
    different digest.

    Args:
        paths: Iterable of path strings or ``os.PathLike`` objects. One-shot
            iterators are supported because the input is materialised first.

    Returns:
        str: Hexadecimal MD5 digest.
    """
    # Materialise once so the sequence can be walked twice below, and normalise
    # PathLike entries to str so the join cannot fail on pathlib.Path objects.
    paths = [os.fspath(p) for p in paths]
    size = sum(os.path.getsize(p) for p in paths if os.path.exists(p))  # sizes
    h = hashlib.md5(str(size).encode())  # hash sizes
    # Paths are joined without a separator on purpose: this keeps digests identical
    # to those already stored by existing callers that pass lists of str.
    h.update(''.join(paths).encode())  # hash paths
    return h.hexdigest()  # return hash


def exif_size(img):
Expand Down Expand Up @@ -383,7 +387,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels
if cache_path.is_file():
cache, exists = torch.load(cache_path), True # load
if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache: # changed
if cache['hash'] != get_hash(self.label_files + self.img_files): # changed
cache, exists = self.cache_labels(cache_path, prefix), False # re-cache
else:
cache, exists = self.cache_labels(cache_path, prefix), False # cache
Expand Down Expand Up @@ -501,9 +505,9 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):

x['hash'] = get_hash(self.label_files + self.img_files)
x['results'] = nf, nm, ne, nc, i + 1
x['version'] = 0.1 # cache version
x['version'] = 0.2 # cache version
try:
torch.save(x, path) # save for next time
torch.save(x, path) # save cache for next time
logging.info(f'{prefix}New cache created: {path}')
except Exception as e:
logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # path not writeable
Expand Down

0 comments on commit c6b5bfc

Please sign in to comment.