Skip to content

Commit

Permalink
Don't race to cache downloads
Browse files Browse the repository at this point in the history
  • Loading branch information
rhelmot committed Jul 15, 2024
1 parent 508ad9a commit e72b653
Showing 1 changed file with 23 additions and 7 deletions.
30 changes: 23 additions & 7 deletions pydatatask/host.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,20 +150,36 @@ def mk_mkdir(self, filepath: str) -> str:

def mk_cache_get_static(self, dest_filepath: str, cache_key: str, miss, cache_dir) -> str:
    """Build a shell snippet that materializes ``dest_filepath`` through a file cache.

    The generated script copies the cached file to ``dest_filepath`` when it
    exists. Otherwise it runs ``miss`` (which is expected to create
    ``dest_filepath``) and then stores a copy in the cache. Concurrent
    invocations for the same key are serialized with a ``mkdir``-based lock
    directory next to the cache entry, so only one of them runs ``miss``
    while the others wait and then read the cached result.

    :param dest_filepath: Path the file must end up at; interpolated verbatim
        inside double quotes in the script.
    :param cache_key: Arbitrary string identifying the cached content. It is
        sanitized and suffixed with part of its MD5 digest to form the cache
        file name.
    :param miss: Shell code, interpolated verbatim, that produces
        ``dest_filepath`` on a cache miss.
    :param cache_dir: Root directory of the cache; entries are sharded into
        subdirectories named after the first two hex digits of the digest.
    :return: The shell script text.
    :raises TypeError: If ``self.os`` is not ``HostOS.Linux``.
    """
    if self.os != HostOS.Linux:
        raise TypeError(self.os)

    # Seconds after which a lock is presumed abandoned, and the wait between polls.
    stale_lock_seconds = 300
    poll_seconds = 5

    cache_key_hash = hashlib.md5(cache_key.encode()).hexdigest()
    # Same character set the chained .replace() calls handled: / space \ '
    unsafe_to_dash = str.maketrans({"/": "-", " ": "-", "\\": "-", "'": "-"})
    cache_key_sane = f"{cache_key.translate(unsafe_to_dash)[:55]}-{cache_key_hash[:8]}"
    cache_key_dirname = f"{cache_dir}/{cache_key_hash[:2]}"
    cache_key_path = f"{cache_key_dirname}/{cache_key_sane}"
    lock_path = f"{cache_key_path}.lock"
    # "$$" is expanded by the shell to its PID, giving each writer its own temp file.
    tmp_path = f"{cache_key_path}.tmp.$$"

    # Notes on the generated script:
    # * Every loop iteration re-evaluates the state from scratch, so a waiter
    #   whose lock holder failed (or whose lock was removed as stale) goes on
    #   to take the lock itself instead of copying a cache file that was
    #   never written.
    # * Lock age is taken from mtime (%Y). Birth time (%W) is reported as 0 on
    #   filesystems that do not record it, which would make every lock look
    #   stale immediately. A failing stat means the lock just vanished, so we
    #   simply re-evaluate.
    # * If the lock cannot be created and nobody else holds it (e.g. the cache
    #   directory is not writable) we run `miss` without caching rather than
    #   spinning forever.
    # * The cache entry is written to a temp file and renamed into place so a
    #   concurrent reader passing the `-f` test never copies a partial file.
    #   The rename happens before the lock is released.
    return f"""
    pdt_cache_state=wait
    while [ "$pdt_cache_state" = wait ]; do
        if [ -f "{cache_key_path}" ]; then
            cp "{cache_key_path}" "{dest_filepath}"
            pdt_cache_state=hit
        elif [ -d "{lock_path}" ]; then
            pdt_lock_mtime="$(stat -c %Y "{lock_path}" 2>/dev/null)" || continue
            if [ "$(($(date +%s) - pdt_lock_mtime))" -ge {stale_lock_seconds} ]; then
                rm -rf "{lock_path}"
            else
                sleep {poll_seconds}
            fi
        elif mkdir -p "{cache_key_dirname}" && mkdir "{lock_path}"; then
            pdt_cache_state=locked
        elif [ ! -d "{lock_path}" ] && [ ! -f "{cache_key_path}" ]; then
            pdt_cache_state=uncached
        fi
    done
    if [ "$pdt_cache_state" != hit ]; then
        {miss}
    fi
    if [ "$pdt_cache_state" = locked ]; then
        cp "{dest_filepath}" "{tmp_path}" && mv -f "{tmp_path}" "{cache_key_path}" || rm -f "{tmp_path}"
        rm -rf "{lock_path}"
    fi
    """
Expand Down

0 comments on commit e72b653

Please sign in to comment.