Skip to content

Commit

Permalink
Parse URL authentication (#3424)
Browse files Browse the repository at this point in the history
* Parse URL authentication

* urllib.parse.unquote()

* improved error handling

* improved error handling

* remove %3F

* update check_file()
  • Loading branch information
glenn-jocher committed Jun 3, 2021
1 parent 3cb9ad4 commit f8651c3
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 8 deletions.
4 changes: 3 additions & 1 deletion utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import re
import subprocess
import time
import urllib
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
Expand Down Expand Up @@ -183,7 +184,8 @@ def check_file(file):
if Path(file).is_file() or file == '': # exists
return file
elif file.startswith(('http://', 'https://')): # download
url, file = file, Path(file).name
url, file = file, Path(urllib.parse.unquote(str(file))).name # url, file (decode '%2F' to '/' etc.)
file = file.split('?')[0] # parse authentication https://url.com/file.txt?auth...
print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, file)
assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check
Expand Down
17 changes: 10 additions & 7 deletions utils/google_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import platform
import subprocess
import time
import urllib
from pathlib import Path

import requests
Expand All @@ -19,30 +20,32 @@ def gsutil_getsize(url=''):
def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
file = Path(file)
try: # GitHub
assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
try: # url1
print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, str(file))
assert file.exists() and file.stat().st_size > min_bytes # check
except Exception as e: # GCP
assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
except Exception as e: # url2
file.unlink(missing_ok=True) # remove partial downloads
print(f'Download error: {e}\nRe-attempting {url2 or url} to {file}...')
print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail
finally:
if not file.exists() or file.stat().st_size < min_bytes: # check
file.unlink(missing_ok=True) # remove partial downloads
print(f'ERROR: Download failure: {error_msg or url}')
print(f"ERROR: {assert_msg}\n{error_msg}")
print('')


def attempt_download(file, repo='ultralytics/yolov5'):
def attempt_download(file, repo='ultralytics/yolov5'): # from utils.google_utils import *; attempt_download()
# Attempt file download if does not exist
file = Path(str(file).strip().replace("'", ''))

if not file.exists():
# URL specified
name = file.name
name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
if str(file).startswith(('http:/', 'https:/')): # download
url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
name = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...
safe_download(file=name, url=url, min_bytes=1E5)
return name

Expand Down

0 comments on commit f8651c3

Please sign in to comment.