From 26f57173442df5f3c300c7cf92c43916f5687f31 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Tue, 3 Jul 2018 23:03:32 +0000 Subject: [PATCH] Remove gluon.utils.download copy with verify_ssl support The problematic server now serves a valid SSL certificate. --- gluonnlp/data/word_embedding_evaluation.py | 64 +--------------------- 1 file changed, 2 insertions(+), 62 deletions(-) diff --git a/gluonnlp/data/word_embedding_evaluation.py b/gluonnlp/data/word_embedding_evaluation.py index 0719953c08..0056f0387b 100644 --- a/gluonnlp/data/word_embedding_evaluation.py +++ b/gluonnlp/data/word_embedding_evaluation.py @@ -25,7 +25,7 @@ import zipfile from mxnet.gluon.data.dataset import SimpleDataset -from mxnet.gluon.utils import check_sha1, _get_repo_file_url +from mxnet.gluon.utils import check_sha1, _get_repo_file_url, download from .. import _constants as C from .dataset import CorpusDataset @@ -52,68 +52,10 @@ class requests_failed_to_import(object): __all__ = base_datasets + word_similarity_datasets + word_analogy_datasets -# TODO Remove once verify support is merged in mxnet.gluon.utils.download -def download(url, path=None, overwrite=False, sha1_hash=None, verify=True): - """Download an given URL - - Parameters - ---------- - url : str - URL to download - path : str, optional - Destination path to store downloaded file. By default stores to the - current directory with same name as in url. - overwrite : bool, optional - Whether to overwrite destination file if already exists. - sha1_hash : str, optional - Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified - but doesn't match. - verify : bool - Toggle verification of SSL certificates. - - Returns - ------- - str - The file path of the downloaded file. 
- """ - if path is None: - fname = url.split('/')[-1] - else: - path = os.path.expanduser(path) - if os.path.isdir(path): - fname = os.path.join(path, url.split('/')[-1]) - else: - fname = path - - if overwrite or not os.path.exists(fname) or ( - sha1_hash and not check_sha1(fname, sha1_hash)): - dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname))) - if not os.path.exists(dirname): - os.makedirs(dirname) - - print('Downloading %s from %s...' % (fname, url)) - r = requests.get(url, stream=True, verify=verify) - if r.status_code != 200: - raise RuntimeError('Failed downloading url %s' % url) - with open(fname, 'wb') as f: - for chunk in r.iter_content(chunk_size=1024): - if chunk: # filter out keep-alive new chunks - f.write(chunk) - - if sha1_hash and not check_sha1(fname, sha1_hash): - raise UserWarning('File {} is downloaded but the content hash does not match. ' \ - 'The repo may be outdated or download may be incomplete. ' \ - 'If the "repo_url" is overridden, consider switching to ' \ - 'the default repo.'.format(fname)) - - return fname - - class _Dataset(SimpleDataset): _url = None # Dataset is retrieved from here if not cached _archive_file = (None, None) # Archive name and checksum _checksums = None # Checksum of archive contents - _verify_ssl = True # Verify SSL certificates when downloading from self._url _namespace = None # Contains S3 namespace for self-hosted datasets def __init__(self, root): @@ -135,8 +77,7 @@ def _download_data(self): else: url = self._url downloaded_file_path = download(url, path=self.root, - sha1_hash=archive_hash, - verify=self._verify_ssl) + sha1_hash=archive_hash) if downloaded_file_path.lower().endswith('zip'): with zipfile.ZipFile(downloaded_file_path, 'r') as zf: @@ -644,7 +585,6 @@ class BakerVerb143(WordSimilarityEvaluationDataset): 'verb_similarity dataset.txt': 'd7e4820c7504cbae56898353e4d94e6408c330fc' } - _verify_ssl = False # ie.technion.ac.il serves an invalid cert as of 2018-04-16 min = 0 max = 10