
Remove gluon.utils.download copy with verify_ssl support
The problematic server now serves a valid SSL certificate.
leezu committed Jul 3, 2018
1 parent 8b01fa7 commit 26f5717
Showing 1 changed file with 2 additions and 62 deletions.
64 changes: 2 additions & 62 deletions gluonnlp/data/word_embedding_evaluation.py
@@ -25,7 +25,7 @@
import zipfile

from mxnet.gluon.data.dataset import SimpleDataset
from mxnet.gluon.utils import check_sha1, _get_repo_file_url
from mxnet.gluon.utils import check_sha1, _get_repo_file_url, download

from .. import _constants as C
from .dataset import CorpusDataset
@@ -52,68 +52,10 @@ class requests_failed_to_import(object):
__all__ = base_datasets + word_similarity_datasets + word_analogy_datasets


# TODO Remove once verify support is merged in mxnet.gluon.utils.download
def download(url, path=None, overwrite=False, sha1_hash=None, verify=True):
"""Download an given URL
Parameters
----------
url : str
URL to download
path : str, optional
Destination path to store downloaded file. By default stores to the
current directory with same name as in url.
overwrite : bool, optional
Whether to overwrite destination file if already exists.
sha1_hash : str, optional
Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified
but doesn't match.
verify : bool
Toggle verification of SSL certificates.
Returns
-------
str
The file path of the downloaded file.
"""
if path is None:
fname = url.split('/')[-1]
else:
path = os.path.expanduser(path)
if os.path.isdir(path):
fname = os.path.join(path, url.split('/')[-1])
else:
fname = path

if overwrite or not os.path.exists(fname) or (
sha1_hash and not check_sha1(fname, sha1_hash)):
dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname)))
if not os.path.exists(dirname):
os.makedirs(dirname)

print('Downloading %s from %s...' % (fname, url))
r = requests.get(url, stream=True, verify=verify)
if r.status_code != 200:
raise RuntimeError('Failed downloading url %s' % url)
with open(fname, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)

if sha1_hash and not check_sha1(fname, sha1_hash):
raise UserWarning('File {} is downloaded but the content hash does not match. ' \
'The repo may be outdated or download may be incomplete. ' \
'If the "repo_url" is overridden, consider switching to ' \
'the default repo.'.format(fname))

return fname
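
For context, a minimal usage sketch of the helper removed above, assuming a hypothetical URL and destination directory (neither appears in this diff); only the verify argument distinguishes it from the mxnet.gluon.utils.download of that time:

    # Hypothetical call, not part of the diff: the URL and path are placeholders,
    # and the sha1 value only illustrates the checksum format used in this file.
    local_path = download(
        'https://example.invalid/verb_similarity%20dataset.txt',
        path='./datasets',
        sha1_hash='d7e4820c7504cbae56898353e4d94e6408c330fc',
        verify=False,  # skip SSL certificate verification for a host with an invalid cert
    )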


class _Dataset(SimpleDataset):
_url = None # Dataset is retrieved from here if not cached
_archive_file = (None, None) # Archive name and checksum
_checksums = None # Checksum of archive contents
_verify_ssl = True # Verify SSL certificates when downloading from self._url
_namespace = None # Contains S3 namespace for self-hosted datasets

def __init__(self, root):
@@ -135,8 +77,7 @@ def _download_data(self):
else:
url = self._url
downloaded_file_path = download(url, path=self.root,
sha1_hash=archive_hash,
verify=self._verify_ssl)
sha1_hash=archive_hash)

if downloaded_file_path.lower().endswith('zip'):
with zipfile.ZipFile(downloaded_file_path, 'r') as zf:
@@ -644,7 +585,6 @@ class BakerVerb143(WordSimilarityEvaluationDataset):
'verb_similarity dataset.txt':
'd7e4820c7504cbae56898353e4d94e6408c330fc'
}
_verify_ssl = False # ie.technion.ac.il serves an invalid cert as of 2018-04-16

min = 0
max = 10
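
For reference, a minimal sketch of how a dataset in this module can be declared after this commit; the class name, URL, and checksum values are hypothetical, and the sketch assumes the surrounding module for WordSimilarityEvaluationDataset. With the local copy gone, downloads go straight through mxnet.gluon.utils.download and there is no _verify_ssl switch left to set.

    class HypotheticalSimilarityDataset(WordSimilarityEvaluationDataset):
        """Illustrative only; not part of this commit."""
        _url = 'https://example.invalid/similarity.zip'  # placeholder URL
        _archive_file = ('similarity.zip',
                         '0000000000000000000000000000000000000000')  # archive name and sha1
        _checksums = {
            'similarity.txt': '0000000000000000000000000000000000000000'
        }
        # No _verify_ssl attribute: certificate verification is always on.

        min = 0
        max = 10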
