Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update ZipFile to context manager #9843

Merged
merged 7 commits into from
Oct 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions utils/dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from pathlib import Path
from threading import Thread
from urllib.parse import urlparse
from zipfile import ZipFile

import numpy as np
import torch
Expand All @@ -31,7 +30,8 @@
from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste,
cutout, letterbox, mixup, random_perspective)
from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
cv2, is_colab, is_kaggle, segments2boxes, unzip_file, xyn2xy, xywh2xyxy, xywhn2xyxy,
xyxy2xywhn)
from utils.torch_utils import torch_distributed_zero_first

# Parameters
Expand Down Expand Up @@ -1053,7 +1053,7 @@ def _unzip(self, path):
if not str(path).endswith('.zip'): # path is data.yaml
return False, None, path
assert Path(path).is_file(), f'Error unzipping {path}, file not found'
ZipFile(path).extractall(path=path.parent) # unzip
unzip_file(path, path=path.parent)
dir = path.with_suffix('') # dataset directory == zip name
assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path
Expand Down
81 changes: 0 additions & 81 deletions utils/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,9 @@

import logging
import os
import platform
import subprocess
import time
import urllib
from pathlib import Path
from zipfile import ZipFile

import requests
import torch
Expand Down Expand Up @@ -109,81 +106,3 @@ def github_assets(repository, version='latest'):
error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')

return str(file)


def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
# Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download()
t = time.time()
file = Path(file)
cookie = Path('cookie') # gdrive cookie
print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
if file.exists():
file.unlink() # remove existing file
if cookie.exists():
cookie.unlink() # remove existing cookie

# Attempt file download
out = "NUL" if platform.system() == "Windows" else "/dev/null"
os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
if os.path.exists('cookie'): # large file
s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
else: # small file
s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
r = os.system(s) # execute, capture return
if cookie.exists():
cookie.unlink() # remove existing cookie

# Error check
if r != 0:
if file.exists():
file.unlink() # remove partial
print('Download error ') # raise Exception('Download error')
return r

# Unzip if archive
if file.suffix == '.zip':
print('unzipping... ', end='')
ZipFile(file).extractall(path=file.parent) # unzip
file.unlink() # remove zip

print(f'Done ({time.time() - t:.1f}s)')
return r


def get_token(cookie="./cookie"):
with open(cookie) as f:
for line in f:
if "download" in line:
return line.split()[-1]
return ""


# Google utils: https://cloud.google.com/storage/docs/reference/libraries ----------------------------------------------
#
#
# def upload_blob(bucket_name, source_file_name, destination_blob_name):
# # Uploads a file to a bucket
# # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
#
# storage_client = storage.Client()
# bucket = storage_client.get_bucket(bucket_name)
# blob = bucket.blob(destination_blob_name)
#
# blob.upload_from_filename(source_file_name)
#
# print('File {} uploaded to {}.'.format(
# source_file_name,
# destination_blob_name))
#
#
# def download_blob(bucket_name, source_blob_name, destination_file_name):
# # Uploads a blob from a bucket
# storage_client = storage.Client()
# bucket = storage_client.get_bucket(bucket_name)
# blob = bucket.blob(source_blob_name)
#
# blob.download_to_filename(destination_file_name)
#
# print('Blob {} downloaded to {}.'.format(
# source_blob_name,
# destination_file_name))
14 changes: 12 additions & 2 deletions utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ def check_dataset(data, autodownload=True):
LOGGER.info(f'Downloading {s} to {f}...')
torch.hub.download_url_to_file(s, f)
Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root
ZipFile(f).extractall(path=DATASETS_DIR) # unzip
unzip_file(f, path=DATASETS_DIR) # unzip
Path(f).unlink() # remove zip
r = None # success
elif s.startswith('bash '): # bash script
Expand Down Expand Up @@ -566,6 +566,16 @@ def yaml_save(file='data.yaml', data={}):
yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False)


def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')):
# Unzip a *.zip file to path/, excluding files containing strings in exclude list
if path is None:
path = Path(file).parent # default path
with ZipFile(file) as zipObj:
for f in zipObj.namelist(): # list all archived filenames in the zip
if all(x not in f for x in exclude):
zipObj.extract(f, path=path)


def url2file(url):
# Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt
url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/
Expand Down Expand Up @@ -601,7 +611,7 @@ def download_one(url, dir):
if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
LOGGER.info(f'Unzipping {f}...')
if f.suffix == '.zip':
ZipFile(f).extractall(path=dir) # unzip
unzip_file(f, dir) # unzip
elif f.suffix == '.tar':
os.system(f'tar xf {f} --directory {f.parent}') # unzip
elif f.suffix == '.gz':
Expand Down