From 56e17120acd5757da2c1fd37e83d858b5a22907c Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Tue, 18 Oct 2022 15:47:49 +0200 Subject: [PATCH 1/5] Update zipFile to context manager --- utils/dataloaders.py | 7 +++++-- utils/downloads.py | 4 ++-- utils/general.py | 6 ++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 5074d25ee268..420c57a8ffd9 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -1048,12 +1048,15 @@ def _find_yaml(dir): assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}' return files[0] - def _unzip(self, path): + def _unzip(self, path, exclude=('.DS_Store', '__MACOSX')): # Unzip data.zip if not str(path).endswith('.zip'): # path is data.yaml return False, None, path assert Path(path).is_file(), f'Error unzipping {path}, file not found' - ZipFile(path).extractall(path=path.parent) # unzip + with ZipFile(path) as zipObj: + for f in zipObj.namelist(): # list all archived filenames in the zip + if all(x not in f for x in exclude): + zipObj.extract(f, path=path.parent) dir = path.with_suffix('') # dataset directory == zip name assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/' return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path diff --git a/utils/downloads.py b/utils/downloads.py index 60417c1f8835..1ac2056f19d2 100644 --- a/utils/downloads.py +++ b/utils/downloads.py @@ -143,7 +143,8 @@ def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): # Unzip if archive if file.suffix == '.zip': print('unzipping... ', end='') - ZipFile(file).extractall(path=file.parent) # unzip + with ZipFile(file) as zipObj: + zipObj.extractall(path=file.parent) # unzip file.unlink() # remove zip print(f'Done ({time.time() - t:.1f}s)') @@ -157,7 +158,6 @@ def get_token(cookie="./cookie"): return line.split()[-1] return "" - # Google utils: https://cloud.google.com/storage/docs/reference/libraries ---------------------------------------------- # # diff --git a/utils/general.py b/utils/general.py index 8ea0ad07ed13..3e857bb4a90b 100644 --- a/utils/general.py +++ b/utils/general.py @@ -511,7 +511,8 @@ def check_dataset(data, autodownload=True): LOGGER.info(f'Downloading {s} to {f}...') torch.hub.download_url_to_file(s, f) Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root - ZipFile(f).extractall(path=DATASETS_DIR) # unzip + with ZipFile(f) as zipObj: + zipObj.extractall(path=DATASETS_DIR) # unzip Path(f).unlink() # remove zip r = None # success elif s.startswith('bash '): # bash script @@ -601,7 +602,8 @@ def download_one(url, dir): if unzip and success and f.suffix in ('.zip', '.tar', '.gz'): LOGGER.info(f'Unzipping {f}...') if f.suffix == '.zip': - ZipFile(f).extractall(path=dir) # unzip + with ZipFile(f) as zipObj: + zipObj.extractall(path=dir) # unzip elif f.suffix == '.tar': os.system(f'tar xf {f} --directory {f.parent}') # unzip elif f.suffix == '.gz': From 1252f3f6865a8e8d786fbe76eff43fd9afcf21e8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Oct 2022 13:50:47 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/downloads.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/downloads.py b/utils/downloads.py index 1ac2056f19d2..ba86a4fee756 100644 --- a/utils/downloads.py +++ b/utils/downloads.py @@ -158,6 +158,7 @@ def get_token(cookie="./cookie"): return line.split()[-1] return "" + # Google utils: https://cloud.google.com/storage/docs/reference/libraries ---------------------------------------------- # # From 7ed7eeef79ccfb4a174d35b31960a264c21d76b3 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Tue, 18 Oct 2022 15:57:58 +0200 Subject: [PATCH 3/5] Cleanup --- utils/dataloaders.py | 8 ++--- utils/downloads.py | 81 -------------------------------------------- utils/general.py | 16 ++++++--- 3 files changed, 14 insertions(+), 91 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 420c57a8ffd9..e6a7f8e1b322 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -17,7 +17,6 @@ from pathlib import Path from threading import Thread from urllib.parse import urlparse -from zipfile import ZipFile import numpy as np import torch @@ -31,7 +30,7 @@ from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste, cutout, letterbox, mixup, random_perspective) from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str, - cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn) + cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, unzip_file) from utils.torch_utils import torch_distributed_zero_first # Parameters @@ -1053,10 +1052,7 @@ def _unzip(self, path, exclude=('.DS_Store', '__MACOSX')): if not str(path).endswith('.zip'): # path is data.yaml return False, None, path assert Path(path).is_file(), f'Error unzipping {path}, file not found' - with ZipFile(path) as zipObj: - for f in zipObj.namelist(): # list all archived filenames in the zip - if all(x not in f for x in exclude): - zipObj.extract(f, path=path.parent) + unzip_file(path, path=path.parent) dir = path.with_suffix('') # dataset directory == zip name assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/' return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path diff --git a/utils/downloads.py b/utils/downloads.py index 1ac2056f19d2..21bb6608d5ba 100644 --- a/utils/downloads.py +++ b/utils/downloads.py @@ -5,12 +5,9 @@ import logging import os -import platform import subprocess -import time import urllib from pathlib import Path -from zipfile import ZipFile import requests import torch @@ -109,81 +106,3 @@ def github_assets(repository, version='latest'): error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}') return str(file) - - -def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): - # Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download() - t = time.time() - file = Path(file) - cookie = Path('cookie') # gdrive cookie - print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='') - if file.exists(): - file.unlink() # remove existing file - if cookie.exists(): - cookie.unlink() # remove existing cookie - - # Attempt file download - out = "NUL" if platform.system() == "Windows" else "/dev/null" - os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}') - if os.path.exists('cookie'): # large file - s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' - else: # small file - s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' - r = os.system(s) # execute, capture return - if cookie.exists(): - cookie.unlink() # remove existing cookie - - # Error check - if r != 0: - if file.exists(): - file.unlink() # remove partial - print('Download error ') # raise Exception('Download error') - return r - - # Unzip if archive - if file.suffix == '.zip': - print('unzipping... ', end='') - with ZipFile(file) as zipObj: - zipObj.extractall(path=file.parent) # unzip - file.unlink() # remove zip - - print(f'Done ({time.time() - t:.1f}s)') - return r - - -def get_token(cookie="./cookie"): - with open(cookie) as f: - for line in f: - if "download" in line: - return line.split()[-1] - return "" - -# Google utils: https://cloud.google.com/storage/docs/reference/libraries ---------------------------------------------- -# -# -# def upload_blob(bucket_name, source_file_name, destination_blob_name): -# # Uploads a file to a bucket -# # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python -# -# storage_client = storage.Client() -# bucket = storage_client.get_bucket(bucket_name) -# blob = bucket.blob(destination_blob_name) -# -# blob.upload_from_filename(source_file_name) -# -# print('File {} uploaded to {}.'.format( -# source_file_name, -# destination_blob_name)) -# -# -# def download_blob(bucket_name, source_blob_name, destination_file_name): -# # Uploads a blob from a bucket -# storage_client = storage.Client() -# bucket = storage_client.get_bucket(bucket_name) -# blob = bucket.blob(source_blob_name) -# -# blob.download_to_filename(destination_file_name) -# -# print('Blob {} downloaded to {}.'.format( -# source_blob_name, -# destination_file_name)) diff --git a/utils/general.py b/utils/general.py index 3e857bb4a90b..7742a8bb3efb 100644 --- a/utils/general.py +++ b/utils/general.py @@ -511,8 +511,7 @@ def check_dataset(data, autodownload=True): LOGGER.info(f'Downloading {s} to {f}...') torch.hub.download_url_to_file(s, f) Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root - with ZipFile(f) as zipObj: - zipObj.extractall(path=DATASETS_DIR) # unzip + unzip_file(f, path=DATASETS_DIR) # unzip Path(f).unlink() # remove zip r = None # success elif s.startswith('bash '): # bash script @@ -567,6 +566,16 @@ def yaml_save(file='data.yaml', data={}): yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False) +def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')): + # Unzip a *.zip file to path/, excluding files containing strings in exclude list + if path is None: + path = Path(file).parent # default path + with ZipFile(file) as zipObj: + for f in zipObj.namelist(): # list all archived filenames in the zip + if all(x not in f for x in exclude): + zipObj.extract(f, path=path) + + def url2file(url): # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/ @@ -602,8 +611,7 @@ def download_one(url, dir): if unzip and success and f.suffix in ('.zip', '.tar', '.gz'): LOGGER.info(f'Unzipping {f}...') if f.suffix == '.zip': - with ZipFile(f) as zipObj: - zipObj.extractall(path=dir) # unzip + unzip_file(f, dir) # unzip elif f.suffix == '.tar': os.system(f'tar xf {f} --directory {f.parent}') # unzip elif f.suffix == '.gz': From 721d405f1b241ea0ed6aa967fa83a6d51cce5403 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Oct 2022 13:59:00 +0000 Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- utils/dataloaders.py | 3 ++- utils/general.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index e6a7f8e1b322..003a16a04890 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -30,7 +30,8 @@ from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste, cutout, letterbox, mixup, random_perspective) from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str, - cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, unzip_file) + cv2, is_colab, is_kaggle, segments2boxes, unzip_file, xyn2xy, xywh2xyxy, xywhn2xyxy, + xyxy2xywhn) from utils.torch_utils import torch_distributed_zero_first # Parameters diff --git a/utils/general.py b/utils/general.py index 7742a8bb3efb..fb8484ce434e 100644 --- a/utils/general.py +++ b/utils/general.py @@ -611,7 +611,7 @@ def download_one(url, dir): if unzip and success and f.suffix in ('.zip', '.tar', '.gz'): LOGGER.info(f'Unzipping {f}...') if f.suffix == '.zip': - unzip_file(f, dir) # unzip + unzip_file(f, dir) # unzip elif f.suffix == '.tar': os.system(f'tar xf {f} --directory {f.parent}') # unzip elif f.suffix == '.gz': From c411015053fbc45495f4596549b9dee7f82f5d86 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Tue, 18 Oct 2022 16:01:19 +0200 Subject: [PATCH 5/5] Cleanup --- utils/dataloaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/dataloaders.py b/utils/dataloaders.py index e6a7f8e1b322..ec1d3a3b2df6 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -1047,7 +1047,7 @@ def _find_yaml(dir): assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}' return files[0] - def _unzip(self, path, exclude=('.DS_Store', '__MACOSX')): + def _unzip(self, path): # Unzip data.zip if not str(path).endswith('.zip'): # path is data.yaml return False, None, path