From 56e17120acd5757da2c1fd37e83d858b5a22907c Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Tue, 18 Oct 2022 15:47:49 +0200
Subject: [PATCH 1/5] Update ZipFile to context manager

---
 utils/dataloaders.py | 7 +++++--
 utils/downloads.py   | 4 ++--
 utils/general.py     | 6 ++++--
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/utils/dataloaders.py b/utils/dataloaders.py
index 5074d25ee268..420c57a8ffd9 100644
--- a/utils/dataloaders.py
+++ b/utils/dataloaders.py
@@ -1048,12 +1048,15 @@ def _find_yaml(dir):
         assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}'
         return files[0]
 
-    def _unzip(self, path):
+    def _unzip(self, path, exclude=('.DS_Store', '__MACOSX')):
         # Unzip data.zip
         if not str(path).endswith('.zip'):  # path is data.yaml
             return False, None, path
         assert Path(path).is_file(), f'Error unzipping {path}, file not found'
-        ZipFile(path).extractall(path=path.parent)  # unzip
+        with ZipFile(path) as zipObj:
+            for f in zipObj.namelist():  # list all archived filenames in the zip
+                if all(x not in f for x in exclude):
+                    zipObj.extract(f, path=path.parent)
         dir = path.with_suffix('')  # dataset directory == zip name
         assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
         return True, str(dir), self._find_yaml(dir)  # zipped, data_dir, yaml_path
diff --git a/utils/downloads.py b/utils/downloads.py
index 60417c1f8835..1ac2056f19d2 100644
--- a/utils/downloads.py
+++ b/utils/downloads.py
@@ -143,7 +143,8 @@ def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
     # Unzip if archive
     if file.suffix == '.zip':
         print('unzipping... ', end='')
-        ZipFile(file).extractall(path=file.parent)  # unzip
+        with ZipFile(file) as zipObj:
+            zipObj.extractall(path=file.parent)  # unzip
         file.unlink()  # remove zip
 
     print(f'Done ({time.time() - t:.1f}s)')
@@ -157,7 +158,6 @@ def get_token(cookie="./cookie"):
                 return line.split()[-1]
     return ""
 
-
 # Google utils: https://cloud.google.com/storage/docs/reference/libraries ----------------------------------------------
 #
 #
diff --git a/utils/general.py b/utils/general.py
index 8ea0ad07ed13..3e857bb4a90b 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -511,7 +511,8 @@ def check_dataset(data, autodownload=True):
                 LOGGER.info(f'Downloading {s} to {f}...')
                 torch.hub.download_url_to_file(s, f)
                 Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True)  # create root
-                ZipFile(f).extractall(path=DATASETS_DIR)  # unzip
+                with ZipFile(f) as zipObj:
+                    zipObj.extractall(path=DATASETS_DIR)  # unzip
                 Path(f).unlink()  # remove zip
                 r = None  # success
             elif s.startswith('bash '):  # bash script
@@ -601,7 +602,8 @@ def download_one(url, dir):
         if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
             LOGGER.info(f'Unzipping {f}...')
             if f.suffix == '.zip':
-                ZipFile(f).extractall(path=dir)  # unzip
+                with ZipFile(f) as zipObj:
+                    zipObj.extractall(path=dir)  # unzip
             elif f.suffix == '.tar':
                 os.system(f'tar xf {f} --directory {f.parent}')  # unzip
             elif f.suffix == '.gz':

From 1252f3f6865a8e8d786fbe76eff43fd9afcf21e8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 18 Oct 2022 13:50:47 +0000
Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 utils/downloads.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/downloads.py b/utils/downloads.py
index 1ac2056f19d2..ba86a4fee756 100644
--- a/utils/downloads.py
+++ b/utils/downloads.py
@@ -158,6 +158,7 @@ def get_token(cookie="./cookie"):
                 return line.split()[-1]
     return ""
 
+
 # Google utils: https://cloud.google.com/storage/docs/reference/libraries ----------------------------------------------
 #
 #

From 7ed7eeef79ccfb4a174d35b31960a264c21d76b3 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Tue, 18 Oct 2022 15:57:58 +0200
Subject: [PATCH 3/5] Cleanup: add unzip_file() helper, remove gdrive_download()

---
 utils/dataloaders.py |  8 ++---
 utils/downloads.py   | 81 --------------------------------------------
 utils/general.py     | 16 ++++++---
 3 files changed, 14 insertions(+), 91 deletions(-)

diff --git a/utils/dataloaders.py b/utils/dataloaders.py
index 420c57a8ffd9..e6a7f8e1b322 100644
--- a/utils/dataloaders.py
+++ b/utils/dataloaders.py
@@ -17,7 +17,6 @@
 from pathlib import Path
 from threading import Thread
 from urllib.parse import urlparse
-from zipfile import ZipFile
 
 import numpy as np
 import torch
@@ -31,7 +30,7 @@
 from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste,
                                  cutout, letterbox, mixup, random_perspective)
 from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
-                           cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
+                           cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, unzip_file)
 from utils.torch_utils import torch_distributed_zero_first
 
 # Parameters
@@ -1053,10 +1052,7 @@ def _unzip(self, path, exclude=('.DS_Store', '__MACOSX')):
         if not str(path).endswith('.zip'):  # path is data.yaml
             return False, None, path
         assert Path(path).is_file(), f'Error unzipping {path}, file not found'
-        with ZipFile(path) as zipObj:
-            for f in zipObj.namelist():  # list all archived filenames in the zip
-                if all(x not in f for x in exclude):
-                    zipObj.extract(f, path=path.parent)
+        unzip_file(path, path=path.parent)
         dir = path.with_suffix('')  # dataset directory == zip name
         assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
         return True, str(dir), self._find_yaml(dir)  # zipped, data_dir, yaml_path
diff --git a/utils/downloads.py b/utils/downloads.py
index 1ac2056f19d2..21bb6608d5ba 100644
--- a/utils/downloads.py
+++ b/utils/downloads.py
@@ -5,12 +5,9 @@
 
 import logging
 import os
-import platform
 import subprocess
-import time
 import urllib
 from pathlib import Path
-from zipfile import ZipFile
 
 import requests
 import torch
@@ -109,81 +106,3 @@ def github_assets(repository, version='latest'):
                 error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')
 
     return str(file)
-
-
-def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
-    # Downloads a file from Google Drive. from yolov5.utils.downloads import *; gdrive_download()
-    t = time.time()
-    file = Path(file)
-    cookie = Path('cookie')  # gdrive cookie
-    print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
-    if file.exists():
-        file.unlink()  # remove existing file
-    if cookie.exists():
-        cookie.unlink()  # remove existing cookie
-
-    # Attempt file download
-    out = "NUL" if platform.system() == "Windows" else "/dev/null"
-    os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
-    if os.path.exists('cookie'):  # large file
-        s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
-    else:  # small file
-        s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
-    r = os.system(s)  # execute, capture return
-    if cookie.exists():
-        cookie.unlink()  # remove existing cookie
-
-    # Error check
-    if r != 0:
-        if file.exists():
-            file.unlink()  # remove partial
-        print('Download error ')  # raise Exception('Download error')
-        return r
-
-    # Unzip if archive
-    if file.suffix == '.zip':
-        print('unzipping... ', end='')
-        with ZipFile(file) as zipObj:
-            zipObj.extractall(path=file.parent)  # unzip
-        file.unlink()  # remove zip
-
-    print(f'Done ({time.time() - t:.1f}s)')
-    return r
-
-
-def get_token(cookie="./cookie"):
-    with open(cookie) as f:
-        for line in f:
-            if "download" in line:
-                return line.split()[-1]
-    return ""
-
-# Google utils: https://cloud.google.com/storage/docs/reference/libraries ----------------------------------------------
-#
-#
-# def upload_blob(bucket_name, source_file_name, destination_blob_name):
-#     # Uploads a file to a bucket
-#     # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
-#
-#     storage_client = storage.Client()
-#     bucket = storage_client.get_bucket(bucket_name)
-#     blob = bucket.blob(destination_blob_name)
-#
-#     blob.upload_from_filename(source_file_name)
-#
-#     print('File {} uploaded to {}.'.format(
-#         source_file_name,
-#         destination_blob_name))
-#
-#
-# def download_blob(bucket_name, source_blob_name, destination_file_name):
-#     # Uploads a blob from a bucket
-#     storage_client = storage.Client()
-#     bucket = storage_client.get_bucket(bucket_name)
-#     blob = bucket.blob(source_blob_name)
-#
-#     blob.download_to_filename(destination_file_name)
-#
-#     print('Blob {} downloaded to {}.'.format(
-#         source_blob_name,
-#         destination_file_name))
diff --git a/utils/general.py b/utils/general.py
index 3e857bb4a90b..7742a8bb3efb 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -511,8 +511,7 @@ def check_dataset(data, autodownload=True):
                 LOGGER.info(f'Downloading {s} to {f}...')
                 torch.hub.download_url_to_file(s, f)
                 Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True)  # create root
-                with ZipFile(f) as zipObj:
-                    zipObj.extractall(path=DATASETS_DIR)  # unzip
+                unzip_file(f, path=DATASETS_DIR)  # unzip
                 Path(f).unlink()  # remove zip
                 r = None  # success
             elif s.startswith('bash '):  # bash script
@@ -567,6 +566,16 @@ def yaml_save(file='data.yaml', data={}):
         yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False)
 
 
+def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')):
+    # Unzip a *.zip file to path/ (default: zip's parent dir), skipping members whose name contains any exclude string
+    if path is None:
+        path = Path(file).parent  # default path
+    with ZipFile(file) as zipObj:
+        for f in zipObj.namelist():  # list all archived filenames in the zip
+            if all(x not in f for x in exclude):
+                zipObj.extract(f, path=path)
+
+
 def url2file(url):
     # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt
     url = str(Path(url)).replace(':/', '://')  # Pathlib turns :// -> :/
@@ -602,8 +611,7 @@ def download_one(url, dir):
         if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
             LOGGER.info(f'Unzipping {f}...')
             if f.suffix == '.zip':
-                with ZipFile(f) as zipObj:
-                    zipObj.extractall(path=dir)  # unzip
+                unzip_file(f, dir) # unzip
             elif f.suffix == '.tar':
                 os.system(f'tar xf {f} --directory {f.parent}')  # unzip
             elif f.suffix == '.gz':

From 721d405f1b241ea0ed6aa967fa83a6d51cce5403 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 18 Oct 2022 13:59:00 +0000
Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 utils/dataloaders.py | 3 ++-
 utils/general.py     | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/utils/dataloaders.py b/utils/dataloaders.py
index e6a7f8e1b322..003a16a04890 100644
--- a/utils/dataloaders.py
+++ b/utils/dataloaders.py
@@ -30,7 +30,8 @@
 from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste,
                                  cutout, letterbox, mixup, random_perspective)
 from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
-                           cv2, is_colab, is_kaggle, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, unzip_file)
+                           cv2, is_colab, is_kaggle, segments2boxes, unzip_file, xyn2xy, xywh2xyxy, xywhn2xyxy,
+                           xyxy2xywhn)
 from utils.torch_utils import torch_distributed_zero_first
 
 # Parameters
diff --git a/utils/general.py b/utils/general.py
index 7742a8bb3efb..fb8484ce434e 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -611,7 +611,7 @@ def download_one(url, dir):
         if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
             LOGGER.info(f'Unzipping {f}...')
             if f.suffix == '.zip':
-                unzip_file(f, dir) # unzip
+                unzip_file(f, dir)  # unzip
             elif f.suffix == '.tar':
                 os.system(f'tar xf {f} --directory {f.parent}')  # unzip
             elif f.suffix == '.gz':

From c411015053fbc45495f4596549b9dee7f82f5d86 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Tue, 18 Oct 2022 16:01:19 +0200
Subject: [PATCH 5/5] Cleanup: remove unused exclude argument from _unzip()

---
 utils/dataloaders.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/dataloaders.py b/utils/dataloaders.py
index e6a7f8e1b322..ec1d3a3b2df6 100644
--- a/utils/dataloaders.py
+++ b/utils/dataloaders.py
@@ -1047,7 +1047,7 @@ def _find_yaml(dir):
         assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}'
         return files[0]
 
-    def _unzip(self, path, exclude=('.DS_Store', '__MACOSX')):
+    def _unzip(self, path):
         # Unzip data.zip
         if not str(path).endswith('.zip'):  # path is data.yaml
             return False, None, path