From 3d1d7a906ae472db1087f897ed2dec26e6a48af7 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sat, 25 Sep 2021 08:52:36 -0700 Subject: [PATCH] Replace `os.system('unzip file.zip')` -> `ZipFile.extractall()` (#4919) * Replace `os.system('unzip file.zip')` -> `ZipFile.extractall()` * Cleanup --- utils/datasets.py | 5 +++-- utils/downloads.py | 5 +++-- utils/general.py | 18 ++++++++++-------- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/utils/datasets.py b/utils/datasets.py index d253cb177b82..a54e29fd2908 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -15,6 +15,7 @@ from multiprocessing.pool import ThreadPool, Pool from pathlib import Path from threading import Thread +from zipfile import ZipFile import cv2 import numpy as np @@ -928,8 +929,8 @@ def unzip(path): # Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/' if str(path).endswith('.zip'): # path is data.zip assert Path(path).is_file(), f'Error unzipping {path}, file not found' - assert os.system(f'unzip -q {path} -d {path.parent}') == 0, f'Error unzipping {path}' - dir = path.with_suffix('') # dataset directory + ZipFile(path).extractall(path=path.parent) # unzip + dir = path.with_suffix('') # dataset directory == zip name return True, str(dir), next(dir.rglob('*.yaml')) # zipped, data_dir, yaml_path else: # path is data.yaml return False, None, path diff --git a/utils/downloads.py b/utils/downloads.py index 27cb899cd606..eafa3b7ac309 100644 --- a/utils/downloads.py +++ b/utils/downloads.py @@ -9,6 +9,7 @@ import time import urllib from pathlib import Path +from zipfile import ZipFile import requests import torch @@ -104,8 +105,8 @@ def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'): # Unzip if archive if file.suffix == '.zip': print('unzipping... ', end='') - os.system(f'unzip -q {file}') # unzip - file.unlink() # remove zip to free space + ZipFile(file).extractall(path=file.parent) # unzip + file.unlink() # remove zip print(f'Done ({time.time() - t:.1f}s)') return r diff --git a/utils/general.py b/utils/general.py index dcaa3c71b3f5..2e2cdf389075 100755 --- a/utils/general.py +++ b/utils/general.py @@ -18,6 +18,7 @@ from multiprocessing.pool import ThreadPool from pathlib import Path from subprocess import check_output +from zipfile import ZipFile import cv2 import numpy as np @@ -353,17 +354,19 @@ def check_dataset(data, autodownload=True): if s and autodownload: # download script if s.startswith('http') and s.endswith('.zip'): # URL f = Path(s).name # filename - print(f'Downloading {s} ...') + print(f'Downloading {s} to {f}...') torch.hub.download_url_to_file(s, f) root = path.parent if 'path' in data else '..' # unzip directory i.e. '../' Path(root).mkdir(parents=True, exist_ok=True) # create root - r = os.system(f'unzip -q {f} -d {root} && rm {f}') # unzip + ZipFile(f).extractall(path=root) # unzip + Path(f).unlink() # remove zip + r = None # success elif s.startswith('bash '): # bash script print(f'Running {s} ...') r = os.system(s) else: # python script r = exec(s, {'yaml': data}) # return None - print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result + print(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}") else: raise Exception('Dataset not found.') @@ -393,12 +396,11 @@ def download_one(url, dir): if unzip and f.suffix in ('.zip', '.gz'): print(f'Unzipping {f}...') if f.suffix == '.zip': - s = f'unzip -qo {f} -d {dir}' # unzip -quiet -overwrite + ZipFile(f).extractall(path=dir) # unzip elif f.suffix == '.gz': - s = f'tar xfz {f} --directory {f.parent}' # unzip - if delete: # delete zip file after unzip - s += f' && rm {f}' - os.system(s) + os.system(f'tar xfz {f} --directory {f.parent}') # unzip + if delete: + f.unlink() # remove zip dir = Path(dir) dir.mkdir(parents=True, exist_ok=True) # make directory