Skip to content

Commit

Permalink
Replace os.system('unzip file.zip') -> ZipFile.extractall() (ultralytics#4919)
Browse files Browse the repository at this point in the history

* Replace `os.system('unzip file.zip')` -> `ZipFile.extractall()`

* Cleanup
  • Loading branch information
glenn-jocher committed Sep 25, 2021
1 parent 22b07de commit 3d1d7a9
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 12 deletions.
5 changes: 3 additions & 2 deletions utils/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from multiprocessing.pool import ThreadPool, Pool
from pathlib import Path
from threading import Thread
from zipfile import ZipFile

import cv2
import numpy as np
Expand Down Expand Up @@ -928,8 +929,8 @@ def unzip(path):
# Unzip data.zip TODO: CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
if str(path).endswith('.zip'): # path is data.zip
assert Path(path).is_file(), f'Error unzipping {path}, file not found'
assert os.system(f'unzip -q {path} -d {path.parent}') == 0, f'Error unzipping {path}'
dir = path.with_suffix('') # dataset directory
ZipFile(path).extractall(path=path.parent) # unzip
dir = path.with_suffix('') # dataset directory == zip name
return True, str(dir), next(dir.rglob('*.yaml')) # zipped, data_dir, yaml_path
else: # path is data.yaml
return False, None, path
Expand Down
5 changes: 3 additions & 2 deletions utils/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import time
import urllib
from pathlib import Path
from zipfile import ZipFile

import requests
import torch
Expand Down Expand Up @@ -104,8 +105,8 @@ def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
# Unzip if archive
if file.suffix == '.zip':
print('unzipping... ', end='')
os.system(f'unzip -q {file}') # unzip
file.unlink() # remove zip to free space
ZipFile(file).extractall(path=file.parent) # unzip
file.unlink() # remove zip

print(f'Done ({time.time() - t:.1f}s)')
return r
Expand Down
18 changes: 10 additions & 8 deletions utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from multiprocessing.pool import ThreadPool
from pathlib import Path
from subprocess import check_output
from zipfile import ZipFile

import cv2
import numpy as np
Expand Down Expand Up @@ -353,17 +354,19 @@ def check_dataset(data, autodownload=True):
if s and autodownload: # download script
if s.startswith('http') and s.endswith('.zip'): # URL
f = Path(s).name # filename
print(f'Downloading {s} ...')
print(f'Downloading {s} to {f}...')
torch.hub.download_url_to_file(s, f)
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
Path(root).mkdir(parents=True, exist_ok=True) # create root
r = os.system(f'unzip -q {f} -d {root} && rm {f}') # unzip
ZipFile(f).extractall(path=root) # unzip
Path(f).unlink() # remove zip
r = None # success
elif s.startswith('bash '): # bash script
print(f'Running {s} ...')
r = os.system(s)
else: # python script
r = exec(s, {'yaml': data}) # return None
print('Dataset autodownload %s\n' % ('success' if r in (0, None) else 'failure')) # print result
print(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}")
else:
raise Exception('Dataset not found.')

Expand Down Expand Up @@ -393,12 +396,11 @@ def download_one(url, dir):
if unzip and f.suffix in ('.zip', '.gz'):
print(f'Unzipping {f}...')
if f.suffix == '.zip':
s = f'unzip -qo {f} -d {dir}' # unzip -quiet -overwrite
ZipFile(f).extractall(path=dir) # unzip
elif f.suffix == '.gz':
s = f'tar xfz {f} --directory {f.parent}' # unzip
if delete: # delete zip file after unzip
s += f' && rm {f}'
os.system(s)
os.system(f'tar xfz {f} --directory {f.parent}') # unzip
if delete:
f.unlink() # remove zip

dir = Path(dir)
dir.mkdir(parents=True, exist_ok=True) # make directory
Expand Down

0 comments on commit 3d1d7a9

Please sign in to comment.