Skip to content

Commit

Permalink
download() ThreadPool update (ultralytics#3027)
Browse files Browse the repository at this point in the history
* download() ThreadPool update

* update train image count

* cid + 1
  • Loading branch information
glenn-jocher committed May 4, 2021
1 parent 5189b3a commit 8cab907
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
11 changes: 8 additions & 3 deletions data/objects365.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# /yolov5

# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
train: ../datasets/objects365/images/train # 1.7 Million images
train: ../datasets/objects365/images/train # 1742289 images
val: ../datasets/objects365/images/val # 5570 images

# number of classes
Expand Down Expand Up @@ -72,17 +72,22 @@ download: |
# Download
url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir) # annotations json
download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir, delete=False) # annotations json
download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train',
curl=True, delete=False, threads=8)
# Move
train = dir / 'images' / 'train'
for f in tqdm(train.rglob('*.jpg'), desc=f'Moving images'):
f.rename(train / f.name) # move to /images/train
# Labels
coco = COCO(dir / 'zhiyuan_objv2_train.json')
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
for cid, cat in enumerate(names):
catIds = coco.getCatIds(catNms=[cat])
imgIds = coco.getImgIds(catIds=catIds)
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid}/{len(names)} {cat}'):
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
width, height = im["width"], im["height"]
path = Path(im["file_name"]) # image filename
try:
Expand Down
5 changes: 4 additions & 1 deletion utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,10 @@ def download_one(url, dir):
dir = Path(dir)
dir.mkdir(parents=True, exist_ok=True) # make directory
if threads > 1:
ThreadPool(threads).imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
pool = ThreadPool(threads)
pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
pool.close()
pool.join()
else:
for u in tuple(url) if isinstance(url, str) else url:
download_one(u, dir)
Expand Down

0 comments on commit 8cab907

Please sign in to comment.