From 55f55266590dd910f8199ee8b1e38c1d76b2975d Mon Sep 17 00:00:00 2001 From: Farley Lai <158540+farleylai@users.noreply.github.com> Date: Thu, 14 Oct 2021 17:29:12 -0700 Subject: [PATCH 1/3] Update Objects365.yaml Download the official Objects365 validation set and convert the labels --- data/Objects365.yaml | 58 ++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/data/Objects365.yaml b/data/Objects365.yaml index dc5bfbc7faa4..3f9673f15294 100644 --- a/data/Objects365.yaml +++ b/data/Objects365.yaml @@ -72,33 +72,43 @@ download: | for q in 'train', 'val': (dir / p / q).mkdir(parents=True, exist_ok=True) - # Download - url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/" - download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir, delete=False) # annotations json - download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train', - curl=True, delete=False, threads=8) + for split, patches in [('train', 50+1), ('val', 43+1)]: + print(f"Processing {split} in {patches} patches ...") - # Move - train = dir / 'images' / 'train' - for f in tqdm(train.rglob('*.jpg'), desc=f'Moving images'): - f.rename(train / f.name) # move to /images/train + # Download split + url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/" + if split == 'train': + download([url + f'zhiyuan_objv2_{split}.tar.gz'], dir=dir, delete=False) # annotations json + download([url + f for f in [f'patch{i}.tar.gz' for i in range(patches)]], dir=dir / 'images' / split, + curl=True, delete=False, threads=8) + elif split == 'val': + download([url + f'zhiyuan_objv2_{split}.json'], dir=dir, delete=False) # annotations json + download([url + 'images/v1/' + f for f in [f'patch{i}.tar.gz' for i in range(15+1)]], dir=dir / 'images' / split, + curl=True, delete=False, threads=8) + download([url + 'images/v2/' + f for f in [f'patch{i}.tar.gz' for i in range(16, patches)]], dir=dir / 'images' / split, + curl=True, delete=False, threads=8) - # Labels - coco = COCO(dir / 'zhiyuan_objv2_train.json') - names = [x["name"] for x in coco.loadCats(coco.getCatIds())] - for cid, cat in enumerate(names): + # Move split images + splitP = dir / 'images' / split + for f in tqdm(splitP.rglob('*.jpg'), desc=f'Moving {split} images'): + f.rename(splitP / f.name) # move to /images/{split} + + # Split labels + coco = COCO(dir / f'zhiyuan_objv2_{split}.json') + names = [x["name"] for x in coco.loadCats(coco.getCatIds())] + for cid, cat in enumerate(names): catIds = coco.getCatIds(catNms=[cat]) imgIds = coco.getImgIds(catIds=catIds) for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'): - width, height = im["width"], im["height"] - path = Path(im["file_name"]) # image filename - try: - with open(dir / 'labels' / 'train' / path.with_suffix('.txt').name, 'a') as file: - annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None) - for a in coco.loadAnns(annIds): - x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner) - x, y = x + w / 2, y + h / 2 # xy to center - file.write(f"{cid} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n") + width, height = im["width"], im["height"] + path = Path(im["file_name"]) # image filename + try: + with open(dir / 'labels' / split / path.with_suffix('.txt').name, 'a') as file: + annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None) + for a in coco.loadAnns(annIds): + x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner) + x, y = x + w / 2, y + h / 2 # xy to center + file.write(f"{cid} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n") - except Exception as e: - print(e) + except Exception as e: + print(e) From 5e4776670276db41410f4a7abc6da0836873428a Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 14 Oct 2021 19:12:16 -0700 Subject: [PATCH 2/3] Enforce 4-space indent, reformat and cleanup --- data/Objects365.yaml | 72 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/data/Objects365.yaml b/data/Objects365.yaml index 3f9673f15294..0571fda50339 100644 --- a/data/Objects365.yaml +++ b/data/Objects365.yaml @@ -72,43 +72,43 @@ download: | for q in 'train', 'val': (dir / p / q).mkdir(parents=True, exist_ok=True) - for split, patches in [('train', 50+1), ('val', 43+1)]: - print(f"Processing {split} in {patches} patches ...") + for split, patches in [('train', 50 + 1), ('val', 43 + 1)]: + print(f"Processing {split} in {patches} patches ...") + images, labels = dir / 'images' / split, dir / 'labels' / split - # Download split - url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/" - if split == 'train': - download([url + f'zhiyuan_objv2_{split}.tar.gz'], dir=dir, delete=False) # annotations json - download([url + f for f in [f'patch{i}.tar.gz' for i in range(patches)]], dir=dir / 'images' / split, - curl=True, delete=False, threads=8) - elif split == 'val': - download([url + f'zhiyuan_objv2_{split}.json'], dir=dir, delete=False) # annotations json - download([url + 'images/v1/' + f for f in [f'patch{i}.tar.gz' for i in range(15+1)]], dir=dir / 'images' / split, - curl=True, delete=False, threads=8) - download([url + 'images/v2/' + f for f in [f'patch{i}.tar.gz' for i in range(16, patches)]], dir=dir / 'images' / split, - curl=True, delete=False, threads=8) + # Download + url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/" + if split == 'train': + download([url + f'zhiyuan_objv2_{split}.tar.gz'], dir=dir, delete=False) # annotations json + download([url + f for f in [f'patch{i}.tar.gz' for i in range(patches)]], + dir=images, curl=True, delete=False, threads=8) + elif split == 'val': + download([url + f'zhiyuan_objv2_{split}.json'], dir=dir, delete=False) # annotations json + download([url + 'images/v1/' + f for f in [f'patch{i}.tar.gz' for i in range(15 + 1)]], + dir=images, curl=True, delete=False, threads=8) + download([url + 'images/v2/' + f for f in [f'patch{i}.tar.gz' for i in range(16, patches)]], + dir=images, curl=True, delete=False, threads=8) - # Move split images - splitP = dir / 'images' / split - for f in tqdm(splitP.rglob('*.jpg'), desc=f'Moving {split} images'): - f.rename(splitP / f.name) # move to /images/{split} + # Move + for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'): + f.rename(images / f.name) # move to /images/{split} - # Split labels - coco = COCO(dir / f'zhiyuan_objv2_{split}.json') - names = [x["name"] for x in coco.loadCats(coco.getCatIds())] - for cid, cat in enumerate(names): - catIds = coco.getCatIds(catNms=[cat]) - imgIds = coco.getImgIds(catIds=catIds) - for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'): - width, height = im["width"], im["height"] - path = Path(im["file_name"]) # image filename - try: - with open(dir / 'labels' / split / path.with_suffix('.txt').name, 'a') as file: - annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None) - for a in coco.loadAnns(annIds): - x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner) - x, y = x + w / 2, y + h / 2 # xy to center - file.write(f"{cid} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n") + # Labels + coco = COCO(dir / f'zhiyuan_objv2_{split}.json') + names = [x["name"] for x in coco.loadCats(coco.getCatIds())] + for cid, cat in enumerate(names): + catIds = coco.getCatIds(catNms=[cat]) + imgIds = coco.getImgIds(catIds=catIds) + for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'): + width, height = im["width"], im["height"] + path = Path(im["file_name"]) # image filename + try: + with open(labels / path.with_suffix('.txt').name, 'a') as file: + annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None) + for a in coco.loadAnns(annIds): + x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner) + x, y = x + w / 2, y + h / 2 # xy to center + file.write(f"{cid} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n") - except Exception as e: - print(e) + except Exception as e: + print(e) From 3f094dc75e8d39e15eb1ec0483786eae04c8b42a Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 14 Oct 2021 19:34:06 -0700 Subject: [PATCH 3/3] shorten list comprehension --- data/Objects365.yaml | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/data/Objects365.yaml b/data/Objects365.yaml index 0571fda50339..3ade4d683d67 100644 --- a/data/Objects365.yaml +++ b/data/Objects365.yaml @@ -62,37 +62,35 @@ names: ['Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Gla download: | from pycocotools.coco import COCO from tqdm import tqdm - + from utils.general import download, Path - + # Make Directories dir = Path(yaml['path']) # dataset root dir for p in 'images', 'labels': (dir / p).mkdir(parents=True, exist_ok=True) for q in 'train', 'val': (dir / p / q).mkdir(parents=True, exist_ok=True) - + + # Train, Val Splits for split, patches in [('train', 50 + 1), ('val', 43 + 1)]: print(f"Processing {split} in {patches} patches ...") images, labels = dir / 'images' / split, dir / 'labels' / split - + # Download url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/" if split == 'train': - download([url + f'zhiyuan_objv2_{split}.tar.gz'], dir=dir, delete=False) # annotations json - download([url + f for f in [f'patch{i}.tar.gz' for i in range(patches)]], - dir=images, curl=True, delete=False, threads=8) + download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir, delete=False) # annotations json + download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, delete=False, threads=8) elif split == 'val': - download([url + f'zhiyuan_objv2_{split}.json'], dir=dir, delete=False) # annotations json - download([url + 'images/v1/' + f for f in [f'patch{i}.tar.gz' for i in range(15 + 1)]], - dir=images, curl=True, delete=False, threads=8) - download([url + 'images/v2/' + f for f in [f'patch{i}.tar.gz' for i in range(16, patches)]], - dir=images, curl=True, delete=False, threads=8) - + download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir, delete=False) # annotations json + download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, delete=False, threads=8) + download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, delete=False, threads=8) + # Move for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'): f.rename(images / f.name) # move to /images/{split} - + # Labels coco = COCO(dir / f'zhiyuan_objv2_{split}.json') names = [x["name"] for x in coco.loadCats(coco.getCatIds())] @@ -109,6 +107,5 @@ download: | x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner) x, y = x + w / 2, y + h / 2 # xy to center file.write(f"{cid} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n") - except Exception as e: print(e)