Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Be able to create dataset from annotated images only #2466

Merged
merged 2 commits into from
Mar 15, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions utils/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1032,20 +1032,24 @@ def extract_boxes(path='../coco128/'): # from utils.datasets import *; extract_
b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'


def autosplit(path='../coco128', weights=(0.9, 0.1, 0.0), annotated_only=False):
    """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
    Usage: from utils.datasets import *; autosplit('../coco128')
    Arguments
        path:           Path to images directory
        weights:        Train, val, test weights (list)
        annotated_only: Only use images with an annotated txt file

    Each image found under path (recursively) is randomly assigned to one split and its
    path is appended as one line to the matching autosplit_*.txt file inside path.
    A split file is only created once at least one image has been written to it.
    """
    path = Path(path)  # images dir
    # Single recursive walk; keeping only image extensions here means n and the random
    # split assignment below are computed over images, not over every file in the tree
    files = [x for x in path.rglob('*.*') if x.suffix[1:] in img_formats]  # image files only
    n = len(files)  # number of files
    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split

    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
    for x in txt:  # remove existing, because the files are opened in append mode below
        if (path / x).exists():
            (path / x).unlink()

    # bool * str: the suffix is appended only when annotated_only is True
    print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
    for i, img in tqdm(zip(indices, files), total=n):
        # img2label_paths presumably maps image paths to their label *.txt paths - defined elsewhere
        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
            with open(path / txt[i], 'a') as f:
                f.write(str(img) + '\n')  # add image to txt file