Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Robust scipy.cluster.vq.kmeans too few points #6668

Merged
merged 3 commits into from
Feb 17, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions utils/autoanchor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# YOLOv5 πŸš€ by Ultralytics, GPL-3.0 license
"""
Auto-anchor utils
AutoAnchor utils
"""

import random
Expand Down Expand Up @@ -81,6 +81,7 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen
"""
from scipy.cluster.vq import kmeans

npr = np.random
thr = 1 / thr

def metric(k, wh): # compute metrics
Expand Down Expand Up @@ -121,14 +122,15 @@ def print_results(k, verbose=True):
if i:
LOGGER.info(f'{PREFIX}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels
# wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1
# wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1

# Kmeans calculation
LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...')
s = wh.std(0) # sigmas for whitening
k, dist = kmeans(wh / s, n, iter=30) # points, mean distance
assert len(k) == n, f'{PREFIX}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}'
k *= s
k = kmeans(wh / s, n, iter=30)[0] * s # points
if len(k) != n: # kmeans may return fewer points than requested if wh is insufficient or too similar
LOGGER.warning(f'{PREFIX}WARNING: scipy.cluster.vq.kmeans returned only {len(k)} of {n} requested points')
k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init
wh = torch.tensor(wh, dtype=torch.float32) # filtered
wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered
k = print_results(k, verbose=False)
Expand All @@ -146,7 +148,6 @@ def print_results(k, verbose=True):
# fig.savefig('wh.png', dpi=200)

# Evolve
npr = np.random
f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma
pbar = tqdm(range(gen), desc=f'{PREFIX}Evolving anchors with Genetic Algorithm:') # progress bar
for _ in pbar:
Expand Down