# diff --git a/gen_anchors.py b/gen_anchors.py (new file mode 100644)
import glob
import os
import argparse
import numpy as np
from utils.kmeans import kmeans, avg_iou
from utils.parse_config import parse_data_cfg

# Image extensions (lower-case, with leading dot) accepted from the image list.
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif']


def load_dataset(path):
    """
    Collects the box sizes stored in the label files that belong to an image list.

    Each line of the file at ``path`` is treated as an image path; lines whose
    extension is not in ``img_formats`` are ignored. The label path is derived by
    replacing ``images`` with ``labels`` in the image path and swapping the file
    extension for ``.txt``. Label files that do not exist are skipped.

    :param path: text file with one image path per line
    :return: float64 numpy array of shape (r, 2) built from columns 3 and 4 of
             every label row (presumably width and height in a
             ``class x y w h`` layout - confirm against the label writer)
    """
    with open(path, 'r') as f:
        img_files = [x.replace('/', os.sep) for x in f.read().splitlines()  # os-agnostic
                     if os.path.splitext(x)[-1].lower() in img_formats]

    # Swap only the trailing extension; a plain str.replace() on the extension
    # would also rewrite any earlier occurrence of that text inside the path.
    label_files = [os.path.splitext(x.replace('images', 'labels'))[0] + '.txt'
                   for x in img_files]

    # Gather per-file arrays and join them once at the end: np.append inside the
    # loop would copy the whole accumulated dataset for every label file.
    chunks = []
    for label_path in label_files:
        if not os.path.isfile(label_path):
            continue
        with open(label_path, 'r') as f:
            rows = np.array([line.split() for line in f.read().splitlines()], dtype=np.float32)
        if rows.size > 0:
            # Exactly two columns, so rows carrying extra trailing fields still stack.
            chunks.append(rows[:, 3:5])

    if not chunks:
        return np.empty(shape=[0, 2])
    return np.concatenate(chunks, axis=0).astype(np.float64)


def gen_anchors(data=None, img_size=None, clusters=None):
    """
    Runs k-means on the training-set box sizes and prints the resulting anchors.

    Every argument left as ``None`` is read from the module-level ``opt``
    namespace, which only exists when this file is executed as a script. Passing
    all three arguments makes the function usable after a plain import.

    :param data: path to the ``*.data`` file whose ``train`` entry names the image list
    :param img_size: factor the clustered boxes are multiplied by before printing
    :param clusters: number of clusters (anchors) to compute
    :raises ValueError: if fewer boxes than ``clusters`` were loaded
    """
    data = opt.data if data is None else data
    img_size = opt.img_size if img_size is None else img_size
    clusters = opt.clusters if clusters is None else clusters

    data_dict = parse_data_cfg(data)
    train_path = data_dict['train']

    boxes = load_dataset(train_path)
    if boxes.shape[0] < clusters:
        # kmeans() would raise ValueError here as well, but from inside numpy's
        # sampling routine with a message that does not mention the dataset.
        raise ValueError("Need at least {} boxes to build {} clusters, found {} in {}".format(
            clusters, clusters, boxes.shape[0], train_path))

    out = kmeans(boxes, k=clusters)
    print("Accuracy: {:.2f}%".format(avg_iou(boxes, out) * 100))
    a = sorted(out * img_size, key=lambda x: x[0] * x[1])  # smallest area first
    print("Sorted Boxes:\n {}".format(a))

    ratios = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
    print("Ratios:\n {}".format(sorted(ratios)))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
    parser.add_argument('--clusters', type=int, default=9, help='num of clusters for k-means')
    parser.add_argument('--data', type=str, default='data/coco2017.data', help='*.data path')
    opt = parser.parse_args()
    print(opt)

    gen_anchors(opt.data, opt.img_size, opt.clusters)

# diff --git a/utils/kmeans.py b/utils/kmeans.py (new file mode 100644)
# +++ b/utils/kmeans.py
"""
MIT License

Copyright (c) 2018 Lars Nieradzik

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

import numpy as np


def iou(box, clusters):
    """
    Calculates the Intersection over Union (IoU) between a box and k clusters.
    :param box: tuple or array, shifted to the origin (i. e. width and height)
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: numpy array of shape (k,) where k is the number of clusters
    :raises ValueError: if the overlap with any cluster has zero width or height
    """
    # With every box anchored at the origin, the overlap is simply the
    # element-wise minimum of the two widths and of the two heights.
    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.any(x == 0) or np.any(y == 0):
        raise ValueError("Box has no area")

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    return intersection / (box_area + cluster_area - intersection)


def avg_iou(boxes, clusters):
    """
    Calculates the average Intersection over Union (IoU) between a numpy array of boxes and k clusters.
    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: average IoU as a single float
    """
    # Score each box by its best-matching cluster, then average the scores.
    return np.mean([np.max(iou(box, clusters)) for box in boxes])


def translate_boxes(boxes):
    """
    Translates all the boxes to the origin.
    :param boxes: numpy array of shape (r, 4)
    :return: numpy array of shape (r, 2)
    """
    # Work on a copy of the trailing columns so the caller's array is untouched,
    # then overwrite them with |col2 - col0| and |col3 - col1|.
    sizes = boxes[:, 2:].copy()
    sizes[:, :2] = np.abs(boxes[:, 2:4] - boxes[:, :2])
    return sizes


def kmeans(boxes, k, dist=np.median, seed=None):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.
    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param k: number of clusters
    :param dist: function used to reduce the members of a cluster to its new
                 centre; called as ``dist(members, axis=0)``
    :param seed: optional seed for the initial choice of centres; ``None``
                 draws fresh entropy on every call
    :return: numpy array of shape (k, 2)
    :raises ValueError: if k exceeds the number of rows, or a box has no area
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    # -1 is never a valid cluster index, so the first assignment can never be
    # mistaken for "nothing changed". Starting from zeros made the loop stop
    # before any centre update whenever every box was nearest to cluster 0
    # (always the case for k == 1).
    last_clusters = np.full(rows, -1)

    # A private generator keeps the caller's global numpy random state intact.
    rng = np.random.RandomState(seed)

    # the Forgy method will fail if the whole array contains the same rows
    clusters = boxes[rng.choice(rows, k, replace=False)]

    while True:
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        if (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            # An empty cluster keeps its previous centre: reducing an empty
            # slice yields NaN, and argmin would then send every box to it.
            if members.shape[0] > 0:
                clusters[cluster] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters