ultralytics · wang-xinyu · Jan 2, 2020
diff --git a/gen_anchors.py b/gen_anchors.py
@@ -0,0 +1,53 @@
+import glob
+import os
+import argparse
+import numpy as np
+from utils.kmeans import kmeans, avg_iou
+from utils.parse_config import parse_data_cfg
+
+# Lower-case file extensions that are accepted as images in a dataset list file.
+img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif']
+
+def load_dataset(path):
+    """
+    Collects the box sizes from every label file that belongs to an image list.
+
+    The label path of an image is derived from the image path by replacing
+    'images' with 'labels' and the file extension with '.txt'. Label files that
+    are missing or contain no rows are skipped, as are blank lines inside a
+    label file.
+
+    :param path: text file with one image path per line; lines whose extension
+                 (case-insensitive) is not in ``img_formats`` are ignored
+    :return: float64 numpy array of shape (r, 2) holding columns 3 and 4 of
+             every label row (presumably width and height of a
+             ``class x y w h`` row -- confirm against the label writer)
+    """
+    with open(path, 'r') as f:
+        img_files = [x.replace('/', os.sep) for x in f.read().splitlines()  # os-agnostic
+                     if os.path.splitext(x)[-1].lower() in img_formats]
+
+    # The empty float64 (0, 2) chunk fixes the shape and dtype of the result,
+    # also when no label file contributes a single row.
+    chunks = [np.empty(shape=[0, 2])]
+    for img_file in img_files:
+        # Swap only the trailing extension: str.replace() would also rewrite an
+        # identical '.jpg' / '.png' / ... that appears earlier in the path.
+        label_path = os.path.splitext(img_file.replace('images', 'labels'))[0] + '.txt'
+        if not os.path.isfile(label_path):
+            continue
+
+        with open(label_path, 'r') as f:
+            # blank lines would otherwise produce a ragged row list
+            rows = [line.split() for line in f.read().splitlines() if line.strip()]
+        if not rows:
+            continue
+
+        labels = np.array(rows, dtype=np.float32)
+        # exactly two columns, so rows with extra trailing fields still fit
+        chunks.append(labels[:, 3:5])
+
+    # A single concatenation at the end instead of np.append per file, which
+    # copied the whole accumulated dataset on every iteration.
+    return np.concatenate(chunks, axis=0)
+
+def gen_anchors():
+    """
+    Clusters the box sizes of the training set and prints the resulting anchors.
+
+    The settings come from the module-level ``opt`` namespace (``data``,
+    ``img_size``, ``clusters``). The boxes are loaded from the list file named
+    by the 'train' entry of the *.data file and clustered with k-means.
+    Three things are printed: the average IoU as a percentage, the centroids
+    scaled by ``img_size`` and ordered by the product of their two columns,
+    and the sorted column-0 / column-1 ratio of every centroid.
+    """
+    cfg_path, scale, n_clusters = opt.data, opt.img_size, opt.clusters
+
+    cfg = parse_data_cfg(cfg_path)
+    boxes = load_dataset(cfg['train'])
+
+    centroids = kmeans(boxes, k=n_clusters)
+    mean_iou = avg_iou(boxes, centroids)
+    print("Accuracy: {:.2f}%".format(mean_iou * 100))
+
+    def area(anchor):
+        # product of the two box dimensions, used as the sort key
+        return anchor[0] * anchor[1]
+
+    anchors = sorted(centroids * scale, key=area)
+    print("Sorted Boxes:\n {}".format(anchors))
+
+    aspect = np.around(centroids[:, 0] / centroids[:, 1], decimals=2).tolist()
+    aspect.sort()
+    print("Ratios:\n {}".format(aspect))
+
+if __name__ == '__main__':
+    # Command-line entry point. ``opt`` stays at module level on purpose:
+    # gen_anchors() reads its settings from it.
+    cli_options = (
+        ('--img-size', int, 416, 'inference size (pixels)'),
+        ('--clusters', int, 9, 'num of clusters for k-means'),
+        ('--data', str, 'data/coco2017.data', '*.data path'),
+    )
+    parser = argparse.ArgumentParser()
+    for flag, kind, default, help_text in cli_options:
+        parser.add_argument(flag, type=kind, default=default, help=help_text)
+
+    opt = parser.parse_args()
+    print(opt)
+
+    gen_anchors()
diff --git a/utils/kmeans.py b/utils/kmeans.py
@@ -0,0 +1,104 @@
+"""
+MIT License
+
+Copyright (c) 2018 Lars Nieradzik
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+import numpy as np
+
+
+def iou(box, clusters):
+    """
+    Computes the Intersection over Union (IoU) of one box with each of k clusters.
+
+    The overlap of the box with a cluster is the smaller of the two widths
+    times the smaller of the two heights.
+
+    :param box: tuple or array, shifted to the origin (i. e. width and height)
+    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
+    :return: numpy array of shape (k,) with the IoU of the box and every cluster
+    :raises ValueError: if any overlap width or overlap height is exactly zero
+    """
+    box_w, box_h = box[0], box[1]
+    cluster_w, cluster_h = clusters[:, 0], clusters[:, 1]
+
+    overlap_w = np.minimum(cluster_w, box_w)
+    overlap_h = np.minimum(cluster_h, box_h)
+    if np.any(overlap_w == 0) or np.any(overlap_h == 0):
+        raise ValueError("Box has no area")
+
+    overlap = overlap_w * overlap_h
+    union = box_w * box_h + cluster_w * cluster_h - overlap
+    return overlap / union
+
+
+def avg_iou(boxes, clusters):
+    """
+    Averages, over all boxes, the IoU of each box with its best-matching cluster.
+    :param boxes: numpy array of shape (r, 2), where r is the number of rows
+    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
+    :return: average IoU as a single float
+    """
+    # for every box keep only the IoU with the cluster that fits it best
+    best_per_box = [np.max(iou(box, clusters)) for box in boxes]
+    return np.mean(best_per_box)
+
+
+def translate_boxes(boxes):
+    """
+    Translates all the boxes to the origin.
+
+    Columns 2 and 3 are replaced by |col2 - col0| and |col3 - col1| and the
+    first two columns are dropped. For rows laid out as x1, y1, x2, y2
+    (presumably the intended layout) the result is width and height.
+
+    :param boxes: numpy array (or nested sequence) of shape (r, 4)
+    :return: new numpy array of shape (r, 2); the input is left untouched
+    """
+    # np.array() copies, so the caller's data is never modified, and it lets
+    # plain nested lists through as well; subok keeps ndarray subclasses.
+    new_boxes = np.array(boxes, subok=True)
+    # one vectorised operation over all rows instead of a Python loop per row
+    new_boxes[:, 2:4] = np.abs(new_boxes[:, 2:4] - new_boxes[:, 0:2])
+    return np.delete(new_boxes, [0, 1], axis=1)
+
+
+def kmeans(boxes, k, dist=np.median, seed=None):
+    """
+    Calculates k-means clustering with the Intersection over Union (IoU) metric.
+
+    The distance between a box and a centroid is 1 - IoU. The initial
+    centroids are k distinct rows of ``boxes``; the loop stops as soon as an
+    iteration leaves every box assigned to the same centroid as before.
+
+    :param boxes: numpy array of shape (r, 2), where r is the number of rows
+    :param k: number of clusters, 1 <= k <= r
+    :param dist: reduces the members of a cluster to its new centroid; called
+                 as ``dist(members, axis=0)``
+    :param seed: optional seed for the draw of the initial centroids; None
+                 (the default) uses fresh entropy on every call
+    :return: numpy array of shape (k, 2)
+    :raises ValueError: if k is not within [1, r]; also raised by iou() for
+                        boxes or centroids with a zero dimension
+    """
+    boxes = np.asarray(boxes)
+    rows = boxes.shape[0]
+    if not 1 <= k <= rows:
+        raise ValueError(
+            "k must be between 1 and the number of boxes ({}), got {}".format(rows, k))
+
+    distances = np.empty((rows, k))
+    # -1 is never a valid cluster index, so the first assignment cannot be
+    # mistaken for "nothing changed" and at least one update step always runs.
+    last_clusters = np.full((rows,), -1)
+
+    # A private generator: the global numpy random state is left untouched.
+    rng = np.random.RandomState(seed)
+
+    # Forgy initialisation: k distinct rows become the first centroids.
+    # Index-array selection returns a copy, so updating the centroids below
+    # does not write into boxes.
+    clusters = boxes[rng.choice(rows, k, replace=False)]
+
+    while True:
+        for row in range(rows):
+            distances[row] = 1 - iou(boxes[row], clusters)
+
+        nearest_clusters = np.argmin(distances, axis=1)
+
+        if (last_clusters == nearest_clusters).all():
+            break
+
+        for cluster in range(k):
+            members = boxes[nearest_clusters == cluster]
+            # Duplicate boxes among the centroids leave a cluster without
+            # members. The default np.median of an empty selection is NaN, and
+            # np.argmin would then pick that NaN column for every box, so an
+            # empty cluster keeps its previous centroid instead.
+            if members.shape[0] > 0:
+                clusters[cluster] = dist(members, axis=0)
+
+        last_clusters = nearest_clusters
+
+    return clusters