Adding support for F1 Score. #1264

Open · wants to merge 13 commits into main
keras_retinanet/bin/evaluate.py (14 changes: 13 additions & 1 deletion)
@@ -154,7 +154,7 @@ def main(args=None):
         from ..utils.coco_eval import evaluate_coco
         evaluate_coco(generator, model, args.score_threshold)
     else:
-        average_precisions, inference_time = evaluate(
+        average_precisions, inference_time, f1_scores = evaluate(
             generator,
             model,
             iou_threshold=args.iou_threshold,
@@ -166,11 +166,19 @@ def main(args=None):
         # print evaluation
         total_instances = []
         precisions = []
+        scores = []
+
         for label, (average_precision, num_annotations) in average_precisions.items():
             print('{:.0f} instances of class'.format(num_annotations),
                   generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision))
             total_instances.append(num_annotations)
             precisions.append(average_precision)
+
+        for label, (f1_score, num_annotations) in f1_scores.items():
+            print('{:.0f} instances of class'.format(num_annotations),
+                  generator.label_to_name(label), 'with F1 score: {:.4f}'.format(f1_score))
+            # total_instances.append(num_annotations)
+            scores.append(f1_score)
 
         if sum(total_instances) == 0:
             print('No test instances found.')
@@ -180,6 +188,10 @@ def main(args=None):
 
         print('mAP using the weighted average of precisions among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)))
         print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances)))
+
+        print('mF1 using the weighted average of F1 scores among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, scores)]) / sum(total_instances)))
+        print('mF1: {:.4f}'.format(sum(scores) / sum(x > 0 for x in total_instances)))
 
 
 if __name__ == '__main__':
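For reference, the two new mF1 prints mirror the existing mAP aggregation directly above them. A small standalone sketch with made-up numbers (two classes with 100 and 50 annotations; the variable names simply echo the script's) shows how the weighted and unweighted means differ:

# Illustrative numbers only, not from a real run.
total_instances = [100, 50]
scores = [0.80, 0.50]   # per-class F1

weighted_mf1 = sum(a * b for a, b in zip(total_instances, scores)) / sum(total_instances)
plain_mf1 = sum(scores) / sum(x > 0 for x in total_instances)

print(weighted_mf1)  # (0.80*100 + 0.50*50) / 150 = 0.70
print(plain_mf1)     # (0.80 + 0.50) / 2 = 0.65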
keras_retinanet/callbacks/eval.py (30 changes: 25 additions & 5 deletions)
@@ -60,7 +60,7 @@ def on_epoch_end(self, epoch, logs=None):
         logs = logs or {}
 
         # run evaluation
-        average_precisions, _ = evaluate(
+        average_precisions, _, f1_scores = evaluate(
             self.generator,
             self.model,
             iou_threshold=self.iou_threshold,
@@ -72,6 +72,8 @@ def on_epoch_end(self, epoch, logs=None):
         # compute per class average precision
         total_instances = []
         precisions = []
+        scores = []
+
         for label, (average_precision, num_annotations) in average_precisions.items():
             if self.verbose == 1:
                 print('{:.0f} instances of class'.format(num_annotations),
@@ -83,16 +85,34 @@ def on_epoch_end(self, epoch, logs=None):
         else:
             self.mean_ap = sum(precisions) / sum(x > 0 for x in total_instances)
 
+        # compute per class F1 score
+        for label, (f1_score, num_annotations) in f1_scores.items():
+            if self.verbose == 1:
+                print('{:.0f} instances of class'.format(num_annotations),
+                      self.generator.label_to_name(label), 'with F1 score: {:.4f}'.format(f1_score))
+            # total_instances.append(num_annotations)  # already counted in the mAP loop above
+            scores.append(f1_score)
+
+        if self.weighted_average:
+            self.mean_f1_score = sum([a * b for a, b in zip(total_instances, scores)]) / sum(total_instances)
+        else:
+            self.mean_f1_score = sum(scores) / sum(x > 0 for x in total_instances)
+
         if self.tensorboard:
             import tensorflow as tf
             if tf.version.VERSION < '2.0.0' and self.tensorboard.writer:
                 summary = tf.Summary()
-                summary_value = summary.value.add()
-                summary_value.simple_value = self.mean_ap
-                summary_value.tag = "mAP"
+                summary_value_map = summary.value.add()
+                summary_value_map.simple_value = self.mean_ap
+                summary_value_map.tag = "mAP"
+                summary_value_f1 = summary.value.add()
+                summary_value_f1.simple_value = self.mean_f1_score
+                summary_value_f1.tag = "mF1"
                 self.tensorboard.writer.add_summary(summary, epoch)
 
         logs['mAP'] = self.mean_ap
+        logs['mF1'] = self.mean_f1_score
 
         if self.verbose == 1:
-            print('mAP: {:.4f}'.format(self.mean_ap))
+            print('mAP: {:.4f}\nmF1: {:.4f}'.format(
+                self.mean_ap, self.mean_f1_score))
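Because the callback now also writes logs['mF1'], downstream Keras callbacks can monitor it. A sketch of one possible use (the snapshot path and the variable name `evaluation` are hypothetical, and it assumes this Evaluate callback is listed before ModelCheckpoint so the value is already populated when the checkpoint callback reads the logs):

from keras.callbacks import ModelCheckpoint

# Save a snapshot whenever the F1-based metric improves.
checkpoint = ModelCheckpoint(
    'snapshots/resnet50_csv_best_f1.h5',  # hypothetical path
    monitor='mF1',                        # the new entry written into logs
    mode='max',                           # higher F1 is better
    save_best_only=True,
)

# e.g. model.fit_generator(..., callbacks=[evaluation, checkpoint])
# where `evaluation` is the Evaluate callback patched above.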
keras_retinanet/utils/eval.py (29 changes: 28 additions & 1 deletion)
@@ -56,6 +56,28 @@ def _compute_ap(recall, precision):
     return ap
 
 
+def _compute_f1(recall, precision):
+    """ Compute the F1 score, given the recall and precision curves.
+
+    # Arguments
+        recall:    The recall curve (list).
+        precision: The precision curve (list).
+    # Returns
+        The F1 score.
+    """
+
+    # An alternative would be to look only at the points on the PR curve
+    # where recall (x axis) and precision (y axis) are maximal and equal,
+    # and take the harmonic mean of precision and recall there:
+    # i = np.where((precision == precision.max()) & (recall == recall.max()) & (recall == precision))[0]
+    # f1 = np.sum(2 * (precision[i] * recall[i]) / (precision[i] + recall[i]), axis=0)
+
+    # instead, compute the F1 score from the maximum precision and the
+    # maximum recall reached anywhere on the curve
+    f1 = 2 * ((precision.max() * recall.max()) / (precision.max() + recall.max()))
+    return f1
+
+
 def _get_detections(generator, model, score_threshold=0.05, max_detections=100, save_path=None):
     """ Get the detections from the model using the generator.
 
@@ -174,6 +196,7 @@ def evaluate(
     all_detections, all_inferences = _get_detections(generator, model, score_threshold=score_threshold, max_detections=max_detections, save_path=save_path)
     all_annotations = _get_annotations(generator)
     average_precisions = {}
+    f1_scores = {}
 
     # all_detections = pickle.load(open('all_detections.pkl', 'rb'))
     # all_annotations = pickle.load(open('all_annotations.pkl', 'rb'))
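A side note on the _compute_f1 helper added above: it takes the maximum of each curve independently, which is not the same as the best pointwise F1 along the curve. A small sketch with made-up precision/recall values (NumPy arrays, as elsewhere in eval.py) illustrates the difference:

import numpy as np

# Illustrative curve points only, not from a real evaluation.
recall = np.array([0.2, 0.5, 0.8])
precision = np.array([1.0, 0.75, 0.6])

# What _compute_f1 returns: maxima taken independently,
# 2 * (1.0 * 0.8) / (1.0 + 0.8) is roughly 0.889
f1 = 2 * ((precision.max() * recall.max()) / (precision.max() + recall.max()))
print(round(f1, 3))  # 0.889

# For comparison, the F1 at each individual point on the curve:
pointwise = 2 * precision * recall / (precision + recall)
print(pointwise.round(3))  # approx. [0.333, 0.6, 0.686]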
@@ -237,8 +260,12 @@
         # compute average precision
         average_precision = _compute_ap(recall, precision)
         average_precisions[label] = average_precision, num_annotations
+
+        # compute F1 scores
+        f1_score = _compute_f1(recall, precision)
+        f1_scores[label] = f1_score, num_annotations
 
     # inference time
     inference_time = np.sum(all_inferences) / generator.size()
 
-    return average_precisions, inference_time
+    return average_precisions, inference_time, f1_scores
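With these changes, evaluate() returns a third value. A minimal caller sketch of the new signature (`generator` and `model` stand for a keras-retinanet validation generator and inference model built elsewhere; `summarize_f1` is just an illustrative wrapper, not part of the PR):

from keras_retinanet.utils.eval import evaluate


def summarize_f1(generator, model):
    # unpack the new three-value return signature
    average_precisions, inference_time, f1_scores = evaluate(
        generator,
        model,
        iou_threshold=0.5,
        score_threshold=0.05,
        max_detections=100,
    )

    # per-class F1, mirroring the reporting in bin/evaluate.py
    for label, (f1_score, num_annotations) in f1_scores.items():
        print('{} ({:.0f} instances): F1 = {:.4f}'.format(
            generator.label_to_name(label), num_annotations, f1_score))

    # unweighted mean F1 over classes that have annotations
    scores = [s for s, _ in f1_scores.values()]
    counts = [n for _, n in f1_scores.values()]
    print('mF1: {:.4f}'.format(sum(scores) / sum(n > 0 for n in counts)))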