diff --git a/build/darknet/x64/cfg/Gaussian_yolov3_BDD.cfg b/build/darknet/x64/cfg/Gaussian_yolov3_BDD.cfg new file mode 100644 index 00000000000..2ca7ec600e3 --- /dev/null +++ b/build/darknet/x64/cfg/Gaussian_yolov3_BDD.cfg @@ -0,0 +1,807 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=512 +height=512 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 +max_epochs = 300 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=57 +activation=linear + + +[Gaussian_yolo] +mask = 6,7,8 +anchors = 7,10, 14,24, 27,43, 32,97, 57,64, 92,109, 73,175, 141,178, 144,291 +classes=10 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +iou_thresh=0.213 +uc_normalizer=1.0 +cls_normalizer=1.0 +iou_normalizer=0.5 +iou_loss=giou +scale_x_y=1.0 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=57 +activation=linear + + +[Gaussian_yolo] +mask = 3,4,5 +anchors = 7,10, 14,24, 27,43, 32,97, 57,64, 92,109, 73,175, 141,178, 144,291 +classes=10 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +iou_thresh=0.213 +uc_normalizer=1.0 +cls_normalizer=1.0 +iou_normalizer=0.5 +iou_loss=giou +scale_x_y=1.0 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=57 +activation=linear + + +[Gaussian_yolo] +mask = 0,1,2 +anchors = 7,10, 14,24, 27,43, 32,97, 57,64, 92,109, 73,175, 141,178, 144,291 +classes=10 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +iou_thresh=0.213 +uc_normalizer=1.0 +cls_normalizer=1.0 +iou_normalizer=0.5 +iou_loss=giou +scale_x_y=1.0 +random=1 diff --git a/cfg/Gaussian_yolov3_BDD.cfg b/cfg/Gaussian_yolov3_BDD.cfg new file mode 100644 index 00000000000..2ca7ec600e3 --- /dev/null +++ b/cfg/Gaussian_yolov3_BDD.cfg @@ -0,0 +1,807 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=16 +width=512 +height=512 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=1000 +max_batches = 500200 +policy=steps +steps=400000,450000 +scales=.1,.1 +max_epochs = 300 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +# Downsample + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +# Downsample + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=1024 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-3 +activation=linear + +###################### + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=1024 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=57 +activation=linear + + +[Gaussian_yolo] +mask = 6,7,8 +anchors = 7,10, 14,24, 27,43, 32,97, 57,64, 92,109, 73,175, 141,178, 144,291 +classes=10 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +iou_thresh=0.213 +uc_normalizer=1.0 +cls_normalizer=1.0 +iou_normalizer=0.5 +iou_loss=giou +scale_x_y=1.0 +random=1 + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 61 + + + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=57 +activation=linear + + +[Gaussian_yolo] +mask = 3,4,5 +anchors = 7,10, 14,24, 27,43, 32,97, 57,64, 92,109, 73,175, 141,178, 144,291 +classes=10 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +iou_thresh=0.213 +uc_normalizer=1.0 +cls_normalizer=1.0 +iou_normalizer=0.5 +iou_loss=giou +scale_x_y=1.0 +random=1 + + + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 36 + + + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=57 +activation=linear + + +[Gaussian_yolo] +mask = 0,1,2 +anchors = 7,10, 14,24, 27,43, 32,97, 57,64, 92,109, 73,175, 141,178, 144,291 +classes=10 +num=9 +jitter=.3 +ignore_thresh = .5 +truth_thresh = 1 +iou_thresh=0.213 +uc_normalizer=1.0 +cls_normalizer=1.0 +iou_normalizer=0.5 +iou_loss=giou +scale_x_y=1.0 +random=1 diff --git a/include/darknet.h b/include/darknet.h index 2bd70d1cde3..7a906780c61 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -330,6 +330,7 @@ struct layer { float *weight_updates; float scale_x_y; + float uc_normalizer; float iou_normalizer; float cls_normalizer; IOU_LOSS iou_loss; diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index 7200fecd61b..16971437872 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -256,6 +256,26 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind return iou; } +void averages_gaussian_yolo_deltas(int class_index, int box_index, int stride, int classes, float *delta) +{ + + int classes_in_one_box = 0; + int c; + for (c = 0; c < classes; ++c) { + if (delta[class_index + stride*c] > 0) classes_in_one_box++; + } + + if (classes_in_one_box > 0) { + delta[box_index + 0 * stride] /= classes_in_one_box; + delta[box_index + 1 * stride] /= classes_in_one_box; + delta[box_index + 2 * stride] /= classes_in_one_box; + delta[box_index + 3 * stride] /= classes_in_one_box; + delta[box_index + 4 * stride] /= classes_in_one_box; + delta[box_index + 5 * stride] /= classes_in_one_box; + delta[box_index + 6 * stride] /= classes_in_one_box; + delta[box_index + 7 * stride] /= classes_in_one_box; + } +} void delta_gaussian_yolo_class(float *output, float *delta, int index, int class_id, int classes, int stride, float *avg_cat) { @@ -469,25 +489,73 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); const int stride = l.w*l.h; - int classes_in_one_box = 0; - for (n = 0; n < l.classes; ++n) { - if (l.delta[class_index + stride*n] > 0) classes_in_one_box++; - } + averages_gaussian_yolo_deltas(class_index, box_index, stride, l.classes, l.delta); + } + } + } + } + + + // calculate: Classification-loss, IoU-loss and Uncertainty-loss + const int stride = l.w*l.h; + float* classification_lost = (float *)calloc(l.batch * l.outputs, sizeof(float)); + memcpy(classification_lost, l.delta, l.batch * l.outputs * sizeof(float)); + - l.delta[box_index + 0 * stride] /= classes_in_one_box; - l.delta[box_index + 1 * stride] /= classes_in_one_box; - l.delta[box_index + 2 * stride] /= classes_in_one_box; - l.delta[box_index + 3 * stride] /= classes_in_one_box; - l.delta[box_index + 4 * stride] /= classes_in_one_box; - l.delta[box_index + 5 * stride] /= classes_in_one_box; - l.delta[box_index + 6 * stride] /= classes_in_one_box; - l.delta[box_index + 7 * stride] /= classes_in_one_box; + for (b = 0; b < l.batch; ++b) { + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); + + classification_lost[box_index + 0 * stride] = 0; + classification_lost[box_index + 1 * stride] = 0; + classification_lost[box_index + 2 * stride] = 0; + classification_lost[box_index + 3 * stride] = 0; + classification_lost[box_index + 4 * stride] = 0; + classification_lost[box_index + 5 * stride] = 0; + classification_lost[box_index + 6 * stride] = 0; + classification_lost[box_index + 7 * stride] = 0; } } } } + float class_loss = pow(mag_array(classification_lost, l.outputs * l.batch), 2); + free(classification_lost); + + + float* except_uncertainty_lost = (float *)calloc(l.batch * l.outputs, sizeof(float)); + memcpy(except_uncertainty_lost, l.delta, l.batch * l.outputs * sizeof(float)); + for (b = 0; b < l.batch; ++b) { + for (j = 0; j < l.h; ++j) { + for (i = 0; i < l.w; ++i) { + for (n = 0; n < l.n; ++n) { + int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); + except_uncertainty_lost[box_index + 4 * stride] = 0; + except_uncertainty_lost[box_index + 5 * stride] = 0; + except_uncertainty_lost[box_index + 6 * stride] = 0; + except_uncertainty_lost[box_index + 7 * stride] = 0; + } + } + } + } + float except_uc_loss = pow(mag_array(except_uncertainty_lost, l.outputs * l.batch), 2); + free(except_uncertainty_lost); + *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); - printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", state.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); + + float loss = pow(mag_array(l.delta, l.outputs * l.batch), 2); + float uc_loss = loss - except_uc_loss; + float iou_loss = except_uc_loss - class_loss; + + loss /= l.batch; + class_loss /= l.batch; + uc_loss /= l.batch; + iou_loss /= l.batch; + + printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d, loss = %.2f, class_loss = %.2f, iou_loss = %.2f, uc_loss = %.2f \n", + state.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count, + loss, class_loss, iou_loss, uc_loss); } void backward_gaussian_yolo_layer(const layer l, network_state state) diff --git a/src/yolo_layer.c b/src/yolo_layer.c index b663b73da74..34185937459 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -192,6 +192,23 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, return all_ious; } +void averages_yolo_deltas(int class_index, int box_index, int stride, int classes, float *delta) +{ + + int classes_in_one_box = 0; + int c; + for (c = 0; c < classes; ++c) { + if (delta[class_index + stride*c] > 0) classes_in_one_box++; + } + + if (classes_in_one_box > 0) { + delta[box_index + 0 * stride] /= classes_in_one_box; + delta[box_index + 1 * stride] /= classes_in_one_box; + delta[box_index + 2 * stride] /= classes_in_one_box; + delta[box_index + 3 * stride] /= classes_in_one_box; + } +} + void delta_yolo_class(float *output, float *delta, int index, int class_id, int classes, int stride, float *avg_cat, int focal_loss) { int n; @@ -436,19 +453,12 @@ void forward_yolo_layer(const layer l, network_state state) int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); const int stride = l.w*l.h; - int classes_in_one_box = 0; - for (n = 0; n < l.classes; ++n) { - if (l.delta[class_index + stride*n] > 0) classes_in_one_box++; - } - - l.delta[box_index + 0 * stride] /= classes_in_one_box; - l.delta[box_index + 1 * stride] /= classes_in_one_box; - l.delta[box_index + 2 * stride] /= classes_in_one_box; - l.delta[box_index + 3 * stride] /= classes_in_one_box; + averages_yolo_deltas(class_index, box_index, stride, l.classes, l.delta); } } } } + //*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); //printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", state.index, avg_iou / count, avg_cat / class_count, avg_obj / count, avg_anyobj / (l.w*l.h*l.n*l.batch), recall / count, recall75 / count, count);