Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial support for YOLOv3-tiny, 3x performance on edge TPU #1022

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions docs/docs/configuration/advanced.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,13 @@ detectors:
```yaml
model:
# Required: height of the trained model
height: 320
height: 416
# Required: width of the trained model
width: 320
width: 416
# Required: type of model (ssd or yolo)
model_type: 'yolo'
# Required: path of label map
label_path: '/labelmap.txt'
# Optional: (but required for yolo) - anchors, comma separated
anchors: '10,14, 23,27, 37,58, 81,82, 135,169, 344,319'
```
26 changes: 26 additions & 0 deletions docs/docs/configuration/objects.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,32 @@ Models for both CPU and EdgeTPU (Coral) are bundled in the image. You can use yo

You also need to update the model width/height in the config if they differ from the defaults.

You can also try improving detection speed by using a YOLOv3-tiny model that has been quantized to run on the Edge TPU.

A compiled model is available [here](https://github.com/guichristmann/edge-tpu-tiny-yolo/tree/master/models).

Add it as a volume mount in your docker-compose file:
```yaml
volumes:
- /path/to/quant_coco-tiny-v3-relu_edgetpu.tflite:/edgetpu_model.tflite
```

And then set the configuration for the model in config.yml:

```yaml
model:
# Required: height of the trained model
height: 416
# Required: width of the trained model
width: 416
# Required: type of model (ssd or yolo)
model_type: 'yolo'
# Required: path of label map
label_path: '/labelmap.txt'
# Optional: (but required for yolo) - anchors, comma separated
anchors: '10,14, 23,27, 37,58, 81,82, 135,169, 344,319'
```

### Customizing the Labelmap

The labelmap can be customized to your needs. A common reason to do this is to combine multiple object types that are easily confused, such as car/truck, when you don't need that level of granularity. You must retain the same number of labels, but you can change the names. To change:
Expand Down
5 changes: 3 additions & 2 deletions frigate/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

logger = logging.getLogger(__name__)


class FrigateApp():
def __init__(self):
self.stop_event = mp.Event()
Expand Down Expand Up @@ -153,9 +154,9 @@ def start_detectors(self):

for name, detector in self.config.detectors.items():
if detector.type == 'cpu':
self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, model_shape, 'cpu', detector.num_threads)
self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, self.config.model, 'cpu', detector.num_threads)
if detector.type == 'edgetpu':
self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, model_shape, detector.device, detector.num_threads)
self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, self.config.model, detector.device, detector.num_threads)

def start_detected_frames_processor(self):
self.detected_frames_processor = TrackedObjectProcessor(self.config, self.mqtt_client, self.config.mqtt.topic_prefix,
Expand Down
27 changes: 24 additions & 3 deletions frigate/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,12 @@ def ensure_zones_and_cameras_have_different_names(cameras):
vol.Optional('database', default={}): {
vol.Optional('path', default=os.path.join(CLIPS_DIR, 'frigate.db')): str
},
vol.Optional('model', default={'width': 320, 'height': 320}): {
vol.Optional('model', default={'width': 320, 'height': 320, 'model_type': 'ssd', 'label_path': '/labelmap.txt'}): {
vol.Required('width'): int,
vol.Required('height'): int
vol.Required('height'): int,
vol.Required('model_type') : vol.In(['ssd', 'yolo']),
vol.Required('label_path') : str,
vol.Optional('anchors', default="") : str
},
vol.Optional('detectors', default=DEFAULT_DETECTORS): DETECTORS_SCHEMA,
'mqtt': MQTT_SCHEMA,
Expand Down Expand Up @@ -288,6 +291,9 @@ class ModelConfig():
def __init__(self, config):
    """Copy the model settings out of ``config`` into private attributes.

    Args:
        config: mapping indexed by the keys ``'width'``, ``'height'``,
            ``'label_path'``, ``'model_type'`` and ``'anchors'``. All five
            keys are read unconditionally, so a missing key raises here.
    """
    self._width = config['width']
    self._height = config['height']
    self._label_path = config['label_path']
    self._model_type = config['model_type']
    self._anchors = config['anchors']

@property
def width(self):
Expand All @@ -297,10 +303,25 @@ def width(self):
def height(self):
return self._height

@property
def label_path(self):
    """Value of the ``label_path`` config key captured at construction."""
    return self._label_path

@property
def model_type(self):
    """Value of the ``model_type`` config key captured at construction."""
    return self._model_type

@property
def anchors(self):
    """Value of the ``anchors`` config key captured at construction."""
    return self._anchors

def to_dict(self):
    """Return the model settings as a plain dict.

    The keys mirror the config option names: ``width``, ``height``,
    ``label_path``, ``model_type`` and ``anchors``. Each key appears exactly
    once; a stray second ``'height'`` entry without a trailing comma would
    make the literal invalid.
    """
    return {
        'width': self.width,
        'height': self.height,
        'label_path': self.label_path,
        'model_type': self.model_type,
        'anchors': self.anchors,
    }

class DetectorConfig():
Expand Down
146 changes: 133 additions & 13 deletions frigate/edgetpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ def load_labels(path, encoding='utf-8'):
Returns:
Dictionary mapping indices to labels.
"""
logger.warn(f"Loaded labels from {path}")
with open(path, 'r', encoding=encoding) as f:
lines = f.readlines()

if not lines:
return {}

Expand All @@ -44,13 +46,11 @@ def detect(self, tensor_input, threshold = .4):
pass

class LocalObjectDetector(ObjectDetector):
def __init__(self, tf_device=None, num_threads=3, labels=None):
def __init__(self, model_config, tf_device=None, num_threads=3):
self.fps = EventsPerSecond()
if labels is None:
self.labels = {}
else:
self.labels = load_labels(labels)

self.labels = load_labels(model_config.label_path)
self.model_config = model_config

device_config = {"device": "usb"}
if not tf_device is None:
device_config = {"device": tf_device}
Expand All @@ -76,7 +76,11 @@ def __init__(self, tf_device=None, num_threads=3, labels=None):

self.tensor_input_details = self.interpreter.get_input_details()
self.tensor_output_details = self.interpreter.get_output_details()


if model_config.anchors != "":
anchors = [float(x) for x in model_config.anchors.split(',')]
self.anchors = np.array(anchors).reshape(-1, 2)

def detect(self, tensor_input, threshold=.4):
detections = []

Expand All @@ -93,20 +97,136 @@ def detect(self, tensor_input, threshold=.4):
self.fps.update()
return detections

def sigmoid(self, x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Computed as ``exp(-log(1 + exp(-x)))`` through ``np.logaddexp`` so that
    large negative inputs do not overflow ``exp`` (which would emit a
    RuntimeWarning) while the result stays numerically equivalent.

    Args:
        x: scalar or numpy array of logits.

    Returns:
        Value(s) in [0, 1] with the same shape as ``x``.
    """
    return np.exp(-np.logaddexp(0, -x))

def detect_raw(self, tensor_input):
    """Run the detector matching the configured model type.

    Dispatches to ``detect_ssd`` for ``"ssd"`` and ``detect_yolo`` for
    ``"yolo"``. Any other model type is logged as an error and yields an
    empty list.
    """
    kind = self.model_config.model_type
    if kind == "ssd":
        return self.detect_ssd(tensor_input)
    if kind == "yolo":
        return self.detect_yolo(tensor_input)

    logger.error(f"Unsupported model type {self.model_config.model_type}")
    return []


def get_interpreter_details(self):
    """Query the interpreter for its tensor metadata.

    Returns:
        A 3-tuple ``(input_details, output_details, input_shape)`` where
        ``input_shape`` is the ``"shape"`` entry of the first input.
    """
    interp = self.interpreter
    inputs = interp.get_input_details()
    outputs = interp.get_output_details()
    return inputs, outputs, inputs[0]["shape"]

# from util.py in https://github.com/guichristmann/edge-tpu-tiny-yolo
def featuresToBoxes(self, outputs, anchors, n_classes, net_input_shape, threshold=0.5):
    """Decode one YOLO output head into boxes, scores and class indices.

    Args:
        outputs: feature map whose axes 1 and 2 are the grid rows and
            columns; it is reshaped to
            ``(1, rows, cols, n_anchors, 5 + n_classes)``.
        anchors: array of ``(width, height)`` anchor pairs for this head.
        n_classes: number of class scores per anchor.
        net_input_shape: network input shape; index 1 is used as the input
            height and index 2 as the input width.
        threshold: minimum ``objectness * class probability`` for a
            (box, class) pair to be kept. Defaults to 0.5.

    Returns:
        ``(boxes, scores, classes)`` where ``boxes`` has shape ``(K, 2, 2)``
        laid out as ``((tl_x, tl_y), (br_x, br_y))`` in units relative to
        the input size, ``scores`` has shape ``(K,)`` and ``classes`` holds
        the matching class indices. ``K`` is 0 when nothing passes the
        threshold.
    """
    grid_h, grid_w = outputs.shape[1:3]
    n_anchors = len(anchors)

    # Per-cell row/column offsets, shaped to broadcast over the anchor axis.
    # Keeping rows and columns separate lets non-square grids work.
    grid_y = np.arange(grid_h, dtype=np.float32).reshape(1, grid_h, 1, 1)
    grid_x = np.arange(grid_w, dtype=np.float32).reshape(1, 1, grid_w, 1)

    outputs = outputs.reshape(1, grid_h, grid_w, n_anchors, -1)
    anchor_dims = np.asarray(anchors, dtype=np.float32).reshape(1, 1, 1, n_anchors, 2)

    # Box centres: sigmoid offset inside the cell plus the cell index,
    # normalised by the number of cells along the same axis.
    bx = (self.sigmoid(outputs[..., 0]) + grid_x) / grid_w
    by = (self.sigmoid(outputs[..., 1]) + grid_y) / grid_h
    # Box sizes: anchor width scales with the input width (index 2) and
    # anchor height with the input height (index 1).
    bw = anchor_dims[..., 0] / net_input_shape[2] * np.exp(outputs[..., 2])
    bh = anchor_dims[..., 1] / net_input_shape[1] * np.exp(outputs[..., 3])

    # Score of every (box, class) pair = objectness * class probability.
    scores = self.sigmoid(outputs[..., 4:5]) * self.sigmoid(outputs[..., 5:])
    scores = scores.reshape(-1, n_classes)

    centre_x = bx.reshape(-1)
    centre_y = by.reshape(-1)
    half_w = bw.reshape(-1) / 2.
    half_h = bh.reshape(-1) / 2.

    top_left = np.stack([centre_x - half_w, centre_y - half_h], axis=-1)
    bottom_right = np.stack([centre_x + half_w, centre_y + half_h], axis=-1)
    corners = np.stack([top_left, bottom_right], axis=1)

    # Row-major scan, so results are ordered by box index, then class index.
    box_idx, class_idx = np.nonzero(scores >= threshold)

    selected_boxes = corners[box_idx]
    selected_scores = scores[box_idx, class_idx]
    selected_classes = class_idx

    return selected_boxes, selected_scores, selected_classes

def detect_ssd(self, tensor_input):
    """Run the SSD model on ``tensor_input`` and return a fixed-size table.

    Feeds the tensor to the interpreter, invokes it, and reads output 0
    (boxes), output 1 (label codes) and output 2 (scores).

    Returns:
        A ``(20, 6)`` float32 array. Each filled row is
        ``[label_code, score, box[0], box[1], box[2], box[3]]``; unused rows
        are zero. If the model reports more than 20 results only the first
        20 are kept, so no IndexError is raised.
    """
    max_detections = 20

    self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input)
    self.interpreter.invoke()

    def read_output(position):
        return np.asarray(self.interpreter.get_tensor(self.tensor_output_details[position]['index']))

    # Explicit reshapes (rather than squeeze) keep a single detection as a
    # one-row result instead of collapsing it to a scalar.
    boxes = read_output(0).reshape(-1, 4)
    label_codes = read_output(1).reshape(-1)
    scores = read_output(2).reshape(-1)

    detections = np.zeros((max_detections, 6), np.float32)
    count = min(len(scores), len(label_codes), len(boxes), max_detections)
    detections[:count, 0] = label_codes[:count]
    detections[:count, 1] = scores[:count]
    detections[:count, 2:] = boxes[:count]

    return detections

def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.Event], avg_speed, start, model_shape, tf_device, num_threads):
def detect_yolo(self, tensor_input):
    """Run the two-head YOLO model on ``tensor_input`` and return a fixed-size table.

    Each of the first two output tensors is dequantized with the
    ``(scale, zero_point)`` pair from its output details, decoded with
    ``featuresToBoxes``, and the results of both heads are merged.

    Returns:
        A ``(20, 6)`` float32 array. Each filled row is
        ``[label_code, score, tl_y, tl_x, br_y, br_x]`` built from the
        ``((tl_x, tl_y), (br_x, br_y))`` boxes of ``featuresToBoxes``;
        unused rows are zero. When more than 20 candidates exist, the 20
        highest-scoring ones are kept, ordered by descending score.
    """
    max_detections = 20
    # Same value get_interpreter_details() would return, without querying
    # the interpreter again on every frame.
    net_input_shape = self.tensor_input_details[0]['shape']

    self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input)
    self.interpreter.invoke()

    n_classes = len(self.labels)
    # Anchor rows used by output 0 and output 1 respectively.
    # NOTE(review): rows 1-3 for the second head are kept as in the upstream
    # project; confirm against the mask values of the model's cfg.
    head_anchor_rows = ([3, 4, 5], [1, 2, 3])

    box_parts, score_parts, class_parts = [], [], []
    # For yolo the two raw feature maps have to be decoded by hand.
    for details, anchor_rows in zip(self.tensor_output_details, head_anchor_rows):
        features = self.interpreter.get_tensor(details['index']).astype(np.float32)

        # Dequantize the output. A scale of 0 means the tensor carries no
        # quantization parameters, in which case it is used as-is instead
        # of being multiplied down to all zeros.
        scale, zero_point = details['quantization']
        if scale != 0:
            features = (features - zero_point) * scale

        head_boxes, head_scores, head_classes = self.featuresToBoxes(
            features, self.anchors[anchor_rows], n_classes, net_input_shape)
        if head_boxes.shape[0] == 0:
            # Nothing passed the score threshold on this head.
            continue
        box_parts.append(head_boxes)
        score_parts.append(head_scores)
        class_parts.append(head_classes)

    detections = np.zeros((max_detections, 6), np.float32)
    if not score_parts:
        return detections

    boxes = np.concatenate(box_parts, axis=0)
    scores = np.concatenate(score_parts, axis=0)
    label_codes = np.concatenate(class_parts, axis=0)

    # Keep the best candidates rather than whichever came first in grid order.
    keep = np.argsort(-scores, kind='stable')[:max_detections]
    count = len(keep)
    detections[:count, 0] = label_codes[keep]
    detections[:count, 1] = scores[keep]
    detections[:count, 2] = boxes[keep, 0, 1]
    detections[:count, 3] = boxes[keep, 0, 0]
    detections[:count, 4] = boxes[keep, 1, 1]
    detections[:count, 5] = boxes[keep, 1, 0]

    return detections

def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.Event], avg_speed, start, model_config, tf_device, num_threads):
threading.current_thread().name = f"detector:{name}"
logger = logging.getLogger(f"detector.{name}")
logger.info(f"Starting detection process: {os.getpid()}")
Expand All @@ -121,7 +241,7 @@ def receiveSignal(signalNumber, frame):
signal.signal(signal.SIGINT, receiveSignal)

frame_manager = SharedMemoryFrameManager()
object_detector = LocalObjectDetector(tf_device=tf_device, num_threads=num_threads)
object_detector = LocalObjectDetector(model_config, tf_device=tf_device, num_threads=num_threads)

outputs = {}
for name in out_events.keys():
Expand All @@ -140,7 +260,7 @@ def receiveSignal(signalNumber, frame):
connection_id = detection_queue.get(timeout=5)
except queue.Empty:
continue
input_frame = frame_manager.get(connection_id, (1,model_shape[0],model_shape[1],3))
input_frame = frame_manager.get(connection_id, (1, model_config.height, model_config.width,3))

if input_frame is None:
continue
Expand All @@ -156,16 +276,16 @@ def receiveSignal(signalNumber, frame):
avg_speed.value = (avg_speed.value*9 + duration)/10

class EdgeTPUProcess():
def __init__(self, name, detection_queue, out_events, model_shape, tf_device=None, num_threads=3):
def __init__(self, name, detection_queue, out_events, model_config, tf_device=None, num_threads=3):
self.name = name
self.out_events = out_events
self.detection_queue = detection_queue
self.avg_inference_speed = mp.Value('d', 0.01)
self.detection_start = mp.Value('d', 0.0)
self.detect_process = None
self.model_shape = model_shape
self.tf_device = tf_device
self.num_threads = num_threads
self.model_config = model_config
self.start_or_restart()

def stop(self):
Expand All @@ -181,7 +301,7 @@ def start_or_restart(self):
self.detection_start.value = 0.0
if (not self.detect_process is None) and self.detect_process.is_alive():
self.stop()
self.detect_process = mp.Process(target=run_detector, name=f"detector:{self.name}", args=(self.name, self.detection_queue, self.out_events, self.avg_inference_speed, self.detection_start, self.model_shape, self.tf_device, self.num_threads))
self.detect_process = mp.Process(target=run_detector, name=f"detector:{self.name}", args=(self.name, self.detection_queue, self.out_events, self.avg_inference_speed, self.detection_start, self.model_config, self.tf_device, self.num_threads))
self.detect_process.daemon = True
self.detect_process.start()

Expand Down