diff --git a/README.md b/README.md
index 1f117ff..4b304ca 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # chainer-pose-proposal-net
 
-- This is an implementation of [Pose Proposal Networks](http://openaccess.thecvf.com/content_ECCV_2018/papers/Sekii_Pose_Proposal_Networks_ECCV_2018_paper.pdf) with Chainer including training and prediction tools.
+- This is an (unofficial) implementation of [Pose Proposal Networks](http://openaccess.thecvf.com/content_ECCV_2018/papers/Sekii_Pose_Proposal_Networks_ECCV_2018_paper.pdf) with Chainer, including training and prediction tools.
 
 # License
 
@@ -19,31 +19,43 @@ This project is licensed under the terms of the [license](LICENSE).
 ### MPII
 
 - If you train with COCO dataset you can skip.
-- Access [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/) and jump to `Download` page. Then download and extract both `Images (12.9 GB)` and `Annotations (12.5 MB)`.
+- Access the [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/) and jump to the `Download` page. Then download and extract both `Images (12.9 GB)` and `Annotations (12.5 MB)`, e.g. to `~/work/dataset/mpii_dataset`.
 
 #### Create `mpii.json`
 
 We need decode `mpii_human_pose_v1_u12_1.mat` to generate `mpii.json`. This will be used on training or evaluating test dataset of MPII.
 
 ```
-$ sudo docker run --rm -v $(pwd):/work -v path/to/dataset:/data -w /work idein/chainer:4.5.0 python3 convert_mpii_dataset.py /data/mpii_human_pose_v1_u12_2/mpii_human_pose_v1_u12_1.mat /data/mpii.json
+$ sudo docker run --rm -v $(pwd):/work -v path/to/dataset:/mpii_dataset -w /work idein/chainer:4.5.0 python3 convert_mpii_dataset.py /mpii_dataset/mpii_human_pose_v1_u12_2/mpii_human_pose_v1_u12_1.mat /mpii_dataset/mpii.json
 ```
 
-It will generate `mpii.json` at `path/to/dataset` where is the root directory of MPII dataset. For those who hesitate to use Docker, you may edit `config.ini` as necessary.
+It will generate `mpii.json` at `path/to/dataset`, where `path/to/dataset` is the root directory of the MPII dataset, for example `~/work/dataset/mpii_dataset`. For those who hesitate to use Docker, you may edit `config.ini` as necessary.
 
 ### COCO
 
 - If you train with MPII dataset you can skip.
-- Access [COCO dataset](http://cocodataset.org/) and jump to `Dataset` -> `download`. Then download and extract `2017 Train images [118K/18GB]`, `2017 Val images [5K/1GB]` and `2017 Train/Val annotations [241MB]`.
+- Access [COCO dataset](http://cocodataset.org/) and jump to `Dataset` -> `download`. Then download and extract `2017 Train images [118K/18GB]`, `2017 Val images [5K/1GB]` and `2017 Train/Val annotations [241MB]`, e.g. to `~/work/dataset/coco_dataset`.
 
 ## Running Training Scripts
 
+OK let's begin!
+
 ```
-$ sudo docker run --rm -v $(pwd):/work -v path/to/dataset:/data -w /work idein/chainer:4.5.0 python3 train.py
+$ cat begin_train.sh
+cat config.ini
+docker run --rm \
+-v $(pwd):/work \
+-v ~/work/dataset/mpii_dataset:/mpii_dataset \
+-v ~/work/dataset/coco_dataset:/coco_dataset \
+--name ppn_idein \
+-w /work \
+idein/chainer:5.1.0 \
+python3 train.py
+$ sudo bash begin_train.sh
 ```
-
+
 - Optional argument `--runtime=nvidia` maybe require for some environment.
-- This will train a model the base network is MobileNetV2 with MPII dataset located in `path/to/dataset` on host machine.
+- It will train a model whose base network is MobileNetV2 with the MPII dataset located at `~/work/dataset/mpii_dataset` on the host machine.
 - If we would like to train with COCO dataset, edit a part of `config.ini` as follow:
 
 before
@@ -81,10 +93,22 @@ model_name = resnet18
 
 # Prediction
 
-- Very easy, all we have to do is:
+- Very easy; all we have to do is, for example:
+
+```
+$ sudo bash run_predict.sh ./trained
+```
+
+- If you would like to configure parameters or hide the bounding boxes, edit the `[predict]` part of `config.ini` as follows:
 
 ```
-$ sudo docker run --rm -v $(pwd):/work -v path/to/dataset:/data -w /work idein/chainer:4.5.0 python3 predict.py
+[predict]
+# If `False` is set, hide bbox of annotation other than human instance.
+visbbox = True
+# detection_thresh
+detection_thresh = 0.15
+# ignore human its num of keypoints is less than min_num_keypoints
+min_num_keypoints= 1
 ```
 
 # Demo: Realtime Pose Estimation
@@ -102,6 +126,10 @@ docker build -t ppn .
 $ sudo bash build.sh
 ```
 
+Here is a result of ResNet18 trained with COCO, running on a laptop PC.
+
+![](readmedata/cpu-example.gif)
+
 ## Run video.py
 
 - Set your USB camera that can recognize from OpenCV.
@@ -109,17 +137,17 @@ $ sudo bash build.sh
 - Run `video.py`
 ```
-$ python video.py
+$ python video.py ./trained
 ```
 
 or
 
 ```
-$ sudo bash run_video.sh
+$ sudo bash run_video.sh ./trained
 ```
 
 ## High Performance Version
 
-- To use feature of [Static Subgraph Optimizations](http://docs.chainer.org/en/stable/reference/static_graph_design.html) to accelerate inference speed, we should install Chainer 5.0.0 and CuPy 5.0.0 .
+- To use the [Static Subgraph Optimizations](http://docs.chainer.org/en/stable/reference/static_graph_design.html) feature to accelerate inference speed, we should install Chainer 5.y.z and CuPy 5.y.z, e.g. 5.0.0 or 5.1.0.
 - Prepare high performance USB camera so that takes more than 60 FPS.
 - Run `high_speed.py` instead of `video.py`
 - Do not fall from the chair with surprise :D.
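The new `[predict]` options shown in the README above are read with `configparser`, as `load_config()` and `predict()` in `predict.py` do further down in this diff. A minimal sketch of how they are consumed (illustrative only, not part of the patch):

```python
# Minimal sketch of reading the new [predict] section (mirrors predict.py below).
import configparser

config = configparser.ConfigParser()
config.read('config.ini', 'UTF-8')

visbbox = config.getboolean('predict', 'visbbox')                  # True: draw keypoint bboxes, False: draw small dots
detection_thresh = config.getfloat('predict', 'detection_thresh')  # confidence threshold for proposals
min_num_keypoints = config.getint('predict', 'min_num_keypoints')  # drop humans with fewer detected keypoints
```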
diff --git a/augment.py b/augment.py index fd9e742..a192a7f 100644 --- a/augment.py +++ b/augment.py @@ -1,43 +1,427 @@ +import random + import numpy as np from scipy import ndimage +import chainercv.transforms as transforms +from chainercv.links.model.ssd.transforms import random_distort +import PIL +from PIL import ImageChops, ImageOps, ImageFilter, ImageEnhance -def rotate_point(point_yx, degree, center_yx): - offset_x, offset_y = center_yx +def rotate_point(point_yx, angle, center_yx): + offset_y, offset_x = center_yx shift = point_yx - center_yx shift_y, shift_x = shift[:, 0], shift[:, 1] - cos_rad = np.cos(np.deg2rad(degree)) - sin_rad = np.sin(np.deg2rad(degree)) + cos_rad = np.cos(np.deg2rad(angle)) + sin_rad = np.sin(np.deg2rad(angle)) qx = offset_x + cos_rad * shift_x + sin_rad * shift_y qy = offset_y - sin_rad * shift_x + cos_rad * shift_y return np.array([qy, qx]).transpose() -def rot_image(image, degree): - # CHW => HWC - image = image.transpose(1, 2, 0) - rot = ndimage.rotate(image, degree, reshape=False) - # HWC => CHW - rot = rot.transpose(2, 0, 1) +def rotate_image(image, angle): + rot = ndimage.rotate(image, angle, axes=(2, 1), reshape=False) + # disable image collapse + rot = np.clip(rot, 0, 255) return rot -def rotate(image, keypoints, bbox, degree): +def random_rotate(image, keypoints, bbox): + angle = np.random.uniform(-40, 40) + param = {} + param['angle'] = angle new_keypoints = [] center_yx = np.array(image.shape[1:]) / 2 for points in keypoints: rot_points = rotate_point(np.array(points), - degree, + angle, center_yx) new_keypoints.append(rot_points) new_bbox = [] for x, y, w, h in bbox: - points = np.array([[y + h / 2, x + w / 2]]) - ry, rx = rotate_point(points, - degree, - center_yx)[0] - new_bbox.append([rx - w / 2, ry - h / 2, w, h]) - - rot = rot_image(image, degree) - return rot, new_keypoints, new_bbox + + points = np.array( + [ + [y, x], + [y, x + w], + [y + h, x], + [y + h, x + w] + ] + ) + + rot_points = rotate_point( + points, + angle, + center_yx + ) + xmax = np.max(rot_points[:, 1]) + ymax = np.max(rot_points[:, 0]) + xmin = np.min(rot_points[:, 1]) + ymin = np.min(rot_points[:, 0]) + # x,y,w,h + new_bbox.append([xmin, ymin, xmax - xmin, ymax - ymin]) + + image = rotate_image(image, angle) + return image, new_keypoints, new_bbox, param + + +def spot_light(pil_img): + w, h = pil_img.size + effect_img = np.zeros((h, w, 3)) + scale_w = random.choice([5, 6, 7, 8, 9]) + scale_h = random.choice([5, 6, 7, 8, 9]) + x = random.choice(range(w // scale_w, w - w // scale_w)) + y = random.choice(range(h // scale_h, h - h // scale_h)) + light = random.choice(range(128, 220)) + effect_img[y - h // scale_h:y + h // scale_h, x - w // scale_w:x + w // scale_w] = light + effect_img = PIL.Image.fromarray(effect_img.astype(np.uint8)) + return ImageChops.add(pil_img, effect_img) + + +def blend_alpha(pil_img, direction='left'): + w, h = pil_img.size + effect_img = np.zeros((h, w, 3)) + if direction == 'right': + for x in range(w): + effect_img[:, x] = x * 255 / w + elif direction == 'left': + for x in range(w): + effect_img[:, x] = (w - x) * 255 / w + elif direction == 'up': + for y in range(h): + effect_img[y, :] = (h - y) * 255 / h + elif direction == 'down': + for y in range(h): + effect_img[y, :] = y * 255 / h + else: + raise Exception("invalid argument direction is 'right','left','up','down' actual {}".format(direction)) + effect_img = PIL.Image.fromarray(effect_img.astype(np.uint8)) + return PIL.Image.blend(pil_img, effect_img, 0.5) + + +def chop_image(pil_img, 
direction='left', op='add'): + w, h = pil_img.size + effect_img = np.zeros((h, w, 3)) + if direction == 'right': + for x in range(w): + effect_img[:, x] = x * 255 / w + elif direction == 'left': + for x in range(w): + effect_img[:, x] = (w - x) * 255 / w + elif direction == 'up': + for y in range(h): + effect_img[y, :] = (h - y) * 255 / h + elif direction == 'down': + for y in range(h): + effect_img[y, :] = y * 255 / h + else: + raise Exception("invalid argument direction. It should be 'right','left','up','down' actual {}".format(direction)) + effect_img = PIL.Image.fromarray(effect_img.astype(np.uint8)) + if op == 'add': + operation = ImageChops.add + elif op == 'subtract': + operation = ImageChops.subtract + elif op == 'multiply': + operation = ImageChops.multiply + elif op == 'screen': + operation = ImageChops.screen + elif op == 'lighter': + operation = ImageChops.lighter + elif op == 'darker': + operation = ImageChops.darker + else: + ops = ['add', 'subtract', 'multiply', 'screen', 'lighter', 'darker'] + raise Exception("invalid argument op. {} actual {}".format(ops, direction)) + return operation(pil_img, effect_img) + + +def filter_image(pil_img): + method = random.choice(['gaussian', 'blur', 'sharpen']) + if method == 'gaussian': + return pil_img.filter(ImageFilter.GaussianBlur(random.choice([0.5, 1.0, 1.5]))) + if method == 'blur': + return pil_img.filter(ImageFilter.BLUR) + if method == 'sharpen': + return pil_img.filter(ImageFilter.SHARPEN) + + +def random_process_by_PIL(image): + # convert CHW -> HWC -> PIL.Image + pil_img = PIL.Image.fromarray(image.transpose(1, 2, 0).astype(np.uint8)) + + method = np.random.choice( + ['equalize', 'spot_light', 'chop', 'blend'], + p=[0.15, 0.15, 0.35, 0.35] + ) + + param = {'pil': method, 'filter': False} + if method == 'equalize': + pil_img = ImageOps.equalize(pil_img) + if method == 'spot_light': + pil_img = spot_light(pil_img) + if method == 'chop': + direction = random.choice(['left', 'right', 'up', 'down']) + op = random.choice(['add', 'subtract', 'multiply', 'screen', 'lighter', 'darker']) + pil_img = chop_image(pil_img, direction, op) + if method == 'blend': + direction = random.choice(['left', 'right', 'up', 'down']) + pil_img = blend_alpha(pil_img, direction) + + if np.random.choice([True, False], p=[0.1, 0.9]): + pil_img = filter_image(pil_img) + param['filter'] = True + # back to CHW + image = np.asarray(pil_img).transpose(2, 0, 1).astype(np.float32) + return image, param + + +def augment_image(image, dataset_type): + """color augmentation""" + param = {} + + if dataset_type == 'mpii': + method = np.random.choice( + ['random_distort', 'pil'], + p=[1.0, 0.0], + ) + elif dataset_type == 'coco': + method = np.random.choice( + ['random_distort', 'pil'], + p=[0.5, 0.5], + ) + + if method == 'random_distort': + param['method'] = method + # color augmentation provided by ChainerCV + ret = random_distort(image, contrast_low=0.3, contrast_high=2) + return ret, param + if method == 'pil': + ret, param = random_process_by_PIL(image) + param['method'] = method + return ret, param + + +def random_flip(image, keypoints, bbox, is_labeled, is_visible, flip_indices): + """ + random x_flip + Note that if image is flipped, `flip_indices` translate elements. + e.g. left_shoulder -> right_shoulder. 
+ """ + _, H, W = image.shape + image, param = transforms.random_flip(image, x_random=True, return_param=True) + + if param['x_flip']: + keypoints = [ + transforms.flip_point(points, (H, W), x_flip=True)[flip_indices] + for points in keypoints + ] + + is_labeled = [label[flip_indices] for label in is_labeled] + is_visible = [vis[flip_indices] for vis in is_visible] + + new_bbox = [] + for x, y, w, h in bbox: + [[y, x]] = transforms.flip_point(np.array([[y, x + w]]), (H, W), x_flip=True) + new_bbox.append([x, y, w, h]) + bbox = new_bbox + + return image, keypoints, bbox, is_labeled, is_visible, param + + +def scale_fit_short(image, keypoints, bbox, length): + _, H, W = image.shape + min_hw = min(H, W) + scale = length / min_hw + new_image = transforms.scale(image, size=length, fit_short=True) + new_keypoints = [scale * k for k in keypoints] + new_bbox = [scale * np.asarray(b) for b in bbox] + return new_image, new_keypoints, new_bbox + + +def intersection(bbox0, bbox1): + x0, y0, w0, h0 = bbox0 + x1, y1, w1, h1 = bbox1 + + def relu(x): return max(0, x) + w = relu(min(x0 + w0, x1 + w1) - max(x0, x1)) + h = relu(min(y0 + h0, y1 + h1) - max(y0, y1)) + return w * h + + +def translate_bbox(bbox, size, y_offset, x_offset): + cropped_H, cropped_W = size + new_bbox = [] + for x, y, w, h in bbox: + x_shift = x + x_offset + y_shift = y + y_offset + is_intersect = intersection([0, 0, cropped_W, cropped_H], [x_shift, y_shift, w, h]) + if is_intersect: + xmin = max(0, x_shift) + ymin = max(0, y_shift) + xmax = min(cropped_W, x_shift + w) + ymax = min(cropped_H, y_shift + h) + wnew = xmax - xmin + hnew = ymax - ymin + new_bbox.append([xmin, ymin, wnew, hnew]) + else: + new_bbox.append([x_shift, y_shift, w, h]) + return new_bbox + + +def crop(img, y_slice, x_slice, copy=False): + ret = img.copy() if copy else img + return ret[:, y_slice, x_slice] + + +def crop_all_humans(image, keypoints, bbox, is_labeled): + _, H, W = image.shape + aspect = W / H + param = {} + if len(keypoints) == 0: + param['do_nothing'] = True + return image, keypoints, bbox, param + + kymax = max([np.max(ks[l, 0]) for l, ks in zip(is_labeled, keypoints)]) + kxmax = max([np.max(ks[l, 1]) for l, ks in zip(is_labeled, keypoints)]) + kymin = min([np.min(ks[l, 0]) for l, ks in zip(is_labeled, keypoints)]) + kxmin = min([np.min(ks[l, 1]) for l, ks in zip(is_labeled, keypoints)]) + + bxmax = max([b[0] + b[2] for b in bbox]) + bymax = max([b[1] + b[3] for b in bbox]) + bxmin = min([b[0] for b in bbox]) + bymin = min([b[1] for b in bbox]) + + ymax = max(kymax, bymax) + xmax = max(kxmax, bxmax) + ymin = min(kymin, bymin) + xmin = min(kxmin, bxmin) + + if (xmax + xmin) / 2 < W / 2: + x_start = random.randint(0, max(0, int(xmin))) + y_start = random.randint(0, max(0, int(ymin))) + y_end = random.randint(min(H, int(ymax)), H) + ylen = y_end - y_start + xlen = aspect * ylen + x_end = min(W, int(x_start + xlen)) + x_slice = slice(x_start, x_end, None) + y_slice = slice(y_start, y_end, None) + else: + x_end = random.randint(min(int(xmax), W), W) + y_end = random.randint(min(int(ymax), H), H) + y_start = random.randint(0, max(0, int(ymin))) + ylen = y_end - y_start + xlen = aspect * ylen + x_start = max(0, int(x_end - xlen)) + x_slice = slice(x_start, x_end, None) + y_slice = slice(y_start, y_end, None) + + cropped = crop(image, y_slice=y_slice, x_slice=x_slice, copy=True) + _, cropped_H, cropped_W = cropped.shape + param['x_slice'] = x_slice + param['y_slice'] = y_slice + if cropped_H <= 50 or cropped_W <= 50: + """ + This case, for example, 
cropped_H=0 will cause an error when try to resize image + or resize small image to insize will cause low resolution human image. + To avoid situations, we will stop crop image. + """ + param['do_nothing'] = True + return image, keypoints, bbox, param + image = cropped + + keypoints = [ + transforms.translate_point( + points, x_offset=-x_slice.start, y_offset=-y_slice.start) + for points in keypoints + ] + + bbox = translate_bbox( + bbox, + size=(cropped_H, cropped_W), + x_offset=-x_slice.start, + y_offset=-y_slice.start, + ) + + return image, keypoints, bbox, param + + +def random_sized_crop(image, keypoints, bbox): + image, param = transforms.random_sized_crop( + image, + scale_ratio_range=(0.5, 5), + aspect_ratio_range=(0.75, 1.3333333333333333), + return_param=True + ) + + keypoints = [ + transforms.translate_point(points, + x_offset=-param['x_slice'].start, + y_offset=-param['y_slice'].start + ) + for points in keypoints + ] + + _, cropped_H, cropped_W = image.shape + + bbox = translate_bbox( + bbox, + size=(cropped_H, cropped_W), + x_offset=-param['x_slice'].start, + y_offset=-param['y_slice'].start, + ) + + return image, keypoints, bbox, {random_sized_crop.__name__: param} + + +def resize(image, keypoints, bbox, size): + _, H, W = image.shape + new_h, new_w = size + image = transforms.resize(image, (new_h, new_w)) + + keypoints = [ + transforms.resize_point(points, (H, W), (new_h, new_w)) + for points in keypoints + ] + + new_bbox = [] + for x, y, bw, bh in bbox: + [[y, x]] = transforms.resize_point(np.array([[y, x]]), (H, W), (new_h, new_w)) + bw *= new_w / W + bh *= new_h / H + new_bbox.append([x, y, bw, bh]) + return image, keypoints, new_bbox + + +def random_resize(image, keypoints, bbox): + # Random resize + _, H, W = image.shape + scalew, scaleh = np.random.uniform(0.7, 1.5, 2) + resizeW, resizeH = int(W * scalew), int(H * scaleh) + image, keypoints, bbox = resize(image, keypoints, bbox, (resizeH, resizeW)) + return image, keypoints, bbox, {'H': resizeH, 'W': resizeW} + + +def random_crop(image, keypoints, bbox, is_labeled, dataset_type): + if dataset_type == 'mpii': + crop_target = np.random.choice( + ['random_sized_crop', 'crop_all_humans'], + p=[0.1, 0.9], + ) + if dataset_type == 'coco': + crop_target = np.random.choice( + ['random_sized_crop', 'crop_all_humans'], + p=[0.5, 0.5], + ) + + param = {'crop_target': crop_target} + if crop_target == 'random_sized_crop': + image, keypoints, bbox, p = random_resize(image, keypoints, bbox) + param.update(p) + image, keypoints, bbox, p = random_sized_crop(image, keypoints, bbox) + param.update(p) + elif crop_target == 'crop_all_humans': + image, keypoints, bbox, p = crop_all_humans(image, keypoints, bbox, is_labeled) + param.update(p) + + return image, keypoints, bbox, param diff --git a/begin_train.sh b/begin_train.sh new file mode 100644 index 0000000..a4826c0 --- /dev/null +++ b/begin_train.sh @@ -0,0 +1,10 @@ +cat config.ini +docker run --rm \ +-v $(pwd):/work \ +-v ~/work/dataset/mpii_dataset:/mpii_dataset \ +-v ~/work/dataset/coco_dataset:/coco_dataset \ +--runtime=nvidia \ +--name ppn_idein \ +-w /work \ +idein/chainer:5.1.0 \ +python3 train.py diff --git a/coco_dataset.py b/coco_dataset.py index 3f0463d..6dcd9a3 100644 --- a/coco_dataset.py +++ b/coco_dataset.py @@ -157,10 +157,10 @@ def get_coco_dataset(insize, image_root, annotations, is_labeled = d[:, 2] >= 1 entry = images[image_id] - entry[1].append(keypoints) - entry[2].append(bbox) - entry[3].append(is_visible) - entry[4].append(is_labeled) + 
entry[1].append(np.asarray(keypoints)) + entry[2].append(np.asarray(bbox)) + entry[3].append(np.asarray(is_visible).astype(np.bool)) + entry[4].append(np.asarray(is_labeled).astype(np.bool)) # filter-out non annotated images image_paths = [] diff --git a/config.ini b/config.ini index 26cf41e..88120fb 100644 --- a/config.ini +++ b/config.ini @@ -2,7 +2,7 @@ # batchsize / num of gpus equals the batchsize per gpu batchsize = 22 gpus = main=0 -num_process = 8 +num_process = 11 seed = 0 train_iter = 260000 learning_rate = 0.007 @@ -10,8 +10,8 @@ learning_rate = 0.007 ## mpii dataset [mpii] -images = /data/images -annotations = /data/mpii.json +images = /mpii_dataset/images +annotations = /mpii_dataset/mpii.json parts_scale = 0.5x0.5 instance_scale = 2.0x2.0 train_size = 0.9 @@ -22,10 +22,10 @@ use_cache = False ## coco dataset [coco] -train_images = /data/train2017 -train_annotations = /data/annotations/person_keypoints_train2017.json -val_images = /data/val2017 -val_annotations = /data/annotations/person_keypoints_val2017.json +train_images = /coco_dataset/train2017 +train_annotations = /coco_dataset/annotations/person_keypoints_train2017.json +val_images = /coco_dataset/val2017 +val_annotations = /coco_dataset/annotations/person_keypoints_val2017.json parts_scale = 0.2x0.2 instance_scale = 1.0x1.0 min_num_keypoints = 5 @@ -55,4 +55,12 @@ lambda_resp = 0.25 lambda_iou = 1.0 lambda_coor = 5.0 lambda_size = 5.0 -lambda_limb = 0.5 \ No newline at end of file +lambda_limb = 0.5 + +[predict] +# If `False` is set, hide bbox of annotation other than human instance. +visbbox = True +# detection_thresh +detection_thresh = 0.15 +# ignore human its num of keypoints is less than min_num_keypoints +min_num_keypoints= 1 diff --git a/dataset.py b/dataset.py index e0413f5..f37632f 100644 --- a/dataset.py +++ b/dataset.py @@ -1,11 +1,13 @@ import os -import numpy as np + from chainer.dataset import DatasetMixin from chainercv import utils import chainercv.transforms as transforms import numpy as np -from augment import rotate +from augment import random_rotate, random_flip, random_crop +from augment import scale_fit_short, resize +from augment import augment_image class KeypointDataset2D(DatasetMixin): @@ -43,93 +45,60 @@ def __init__(self, def __len__(self): return len(self.image_paths) - def transform(self, image, keypoints, bbox, is_labeled): - _, H, W = image.shape - # PCA Lighting - image = transforms.pca_lighting(image, sigma=5) + def transform(self, image, keypoints, bbox, is_labeled, is_visible, dataset_type): + transform_param = {} + + # Color augmentation + image, param = augment_image(image, dataset_type) + transform_param['augment_image'] = param # Random rotate - degree = np.random.uniform(-40, 40) - image, keypoints, bbox = rotate(image, keypoints, bbox, degree) + image, keypoints, bbox, param = random_rotate(image, keypoints, bbox) + transform_param['random_rotate'] = param + # Random flip - image, param = transforms.random_flip(image, x_random=True, return_param=True) - if param['x_flip']: - keypoints = [ - transforms.flip_point(points, (H, W), x_flip=True)[self.flip_indices] - for points in keypoints - ] - - is_labeled = [label[self.flip_indices] for label in is_labeled] - - new_bbox = [] - for x, y, w, h in bbox: - [[y, x]] = transforms.flip_point(np.array([[y, x + w]]), (H, W), x_flip=True) - new_bbox.append([x, y, w, h]) - bbox = new_bbox - - # Random resize - scalew, scaleh = np.random.uniform(1.0, 2.0, 2) - resizeW, resizeH = int(W * scalew), int(H * scalew) - image, keypoints, bbox = 
self.resize(image, keypoints, bbox, (resizeH, resizeW)) + image, keypoints, bbox, is_labeled, is_visible, param = random_flip(image, keypoints, bbox, is_labeled, is_visible, self.flip_indices) + transform_param['random_flip'] = param # Random crop - image, param = transforms.random_sized_crop(image, - scale_ratio_range=(0.5, 5), return_param=True) - keypoints = [ - transforms.translate_point(points, - x_offset=-param['x_slice'].start, - y_offset=-param['y_slice'].start - ) - for points in keypoints - ] - new_bbox = [] - for x, y, w, h in bbox: - new_bbox.append([x - param['x_slice'].start, y - param['y_slice'].start, w, h]) - bbox = new_bbox - - return image, keypoints, bbox, is_labeled - - def resize(self, image, keypoints, bbox, size): - _, h, w = image.shape - new_h, new_w = size - - image = transforms.resize(image, (new_h, new_w)) - keypoints = [ - transforms.resize_point(points, (h, w), (new_h, new_w)) - for points in keypoints - ] - new_bbox = [] - for x, y, bw, bh in bbox: - [[y, x]] = transforms.resize_point(np.array([[y, x]]), (h, w), (new_h, new_w)) - bw *= new_w / w - bh *= new_h / h - new_bbox.append([x, y, bw, bh]) - return image, keypoints, new_bbox + image, keypoints, bbox, param = random_crop(image, keypoints, bbox, is_labeled, dataset_type) + transform_param['random_crop'] = param + + return image, keypoints, bbox, is_labeled, is_visible, transform_param def get_example(self, i): w, h = self.insize if self.use_cache and self.cached_samples[i] is not None: - image, keypoints, bbox, is_labeled = self.cached_samples[i] + image, keypoints, bbox, is_labeled, is_visible = self.cached_samples[i] else: path = os.path.join(self.image_root, self.image_paths[i]) image = utils.read_image(path, dtype=np.float32, color=True) keypoints = self.keypoints[i] bbox = self.bbox[i] is_labeled = self.is_labeled[i] + is_visible = self.is_visible[i] - image, keypoints, bbox = self.resize(image, keypoints, bbox, (h, w)) if self.use_cache: - self.cached_samples[i] = image, keypoints, bbox, is_labeled + image, keypoints, bbox = resize(image, keypoints, bbox, (h, w)) + self.cached_samples[i] = image, keypoints, bbox, is_labeled, is_visible image = image.copy() keypoints = keypoints.copy() bbox = bbox.copy() is_labeled = is_labeled.copy() - - if self.do_augmentation: - image, keypoints, bbox, is_labeled = self.transform(image, keypoints, bbox, is_labeled) - image, keypoints, bbox = self.resize(image, keypoints, bbox, (h, w)) + is_visible = is_visible.copy() + + transform_param = {} + try: + if self.do_augmentation: + image, keypoints, bbox = scale_fit_short(image, keypoints, bbox, length=int(min(h, w) * 1.25)) + image, keypoints, bbox, is_labeled, is_visible, transform_param = self.transform( + image, keypoints, bbox, is_labeled, is_visible, self.dataset_type) + transform_param['do_augmentation'] = self.do_augmentation + image, keypoints, bbox = resize(image, keypoints, bbox, (h, w)) + except Exception as e: + raise Exception("something wrong...transform_param = {}".format(transform_param)) return { 'path': self.image_paths[i], @@ -139,5 +108,7 @@ def get_example(self, i): 'keypoints': keypoints, 'bbox': bbox, 'is_labeled': is_labeled, + 'is_visible': is_visible, 'dataset_type': self.dataset_type, + 'transform_param': transform_param } diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile index d0a31e1..a4e7bb8 100644 --- a/docker/gpu/Dockerfile +++ b/docker/gpu/Dockerfile @@ -26,7 +26,7 @@ rm -rf /tmp/*.tar.gz && \ apt-get clean && rm -rf /tmp/* /var/tmp* /var/lib/apt/lists/* && \ rm -f 
/etc/ssh/ssh_host_* && rm -rf /usr/share/man?? /usr/share/man/??_* # install python dependencies -RUN pip3 install pillow matplotlib scipy -RUN pip3 install chainer==5.0.0 cupy-cuda90==5.0.0 chainercv==0.11.0 ideep4py +RUN pip3 install pillow matplotlib scipy tqdm +RUN pip3 install chainer==5.1.0 cupy-cuda90==5.1.0 chainercv==0.11.0 ideep4py # Use Agg backend for matplotlib ENV DISPLAY 0 \ No newline at end of file diff --git a/export_onnx.py b/export_onnx.py new file mode 100644 index 0000000..bd0c555 --- /dev/null +++ b/export_onnx.py @@ -0,0 +1,105 @@ +""" +Export pretrained model to ONNX format. +This is a rough sketch. +For more information see + +https://github.com/chainer/onnx-chainer + +""" +import argparse +import configparser +import logging +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + +import os + +import chainer +import chainer.links as L +from chainer import initializers +import numpy as np +import onnx +import onnx_chainer + +from predict import load_config +from utils import parse_size + + +def get_network(model, **kwargs): + if model == 'mv2': + from network_mobilenetv2 import MobilenetV2 + return MobilenetV2(**kwargs) + elif model == 'resnet50': + from network_resnet import ResNet50 + return ResNet50(**kwargs) + elif model == 'resnet18': + from network_resnet import ResNet + return ResNet(n_layers=18) + elif model == 'resnet34': + from network_resnet import ResNet + return ResNet(n_layers=34) + else: + raise Exception('Invalid model name') + + +class MyModel(chainer.Chain): + + def __init__(self, config): + super(MyModel, self).__init__() + + dataset_type = config.get('dataset', 'type') + if dataset_type == 'mpii': + import mpii_dataset as x_dataset + elif dataset_type == 'coco': + import coco_dataset as x_dataset + else: + raise Exception('Unknown dataset {}'.format(dataset_type)) + + with self.init_scope(): + dtype = np.float32 + self.feature_layer = get_network(config.get('model_param', 'model_name'), dtype=dtype, width_multiplier=1.0) + ksize = self.feature_layer.last_ksize + self.local_grid_size = parse_size(config.get('model_param', 'local_grid_size')) + self.keypoint_names = x_dataset.KEYPOINT_NAMES + self.edges = x_dataset.EDGES + self.lastconv = L.Convolution2D(None, + 6 * len(self.keypoint_names) + + self.local_grid_size[0] * self.local_grid_size[1] * len(self.edges), + ksize=ksize, stride=1, pad=ksize // 2, + initialW=initializers.HeNormal(1 / np.sqrt(2), dtype)) + + def __call__(self, x): + h = self.feature_layer(x) + h = self.feature_layer.last_activation(self.lastconv(h)) + return h + + +def export_onnx(args): + config = load_config(args) + model = MyModel(config) + chainer.serializers.load_npz(os.path.join(args.model, 'bestmodel.npz'), model) + w, h = parse_size(config.get('model_param', 'insize')) + x = np.zeros((1, 3, h, w), dtype=np.float32) + logger.info('begin export') + output = os.path.join(args.model, 'bestmodel.onnx') + with chainer.using_config('train', False): + onnx_chainer.export(model, x, filename=output) + logger.info('end export') + logger.info('run onnx.check') + onnx_model = onnx.load(output) + onnx.checker.check_model(onnx_model) + logger.info('done') + + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument('model', help='path/to/model', type=str) + return parser.parse_args() + + +def main(): + args = parse_arguments() + export_onnx(args) + +if __name__ == '__main__': + main() diff --git a/high_speed.py b/high_speed.py index b0e9227..7aea6e1 100644 --- a/high_speed.py 
+++ b/high_speed.py @@ -1,3 +1,4 @@ +import argparse import configparser import os import queue @@ -12,7 +13,7 @@ import numpy as np from PIL import Image -from predict import get_feature, get_humans_by_feature, draw_humans, create_model +from predict import get_feature, get_humans_by_feature, draw_humans, create_model, load_config from utils import parse_size QUEUE_SIZE = 5 @@ -83,11 +84,12 @@ def stop(self): self.stop_event.set() -def main(): - config = configparser.ConfigParser() - config.read('config.ini', 'UTF-8') - - model = create_model(config) +def high_speed(args): + config = load_config(args) + dataset_type = config.get('dataset', 'type') + detection_thresh = config.getfloat('predict', 'detection_thresh') + min_num_keypoints = config.getint('predict', 'min_num_keypoints') + model = create_model(args, config) if os.path.exists('mask.png'): mask = Image.open('mask.png') @@ -123,7 +125,12 @@ def main(): degree = degree % 360 try: image, feature_map = predictor.get() - humans = get_humans_by_feature(model, feature_map) + humans = get_humans_by_feature( + model, + feature_map, + detection_thresh, + min_num_keypoints + ) except queue.Empty: continue except Exception: @@ -134,7 +141,8 @@ def main(): model.edges, pilImg, humans, - mask=mask.rotate(degree) if mask else None + mask=mask.rotate(degree) if mask else None, + visbbox=config.getboolean('predict', 'visbbox'), ) img_with_humans = cv2.cvtColor(np.asarray(pilImg), cv2.COLOR_RGB2BGR) msg = 'GPU ON' if chainer.backends.cuda.available else 'GPU OFF' @@ -158,5 +166,16 @@ def main(): capture.join() predictor.join() + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument('model', help='path/to/model', type=str) + return parser.parse_args() + + +def main(): + args = parse_arguments() + high_speed(args) + if __name__ == '__main__': main() diff --git a/model.py b/model.py index 20fd3de..ec20bcf 100644 --- a/model.py +++ b/model.py @@ -226,12 +226,13 @@ def static_forward(self, x): def forward(self, x): """ - this provide interface of forwarding - chainer 5.0.0 gives us static_graph to increase of speed of training - but for some reason this does train i.e. loss does not decrease at all. - We do not trust it for now on training. On the other hand. the speed of - inference increases very well.Also note that if we use ideep option, - the output result between `static_forward` and `_forward` will be different. + This provides an interface of forwarding. + ChainerV5 has a feature Static Subgraph Optimizations to increase training speed. + But for some reason, our model does not decrease loss value at all. + We do not trust it for now on training. On the other hand, by decorating `static_graph` + at forward function, it increases speed of inference very well. + Also note that if we use ideep option, the output result between + `static_forward` and `_forward` will be different. 
""" if chainer.config.train: return self._forward(x) diff --git a/mpii_dataset.py b/mpii_dataset.py index b15776f..2f1cfea 100644 --- a/mpii_dataset.py +++ b/mpii_dataset.py @@ -122,7 +122,8 @@ def get_mpii_dataset(insize, image_root, annotations, entry[0].append(np.array(keypoints)) # array of y,x entry[1].append(np.array([x1, y1, x2 - x1, y2 - y1])) # x, y, w, h entry[2].append(np.array(is_visible, dtype=np.bool)) - entry[3].append(np.ones(len(is_visible), dtype=np.bool)) + is_labeled = np.ones(len(is_visible), dtype=np.bool) + entry[3].append(is_labeled) # split dataset train_images, test_images = split_dataset_random( diff --git a/predict.py b/predict.py index e78ab0e..bab5c4d 100644 --- a/predict.py +++ b/predict.py @@ -11,9 +11,6 @@ import random import time -import matplotlib -matplotlib.use('Agg') -from matplotlib import pyplot as plt import numpy as np import chainer @@ -22,15 +19,12 @@ else: xp = np -import chainercv.transforms as transforms from chainercv.utils import non_maximum_suppression -from chainercv.visualizations import vis_bbox from PIL import ImageDraw, Image from coco_dataset import get_coco_dataset from mpii_dataset import get_mpii_dataset from model import PoseProposalNet -from train import create_model from network_resnet import ResNet50 from utils import parse_size @@ -63,12 +57,12 @@ def get_feature(model, image): return resp, conf, x, y, w, h, e -def estimate(model, image): +def estimate(model, image, detection_thresh=0.15, min_num_keypoints=-1): feature_map = get_feature(model, image) - return get_humans_by_feature(model, feature_map) + return get_humans_by_feature(model, feature_map, detection_thresh, min_num_keypoints) -def get_humans_by_feature(model, feature_map, detection_thresh=0.15): +def get_humans_by_feature(model, feature_map, detection_thresh=0.15, min_num_keypoints=-1): resp, conf, x, y, w, h, e = feature_map start = time.time() delta = resp * conf @@ -113,14 +107,14 @@ def get_humans_by_feature(model, feature_map, detection_thresh=0.15): break human[t] = bbox[(t, j_h, j_w)] i_h, i_w = j_h, j_w - - humans.append(human) + if min_num_keypoints <= len(human) - 1: + humans.append(human) logger.info('alchemy time {:.5f}'.format(time.time() - start)) logger.info('num humans = {}'.format(len(humans))) return humans -def draw_humans(keypoint_names, edges, pil_image, humans, mask=None): +def draw_humans(keypoint_names, edges, pil_image, humans, mask=None, visbbox=True): """ This is what happens when you use alchemy on humans... 
note that image should be PIL object @@ -134,7 +128,7 @@ def draw_humans(keypoint_names, edges, pil_image, humans, mask=None): else: fill = None ymin, xmin, ymax, xmax = b - if k == 0: + if k == 0: # human instance # adjust size t = 1 xmin = int(xmin * t + xmax * (1 - t)) @@ -149,9 +143,17 @@ def draw_humans(keypoint_names, edges, pil_image, humans, mask=None): fill=fill, outline=COLOR_MAP[keypoint_names[k]]) else: - drawer.rectangle(xy=[xmin, ymin, xmax, ymax], - fill=fill, - outline=COLOR_MAP[keypoint_names[k]]) + if visbbox: + drawer.rectangle(xy=[xmin, ymin, xmax, ymax], + fill=fill, + outline=COLOR_MAP[keypoint_names[k]]) + else: + r = 2 + x = (xmin + xmax) / 2 + y = (ymin + ymax) / 2 + drawer.ellipse((x - r, y - r, x + r, y + r), + fill=COLOR_MAP[keypoint_names[k]]) + for s, t in edges: if s in human and t in human: by = (human[s][0] + human[s][2]) / 2 @@ -166,7 +168,7 @@ def draw_humans(keypoint_names, edges, pil_image, humans, mask=None): return pil_image -def create_model(config): +def create_model(args, config): global DIRECTED_GRAPHS, COLOR_MAP dataset_type = config.get('dataset', 'type') @@ -194,7 +196,7 @@ def create_model(config): width_multiplier=config.getfloat('model_param', 'width_multiplier'), ) - result_dir = config.get('result', 'dir') + result_dir = args.model chainer.serializers.load_npz( os.path.join(result_dir, 'bestmodel.npz'), model @@ -211,9 +213,18 @@ def create_model(config): return model -def main(): +def load_config(args): config = configparser.ConfigParser() - config.read('config.ini', 'UTF-8') + config_path = os.path.join(args.model, 'src', 'config.ini') + logger.info(config_path) + config.read(config_path, 'UTF-8') + return config + + +def predict(args): + config = load_config(args) + detection_thresh = config.getfloat('predict', 'detection_thresh') + min_num_keypoints = config.getint('predict', 'min_num_keypoints') dataset_type = config.get('dataset', 'type') logger.info('loading {}'.format(dataset_type)) if dataset_type == 'mpii': @@ -235,22 +246,37 @@ def main(): else: raise Exception('Unknown dataset {}'.format(dataset_type)) - model = create_model(config) + model = create_model(args, config) idx = random.choice(range(len(test_set))) image = test_set.get_example(idx)['image'] - humans = estimate(model, - image.astype(np.float32)) + humans = estimate( + model, + image.astype(np.float32), + detection_thresh, + min_num_keypoints, + ) pil_image = Image.fromarray(image.transpose(1, 2, 0).astype(np.uint8)) pil_image = draw_humans( keypoint_names=model.keypoint_names, edges=model.edges, pil_image=pil_image, - humans=humans + humans=humans, + visbbox=config.getboolean('predict', 'visbbox') ) pil_image.save('result.png', 'PNG') +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument('model', help='path/to/model', type=str) + return parser.parse_args() + + +def main(): + args = parse_arguments() + predict(args) + if __name__ == '__main__': main() diff --git a/readmedata/cpu-example.gif b/readmedata/cpu-example.gif new file mode 100644 index 0000000..5d725be Binary files /dev/null and b/readmedata/cpu-example.gif differ diff --git a/run_high_speed.sh b/run_high_speed.sh new file mode 100644 index 0000000..0bab818 --- /dev/null +++ b/run_high_speed.sh @@ -0,0 +1,19 @@ +CMDNAME=`basename $0` +BASEMODELDIR=$(pwd) + +if [ $# -ne 1 ]; then + echo "Usage: $CMDNAME path/to/model" 1>&2 + exit 1 +fi + +xhost +local:docker +docker run --rm \ +-e DISPLAY=$DISPLAY \ +-v /tmp/.X11-unix/:/tmp/.X11-unix \ +-v $(pwd):/work \ +-v $BASEMODELDIR:/models 
\ +--device=/dev/video0:/dev/video0 \ +--runtime=nvidia \ +-w /work \ +ppn:latest python3 high_speed.py /models/$1 +xhost -local:docker diff --git a/run_predict.sh b/run_predict.sh new file mode 100644 index 0000000..b0b1cf0 --- /dev/null +++ b/run_predict.sh @@ -0,0 +1,15 @@ +CMDNAME=`basename $0` +BASEMODELDIR=$(pwd) + +if [ $# -ne 1 ]; then + echo "Usage: $CMDNAME path/to/model" 1>&2 + exit 1 +fi + +docker run --rm \ +-v $(pwd):/work \ +-v $BASEMODELDIR:/models \ +-v ~/work/dataset/mpii_dataset:/mpii_dataset \ +-v ~/work/dataset/coco_dataset:/coco_dataset \ +-w /work \ +idein/chainer:5.1.0 python3 predict.py /models/$1 \ No newline at end of file diff --git a/run_video.sh b/run_video.sh index 6ac5132..a711b28 100644 --- a/run_video.sh +++ b/run_video.sh @@ -1,10 +1,19 @@ +CMDNAME=`basename $0` +BASEMODELDIR=$(pwd) + +if [ $# -ne 1 ]; then + echo "Usage: $CMDNAME path/to/model" 1>&2 + exit 1 +fi + xhost +local:docker docker run --rm \ -e DISPLAY=$DISPLAY \ -v /tmp/.X11-unix/:/tmp/.X11-unix \ --v $PWD:/work \ --w /work \ +-v $(pwd):/work \ +-v $BASEMODELDIR:/models \ --device=/dev/video0:/dev/video0 \ --runtime=nvidia \ -ppn:latest python3 video.py -xhost -local:docker \ No newline at end of file +-w /work \ +ppn:latest python3 video.py /models/$1 +xhost -local:docker diff --git a/train.py b/train.py index c1a9cfc..5a52383 100644 --- a/train.py +++ b/train.py @@ -2,8 +2,9 @@ import random import configparser -from logging import getLogger -logger = getLogger('__main__') +import logging +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) import matplotlib matplotlib.use('Agg') @@ -15,12 +16,11 @@ from chainer.training import extensions import numpy as np -import visualize from model import PoseProposalNet - from coco_dataset import get_coco_dataset from mpii_dataset import get_mpii_dataset from utils import parse_size, parse_kwargs, save_files +import visualize def setup_devices(ids): @@ -73,7 +73,9 @@ def main(): config.read(args.config_path, 'UTF-8') chainer.global_config.autotune = True - chainer.cuda.set_max_workspace_size(11388608) + # chainer.cuda.set_max_workspace_size(11388608) + chainer.cuda.set_max_workspace_size(512 * 1024 * 1024) + chainer.config.cudnn_fast_batch_normalization = True # create result dir and copy file logger.info('> store file to result dir %s', config.get('result', 'dir')) @@ -139,9 +141,10 @@ def main(): train_set, config.getint('training_param', 'batchsize'), n_processes=config.getint('training_param', 'num_process') ) - test_iter = chainer.iterators.SerialIterator( + test_iter = chainer.iterators.MultiprocessIterator( test_set, config.getint('training_param', 'batchsize'), - repeat=False, shuffle=False + repeat=False, shuffle=False, + n_processes=config.getint('training_param', 'num_process') ) logger.info('> setup optimizer') @@ -194,6 +197,7 @@ def main(): logger.info('> start training') trainer.run() + if __name__ == '__main__': import logging logger.addHandler(logging.StreamHandler()) diff --git a/video.py b/video.py index 2a680fe..c10f700 100644 --- a/video.py +++ b/video.py @@ -1,3 +1,4 @@ +import argparse import configparser import logging logger = logging.getLogger(__name__) @@ -13,15 +14,13 @@ from PIL import ImageDraw, Image from predict import COLOR_MAP -from predict import estimate, draw_humans, create_model +from predict import estimate, draw_humans, create_model, load_config from utils import parse_size -def main(): - config = configparser.ConfigParser() - config.read('config.ini', 'UTF-8') - - model = 
create_model(config) +def video(args): + config = load_config(args) + model = create_model(args, config) cap = cv2.VideoCapture(0) if cap.isOpened() is False: @@ -37,6 +36,8 @@ def main(): fps_time = 0 degree = 0 + detection_thresh = config.getfloat('predict', 'detection_thresh') + min_num_keypoints = config.getint('predict', 'min_num_keypoints') while cap.isOpened(): degree += 5 degree = degree % 360 @@ -45,14 +46,17 @@ def main(): image = cv2.resize(image, model.insize) with chainer.using_config('autotune', True): humans = estimate(model, - image.transpose(2, 0, 1).astype(np.float32)) + image.transpose(2, 0, 1).astype(np.float32), + detection_thresh, + min_num_keypoints) pilImg = Image.fromarray(image) pilImg = draw_humans( model.keypoint_names, model.edges, pilImg, humans, - mask=mask.rotate(degree) if mask else None + mask=mask.rotate(degree) if mask else None, + visbbox=config.getboolean('predict', 'visbbox'), ) img_with_humans = cv2.cvtColor(np.asarray(pilImg), cv2.COLOR_RGB2BGR) msg = 'GPU ON' if chainer.backends.cuda.available else 'GPU OFF' @@ -66,5 +70,16 @@ def main(): if cv2.waitKey(1) == 27: break + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument('model', help='path/to/model', type=str) + return parser.parse_args() + + +def main(): + args = parse_arguments() + video(args) + if __name__ == '__main__': main()
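For reference, here is a minimal usage sketch of the refactored prediction API (`load_config`, `create_model`, `estimate`, `draw_humans`) introduced by this diff. The model directory `./trained` and the image file `person.jpg` are placeholder assumptions; the flow mirrors `predict.py` and `video.py` above.

```python
# Minimal usage sketch of the refactored prediction entry points.
# Assumes a trained model directory (e.g. ./trained) containing bestmodel.npz
# and src/config.ini as saved by train.py; the image path is a placeholder.
import argparse

import numpy as np
from PIL import Image
import chainercv.transforms as transforms
from chainercv import utils

from predict import load_config, create_model, estimate, draw_humans

args = argparse.Namespace(model='./trained')
config = load_config(args)
model = create_model(args, config)

image = utils.read_image('person.jpg', dtype=np.float32, color=True)  # CHW, float32
w, h = model.insize                                                    # network input size (width, height)
image = transforms.resize(image, (h, w))                               # chainercv resize expects (H, W)

humans = estimate(model, image,
                  config.getfloat('predict', 'detection_thresh'),
                  config.getint('predict', 'min_num_keypoints'))

pil_image = Image.fromarray(image.transpose(1, 2, 0).astype(np.uint8))
pil_image = draw_humans(model.keypoint_names, model.edges, pil_image, humans,
                        visbbox=config.getboolean('predict', 'visbbox'))
pil_image.save('result.png', 'PNG')
```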