Commit

Dev (#121)
* Dev fix generate_metadata (#106)

* fix generate_metadata

* temp fix in callbacks

* Dev loader refactor (#111)

* refactored loaders, fixed weighted loss calculation

* updated config

* reverted resnet 152 to 32 filters with no dropout

* removed print

* back to 1000 valid size

* back to old batch size

* back other params

* config

* fix bug in loaders (#112)

* fix validation bug (#115)

* Dev tta (#116) (see the TTA sketch after this list)

* skeleton added

* transformation and inverse done

* tta working

* Dev validation using mAP precision (#117)

* merge TTA with mAP validation

* mAP validation

* fix mAP valid in eval (#120)
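
The TTA items above (#116, #117) refer to test-time augmentation: each validation image is transformed, the model's prediction is mapped back through the inverse transform, and the de-augmented predictions are averaged. The commit message does not show the repository's implementation, so the snippet below is only a minimal sketch of the idea; predict_with_tta and the flip-only transform list are hypothetical names, not code from this repo.

# Illustrative TTA sketch (not from this repository). Assumes `model` maps a
# batch of images (N, H, W, C) to per-pixel probability maps (N, H, W).
import numpy as np

def predict_with_tta(model, image):
    """Average predictions over identity and flip transforms and their inverses."""
    transforms = [
        (lambda x: x, lambda y: y),                    # identity
        (lambda x: x[:, ::-1], lambda y: y[:, ::-1]),  # horizontal flip and its inverse
        (lambda x: x[::-1, :], lambda y: y[::-1, :]),  # vertical flip and its inverse
    ]
    predictions = []
    for forward, inverse in transforms:
        augmented = forward(image)
        prediction = model(augmented[np.newaxis, ...])[0]  # predict on the augmented view
        predictions.append(inverse(prediction))            # map the prediction back to the original frame
    return np.mean(predictions, axis=0)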
jakubczakon authored May 29, 2018
1 parent c4fed87 commit 91d889c
Showing 14 changed files with 712 additions and 557 deletions.
28 changes: 21 additions & 7 deletions augmentation.py
@@ -5,8 +5,8 @@
 fast_seq = iaa.SomeOf((1, 2),
                       [iaa.Fliplr(0.5),
                        iaa.Flipud(0.5),
-                       iaa.Affine(rotate=(0, 360),
-                                  translate_percent=(-0.1, 0.1), mode='reflect'),
+                       iaa.Affine(rotate=(-10, 10),
+                                  translate_percent=(-0.1, 0.1)),
                        ], random_order=True)


@@ -35,23 +35,37 @@ def __init__(self, pad=None, pad_method=None, name=None, deterministic=False, ra
     def _augment_images(self, images, random_state, parents, hooks):
         result = []
         for i, image in enumerate(images):
-            image_pad = self._reflect_pad(image)
+            image_pad = self._pad(image)
             result.append(image_pad)
         return result

     def _augment_keypoints(self, keypoints_on_images, random_state, parents, hooks):
         result = []
         return result

-    def _reflect_pad(self, img):
+    def _pad(self, img):
         img_ = img.copy()

+        if self._is_expanded_grey_format(img):
+            img_ = np.squeeze(img_, axis=-1)
+
         h_pad, w_pad = self.pad
-        img_padded = cv2.copyMakeBorder(img.copy(), h_pad, h_pad, w_pad, w_pad,
-                                        PadFixed.PAD_FUNCTION[self.pad_method])
-        return img_padded
+        img_ = cv2.copyMakeBorder(img_.copy(), h_pad, h_pad, w_pad, w_pad, PadFixed.PAD_FUNCTION[self.pad_method])
+
+        if self._is_expanded_grey_format(img):
+            img_ = np.expand_dims(img_, axis=-1)
+
+        return img_

     def get_parameters(self):
         return []

+    def _is_expanded_grey_format(self, img):
+        if len(img.shape) == 3 and img.shape[2] == 1:
+            return True
+        else:
+            return False


 class RandomCropFixedSize(iaa.Augmenter):
     def __init__(self, px=None, name=None, deterministic=False, random_state=None):
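
For reference, the fast_seq augmenter above comes from imgaug; the short usage sketch below shows how such a sequence is typically applied to a batch of images. The random test batch is purely illustrative and not part of the repository.

# Usage sketch (illustrative only): applying the fast_seq augmenter above with imgaug.
import numpy as np
from imgaug import augmenters as iaa

fast_seq = iaa.SomeOf((1, 2),
                      [iaa.Fliplr(0.5),
                       iaa.Flipud(0.5),
                       iaa.Affine(rotate=(-10, 10),
                                  translate_percent=(-0.1, 0.1)),
                       ], random_order=True)

images = np.random.randint(0, 255, size=(4, 256, 256, 3), dtype=np.uint8)  # fake batch of RGB images
augmented = fast_seq.augment_images(images)  # applies 1-2 of the listed ops per image
print(augmented.shape)  # (4, 256, 256, 3)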
152 changes: 149 additions & 3 deletions callbacks.py
@@ -1,12 +1,21 @@
 import os
 import numpy as np
 import torch
+import json
+import subprocess
 from PIL import Image
 from deepsense import neptune
 from torch.autograd import Variable
+from tempfile import TemporaryDirectory

-from steps.pytorch.callbacks import NeptuneMonitor
-from utils import softmax, categorize_image
-from pipeline_config import CATEGORY_IDS
+import postprocessing as post
+from steps.base import Step, Dummy
+from steps.utils import get_logger
+from steps.pytorch.callbacks import NeptuneMonitor, ValidationMonitor
+from utils import softmax, categorize_image, coco_evaluation, create_annotations
+from pipeline_config import CATEGORY_IDS, Y_COLUMNS_SCORING
+
+logger = get_logger()


 class NeptuneMonitorSegmentation(NeptuneMonitor):
@@ -91,3 +100,140 @@ def get_prediction_masks(self):
                 prediction_masks[mask_key] = np.stack([prediction, channel_ground_truth], axis=1)
             break
         return prediction_masks
+
+
+class ValidationMonitorSegmentation(ValidationMonitor):
+    def __init__(self, data_dir, small_annotations_size, validate_with_map=False, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.data_dir = data_dir
+        self.small_annotations_size = small_annotations_size
+        self.validate_with_map = validate_with_map
+        self.validation_pipeline = postprocessing__pipeline_simplified
+        self.validation_loss = None
+        self.meta_valid = None
+
+    def set_params(self, transformer, validation_datagen, meta_valid=None, *args, **kwargs):
+        self.model = transformer.model
+        self.optimizer = transformer.optimizer
+        self.loss_function = transformer.loss_function
+        self.output_names = transformer.output_names
+        self.validation_datagen = validation_datagen
+        self.meta_valid = meta_valid
+        self.validation_loss = transformer.validation_loss
+
+    def get_validation_loss(self):
+        if self.validate_with_map:
+            return self._get_validation_loss()
+        else:
+            return super().get_validation_loss()
+
+    def _get_validation_loss(self):
+        with TemporaryDirectory() as temp_dir:
+            outputs = self._transform()
+            prediction = self._generate_prediction(temp_dir, outputs)
+            if len(prediction) == 0:
+                return self.validation_loss.setdefault(self.epoch_id, {'sum': Variable(torch.Tensor([0]))})
+
+            prediction_filepath = os.path.join(temp_dir, 'prediction.json')
+            with open(prediction_filepath, "w") as fp:
+                fp.write(json.dumps(prediction))
+
+            annotation_file_path = os.path.join(self.data_dir, 'val', "annotation.json")
+
+            logger.info('Calculating mean precision and recall')
+            average_precision, average_recall = coco_evaluation(gt_filepath=annotation_file_path,
+                                                                prediction_filepath=prediction_filepath,
+                                                                image_ids=self.meta_valid[Y_COLUMNS_SCORING].values,
+                                                                category_ids=CATEGORY_IDS[1:],
+                                                                small_annotations_size=self.small_annotations_size)
+        return self.validation_loss.setdefault(self.epoch_id, {'sum': Variable(torch.Tensor([average_precision]))})
+
+    def _transform(self):
+        self.model.eval()
+        batch_gen, steps = self.validation_datagen
+        outputs = {}
+        for batch_id, data in enumerate(batch_gen):
+            if isinstance(data, list):
+                X = data[0]
+            else:
+                X = data
+
+            if torch.cuda.is_available():
+                X = Variable(X, volatile=True).cuda()
+            else:
+                X = Variable(X, volatile=True)
+
+            outputs_batch = self.model(X)
+            if len(self.output_names) == 1:
+                outputs.setdefault(self.output_names[0], []).append(outputs_batch.data.cpu().numpy())
+            else:
+                for name, output in zip(self.output_names, outputs_batch):
+                    output_ = output.data.cpu().numpy()
+                    outputs.setdefault(name, []).append(output_)
+            if batch_id == steps:
+                break
+        self.model.train()
+        outputs = {'{}_prediction'.format(name): np.vstack(outputs_) for name, outputs_ in outputs.items()}
+        for name, prediction in outputs.items():
+            outputs[name] = softmax(prediction, axis=1)
+
+        return outputs
+
+    def _generate_prediction(self, cache_dirpath, outputs):
+        data = {'callback_input': {'meta': self.meta_valid,
+                                   'meta_valid': None,
+                                   'target_sizes': [(300, 300)] * len(self.meta_valid),
+                                   },
+                'unet_output': {**outputs}
+                }
+
+        pipeline = self.validation_pipeline(cache_dirpath)
+        for step_name in pipeline.all_steps:
+            cmd = 'touch {}'.format(os.path.join(cache_dirpath, 'transformers', step_name))
+            subprocess.call(cmd, shell=True)
+        output = pipeline.transform(data)
+        y_pred = output['y_pred']
+
+        prediction = create_annotations(self.meta_valid, y_pred, logger, CATEGORY_IDS)
+        return prediction
+
+
+def postprocessing__pipeline_simplified(cache_dirpath):
+    mask_resize = Step(name='mask_resize',
+                       transformer=post.Resizer(),
+                       input_data=['unet_output', 'callback_input'],
+                       adapter={'images': ([('unet_output', 'multichannel_map_prediction')]),
+                                'target_sizes': ([('callback_input', 'target_sizes')]),
+                                },
+                       cache_dirpath=cache_dirpath)
+
+    category_mapper = Step(name='category_mapper',
+                           transformer=post.CategoryMapper(),
+                           input_steps=[mask_resize],
+                           adapter={'images': ([('mask_resize', 'resized_images')]),
+                                    },
+                           cache_dirpath=cache_dirpath)
+
+    labeler = Step(name='labeler',
+                   transformer=post.MulticlassLabeler(),
+                   input_steps=[category_mapper],
+                   adapter={'images': ([(category_mapper.name, 'categorized_images')]),
+                            },
+                   cache_dirpath=cache_dirpath)
+
+    score_builder = Step(name='score_builder',
+                         transformer=post.ScoreBuilder(),
+                         input_steps=[labeler, mask_resize],
+                         adapter={'images': ([(labeler.name, 'labeled_images')]),
+                                  'probabilities': ([(mask_resize.name, 'resized_images')]),
+                                  },
+                         cache_dirpath=cache_dirpath)
+
+    output = Step(name='output',
+                  transformer=Dummy(),
+                  input_steps=[score_builder],
+                  adapter={'y_pred': ([(score_builder.name, 'images_with_scores')]),
+                           },
+                  cache_dirpath=cache_dirpath)
+
+    return output
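
coco_evaluation and create_annotations are imported from utils and are not part of this diff. Below is a rough sketch of what a COCO-style mean precision / mean recall computation looks like with pycocotools, under the assumption that coco_evaluation wraps COCOeval in a similar way; coco_map_mar is a hypothetical name, and the small_annotations_size handling used by the repository is omitted.

# Hedged sketch: a pycocotools-based mean AP / mean AR computation similar in
# spirit to the coco_evaluation call above. Not the repository's implementation;
# small_annotations_size filtering is omitted.
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

def coco_map_mar(gt_filepath, prediction_filepath, image_ids, category_ids):
    coco_gt = COCO(gt_filepath)                     # ground-truth annotations (COCO json)
    coco_dt = coco_gt.loadRes(prediction_filepath)  # predictions in COCO results format
    coco_eval = COCOeval(coco_gt, coco_dt, iouType='segm')
    coco_eval.params.imgIds = list(image_ids)
    coco_eval.params.catIds = list(category_ids)
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    average_precision = coco_eval.stats[0]  # AP @ IoU=0.50:0.95, all areas
    average_recall = coco_eval.stats[8]     # AR @ 100 detections, all areas
    return average_precision, average_recall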
2 changes: 1 addition & 1 deletion evaluate_checkpoint.py
@@ -51,7 +51,7 @@ def run(temp_inference_dir, experiment_dir, neptune_file):
     with open("temporary_neptune.yaml", 'w+') as f:
         yaml.dump(doc, f, default_flow_style=False)

-    cmd = 'neptune run --config temporary_neptune.yaml main.py -- evaluate -p unet_weighted_padded'
+    cmd = 'neptune run --config temporary_neptune.yaml main.py -- evaluate -p unet_padded'
     subprocess.call(cmd, shell=True)

     cmd = 'rm temporary_neptune.yaml'