ANHIR submission (#44)
* add experiment name
* update eval. submission
* eval: jit filter landmarks
* update compute stats
* fix cols swap in eval. submission
* ext. evaluate tissue-state
* add codecov info
* update docs
* refactor splitext
* fix pkg setup
* update CI

* update Results ipynb
* exporting figures
* update drawing (cmap)
* elastix params for ANHIR
* update visual
Borda committed Nov 19, 2019
1 parent 43e4ca6 commit ec2ca0d
Showing 31 changed files with 2,348 additions and 1,310 deletions.
5 changes: 4 additions & 1 deletion .codecov.yml
@@ -1,4 +1,7 @@
#see https://github.com/codecov/support/wiki/Codecov-Yaml
# see https://docs.codecov.io/docs/codecov-yaml
# Validation check:
# $ curl --data-binary @.codecov.yml https://codecov.io/validate

#codecov:
# notify:
# require_ci_to_pass: yes
6 changes: 4 additions & 2 deletions .travis.yml
@@ -8,6 +8,8 @@
# this file is *not* meant to cover or endorse the use of travis, but rather to
# help confirm pull requests to this project.

dist: bionic # Ubuntu 18.04

env:
global:
- DISPLAY=""
@@ -72,8 +74,8 @@ script:
- python bm_experiments/bm_comp_perform.py -o ./results -n 1
- python birl/bm_template.py -t ./data_images/pairs-imgs-lnds_mix.csv -o ./results --visual --unique -cfg configs/sample_config.yaml
- rm ./data_images/*_/*/*_HE.csv # remove target landmarks from histol. tissue
- python birl/bm_template.py -t ./data_images/pairs-imgs-lnds_histol.csv -d ./data_images -o ./results --preprocessing matching-rgb gray -cfg configs/sample_config.yaml
- python bm_experiments/evaluate_experiment.py -d ./data_images -e ./results/BmTemplate --visual
- python birl/bm_template.py -n anhir -t ./data_images/pairs-imgs-lnds_histol.csv -d ./data_images -o ./results --preprocessing matching-rgb gray -cfg configs/sample_config.yaml
- python bm_experiments/evaluate_experiment.py -d ./data_images -e ./results/BmTemplate_anhir --visual

after_success:
- coverage report
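The new `-n anhir` argument gives the experiment a name, which is why the evaluation step now points at `./results/BmTemplate_anhir` instead of `./results/BmTemplate`. A minimal sketch of that folder-naming convention as observed in this diff (the helper below is hypothetical, not BIRL's actual code):

```python
import os

def compose_output_folder(path_out, bm_class_name, experiment_name=None):
    """Join the benchmark class name with an optional experiment name.

    Hypothetical helper mirroring the naming seen in this diff
    (BmTemplate -> BmTemplate_anhir); BIRL composes the path internally.
    """
    folder = bm_class_name if not experiment_name \
        else '%s_%s' % (bm_class_name, experiment_name)
    return os.path.join(path_out, folder)

assert compose_output_folder('./results', 'BmTemplate') == './results/BmTemplate'
assert compose_output_folder('./results', 'BmTemplate', 'anhir') == './results/BmTemplate_anhir'
```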
2 changes: 1 addition & 1 deletion appveyor.yml
@@ -78,7 +78,7 @@ test_script:
- tox -v --sitepackages --parallel auto
- mkdir results && touch configs/sample_config.yaml
- python bm_experiments/bm_comp_perform.py -o ./results -n 1
- python birl/bm_template.py -t ./data_images/pairs-imgs-lnds_mix.csv -o ./results --preprocessing matching-rgb gray --unique --visual -cfg configs/sample_config.yaml
- python birl/bm_template.py -n anhir -t ./data_images/pairs-imgs-lnds_mix.csv -o ./results --preprocessing matching-rgb gray --unique --visual -cfg configs/sample_config.yaml

on_success:
- coverage report
12 changes: 6 additions & 6 deletions birl/__init__.py
@@ -12,12 +12,12 @@
traceback.print_exc()


__version__ = '0.2.3'
__author__ = 'Jiri Borovec'
__author_email__ = 'jiri.borovec@fel.cvut.cz'
__license__ = 'BSD 3-clause'
__homepage__ = 'https://borda.github.io/BIRL',
__copyright__ = 'Copyright (c) 2014-2019, %s.' % __author__
__version__ = "0.2.3"
__author__ = "Jiri Borovec"
__author_email__ = "jiri.borovec@fel.cvut.cz"
__license__ = "BSD 3-clause"
__homepage__ = "https://borda.github.io/BIRL",
__copyright__ = "Copyright (c) 2014-2019, %s." % __author__
__doc__ = 'BIRL: Benchmark on Image Registration methods with Landmark validation'
__long_doc__ = "# %s" % __doc__ + """
123 changes: 102 additions & 21 deletions birl/benchmark.py
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
"""
General benchmark template for all registration methods.
It also serves for evaluating the input registration pairs
@@ -25,10 +26,11 @@
import pandas as pd
from skimage.color import rgb2gray

# this is used while calling this file as a script
sys.path += [os.path.abspath('.'), os.path.abspath('..')] # Add path to root
from .utilities.data_io import (
update_path, create_folder, image_sizes, load_landmarks, load_image, save_image)
from .utilities.dataset import image_histogram_matching
from .utilities.dataset import image_histogram_matching, common_landmarks
from .utilities.evaluate import (
compute_target_regist_error_statistic, compute_affine_transf_diff, compute_tre_robustness)
from .utilities.experiments import (
@@ -38,6 +40,9 @@
export_figure, draw_image_points, draw_images_warped_landmarks, overlap_two_images)
from .utilities.registration import estimate_affine_transform

#: In case provided dataset and complete (true) dataset differ
COL_PAIRED_LANDMARKS = 'Ratio matched landmarks'


class ImRegBenchmark(Experiment):
""" General benchmark class for all registration methods.
@@ -152,6 +157,10 @@ class ImRegBenchmark(Experiment):
COL_STATUS = 'status'
#: extension to the image column name for temporary pre-process image
COL_IMAGE_EXT_TEMP = ' TEMP'
#: number of landmarks in dataset (min of moving and reference)
COL_NB_LANDMARKS_INPUT = 'nb. dataset landmarks'
#: number of warped landmarks
COL_NB_LANDMARKS_WARP = 'nb. warped landmarks'
#: required experiment parameters
REQUIRED_PARAMS = Experiment.REQUIRED_PARAMS + ['path_table']

@@ -276,6 +285,7 @@ def _load_data(self):
assert os.path.isfile(self.params['path_table']), \
'path to csv cover is not defined - %s' % self.params['path_table']
self._df_overview = pd.read_csv(self.params['path_table'], index_col=None)
self._df_overview = _df_drop_unnamed(self._df_overview)
assert all(col in self._df_overview.columns for col in self.COVER_COLUMNS), \
'Some required columns are missing in the cover file.'

@@ -286,8 +296,8 @@ def _run(self):
# load existing results or create a new entity
if os.path.isfile(self._path_csv_regist):
logging.info('loading existing csv: "%s"', self._path_csv_regist)
self._df_experiments = pd.read_csv(self._path_csv_regist,
index_col=None)
self._df_experiments = pd.read_csv(self._path_csv_regist, index_col=None)
self._df_experiments = _df_drop_unnamed(self._df_experiments)
if 'ID' in self._df_experiments.columns:
self._df_experiments.set_index('ID', inplace=True)
else:
@@ -521,13 +531,13 @@ def _execute_img_registration(self, item):
path_log = os.path.join(path_dir_reg, self.NAME_LOG_REGISTRATION)
# TODO, add lock to single thread, create pool with possible thread ids
# (USE taskset [native], numactl [need install])
if not (isinstance(commands, list) or isinstance(commands, tuple)):
if not isinstance(commands, (list, tuple)):
commands = [commands]
# measure execution time
cmd_result = exec_commands(commands, path_log, timeout=self.EXECUTE_TIMEOUT)
# if the experiment failed, return None
if not cmd_result:
return None
item = None
return item
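Each registration runs as one or more shell commands guarded by `EXECUTE_TIMEOUT`; on failure the item is now nulled and flows out through the common `return item` path. A minimal sketch of such a timeout-guarded runner, assuming plain `subprocess` (illustrative only; the real `exec_commands` lives in `birl.utilities.experiments` and may differ):

```python
import subprocess

def run_commands(commands, path_log, timeout=None):
    """Run shell commands sequentially, appending their output to a log file.

    Returns True only if every command finishes within `timeout` and exits 0.
    Illustrative sketch; not the actual birl.utilities.experiments.exec_commands.
    """
    with open(path_log, 'a') as log:
        for cmd in commands:
            try:
                proc = subprocess.run(cmd, shell=True, timeout=timeout,
                                      stdout=log, stderr=subprocess.STDOUT)
            except subprocess.TimeoutExpired:
                log.write('command timed out: %s\n' % cmd)
                return False
            if proc.returncode != 0:
                return False
    return True
```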

def _generate_regist_command(self, item):
@@ -627,7 +637,6 @@ def main(cls, params=None):
params = parse_arg_params(arg_parser)

logging.info('running...')
logging.info(cls.__doc__)
benchmark = cls(params)
benchmark.run()
path_expt = benchmark.params['path_exp']
@@ -660,17 +669,19 @@ def _load_landmarks(cls, item, path_dataset):

@classmethod
def compute_registration_statistic(cls, idx_row, df_experiments,
path_dataset=None, path_experiment=None):
path_dataset=None, path_experiment=None, path_reference=None):
""" after successful registration load initial nad estimated landmarks
afterwords compute various statistic for init, and final alignment
:param tuple(int,dict) df_row: row from iterated table
:param DF df_experiments: DataFrame with experiments
:param str|None path_dataset: path to the dataset folder
:param str|None path_dataset: path to the provided dataset folder
:param str|None path_reference: path to the complete landmark collection folder
:param str|None path_experiment: path to the experiment folder
"""
idx, row = idx_row
row = dict(row) # convert even series to dictionary
# load common landmarks and image size
points_ref, points_move, path_img_ref = cls._load_landmarks(row, path_dataset)
img_diag = cls._image_diag(row, path_img_ref)
df_experiments.loc[idx, cls.COL_IMAGE_DIAGONAL] = img_diag
@@ -679,31 +690,44 @@ def compute_registration_statistic(cls, idx_row, df_experiments,
cls.compute_registration_accuracy(df_experiments, idx, points_ref, points_move,
'init', img_diag, wo_affine=False)

# define what is the target and init state according to the experiment results
use_move_warp = isinstance(row.get(cls.COL_POINTS_MOVE_WARP, None), str)
if use_move_warp:
points_init, points_target = points_move, points_ref
col_source, col_target = cls.COL_POINTS_MOVE, cls.COL_POINTS_REF
col_lnds_warp = cls.COL_POINTS_MOVE_WARP
else:
points_init, points_target = points_ref, points_move
col_lnds_warp = cls.COL_POINTS_REF_WARP
col_source, col_target = cls.COL_POINTS_REF, cls.COL_POINTS_MOVE

# optional filtering
if path_reference:
ratio, points_target, _ = \
filter_paired_landmarks(row, path_dataset, path_reference, col_source, col_target)
df_experiments.loc[idx, COL_PAIRED_LANDMARKS] = np.round(ratio, 2)

# load transformed landmarks
if (cls.COL_POINTS_MOVE_WARP not in row) and (cls.COL_POINTS_REF_WARP not in row):
logging.error('Statistic: no output landmarks')
return

# define what is the target and init state according to the experiment results
is_move_warp = isinstance(row.get(cls.COL_POINTS_MOVE_WARP, None), str)
points_init = points_move if is_move_warp else points_ref
points_target = points_ref if is_move_warp else points_move
col_lnds_warp = cls.COL_POINTS_MOVE_WARP if is_move_warp else cls.COL_POINTS_REF_WARP

# check if there are reference landmarks
if points_target is None:
logging.warning('Missing landmarks in "%s"',
cls.COL_POINTS_REF if is_move_warp else cls.COL_POINTS_MOVE)
cls.COL_POINTS_REF if use_move_warp else cls.COL_POINTS_MOVE)
return
# load warped landmarks
path_lnds_wapr = update_path(row[col_lnds_warp], pre_path=path_experiment)
if path_lnds_wapr and os.path.isfile(path_lnds_wapr):
points_warp = load_landmarks(path_lnds_wapr)
path_lnds_warp = update_path(row[col_lnds_warp], pre_path=path_experiment)
if path_lnds_warp and os.path.isfile(path_lnds_warp):
points_warp = load_landmarks(path_lnds_warp)
points_warp = np.nan_to_num(points_warp)
else:
logging.warning('Invalid path to the landmarks: "%s" <- "%s"',
path_lnds_wapr, row[col_lnds_warp])
path_lnds_warp, row[col_lnds_warp])
return
df_experiments.loc[idx, cls.COL_NB_LANDMARKS_INPUT] = min(len(points_ref), len(points_move))
df_experiments.loc[idx, cls.COL_NB_LANDMARKS_WARP] = len(points_warp)

# compute Affine statistic
affine_diff = compute_affine_transf_diff(points_init, points_target, points_warp)
@@ -732,8 +756,8 @@ def compute_registration_accuracy(cls, df_experiments, idx, points1, points2,
:param DF df_experiments: DataFrame with experiments
:param int idx: index of the particular record
:param points1: np.array<nb_points, dim>
:param points2: np.array<nb_points, dim>
:param ndarray points1: np.array<nb_points, dim>
:param ndarray points2: np.array<nb_points, dim>
:param str state: whether it was before or after registration
:param float img_diag: target image diagonal
:param bool wo_affine: without affine transform, assume only local/elastic deformation
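For context, the accuracy statistics here are built on the target registration error (TRE): Euclidean distances between warped and reference landmarks, typically also normalized by the image diagonal (rTRE) so that differently sized images are comparable. A minimal sketch under that assumption (the actual computation is in `birl.utilities.evaluate.compute_target_regist_error_statistic` and may differ in detail):

```python
import numpy as np

def tre_statistics(points_target, points_warp, img_diag):
    """Landmark-wise Euclidean distances plus summary statistics.

    Dividing by the image diagonal gives the relative TRE (rTRE), which makes
    images of different sizes comparable. Sketch only; the package's actual
    computation lives in birl.utilities.evaluate.
    """
    nb_common = min(len(points_target), len(points_warp))
    dist = np.linalg.norm(np.asarray(points_target)[:nb_common]
                          - np.asarray(points_warp)[:nb_common], axis=1)
    return {
        'Mean': dist.mean(),
        'Median': np.median(dist),
        'Max': dist.max(),
        'Median (relative)': np.median(dist) / img_diag,
    }
```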
@@ -886,6 +910,63 @@ def visualise_registration(cls, idx_row, path_dataset=None, path_experiment=None
return path_fig


def _df_drop_unnamed(df):
"""Drop columns was index without name and was loaded as `Unnamed: 0.`"""
df = df[list(filter(lambda c: not c.startswith('Unnamed:'), df.columns))]
return df
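The `Unnamed: 0` column appears whenever a DataFrame is saved together with its index and read back without `index_col`; the helper above simply drops such columns. A quick round-trip showing the effect:

```python
import io
import pandas as pd

df = pd.DataFrame({'rTRE': [0.1, 0.2]})
csv_text = df.to_csv()                    # the index is written as an unnamed first column
df2 = pd.read_csv(io.StringIO(csv_text))  # ...and comes back as 'Unnamed: 0'
assert list(df2.columns) == ['Unnamed: 0', 'rTRE']
df2 = df2[[c for c in df2.columns if not c.startswith('Unnamed:')]]
assert list(df2.columns) == ['rTRE']
```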


def filter_paired_landmarks(item, path_dataset, path_reference, col_source, col_target):
""" filter all relevant landmarks which were used and copy them to experiment
The case is that in certain challenge stage users had provided just a subset
of all image landmarks which could be laos shuffled. The idea is to filter identify
all user used (provided in dataset) landmarks and filter them from temporary
reference dataset.
:param dict|Series item: experiment DataFrame
:param str path_dataset: path to provided landmarks
:param str path_reference: path to the complete landmark collection
:param str col_source: column name of landmarks to be transformed
:param str col_target: column name of landmarks to be compared
:return tuple(float,ndarray,ndarray): match ratio, filtered ref and move landmarks
>>> p_data = update_path('data_images')
>>> p_csv = os.path.join(p_data, 'pairs-imgs-lnds_histol.csv')
>>> df = pd.read_csv(p_csv)
>>> ratio, lnds_ref, lnds_move = filter_paired_landmarks(dict(df.iloc[0]), p_data, p_data,
... ImRegBenchmark.COL_POINTS_MOVE, ImRegBenchmark.COL_POINTS_REF)
>>> ratio
1.0
>>> lnds_ref.shape == lnds_move.shape
True
"""
path_ref = update_path(item[col_source], pre_path=path_reference)
assert os.path.isfile(path_ref), 'missing landmarks: %s' % path_ref
path_load = update_path(item[col_source], pre_path=path_dataset)
assert os.path.isfile(path_load), 'missing landmarks: %s' % path_load
pairs = common_landmarks(load_landmarks(path_ref), load_landmarks(path_load), threshold=1)
if not pairs.size:
logging.warning('there is no pairing between the dataset landmarks and the user reference')
return 0., np.empty([0]), np.empty([0])

pairs = sorted(pairs.tolist(), key=lambda p: p[1])
ind_ref = np.asarray(pairs)[:, 0]
nb_common = min([len(load_landmarks(update_path(item[col], pre_path=path_reference)))
for col in (col_target, col_source)])
ind_ref = ind_ref[ind_ref < nb_common]

path_lnd_ref = update_path(item[col_target], pre_path=path_reference)
lnds_filter_ref = load_landmarks(path_lnd_ref)[ind_ref]
path_lnd_move = update_path(item[col_source], pre_path=path_reference)
lnds_filter_move = load_landmarks(path_lnd_move)[ind_ref]

ratio_matches = len(ind_ref) / float(nb_common)
assert ratio_matches <= 1, 'suspicious ratio for %i paired and %i common landmarks' \
% (len(pairs), nb_common)
return ratio_matches, lnds_filter_ref, lnds_filter_move


def export_summary_results(df_experiments, path_out, params=None,
name_txt=ImRegBenchmark.NAME_RESULTS_TXT,
name_csv=ImRegBenchmark.NAME_RESULTS_CSV):
2 changes: 2 additions & 0 deletions birl/bm_template.py
@@ -25,6 +25,7 @@
import sys
import logging

# this is used while calling this file as a script
sys.path += [os.path.abspath('.'), os.path.abspath('..')] # Add path to root
from birl.utilities.experiments import create_basic_parser
from birl.benchmark import ImRegBenchmark
@@ -174,6 +175,7 @@ def extend_parse(arg_parser):
# RUN by given parameters
if __name__ == '__main__':
logging.basicConfig(level=logging.INFO)
logging.info(__doc__)
arg_params, path_expt = BmTemplate.main()

if arg_params.get('run_comp_benchmark', False):
6 changes: 3 additions & 3 deletions birl/utilities/data_io.py
@@ -77,7 +77,7 @@ def load_landmarks(path_file):
if not os.path.isfile(path_file):
logging.warning('missing landmarks "%s"', path_file)
return None
ext = os.path.splitext(path_file)[-1]
_, ext = os.path.splitext(path_file)
if ext == '.csv':
return load_landmarks_csv(path_file)
elif ext == '.pts':
@@ -160,7 +160,7 @@ def save_landmarks(path_file, landmarks):
"""
assert os.path.isdir(os.path.dirname(path_file)), \
'missing folder "%s"' % os.path.dirname(path_file)
path_file = os.path.splitext(path_file)[0]
path_file, _ = os.path.splitext(path_file)
landmarks = landmarks.values if isinstance(landmarks, pd.DataFrame) else landmarks
save_landmarks_csv(path_file + '.csv', landmarks)
save_landmarks_pts(path_file + '.pts', landmarks)
@@ -433,7 +433,7 @@ def _gene_out_path(path_file, file_ext, path_out_dir=None):
"""
if not path_out_dir:
path_out_dir = os.path.dirname(path_file)
img_name = os.path.splitext(os.path.basename(path_file))[0]
img_name, _ = os.path.splitext(os.path.basename(path_file))
path_out = os.path.join(path_out_dir, img_name + file_ext)
return path_out

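The `splitext` refactor in this file swaps index-based access for tuple unpacking, which makes explicit whether the base name or the extension is being kept:

```python
import os

name, ext = os.path.splitext('scale-100pc/image_HE.csv')
assert name == 'scale-100pc/image_HE'
assert ext == '.csv'
# ext  replaces the old os.path.splitext(path)[-1]
# name replaces the old os.path.splitext(path)[0]
```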
25 changes: 16 additions & 9 deletions birl/utilities/dataset.py
@@ -600,25 +600,31 @@ def list_sub_folders(path_folder, name='*'):
return sub_dirs


def common_landmarks(points1, points2, threshold=0.5):
def common_landmarks(points1, points2, threshold=1.5):
""" find common landmarks in two sets
:param ndarray|list(list(float)) points1: first point set
:param ndarray|list(list(float)) points2: second point set
:param float threshold: threshold for assignment
:param float threshold: threshold for assignment (for landmarks in pixels)
:return list(bool): flags
>>> np.random.seed(0)
>>> common = np.random.random((5, 2))
>>> pts1 = np.vstack([common, np.random.random((10, 2))])
>>> pts2 = np.vstack([common, np.random.random((15, 2))])
>>> common_landmarks(pts1, pts2, threshold=0.1)
array([[ 0, 0],
[ 1, 1],
[ 2, 2],
[ 3, 3],
[ 4, 4],
[14, 15]])
>>> common_landmarks(pts1, pts2, threshold=1e-3)
array([[0, 0],
[1, 1],
[2, 2],
[3, 3],
[4, 4]])
>>> np.random.shuffle(pts2)
>>> common_landmarks(pts1, pts2, threshold=1e-3)
array([[ 0, 13],
[ 1, 10],
[ 2, 9],
[ 3, 14],
[ 4, 8]])
"""
points1 = np.asarray(points1)
points2 = np.asarray(points2)
@@ -627,6 +633,7 @@ def common_landmarks(points1, points2, threshold=0.5):
dist_sel = dist[ind_row, ind_col]
pairs = [(i, j) for (i, j, d) in zip(ind_row, ind_col, dist_sel)
if d < threshold]
assert len(pairs) <= min([len(points1), len(points2)])
return np.array(pairs, dtype=int)
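The lines collapsed from this diff compute the pairwise distance matrix `dist` and the assignment indices `ind_row, ind_col`; the visible tail then keeps only pairs closer than `threshold`. A self-contained sketch of that technique using SciPy's Hungarian solver (an assumption about the hidden lines, not a verbatim copy of BIRL's implementation):

```python
import numpy as np
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist

def match_landmarks(points1, points2, threshold=1.5):
    """Pair up nearby landmarks via minimum-cost assignment on Euclidean distances.

    Sketch of the technique behind `common_landmarks`; the collapsed diff hides
    the package's exact implementation.
    """
    dist = cdist(np.asarray(points1), np.asarray(points2))
    ind_row, ind_col = linear_sum_assignment(dist)   # optimal one-to-one matching
    pairs = [(i, j) for i, j in zip(ind_row, ind_col) if dist[i, j] < threshold]
    return np.array(pairs, dtype=int)
```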


Expand Down