update classif. (#15)

* CrossVal - allow larger test then train * fix classif. nb searches
Borda · Jan 1, 2019 · f3955cf · f3955cf
1 parent 41cd661
commit f3955cf
Show file tree

Hide file tree

Showing 10 changed files with 106 additions and 56 deletions.
diff --git a/experiments_ovary_centres/run_center_candidate_training.py b/experiments_ovary_centres/run_center_candidate_training.py
@@ -38,7 +38,7 @@
 import matplotlib
 if os.environ.get('DISPLAY', '') == '' \
         and matplotlib.rcParams['backend'] != 'agg':
-    # logging.warning('No display found. Using non-interactive Agg backend.')
+    print('No display found. Using non-interactive Agg backend.')
     matplotlib.use('Agg')
 
 import matplotlib.pyplot as plt

diff --git a/experiments_ovary_centres/run_center_clustering.py b/experiments_ovary_centres/run_center_clustering.py
@@ -20,7 +20,7 @@
 import matplotlib
 if os.environ.get('DISPLAY', '') == '' \
         and matplotlib.rcParams['backend'] != 'agg':
-    # logging.warning('No display found. Using non-interactive Agg backend.')
+    print('No display found. Using non-interactive Agg backend.')
     matplotlib.use('Agg')
 
 import matplotlib.pylab as plt

diff --git a/experiments_ovary_detect/run_ovary_egg-segmentation.py b/experiments_ovary_detect/run_ovary_egg-segmentation.py
@@ -37,7 +37,7 @@
 import matplotlib
 if os.environ.get('DISPLAY', '') == '' \
         and matplotlib.rcParams['backend'] != 'agg':
-    # logging.warning('No display found. Using non-interactive Agg backend.')
+    print('No display found. Using non-interactive Agg backend.')
     matplotlib.use('Agg')
 
 import numpy as np

diff --git a/experiments_segmentation/run_compute_stat_annot_segm.py b/experiments_segmentation/run_compute_stat_annot_segm.py
@@ -21,7 +21,7 @@
 import matplotlib
 if os.environ.get('DISPLAY', '') == '' \
         and matplotlib.rcParams['backend'] != 'agg':
-    # logging.warning('No display found. Using non-interactive Agg backend.')
+    print('No display found. Using non-interactive Agg backend.')
     matplotlib.use('Agg')
 
 import pandas as pd

diff --git a/handling_annotations/run_overlap_images_segms.py b/handling_annotations/run_overlap_images_segms.py
@@ -23,7 +23,7 @@
 import matplotlib
 if os.environ.get('DISPLAY','') == '' \
         and matplotlib.rcParams['backend'] != 'agg':
-    # logging.warning('No display found. Using non-interactive Agg backend.')
+    print('No display found. Using non-interactive Agg backend.')
     matplotlib.use('Agg')
 
 import numpy as np

diff --git a/imsegm/classification.py b/imsegm/classification.py
@@ -20,6 +20,7 @@
 from scipy import interp
 from scipy.stats import randint as sp_randint
 from scipy.stats import uniform as sp_random
+from sklearn.base import clone
 from sklearn import preprocessing, feature_selection, decomposition
 from sklearn import cluster, metrics
 from sklearn import ensemble, neighbors, svm, tree
@@ -897,9 +898,10 @@ def eval_classif_cross_val_roc(clf_name, classif, features, labels,
         cross_val = cross_val.split(features, labels)
     count = 0.
     for train, test in cross_val:
-        classif.fit(np.copy(features[train], order='C'),
-                    np.copy(labels[train], order='C'))
-        proba = classif.predict_proba(np.copy(features[test], order='C'))
+        classif_cv = clone(classif)
+        classif_cv.fit(np.copy(features[train], order='C'),
+                       np.copy(labels[train], order='C'))
+        proba = classif_cv.predict_proba(np.copy(features[test], order='C'))
         # Compute ROC curve and area the curve
         for i, lb in enumerate(unique_labels):
             fpr, tpr, _ = metrics.roc_curve(labels_bin[test, lb], proba[:, i])
@@ -934,6 +936,12 @@ def search_params_cut_down_max_nb_iter(clf_parameters, nb_iter):
     :param clf_parameters: {str: ...}
     :param nb_iter: int, nb of random tryes
     :return: int
+
+    >>> clf_params = create_clf_param_search_grid(DEFAULT_CLASSIF_NAME)
+    >>> search_params_cut_down_max_nb_iter(clf_params, 100)
+    100
+    >>> search_params_cut_down_max_nb_iter(clf_params, 1e6)
+    1450
     """
     counts = []
     for k in clf_parameters:
@@ -944,7 +952,7 @@ def search_params_cut_down_max_nb_iter(clf_parameters, nb_iter):
             return nb_iter
     count = np.product(counts)
     if count < nb_iter:
-        nb_iter < count
+        nb_iter = count
     return nb_iter
 
 
@@ -1384,32 +1392,40 @@ class HoldOut:
 
     Example
     -------
-    >>> ho = HoldOut(10, 7)
+    >>> ho = HoldOut(10, 7, rand_seed=None)
     >>> len(ho)
     1
     >>> list(ho)
     [([0, 1, 2, 3, 4, 5, 6], [7, 8, 9])]
+    >>> ho = HoldOut(10, 7, rand_seed=0)
+    >>> list(ho)
+    [([2, 8, 4, 9, 1, 6, 7], [3, 0, 5])]
     """
-    def __init__(self, nb, hold_idx, random_state=0):
+    def __init__(self, nb_samples, hold_out, rand_seed=0):
         """
 
-        :param int nb: total number of samples
-        :param int hold_idx: index where the test starts
-        :param obj random_state: Seed for the random number generator.
+        :param int nb_samples: total number of samples
+        :param int hold_out: index where the test starts
+        :param obj rand_seed: Seed for the random number generator.
         """
-        self.total = nb
-        self.hold_idx = hold_idx
-        self.random_state = random_state
-        assert self.total > self.hold_idx, \
-            'total %i should be higher than hold Idx %i' % (self.total, self.hold_idx)
+        assert nb_samples > hold_out, \
+            'total %i should be higher than hold Idx %i' % (nb_samples, hold_out)
+
+        self._total = nb_samples
+        self.hold_out = hold_out
+        self._indexes = list(range(nb_samples))
+
+        if rand_seed is not None and rand_seed is not False:
+            np.random.seed(rand_seed)
+            np.random.shuffle(self._indexes)
 
     def __iter__(self):
         """ iterate the folds
 
         :return ([int], [int]):
         """
-        ind_train = list(range(self.hold_idx))
-        ind_test = list(range(self.hold_idx, self.total))
+        ind_train = self._indexes[:self.hold_out]
+        ind_test = self._indexes[self.hold_out:]
         yield ind_train, ind_test
 
     def __len__(self):
@@ -1438,22 +1454,29 @@ class CrossValidatePOut:
     >>> len(cv)
     2
     >>> list(cv)  # doctest: +NORMALIZE_WHITESPACE
-    [([3, 4, 5], [0, 1, 2]), \
+    [([3, 4, 5], [0, 1, 2]),
      ([0, 1, 2], [3, 4, 5])]
 
     Example 2
     ---------
     >>> cv = CrossValidatePOut(7, 3, rand_seed=0)
     >>> list(cv)  # doctest: +NORMALIZE_WHITESPACE
-    [([3, 0, 5, 4], [6, 2, 1]), \
-     ([6, 2, 1, 4], [3, 0, 5]), \
+    [([3, 0, 5, 4], [6, 2, 1]),
+     ([6, 2, 1, 4], [3, 0, 5]),
      ([6, 2, 1, 3, 0, 5], [4])]
-
-
     >>> len(list(cv))
     3
     >>> cv.indexes
     [6, 2, 1, 3, 0, 5, 4]
+
+    Example 3
+    ---------
+    >>> cv = CrossValidatePOut(7, 5, rand_seed=0)
+    >>> list(cv)  # doctest: +NORMALIZE_WHITESPACE
+    [([6, 2], [1, 3, 0, 5, 4]),
+     ([1, 3], [6, 2, 0, 5, 4]),
+     ([0, 5], [6, 2, 1, 3, 4]),
+     ([4], [6, 2, 1, 3, 0, 5])]
     """
 
     def __init__(self, nb_samples, nb_hold_out, rand_seed=None):
@@ -1464,13 +1487,21 @@ def __init__(self, nb_samples, nb_hold_out, rand_seed=None):
         :param obj rand_seed: int or None
         """
         assert nb_samples > nb_hold_out, \
-            'number of holdout has to be smaller then total size'
-        self.nb_samples = nb_samples
-        self.nb_hold_out = nb_hold_out
+            'number of holdout has to be smaller then _total size'
+        self._nb_samples = nb_samples
+        self._nb_hold_out = nb_hold_out
+
+        self._revert = False  # sets the sizes
+        if self._nb_hold_out > (self._nb_samples / 2.):
+            logging.debug('WARNING: you are running in reverse mode, '
+                          'while using all training examples '
+                          'there are much more yield test cases.')
+            self._nb_hold_out = self._nb_samples - self._nb_hold_out
+            self._revert = True
 
-        self.indexes = list(range(self.nb_samples))
+        self.indexes = list(range(self._nb_samples))
 
-        if rand_seed is not False:
+        if rand_seed is not None and rand_seed is not False:
             np.random.seed(rand_seed)
             np.random.shuffle(self.indexes)
         logging.debug('sets ordering: %s', repr(self.indexes))
@@ -1482,17 +1513,19 @@ def __iter__(self):
 
         :return ([int], [int]):
         """
-        for i in range(0, self.nb_samples, self.nb_hold_out):
-            inds_test = self.indexes[i:i + self.nb_hold_out]
+        for i in range(0, self._nb_samples, self._nb_hold_out):
+            inds_test = self.indexes[i:i + self._nb_hold_out]
             inds_train = [i for i in self.indexes if i not in inds_test]
+            if self._revert:
+                inds_train, inds_test = inds_test, inds_train
             yield inds_train, inds_test
 
     def __len__(self):
         """ number of folds
 
         :return int:
         """
-        return int(np.ceil(self.nb_samples / float(self.nb_hold_out)))
+        return int(np.ceil(self._nb_samples / float(self._nb_hold_out)))
 
 
 class CrossValidatePSetsOut:
@@ -1513,7 +1546,7 @@ class CrossValidatePSetsOut:
     >>> len(cv)
     2
     >>> list(cv)  # doctest: +NORMALIZE_WHITESPACE
-    [([5, 6, 7, 8, 9], [0, 1, 2, 3, 4]), \
+    [([5, 6, 7, 8, 9], [0, 1, 2, 3, 4]),
      ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])]
 
     Example 2
@@ -1522,13 +1555,21 @@ class CrossValidatePSetsOut:
     >>> cv.set_indexes
     [[0, 1], [2, 3], [4], [5, 6], [7]]
     >>> list(cv)  # doctest: +NORMALIZE_WHITESPACE
-    [([2, 3, 5, 6, 7], [4, 0, 1]), \
-     ([4, 0, 1, 7], [2, 3, 5, 6]), \
+    [([2, 3, 5, 6, 7], [4, 0, 1]),
+     ([4, 0, 1, 7], [2, 3, 5, 6]),
      ([4, 0, 1, 2, 3, 5, 6], [7])]
     >>> len(cv)
     3
     >>> cv.sets_order
     [2, 0, 1, 3, 4]
+
+    Example 3
+    ---------
+    >>> cv = CrossValidatePSetsOut([2, 2, 1, 2, 1, 1], 4, rand_seed=0)
+    >>> list(cv)  # doctest: +NORMALIZE_WHITESPACE
+    [([8, 4], [2, 3, 5, 6, 0, 1, 7]),
+     ([2, 3, 5, 6], [8, 4, 0, 1, 7]),
+     ([0, 1, 7], [8, 4, 2, 3, 5, 6])]
     """
 
     def __init__(self, set_sizes, nb_hold_out, rand_seed=None):
@@ -1539,24 +1580,32 @@ def __init__(self, set_sizes, nb_hold_out, rand_seed=None):
         :param obj rand_seed: int or None
         """
         assert len(set_sizes) > nb_hold_out, \
-            'nb of hold out (%i) has to be smaller then total size %i' \
+            'nb of hold out (%i) has to be smaller then _total size %i' \
             % (nb_hold_out, len(set_sizes))
-        self.set_sizes = list(set_sizes)
-        self.total = np.sum(self.set_sizes)
-        self.nb_hold_out = nb_hold_out
+        self._set_sizes = list(set_sizes)
+        self._total = np.sum(self._set_sizes)
+        self._nb_hold_out = nb_hold_out
+
+        self._revert = False  # sets the sizes
+        if self._nb_hold_out > (len(self._set_sizes) / 2.):
+            logging.debug('WARNING: you are running in reverse mode, '
+                          'while using all training examples '
+                          'there are much more yield test cases.')
+            self._nb_hold_out = len(self._set_sizes) - self._nb_hold_out
+            self._revert = True
 
         self.set_indexes = []
-        for i, size in enumerate(self.set_sizes):
-            start = int(np.sum(self.set_sizes[:i]))
+        for i, size in enumerate(self._set_sizes):
+            start = int(np.sum(self._set_sizes[:i]))
             inds = range(start, start + size)
             self.set_indexes.append(list(inds))
 
-        assert np.sum(len(i) for i in self.set_indexes) == self.total, \
-            'all indexes should sum to total count %i' % self.total
+        assert np.sum(len(i) for i in self.set_indexes) == self._total, \
+            'all indexes should sum to _total count %i' % self._total
 
-        self.sets_order = list(range(len(self.set_sizes)))
+        self.sets_order = list(range(len(self._set_sizes)))
 
-        if rand_seed is not False:
+        if rand_seed is not None and rand_seed is not False:
             np.random.seed(rand_seed)
             np.random.shuffle(self.sets_order)
         logging.debug('sets ordering: %s', repr(self.sets_order))
@@ -1568,20 +1617,22 @@ def __iter__(self):
 
         :return ([int], [int]):
         """
-        for i in range(0, len(self.set_sizes), self.nb_hold_out):
-            test = self.sets_order[i:i + self.nb_hold_out]
+        for i in range(0, len(self._set_sizes), self._nb_hold_out):
+            test = self.sets_order[i:i + self._nb_hold_out]
             inds_train = list(itertools.chain.from_iterable(
                 self.set_indexes[i] for i in self.sets_order if i not in test))
             inds_test = list(itertools.chain.from_iterable(
                 self.set_indexes[i] for i in self.sets_order if i in test))
+            if self._revert:
+                inds_train, inds_test = inds_test, inds_train
             yield inds_train, inds_test
 
     def __len__(self):
         """ number of folds
 
         :return int:
         """
-        nb = len(self.set_sizes) / float(self.nb_hold_out)
+        nb = len(self._set_sizes) / float(self._nb_hold_out)
         return int(np.ceil(nb))
 
 

diff --git a/imsegm/descriptors.py b/imsegm/descriptors.py
@@ -1734,7 +1734,7 @@ def interpolate_ray_dist(ray_dists, order='spline'):
     """ interpolate ray distances
 
     :param [float] ray_dists:
-    :param str order: degree of interpolation
+    :param str|int order: degree of interpolation
     :return [float]:
 
     >>> interpolate_ray_dist([-1] * 5)
@@ -1799,7 +1799,7 @@ def _fn_cos_residual(x, t, y):
 def reconstruct_ray_features_2d(position, ray_features, shift=0):
     """ reconstruct ray features for 2D image
 
-    :param (int, int) position:
+    :param (int, int)|(float, float) position:
     :param [float] ray_features:
     :param float shift:
     :return [[float, float]]:

diff --git a/imsegm/ellipse_fitting.py b/imsegm/ellipse_fitting.py
@@ -401,7 +401,7 @@ def split_segm_background_foreground(seg, sel_bg=STRUC_ELEM_BG,
     """ smoothing segmentation with morphological operation
 
     :param ndarray seg: input segmentation
-    :param int sel_bg: smoothing background with morphological operation
+    :param int|float sel_bg: smoothing background with morphological operation
     :param int sel_fg: smoothing foreground with morphological operation
     :return:
 

diff --git a/imsegm/utilities/__init__.py b/imsegm/utilities/__init__.py
@@ -1,5 +1,4 @@
 import os
-import logging
 
 import matplotlib
 import numpy as np
@@ -8,7 +7,7 @@
 # in case you are running on machine without display, e.g. server
 if os.environ.get('DISPLAY', '') == '' \
         and matplotlib.rcParams['backend'] != 'agg':
-    # logging.warning('No display found. Using non-interactive Agg backend.')
+    print('No display found. Using non-interactive Agg backend.')
     # https://matplotlib.org/faq/usage_faq.html
     matplotlib.use('Agg')
 

diff --git a/imsegm/utilities/drawing.py b/imsegm/utilities/drawing.py
@@ -10,7 +10,7 @@
 import matplotlib
 if os.environ.get('DISPLAY', '') == '' \
         and matplotlib.rcParams['backend'] != 'agg':
-    # logging.warning('No display found. Using non-interactive Agg backend.')
+    print('No display found. Using non-interactive Agg backend.')
     matplotlib.use('Agg')
 
 import numpy as np