From 43049c1f49615ce905e948712cf598677ff158e3 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Sun, 13 Jan 2019 09:41:02 +0100 Subject: [PATCH] Cross-Validation (#17) * fix & refactor CrossVal * update CI * update manifest * refactoring --- .shippable.yml | 34 ----- .travis.yml | 11 +- MANIFEST.in | 4 +- README.md | 8 +- circle.yml | 40 +---- .../run_center_clustering.py | 4 +- .../test_ovary_cebters.sh | 8 + .../run_ovary_egg-segmentation.py | 2 +- experiments_ovary_detect/test_ovary_detect.sh | 13 ++ .../run_segm_slic_model_graphcut.py | 2 +- .../test_segmentations.sh | 16 ++ handling_annotations/test_annotations.sh | 23 +++ imsegm/classification.py | 137 +++++++++--------- imsegm/descriptors.py | 124 +++++++--------- imsegm/region_growing.py | 4 +- setup.py | 6 +- 16 files changed, 212 insertions(+), 224 deletions(-) create mode 100644 experiments_ovary_centres/test_ovary_cebters.sh create mode 100644 experiments_ovary_detect/test_ovary_detect.sh create mode 100644 experiments_segmentation/test_segmentations.sh create mode 100644 handling_annotations/test_annotations.sh diff --git a/.shippable.yml b/.shippable.yml index 41c52f47..adbeafe8 100755 --- a/.shippable.yml +++ b/.shippable.yml @@ -52,40 +52,6 @@ script: - python setup.py check -m -s - flake8 . --ignore=E402,E731 --max-line-length=100 - # ANNOTATION section - - python handling_annotations/run_image_color_quantization.py -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" - - python handling_annotations/run_image_convert_label_color.py -imgs "./data_images/drosophila_ovary_slice/segm/*.png" -out ./data_images/drosophila_ovary_slice/segm_rgb - - python handling_annotations/run_overlap_images_segms.py -imgs "./data_images/drosophila_ovary_slice/image/*.jpg" -segs ./data_images/drosophila_ovary_slice/segm -out ./results/overlap_ovary_segment - - python handling_annotations/run_segm_annot_inpaint.py -imgs "./data_images/drosophila_ovary_slice/segm/*.png" --label 0 - - python handling_annotations/run_segm_annot_relabel.py -imgs "./data_images/drosophila_ovary_slice/center_levels/*.png" -out ./results/relabel_center_levels - - # SEGMENTATION section - - rm -r -f results && mkdir results - - python experiments_segmentation/run_compute_stat_annot_segm.py -a "data_images/drosophila_ovary_slice/annot_struct/*.png" -s "data_images/drosophila_ovary_slice/segm/*.png" --visual - - python experiments_segmentation/run_segm_slic_model_graphcut.py -i "data_images/drosophila_disc/image/img_[5,6].jpg" -cfg ./experiments_segmentation/sample_config.json --visual - - python experiments_segmentation/run_segm_slic_classif_graphcut.py -l data_images/drosophila_ovary_slice/list_imgs-annot-struct_short.csv -i "data_images/drosophila_ovary_slice/image/insitu41*.jpg" -cfg ./experiments_segmentation/sample_config.json --visual - - # CENTER DETECT. 
section - - rm -r -f results && mkdir results - - python experiments_ovary_centres/run_create_annotation.py - - python experiments_ovary_centres/run_center_candidate_training.py - - python experiments_ovary_centres/run_center_prediction.py - - python experiments_ovary_centres/run_center_clustering.py - - python experiments_ovary_centres/run_center_evaluation.py - - # download MorphSnake - - pip install git+https://github.com/Borda/morph-snakes.git - # REGION GROWING section - - rm -r -f results && mkdir results - - python experiments_ovary_detect/run_RG2Sp_estim_shape-models.py - - python experiments_ovary_detect/run_ovary_egg-segmentation.py -m ellipse_moments ellipse_ransac_mmt ellipse_ransac_crit GC_pixels-large GC_pixels-shape GC_slic-small GC_slic-shape rg2sp_greedy-single rg2sp_GC-mixture watershed_morph - - python experiments_ovary_detect/run_ovary_segm_evaluation.py - - python experiments_ovary_detect/run_export_user-annot-segm.py - - python experiments_ovary_detect/run_cut_segmented_objects.py - - python experiments_ovary_detect/run_ellipse_annot_match.py - - python experiments_ovary_detect/run_ellipse_cut_scale.py - - python experiments_ovary_detect/run_egg_swap_orientation.py - after_success: - python setup.py install diff --git a/.travis.yml b/.travis.yml index 43f3a748..f80d2c7c 100755 --- a/.travis.yml +++ b/.travis.yml @@ -48,9 +48,16 @@ script: after_success: - codecov # public repository on Travis CI - # private repository on Travis CI - # - codecov -t 80efed4e-ac2b-4fea-a642-0a8b1c82e1c8 - coverage xml - python-codacy-coverage -r coverage.xml - coverage report + # ANNOTATION section + - bash handling_annotations/test_annotations.sh + # SEGMENTATION section + - bash experiments_segmentation/test_segmentations.sh + # CENTER DETECT. section + - bash experiments_ovary_centres/test_ovary_cebters.sh + # REGION GROWING section + - bash experiments_ovary_detect/test_ovary_detect.sh + # test installed package - cd .. && python -c "import imsegm.descriptors" diff --git a/MANIFEST.in b/MANIFEST.in index 1be92896..793a415e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -21,8 +21,8 @@ include setup.* # Exclude build configs exclude *.yml -# Exclude tests -exclude test*.py +# Exclude experiments +exclude */*.sh # Include the experiments recursive-include experiments_* *.py *.json diff --git a/README.md b/README.md index b524f9be..b46b5447 100755 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ Moreover, we are using python [GraphCut wrapper](https://github.com/Borda/pyGCO) **Compilation** -We have implemented cython version of some functions, especially computing descriptors, which require to compile them before using them +We have implemented `cython` version of some functions, especially computing descriptors, which require to compile them before using them ```bash python setup.py build_ext --inplace ``` @@ -98,7 +98,11 @@ If loading of compiled descriptors in `cython` fails, it is automatically swappe **Installation** -The package can be installed via pip from the folder +The package can be installed via pip +```bash +pip install git+https://github.com/Borda/pyImSegm.git +``` + or using `setuptools` from local folder ```bash python setup.py install ``` diff --git a/circle.yml b/circle.yml index 9c3b9770..dbf0a36e 100755 --- a/circle.yml +++ b/circle.yml @@ -35,48 +35,16 @@ jobs: flake8 . 
--ignore=E402,E731 --max-line-length=100 # ANNOTATION section - - run: - name: Annotations - command: | - python handling_annotations/run_image_color_quantization.py -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" - python handling_annotations/run_image_color_quantization.py -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" -m position - python handling_annotations/run_image_convert_label_color.py -imgs "./data_images/drosophila_ovary_slice/segm/*.png" -out ./data_images/drosophila_ovary_slice/segm_rgb - python handling_annotations/run_image_convert_label_color.py -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" -out ./data_images/drosophila_ovary_slice/segm - python handling_annotations/run_overlap_images_segms.py -imgs "./data_images/drosophila_ovary_slice/image/*.jpg" -segs ./data_images/drosophila_ovary_slice/segm -out ./results/overlap_ovary_segment - python handling_annotations/run_segm_annot_inpaint.py -imgs "./data_images/drosophila_ovary_slice/segm/*.png" --label 0 - python handling_annotations/run_segm_annot_relabel.py -imgs "./data_images/drosophila_ovary_slice/center_levels/*.png" -out ./results/relabel_center_levels + - run: bash handling_annotations/test_annotations.sh # SEGMENTATION section - - run: - name: Segmentation - command: | - python experiments_segmentation/run_compute_stat_annot_segm.py -a "data_images/drosophila_ovary_slice/annot_struct/*.png" -s "data_images/drosophila_ovary_slice/segm/*.png" --visual - python experiments_segmentation/run_segm_slic_model_graphcut.py -i "data_images/drosophila_disc/image/img_[5,6].jpg" -cfg ./experiments_segmentation/sample_config.json --visual - python experiments_segmentation/run_segm_slic_classif_graphcut.py -l data_images/drosophila_ovary_slice/list_imgs-annot-struct_short.csv -i "data_images/drosophila_ovary_slice/image/insitu41*.jpg" -cfg ./experiments_segmentation/sample_config.json --visual + - run: bash experiments_segmentation/test_segmentations.sh # CENTER DETECT. 
section - - run: - name: Center detection - command: | - python experiments_ovary_centres/run_create_annotation.py - python experiments_ovary_centres/run_center_candidate_training.py - python experiments_ovary_centres/run_center_prediction.py - python experiments_ovary_centres/run_center_clustering.py - python experiments_ovary_centres/run_center_evaluation.py + - run: bash experiments_ovary_centres/test_ovary_cebters.sh # REGION GROWING section - - run: - name: Region Growing - command: | - pip install --user git+https://github.com/Borda/morph-snakes.git - python experiments_ovary_detect/run_RG2Sp_estim_shape-models.py - python experiments_ovary_detect/run_ovary_egg-segmentation.py -m ellipse_moments ellipse_ransac_mmt ellipse_ransac_crit GC_pixels-large GC_pixels-shape GC_slic-small GC_slic-shape rg2sp_greedy-single rg2sp_GC-mixture watershed_morph - python experiments_ovary_detect/run_ovary_segm_evaluation.py --visual - python experiments_ovary_detect/run_export_user-annot-segm.py - python experiments_ovary_detect/run_cut_segmented_objects.py - python experiments_ovary_detect/run_ellipse_annot_match.py - python experiments_ovary_detect/run_ellipse_cut_scale.py - python experiments_ovary_detect/run_egg_swap_orientation.py + - run: bash experiments_ovary_detect/test_ovary_detect.sh # PASSING - run: diff --git a/experiments_ovary_centres/run_center_clustering.py b/experiments_ovary_centres/run_center_clustering.py index b940af5f..71aeb879 100755 --- a/experiments_ovary_centres/run_center_clustering.py +++ b/experiments_ovary_centres/run_center_clustering.py @@ -66,7 +66,7 @@ def cluster_center_candidates(points, max_dist=100, min_samples=1): :return (ndarray, [int]): """ points = np.array(points) - if len(points) == 0: + if not list(points): return points, [] dbscan = cluster.DBSCAN(eps=max_dist, min_samples=min_samples) dbscan.fit(points) @@ -134,7 +134,7 @@ def cluster_points_draw_export(dict_row, params, path_out=None): 'missing some required fields: %s' % repr(dict_row) name = os.path.splitext(os.path.basename(dict_row['path_points']))[0] points = tl_data.load_landmarks_csv(dict_row['path_points']) - if len(points) == 0: + if not list(points): logging.debug('no points to cluster for "%s"', name) points = tl_data.swap_coord_x_y(points) diff --git a/experiments_ovary_centres/test_ovary_cebters.sh b/experiments_ovary_centres/test_ovary_cebters.sh new file mode 100644 index 00000000..f80b4021 --- /dev/null +++ b/experiments_ovary_centres/test_ovary_cebters.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +rm -r -f results && mkdir results +python experiments_ovary_centres/run_create_annotation.py +python experiments_ovary_centres/run_center_candidate_training.py +python experiments_ovary_centres/run_center_prediction.py +python experiments_ovary_centres/run_center_clustering.py +python experiments_ovary_centres/run_center_evaluation.py \ No newline at end of file diff --git a/experiments_ovary_detect/run_ovary_egg-segmentation.py b/experiments_ovary_detect/run_ovary_egg-segmentation.py index 9adc28ec..1007eab6 100755 --- a/experiments_ovary_detect/run_ovary_egg-segmentation.py +++ b/experiments_ovary_detect/run_ovary_egg-segmentation.py @@ -701,7 +701,7 @@ def image_segmentation(idx_row, params, debug_export=DEBUG_EXPORT): return name centers = tl_data.load_landmarks_csv(row_path['path_centers']) centers = tl_data.swap_coord_x_y(centers) - if len(centers) == 0: + if not list(centers): logging.warning('no center was detected for "%s"', name) return name # img = seg / float(seg.max()) diff 
--git a/experiments_ovary_detect/test_ovary_detect.sh b/experiments_ovary_detect/test_ovary_detect.sh new file mode 100644 index 00000000..396cb2a6 --- /dev/null +++ b/experiments_ovary_detect/test_ovary_detect.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +pip install --user git+https://github.com/Borda/morph-snakes.git +rm -r -f results && mkdir results +python experiments_ovary_detect/run_RG2Sp_estim_shape-models.py +python experiments_ovary_detect/run_ovary_egg-segmentation.py \ + -m ellipse_moments ellipse_ransac_mmt ellipse_ransac_crit GC_pixels-large GC_pixels-shape GC_slic-small GC_slic-shape rg2sp_greedy-single rg2sp_GC-mixture watershed_morph +python experiments_ovary_detect/run_ovary_segm_evaluation.py --visual +python experiments_ovary_detect/run_export_user-annot-segm.py +python experiments_ovary_detect/run_cut_segmented_objects.py +python experiments_ovary_detect/run_ellipse_annot_match.py +python experiments_ovary_detect/run_ellipse_cut_scale.py +python experiments_ovary_detect/run_egg_swap_orientation.py \ No newline at end of file diff --git a/experiments_segmentation/run_segm_slic_model_graphcut.py b/experiments_segmentation/run_segm_slic_model_graphcut.py index ef6a4cd3..252bff0a 100644 --- a/experiments_segmentation/run_segm_slic_model_graphcut.py +++ b/experiments_segmentation/run_segm_slic_model_graphcut.py @@ -509,7 +509,7 @@ def main(params): tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG) paths_img = load_path_images(params) - assert len(paths_img) > 0, 'missing images' + assert paths_img, 'missing images' def _path_expt(n): return os.path.join(params['path_exp'], n) diff --git a/experiments_segmentation/test_segmentations.sh b/experiments_segmentation/test_segmentations.sh new file mode 100644 index 00000000..0ea7d204 --- /dev/null +++ b/experiments_segmentation/test_segmentations.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +rm -r -f results && mkdir results +python experiments_segmentation/run_compute_stat_annot_segm.py \ + -a "data_images/drosophila_ovary_slice/annot_struct/*.png" \ + -s "data_images/drosophila_ovary_slice/segm/*.png" \ + --visual +python experiments_segmentation/run_segm_slic_model_graphcut.py \ + -i "data_images/drosophila_disc/image/img_[5,6].jpg" \ + -cfg ./experiments_segmentation/sample_config.json \ + --visual +python experiments_segmentation/run_segm_slic_classif_graphcut.py \ + -l data_images/drosophila_ovary_slice/list_imgs-annot-struct_short.csv \ + -i "data_images/drosophila_ovary_slice/image/insitu41*.jpg" \ + -cfg ./experiments_segmentation/sample_config.json \ + --visual \ No newline at end of file diff --git a/handling_annotations/test_annotations.sh b/handling_annotations/test_annotations.sh new file mode 100644 index 00000000..d9cc2206 --- /dev/null +++ b/handling_annotations/test_annotations.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +python handling_annotations/run_image_color_quantization.py \ + -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" +python handling_annotations/run_image_color_quantization.py \ + -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" \ + -m position +python handling_annotations/run_image_convert_label_color.py \ + -imgs "./data_images/drosophila_ovary_slice/segm/*.png" \ + -out ./data_images/drosophila_ovary_slice/segm_rgb +python handling_annotations/run_image_convert_label_color.py \ + -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" \ + -out ./data_images/drosophila_ovary_slice/segm +python handling_annotations/run_overlap_images_segms.py \ + -imgs 
"./data_images/drosophila_ovary_slice/image/*.jpg" \ + -segs ./data_images/drosophila_ovary_slice/segm \ + -out ./results/overlap_ovary_segment +python handling_annotations/run_segm_annot_inpaint.py \ + -imgs "./data_images/drosophila_ovary_slice/segm/*.png" \ + --label 0 +python handling_annotations/run_segm_annot_relabel.py \ + -imgs "./data_images/drosophila_ovary_slice/center_levels/*.png" \ + -out ./results/relabel_center_levels \ No newline at end of file diff --git a/imsegm/classification.py b/imsegm/classification.py index 38a76083..6816cc20 100755 --- a/imsegm/classification.py +++ b/imsegm/classification.py @@ -695,7 +695,7 @@ def create_classif_search_train_export(clf_name, features, labels, cross_val=10, './classif_RandForest_search_params_scores.txt'] >>> for p in files: os.remove(p) """ - assert len(labels) > 0, 'some labels has to be given' + assert list(labels), 'some labels has to be given' features = np.nan_to_num(features) assert len(features) == len(labels), \ 'features (%i) and labels (%i) should have equal length' \ @@ -1376,7 +1376,7 @@ def compute_metric_tpfp_tpfn(annot, segm, label_positive=None): # return stat -class HoldOut: +class HoldOut(object): """ Hold-out cross-validator generator. In the hold-out, the data is split only once into a train set and a test set. @@ -1385,9 +1385,9 @@ class HoldOut: Parameters ---------- - nb : total number of samples - hold_idx : int index where the test starts - random_state : Seed for the random number generator. + nb_samples : int, total number of samples + hold_out : int, number where the test starts + rand_seed : seed for the random number generator Example ------- @@ -1435,15 +1435,16 @@ def __len__(self): return 1 -class CrossValidatePOut: +class CrossValidatePOut(object): """ Hold-out cross-validator generator. In the hold-out, the data is split only once into a train set and a test set. - Unlike in other cross-validation schemes, the hold-out - consists of only one iteration. 
Parameters ---------- + nb_samples : integer, total number of samples + nb_hold_out : integer, number of samples hold out + rand_seed : seed for the random number generator Example 1 --------- @@ -1455,6 +1456,8 @@ class CrossValidatePOut: >>> list(cv) # doctest: +NORMALIZE_WHITESPACE [([3, 4, 5], [0, 1, 2]), ([0, 1, 2], [3, 4, 5])] + >>> [(len(tr), len(ts)) for tr, ts in CrossValidatePOut(340, 0.41)] + [(201, 139), (201, 139), (201, 139)] Example 2 --------- @@ -1462,8 +1465,8 @@ class CrossValidatePOut: >>> list(cv) # doctest: +NORMALIZE_WHITESPACE [([3, 0, 5, 4], [6, 2, 1]), ([6, 2, 1, 4], [3, 0, 5]), - ([6, 2, 1, 3, 0, 5], [4])] - >>> len(list(cv)) + ([1, 3, 0, 5], [4, 6, 2])] + >>> len(cv) 3 >>> cv.indexes [6, 2, 1, 3, 0, 5, 4] @@ -1475,20 +1478,24 @@ class CrossValidatePOut: [([6, 2], [1, 3, 0, 5, 4]), ([1, 3], [6, 2, 0, 5, 4]), ([0, 5], [6, 2, 1, 3, 4]), - ([4], [6, 2, 1, 3, 0, 5])] + ([4, 6], [2, 1, 3, 0, 5])] + >>> [(len(tr), len(ts)) for tr, ts in CrossValidatePOut(340, 0.55)] + [(153, 187), (153, 187), (153, 187)] """ def __init__(self, nb_samples, nb_hold_out, rand_seed=None): - """ + """ constructor - :param [int] nb_samples: list of sizes - :param int nb_hold_out: how much hold out + :param int nb_samples: list of sizes + :param int|float nb_hold_out: how much hold out :param int|None rand_seed: """ assert nb_samples > nb_hold_out, \ - 'number of holdout has to be smaller then _total size' + 'number of holdout has to be smaller then total size' + assert nb_hold_out > 0, 'number of holdout has to be positive number' self._nb_samples = nb_samples - self._nb_hold_out = nb_hold_out + self._nb_hold_out = int(np.round(nb_samples * nb_hold_out)) \ + if nb_hold_out < 1 else nb_hold_out self._revert = False # sets the sizes if self._nb_hold_out > (self._nb_samples / 2.): @@ -1501,9 +1508,12 @@ def __init__(self, nb_samples, nb_hold_out, rand_seed=None): self.indexes = list(range(self._nb_samples)) if rand_seed is not None and rand_seed is not False: + self._shuffle = True np.random.seed(rand_seed) np.random.shuffle(self.indexes) - logging.debug('sets ordering: %s', repr(self.indexes)) + else: + self._shuffle = False + logging.debug('sets ordering: %s', repr(np.array(self.indexes))) self.iter = 0 @@ -1513,8 +1523,19 @@ def __iter__(self): :return ([int], [int]): """ for i in range(0, self._nb_samples, self._nb_hold_out): - inds_test = self.indexes[i:i + self._nb_hold_out] - inds_train = [i for i in self.indexes if i not in inds_test] + i_end = i + self._nb_hold_out + inds_test = self.indexes[i:i_end] + inds_train = self.indexes[:i] + self.indexes[i_end:] + # over flow the limited set + if i_end > self._nb_samples: + i_begin = i_end - self._nb_samples + inds_test += self.indexes[:i_begin] + inds_train = self.indexes[i_begin:i] + logging.warning('Your demand for last test fold overflow by %i, ' + 'to keep the train-test ration we reuse part ' + 'of the already tested samples from the %s beginning.', + i_begin, 'shuffled' if self._shuffle else '') + # reverting the train -test split if self._revert: inds_train, inds_test = inds_test, inds_train yield inds_train, inds_test @@ -1527,15 +1548,16 @@ def __len__(self): return int(np.ceil(self._nb_samples / float(self._nb_hold_out))) -class CrossValidatePSetsOut: +class CrossValidatePSetsOut(CrossValidatePOut): """ Hold-out cross-validator generator. In the hold-out, the data is split only once into a train set and a test set. - Unlike in other cross-validation schemes, the hold-out - consists of only one iteration. 
Parameters ---------- + set_sizes : list of integers, number of samples in each set + nb_hold_out : integer, number of sets hold out + rand_seed : seed for the random number generator Example 1 --------- @@ -1547,6 +1569,8 @@ class CrossValidatePSetsOut: >>> list(cv) # doctest: +NORMALIZE_WHITESPACE [([5, 6, 7, 8, 9], [0, 1, 2, 3, 4]), ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])] + >>> [(len(tr), len(ts)) for tr, ts in CrossValidatePSetsOut([7] * 340, 0.41)] + [(1407, 973), (1407, 973), (1407, 973)] Example 2 --------- @@ -1556,10 +1580,10 @@ class CrossValidatePSetsOut: >>> list(cv) # doctest: +NORMALIZE_WHITESPACE [([2, 3, 5, 6, 7], [4, 0, 1]), ([4, 0, 1, 7], [2, 3, 5, 6]), - ([4, 0, 1, 2, 3, 5, 6], [7])] + ([0, 1, 2, 3, 5, 6], [7, 4])] >>> len(cv) 3 - >>> cv.sets_order + >>> cv.indexes [2, 0, 1, 3, 4] Example 3 @@ -1569,71 +1593,52 @@ class CrossValidatePSetsOut: [([8, 4], [2, 3, 5, 6, 0, 1, 7]), ([2, 3, 5, 6], [8, 4, 0, 1, 7]), ([0, 1, 7], [8, 4, 2, 3, 5, 6])] + >>> [(len(tr), len(ts)) for tr, ts in CrossValidatePSetsOut([7] * 340, 0.55)] + [(1071, 1309), (1071, 1309), (1071, 1309)] """ def __init__(self, set_sizes, nb_hold_out, rand_seed=None): - """ + """ construct :param [int] set_sizes: list of sizes - :param int nb_hold_out: how much hold out + :param int|float nb_hold_out: how much hold out :param int|None rand_seed: """ - assert len(set_sizes) > nb_hold_out, \ - 'nb of hold out (%i) has to be smaller then _total size %i' \ - % (nb_hold_out, len(set_sizes)) - self._set_sizes = list(set_sizes) - self._total = np.sum(self._set_sizes) - self._nb_hold_out = nb_hold_out - - self._revert = False # sets the sizes - if self._nb_hold_out > (len(self._set_sizes) / 2.): - logging.debug('WARNING: you are running in reverse mode, ' - 'while using all training examples ' - 'there are much more yield test cases.') - self._nb_hold_out = len(self._set_sizes) - self._nb_hold_out - self._revert = True + super(CrossValidatePSetsOut, self).__init__( + len(set_sizes), nb_hold_out, rand_seed) + self._set_sizes = list(set_sizes) self.set_indexes = [] + + start = 0 for i, size in enumerate(self._set_sizes): - start = int(np.sum(self._set_sizes[:i])) inds = range(start, start + size) self.set_indexes.append(list(inds)) + start += size - assert np.sum(len(i) for i in self.set_indexes) == self._total, \ - 'all indexes should sum to _total count %i' % self._total + total = np.sum(self._set_sizes) + assert np.sum(len(i) for i in self.set_indexes) == total, \ + 'all indexes should sum to total count %i' % total - self.sets_order = list(range(len(self._set_sizes))) - - if rand_seed is not None and rand_seed is not False: - np.random.seed(rand_seed) - np.random.shuffle(self.sets_order) - logging.debug('sets ordering: %s', repr(self.sets_order)) + def __iter_indexes(self, sets): + """ return enrol indexes from sets - self.iter = 0 + :param [int] sets: selection of indexes + :return [int]: + """ + inds = list(itertools.chain(*[self.set_indexes[i] for i in sets])) + return inds def __iter__(self): """ iterate the folds :return ([int], [int]): """ - for i in range(0, len(self._set_sizes), self._nb_hold_out): - test = self.sets_order[i:i + self._nb_hold_out] - inds_train = list(itertools.chain.from_iterable( - self.set_indexes[i] for i in self.sets_order if i not in test)) - inds_test = list(itertools.chain.from_iterable( - self.set_indexes[i] for i in self.sets_order if i in test)) - if self._revert: - inds_train, inds_test = inds_test, inds_train + for train, test in super(CrossValidatePSetsOut, self).__iter__(): + 
inds_train = self.__iter_indexes(train) + inds_test = self.__iter_indexes(test) yield inds_train, inds_test - def __len__(self): - """ number of folds - - :return int: - """ - nb = len(self._set_sizes) / float(self._nb_hold_out) - return int(np.ceil(nb)) - # DEPRECATED # ========== diff --git a/imsegm/descriptors.py b/imsegm/descriptors.py index 8a248171..aa78605e 100755 --- a/imsegm/descriptors.py +++ b/imsegm/descriptors.py @@ -8,6 +8,7 @@ Copyright (C) 2014-2018 Jiri Borovec """ +import itertools import logging import numpy as np @@ -630,15 +631,15 @@ def numpy_img3d_gray_median(img, seg): def compute_image3d_gray_statistic(image, segm, - list_feature_flags=NAMES_FEATURE_FLAGS, + feature_flags=NAMES_FEATURE_FLAGS, ch_name='gray'): """ compute complete descriptors / statistic on gray (3D) images :param ndarray image: :param ndarray segm: segmentation - :param list_feature_flags: + :param [str] feature_flags: :param str ch_name: channel name - :return np.ndarray, [str]: + :return (ndarray, [str]): np.ndarray >>> image = np.zeros((2, 3, 8)) >>> image[0, :, 2:6] = 1 @@ -666,54 +667,42 @@ def compute_image3d_gray_statistic(image, segm, """ _check_gray_image_segm(image, segm) - assert len(list_feature_flags) > 0, 'some features has to be selected' + assert list(feature_flags), 'some features has to be selected' image = np.nan_to_num(image) - features, names = [], [] + features = [] # nb_fts = image.shape[0] # ch_names = ['%s-ch%i' % (ch_name, i + 1) for i in range(nb_fts)] + _fn_mean = cython_img3d_gray_mean if USE_CYTHON else numpy_img3d_gray_mean + _fn_std = cython_img3d_gray_std if USE_CYTHON else numpy_img3d_gray_std + _fn_energy = cython_img3d_gray_energy if USE_CYTHON else numpy_img3d_gray_energy + # MEAN mean = None - if 'mean' in list_feature_flags: - if USE_CYTHON: - mean = cython_img3d_gray_mean(image, segm) - else: - mean = numpy_img3d_gray_mean(image, segm) + if 'mean' in feature_flags: + mean = _fn_mean(image, segm) features.append(mean) - names += ['%s_mean' % ch_name] # Standard Deviation - if 'std' in list_feature_flags: - if USE_CYTHON: - std = cython_img3d_gray_std(image, segm, mean) - else: - std = numpy_img3d_gray_std(image, segm, mean) - features.append(std) - names += ['%s_std' % ch_name] + if 'std' in feature_flags: + features.append(_fn_std(image, segm, mean)) # ENERGY - if 'energy' in list_feature_flags: - if USE_CYTHON: - energy = cython_img3d_gray_energy(image, segm) - else: - energy = numpy_img3d_gray_energy(image, segm) - features.append(energy) - names += ['%s_energy' % ch_name] + if 'energy' in feature_flags: + features.append(_fn_energy(image, segm)) # MEDIAN - if 'median' in list_feature_flags: - median = numpy_img3d_gray_median(image, segm) - features.append(median) - names += ['%s_median' % ch_name] + if 'median' in feature_flags: + features.append(numpy_img3d_gray_median(image, segm)) # mean Gradient - if 'meanGrad' in list_feature_flags: + if 'meanGrad' in feature_flags: grad_matrix = np.zeros_like(image) for i in range(image.shape[0]): grad_matrix[i, :, :] = np.sum(np.gradient(image[i]), axis=0) - if USE_CYTHON: - grad = cython_img3d_gray_mean(grad_matrix, segm) - else: - grad = numpy_img3d_gray_mean(grad_matrix, segm) - features.append(grad) - names += ['%s_meanGrad' % ch_name] - _check_unrecognised_feature_names(list_feature_flags) + features.append(_fn_mean(grad_matrix, segm)) + + names = ['%s_%s' % (ch_name, fts_name) + for fts_name in ('mean', 'std', 'energy', 'median', 'meanGrad') + if fts_name in feature_flags] + 
_check_unrecognised_feature_names(feature_flags) + features = np.concatenate(tuple([fts] for fts in features), axis=0) features = np.nan_to_num(features).T # normalise +/- zeros as set all as positive @@ -724,15 +713,15 @@ def compute_image3d_gray_statistic(image, segm, def compute_image2d_color_statistic(image, segm, - list_feature_flags=NAMES_FEATURE_FLAGS, + feature_flags=NAMES_FEATURE_FLAGS, color_name='color'): """ compute complete descriptors / statistic on color (2D) images :param ndarray image: :param ndarray segm: segmentation - :param list_feature_flags: + :param [str] feature_flags: :param str color_name: channel name - :return np.ndarray, [str]: + :return (ndarray, [str]): np.ndarray >>> image = np.zeros((2, 10, 3)) >>> image[:, 2:6, 0] = 1 @@ -758,51 +747,38 @@ def compute_image2d_color_statistic(image, segm, image = np.nan_to_num(image) features = np.empty((np.max(segm) + 1, 0)) - names = [] ch_names = ['%s-ch%i' % (color_name, i + 1) for i in range(3)] + _fn_mean = cython_img2d_color_mean if USE_CYTHON else numpy_img2d_color_mean + _fn_std = cython_img2d_color_std if USE_CYTHON else numpy_img2d_color_std + _fn_energy = cython_img2d_color_energy if USE_CYTHON else numpy_img2d_color_energy + # MEAN mean = None - if 'mean' in list_feature_flags: - if USE_CYTHON: - mean = cython_img2d_color_mean(image, segm) - else: - mean = numpy_img2d_color_mean(image, segm) + if 'mean' in feature_flags: + mean = _fn_mean(image, segm) features = np.hstack((features, mean)) - names += ['%s_mean' % n for n in ch_names] # Standard Deviation - if 'std' in list_feature_flags: - if USE_CYTHON: - std = cython_img2d_color_std(image, segm, mean) - else: - std = numpy_img2d_color_std(image, segm, mean) - features = np.hstack((features, std)) - names += ['%s_std' % n for n in ch_names] + if 'std' in feature_flags: + features = np.hstack((features, _fn_std(image, segm, mean))) # ENERGY - if 'energy' in list_feature_flags: - if USE_CYTHON: - energy = cython_img2d_color_energy(image, segm) - else: - energy = numpy_img2d_color_energy(image, segm) - features = np.hstack((features, energy)) - names += ['%s_energy' % n for n in ch_names] - # Median - if 'median' in list_feature_flags: - median = numpy_img2d_color_median(image, segm) - features = np.hstack((features, median)) - names += ['%s_median' % n for n in ch_names] + if 'energy' in feature_flags: + features = np.hstack((features, _fn_energy(image, segm))) + # MEDIAN + if 'median' in feature_flags: + features = np.hstack((features, numpy_img2d_color_median(image, segm))) # mean Gradient - if 'meanGrad' in list_feature_flags: + if 'meanGrad' in feature_flags: grad_matrix = np.zeros_like(image) for i in range(image.shape[-1]): grad_matrix[:, :, i] = np.sum(np.gradient(image[:, :, i]), axis=0) - if USE_CYTHON: - grad = cython_img2d_color_mean(grad_matrix, segm) - else: - grad = numpy_img2d_color_mean(grad_matrix, segm) - features = np.hstack((features, grad)) - names += ['%s_meanGrad' % n for n in ch_names] - _check_unrecognised_feature_names(list_feature_flags) + features = np.hstack((features, _fn_mean(grad_matrix, segm))) + + feature_names = ('mean', 'std', 'energy', 'median', 'meanGrad') + names = list(itertools.chain.from_iterable(['%s_%s' % (n, fts_name) for n in ch_names] + for fts_name in feature_names + if fts_name in feature_flags)) + _check_unrecognised_feature_names(feature_flags) # mean Gradient # G = np.zeros_like(image) # for i in range(image.shape[0]): diff --git a/imsegm/region_growing.py b/imsegm/region_growing.py index 8d2c0f24..caff7b2f 
100755 --- a/imsegm/region_growing.py +++ b/imsegm/region_growing.py @@ -71,7 +71,7 @@ def object_segmentation_graphcut_slic(slic, segm, centres, labels_fg_prob = np.array(labels_fg_prob) labels_bg_prob = 1. - labels_fg_prob - assert len(centres) > 0, 'at least one center has to be given' + assert list(centres), 'at least one center has to be given' centres = [np.round(c).astype(int) for c in centres] slic_points = seg_spx.superpixel_centers(slic) @@ -191,7 +191,7 @@ def object_segmentation_graphcut_pixels(segm, centres, labels_fg_prob = np.array(labels_fg_prob) labels_bg_prob = 1. - labels_fg_prob - assert len(centres) > 0, 'at least one center has to be given' + assert list(centres), 'at least one center has to be given' centres = [np.round(c).astype(int) for c in centres] proba = np.ones((height, width, len(centres) + 1)) diff --git a/setup.py b/setup.py index cbe3d5a4..56da25d2 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,7 @@ def _parse_requirements(file_path): setup( name='ImSegm', - version='0.1.3', + version='0.1.4', url='https://borda.github.io/pyImSegm', author='Jiri Borovec', @@ -67,7 +67,9 @@ def _parse_requirements(file_path): description='superpixel image segmentation: ' '(un)supervised, center detection, region growing', - packages=find_packages(), + packages=find_packages(exclude=['docs', 'notebooks', + 'handling_annotations', + 'experiments_*']), cmdclass={'build_ext': BuildExt}, ext_modules=[Extension('imsegm.features_cython', language='c++',
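The headline change in this patch is the reworked cross-validation in imsegm/classification.py: nb_hold_out may now be given either as an absolute count or as a fraction of the samples, and CrossValidatePSetsOut now reuses the CrossValidatePOut iteration logic instead of duplicating it. The lines below are an illustrative usage sketch only, not part of the diff; they assume the patched imsegm package is importable (e.g. after python setup.py install), and the fold sizes quoted in the comments are the ones shown in the doctests added above.

# minimal sketch of the new cross-validators from imsegm/classification.py
from imsegm.classification import CrossValidatePOut, CrossValidatePSetsOut

# hold out an absolute number of samples per fold
cv = CrossValidatePOut(6, 3, rand_seed=None)
for train_idx, test_idx in cv:
    print(len(train_idx), len(test_idx))   # 3 3 for each of the two folds

# new in this PR: hold out a fraction of the samples (~41 % as the test fold)
cv = CrossValidatePOut(340, 0.41, rand_seed=0)
print(len(cv))                             # 3 folds
for train_idx, test_idx in cv:
    print(len(train_idx), len(test_idx))   # 201 139 for every fold

# grouped variant: hold out whole sets (e.g. all samples belonging to one image)
cv_sets = CrossValidatePSetsOut([2, 3, 2, 3], nb_hold_out=2, rand_seed=0)
for train_idx, test_idx in cv_sets:
    print(sorted(test_idx))                # whole sets stay together in the test fold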