Skip to content
This repository has been archived by the owner on Feb 11, 2023. It is now read-only.

Commit

Permalink
update sklearn (#14)
Browse files Browse the repository at this point in the history
* fix sklearn due to version changes
* cleaning CI config
* using logging.exception()
* update setup
  • Loading branch information
Borda committed Nov 28, 2018
1 parent 8285bb3 commit 41cd661
Show file tree
Hide file tree
Showing 15 changed files with 62 additions and 73 deletions.
4 changes: 0 additions & 4 deletions .shippable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@ before_install:
install:
- root=$PWD
- mkdir libs
# - cd $root/libs && git clone https://github.com/Borda/pyGCO.git
# - cd $root/libs/pyGCO && pip install -r requirements.txt && python setup.py install
# - rm -r -f $root/libs/pyGCO
# - cd $root

- pip install -r requirements.txt
- pip install "nose>=1.3.7" coverage codecov "pytest>=3.0.5"
Expand Down
5 changes: 0 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,6 @@ before_install:

install:
- mkdir libs
# - root=$PWD
# - cd $root/libs && git clone https://github.com/Borda/pyGCO.git
# - cd $root/libs/pyGCO && pip install -r requirements.txt && python setup.py install
# - rm -r -f $root/libs/pyGCO
# - cd $root

- pip install -r requirements.txt
- pip install nose coverage codecov pytest codacy-coverage check-manifest
Expand Down
3 changes: 1 addition & 2 deletions experiments_ovary_centres/run_center_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import sys
import time
import logging
import traceback
import gc
# import multiprocessing as mproc
from functools import partial
Expand Down Expand Up @@ -180,7 +179,7 @@ def load_center_evaluate(idx_row, df_annot, path_annot, path_visu=None,
mask_eggs, img, segm,
path_visu, col_prefix)
except Exception:
logging.error(traceback.format_exc())
logging.exception('load_center_evaluate')
dict_stat = dict_row
return dict_stat

Expand Down
3 changes: 1 addition & 2 deletions experiments_ovary_centres/run_center_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import sys
import time, gc
import logging
import traceback
# import multiprocessing as mproc
from functools import partial

Expand Down Expand Up @@ -87,7 +86,7 @@ def load_compute_detect_centers(idx_row, params, classif=None, path_classif='',
dict_center = run_clust.cluster_points_draw_export(dict_center, params,
path_output)
except Exception:
logging.error(traceback.format_exc())
logging.exception('load_compute_detect_centers')
gc.collect()
time.sleep(1)
return dict_center
Expand Down
4 changes: 1 addition & 3 deletions experiments_ovary_detect/run_export_user-annot-segm.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import os
import sys
import traceback
import logging
import argparse
import multiprocessing as mproc
Expand Down Expand Up @@ -197,8 +196,7 @@ def export_figure(idx_row, df_slices_info, path_out):
fig.savefig(os.path.join(path_out, img_name + '.png'))
plt.close(fig)
except Exception:
logging.error('failed for: %s', img_name)
logging.error(traceback.format_exc())
logging.exception('failed for: %s', img_name)


def main(params):
Expand Down
4 changes: 1 addition & 3 deletions experiments_ovary_detect/run_ovary_egg-segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import argparse
import logging
import pickle
import traceback
import multiprocessing as mproc
from functools import partial

Expand Down Expand Up @@ -761,8 +760,7 @@ def image_segmentation(idx_row, params, debug_export=DEBUG_EXPORT):
centers = tl_data.swap_coord_x_y(centers)
tl_data.save_landmarks_csv(path_centre, centers)
except Exception:
logging.error('segment fail for "%s" via %s with \n %s',
name, method, traceback.format_exc())
logging.exception('segment fail for "%s" via %s', name, method)

return name

Expand Down
1 change: 0 additions & 1 deletion experiments_segmentation/run_compute_stat_annot_segm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import sys
import logging
import argparse
# import traceback
import multiprocessing as mproc
from functools import partial

Expand Down
3 changes: 1 addition & 2 deletions experiments_segmentation/run_segm_slic_classif_graphcut.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import logging
import glob
import time
import traceback
import gc
import multiprocessing as mproc
from functools import partial
Expand Down Expand Up @@ -759,7 +758,7 @@ def try_segment_image(img_idx_path, params, classif, path_out, path_visu,
path_out, path_visu,
show_debug_imgs=show_debug_imgs)
except Exception:
logging.error(traceback.format_exc())
logging.exception('segment_image')
return '', None, None


Expand Down
5 changes: 2 additions & 3 deletions experiments_segmentation/run_segm_slic_model_graphcut.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import argparse
import logging
import time, gc
import traceback
import multiprocessing as mproc
from functools import partial

Expand Down Expand Up @@ -329,7 +328,7 @@ def segment_image_independent(img_idx_path, params, path_out, path_visu=None,
path_npz = os.path.join(path_out, idx_name + '.npz')
np.savez_compressed(path_npz, segm_soft)
except Exception:
logging.error(traceback.format_exc())
logging.exception('pipe_color2d_slic_features_model_graphcut(...)')
segm = np.zeros(img.shape[:2])

boundary_size = int(params['slic_size'] * 3)
Expand Down Expand Up @@ -375,7 +374,7 @@ def segment_image_model(imgs_idx_path, params, model, path_out=None,
path_npz = os.path.join(path_out, idx_name + '.npz')
np.savez_compressed(path_npz, segm_soft)
except Exception:
logging.error(traceback.format_exc())
logging.exception('segment_color2d_slic_features_model_graphcut(...)')
segm = np.zeros(img.shape[:2])

boundary_size = int(np.sqrt(np.prod(segm.shape)) * 0.01)
Expand Down
3 changes: 1 addition & 2 deletions handling_annotations/run_overlap_images_segms.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import glob
import logging
import argparse
import traceback
import multiprocessing as mproc
from functools import partial

Expand Down Expand Up @@ -128,7 +127,7 @@ def perform_visu_overlap(path_img, paths, segm_alpha=MIDDLE_ALPHA_OVERLAP):
try:
visualise_overlap(path_img, path_seg, path_out, segm_alpha=segm_alpha)
except Exception:
logging.error(traceback.format_exc())
logging.exception('visualise_overlap')
return False
return True

Expand Down
4 changes: 2 additions & 2 deletions imsegm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from imsegm.utilities import __init__
import imsegm.utilities

__init__
imsegm.utilities
83 changes: 45 additions & 38 deletions imsegm/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import logging
import random
import collections
import traceback
import itertools
from functools import partial

Expand All @@ -21,12 +20,15 @@
from scipy import interp
from scipy.stats import randint as sp_randint
from scipy.stats import uniform as sp_random
from sklearn import grid_search, metrics
from sklearn import preprocessing, feature_selection, decomposition
from sklearn import cluster
from sklearn import cluster, metrics
from sklearn import ensemble, neighbors, svm, tree
from sklearn import pipeline, linear_model, neural_network
from sklearn import model_selection
try: # due to some changes between sklearn versions
from sklearn.grid_search import GridSearchCV, RandomizedSearchCV, ParameterSampler, ParameterGrid
except Exception:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, ParameterSampler, ParameterGrid

import imsegm.labeling as seg_lbs
import imsegm.utilities.experiments as tl_expt
Expand Down Expand Up @@ -323,7 +325,7 @@ def compute_classif_metrics(y_true, y_pred, metric_averages=METRIC_AVERAGES):
for l, _ in enumerate(p):
logging.debug(EVAL_STR.format(l, p[l], r[l], f[l], s[l]))
except Exception:
logging.error(traceback.format_exc())
logging.exception('metrics.precision_recall_fscore_support')

dict_metrics = {
'ARS': metrics.adjusted_rand_score(y_true, y_pred),
Expand All @@ -341,7 +343,7 @@ def compute_classif_metrics(y_true, y_pred, metric_averages=METRIC_AVERAGES):
average=avg)
res = dict(zip(['{}_{}'.format(n, avg) for n in names], mtr))
except Exception:
logging.error(traceback.format_exc())
logging.exception('metrics.precision_recall_fscore_support')
res = dict(zip(['{}_{}'.format(n, avg) for n in names], [-1] * 4))
dict_metrics.update(res)
return dict_metrics
def export_results_clf_search(path_out, clf_name, clf_search):
    """ export results of a hyper-parameter search into two text files:
    one with all evaluated parameter scores and one with the best parameters

    :param str path_out: existing output directory
    :param str clf_name: classifier name used in the exported file names
    :param obj clf_search: fitted search object (GridSearchCV / RandomizedSearchCV)
    """
    def _fn_path_out(s):
        return os.path.join(path_out, 'classif_%s_%s.txt' % (clf_name, s))

    # older sklearn exposes `grid_scores_`, newer ones `cv_results_`
    if hasattr(clf_search, 'grid_scores_'):  # for sklearn < 0.18
        lines = [repr(gs) for gs in clf_search.grid_scores_]
    elif hasattr(clf_search, 'cv_results_'):
        lines = ['"%s": %s' % (k, repr(clf_search.cv_results_[k]))
                 for k in clf_search.cv_results_]
    else:
        lines = ['no results']
    with open(_fn_path_out('search_params_scores'), 'w') as fp:
        fp.write('\n'.join(lines))

    best = clf_search.best_params_
    with open(_fn_path_out('search_params_best'), 'w') as fp:
        fp.write('\n'.join('{:30s} {}'.format('"{}":'.format(k), best[k])
                           for k in best))


def relabel_sequential(labels, uq_lbs=None):
Expand Down Expand Up @@ -754,8 +762,8 @@ def eval_classif_cross_val_scores(clf_name, classif, features, labels,
>>> data += 0.5 - np.random.random(data.shape)
>>> data.shape
(300, 6)
>>> from sklearn.cross_validation import StratifiedKFold
>>> cv = StratifiedKFold(labels, n_folds=5, random_state=0)
>>> from sklearn.model_selection import StratifiedKFold
>>> cv = StratifiedKFold(n_splits=5, random_state=0)
>>> classif = create_classifiers()[DEFAULT_CLASSIF_NAME]
>>> eval_classif_cross_val_scores(DEFAULT_CLASSIF_NAME, classif,
... data, labels, cv)
Expand All @@ -766,7 +774,7 @@ def eval_classif_cross_val_scores(clf_name, classif, features, labels,
3 1.0 1.0 1.0 1.0
4 1.0 1.0 1.0 1.0
>>> labels[labels == 1] = 2
>>> cv = StratifiedKFold(labels, n_folds=3, random_state=0)
>>> cv = StratifiedKFold(n_splits=3, random_state=0)
>>> eval_classif_cross_val_scores(DEFAULT_CLASSIF_NAME, classif,
... data, labels, cv, path_out='.')
f1_macro accuracy precision_macro recall_macro
Expand Down Expand Up @@ -796,7 +804,7 @@ def eval_classif_cross_val_scores(clf_name, classif, features, labels,
scoring, np.mean(scores), repr(scores))
df_scoring[scoring] = scores
except Exception:
logging.error(traceback.format_exc())
logging.exception('model_selection.cross_val_score')

if path_out is not None:
assert os.path.exists(path_out), 'missing: "%s"' % path_out
Expand Down Expand Up @@ -838,8 +846,8 @@ def eval_classif_cross_val_roc(clf_name, classif, features, labels,
>>> data += np.random.random(data.shape)
>>> data.shape
(300, 6)
>>> from sklearn.cross_validation import StratifiedKFold
>>> cv = StratifiedKFold(labels, n_folds=5, random_state=0)
>>> from sklearn.model_selection import StratifiedKFold
>>> cv = StratifiedKFold(n_splits=5, random_state=0)
>>> classif = create_classifiers()[DEFAULT_CLASSIF_NAME]
>>> fp_tp, auc = eval_classif_cross_val_roc(DEFAULT_CLASSIF_NAME, classif,
... data, labels, cv, nb_thr=10)
Expand Down Expand Up @@ -884,37 +892,34 @@ def eval_classif_cross_val_roc(clf_name, classif, features, labels,
for lb in unique_labels:
labels_bin[:, lb] = (labels == lb)

count = 0
# since version change the CV is not iterable by default
if not hasattr(cross_val, '__iter__'):
cross_val = cross_val.split(features, labels)
count = 0.
for train, test in cross_val:
features_train = np.copy(features[train], order='C')
labels_train = np.copy(labels[train], order='C')
features_test = np.copy(features[test], order='C')
classif.fit(features_train, labels_train)
proba = classif.predict_proba(features_test)
classif.fit(np.copy(features[train], order='C'),
np.copy(labels[train], order='C'))
proba = classif.predict_proba(np.copy(features[test], order='C'))
# Compute ROC curve and area the curve
for i, lb in enumerate(unique_labels):
fpr, tpr, _ = metrics.roc_curve(labels_bin[test, lb], proba[:, i])
fpr = [0.] + fpr.tolist() + [1.]
tpr = [0.] + tpr.tolist() + [1.]
mean_tpr += interp(mean_fpr, fpr, tpr)
mean_tpr[0] = 0.0
count += 1.
count += 1
# roc_auc = metrics.auc(fpr, tpr)

mean_tpr /= count
mean_tpr[-1] = 1.0
# mean_auc = metrics.auc(mean_fpr, mean_tpr)
df_roc = pd.DataFrame(np.array([mean_fpr, mean_tpr]).T,
columns=['FP', 'TP'])

df_roc = pd.DataFrame(np.array([mean_fpr, mean_tpr]).T, columns=['FP', 'TP'])
auc = metrics.auc(mean_fpr, mean_tpr)

if path_out is not None:
assert os.path.exists(path_out), 'missing: "%s"' % path_out
name_csv = NAME_CSV_CLASSIF_CV_ROC.format(clf_name, 'mean')
path_csv = os.path.join(path_out, name_csv)
df_roc.to_csv(path_csv)

df_roc.to_csv(os.path.join(path_out, name_csv))
name_txt = NAME_TXT_CLASSIF_CV_AUC.format(clf_name, 'mean')
with open(os.path.join(path_out, name_txt), 'w') as fp:
fp.write(str(auc))
def search_params_cut_down_max_nb_iter(clf_parameters, nb_iter):
    """ cut down the requested number of random-search iterations when the
    parameter space is finite and smaller than the request

    :param dict clf_parameters: {param_name: list/tuple of values OR a
        continuous distribution (anything without `__iter__`)}
    :param int nb_iter: requested number of random tries
    :return int: min(nb_iter, size of the finite parameter grid)
    """
    counts = []
    for vals in clf_parameters.values():
        # a continuous distribution has no `__iter__` -> the parameter space
        # is infinite, so the requested number of iterations stands as is
        if not hasattr(vals, '__iter__'):
            return nb_iter
        counts.append(len(vals))
    # np.prod of an empty list is 1 (the empty grid has one combination);
    # np.product was removed in NumPy 2.0, so use np.prod
    grid_size = int(np.prod(counts))
    if grid_size < nb_iter:
        # BUG FIX: the original had `nb_iter < count`, a no-op comparison,
        # so the cut-down never actually happened
        nb_iter = grid_size
        logging.debug('nb iter: -> %i', nb_iter)
    return nb_iter


Expand All @@ -964,14 +971,14 @@ def create_classif_search(name_clf, clf_pipeline, nb_labels,
if search_type == 'grid':
clf_parameters = create_clf_param_search_grid(name_clf)
logging.info('init Grid search...')
clf_search = grid_search.GridSearchCV(
clf_search = GridSearchCV(
clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
n_jobs=nb_jobs, verbose=1, refit=True)
else:
clf_parameters = create_clf_param_search_distrib(name_clf)
nb_iter = search_params_cut_down_max_nb_iter(clf_parameters, nb_iter)
logging.info('init Randomized search...')
clf_search = grid_search.RandomizedSearchCV(
clf_search = RandomizedSearchCV(
clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
n_jobs=nb_jobs, n_iter=nb_iter, verbose=1, refit=True)
return clf_search
Expand Down
3 changes: 3 additions & 0 deletions imsegm/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import imsegm.utilities

imsegm.utilities
3 changes: 1 addition & 2 deletions imsegm/utilities/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import time
import types
import logging
import traceback
import multiprocessing as mproc
# from functools import partial

Expand Down Expand Up @@ -296,7 +295,7 @@ def create_subfolders(path_out, list_folders):
os.mkdir(path_dir)
count += 1
except Exception:
logging.error(traceback.format_exc())
logging.exception('mkdir: %s', path_dir)
return count


Expand Down
Loading

0 comments on commit 41cd661

Please sign in to comment.