Skip to content
This repository has been archived by the owner on Feb 11, 2023. It is now read-only.

Commit

Permalink
update sklearn (#14)
Browse files Browse the repository at this point in the history
* fix sklearn due to version changes
* cleaning CI config
* using logging.exception()
* update setup
  • Loading branch information
Borda committed Nov 28, 2018
1 parent 8285bb3 commit 41cd661
Show file tree
Hide file tree
Showing 15 changed files with 62 additions and 73 deletions.
4 changes: 0 additions & 4 deletions .shippable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@ before_install:
install:
- root=$PWD
- mkdir libs
# - cd $root/libs && git clone https://github.com/Borda/pyGCO.git
# - cd $root/libs/pyGCO && pip install -r requirements.txt && python setup.py install
# - rm -r -f $root/libs/pyGCO
# - cd $root

- pip install -r requirements.txt
- pip install "nose>=1.3.7" coverage codecov "pytest>=3.0.5"
Expand Down
5 changes: 0 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,6 @@ before_install:

install:
- mkdir libs
# - root=$PWD
# - cd $root/libs && git clone https://github.com/Borda/pyGCO.git
# - cd $root/libs/pyGCO && pip install -r requirements.txt && python setup.py install
# - rm -r -f $root/libs/pyGCO
# - cd $root

- pip install -r requirements.txt
- pip install nose coverage codecov pytest codacy-coverage check-manifest
Expand Down
3 changes: 1 addition & 2 deletions experiments_ovary_centres/run_center_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import sys
import time
import logging
import traceback
import gc
# import multiprocessing as mproc
from functools import partial
Expand Down Expand Up @@ -180,7 +179,7 @@ def load_center_evaluate(idx_row, df_annot, path_annot, path_visu=None,
mask_eggs, img, segm,
path_visu, col_prefix)
except Exception:
logging.error(traceback.format_exc())
logging.exception('load_center_evaluate')
dict_stat = dict_row
return dict_stat

Expand Down
3 changes: 1 addition & 2 deletions experiments_ovary_centres/run_center_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import sys
import time, gc
import logging
import traceback
# import multiprocessing as mproc
from functools import partial

Expand Down Expand Up @@ -87,7 +86,7 @@ def load_compute_detect_centers(idx_row, params, classif=None, path_classif='',
dict_center = run_clust.cluster_points_draw_export(dict_center, params,
path_output)
except Exception:
logging.error(traceback.format_exc())
logging.exception('load_compute_detect_centers')
gc.collect()
time.sleep(1)
return dict_center
Expand Down
4 changes: 1 addition & 3 deletions experiments_ovary_detect/run_export_user-annot-segm.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import os
import sys
import traceback
import logging
import argparse
import multiprocessing as mproc
Expand Down Expand Up @@ -197,8 +196,7 @@ def export_figure(idx_row, df_slices_info, path_out):
fig.savefig(os.path.join(path_out, img_name + '.png'))
plt.close(fig)
except Exception:
logging.error('failed for: %s', img_name)
logging.error(traceback.format_exc())
logging.exception('failed for: %s', img_name)


def main(params):
Expand Down
4 changes: 1 addition & 3 deletions experiments_ovary_detect/run_ovary_egg-segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import argparse
import logging
import pickle
import traceback
import multiprocessing as mproc
from functools import partial

Expand Down Expand Up @@ -761,8 +760,7 @@ def image_segmentation(idx_row, params, debug_export=DEBUG_EXPORT):
centers = tl_data.swap_coord_x_y(centers)
tl_data.save_landmarks_csv(path_centre, centers)
except Exception:
logging.error('segment fail for "%s" via %s with \n %s',
name, method, traceback.format_exc())
logging.exception('segment fail for "%s" via %s', name, method)

return name

Expand Down
1 change: 0 additions & 1 deletion experiments_segmentation/run_compute_stat_annot_segm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import sys
import logging
import argparse
# import traceback
import multiprocessing as mproc
from functools import partial

Expand Down
3 changes: 1 addition & 2 deletions experiments_segmentation/run_segm_slic_classif_graphcut.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import logging
import glob
import time
import traceback
import gc
import multiprocessing as mproc
from functools import partial
Expand Down Expand Up @@ -759,7 +758,7 @@ def try_segment_image(img_idx_path, params, classif, path_out, path_visu,
path_out, path_visu,
show_debug_imgs=show_debug_imgs)
except Exception:
logging.error(traceback.format_exc())
logging.exception('segment_image')
return '', None, None


Expand Down
5 changes: 2 additions & 3 deletions experiments_segmentation/run_segm_slic_model_graphcut.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import argparse
import logging
import time, gc
import traceback
import multiprocessing as mproc
from functools import partial

Expand Down Expand Up @@ -329,7 +328,7 @@ def segment_image_independent(img_idx_path, params, path_out, path_visu=None,
path_npz = os.path.join(path_out, idx_name + '.npz')
np.savez_compressed(path_npz, segm_soft)
except Exception:
logging.error(traceback.format_exc())
logging.exception('pipe_color2d_slic_features_model_graphcut(...)')
segm = np.zeros(img.shape[:2])

boundary_size = int(params['slic_size'] * 3)
Expand Down Expand Up @@ -375,7 +374,7 @@ def segment_image_model(imgs_idx_path, params, model, path_out=None,
path_npz = os.path.join(path_out, idx_name + '.npz')
np.savez_compressed(path_npz, segm_soft)
except Exception:
logging.error(traceback.format_exc())
logging.exception('segment_color2d_slic_features_model_graphcut(...)')
segm = np.zeros(img.shape[:2])

boundary_size = int(np.sqrt(np.prod(segm.shape)) * 0.01)
Expand Down
3 changes: 1 addition & 2 deletions handling_annotations/run_overlap_images_segms.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import glob
import logging
import argparse
import traceback
import multiprocessing as mproc
from functools import partial

Expand Down Expand Up @@ -128,7 +127,7 @@ def perform_visu_overlap(path_img, paths, segm_alpha=MIDDLE_ALPHA_OVERLAP):
try:
visualise_overlap(path_img, path_seg, path_out, segm_alpha=segm_alpha)
except Exception:
logging.error(traceback.format_exc())
logging.exception('visualise_overlap')
return False
return True

Expand Down
4 changes: 2 additions & 2 deletions imsegm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from imsegm.utilities import __init__
import imsegm.utilities

__init__
imsegm.utilities
83 changes: 45 additions & 38 deletions imsegm/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import logging
import random
import collections
import traceback
import itertools
from functools import partial

Expand All @@ -21,12 +20,15 @@
from scipy import interp
from scipy.stats import randint as sp_randint
from scipy.stats import uniform as sp_random
from sklearn import grid_search, metrics
from sklearn import preprocessing, feature_selection, decomposition
from sklearn import cluster
from sklearn import cluster, metrics
from sklearn import ensemble, neighbors, svm, tree
from sklearn import pipeline, linear_model, neural_network
from sklearn import model_selection
try: # due to some changes between sklearn versions
from sklearn.grid_search import GridSearchCV, RandomizedSearchCV, ParameterSampler, ParameterGrid
except Exception:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, ParameterSampler, ParameterGrid

import imsegm.labeling as seg_lbs
import imsegm.utilities.experiments as tl_expt
Expand Down Expand Up @@ -323,7 +325,7 @@ def compute_classif_metrics(y_true, y_pred, metric_averages=METRIC_AVERAGES):
for l, _ in enumerate(p):
logging.debug(EVAL_STR.format(l, p[l], r[l], f[l], s[l]))
except Exception:
logging.error(traceback.format_exc())
logging.exception('metrics.precision_recall_fscore_support')

dict_metrics = {
'ARS': metrics.adjusted_rand_score(y_true, y_pred),
Expand All @@ -341,7 +343,7 @@ def compute_classif_metrics(y_true, y_pred, metric_averages=METRIC_AVERAGES):
average=avg)
res = dict(zip(['{}_{}'.format(n, avg) for n in names], mtr))
except Exception:
logging.error(traceback.format_exc())
logging.exception('metrics.precision_recall_fscore_support')
res = dict(zip(['{}_{}'.format(n, avg) for n in names], [-1] * 4))
dict_metrics.update(res)
return dict_metrics
def export_results_clf_search(path_out, clf_name, clf_search):
    """ export results of a hyper-parameter search into two text files:
    one with all evaluated parameter scores and one with the best parameters

    :param str path_out: existing output directory
    :param str clf_name: classifier name used in the exported file names
    :param obj clf_search: fitted search object (GridSearchCV / RandomizedSearchCV)
    """
    def _fn_path_out(s):
        return os.path.join(path_out, 'classif_%s_%s.txt' % (clf_name, s))

    # older sklearn exposes `grid_scores_`, newer ones `cv_results_`
    if hasattr(clf_search, 'grid_scores_'):  # for sklearn < 0.18
        lines = [repr(gs) for gs in clf_search.grid_scores_]
    elif hasattr(clf_search, 'cv_results_'):
        lines = ['"%s": %s' % (k, repr(clf_search.cv_results_[k]))
                 for k in clf_search.cv_results_]
    else:
        lines = ['no results']
    with open(_fn_path_out('search_params_scores'), 'w') as fp:
        fp.write('\n'.join(lines))

    best = clf_search.best_params_
    with open(_fn_path_out('search_params_best'), 'w') as fp:
        fp.write('\n'.join('{:30s} {}'.format('"{}":'.format(k), best[k])
                           for k in best))


def relabel_sequential(labels, uq_lbs=None):
Expand Down Expand Up @@ -754,8 +762,8 @@ def eval_classif_cross_val_scores(clf_name, classif, features, labels,
>>> data += 0.5 - np.random.random(data.shape)
>>> data.shape
(300, 6)
>>> from sklearn.cross_validation import StratifiedKFold
>>> cv = StratifiedKFold(labels, n_folds=5, random_state=0)
>>> from sklearn.model_selection import StratifiedKFold
>>> cv = StratifiedKFold(n_splits=5, random_state=0)
>>> classif = create_classifiers()[DEFAULT_CLASSIF_NAME]
>>> eval_classif_cross_val_scores(DEFAULT_CLASSIF_NAME, classif,
... data, labels, cv)
Expand All @@ -766,7 +774,7 @@ def eval_classif_cross_val_scores(clf_name, classif, features, labels,
3 1.0 1.0 1.0 1.0
4 1.0 1.0 1.0 1.0
>>> labels[labels == 1] = 2
>>> cv = StratifiedKFold(labels, n_folds=3, random_state=0)
>>> cv = StratifiedKFold(n_splits=3, random_state=0)
>>> eval_classif_cross_val_scores(DEFAULT_CLASSIF_NAME, classif,
... data, labels, cv, path_out='.')
f1_macro accuracy precision_macro recall_macro
Expand Down Expand Up @@ -796,7 +804,7 @@ def eval_classif_cross_val_scores(clf_name, classif, features, labels,
scoring, np.mean(scores), repr(scores))
df_scoring[scoring] = scores
except Exception:
logging.error(traceback.format_exc())
logging.exception('model_selection.cross_val_score')

if path_out is not None:
assert os.path.exists(path_out), 'missing: "%s"' % path_out
Expand Down Expand Up @@ -838,8 +846,8 @@ def eval_classif_cross_val_roc(clf_name, classif, features, labels,
>>> data += np.random.random(data.shape)
>>> data.shape
(300, 6)
>>> from sklearn.cross_validation import StratifiedKFold
>>> cv = StratifiedKFold(labels, n_folds=5, random_state=0)
>>> from sklearn.model_selection import StratifiedKFold
>>> cv = StratifiedKFold(n_splits=5, random_state=0)
>>> classif = create_classifiers()[DEFAULT_CLASSIF_NAME]
>>> fp_tp, auc = eval_classif_cross_val_roc(DEFAULT_CLASSIF_NAME, classif,
... data, labels, cv, nb_thr=10)
Expand Down Expand Up @@ -884,37 +892,34 @@ def eval_classif_cross_val_roc(clf_name, classif, features, labels,
for lb in unique_labels:
labels_bin[:, lb] = (labels == lb)

count = 0
# since version change the CV is not iterable by default
if not hasattr(cross_val, '__iter__'):
cross_val = cross_val.split(features, labels)
count = 0.
for train, test in cross_val:
features_train = np.copy(features[train], order='C')
labels_train = np.copy(labels[train], order='C')
features_test = np.copy(features[test], order='C')
classif.fit(features_train, labels_train)
proba = classif.predict_proba(features_test)
classif.fit(np.copy(features[train], order='C'),
np.copy(labels[train], order='C'))
proba = classif.predict_proba(np.copy(features[test], order='C'))
# Compute ROC curve and area the curve
for i, lb in enumerate(unique_labels):
fpr, tpr, _ = metrics.roc_curve(labels_bin[test, lb], proba[:, i])
fpr = [0.] + fpr.tolist() + [1.]
tpr = [0.] + tpr.tolist() + [1.]
mean_tpr += interp(mean_fpr, fpr, tpr)
mean_tpr[0] = 0.0
count += 1.
count += 1
# roc_auc = metrics.auc(fpr, tpr)

mean_tpr /= count
mean_tpr[-1] = 1.0
# mean_auc = metrics.auc(mean_fpr, mean_tpr)
df_roc = pd.DataFrame(np.array([mean_fpr, mean_tpr]).T,
columns=['FP', 'TP'])

df_roc = pd.DataFrame(np.array([mean_fpr, mean_tpr]).T, columns=['FP', 'TP'])
auc = metrics.auc(mean_fpr, mean_tpr)

if path_out is not None:
assert os.path.exists(path_out), 'missing: "%s"' % path_out
name_csv = NAME_CSV_CLASSIF_CV_ROC.format(clf_name, 'mean')
path_csv = os.path.join(path_out, name_csv)
df_roc.to_csv(path_csv)

df_roc.to_csv(os.path.join(path_out, name_csv))
name_txt = NAME_TXT_CLASSIF_CV_AUC.format(clf_name, 'mean')
with open(os.path.join(path_out, name_txt), 'w') as fp:
fp.write(str(auc))
def search_params_cut_down_max_nb_iter(clf_parameters, nb_iter):
    """ cut down the requested number of random-search iterations when the
    parameter space is finite and smaller than the request

    :param dict clf_parameters: {param_name: list/tuple of values OR a
        continuous distribution (anything without `__iter__`)}
    :param int nb_iter: requested number of random tries
    :return int: min(nb_iter, size of the finite parameter grid)
    """
    counts = []
    for vals in clf_parameters.values():
        # a continuous distribution has no `__iter__` -> the parameter space
        # is infinite, so the requested number of iterations stands as is
        if not hasattr(vals, '__iter__'):
            return nb_iter
        counts.append(len(vals))
    # np.prod of an empty list is 1 (the empty grid has one combination);
    # np.product was removed in NumPy 2.0, so use np.prod
    grid_size = int(np.prod(counts))
    if grid_size < nb_iter:
        # BUG FIX: the original had `nb_iter < count`, a no-op comparison,
        # so the cut-down never actually happened
        nb_iter = grid_size
        logging.debug('nb iter: -> %i', nb_iter)
    return nb_iter


Expand All @@ -964,14 +971,14 @@ def create_classif_search(name_clf, clf_pipeline, nb_labels,
if search_type == 'grid':
clf_parameters = create_clf_param_search_grid(name_clf)
logging.info('init Grid search...')
clf_search = grid_search.GridSearchCV(
clf_search = GridSearchCV(
clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
n_jobs=nb_jobs, verbose=1, refit=True)
else:
clf_parameters = create_clf_param_search_distrib(name_clf)
nb_iter = search_params_cut_down_max_nb_iter(clf_parameters, nb_iter)
logging.info('init Randomized search...')
clf_search = grid_search.RandomizedSearchCV(
clf_search = RandomizedSearchCV(
clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
n_jobs=nb_jobs, n_iter=nb_iter, verbose=1, refit=True)
return clf_search
Expand Down
3 changes: 3 additions & 0 deletions imsegm/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import imsegm.utilities

imsegm.utilities
3 changes: 1 addition & 2 deletions imsegm/utilities/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import time
import types
import logging
import traceback
import multiprocessing as mproc
# from functools import partial

Expand Down Expand Up @@ -296,7 +295,7 @@ def create_subfolders(path_out, list_folders):
os.mkdir(path_dir)
count += 1
except Exception:
logging.error(traceback.format_exc())
logging.exception('mkdir: %s', path_dir)
return count


Expand Down
Loading

0 comments on commit 41cd661

Please sign in to comment.