Skip to content
This repository has been archived by the owner on Feb 11, 2023. It is now read-only.

Commit

Permalink
update classif. (#15)
Browse files Browse the repository at this point in the history
* CrossVal - allow larger test then train
* fix classif. nb searches
  • Loading branch information
Borda committed Jan 1, 2019
1 parent 41cd661 commit f3955cf
Show file tree
Hide file tree
Showing 10 changed files with 106 additions and 56 deletions.
2 changes: 1 addition & 1 deletion experiments_ovary_centres/run_center_candidate_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import matplotlib
if os.environ.get('DISPLAY', '') == '' \
and matplotlib.rcParams['backend'] != 'agg':
# logging.warning('No display found. Using non-interactive Agg backend.')
print('No display found. Using non-interactive Agg backend.')
matplotlib.use('Agg')

import matplotlib.pyplot as plt
Expand Down
2 changes: 1 addition & 1 deletion experiments_ovary_centres/run_center_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import matplotlib
if os.environ.get('DISPLAY', '') == '' \
and matplotlib.rcParams['backend'] != 'agg':
# logging.warning('No display found. Using non-interactive Agg backend.')
print('No display found. Using non-interactive Agg backend.')
matplotlib.use('Agg')

import matplotlib.pylab as plt
Expand Down
2 changes: 1 addition & 1 deletion experiments_ovary_detect/run_ovary_egg-segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import matplotlib
if os.environ.get('DISPLAY', '') == '' \
and matplotlib.rcParams['backend'] != 'agg':
# logging.warning('No display found. Using non-interactive Agg backend.')
print('No display found. Using non-interactive Agg backend.')
matplotlib.use('Agg')

import numpy as np
Expand Down
2 changes: 1 addition & 1 deletion experiments_segmentation/run_compute_stat_annot_segm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import matplotlib
if os.environ.get('DISPLAY', '') == '' \
and matplotlib.rcParams['backend'] != 'agg':
# logging.warning('No display found. Using non-interactive Agg backend.')
print('No display found. Using non-interactive Agg backend.')
matplotlib.use('Agg')

import pandas as pd
Expand Down
2 changes: 1 addition & 1 deletion handling_annotations/run_overlap_images_segms.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import matplotlib
if os.environ.get('DISPLAY','') == '' \
and matplotlib.rcParams['backend'] != 'agg':
# logging.warning('No display found. Using non-interactive Agg backend.')
print('No display found. Using non-interactive Agg backend.')
matplotlib.use('Agg')

import numpy as np
Expand Down
141 changes: 96 additions & 45 deletions imsegm/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from scipy import interp
from scipy.stats import randint as sp_randint
from scipy.stats import uniform as sp_random
from sklearn.base import clone
from sklearn import preprocessing, feature_selection, decomposition
from sklearn import cluster, metrics
from sklearn import ensemble, neighbors, svm, tree
Expand Down Expand Up @@ -897,9 +898,10 @@ def eval_classif_cross_val_roc(clf_name, classif, features, labels,
cross_val = cross_val.split(features, labels)
count = 0.
for train, test in cross_val:
classif.fit(np.copy(features[train], order='C'),
np.copy(labels[train], order='C'))
proba = classif.predict_proba(np.copy(features[test], order='C'))
classif_cv = clone(classif)
classif_cv.fit(np.copy(features[train], order='C'),
np.copy(labels[train], order='C'))
proba = classif_cv.predict_proba(np.copy(features[test], order='C'))
# Compute ROC curve and area the curve
for i, lb in enumerate(unique_labels):
fpr, tpr, _ = metrics.roc_curve(labels_bin[test, lb], proba[:, i])
Expand Down Expand Up @@ -934,6 +936,12 @@ def search_params_cut_down_max_nb_iter(clf_parameters, nb_iter):
:param clf_parameters: {str: ...}
:param nb_iter: int, nb of random tryes
:return: int
>>> clf_params = create_clf_param_search_grid(DEFAULT_CLASSIF_NAME)
>>> search_params_cut_down_max_nb_iter(clf_params, 100)
100
>>> search_params_cut_down_max_nb_iter(clf_params, 1e6)
1450
"""
counts = []
for k in clf_parameters:
Expand All @@ -944,7 +952,7 @@ def search_params_cut_down_max_nb_iter(clf_parameters, nb_iter):
return nb_iter
count = np.product(counts)
if count < nb_iter:
nb_iter < count
nb_iter = count
return nb_iter


Expand Down Expand Up @@ -1384,32 +1392,40 @@ class HoldOut:
Example
-------
>>> ho = HoldOut(10, 7)
>>> ho = HoldOut(10, 7, rand_seed=None)
>>> len(ho)
1
>>> list(ho)
[([0, 1, 2, 3, 4, 5, 6], [7, 8, 9])]
>>> ho = HoldOut(10, 7, rand_seed=0)
>>> list(ho)
[([2, 8, 4, 9, 1, 6, 7], [3, 0, 5])]
"""
def __init__(self, nb, hold_idx, random_state=0):
def __init__(self, nb_samples, hold_out, rand_seed=0):
"""
:param int nb: total number of samples
:param int hold_idx: index where the test starts
:param obj random_state: Seed for the random number generator.
:param int nb_samples: total number of samples
:param int hold_out: index where the test starts
:param obj rand_seed: Seed for the random number generator.
"""
self.total = nb
self.hold_idx = hold_idx
self.random_state = random_state
assert self.total > self.hold_idx, \
'total %i should be higher than hold Idx %i' % (self.total, self.hold_idx)
assert nb_samples > hold_out, \
'total %i should be higher than hold Idx %i' % (nb_samples, hold_out)

self._total = nb_samples
self.hold_out = hold_out
self._indexes = list(range(nb_samples))

if rand_seed is not None and rand_seed is not False:
np.random.seed(rand_seed)
np.random.shuffle(self._indexes)

def __iter__(self):
""" iterate the folds
:return ([int], [int]):
"""
ind_train = list(range(self.hold_idx))
ind_test = list(range(self.hold_idx, self.total))
ind_train = self._indexes[:self.hold_out]
ind_test = self._indexes[self.hold_out:]
yield ind_train, ind_test

def __len__(self):
Expand Down Expand Up @@ -1438,22 +1454,29 @@ class CrossValidatePOut:
>>> len(cv)
2
>>> list(cv) # doctest: +NORMALIZE_WHITESPACE
[([3, 4, 5], [0, 1, 2]), \
[([3, 4, 5], [0, 1, 2]),
([0, 1, 2], [3, 4, 5])]
Example 2
---------
>>> cv = CrossValidatePOut(7, 3, rand_seed=0)
>>> list(cv) # doctest: +NORMALIZE_WHITESPACE
[([3, 0, 5, 4], [6, 2, 1]), \
([6, 2, 1, 4], [3, 0, 5]), \
[([3, 0, 5, 4], [6, 2, 1]),
([6, 2, 1, 4], [3, 0, 5]),
([6, 2, 1, 3, 0, 5], [4])]
>>> len(list(cv))
3
>>> cv.indexes
[6, 2, 1, 3, 0, 5, 4]
Example 3
---------
>>> cv = CrossValidatePOut(7, 5, rand_seed=0)
>>> list(cv) # doctest: +NORMALIZE_WHITESPACE
[([6, 2], [1, 3, 0, 5, 4]),
([1, 3], [6, 2, 0, 5, 4]),
([0, 5], [6, 2, 1, 3, 4]),
([4], [6, 2, 1, 3, 0, 5])]
"""

def __init__(self, nb_samples, nb_hold_out, rand_seed=None):
Expand All @@ -1464,13 +1487,21 @@ def __init__(self, nb_samples, nb_hold_out, rand_seed=None):
:param obj rand_seed: int or None
"""
assert nb_samples > nb_hold_out, \
'number of holdout has to be smaller then total size'
self.nb_samples = nb_samples
self.nb_hold_out = nb_hold_out
'number of holdout has to be smaller then _total size'
self._nb_samples = nb_samples
self._nb_hold_out = nb_hold_out

self._revert = False # sets the sizes
if self._nb_hold_out > (self._nb_samples / 2.):
logging.debug('WARNING: you are running in reverse mode, '
'while using all training examples '
'there are much more yield test cases.')
self._nb_hold_out = self._nb_samples - self._nb_hold_out
self._revert = True

self.indexes = list(range(self.nb_samples))
self.indexes = list(range(self._nb_samples))

if rand_seed is not False:
if rand_seed is not None and rand_seed is not False:
np.random.seed(rand_seed)
np.random.shuffle(self.indexes)
logging.debug('sets ordering: %s', repr(self.indexes))
Expand All @@ -1482,17 +1513,19 @@ def __iter__(self):
:return ([int], [int]):
"""
for i in range(0, self.nb_samples, self.nb_hold_out):
inds_test = self.indexes[i:i + self.nb_hold_out]
for i in range(0, self._nb_samples, self._nb_hold_out):
inds_test = self.indexes[i:i + self._nb_hold_out]
inds_train = [i for i in self.indexes if i not in inds_test]
if self._revert:
inds_train, inds_test = inds_test, inds_train
yield inds_train, inds_test

def __len__(self):
""" number of folds
:return int:
"""
return int(np.ceil(self.nb_samples / float(self.nb_hold_out)))
return int(np.ceil(self._nb_samples / float(self._nb_hold_out)))


class CrossValidatePSetsOut:
Expand All @@ -1513,7 +1546,7 @@ class CrossValidatePSetsOut:
>>> len(cv)
2
>>> list(cv) # doctest: +NORMALIZE_WHITESPACE
[([5, 6, 7, 8, 9], [0, 1, 2, 3, 4]), \
[([5, 6, 7, 8, 9], [0, 1, 2, 3, 4]),
([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])]
Example 2
Expand All @@ -1522,13 +1555,21 @@ class CrossValidatePSetsOut:
>>> cv.set_indexes
[[0, 1], [2, 3], [4], [5, 6], [7]]
>>> list(cv) # doctest: +NORMALIZE_WHITESPACE
[([2, 3, 5, 6, 7], [4, 0, 1]), \
([4, 0, 1, 7], [2, 3, 5, 6]), \
[([2, 3, 5, 6, 7], [4, 0, 1]),
([4, 0, 1, 7], [2, 3, 5, 6]),
([4, 0, 1, 2, 3, 5, 6], [7])]
>>> len(cv)
3
>>> cv.sets_order
[2, 0, 1, 3, 4]
Example 3
---------
>>> cv = CrossValidatePSetsOut([2, 2, 1, 2, 1, 1], 4, rand_seed=0)
>>> list(cv) # doctest: +NORMALIZE_WHITESPACE
[([8, 4], [2, 3, 5, 6, 0, 1, 7]),
([2, 3, 5, 6], [8, 4, 0, 1, 7]),
([0, 1, 7], [8, 4, 2, 3, 5, 6])]
"""

def __init__(self, set_sizes, nb_hold_out, rand_seed=None):
Expand All @@ -1539,24 +1580,32 @@ def __init__(self, set_sizes, nb_hold_out, rand_seed=None):
:param obj rand_seed: int or None
"""
assert len(set_sizes) > nb_hold_out, \
'nb of hold out (%i) has to be smaller then total size %i' \
'nb of hold out (%i) has to be smaller then _total size %i' \
% (nb_hold_out, len(set_sizes))
self.set_sizes = list(set_sizes)
self.total = np.sum(self.set_sizes)
self.nb_hold_out = nb_hold_out
self._set_sizes = list(set_sizes)
self._total = np.sum(self._set_sizes)
self._nb_hold_out = nb_hold_out

self._revert = False # sets the sizes
if self._nb_hold_out > (len(self._set_sizes) / 2.):
logging.debug('WARNING: you are running in reverse mode, '
'while using all training examples '
'there are much more yield test cases.')
self._nb_hold_out = len(self._set_sizes) - self._nb_hold_out
self._revert = True

self.set_indexes = []
for i, size in enumerate(self.set_sizes):
start = int(np.sum(self.set_sizes[:i]))
for i, size in enumerate(self._set_sizes):
start = int(np.sum(self._set_sizes[:i]))
inds = range(start, start + size)
self.set_indexes.append(list(inds))

assert np.sum(len(i) for i in self.set_indexes) == self.total, \
'all indexes should sum to total count %i' % self.total
assert np.sum(len(i) for i in self.set_indexes) == self._total, \
'all indexes should sum to _total count %i' % self._total

self.sets_order = list(range(len(self.set_sizes)))
self.sets_order = list(range(len(self._set_sizes)))

if rand_seed is not False:
if rand_seed is not None and rand_seed is not False:
np.random.seed(rand_seed)
np.random.shuffle(self.sets_order)
logging.debug('sets ordering: %s', repr(self.sets_order))
Expand All @@ -1568,20 +1617,22 @@ def __iter__(self):
:return ([int], [int]):
"""
for i in range(0, len(self.set_sizes), self.nb_hold_out):
test = self.sets_order[i:i + self.nb_hold_out]
for i in range(0, len(self._set_sizes), self._nb_hold_out):
test = self.sets_order[i:i + self._nb_hold_out]
inds_train = list(itertools.chain.from_iterable(
self.set_indexes[i] for i in self.sets_order if i not in test))
inds_test = list(itertools.chain.from_iterable(
self.set_indexes[i] for i in self.sets_order if i in test))
if self._revert:
inds_train, inds_test = inds_test, inds_train
yield inds_train, inds_test

def __len__(self):
""" number of folds
:return int:
"""
nb = len(self.set_sizes) / float(self.nb_hold_out)
nb = len(self._set_sizes) / float(self._nb_hold_out)
return int(np.ceil(nb))


Expand Down
4 changes: 2 additions & 2 deletions imsegm/descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1734,7 +1734,7 @@ def interpolate_ray_dist(ray_dists, order='spline'):
""" interpolate ray distances
:param [float] ray_dists:
:param str order: degree of interpolation
:param str|int order: degree of interpolation
:return [float]:
>>> interpolate_ray_dist([-1] * 5)
Expand Down Expand Up @@ -1799,7 +1799,7 @@ def _fn_cos_residual(x, t, y):
def reconstruct_ray_features_2d(position, ray_features, shift=0):
""" reconstruct ray features for 2D image
:param (int, int) position:
:param (int, int)|(float, float) position:
:param [float] ray_features:
:param float shift:
:return [[float, float]]:
Expand Down
2 changes: 1 addition & 1 deletion imsegm/ellipse_fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ def split_segm_background_foreground(seg, sel_bg=STRUC_ELEM_BG,
""" smoothing segmentation with morphological operation
:param ndarray seg: input segmentation
:param int sel_bg: smoothing background with morphological operation
:param int|float sel_bg: smoothing background with morphological operation
:param int sel_fg: smoothing foreground with morphological operation
:return:
Expand Down
3 changes: 1 addition & 2 deletions imsegm/utilities/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import logging

import matplotlib
import numpy as np
Expand All @@ -8,7 +7,7 @@
# in case you are running on machine without display, e.g. server
if os.environ.get('DISPLAY', '') == '' \
and matplotlib.rcParams['backend'] != 'agg':
# logging.warning('No display found. Using non-interactive Agg backend.')
print('No display found. Using non-interactive Agg backend.')
# https://matplotlib.org/faq/usage_faq.html
matplotlib.use('Agg')

Expand Down
2 changes: 1 addition & 1 deletion imsegm/utilities/drawing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import matplotlib
if os.environ.get('DISPLAY', '') == '' \
and matplotlib.rcParams['backend'] != 'agg':
# logging.warning('No display found. Using non-interactive Agg backend.')
print('No display found. Using non-interactive Agg backend.')
matplotlib.use('Agg')

import numpy as np
Expand Down

0 comments on commit f3955cf

Please sign in to comment.