diff --git a/imodels/experimental/figs_ensembles.py b/imodels/experimental/figs_ensembles.py
index 634e6048..ea6fa6c6 100644
--- a/imodels/experimental/figs_ensembles.py
+++ b/imodels/experimental/figs_ensembles.py
@@ -2,7 +2,6 @@
 import numpy as np
 from matplotlib import pyplot as plt
-import sklearn
 from sklearn import datasets
 from sklearn import tree
 from sklearn.base import BaseEstimator
@@ -73,22 +72,18 @@ def setattrs(self, **kwargs):
             setattr(self, k, v)
 
     def __str__(self):
-        try:
-            sklearn.utils.validation.check_is_fitted(self)
-            if self.split_or_linear == 'linear':
-                if self.is_root:
-                    return f'X_{self.feature} * {self.value:0.3f} (Tree #{self.tree_num} linear root)'
-                else:
-                    return f'X_{self.feature} * {self.value:0.3f} (linear)'
+        if self.split_or_linear == 'linear':
+            if self.is_root:
+                return f'X_{self.feature} * {self.value:0.3f} (Tree #{self.tree_num} linear root)'
             else:
-                if self.is_root:
-                    return f'X_{self.feature} <= {self.threshold:0.3f} (Tree #{self.tree_num} root)'
-                elif self.left is None and self.right is None:
-                    return f'Val: {self.value[0][0]:0.3f} (leaf)'
-                else:
-                    return f'X_{self.feature} <= {self.threshold:0.3f} (split)'
-        except ValueError:
-            return self.__class__.__name__
+                return f'X_{self.feature} * {self.value:0.3f} (linear)'
+        else:
+            if self.is_root:
+                return f'X_{self.feature} <= {self.threshold:0.3f} (Tree #{self.tree_num} root)'
+            elif self.left is None and self.right is None:
+                return f'Val: {self.value[0][0]:0.3f} (leaf)'
+            else:
+                return f'X_{self.feature} <= {self.threshold:0.3f} (split)'
 
     def __repr__(self):
         return self.__str__()
@@ -422,17 +417,13 @@ def _tree_to_str(self, root: Node, prefix=''):
                                 pprefix)
 
     def __str__(self):
-        try:
-            sklearn.utils.validation.check_is_fitted(self)
-            s = '------------\n' + \
-                '\n\t+\n'.join([self._tree_to_str(t) for t in self.trees_])
-            if hasattr(self, 'feature_names_') and self.feature_names_ is not None:
-                for i in range(len(self.feature_names_))[::-1]:
-                    s = s.replace(f'X_{i}', self.feature_names_[i])
-            return s
-        except ValueError:
-            return self.__class__.__name__
-
+        s = '------------\n' + \
+            '\n\t+\n'.join([self._tree_to_str(t) for t in self.trees_])
+        if hasattr(self, 'feature_names_') and self.feature_names_ is not None:
+            for i in range(len(self.feature_names_))[::-1]:
+                s = s.replace(f'X_{i}', self.feature_names_[i])
+        return s
+
     def predict(self, X):
         if self.posthoc_ridge and self.weighted_model_:  # note, during fitting don't use the weighted moel
             X_feats = self._extract_tree_predictions(X)
diff --git a/imodels/rule_list/corels_wrapper.py b/imodels/rule_list/corels_wrapper.py
index f968e3f5..39788df6 100644
--- a/imodels/rule_list/corels_wrapper.py
+++ b/imodels/rule_list/corels_wrapper.py
@@ -4,7 +4,6 @@
 import numpy as np
 import pandas as pd
-import sklearn
 from sklearn.preprocessing import KBinsDiscretizer
 
 from imodels.rule_list.greedy_rule_list import GreedyRuleListClassifier
@@ -234,18 +233,14 @@ def _traverse_rule(self, X: np.ndarray, y: np.ndarray, feature_names: List[str],
         self.str_print = str_print
 
     def __str__(self):
-        try:
-            sklearn.utils.validation.check_is_fitted(self)
-            if corels_supported:
-                if self.str_print is not None:
-                    return 'OptimalRuleList:\n\n' + self.str_print
-                else:
-                    return 'OptimalRuleList:\n\n' + self.rl_.__str__()
+        if corels_supported:
+            if self.str_print is not None:
+                return 'OptimalRuleList:\n\n' + self.str_print
             else:
-                return super().__str__()
-        except ValueError:
-            return self.__class__.__name__
-
+                return 'OptimalRuleList:\n\n' + self.rl_.__str__()
+        else:
+            return super().__str__()
+
     def _get_complexity(self):
         return sum([len(corule['antecedents']) for corule in self.rl_.rules])
 
diff --git a/imodels/rule_list/greedy_rule_list.py b/imodels/rule_list/greedy_rule_list.py
index 962f9998..0991e2f4 100644
--- a/imodels/rule_list/greedy_rule_list.py
+++ b/imodels/rule_list/greedy_rule_list.py
@@ -8,7 +8,6 @@
 from copy import deepcopy
 
 import numpy as np
-import sklearn
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.utils.multiclass import unique_labels
 from sklearn.utils.validation import check_array, check_is_fitted
@@ -141,43 +140,48 @@ def predict(self, X):
         X = check_array(X)
         return np.argmax(self.predict_proba(X), axis=1)
 
+    """
+    def __str__(self):
+        # s = ''
+        # for rule in self.rules_:
+        #     s += f"mean {rule['val'].round(3)} ({rule['num_pts']} pts)\n"
+        #     if 'col' in rule:
+        #         s += f"if {rule['col']} >= {rule['cutoff']} then {rule['val_right'].round(3)} ({rule['num_pts_right']} pts)\n"
+        # return s
+    """
+
     def __str__(self):
         '''Print out the list in a nice way
         '''
-        try:
-            sklearn.utils.validation.check_is_fitted(self)
-            s = '> ------------------------------\n> Greedy Rule List\n> ------------------------------\n'
-
-            def red(s):
-                # return f"\033[91m{s}\033[00m"
-                return s
-
-            def cyan(s):
-                # return f"\033[96m{s}\033[00m"
-                return s
-
-            def rule_name(rule):
-                if rule['flip']:
-                    return '~' + rule['col']
-                return rule['col']
-
-            # rule = self.rules_[0]
-            # s += f"{red((100 * rule['val']).round(3))}% IwI ({rule['num_pts']} pts)\n"
-            for rule in self.rules_:
-                s += u'\u2193\n' + f"{cyan((100 * rule['val']).round(2))}% risk ({rule['num_pts']} pts)\n"
-                # s += f"\t{'Else':>45} => {cyan((100 * rule['val']).round(2)):>6}% IwI ({rule['val'] * rule['num_pts']:.0f}/{rule['num_pts']} pts)\n"
-                if 'col' in rule:
-                    # prefix = f"if {rule['col']} >= {rule['cutoff']}"
-                    prefix = f"if {rule_name(rule)}"
-                    val = f"{100 * rule['val_right'].round(3)}"
-                    s += f"\t{prefix} ==> {red(val)}% risk ({rule['num_pts_right']} pts)\n"
-            # rule = self.rules_[-1]
-            # s += f"{red((100 * rule['val']).round(3))}% IwI ({rule['num_pts']} pts)\n"
+        s = '> ------------------------------\n> Greedy Rule List\n> ------------------------------\n'
+
+        def red(s):
+            # return f"\033[91m{s}\033[00m"
+            return s
+
+        def cyan(s):
+            # return f"\033[96m{s}\033[00m"
             return s
-        except ValueError:
-            return self.__class__.__name__
-
+
+        def rule_name(rule):
+            if rule['flip']:
+                return '~' + rule['col']
+            return rule['col']
+
+        # rule = self.rules_[0]
+        # s += f"{red((100 * rule['val']).round(3))}% IwI ({rule['num_pts']} pts)\n"
+        for rule in self.rules_:
+            s += u'\u2193\n' + f"{cyan((100 * rule['val']).round(2))}% risk ({rule['num_pts']} pts)\n"
+            # s += f"\t{'Else':>45} => {cyan((100 * rule['val']).round(2)):>6}% IwI ({rule['val'] * rule['num_pts']:.0f}/{rule['num_pts']} pts)\n"
+            if 'col' in rule:
+                # prefix = f"if {rule['col']} >= {rule['cutoff']}"
+                prefix = f"if {rule_name(rule)}"
+                val = f"{100 * rule['val_right'].round(3)}"
+                s += f"\t{prefix} ==> {red(val)}% risk ({rule['num_pts_right']} pts)\n"
+        # rule = self.rules_[-1]
+        # s += f"{red((100 * rule['val']).round(3))}% IwI ({rule['num_pts']} pts)\n"
+        return s
+
     ######## HERE ONWARDS CUSTOM SPLITTING (DEPRECATED IN FAVOR OF SKLEARN STUMP) ########
     ######################################################################################
     def _find_best_split(self, x, y):
diff --git a/imodels/rule_set/brs.py b/imodels/rule_set/brs.py
index a65c2abb..933864ca 100644
--- a/imodels/rule_set/brs.py
+++ b/imodels/rule_set/brs.py
@@ -18,7 +18,6 @@
 from numpy.random import random
 from pandas import read_csv
 from scipy.sparse import csc_matrix
-import sklearn
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.utils.multiclass import check_classification_targets
@@ -193,12 +192,8 @@ def fit(self, X, y, feature_names: list = None, init=[], verbose=False):
         return self
 
     def __str__(self):
-        try:
-            sklearn.utils.validation.check_is_fitted(self)
-            return ' '.join(str(r) for r in self.rules_)
-        except ValueError:
-            return self.__class__.__name__
-
+        return ' '.join(str(r) for r in self.rules_)
+
     def predict(self, X):
         check_is_fitted(self)
         if isinstance(X, np.ndarray):
diff --git a/imodels/rule_set/rule_fit.py b/imodels/rule_set/rule_fit.py
index a6d05d2f..dee403e7 100644
--- a/imodels/rule_set/rule_fit.py
+++ b/imodels/rule_set/rule_fit.py
@@ -13,7 +13,6 @@
 import pandas as pd
 import scipy
 from scipy.special import softmax
-import sklearn
 from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
 from sklearn.base import TransformerMixin
 from sklearn.utils.multiclass import unique_labels
@@ -243,16 +242,12 @@ def visualize(self, decimals=2):
         return rules[['rule', 'coef']].round(decimals)
 
     def __str__(self):
-        try:
-            sklearn.utils.validation.check_is_fitted(self)
-            s = '> ------------------------------\n'
-            s += '> RuleFit:\n'
-            s += '> \tPredictions are made by summing the coefficients of each rule\n'
-            s += '> ------------------------------\n'
-            return s + self.visualize().to_string(index=False) + '\n'
-        except ValueError:
-            return self.__class__.__name__
-
+        s = '> ------------------------------\n'
+        s += '> RuleFit:\n'
+        s += '> \tPredictions are made by summing the coefficients of each rule\n'
+        s += '> ------------------------------\n'
+        return s + self.visualize().to_string(index=False) + '\n'
+
     def _extract_rules(self, X, y) -> List[str]:
         return extract_rulefit(X, y,
                                feature_names=self.feature_placeholders,
diff --git a/imodels/tree/cart_wrapper.py b/imodels/tree/cart_wrapper.py
index 2f6f7021..7bb9ec93 100644
--- a/imodels/tree/cart_wrapper.py
+++ b/imodels/tree/cart_wrapper.py
@@ -1,7 +1,6 @@
 # This is just a simple wrapper around sklearn decisiontree
 # https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
 
-import sklearn
 from sklearn.tree import DecisionTreeClassifier, export_text, DecisionTreeRegressor
 
 from imodels.util.arguments import check_fit_arguments
@@ -49,18 +48,15 @@ def _set_complexity(self):
         self.complexity_ = compute_tree_complexity(self.tree_)
 
     def __str__(self):
-        try:
-            sklearn.utils.validation.check_is_fitted(self)
-            s = '> ------------------------------\n'
-            s += '> Greedy CART Tree:\n'
-            s += '> \tPrediction is made by looking at the value in the appropriate leaf of the tree\n'
-            s += '> ------------------------------' + '\n'
-            if hasattr(self, 'feature_names') and self.feature_names is not None:
-                return s + export_text(self, feature_names=self.feature_names, show_weights=True)
-            else:
-                return s + export_text(self, show_weights=True)
-        except ValueError:
-            return self.__class__.__name__
+        s = '> ------------------------------\n'
+        s += '> Greedy CART Tree:\n'
+        s += '> \tPrediction is made by looking at the value in the appropriate leaf of the tree\n'
+        s += '> ------------------------------' + '\n'
+        if hasattr(self, 'feature_names') and self.feature_names is not None:
+            return s + export_text(self, feature_names=self.feature_names, show_weights=True)
+        else:
+            return s + export_text(self, show_weights=True)
+
 
 class GreedyTreeRegressor(DecisionTreeRegressor):
     """Wrapper around sklearn greedy tree regressor
@@ -102,11 +98,7 @@ def _set_complexity(self):
         self.complexity_ = compute_tree_complexity(self.tree_)
 
     def __str__(self):
-        try:
-            sklearn.utils.validation.check_is_fitted(self)
-            if hasattr(self, 'feature_names') and self.feature_names is not None:
-                return 'GreedyTree:\n' + export_text(self, feature_names=self.feature_names, show_weights=True)
-            else:
-                return 'GreedyTree:\n' + export_text(self, show_weights=True)
-        except ValueError:
-            return self.__class__.__name__
+        if hasattr(self, 'feature_names') and self.feature_names is not None:
+            return 'GreedyTree:\n' + export_text(self, feature_names=self.feature_names, show_weights=True)
+        else:
+            return 'GreedyTree:\n' + export_text(self, show_weights=True)
\ No newline at end of file
diff --git a/imodels/tree/figs.py b/imodels/tree/figs.py
index 2c2e5104..7baacb9b 100644
--- a/imodels/tree/figs.py
+++ b/imodels/tree/figs.py
@@ -5,7 +5,6 @@
 import numpy as np
 import pandas as pd
 from scipy.special import expit
-import sklearn
 from sklearn import datasets
 from sklearn import tree
 from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
@@ -52,17 +51,13 @@ def setattrs(self, **kwargs):
             setattr(self, k, v)
 
     def __str__(self):
-        try:
-            sklearn.utils.validation.check_is_fitted(self)
-            if self.is_root:
-                return f'X_{self.feature} <= {self.threshold:0.3f} (Tree #{self.tree_num} root)'
-            elif self.left is None and self.right is None:
-                return f'Val: {self.value[0][0]:0.3f} (leaf)'
-            else:
-                return f'X_{self.feature} <= {self.threshold:0.3f} (split)'
-        except ValueError:
-            return self.__class__.__name__
-
+        if self.is_root:
+            return f'X_{self.feature} <= {self.threshold:0.3f} (Tree #{self.tree_num} root)'
+        elif self.left is None and self.right is None:
+            return f'Val: {self.value[0][0]:0.3f} (leaf)'
+        else:
+            return f'X_{self.feature} <= {self.threshold:0.3f} (split)'
+
     def print_root(self, y):
         try:
             one_count = pd.Series(y).value_counts()[1.0]
@@ -77,6 +72,8 @@ def print_root(self, y):
         else:
             return f'X_{self.feature} <= {self.threshold:0.3f}' + one_proportion
 
+    def __repr__(self):
+        return self.__str__()
 
 class FIGS(BaseEstimator):
@@ -414,21 +411,17 @@ def _tree_to_str_with_data(self, X, y, root: Node, prefix=''):
                 self._tree_to_str_with_data(X[~left], y[~left], root.right,
                                             pprefix))
 
     def __str__(self):
-        try:
-            sklearn.utils.validation.check_is_fitted(self)
-            s = '> ------------------------------\n'
-            s += '> FIGS-Fast Interpretable Greedy-Tree Sums:\n'
-            s += '> \tPredictions are made by summing the "Val" reached by traversing each tree.\n'
-            s += '> \tFor classifiers, a sigmoid function is then applied to the sum.\n'
-            s += '> ------------------------------\n'
-            s += '\n\t+\n'.join([self._tree_to_str(t) for t in self.trees_])
-            if hasattr(self, 'feature_names_') and self.feature_names_ is not None:
-                for i in range(len(self.feature_names_))[::-1]:
-                    s = s.replace(f'X_{i}', self.feature_names_[i])
-            return s
-        except ValueError:
-            return self.__class__.__name__
-
+        s = '> ------------------------------\n'
+        s += '> FIGS-Fast Interpretable Greedy-Tree Sums:\n'
+        s += '> \tPredictions are made by summing the "Val" reached by traversing each tree.\n'
+        s += '> \tFor classifiers, a sigmoid function is then applied to the sum.\n'
+        s += '> ------------------------------\n'
+        s += '\n\t+\n'.join([self._tree_to_str(t) for t in self.trees_])
+        if hasattr(self, 'feature_names_') and self.feature_names_ is not None:
+            for i in range(len(self.feature_names_))[::-1]:
+                s = s.replace(f'X_{i}', self.feature_names_[i])
+        return s
+
     def print_tree(self, X, y, feature_names=None):
         s = '------------\n' + \
             '\n\t+\n'.join([self._tree_to_str_with_data(X, y, t)
diff --git a/imodels/tree/hierarchical_shrinkage.py b/imodels/tree/hierarchical_shrinkage.py
index bfe99c33..ac525438 100644
--- a/imodels/tree/hierarchical_shrinkage.py
+++ b/imodels/tree/hierarchical_shrinkage.py
@@ -1,33 +1,25 @@
+import time
 from copy import deepcopy
 from typing import List
 
 import numpy as np
-import sklearn
 from sklearn import datasets
-from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin, clone
-from sklearn.metrics import r2_score
-from sklearn.model_selection import cross_val_score
+from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
+from sklearn.metrics import r2_score, mean_squared_error, log_loss
+from sklearn.model_selection import cross_val_score, KFold
 from sklearn.model_selection import train_test_split
 from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, \
     export_text
-from sklearn.utils import check_X_y
-from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.ensemble import GradientBoostingClassifier, RandomForestRegressor
 
 from imodels.util import checks
 from imodels.util.arguments import check_fit_arguments
 from imodels.util.tree import compute_tree_complexity
 
-# leading and traiing undescores
-# https://github.com/rasbt/python-machine-learning-book/blob/master/faq/underscore-convention.md
-# developer guideline
-# https://scikit-learn.org/stable/developers/contributing.html#estimated-attributes
-# https://scikit-learn.org/stable/developers/contributing.html
-
-
-class HSTree(BaseEstimator):
-    def __init__(self, estimator=None,
-                 reg_param: float = 1, shrinkage_scheme_: str = 'node_based'):
+class HSTree:
+    def __init__(self, estimator_: BaseEstimator = DecisionTreeClassifier(max_leaf_nodes=20),
+                 reg_param: float = 1, shrinkage_scheme_: str = 'node_based'):
         """HSTree (Tree with hierarchical shrinkage applied).
         Hierarchical shinkage is an extremely fast post-hoc regularization method which works on any decision tree (or tree-based ensemble, such as Random Forest).
         It does not modify the tree structure, and instead regularizes the tree by shrinking the prediction over each node towards the sample means of its ancestors (using a single regularization parameter).
@@ -42,7 +34,7 @@ def __init__(self, estimator=None,
 
         reg_param: float
             Higher is more regularization (can be arbitrarily large, should not be < 0)
-
+
         shrinkage_scheme: str
             Experimental: Used to experiment with different forms of shrinkage. options are:
             (i) node_based shrinks based on number of samples in parent node
@@ -51,35 +43,24 @@ def __init__(self, estimator=None,
         """
         super().__init__()
         self.reg_param = reg_param
-        self.estimator = estimator
+        self.estimator_ = estimator_
         self.shrinkage_scheme_ = shrinkage_scheme_
-
-
-    def _validate_estimator(self, default=None):
-        """Check the base estimator.
-
-        Sets the `estimator_` attributes.
-        """
-        if self.estimator is not None:
-            self.estimator_ = self.estimator
-        else:
-            self.estimator_ = default
-
-    def fit(self, X, y, sample_weight=None, *args, **kwargs):
-        self._validate_estimator()
-        if checks.check_is_fitted(self.estimator_):
-            self._shrink()
-        else:
-            # remove feature_names if it exists (note: only works as keyword-arg)
-            feature_names = kwargs.pop('feature_names', None)  # None returned if not passed
-            X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
-            X, y = check_X_y(X,y)
-            self.estimator_.fit(X, y, *args, sample_weight=sample_weight, **kwargs)
-            self._shrink()
+    def get_params(self, deep=True):
+        if deep:
+            return deepcopy({'reg_param': self.reg_param, 'estimator_': self.estimator_,
+                             'shrinkage_scheme_': self.shrinkage_scheme_})
+        return {'reg_param': self.reg_param, 'estimator_': self.estimator_,
+                'shrinkage_scheme_': self.shrinkage_scheme_}
+
+    def fit(self, X, y, sample_weight=None, *args, **kwargs):
+        # remove feature_names if it exists (note: only works as keyword-arg)
+        feature_names = kwargs.pop('feature_names', None)  # None returned if not passed
+        X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
+        self.estimator_ = self.estimator_.fit(X, y, *args, sample_weight=sample_weight, **kwargs)
+        self._shrink()
 
         # compute complexity
         if hasattr(self.estimator_, 'tree_'):
@@ -92,7 +73,6 @@ def fit(self, X, y, sample_weight=None, *args, **kwargs):
                 assert t.size == 1, 'multiple trees stored under tree_?'
                 t = t[0]
                 self.complexity_ += compute_tree_complexity(t.tree_)
-
         return self
 
     def _shrink_tree(self, tree, reg_param, i=0, parent_val=None, parent_num=None,
                      cum_sum=0):
@@ -106,7 +86,7 @@ def _shrink_tree(self, tree, reg_param, i=0, parent_val=None, parent_num=None, c
         n_samples = tree.weighted_n_node_samples[i]
         if isinstance(self, RegressorMixin) or isinstance(self.estimator_, GradientBoostingClassifier):
             val = deepcopy(tree.value[i, :, :])
-        else: # If classification, normalize to probability vector
+        else:  # If classification, normalize to probability vector
             val = tree.value[i, :, :] / n_samples
 
         # Step 1: Update cum_sum
@@ -120,15 +100,15 @@ def _shrink_tree(self, tree, reg_param, i=0, parent_val=None, parent_num=None, c
                 val_new = (val - parent_val) / (1 + reg_param / parent_num)
             elif self.shrinkage_scheme_ == 'constant':
                 val_new = (val - parent_val) / (1 + reg_param)
-            else: # leaf_based
+            else:  # leaf_based
                 val_new = 0
             cum_sum += val_new
 
         # Step 2: Update node values
         if self.shrinkage_scheme_ == 'node_based' or self.shrinkage_scheme_ == 'constant':
             tree.value[i, :, :] = cum_sum
-        else: # leaf_based
-            if is_leaf: # update node values if leaf_based
+        else:  # leaf_based
+            if is_leaf:  # update node values if leaf_based
                 root_val = tree.value[0, :, :]
                 tree.value[i, :, :] = root_val + (val - root_val) / (1 + reg_param / n_samples)
             else:
@@ -137,11 +117,11 @@ def _shrink_tree(self, tree, reg_param, i=0, parent_val=None, parent_num=None, c
         # Step 3: Recurse if not leaf
         if not is_leaf:
             self._shrink_tree(tree, reg_param, left,
-                          parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum))
+                              parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum))
             self._shrink_tree(tree, reg_param, right,
-                          parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum))
+                              parent_val=val, parent_num=n_samples, cum_sum=deepcopy(cum_sum))
 
-        # edit the non-leaf nodes for later visualization (doesn't effect predictions)
+        # edit the non-leaf nodes for later visualization (doesn't affect predictions)
 
         return tree
 
@@ -171,88 +151,93 @@ def score(self, X, y, *args, **kwargs):
         return NotImplemented
 
     def __str__(self):
-        try:
-            sklearn.utils.validation.check_is_fitted(self)
-            s = '> ------------------------------\n'
-            s += '> Decision Tree with Hierarchical Shrinkage\n'
-            s += '> \tPrediction is made by looking at the value in the appropriate leaf of the tree\n'
-            s += '> ------------------------------' + '\n'
-            if hasattr(self, 'feature_names') and self.feature_names is not None:
-                return s + export_text(self.estimator_, feature_names=self.feature_names, show_weights=True)
-            else:
-                return s + export_text(self.estimator_, show_weights=True)
-        except:
-            return self.__class__.__name__
+        s = '> ------------------------------\n'
+        s += '> Decision Tree with Hierarchical Shrinkage\n'
+        s += '> \tPrediction is made by looking at the value in the appropriate leaf of the tree\n'
+        s += '> ------------------------------' + '\n'
+        if hasattr(self, 'feature_names') and self.feature_names is not None:
+            return s + export_text(self.estimator_, feature_names=self.feature_names, show_weights=True)
+        else:
+            return s + export_text(self.estimator_, show_weights=True)
 
     def __repr__(self):
-        try:
-            sklearn.utils.validation.check_is_fitted(self)
-            # s = self.__class__.__name__
-            # s += "("
-            # s += "estimator_="
-            # s += repr(self.estimator_)
-            # s += ", "
-            # s += "reg_param="
-            # s += str(self.reg_param)
-            # s += ", "
-            # s += "shrinkage_scheme_="
-            # s += self.shrinkage_scheme_
-            # s += ")"
-            # return s
-            attr_list = ["estimator_", "reg_param", "shrinkage_scheme_"]
-            s = self.__class__.__name__
-            s += "("
-            for attr in attr_list:
-                s += attr + "=" + repr(getattr(self, attr)) + ", "
-            s = s[:-2] + ")"
-            return s
-        except :
-            return self.__class__.__name__
+        # s = self.__class__.__name__
+        # s += "("
+        # s += "estimator_="
+        # s += repr(self.estimator_)
+        # s += ", "
+        # s += "reg_param="
+        # s += str(self.reg_param)
+        # s += ", "
+        # s += "shrinkage_scheme_="
+        # s += self.shrinkage_scheme_
+        # s += ")"
+        # return s
+        attr_list = ["estimator_", "reg_param", "shrinkage_scheme_"]
+        s = self.__class__.__name__
+        s += "("
+        for attr in attr_list:
+            s += attr + "=" + repr(getattr(self, attr)) + ", "
+        s = s[:-2] + ")"
+        return s
+
+
+class HSTreeRegressor(HSTree, RegressorMixin):
+    def __init__(self, estimator_: BaseEstimator = DecisionTreeRegressor(max_leaf_nodes=20),
+                 reg_param: float = 1, shrinkage_scheme_: str = 'node_based'):
+        super().__init__(estimator_=estimator_,
+                         reg_param=reg_param,
+                         shrinkage_scheme_=shrinkage_scheme_,
+                         )
 
 
 class HSTreeClassifier(HSTree, ClassifierMixin):
-    def __init__(self, estimator=None,
-                 reg_param: float = 1, shrinkage_scheme_: str = 'node_based'):
-        super().__init__(estimator=estimator,
-                         reg_param=reg_param,
-                         shrinkage_scheme_=shrinkage_scheme_,
-                         )
+    def __init__(self, estimator_: BaseEstimator = DecisionTreeClassifier(max_leaf_nodes=20),
+                 reg_param: float = 1, shrinkage_scheme_: str = 'node_based'):
+        super().__init__(estimator_=estimator_,
+                         reg_param=reg_param,
+                         shrinkage_scheme_=shrinkage_scheme_,
+                         )
 
-    def _validate_estimator(self):
-        """Check the estimator and set the estimator_ attribute."""
-        super()._validate_estimator(default=DecisionTreeClassifier(max_leaf_nodes=20))
 
-class HSTreeRegressor(HSTree, RegressorMixin):
-    def __init__(self, estimator=None,
-                 reg_param: float = 1, shrinkage_scheme_: str = 'node_based'):
-        super().__init__(estimator=estimator,
-                         reg_param=reg_param,
-                         shrinkage_scheme_=shrinkage_scheme_,
-                         )
-    def _validate_estimator(self):
-        """Check the estimator and set the estimator_ attribute."""
-        super()._validate_estimator(default=DecisionTreeRegressor(max_leaf_nodes=20))
+def _get_cv_criterion(scorer):
+    y_true = np.random.binomial(n=1, p=.5, size=100)
+
+    y_pred_good = y_true
+    y_pred_bad = np.random.uniform(0, 1, 100)
+
+    score_good = scorer(y_true, y_pred_good)
+    score_bad = scorer(y_true, y_pred_bad)
+
+    if score_good > score_bad:
+        return np.argmax
+    elif score_good < score_bad:
+        return np.argmin
 
 
 class HSTreeClassifierCV(HSTreeClassifier):
-    def __init__(self, estimator=None,
-                 reg_param_list: List[float] = [0.1, 1, 10, 50, 100, 500],
-                 shrinkage_scheme_: str = 'node_based',
-                 cv: int = 3, scoring=None):
+    def __init__(self, estimator_: BaseEstimator = None,
+                 reg_param_list: List[float] = [0, 0.1, 1, 10, 50, 100, 500],
+                 shrinkage_scheme_: str = 'node_based',
+                 max_leaf_nodes: int = 20,
+                 cv: int = 3, scoring=None, *args, **kwargs):
         """Cross-validation is used to select the best regularization parameter for hierarchical shrinkage.
 
-        Params
+        Params
         ------
         estimator_
             Sklearn estimator (already initialized).
             If no estimator_ is passed, sklearn decision tree is used
 
-        reg_param_list : list
+        max_leaf_nodes
             If estimator_ is None, then max_leaf_nodes is passed to the default decision tree
 
         args, kwargs
             Note: args, kwargs are not used but left so that imodels-experiments can still pass redundant args.
         """
-        super().__init__(estimator, reg_param=None)
+        if estimator_ is None:
+            estimator_ = DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes)
+        super().__init__(estimator_, reg_param=None)
         self.reg_param_list = np.array(reg_param_list)
         self.cv = cv
         self.scoring = scoring
@@ -263,26 +248,44 @@ def __init__(self, estimator=None,
         # raise Warning('Passed an already fitted estimator,'
         #               'but shrinking not applied until fit method is called.')
 
     def fit(self, X, y, *args, **kwargs):
-        self.scores_ = []
-        for reg_param in self.reg_param_list:
-            est = HSTreeClassifier(deepcopy(self.estimator), reg_param)
-            cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
-            self.scores_.append(np.mean(cv_scores))
-        self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
+        self.scores_ = [[] for _ in self.reg_param_list]
+        scorer = kwargs.get('scoring', log_loss)
+        kf = KFold(n_splits=self.cv)
+        for train_index, test_index in kf.split(X):
+            X_out, y_out = X[test_index, :], y[test_index]
+            X_in, y_in = X[train_index, :], y[train_index]
+            base_est = deepcopy(self.estimator_)
+            base_est.fit(X_in, y_in)
+            for i, reg_param in enumerate(self.reg_param_list):
+                est_hs = HSTreeClassifier(base_est, reg_param)
+                est_hs.fit(X_in, y_in)
+                self.scores_[i].append(scorer(y_out, est_hs.predict_proba(X_out)))
+        self.scores_ = [np.mean(s) for s in self.scores_]
+        cv_criterion = _get_cv_criterion(scorer)
+        self.reg_param = self.reg_param_list[cv_criterion(self.scores_)]
         super().fit(X=X, y=y, *args, **kwargs)
-        return self
+
+    def __repr__(self):
+        attr_list = ["estimator_", "reg_param_list", "shrinkage_scheme_",
+                     "cv", "scoring"]
+        s = self.__class__.__name__
+        s += "("
+        for attr in attr_list:
+            s += attr + "=" + repr(getattr(self, attr)) + ", "
+        s = s[:-2] + ")"
+        return s
 
 
 class HSTreeRegressorCV(HSTreeRegressor):
-    def __init__(self, estimator=None,
-                 reg_param_list: List[float] = [0.1, 1, 10, 50, 100, 500],
-                 shrinkage_scheme_: str = 'node_based',
-                 cv: int = 3, scoring=None):
+    def __init__(self, estimator_: BaseEstimator = None,
+                 reg_param_list: List[float] = [0, 0.1, 1, 10, 50, 100, 500],
+                 shrinkage_scheme_: str = 'node_based',
+                 max_leaf_nodes: int = 20,
+                 cv: int = 3, scoring=None, *args, **kwargs):
         """Cross-validation is used to select the best regularization parameter for hierarchical shrinkage.
 
-        Params
+        Params
         ------
         estimator_
             Sklearn estimator (already initialized).
@@ -294,7 +297,9 @@ def __init__(self, estimator=None,
         args, kwargs
             Note: args, kwargs are not used but left so that imodels-experiments can still pass redundant args.
         """
-        super().__init__(estimator, reg_param=None)
+        if estimator_ is None:
+            estimator_ = DecisionTreeRegressor(max_leaf_nodes=max_leaf_nodes)
+        super().__init__(estimator_, reg_param=None)
         self.reg_param_list = np.array(reg_param_list)
         self.cv = cv
         self.scoring = scoring
@@ -306,14 +311,32 @@ def __init__(self, estimator=None,
         #               'but shrinking not applied until fit method is called.')
 
     def fit(self, X, y, *args, **kwargs):
-        self.scores_ = []
-        for reg_param in self.reg_param_list:
-            est = HSTreeRegressor(deepcopy(self.estimator), reg_param)
-            cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
-            self.scores_.append(np.mean(cv_scores))
-        self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
+        self.scores_ = [[] for _ in self.reg_param_list]
+        kf = KFold(n_splits=self.cv)
+        scorer = kwargs.get('scoring', mean_squared_error)
+        for train_index, test_index in kf.split(X):
+            X_out, y_out = X[test_index, :], y[test_index]
+            X_in, y_in = X[train_index, :], y[train_index]
+            base_est = deepcopy(self.estimator_)
+            base_est.fit(X_in, y_in)
+            for i, reg_param in enumerate(self.reg_param_list):
+                est_hs = HSTreeRegressor(base_est, reg_param)
+                est_hs.fit(X_in, y_in)
+                self.scores_[i].append(scorer(est_hs.predict(X_out), y_out))
+        self.scores_ = [np.mean(s) for s in self.scores_]
+        cv_criterion = _get_cv_criterion(scorer)
+        self.reg_param = self.reg_param_list[cv_criterion(self.scores_)]
         super().fit(X=X, y=y, *args, **kwargs)
-        return self
+
+    def __repr__(self):
+        attr_list = ["estimator_", "reg_param_list", "shrinkage_scheme_",
+                     "cv", "scoring"]
+        s = self.__class__.__name__
+        s += "("
+        for attr in attr_list:
+            s += attr + "=" + repr(getattr(self, attr)) + ", "
+        s = s[:-2] + ")"
+        return s
 
 
 if __name__ == '__main__':
@@ -330,9 +353,9 @@ def fit(self, X, y, *args, **kwargs):
     print('X.shape', X.shape)
     print('ys', np.unique(y_train))
 
-    # m = HSTree(estimator=DecisionTreeClassifier(), reg_param=0.1)
+    # m = HSTree(estimator_=DecisionTreeClassifier(), reg_param=0.1)
     # m = DecisionTreeClassifier(max_leaf_nodes = 20,random_state=1, max_features=None)
-    m = DecisionTreeRegressor(random_state=42, max_leaf_nodes=20)
+    m = DecisionTreeClassifier(random_state=42)
     # print('best alpha', m.reg_param)
     m.fit(X_train, y_train)
     # m.predict_proba(X_train)  # just run this
@@ -342,15 +365,14 @@ def fit(self, X, y, *args, **kwargs):
 
     # x = DecisionTreeRegressor(random_state = 42, ccp_alpha = 0.3)
     # x.fit(X_train,y_train)
-    # m = HSTree(estimator=DecisionTreeRegressor(random_state=42, max_features=None), reg_param=10)
-    # m = HSTree(estimator=DecisionTreeClassifier(random_state=42, max_features=None), reg_param=0)
-    m = HSTreeClassifierCV(estimator=DecisionTreeRegressor(max_leaf_nodes=10, random_state=1),
-                           shrinkage_scheme_='node_based',
-                           reg_param_list=[0.1, 1, 2, 5, 10, 25, 50, 100, 500])
-    print(m)
-    # m = ShrunkTreeCV(estimator=DecisionTreeClassifier())
+    # m = HSTree(estimator_=DecisionTreeRegressor(random_state=42, max_features=None), reg_param=10)
+    # m = HSTree(estimator_=DecisionTreeClassifier(random_state=42, max_features=None), reg_param=0)
+    m = HSTreeRegressorCV(estimator_=DecisionTreeClassifier(random_state=42),
+                          shrinkage_scheme_='node_based',
+                          reg_param_list=[0.1, 1, 2, 5, 10, 25, 50, 100, 500])
+    # m = ShrunkTreeCV(estimator_=DecisionTreeClassifier())
 
-    # m = HSTreeClassifier(estimator = GradientBoostingClassifier(random_state = 10),reg_param = 5)
+    # m = HSTreeClassifier(estimator_ = GradientBoostingClassifier(random_state = 10),reg_param = 5)
     m.fit(X_train, y_train)
     print('best alpha', m.reg_param)
    # m.predict_proba(X_train)  # just run this
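
Note on the shrinkage arithmetic in _shrink_tree: the node_based branch telescopes along each root-to-leaf path. Starting from the root mean, every split adds (val - parent_val) / (1 + reg_param / parent_num) to the running cum_sum, which becomes the node's new value. A minimal standalone sketch of that arithmetic follows (the node means, sample counts, and the helper name shrink_along_path are made up for illustration; the real method walks sklearn's tree arrays, and keeping the root mean unshrunk is the standard hierarchical-shrinkage behavior):

# Illustrative sketch of the node_based update (hypothetical values;
# shrink_along_path is not part of imodels).
def shrink_along_path(node_means, parent_counts, reg_param):
    cum_sum = node_means[0]  # the root mean is kept as-is
    for parent_val, val, parent_num in zip(node_means[:-1], node_means[1:], parent_counts):
        # splits supported by fewer samples are shrunk the hardest
        cum_sum += (val - parent_val) / (1 + reg_param / parent_num)
    return cum_sum

# root mean 0.5 (100 samples), child mean 0.7, leaf mean 0.9 (its parent had 40 samples)
print(shrink_along_path([0.5, 0.7, 0.9], [100, 40], reg_param=10))  # ~0.842
print(shrink_along_path([0.5, 0.7, 0.9], [100, 40], reg_param=0))   # ~0.9, i.e. no shrinkage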
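
Usage sketch for the revised CV API: the rewritten fit methods exploit that shrinkage is post hoc, so each fold fits the base tree once and then re-applies every reg_param to a copy of it, scoring with log_loss (classifier) or mean_squared_error (regressor) by default, with _get_cv_criterion deciding whether lower or higher scores win. The following is a minimal sketch under the post-patch signatures, not a definitive recipe; it assumes imodels exposes HSTreeClassifierCV at the package top level, and the breast-cancer dataset is only an example:

# Minimal usage sketch (illustrative, not part of the patch).
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

from imodels import HSTreeClassifierCV  # assumes the top-level export exists

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

m = HSTreeClassifierCV(estimator_=DecisionTreeClassifier(max_leaf_nodes=20),
                       reg_param_list=[0, 0.1, 1, 10, 50, 100, 500],
                       cv=3)
m.fit(X_train, y_train)  # K-fold selection of reg_param, then a final fit + shrink
print('chosen reg_param:', m.reg_param)
print('test accuracy:', accuracy_score(y_test, m.predict(X_test)))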