
Commit

Merge pull request #199 from dswah/easy-plural
terms are easier to broadcast
dswah committed Sep 13, 2018
2 parents 05d7fac + 1092e06 commit 75701bc
Showing 4 changed files with 91 additions and 141 deletions.
2 changes: 1 addition & 1 deletion pygam/__init__.py
@@ -21,4 +21,4 @@
 __all__ = ['GAM', 'LinearGAM', 'LogisticGAM', 'GammaGAM', 'PoissonGAM',
            'InvGaussGAM', 'ExpectileGAM', 'l', 's', 'f', 'te', 'intercept']

-__version__ = '0.6.0'
+__version__ = '0.6.1'
175 changes: 65 additions & 110 deletions pygam/pygam.py
@@ -104,14 +104,6 @@ class GAM(Core, MetaTermMixin):
    link : str or Link object, default: 'identity'
        Link function to use in the model.
-   lam : float or iterable of floats > 0, default: 0.6
-       Smoothing strength; must be a positive float, or one positive float
-       per feature.
-       Larger values enforce stronger smoothing.
-       If only one float is specified, then it is copied for all features.
    fit_intercept : bool, default: True
        Specifies if a constant (a.k.a. bias or intercept) should be
        added to the decision function.
@@ -156,10 +148,10 @@ class GAM(Core, MetaTermMixin):
    International Biometric Society: A Crash Course on P-splines
    http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
    """
-   def __init__(self, terms='auto', lam=0.6, max_iter=100, tol=1e-4,
+   def __init__(self, terms='auto', max_iter=100, tol=1e-4,
                 distribution='normal', link='identity',
                 callbacks=['deviance', 'diffs'],
-                fit_intercept=True, verbose=False):
+                fit_intercept=True, verbose=False, **kwargs):

        self.max_iter = max_iter
        self.tol = tol
@@ -169,32 +161,36 @@ def __init__(self, terms='auto', lam=0.6, max_iter=100, tol=1e-4,
        self.verbose = verbose
        self.terms = TermList(terms) if isinstance(terms, Term) else terms
        self.fit_intercept = fit_intercept
-       self.lam = lam

+       for k, v in kwargs.items():
+           if k not in self._plural:
+               raise TypeError('__init__() got an unexpected keyword argument {}'.format(k))
+           setattr(self, k, v)

        # internal settings
        self._constraint_lam = 1e9 # regularization intensity for constraints
        self._constraint_l2 = 1e-3 # diagonal loading to improve conditioning
        self._constraint_l2_max = 1e-1 # maximum loading
        # self._opt = 0 # use 0 for numerically stable optimizer, 1 for naive
        self._term_location = 'terms' # for locating sub terms
-       self._include = ['lam']
+       # self._include = ['lam']

        # call super and exclude any variables
        super(GAM, self).__init__()

-   @property
-   def lam(self):
-       if self._has_terms():
-           return self.terms.lam
-       else:
-           return self._lam
-
-   @lam.setter
-   def lam(self, value):
-       if self._has_terms():
-           self.terms.lam = value
-       else:
-           self._lam = value
+   # @property
+   # def lam(self):
+   #     if self._has_terms():
+   #         return self.terms.lam
+   #     else:
+   #         return self._lam
+   #
+   # @lam.setter
+   # def lam(self, value):
+   #     if self._has_terms():
+   #         self.terms.lam = value
+   #     else:
+   #         self._lam = value

    @property
    def _is_fitted(self):
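
With this change, lam and any other name registered in self._plural is no longer an explicit constructor argument; it arrives through **kwargs, is validated against _plural, and is stored on the instance until the terms exist. A minimal sketch of the resulting behavior (hypothetical values, assuming 'lam' is listed in _plural):

    from pygam import GAM

    gam = GAM(lam=0.6)          # a scalar is held until fit time, then broadcast
    gam = GAM(lam=[0.1, 10.0])  # or one value per term

    try:
        GAM(lamb=0.6)           # hypothetical misspelled keyword
    except TypeError as err:
        print(err)              # __init__() got an unexpected keyword argument lamb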
@@ -281,7 +277,6 @@ def _validate_data_dep_params(self, X):
        if self.terms == 'auto':
            # one numerical spline per feature
            self.terms = TermList(*[SplineTerm(feat, verbose=self.verbose) for feat in range(m_features)])
-           self.terms.lam = self._lam

        elif self.terms is None:
            # no terms
@@ -298,6 +293,15 @@
        if len(self.terms) == 0:
            raise ValueError('At least 1 term must be specified')

+       # copy over things from plural
+       remove = []
+       for k, v in self.__dict__.items():
+           if k in self._plural:
+               setattr(self.terms, k, v)
+               remove.append(k)
+       for k in remove:
+           delattr(self, k)
+
        self.terms.compile(X)

    def loglikelihood(self, X, y, weights=None):
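
This is the broadcasting half of the change: once the TermList exists, every plural attribute stashed on the GAM instance is moved onto the terms, and the TermList fans a scalar out to each term. A short sketch of what that enables (assuming the default 'auto' spline terms):

    import numpy as np
    from pygam import LinearGAM

    X = np.random.rand(100, 3)
    y = np.sin(X[:, 0]) + X[:, 1] - X[:, 2] ** 2 + 0.1 * np.random.randn(100)

    LinearGAM(lam=0.3).fit(X, y)               # scalar: broadcast to all 3 spline terms
    LinearGAM(lam=[0.3, 1.0, 30.0]).fit(X, y)  # iterable: one value per term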
@@ -1709,6 +1713,7 @@ def gridsearch(self, X, y, weights=None, return_scores=False,
        # check if model fitted
        if not self._is_fitted:
            self._validate_params()
+           self._validate_data_dep_params(X)

        y = check_y(y, self.link, self.distribution, verbose=self.verbose)
        X = check_X(X, verbose=self.verbose)
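
Because plural attributes now live on the unfitted model until _validate_data_dep_params runs, gridsearch has to build and compile the terms before it starts cloning candidate models; that is what the added call does. In effect, a never-fitted model can be searched directly (a sketch):

    import numpy as np
    from pygam import LogisticGAM

    X = np.random.rand(200, 2)
    y = (X[:, 0] + 0.1 * np.random.randn(200) > 0.5).astype(int)

    gam = LogisticGAM().gridsearch(X, y)  # no prior .fit() required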
@@ -1801,23 +1806,21 @@ def gridsearch(self, X, y, weights=None, return_scores=False,

        # loop through candidate model params
        for param_grid in pbar(param_grid_list):
-
-           # define new model
-           gam = deepcopy(self)
-           gam.set_params(self.get_params())
-           gam.set_params(**param_grid)
-
-           # warm start with parameters from previous build
-           if models:
-               coef = models[-1].coef_
-               gam.set_params(coef_=coef, force=True, verbose=False)
-
            try:
-               # try fitting
+               # define new model
+               gam = deepcopy(self)
+               gam.set_params(self.get_params())
+               gam.set_params(**param_grid)
+
+               # warm start with parameters from previous build
+               if models:
+                   coef = models[-1].coef_
+                   gam.set_params(coef_=coef, force=True, verbose=False)
                gam.fit(X, y, weights)

            except ValueError as error:
-               msg = str(error) + '\non model:\n' + str(gam)
+               msg = str(error) + '\non model with params:\n' + str(param_grid)
                msg += '\nskipping...\n'
                if self.verbose:
                    warnings.warn(msg)
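
Moving the model construction inside the try block means a candidate grid point that fails as early as set_params (for example, a lam list whose length does not match the number of terms) is skipped with a warning instead of aborting the whole search, and the message now reports the offending grid point rather than the full model repr. Typical usage is unchanged (a sketch; gridsearch accepts grids for plural parameters as keyword arguments):

    import numpy as np
    from pygam import LinearGAM

    X = np.random.rand(200, 2)
    y = np.sin(3 * X[:, 0]) + X[:, 1] + 0.1 * np.random.randn(200)

    # try 11 smoothing strengths, warm-starting each fit from the previous one
    gam = LinearGAM().gridsearch(X, y, lam=np.logspace(-3, 3, 11))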
@@ -2125,14 +2128,6 @@ class LinearGAM(GAM):
        default: ['deviance', 'diffs']
        Names of callback objects to call during the optimization loop.
-   lam : float or iterable of floats > 0, default: 0.6
-       Smoothing strength; must be a positive float, or one positive float
-       per feature.
-       Larger values enforce stronger smoothing.
-       If only one float is specified, then it is copied for all features.
    fit_intercept : bool, default: True
        Specifies if a constant (a.k.a. bias or intercept) should be
        added to the decision function.
@@ -2181,18 +2176,18 @@ class LinearGAM(GAM):
    International Biometric Society: A Crash Course on P-splines
    http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
    """
-   def __init__(self, terms='auto', lam=0.6, max_iter=100, tol=1e-4,
+   def __init__(self, terms='auto', max_iter=100, tol=1e-4,
                 scale=None, callbacks=['deviance', 'diffs'],
-                fit_intercept=True, verbose=False):
+                fit_intercept=True, verbose=False, **kwargs):
        self.scale = scale
        super(LinearGAM, self).__init__(terms=terms,
                                        distribution=NormalDist(scale=self.scale),
                                        link='identity',
-                                       lam=lam,
                                        max_iter=max_iter,
                                        tol=tol,
                                        fit_intercept=fit_intercept,
-                                       verbose=verbose)
+                                       verbose=verbose,
+                                       **kwargs)

        self._exclude += ['distribution', 'link']
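
The same pattern repeats for every subclass below: lam disappears from the explicit signature and **kwargs is forwarded to GAM.__init__, so plural parameters flow through unchanged. For instance (a sketch, assuming n_splines is also registered in _plural):

    from pygam import LinearGAM

    gam = LinearGAM(lam=10.0, n_splines=25)  # both travel through **kwargs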

@@ -2258,14 +2253,6 @@ class LogisticGAM(GAM):
        default: ['deviance', 'diffs']
        Names of callback objects to call during the optimization loop.
-   lam : float or iterable of floats > 0, default: 0.6
-       Smoothing strength; must be a positive float, or one positive float
-       per feature.
-       Larger values enforce stronger smoothing.
-       If only one float is specified, then it is copied for all features.
    fit_intercept : bool, default: True
        Specifies if a constant (a.k.a. bias or intercept) should be
        added to the decision function.
@@ -2310,20 +2297,20 @@ class LogisticGAM(GAM):
    International Biometric Society: A Crash Course on P-splines
    http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
    """
-   def __init__(self, terms='auto', lam=0.6, max_iter=100, tol=1e-4,
+   def __init__(self, terms='auto', max_iter=100, tol=1e-4,
                 callbacks=['deviance', 'diffs', 'accuracy'],
-                fit_intercept=True, verbose=False):
+                fit_intercept=True, verbose=False, **kwargs):

        # call super
        super(LogisticGAM, self).__init__(terms=terms,
                                          distribution='binomial',
                                          link='logit',
-                                         lam=lam,
                                          max_iter=max_iter,
                                          tol=tol,
                                          callbacks=callbacks,
                                          fit_intercept=fit_intercept,
-                                         verbose=verbose)
+                                         verbose=verbose,
+                                         **kwargs)
        # ignore any variables
        self._exclude += ['distribution', 'link']

@@ -2414,14 +2401,6 @@ class PoissonGAM(GAM):
        default: ['deviance', 'diffs']
        Names of callback objects to call during the optimization loop.
-   lam : float or iterable of floats > 0, default: 0.6
-       Smoothing strength; must be a positive float, or one positive float
-       per feature.
-       Larger values enforce stronger smoothing.
-       If only one float is specified, then it is copied for all features.
    fit_intercept : bool, default: True
        Specifies if a constant (a.k.a. bias or intercept) should be
        added to the decision function.
@@ -2466,20 +2445,20 @@ class PoissonGAM(GAM):
    International Biometric Society: A Crash Course on P-splines
    http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
    """
-   def __init__(self, terms='auto', lam=0.6, max_iter=100, tol=1e-4,
+   def __init__(self, terms='auto', max_iter=100, tol=1e-4,
                 callbacks=['deviance', 'diffs'],
-                fit_intercept=True, verbose=False):
+                fit_intercept=True, verbose=False, **kwargs):

        # call super
        super(PoissonGAM, self).__init__(terms=terms,
                                         distribution='poisson',
                                         link='log',
-                                        lam=lam,
                                         max_iter=max_iter,
                                         tol=tol,
                                         callbacks=callbacks,
                                         fit_intercept=fit_intercept,
-                                        verbose=verbose)
+                                        verbose=verbose,
+                                        **kwargs)
        # ignore any variables
        self._exclude += ['distribution', 'link']

@@ -2769,14 +2748,6 @@ class GammaGAM(GAM):
        default: ['deviance', 'diffs']
        Names of callback objects to call during the optimization loop.
-   lam : float or iterable of floats > 0, default: 0.6
-       Smoothing strength; must be a positive float, or one positive float
-       per feature.
-       Larger values enforce stronger smoothing.
-       If only one float is specified, then it is copied for all features.
    fit_intercept : bool, default: True
        Specifies if a constant (a.k.a. bias or intercept) should be
        added to the decision function.
@@ -2825,19 +2796,19 @@ class GammaGAM(GAM):
    International Biometric Society: A Crash Course on P-splines
    http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
    """
-   def __init__(self, terms='auto', lam=0.6, max_iter=100, tol=1e-4,
+   def __init__(self, terms='auto', max_iter=100, tol=1e-4,
                 scale=None, callbacks=['deviance', 'diffs'],
-                fit_intercept=True, verbose=False):
+                fit_intercept=True, verbose=False, **kwargs):
        self.scale = scale
        super(GammaGAM, self).__init__(terms=terms,
                                       distribution=GammaDist(scale=self.scale),
                                       link='log',
-                                      lam=lam,
                                       max_iter=max_iter,
                                       tol=tol,
                                       callbacks=callbacks,
                                       fit_intercept=fit_intercept,
-                                      verbose=verbose)
+                                      verbose=verbose,
+                                      **kwargs)

        self._exclude += ['distribution', 'link']

@@ -2890,14 +2861,6 @@ class InvGaussGAM(GAM):
        default: ['deviance', 'diffs']
        Names of callback objects to call during the optimization loop.
-   lam : float or iterable of floats > 0, default: 0.6
-       Smoothing strength; must be a positive float, or one positive float
-       per feature.
-       Larger values enforce stronger smoothing.
-       If only one float is specified, then it is copied for all features.
    fit_intercept : bool, default: True
        Specifies if a constant (a.k.a. bias or intercept) should be
        added to the decision function.
@@ -2946,19 +2909,19 @@ class InvGaussGAM(GAM):
    International Biometric Society: A Crash Course on P-splines
    http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
    """
-   def __init__(self, terms='auto', lam=0.6, max_iter=100, tol=1e-4,
+   def __init__(self, terms='auto', max_iter=100, tol=1e-4,
                 scale=None, callbacks=['deviance', 'diffs'],
-                fit_intercept=True, verbose=False):
+                fit_intercept=True, verbose=False, **kwargs):
        self.scale = scale
        super(InvGaussGAM, self).__init__(terms=terms,
                                          distribution=InvGaussDist(scale=self.scale),
                                          link='log',
-                                         lam=lam,
                                          max_iter=max_iter,
                                          tol=tol,
                                          callbacks=callbacks,
                                          fit_intercept=fit_intercept,
-                                         verbose=verbose)
+                                         verbose=verbose,
+                                         **kwargs)

        self._exclude += ['distribution', 'link']

@@ -3004,14 +2967,6 @@ class ExpectileGAM(GAM):
        default: ['deviance', 'diffs']
        Names of callback objects to call during the optimization loop.
-   lam : float or iterable of floats > 0, default: 0.6
-       Smoothing strength; must be a positive float, or one positive float
-       per feature.
-       Larger values enforce stronger smoothing.
-       If only one float is specified, then it is copied for all features.
    fit_intercept : bool, default: True
        Specifies if a constant (a.k.a. bias or intercept) should be
        added to the decision function.
@@ -3057,20 +3012,20 @@ class ExpectileGAM(GAM):
    International Biometric Society: A Crash Course on P-splines
    http://www.ibschannel2015.nl/project/userfiles/Crash_course_handout.pdf
    """
-   def __init__(self, terms='auto', lam=0.6, max_iter=100, tol=1e-4,
+   def __init__(self, terms='auto', max_iter=100, tol=1e-4,
                 scale=None, callbacks=['deviance', 'diffs'],
-                fit_intercept=True, expectile=0.5, verbose=False):
+                fit_intercept=True, expectile=0.5, verbose=False, **kwargs):
        self.scale = scale
        self.expectile = expectile
        super(ExpectileGAM, self).__init__(terms=terms,
                                           distribution=NormalDist(scale=self.scale),
                                           link='identity',
-                                          lam=lam,
                                           max_iter=max_iter,
                                           tol=tol,
                                           callbacks=callbacks,
                                           fit_intercept=fit_intercept,
-                                          verbose=verbose)
+                                          verbose=verbose,
+                                          **kwargs)

        self._exclude += ['distribution', 'link']

