Merge pull request #202 from dswah/no-auto

No auto
dswah · Sep 17, 2018 · 10615a9 · 10615a9
2 parents 58df6e9 + 422fdea
commit 10615a9
Show file tree

Hide file tree

Showing 5 changed files with 86 additions and 83 deletions.
diff --git a/pygam/__init__.py b/pygam/__init__.py
@@ -21,4 +21,4 @@
 __all__ = ['GAM', 'LinearGAM', 'LogisticGAM', 'GammaGAM', 'PoissonGAM',
            'InvGaussGAM', 'ExpectileGAM', 'l', 's', 'f', 'te', 'intercept']
 
-__version__ = '0.6.2'
+__version__ = '0.6.3'
diff --git a/pygam/pygam.py b/pygam/pygam.py
@@ -1089,7 +1089,7 @@ def _estimate_r2(self, X=None, y=None, mu=None, weights=None):
         None
         """
         if mu is None:
-            mu = self.predict_mu_(X=X)
+            mu = self.predict_mu(X=X)
 
         if weights is None:
             weights = np.ones_like(y).astype('float64')

diff --git a/pygam/terms.py b/pygam/terms.py
@@ -15,17 +15,6 @@
 from pygam.utils import isiterable, check_param, flatten, gen_edge_knots, b_spline_basis, tensor_product
 from pygam.penalties import PENALTIES, CONSTRAINTS
 
-DEFAULTS = {'lam': 0.6,
-            'dtype': 'numerical',
-            'fit_linear': False,
-            'fit_splines': True,
-            'penalties': 'auto',
-            'constraints': None,
-            'basis': 'ps',
-            'by': None,
-            'spline_order': 3,
-            'n_splines': 20
-            }
 
 class Term(Core):
     __metaclass__ = ABCMeta
@@ -151,9 +140,8 @@ def _validate_arguments(self):
         None
         """
         # dtype
-        if self.dtype not in ['auto', 'numerical', 'categorical']:
-            raise ValueError("dtype must be in ['auto', 'numerical', "\
-                             "'categorical'], "\
+        if self.dtype not in ['numerical', 'categorical']:
+            raise ValueError("dtype must be in ['numerical','categorical'], "\
                              "but found dtype = {}".format(self.dtype))
 
         # fit_linear XOR fit_splines

diff --git a/pygam/tests/test_GAM_methods.py b/pygam/tests/test_GAM_methods.py
@@ -393,73 +393,6 @@ def test_pvalue_rejects_useless_feature(wage_X_y):
     print(p_values)
     assert(p_values[-2] > .5) # because -1 is intercept
 
-def test_pvalue_invariant_to_scale(wage_X_y):
-    """
-    regression test.
-
-    a bug made the F-statistic sensitive to scale changes, when it should be invariant.
-
-    check that a p-value should not change when we change the scale of the response
-    """
-    X, y = wage_X_y
-
-    gamA = LinearGAM(s(0) + s(1) + f(2)).fit(X, y * 1000000)
-    gamB = LinearGAM(s(0) + s(1) + f(2)).fit(X, y)
-
-    assert np.allclose(gamA.statistics_['p_values'], gamB.statistics_['p_values'])
-
-def test_2d_y_still_allow_fitting_in_PoissonGAM(coal_X_y):
-    """
-    regression test.
-
-    there was a bug where we forgot to check the y_array before converting
-    exposure to weights.
-    """
-    X, y = coal_X_y
-    two_d_data = np.ones_like(y).ravel()[:, None]
-
-    # 2d y should cause no problems now
-    gam = PoissonGAM().fit(X, y[:, None])
-    assert gam._is_fitted
-
-    # 2d weghts should cause no problems now
-    gam = PoissonGAM().fit(X, y, weights=two_d_data)
-    assert gam._is_fitted
-
-    # 2d exposure should cause no problems now
-    gam = PoissonGAM().fit(X, y, exposure=two_d_data)
-    assert gam._is_fitted
-
-def test_non_int_exposure_produced_no_inf_in_PoissonGAM_ll(coal_X_y):
-    """
-    regression test.
-
-    there was a bug where we forgot to round the rescaled counts before
-    computing the loglikelihood. since Poisson requires integer observations,
-    small numerical errors caused the pmf to return -inf, which shows up
-    in the loglikelihood computations, AIC, AICc..
-    """
-    X, y = coal_X_y
-
-    rate = 1.2 + np.cos(np.linspace(0, 2. * np.pi, len(y)))
-
-    gam = PoissonGAM().fit(X, y, exposure=rate)
-
-    assert np.isfinite(gam.statistics_['loglikelihood'])
-
-def test_initial_estimate_runs_for_int_obseravtions(toy_classification_X_y):
-    """
-    regression test
-
-    ._initial_estimate would fail when trying to add small numbers to
-    integer observations
-
-    casting the observations to float in that method fixes that
-    """
-    X, y = toy_classification_X_y
-    gam = LogisticGAM().fit(X, y)
-    assert gam._is_fitted
-
 def test_fit_quantile_is_close_enough(head_circumference_X_y):
     """see that we get close to the desired quantile
 
@@ -522,3 +455,80 @@ def test_fit_quantile_raises_ValueError(head_circumference_X_y):
 
     with pytest.raises(ValueError):
         ExpectileGAM().fit_quantile(X, y, max_iter=-1, quantile=0.5)
+
+class TestRegressions(object):
+    def test_pvalue_invariant_to_scale(self, wage_X_y):
+        """
+        regression test.
+
+        a bug made the F-statistic sensitive to scale changes, when it should be invariant.
+
+        check that a p-value should not change when we change the scale of the response
+        """
+        X, y = wage_X_y
+
+        gamA = LinearGAM(s(0) + s(1) + f(2)).fit(X, y * 1000000)
+        gamB = LinearGAM(s(0) + s(1) + f(2)).fit(X, y)
+
+        assert np.allclose(gamA.statistics_['p_values'], gamB.statistics_['p_values'])
+
+    def test_2d_y_still_allow_fitting_in_PoissonGAM(self, coal_X_y):
+        """
+        regression test.
+
+        there was a bug where we forgot to check the y_array before converting
+        exposure to weights.
+        """
+        X, y = coal_X_y
+        two_d_data = np.ones_like(y).ravel()[:, None]
+
+        # 2d y should cause no problems now
+        gam = PoissonGAM().fit(X, y[:, None])
+        assert gam._is_fitted
+
+        # 2d weights should cause no problems now
+        gam = PoissonGAM().fit(X, y, weights=two_d_data)
+        assert gam._is_fitted
+
+        # 2d exposure should cause no problems now
+        gam = PoissonGAM().fit(X, y, exposure=two_d_data)
+        assert gam._is_fitted
+
+    def test_non_int_exposure_produced_no_inf_in_PoissonGAM_ll(self, coal_X_y):
+        """
+        regression test.
+
+        there was a bug where we forgot to round the rescaled counts before
+        computing the loglikelihood. since Poisson requires integer observations,
+        small numerical errors caused the pmf to return -inf, which shows up
+        in the loglikelihood computations, AIC, AICc..
+        """
+        X, y = coal_X_y
+
+        rate = 1.2 + np.cos(np.linspace(0, 2. * np.pi, len(y)))
+
+        gam = PoissonGAM().fit(X, y, exposure=rate)
+
+        assert np.isfinite(gam.statistics_['loglikelihood'])
+
+    def test_initial_estimate_runs_for_int_obseravtions(self, toy_classification_X_y):
+        """
+        regression test
+
+        ._initial_estimate would fail when trying to add small numbers to
+        integer observations
+
+        casting the observations to float in that method fixes that
+        """
+        X, y = toy_classification_X_y
+        gam = LogisticGAM().fit(X, y)
+        assert gam._is_fitted
+
+    def test_r_squared_for_new_dataset(self, mcycle_gam, mcycle_X_y):
+        """
+        regression test
+
+        estimate r squared used to refer to a non-existent method when `mu=None`
+        """
+        X, y = mcycle_X_y
+        mcycle_gam._estimate_r2(X, y)
diff --git a/pygam/tests/test_terms.py b/pygam/tests/test_terms.py
@@ -190,3 +190,8 @@ def test_correct_smoothing_in_tensors(toy_interaction_X_y):
     #  smoothing the sinusoid function heavily reduces fit quality
     gam = LinearGAM(te(0, 1, lam=[10000, 0.6])).fit(X, y)
     assert gam.statistics_['pseudo_r2']['explained_deviance'] < 0.1
+
+class TestRegressions(object):
+    def test_no_auto_dtype(self):
+        with pytest.raises(ValueError):
+            SplineTerm(feature=0, dtype='auto')