diff --git a/hyppo/conditional/FCIT.py b/hyppo/conditional/FCIT.py index 333fa9dc..59cdb7a6 100644 --- a/hyppo/conditional/FCIT.py +++ b/hyppo/conditional/FCIT.py @@ -164,8 +164,8 @@ def test(self, x, y, z=None): >>> model = DecisionTreeRegressor() >>> cv_grid = {"min_samples_split": [2, 8, 64, 512, 1e-2, 0.2, 0.4]} >>> stat, pvalue = FCIT(model=model, cv_grid=cv_grid).test(x1.T, y1.T, z1) - >>> '%.2f, %.3f' % (stat, pvalue) - '-3.59, 0.995' + >>> '%.1f, %.2f' % (stat, pvalue) + '-3.6, 1.00' """ n_samples = x.shape[0] diff --git a/hyppo/conditional/kci.py b/hyppo/conditional/kci.py index 97f87482..08696ed7 100644 --- a/hyppo/conditional/kci.py +++ b/hyppo/conditional/kci.py @@ -98,10 +98,10 @@ def test(self, x, y): >>> from hyppo.conditional import KCI >>> from hyppo.tools.indep_sim import linear >>> np.random.seed(123456789) - >>> x, y = linear(n, 1) + >>> x, y = linear(100, 1) >>> stat, pvalue = KCI().test(x, y) - >>> print("Statistic: ", stat) - >>> print("p-value: ", pvalue) + >>> '%.1f, %.2f' % (stat, pvalue) + '544.7, 0.00' """ T = len(y) diff --git a/hyppo/independence/dcorr.py b/hyppo/independence/dcorr.py index 3959be45..8530aebf 100644 --- a/hyppo/independence/dcorr.py +++ b/hyppo/independence/dcorr.py @@ -404,7 +404,9 @@ def _dcov(distx, disty, bias=False, only_dcov=True): # pragma: no cover if only_dcov: N = distx.shape[0] if bias: - stat = 1 / (N**2) * stat + # only biased Dcov is square-rooted + # https://search.r-project.org/CRAN/refmans/energy/html/dcovu.html + stat = np.sqrt(1 / (N**2) * stat) else: stat = 1 / (N * (N - 3)) * stat @@ -437,6 +439,10 @@ def _dcorr(distx, disty, bias=False, is_fast=False): # pragma: no cover # calculate generalized test statistic else: - stat = np.sqrt(covar / np.real(np.sqrt(varx * vary))) + stat = covar / np.sqrt(np.real(varx * vary)) + # only biased Dcov is square-rooted + # https://search.r-project.org/CRAN/refmans/energy/html/dcovu.html + if bias: + stat = np.sqrt(stat) return stat diff --git a/hyppo/independence/kmerf.py b/hyppo/independence/kmerf.py index 70477c23..1dd36360 100644 --- a/hyppo/independence/kmerf.py +++ b/hyppo/independence/kmerf.py @@ -242,7 +242,7 @@ def test(self, x, y, reps=1000, workers=1, auto=True, random_state=None): stat = self.statistic(x, y) statx = _dcorr(self.distx, self.distx, bias=False, is_fast=False) staty = _dcorr(self.disty, self.disty, bias=False, is_fast=False) - pvalue = chi2.sf(stat ** 2 / np.sqrt(statx ** 2 * staty ** 2) * n + 1, 1) + pvalue = chi2.sf(stat / np.sqrt(statx * staty) * n + 1, 1) self.stat = stat self.pvalue = pvalue self.null_dist = None diff --git a/hyppo/independence/tests/test_kmerf.py b/hyppo/independence/tests/test_kmerf.py index 4fb0c213..1a889173 100644 --- a/hyppo/independence/tests/test_kmerf.py +++ b/hyppo/independence/tests/test_kmerf.py @@ -16,7 +16,7 @@ class TestKMERFStat(object): (linear, 1.0, 9.19834440770024e-24), # test linear simulation ( spiral, - 0.4284332761214592, + 0.1835550720881665, 1.3087565060526607e-05, ), # test spiral simulation ( @@ -34,7 +34,7 @@ def test_oned(self, sim, obs_stat, obs_pvalue): # test stat and pvalue stat1 = KMERF().statistic(x, y) - stat2, pvalue, _ = KMERF().test(x, y, reps=0) + stat2, pvalue, _ = KMERF().test(x, y) assert_approx_equal(stat1, obs_stat, significant=1) assert_approx_equal(stat2, obs_stat, significant=1) assert_approx_equal(pvalue, obs_pvalue, significant=1) diff --git a/hyppo/independence/tests/test_maxmargin.py b/hyppo/independence/tests/test_maxmargin.py index 47a6a5fc..7ee36cea 100644 --- a/hyppo/independence/tests/test_maxmargin.py +++ b/hyppo/independence/tests/test_maxmargin.py @@ -8,7 +8,7 @@ class TestMaxMarginStat: @pytest.mark.parametrize( - "n, obs_stat", [(100, 0.8356763870539561), (200, 0.8382397975358477)] + "n, obs_stat", [(100, 0.6983550238795534), (200, 0.7026459581729388)] ) @pytest.mark.parametrize("obs_pvalue", [1 / 1000]) def test_linear_oned(self, n, obs_stat, obs_pvalue): diff --git a/hyppo/ksample/energy.py b/hyppo/ksample/energy.py index ed110cf9..633179ec 100644 --- a/hyppo/ksample/energy.py +++ b/hyppo/ksample/energy.py @@ -116,7 +116,7 @@ def statistic(self, x, y): # exact equivalence transformation Dcorr and Energy stat = ( - _dcov(distx, disty, self.bias, only_dcov=True) + _dcov(distx, disty, bias=self.bias, only_dcov=True) * ((n + m) ** 4) / (2 * (n**2) * (m**2)) ) diff --git a/hyppo/ksample/ksamp.py b/hyppo/ksample/ksamp.py index 1636153e..33064577 100644 --- a/hyppo/ksample/ksamp.py +++ b/hyppo/ksample/ksamp.py @@ -281,7 +281,7 @@ def test(self, *args, reps=1000, workers=1, auto=True, random_state=None): >>> z = np.arange(10) >>> stat, pvalue = KSample("Dcorr").test(x, y) >>> '%.3f, %.1f' % (stat, pvalue) - '-0.136, 1.0' + '0.000, 1.0' """ inputs = list(args) check_input = _CheckInputs( diff --git a/hyppo/ksample/mmd.py b/hyppo/ksample/mmd.py index ec5fc8d3..005c4b53 100644 --- a/hyppo/ksample/mmd.py +++ b/hyppo/ksample/mmd.py @@ -159,7 +159,7 @@ def test(self, x, y, reps=1000, workers=1, auto=True, random_state=None): >>> y = x >>> stat, pvalue = MMD().test(x, y) >>> '%.3f, %.1f' % (stat, pvalue) - '-0.015, 1.0' + '0.000, 1.0' """ check_input = _CheckInputs( inputs=[x, y], diff --git a/hyppo/ksample/tests/test_energy.py b/hyppo/ksample/tests/test_energy.py index 82b808ae..09ea63ce 100644 --- a/hyppo/ksample/tests/test_energy.py +++ b/hyppo/ksample/tests/test_energy.py @@ -36,7 +36,7 @@ def test_against_dcor(self): y = x**2 stat = Energy(bias=True).statistic(x.reshape(-1, 1), y.reshape(-1, 1)) - assert_almost_equal(stat, 3146.5236, decimal=4) + assert_almost_equal(stat, 158.6574574358228, decimal=4) class TestEnergyTypeIError: diff --git a/hyppo/ksample/tests/test_ksamp.py b/hyppo/ksample/tests/test_ksamp.py index 81c0150e..867e8c00 100644 --- a/hyppo/ksample/tests/test_ksamp.py +++ b/hyppo/ksample/tests/test_ksamp.py @@ -9,7 +9,7 @@ class TestKSample: @pytest.mark.parametrize( "n, obs_stat, obs_pvalue, indep_test", - [(1000, 4.28e-7, 1.0, "CCA"), (100, 0.2136515250373329, 0.001, "Dcorr")], + [(1000, 4.28e-7, 1.0, "CCA"), (100, 0.045646974150778084, 0.001, "Dcorr")], ) def test_twosamp_linear_oned(self, n, obs_stat, obs_pvalue, indep_test): np.random.seed(123456789) @@ -24,7 +24,7 @@ def test_rf(self): x, y = rot_ksamp("linear", 50, 1, k=2) stat, _, _ = KSample("KMERF").test(x, y, reps=0) - assert_almost_equal(stat, 0.5209659346243869, decimal=1) + assert_almost_equal(stat, 0.27140550503906097, decimal=1) def test_maxmargin(self): np.random.seed(123456789) diff --git a/hyppo/time_series/dcorrx.py b/hyppo/time_series/dcorrx.py index 61e5cd26..6f47186e 100644 --- a/hyppo/time_series/dcorrx.py +++ b/hyppo/time_series/dcorrx.py @@ -151,7 +151,7 @@ def test(self, x, y, reps=1000, workers=1, random_state=None): >>> y = x >>> stat, pvalue, dcorrx_dict = DcorrX().test(x, y, reps = 100) >>> '%.1f, %.2f, %d' % (stat, pvalue, dcorrx_dict['opt_lag']) - '1.0, 0.04, 0' + '1.0, 0.05, 0' """ check_input = _CheckInputs( x, diff --git a/hyppo/time_series/mgcx.py b/hyppo/time_series/mgcx.py index f0691fa4..18f3b8f7 100644 --- a/hyppo/time_series/mgcx.py +++ b/hyppo/time_series/mgcx.py @@ -171,7 +171,7 @@ def test(self, x, y, reps=1000, workers=1, random_state=None): >>> stat, pvalue, mgcx_dict = MGCX().test(x, y, reps = 100) >>> '%.1f, %.2f, [%d, %d]' % (stat, pvalue, mgcx_dict['opt_scale'][0], ... mgcx_dict['opt_scale'][1]) - '1.0, 0.05, [7, 7]' + '1.0, 0.06, [7, 7]' The increasing the max_lag can increase the ability to identify dependence. @@ -182,7 +182,7 @@ def test(self, x, y, reps=1000, workers=1, random_state=None): >>> y = np.roll(x, -1) >>> stat, pvalue, mgcx_dict = MGCX(max_lag=1).test(x, y, reps=1000) >>> '%.1f, %.2f, %d' % (stat, pvalue, mgcx_dict['opt_lag']) - '1.1, 0.01, 1' + '1.1, 0.00, 1' """ check_input = _CheckInputs( x, diff --git a/hyppo/tools/common.py b/hyppo/tools/common.py index c3265cd5..8e47ed59 100644 --- a/hyppo/tools/common.py +++ b/hyppo/tools/common.py @@ -678,6 +678,6 @@ def chi2_approx(calc_stat, x, y): """ n = x.shape[0] stat = calc_stat(x, y) - pvalue = chi2.sf((stat ** 2) * n + 1, 1) + pvalue = chi2.sf(stat * n + 1, 1) return stat, pvalue