Speed improvements for pg.multicomp (#271)

* Speed improvements in holm + accept Series * Speed improvements in FDR * Handle missing values * Update changelog
raphaelvallat · Jun 18, 2022 · d7481ab · d7481ab
1 parent 4bea8eb
commit d7481ab
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 4 deletions.
diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -25,6 +25,7 @@ a. The eta-squared (``n2``) effect size was not properly calculated in one-way a
 a. The :py:func:`pingouin.pairwise_ttests` has been renamed to :py:func:`pingouin.pairwise_tests`. Non-parametric tests are also supported in this function with the `parametric=False` argument, and thus the name "ttests" was misleading (see `issue 209 <https://github.com/raphaelvallat/pingouin/issues/209>`_).
 b. Allow :py:func:`pingouin.bayesfactor_binom` to take Beta alternative model. `PR 252 <https://github.com/raphaelvallat/pingouin/pull/252>`_.
 c. Allow keyword arguments for logistic regression in :py:func:`pingouin.mediation_analysis`. `PR 245 <https://github.com/raphaelvallat/pingouin/pull/245>`_.
+d. Major speed improvements for the Holm and FDR corrections in :py:func:`pingouin.multicomp`. `PR 271 <https://github.com/raphaelvallat/pingouin/pull/271>`_.
 
 v0.5.1 (February 2022)
 ----------------------

diff --git a/pingouin/multicomp.py b/pingouin/multicomp.py
@@ -1,6 +1,7 @@
 # Author: Raphael Vallat <raphaelvallat9@gmail.com>
 # Date: April 2018
 import numpy as np
+from pandas import Series
 
 __all__ = ["multicomp"]
 
@@ -104,7 +105,7 @@ def fdr(pvals, alpha=0.05, method='fdr_bh'):
         ecdffactor /= cm
 
     # Now we adjust the p-values
-    pvals_corr = np.diag(pvals_sorted / ecdffactor[..., None])
+    pvals_corr = pvals_sorted[:ntests] / ecdffactor
     pvals_corr = np.minimum.accumulate(pvals_corr[::-1])[::-1]
     pvals_corr = np.clip(pvals_corr, None, 1)
 
@@ -267,7 +268,7 @@ def holm(pvals, alpha=.05):
     ntests = pvals.size - num_nan
 
     # Now we adjust the p-values
-    pvals_corr = np.diag(pvals_sorted * np.arange(ntests, 0, -1)[..., None])
+    pvals_corr = pvals_sorted[:ntests] * np.arange(ntests, 0, -1)
     pvals_corr = np.maximum.accumulate(pvals_corr)
     pvals_corr = np.clip(pvals_corr, None, 1)
 
@@ -470,11 +471,11 @@ def multicomp(pvals, alpha=0.05, method='holm'):
     [False  True False False  True] [0.5    0.009     nan 0.108  0.0012]
     """
     # Safety check
-    assert isinstance(pvals, (list, np.ndarray)), "pvals must be list or array"
-    pvals = np.squeeze(np.asarray(pvals))
+    assert isinstance(pvals, (list, np.ndarray, Series)), "pvals must be list or array"
     assert isinstance(alpha, float), 'alpha must be a float.'
     assert isinstance(method, str), 'method must be a string.'
     assert 0 < alpha < 1, 'alpha must be between 0 and 1.'
+    pvals = np.asarray(pvals)
 
     if method.lower() in ['b', 'bonf', 'bonferroni']:
         reject, pvals_corrected = bonf(pvals, alpha=alpha)