Commit
Fix ci in plot_shift + doc improvements in bootstrap (#282)
* Fix invalid ci in plot_shift + better doc in bootstrap

* Check doc for 0.5.2
raphaelvallat committed Jun 19, 2022
1 parent 1bc5a86 commit e7479ae
Showing 7 changed files with 91 additions and 86 deletions.
5 changes: 3 additions & 2 deletions docs/changelog.rst
@@ -8,8 +8,8 @@ What's new

*************

v0.5.2
------
v0.5.2 (June 2022)
------------------

**Bugfixes**

@@ -18,6 +18,7 @@ a. The eta-squared (``n2``) effect size was not properly calculated in one-way a
.. warning:: Please double check any effect sizes previously obtained with the :py:func:`pingouin.rm_anova` function.

b. Fixed invalid resampling behavior for bivariate functions in :py:func:`pingouin.compute_bootci` when x and y were not paired. `PR 281 <https://github.com/raphaelvallat/pingouin/pull/281>`_.
c. Fixed bug where ``confidence`` (previously ``ci``) was ignored when calculating the bootstrapped confidence intervals in :py:func:`pingouin.plot_shift`. `PR 282 <https://github.com/raphaelvallat/pingouin/pull/282>`_.

**Enhancements**

2 changes: 1 addition & 1 deletion pingouin/bayesian.py
@@ -60,7 +60,7 @@ def bayesfactor_ttest(t, nx, ny=None, paired=False, alternative='two-sided', r=.
See also
--------
ttest : T-test
pairwise_ttest : Pairwise T-tests
pairwise_tests : Pairwise T-tests
bayesfactor_pearson : Bayes Factor of a correlation
bayesfactor_binom : Bayes Factor of a binomial test
48 changes: 30 additions & 18 deletions pingouin/effsize.py
@@ -194,11 +194,11 @@ def compute_bootci(x, y=None, func=None, method='cper', paired=False, confidence
* ``'std'``: Standard deviation (univariate)
* ``'var'``: Variance (univariate)
method : str
Method to compute the confidence intervals:
Method to compute the confidence intervals (see Notes):
* ``'cper'``: Bias-corrected percentile method (default)
* ``'norm'``: Normal approximation with bootstrapped bias and standard error
* ``'per'``: Basic percentile method
* ``'cper'``: Bias corrected percentile method (default)
* ``'per'``: Simple percentile
paired : boolean
Indicates whether x and y are paired or not. For example, for correlation functions or
paired T-test, x and y are assumed to be paired. Pingouin will resample the pairs
@@ -219,7 +219,7 @@ def compute_bootci(x, y=None, func=None, method='cper', paired=False, confidence
Returns
-------
ci : array
Desired converted effect size
Bootstrapped confidence intervals.
Notes
-----
@@ -232,13 +232,29 @@ def compute_bootci(x, y=None, func=None, method='cper', paired=False, confidence
(BCa) confidence intervals for univariate functions. However, unlike Pingouin, it does not
return the bootstrap distribution.
The percentile bootstrap method (``per``) is defined as the
:math:`100 \\times \\frac{\\alpha}{2}` and :math:`100 \\times \\frac{1 - \\alpha}{2}`
percentiles of the distribution of :math:`\\theta` estimates obtained from resampling, where
:math:`\\alpha` is the level of significance (1 - confidence, default = 0.05 for 95% CIs).
The bias-corrected percentile method (``cper``) corrects for bias of the bootstrap
distribution. This method is different from the BCa method — the default in Matlab and SciPy —
which corrects for both bias and skewness of the bootstrap distribution using jackknife
resampling.
The normal approximation method (``norm``) calculates the confidence intervals with the
standard normal distribution using bootstrapped bias and standard error.
References
----------
* DiCiccio, T. J., & Efron, B. (1996). Bootstrap confidence intervals.
Statistical science, 189-212.
* DiCiccio, T. J., & Efron, B. (1996). Bootstrap confidence intervals. Statistical science,
189-212.
* Davison, A. C., & Hinkley, D. V. (1997). Bootstrap methods and their application (Vol. 1).
Cambridge university press.
* Davison, A. C., & Hinkley, D. V. (1997). Bootstrap methods and their
application (Vol. 1). Cambridge university press.
* Jung, Lee, Gupta, & Cho (2019). Comparison of bootstrap confidence interval methods for
GSCA using a Monte Carlo simulation. Frontiers in psychology, 10, 2215.
Examples
--------
@@ -420,27 +436,23 @@ def func(x):
bootstat[i] = func(x[boot_x[:, i]])

# CONFIDENCE INTERVALS
# See Matlab bootci function
alpha = (1 - confidence) / 2

if method in ['norm', 'normal']:
# Normal approximation
za = norm.ppf(alpha)
za = norm.ppf(alpha)  # e.g. -1.96 for a 95% CI
se = np.std(bootstat, ddof=1)
bias = np.mean(bootstat - reference)
ci = np.array([reference - bias + se * za, reference - bias - se * za])
elif method in ['percentile', 'per']:
# Uncorrected percentile
# Simple percentile
interval = 100 * np.array([alpha, 1 - alpha])
ci = np.percentile(bootstat, interval)
pass
else:
# Corrected percentile bootstrap
# Compute bias-correction constant z0
z_0 = norm.ppf(np.mean(bootstat < reference) + np.mean(bootstat == reference) / 2)
z_alpha = norm.ppf(alpha)
pct_ul = 100 * norm.cdf(2 * z_0 - z_alpha)
pct_ll = 100 * norm.cdf(2 * z_0 + z_alpha)
ci = np.percentile(bootstat, [pct_ll, pct_ul])
# Bias-corrected percentile bootstrap
from pingouin.regression import _bias_corrected_ci
ci = _bias_corrected_ci(bootstat, reference, alpha=(1 - confidence))

ci = np.round(ci, decimals)

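For readers skimming the diff, here is a minimal standalone sketch of how the three interval rules described in the updated ``compute_bootci`` docstring (``'norm'``, ``'per'``, ``'cper'``) map onto a bootstrap distribution. The variable names mirror the function body above, but the snippet is illustrative only and assumes nothing beyond numpy and scipy.

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(42)
x = rng.normal(loc=5, scale=2, size=100)

# Bootstrap distribution of the mean; "reference" is the full-sample estimate
n_boot, confidence = 2000, 0.95
reference = x.mean()
bootstat = np.array([rng.choice(x, size=x.size, replace=True).mean() for _ in range(n_boot)])
alpha = (1 - confidence) / 2

# 'norm': normal approximation with bootstrapped bias and standard error
za = norm.ppf(alpha)  # about -1.96 for a 95% CI
se, bias = np.std(bootstat, ddof=1), np.mean(bootstat - reference)
ci_norm = np.array([reference - bias + se * za, reference - bias - se * za])

# 'per': simple percentile of the bootstrap distribution
ci_per = np.percentile(bootstat, [100 * alpha, 100 * (1 - alpha)])

# 'cper': bias-corrected percentile (no skewness/acceleration term, unlike BCa)
z0 = norm.ppf(np.mean(bootstat < reference) + np.mean(bootstat == reference) / 2)
pct = 100 * norm.cdf([2 * z0 + norm.ppf(alpha), 2 * z0 + norm.ppf(1 - alpha)])
ci_cper = np.percentile(bootstat, pct)

With a roughly symmetric, unbiased bootstrap distribution the three intervals are nearly identical; the bias correction only matters when the bootstrap estimates sit systematically above or below the full-sample statistic.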
2 changes: 1 addition & 1 deletion pingouin/pairwise.py
@@ -28,7 +28,7 @@ def pairwise_tests(data=None, dv=None, between=None, within=None, subject=None,
parametric=True, marginal=True, alpha=.05, alternative='two-sided',
padjust='none', effsize='hedges', correction='auto', nan_policy='listwise',
return_desc=False, interaction=True, within_first=True):
"""Pairwise T-tests.
"""Pairwise tests.
Parameters
----------
68 changes: 28 additions & 40 deletions pingouin/plotting.py
@@ -15,8 +15,8 @@
# See https://github.com/raphaelvallat/pingouin/issues/85
# sns.set(style='ticks', context='notebook')

__all__ = ["plot_blandaltman", "qqplot", "plot_paired",
"plot_shift", "plot_rm_corr", "plot_circmean"]
__all__ = [
"plot_blandaltman", "qqplot", "plot_paired", "plot_shift", "plot_rm_corr", "plot_circmean"]


def plot_blandaltman(x, y, agreement=1.96, xaxis="mean", confidence=.95,
@@ -631,35 +631,31 @@ def plot_paired(data=None, dv=None, within=None, subject=None, order=None,
return ax


def plot_shift(x, y, paired=False, n_boot=1000,
percentiles=np.arange(10, 100, 10),
ci=.95, seed=None, show_median=True, violin=True):
def plot_shift(x, y, paired=False, n_boot=1000, percentiles=np.arange(10, 100, 10), confidence=.95,
seed=None, show_median=True, violin=True):
"""Shift plot.
Parameters
----------
x, y : array_like
First and second set of observations.
paired : bool
Specify whether ``x`` and ``y`` are related (i.e. repeated
measures) or independent.
Specify whether ``x`` and ``y`` are related (i.e. repeated measures) or independent.
.. versionadded:: 0.3.0
n_boot : int
Number of bootstrap iterations. The higher, the better, the slower.
percentiles: array_like
Sequence of percentiles to compute, which must be between 0 and 100
inclusive. Default set to [10, 20, 30, 40, 50, 60, 70, 80, 90].
ci: float
Confidence level (0.95 = 95%).
Sequence of percentiles to compute, which must be between 0 and 100 inclusive.
Default set to [10, 20, 30, 40, 50, 60, 70, 80, 90].
confidence : float
Confidence level (0.95 = 95%) for the confidence intervals.
seed : int or None
Random seed for generating bootstrap samples, can be integer or
None for no seed (default).
Random seed for generating bootstrap samples, can be integer or None for no seed (default).
show_median: boolean
If True (default), show the median with black lines.
violin: boolean
If True (default), plot the density of X and Y distributions.
Default set to True.
If True (default), plot the density of X and Y distributions. Default set to True.
Returns
-------
@@ -672,9 +668,8 @@ def plot_shift(x, y, paired=False, n_boot=1000,
Notes
-----
The shift plot is described in [1]_.
It computes a shift function [2]_ for two (in)dependent groups using the
robust Harrell-Davis quantile estimator in conjunction with bias-corrected
The shift plot is described in [1]_. It computes a shift function [2]_ for two (in)dependent
groups using the robust Harrell-Davis quantile estimator in conjunction with bias-corrected
bootstrap confidence intervals.
References
@@ -708,11 +703,10 @@ def plot_shift(x, y, paired=False, n_boot=1000,
>>> np.random.seed(42)
>>> x = np.random.normal(5.5, 2, 30)
>>> y = np.random.normal(6, 1.5, 30)
>>> fig = pg.plot_shift(x, y, paired=True, n_boot=2000,
... percentiles=[25, 50, 75],
>>> fig = pg.plot_shift(x, y, paired=True, n_boot=2000, percentiles=[25, 50, 75],
... show_median=False, seed=456, violin=False)
"""
from pingouin.regression import _bca
from pingouin.regression import _bias_corrected_ci
from pingouin.nonparametric import harrelldavis as hd

# Safety check
@@ -726,7 +720,7 @@ def plot_shift(x, y, paired=False, n_boot=1000,
assert not np.isnan(y).any(), 'Missing values are not allowed.'
assert nx >= 10, 'x must have at least 10 samples.'
assert ny >= 10, 'y must have at least 10 samples.'
assert 0 < ci < 1, 'ci must be between 0 and 1.'
assert 0 < confidence < 1, 'confidence must be between 0 and 1.'
if paired:
assert nx == ny, 'x and y must have the same size when paired=True.'

@@ -739,20 +733,18 @@ def plot_shift(x, y, paired=False, n_boot=1000,
rng = np.random.RandomState(seed)
if paired:
bootsam = rng.choice(np.arange(nx), size=(nx, n_boot), replace=True)
bootstat = (hd(y[bootsam], percentiles, axis=0) -
hd(x[bootsam], percentiles, axis=0))
bootstat = hd(y[bootsam], percentiles, axis=0) - hd(x[bootsam], percentiles, axis=0)
else:
x_list = rng.choice(x, size=(nx, n_boot), replace=True)
y_list = rng.choice(y, size=(ny, n_boot), replace=True)
bootstat = (hd(y_list, percentiles, axis=0) -
hd(x_list, percentiles, axis=0))
bootstat = hd(y_list, percentiles, axis=0) - hd(x_list, percentiles, axis=0)

# Find the upper and lower confidence interval for each quantile
# Bias-corrected confidence interval
# Bias-corrected bootstrapped confidence interval
lower, median_per, upper = [], [], []
for i, d in enumerate(delta):
ci = _bca(bootstat[i, :], d, n_boot)
median_per.append(_bca(bootstat[i, :], d, n_boot, alpha=1)[0])
ci = _bias_corrected_ci(bootstat[i, :], d, alpha=(1 - confidence))
median_per.append(_bias_corrected_ci(bootstat[i, :], d, alpha=1)[0])
lower.append(ci[0])
upper.append(ci[1])

Expand All @@ -761,8 +753,7 @@ def plot_shift(x, y, paired=False, n_boot=1000,
upper = np.asarray(upper)

# Create long-format dataFrame for use with Seaborn
data = pd.DataFrame({'value': np.concatenate([x, y]),
'variable': ['X'] * nx + ['Y'] * ny})
data = pd.DataFrame({'value': np.concatenate([x, y]), 'variable': ['X'] * nx + ['Y'] * ny})

#############################
# Plots X and Y distributions
@@ -783,14 +774,12 @@ def adjacent_values(vals, q1, q3):
qrt1, medians, qrt3 = np.percentile(dis, [25, 50, 75])
whiskers = adjacent_values(np.sort(dis), qrt1, qrt3)
ax1.plot(medians, pos, marker='o', color='white', zorder=10)
ax1.hlines(pos, qrt1, qrt3, color='k',
linestyle='-', lw=7, zorder=9)
ax1.hlines(pos, whiskers[0], whiskers[1],
color='k', linestyle='-', lw=2, zorder=9)
ax1.hlines(pos, qrt1, qrt3, color='k', linestyle='-', lw=7, zorder=9)
ax1.hlines(pos, whiskers[0], whiskers[1], color='k', linestyle='-', lw=2, zorder=9)

ax1 = sns.stripplot(data=data, x='value', y='variable',
orient='h', order=['Y', 'X'],
palette=['#88bedc', '#cfcfcf'])
ax1 = sns.stripplot(
data=data, x='value', y='variable', orient='h', order=['Y', 'X'],
palette=['#88bedc', '#cfcfcf'])

if violin:
vl = plt.violinplot([y, x], showextrema=False, vert=False, widths=1)
@@ -822,8 +811,7 @@ def adjacent_values(vals, q1, q3):
col = '#c34e52'
else:
col = 'darkgray'
plt.plot([y_per[i], x_per[i]], [0.2, 0.8],
marker='o', color=col, zorder=10)
plt.plot([y_per[i], x_per[i]], [0.2, 0.8], marker='o', color=col, zorder=10)
# X quantiles
plt.plot([x_per[i], x_per[i]], [0.8, 1.2], 'k--', zorder=9)
# Y quantiles
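To make the fix concrete, the following hedged sketch reproduces the core of the shift function outside the plotting code: decile differences via the Harrell-Davis estimator with bias-corrected bootstrap confidence intervals, driven by the ``confidence`` argument that was previously ignored. It only uses the public :py:func:`pingouin.harrelldavis` function plus numpy and scipy; the private helpers from the diff are not imported, and broadcasting over quantiles and the bootstrap axis is assumed to work as it is used in the diff above.

import numpy as np
import pingouin as pg
from scipy.stats import norm

rng = np.random.default_rng(123)
x = rng.normal(5.5, 2.0, 50)
y = rng.normal(6.0, 1.5, 50)

quantiles = np.arange(10, 100, 10) / 100  # plot_shift converts percentiles to the 0-1 range
confidence, n_boot = 0.95, 1000
delta = pg.harrelldavis(y, quantiles) - pg.harrelldavis(x, quantiles)  # shift function

# Unpaired case: resample x and y independently
x_boot = rng.choice(x, size=(x.size, n_boot), replace=True)
y_boot = rng.choice(y, size=(y.size, n_boot), replace=True)
bootstat = pg.harrelldavis(y_boot, quantiles, axis=0) - pg.harrelldavis(x_boot, quantiles, axis=0)

# Bias-corrected percentile CI per quantile, using the requested confidence level
alpha = 1 - confidence
lower, upper = np.empty(delta.size), np.empty(delta.size)
for i, d in enumerate(delta):
    z0 = norm.ppf(np.mean(bootstat[i] < d))
    pct = 100 * norm.cdf([2 * z0 + norm.ppf(alpha / 2), 2 * z0 + norm.ppf(1 - alpha / 2)])
    lower[i], upper[i] = np.percentile(bootstat[i], pct)

Roughly speaking, plot_shift then colours each quantile marker according to whether its interval crosses zero, which is what the coloured and gray connectors in the figure encode.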
43 changes: 24 additions & 19 deletions pingouin/regression.py
@@ -937,37 +937,43 @@ def _point_estimate(X_val, XM_val, M_val, y_val, idx, n_mediator,
return beta_m * beta_y


def _bca(ab_estimates, sample_point, n_boot, alpha=0.05):
"""Get (1 - alpha) * 100 bias-corrected confidence interval estimate
Note that this is similar to the "cper" module implemented in
:py:func:`pingouin.compute_bootci`.
def _bias_corrected_ci(bootdist, sample_point, alpha=0.05):
"""Bias-corrected confidence intervals
Parameters
----------
ab_estimates : 1d array-like
bootdist : array-like
Array with bootstrap estimates for each sample.
sample_point : float
Indirect effect point estimate based on full sample.
n_boot : int
Number of bootstrap samples
Point estimate based on full sample.
alpha : float
Alpha for confidence interval
Alpha for confidence interval.
Returns
-------
CI : 1d array-like
Lower limit and upper limit bias-corrected confidence interval
estimates.
Lower and upper bias-corrected confidence interval estimates.
Notes
-----
This is what's used in the "cper" method implemented in :py:func:`pingouin.compute_bootci`.
This differs from the bias-corrected and accelerated method (BCa, default in Matlab and
SciPy) because it does not correct for skewness. Indeed, the acceleration parameter, a,
is proportional to the skewness of the bootstrap distribution. The bias-correction parameter,
z0, is related to the proportion of bootstrap estimates that are less than the observed
statistic.
"""
# Bias of bootstrap estimates
z0 = norm.ppf(np.sum(ab_estimates < sample_point) / n_boot)
# In Matlab bootci they also count when bootdist == sample_point
# >>> z0 = norminv(mean(bstat < stat,1) + mean(bstat == stat,1)/2);
z0 = norm.ppf(np.mean(bootdist < sample_point))
# Adjusted intervals
adjusted_ll = norm.cdf(2 * z0 + norm.ppf(alpha / 2)) * 100
adjusted_ul = norm.cdf(2 * z0 + norm.ppf(1 - alpha / 2)) * 100
ll = np.percentile(ab_estimates, q=adjusted_ll)
ul = np.percentile(ab_estimates, q=adjusted_ul)
return np.array([ll, ul])
# Adjusted percentiles
ci = np.percentile(bootdist, q=[adjusted_ll, adjusted_ul])
return ci


def _pval_from_bootci(boot, estimate):
@@ -1015,8 +1021,7 @@ def mediation_analysis(data=None, x=None, m=None, y=None, covar=None,
seed : int or None
Random state seed.
logreg_kwargs : dict or None
Dictionary with optional arguments passed to
:py:func:`logistic_regression()`
Dictionary with optional arguments passed to :py:func:`pingouin.logistic_regression`
return_dist : bool
If True, the function also returns the indirect bootstrapped beta
samples (size = n_boot). Can be plotted for instance using
@@ -1248,7 +1253,7 @@ def mediation_analysis(data=None, x=None, m=None, y=None, covar=None,
'pval': [], ll_name: [], ul_name: [], 'sig': []}

for j in range(n_mediator):
ci_j = _bca(ab_estimates[:, j], indirect['coef'][j], alpha=alpha, n_boot=n_boot)
ci_j = _bias_corrected_ci(ab_estimates[:, j], indirect['coef'][j], alpha=alpha)
indirect[ll_name].append(min(ci_j))
indirect[ul_name].append(max(ci_j))
# Bootstrapped p-value of indirect effect
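As a sanity check on the renamed helper, here is a hedged, self-contained re-implementation of the bias-corrected interval exactly as the diff above spells it out. The function name below is local to this sketch and is not pingouin's private API.

import numpy as np
from scipy.stats import norm

def bias_corrected_ci(bootdist, sample_point, alpha=0.05):
    """Bias-corrected (not accelerated) percentile CI, following the diff above."""
    # z0 captures how much of the bootstrap distribution falls below the observed statistic
    z0 = norm.ppf(np.mean(bootdist < sample_point))
    # Shift the nominal percentile bounds by twice the bias term
    adjusted_ll = norm.cdf(2 * z0 + norm.ppf(alpha / 2)) * 100
    adjusted_ul = norm.cdf(2 * z0 + norm.ppf(1 - alpha / 2)) * 100
    return np.percentile(bootdist, q=[adjusted_ll, adjusted_ul])

# With a symmetric, unbiased bootstrap distribution (z0 close to 0) the result
# collapses to the plain 2.5th/97.5th percentiles, as expected.
rng = np.random.default_rng(0)
boot = rng.normal(loc=0.3, scale=0.1, size=5000)
print(bias_corrected_ci(boot, sample_point=0.3))  # roughly [0.10, 0.50]
print(np.percentile(boot, [2.5, 97.5]))           # nearly identical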
9 changes: 4 additions & 5 deletions pingouin/tests/test_plotting.py
@@ -104,17 +104,16 @@ def test_plot_shift(self):
x = np.random.normal(5.5, 2, 50)
y = np.random.normal(6, 1.5, 50)
plot_shift(x, y)
plot_shift(x, y, n_boot=100, percentiles=[5, 55, 95], ci=0.68,
show_median=False, seed=456, violin=False)
plot_shift(x, y, paired=True, n_boot=100, percentiles=[25, 75])
plot_shift(
x, y, n_boot=100, percentiles=[5, 55, 95], show_median=False, seed=456, violin=False)
plot_shift(x, y, paired=True, n_boot=100, percentiles=[25, 75], confidence=.90)
plt.close('all')

def test_plot_rm_corr(self):
"""Test plot_shift()."""
df = read_dataset('rm_corr')
g = plot_rm_corr(data=df, x='pH', y='PacO2', subject='Subject')
g = plot_rm_corr(data=df, x='pH', y='PacO2', subject='Subject',
legend=False)
g = plot_rm_corr(data=df, x='pH', y='PacO2', subject='Subject', legend=False)
assert isinstance(g, sns.FacetGrid)
plt.close('all')

