From 85807f0ef83f1b53d9dd0e771200896f40d52447 Mon Sep 17 00:00:00 2001
From: "Steven Paul Sanderson II, MPH" <spsanderson@users.noreply.github.com>
Date: Mon, 23 May 2022 13:25:32 -0400
Subject: [PATCH] Fixes #189

---
 DESCRIPTION                  |  2 +-
 NAMESPACE                    |  1 +
 NEWS.md                      |  1 +
 R/empirical-tidy-bootstrap.R | 95 ++++++++++++++++++++++++++++++++++++
 _pkgdown.yml                 |  4 ++
 man/tidy_bootstrap.Rd        | 50 +++++++++++++++++++
 man/tidyeval.Rd              | 30 ++++++++----
 7 files changed, 172 insertions(+), 11 deletions(-)
 create mode 100644 R/empirical-tidy-bootstrap.R
 create mode 100644 man/tidy_bootstrap.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 56696df5..adde8a4d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -13,7 +13,7 @@ Description:
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.0
 URL: https://github.com/spsanderson/TidyDensity
 BugReports: https://github.com/spsanderson/TidyDensity/issues
 Imports: 
diff --git a/NAMESPACE b/NAMESPACE
index d16bc68c..7618b47c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,6 +15,7 @@ export(td_scale_fill_colorblind)
 export(tidy_autoplot)
 export(tidy_beta)
 export(tidy_binomial)
+export(tidy_bootstrap)
 export(tidy_burr)
 export(tidy_cauchy)
 export(tidy_chisquare)
diff --git a/NEWS.md b/NEWS.md
index 558d761e..2cea6e11 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,6 +7,7 @@ None
 1. Fix #181 - Add functions `color_blind()` `td_scale_fill_colorblind()` and 
 `td_scale_color_colorblind()`
 2. Fix #187 - Add functions `ci_lo()` and `ci_hi()`
+3. Fix #189 - Add function `tidy_bootstrap()`
 
 ## Minor Fixes and Improvements
 1. Fix #176 - Update `_autoplot` functions to include cumulative mean MCMC chart
diff --git a/R/empirical-tidy-bootstrap.R b/R/empirical-tidy-bootstrap.R
new file mode 100644
index 00000000..ba82959a
--- /dev/null
+++ b/R/empirical-tidy-bootstrap.R
@@ -0,0 +1,95 @@
+#' Bootstrap Empirical Data
+#'
+#' @family Bootstrap
+#'
+#' @author Steven P. Sanderson II, MPH
+#'
+#' @details This function will take in a numeric input vector and produce a tibble
+#' of bootstrapped values in a list. The table that is output will have two columns:
+#' `sim_number` and `bootstrap_samples`
+#'
+#' The `sim_number` corresponds to how many times you want the data to be resampled,
+#' and the `bootstrap_samples` column contains a list of the boostrapped resampled
+#' data.
+#'
+#' @description Takes an input vector of numeric data and produces a bootstrapped
+#' nested tibble by simulation number.
+#'
+#' @param .x The vector of data being passed to the function. Must be a numeric
+#' vector.
+#' @param .num_sims The default is 2000, can be set to anything desired. A warning
+#' will pass to the console if the value is less than 2000.
+#' @param .proportion How much of the original data do you want to pass through
+#' to the sampling function. The default is 0.80 (80%)
+#' @param .distribution_type This can either be 'continuous' or 'discrete'
+#'
+#' @examples
+#' x <- mtcars$mpg
+#' tidy_bootstrap(x)
+#'
+#' @return
+#' A nested tibble
+#'
+#' @export
+#'
+
+tidy_bootstrap <- function(.x, .num_sims = 2000, .proportion = 0.8,
+                           .distribution_type = "continuous"){
+
+    # Tidyeval ----
+    x_term <- as.numeric(.x)
+    n <- length(x_term)
+    dist_type <- tolower(as.character(.distribution_type))
+    num_sims <- as.integer(.num_sims)
+    size <- as.numeric(.proportion)
+
+    # Checks ----
+    if (!is.vector(x_term)) {
+        rlang::abort(
+            message = "You must pass a vector as the .x argument to this function.",
+            use_cli_format = TRUE
+        )
+    }
+
+    if (size < 0 | size > 1){
+        rlang::abort(
+            message = "The '.proportion' parameter must be between 0 and 1 inclusive.",
+            use_cli_format = TRUE
+        )
+    }
+
+    if (!dist_type %in% c("continuous","discrete")){
+        rlang::abort(
+            message = "You must choose either 'continuous' or 'discrete'.",
+            use_cli_format = TRUE
+        )
+    }
+
+    if (num_sims < 2000){
+        rlang::warn(
+            message = "Setting '.num_sims' to less than 2000 means that results can be
+      potentially unstable. Consider setting to 2000 or more.",
+            use_cli_format = TRUE
+        )
+    }
+
+    # Data ----
+    df <- dplyr::tibble(sim_number = as.factor(1:num_sims)) %>%
+        dplyr::group_by(sim_number) %>%
+        dplyr::mutate(bootstrap_samples = list(
+            sample(x = x_term, size = floor(size * n) ,replace = TRUE)
+          )
+        ) %>%
+        dplyr::ungroup()
+
+    # Attach descriptive attributes to tibble
+    attr(df, "distribution_family_type") <- dist_type
+    attr(df, ".x") <- .x
+    attr(df, ".num_sims") <- .num_sims
+    attr(df, "tibble_type") <- "tidy_bootstrap"
+    attr(df, "dist_with_params") <- "Empirical"
+
+    # Return ----
+    return(df)
+
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 03df8aed..c939521c 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -37,6 +37,10 @@ reference:
      desc: Function for creating mixture model data
      contents:
      -  has_concept("Mixture Data")
+  -  title: Bootstrap
+     desc: Functions for bootstrapping data
+     contents:
+     -  has_concept("Bootstrap")
   -  title: Parameter Estimation Functions
      desc: Functions that help to estimate parameters from raw data.
      contents:
diff --git a/man/tidy_bootstrap.Rd b/man/tidy_bootstrap.Rd
new file mode 100644
index 00000000..2fbe95e4
--- /dev/null
+++ b/man/tidy_bootstrap.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/empirical-tidy-bootstrap.R
+\name{tidy_bootstrap}
+\alias{tidy_bootstrap}
+\title{Bootstrap Empirical Data}
+\usage{
+tidy_bootstrap(
+  .x,
+  .num_sims = 2000,
+  .proportion = 0.8,
+  .distribution_type = "continuous"
+)
+}
+\arguments{
+\item{.x}{The vector of data being passed to the function. Must be a numeric
+vector.}
+
+\item{.num_sims}{The default is 2000, can be set to anything desired. A warning
+will pass to the console if the value is less than 2000.}
+
+\item{.proportion}{How much of the original data do you want to pass through
+to the sampling function. The default is 0.80 (80\%)}
+
+\item{.distribution_type}{This can either be 'continuous' or 'discrete'}
+}
+\value{
+A nested tibble
+}
+\description{
+Takes an input vector of numeric data and produces a bootstrapped
+nested tibble by simulation number.
+}
+\details{
+This function will take in a numeric input vector and produce a tibble
+of bootstrapped values in a list. The table that is output will have two columns:
+\code{sim_number} and \code{bootstrap_samples}
+
+The \code{sim_number} corresponds to how many times you want the data to be resampled,
+and the \code{bootstrap_samples} column contains a list of the boostrapped resampled
+data.
+}
+\examples{
+x <- mtcars$mpg
+tidy_bootstrap(x)
+
+}
+\author{
+Steven P. Sanderson II, MPH
+}
+\concept{Bootstrap}
diff --git a/man/tidyeval.Rd b/man/tidyeval.Rd
index 1fde2d29..f773abf2 100644
--- a/man/tidyeval.Rd
+++ b/man/tidyeval.Rd
@@ -24,18 +24,22 @@ operators which you should not have to use in simple cases.
 The curly-curly operator \verb{\{\{} allows you to tunnel data-variables
 passed from function arguments inside other tidy eval functions.
 \verb{\{\{} is designed for individual arguments. To pass multiple
-arguments contained in dots, use \code{...} in the normal way.\preformatted{my_function <- function(data, var, ...) \{
+arguments contained in dots, use \code{...} in the normal way.
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{my_function <- function(data, var, ...) \{
   data \%>\%
     group_by(...) \%>\%
     summarise(mean = mean(\{\{ var \}\}))
 \}
-}
+}\if{html}{\out{</div>}}
 \item \code{\link[=enquo]{enquo()}} and \code{\link[=enquos]{enquos()}} delay the execution of one or several
 function arguments. The former returns a single expression, the
 latter returns a list of expressions. Once defused, expressions
 will no longer evaluate on their own. They must be injected back
 into an evaluation context with \verb{!!} (for a single expression) and
-\verb{!!!} (for a list of expressions).\preformatted{my_function <- function(data, var, ...) \{
+\verb{!!!} (for a list of expressions).
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{my_function <- function(data, var, ...) \{
   # Defuse
   var <- enquo(var)
   dots <- enquos(...)
@@ -45,7 +49,7 @@ into an evaluation context with \verb{!!} (for a single expression) and
     group_by(!!!dots) \%>\%
     summarise(mean = mean(!!var))
 \}
-}
+}\if{html}{\out{</div>}}
 
 In this simple case, the code is equivalent to the usage of \verb{\{\{}
 and \code{...} above. Defusing with \code{enquo()} or \code{enquos()} is only
@@ -53,30 +57,36 @@ needed in more complex cases, for instance if you need to inspect
 or modify the expressions in some way.
 \item The \code{.data} pronoun is an object that represents the current
 slice of data. If you have a variable name in a string, use the
-\code{.data} pronoun to subset that variable with \code{[[}.\preformatted{my_var <- "disp"
+\code{.data} pronoun to subset that variable with \code{[[}.
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{my_var <- "disp"
 mtcars \%>\% summarise(mean = mean(.data[[my_var]]))
-}
+}\if{html}{\out{</div>}}
 \item Another tidy eval operator is \verb{:=}. It makes it possible to use
 glue and curly-curly syntax on the LHS of \code{=}. For technical
 reasons, the R language doesn't support complex expressions on
-the left of \code{=}, so we use \verb{:=} as a workaround.\preformatted{my_function <- function(data, var, suffix = "foo") \{
+the left of \code{=}, so we use \verb{:=} as a workaround.
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{my_function <- function(data, var, suffix = "foo") \{
   # Use `\{\{` to tunnel function arguments and the usual glue
   # operator `\{` to interpolate plain strings.
   data \%>\%
     summarise("\{\{ var \}\}_mean_\{suffix\}" := mean(\{\{ var \}\}))
 \}
-}
+}\if{html}{\out{</div>}}
 \item Many tidy eval functions like \code{dplyr::mutate()} or
 \code{dplyr::summarise()} give an automatic name to unnamed inputs. If
 you need to create the same sort of automatic names by yourself,
 use \code{as_label()}. For instance, the glue-tunnelling syntax above
-can be reproduced manually with:\preformatted{my_function <- function(data, var, suffix = "foo") \{
+can be reproduced manually with:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{my_function <- function(data, var, suffix = "foo") \{
   var <- enquo(var)
   prefix <- as_label(var)
   data \%>\%
     summarise("\{prefix\}_mean_\{suffix\}" := mean(!!var))
 \}
-}
+}\if{html}{\out{</div>}}
 
 Expressions defused with \code{enquo()} (or tunnelled with \verb{\{\{}) need
 not be simple column names, they can be arbitrarily complex.