diff --git a/DESCRIPTION b/DESCRIPTION index e065fef..a62e0bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: vip Type: Package Title: Variable Importance Plots -Version: 0.4.0 +Version: 0.4.1 Authors@R: c( person(c("Brandon", "M."), family = "Greenwell", email = "greenwell.brandon@gmail.com", diff --git a/NEWS.md b/NEWS.md index 7bcfa94..aad5e95 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,14 +1,23 @@ +# vip 0.4.1 + +## Changed + +* Minor tweaks to URLs and tests to pass CRAN checks. + + # vip 0.4.0 ## Changed * This NEWS file now follows the [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -* The training data has to be explicitly passed in more cases. +* Removed lifecycle badge from `README` file. + +* The training data has to be explicitly passed in more cases when using `vi_permute()`, `vi_shap()`, and `vi_firm()`. * Raised R version dependency to >= 4.1.0 (the introduction of the native piper operator `|>`). -* The `vi_permute` function now uses [yardstick](https://cran.r-project.org/package=yardstick); consequently, metric functions now conform to [yardstick](https://cran.r-project.org/package=yardstick) metric argument names. +* The `vi_permute` function now relies on the [yardstick](https://cran.r-project.org/package=yardstick) package for computing performance measures (e.g., RMSE and log loss); consequently, user-supplied metric functions now need to conform to [yardstick](https://cran.r-project.org/package=yardstick) metric argument names. * The `var_fun` argument in `vi_firm()` has been deprecated; use the new `var_continuous` and `var_categorical` instead. diff --git a/R/vi_shap.R b/R/vi_shap.R index e4e0096..d052165 100644 --- a/R/vi_shap.R +++ b/R/vi_shap.R @@ -30,7 +30,7 @@ #' #' @details This approach to computing VI scores is based on the mean absolute #' value of the SHAP values for each feature; see, for example, -#' and the references therein. +#' and the references therein. 
#' #' Strumbelj, E., and Kononenko, I. Explaining prediction models and individual #' predictions with feature contributions. Knowledge and information systems diff --git a/R/vip.R b/R/vip.R index cfc835e..be30c27 100644 --- a/R/vip.R +++ b/R/vip.R @@ -27,8 +27,9 @@ #' only for the permutation-based importance method with \code{nsim > 1} and #' `keep = TRUE`; see [vi_permute][vip::vi_permute] for details. #' -#' @param mapping Set of aesthetic mappings created by [aes][ggplot2::aes] -#' or [aes_][ggplot2::aes_]. See example usage below. +#' @param mapping Set of aesthetic mappings created by +#' [aes][ggplot2::aes]-related functions and/or tidy eval helpers. See example +#' usage below. #' #' @param aesthetics List specifying additional arguments passed on to #' [layer][ggplot2::layer]. These are often aesthetics, used to set an aesthetic @@ -79,9 +80,10 @@ #' vip(vis, geom = "point", horiz = FALSE, aesthetics = list(size = 3)) #' #' # Plot unaggregated permutation scores (boxplot colored by feature) -#' library(ggplot2) # for `aes_string()` function +#' library(ggplot2) # for `aes()`-related functions and tidy eval helpers #' vip(vis, geom = "boxplot", all_permutations = TRUE, jitter = TRUE, -#' mapping = aes_string(fill = "Variable"), +#' #mapping = aes_string(fill = "Variable"), # for ggplot2 (< 3.0.0) +#' mapping = aes(fill = .data[["Variable"]]), # for ggplot2 (>= 3.0.0) #' aesthetics = list(color = "grey35", size = 0.8)) #' #' # @@ -161,7 +163,7 @@ vip.default <- function( } imp <- sort_importance_scores(imp, decreasing = TRUE) # make sure these are sorted first! 
imp <- imp[seq_len(num_features), ] # only retain num_features variable importance scores - x.string <- "reorder(Variable, Importance)" + # x.string <- "reorder(Variable, Importance)" # Clean up raw scores for permutation-based VI scores if (!is.null(attr(imp, which = "raw_scores"))) { @@ -178,7 +180,11 @@ vip.default <- function( } # Initialize plot - p <- ggplot2::ggplot(imp, ggplot2::aes_string(x = x.string, y = "Importance")) + # p <- ggplot2::ggplot(imp, ggplot2::aes_string(x = x.string, y = "Importance")) + p <- ggplot2::ggplot(imp, ggplot2::aes( + x = reorder(Variable, Importance), + y = Importance + )) # Construct a barplot if (geom == "col") { diff --git a/README.Rmd b/README.Rmd index 6b15a32..99a9033 100644 --- a/README.Rmd +++ b/README.Rmd @@ -17,7 +17,6 @@ knitr::opts_chunk$set( [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/vip)](https://cran.r-project.org/package=vip) [![R-CMD-check](https://github.com/koalaverse/vip/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/koalaverse/vip/actions/workflows/R-CMD-check.yaml) [![Coverage Status](https://codecov.io/gh/koalaverse/vip/graph/badge.svg)](https://app.codecov.io/github/koalaverse/vip?branch=master) -[![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) [![Downloads](https://cranlogs.r-pkg.org/badges/vip)](https://cran.r-project.org/package=vip/) [![The R Journal](https://img.shields.io/badge/The%20R%20Journal-10.32614%2FRJ--2020--013-brightgreen)](https://doi.org/10.32614/RJ-2020-013) diff --git a/README.md b/README.md index ca85523..82da3da 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,6 @@ [![R-CMD-check](https://github.com/koalaverse/vip/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/koalaverse/vip/actions/workflows/R-CMD-check.yaml) [![Coverage 
Status](https://codecov.io/gh/koalaverse/vip/graph/badge.svg)](https://app.codecov.io/github/koalaverse/vip?branch=master) -[![Lifecycle: -stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) [![Downloads](https://cranlogs.r-pkg.org/badges/vip)](https://cran.r-project.org/package=vip/) [![The R Journal](https://img.shields.io/badge/The%20R%20Journal-10.32614%2FRJ--2020--013-brightgreen)](https://doi.org/10.32614/RJ-2020-013) diff --git a/inst/tinytest/test_pkg_partykit.R b/inst/tinytest/test_pkg_partykit.R index 95b6acc..a306a57 100644 --- a/inst/tinytest/test_pkg_partykit.R +++ b/inst/tinytest/test_pkg_partykit.R @@ -36,18 +36,6 @@ expect_identical( target = unname(vis4) ) -# Expectations for `get_training_data()` -expect_equal( - current = vip:::get_training_data.default(fit1), - target = friedman1, - check.attributes = FALSE -) -expect_equal( - current = vip:::get_training_data.default(fit2), - target = friedman1, - check.attributes = FALSE -) - # Expectations for `get_feature_names()` expect_identical( current = vip:::get_feature_names.constparty(fit1), diff --git a/inst/tinytest/test_pkg_pls.R b/inst/tinytest/test_pkg_pls.R index 36ed6ef..2c98b32 100644 --- a/inst/tinytest/test_pkg_pls.R +++ b/inst/tinytest/test_pkg_pls.R @@ -27,12 +27,6 @@ expect_identical( target = caret::varImp(fit)$Overall ) -# Expectations for `get_training_data()` -expect_identical( - current = vip:::get_training_data.default(fit), - target = friedman1 -) - # Expectations for `get_feature_names()` expect_identical( current = vip:::get_feature_names.mvr(fit), diff --git a/man/vi_shap.Rd b/man/vi_shap.Rd index e11c242..aa8f6f7 100644 --- a/man/vi_shap.Rd +++ b/man/vi_shap.Rd @@ -44,7 +44,7 @@ below. \details{ This approach to computing VI scores is based on the mean absolute value of the SHAP values for each feature; see, for example, -\url{https://github.com/slundberg/shap} and the references therein. 
+\url{https://github.com/shap/shap} and the references therein. Strumbelj, E., and Kononenko, I. Explaining prediction models and individual predictions with feature contributions. Knowledge and information systems diff --git a/vignettes/vip.Rmd b/vignettes/vip.Rmd index 8bf9dc0..784998e 100644 --- a/vignettes/vip.Rmd +++ b/vignettes/vip.Rmd @@ -139,7 +139,7 @@ vi_bst <- xgb.importance(model = bst) Model-specific VIPs for the three different tree-based models fit to the simulated Friedman data. -As we would expect, all three methods rank the variables `x1`--`x5` as more important than the others. While this is good news, it is unfortunate that we have to remember the different functions and ways of extracting and plotting VI scores from various model fitting functions. This is one place where [vip](https://cran.r-project.org/package=vip) can help...one function to rule them all! Once [vip](https://cran.r-project.org/package=vip) is loaded, we can use `vi()` to extract a tibble of VI scores.^[In order to avoid deprecation warnings due to recent updates to [tibble](https://cran.r-project.org/package=tibble) and [ggplot2](https://cran.r-project.org/package=ggplot2), the code examples in this article are based on the latest development versions of both [vip](https://cran.r-project.org/package=vip) (version 0.4.0) and [pdp](https://cran.r-project.org/package=pdp) (version 0.8.1); the URL to the development version of each package is available on its associated CRAN landing page.] +As we would expect, all three methods rank the variables `x1`--`x5` as more important than the others. While this is good news, it is unfortunate that we have to remember the different functions and ways of extracting and plotting VI scores from various model fitting functions. This is one place where [vip](https://cran.r-project.org/package=vip) can help...one function to rule them all! 
Once [vip](https://cran.r-project.org/package=vip) is loaded, we can use `vi()` to extract a tibble of VI scores.^[In order to avoid deprecation warnings due to recent updates to [tibble](https://cran.r-project.org/package=tibble) and [ggplot2](https://cran.r-project.org/package=ggplot2), the code examples in this article are based on the latest development versions of both [vip](https://cran.r-project.org/package=vip) (version 0.4.1) and [pdp](https://cran.r-project.org/package=pdp) (version 0.8.1); the URL to the development version of each package is available on its associated CRAN landing page.] ```r @@ -249,7 +249,20 @@ backward <- step(linmod, direction = "backward", trace = 0) ``` ``` -## Error in vi(backward): class name too long in 'vi' +## # A tibble: 21 × 3 +## Variable Importance Sign +## +## 1 x4 14.2 POS +## 2 x2 7.31 POS +## 3 x1 5.63 POS +## 4 x5 5.21 POS +## 5 x3:x5 2.46 POS +## 6 x1:x10 2.41 NEG +## 7 x2:x6 2.41 NEG +## 8 x1:x5 2.37 NEG +## 9 x10 2.21 POS +## 10 x3:x4 2.01 NEG +## # ℹ 11 more rows ``` ```r @@ -262,9 +275,7 @@ vip(vi_backward, num_features = length(coef(backward)), # Figure 3 theme(axis.text.x = element_text(angle = 45, hjust = 1)) ``` -``` -## Error in vip(vi_backward, num_features = length(coef(backward)), geom = "point", : object 'vi_backward' not found -``` +Example VIP from a linear model fit to the simulated Friedman data. The points are colored according to the sign of the associated coefficient. A major limitation of this approach is that a VI score is assigned to each term in the model, rather than to each individual feature! We can solve this problem using one of the model-agnostic approaches discussed later. @@ -357,8 +368,8 @@ Next, we compute PDP-based VI scores for the fitted PPR and NN models. 
The PDP m pp <- ppr(y ~ ., data = trn, nterms = 11) # Construct VIPs -p1 <- vip(pp, method = "firm") + ggtitle("PPR") -p2 <- vip(nn, method = "firm") + ggtitle("NN") +p1 <- vip(pp, method = "firm", train = trn) + ggtitle("PPR") +p2 <- vip(nn, method = "firm", train = trn) + ggtitle("NN") # Display plots in a grid (Figure 7) p1 + p2 @@ -379,8 +390,8 @@ Obtaining the ICE-based feature importance scores is also straightforward, just ```r # Construct VIPs -p1 <- vip(pp, method = "firm", ice = TRUE) + ggtitle("PPR") -p2 <- vip(nn, method = "firm", ice = TRUE) + ggtitle("NN") +p1 <- vip(pp, method = "firm", ice = TRUE, train = trn) + ggtitle("PPR") +p2 <- vip(nn, method = "firm", ice = TRUE, train = trn) + ggtitle("NN") # Display plots in a grid (Figure 9) p1 + p2 @@ -393,7 +404,7 @@ When using `method = "firm"`, the feature effect values are stored in an attribu ```r # Construct PDP-based VI scores -(vis <- vi(pp, method = "firm")) +(vis <- vi(pp, method = "firm", train = trn)) ``` ``` @@ -457,9 +468,9 @@ pfun_nnet <- function(object, newdata) { # needs to return a numeric vector # Plot VI scores set.seed(2021) # for reproducibility -p1 <- vip(pp, method = "permute", target = "y", metric = "rsq", +p1 <- vip(pp, method = "permute", train = trn, target = "y", metric = "rsq", pred_wrapper = pfun_ppr) + ggtitle("PPR") -p2 <- vip(nn, method = "permute", target = "y", metric = "rsq", +p2 <- vip(nn, method = "permute", train = trn, target = "y", metric = "rsq", pred_wrapper = pfun_nnet) + ggtitle("NN") # Display plots in a grid (Figure 11) @@ -474,7 +485,7 @@ The permutation approach introduces randomness into the procedure and therefore ```r # Use 10 Monte Carlo reps set.seed(403) # for reproducibility -vis <- vi(pp, method = "permute", target = "y", metric = "rsq", +vis <- vi(pp, method = "permute", train = trn, target = "y", metric = "rsq", pred_wrapper = pfun_ppr, nsim = 30) vip(vis, geom = "boxplot") # Figure 12 ``` @@ -538,12 +549,13 @@ To use this for computing 
permutation-based VI scores just pass it via the `metr ```r # Construct VIP (Figure 13) set.seed(2321) # for reproducibility -p1 <- vip(nn, method = "permute", target = "y", metric = mae, - smaller_is_better = TRUE, pred_wrapper = pfun_nnet) + +p1 <- vip(nn, method = "permute", train = trn, target = "y", metric = mae, + smaller_is_better = TRUE, pred_wrapper = pfun_nnet) + ggtitle("Custom loss function: MAE") set.seed(2321) # for reproducibility -p2 <- vip(nn, method = "permute", target = "y", metric = yardstick::mae_vec, - smaller_is_better = TRUE, pred_wrapper = pfun_nnet) + +p2 <- vip(nn, method = "permute", train = trn, target = "y", + metric = yardstick::mae_vec, smaller_is_better = TRUE, + pred_wrapper = pfun_nnet) + ggtitle("Using `yardstick`'s MAE function") p1 + p2 ``` @@ -570,8 +582,8 @@ When using the permutation method with `nsim > 1`, the default is to keep all th ```r # Construct VIP (Figure 15) set.seed(8264) # for reproducibility -vip(nn, method = "permute", pred_wrapper = pfun_nnet, target = "y", - metric = "mae", nsim = 10, geom = "point", +vip(nn, method = "permute", pred_wrapper = pfun_nnet, train = trn, + target = "y", metric = "mae", nsim = 10, geom = "point", all_permutations = TRUE, jitter = TRUE) + ggtitle("Plotting all permutation scores") ``` @@ -806,9 +818,9 @@ Much better (and just the negative of the previous results, as expected)! For a ### Benchmarks -In this section, we compare the performance of four implementations of permutation-based VI scores: `iml::FeatureImp()` (version 0.11.1), `ingredients::feature_importance()` (version 2.3.0), `mmpf::permutationImportance` (version 0.0.5), and `vip::vi()` (version 0.4.0). +In this section, we compare the performance of four implementations of permutation-based VI scores: `iml::FeatureImp()` (version 0.11.1), `ingredients::feature_importance()` (version 2.3.0), `mmpf::permutationImportance` (version 0.0.5), and `vip::vi()` (version 0.4.1). 
-We simulated 10,000 training observations from the Friedman 1 benchmark problem and trained a random forest using the [ranger](https://cran.r-project.org/package=ranger) package. For each implementation, we computed permutation-based VI scores 100 times using the [microbenchmark](https://cran.r-project.org/package=microbenchmark) package [@R-microbenchmark]. For this benchmark we did not use any of the parallel processing capability available in the [iml](https://cran.r-project.org/package=iml) and [vip](https://cran.r-project.org/package=vip) implementations. The results from [microbenchmark](https://cran.r-project.org/package=microbenchmark) are displayed in Figure \ref@(fig:benchmark) and summarized in the output below. In this case, the [vip](https://cran.r-project.org/package=vip) package (version 0.4.0) was the fastest, followed closely by [ingredients](https://cran.r-project.org/package=ingredients) and [mmpf](https://cran.r-project.org/package=mmpf). It should be noted, however, that the implementations in [vip](https://cran.r-project.org/package=vip) and [iml](https://cran.r-project.org/package=iml) can be parallelized. To the best of our knowledge, this is not the case for [ingredients](https://cran.r-project.org/package=ingredients) or [mmpf](https://cran.r-project.org/package=mmpf) (although it would not be difficult to write a simple parallel wrapper for either). The code used to generate these benchmarks can be found at https://github.com/koalaverse/vip/blob/master/slowtests/slowtests-benchmarks.R. +We simulated 10,000 training observations from the Friedman 1 benchmark problem and trained a random forest using the [ranger](https://cran.r-project.org/package=ranger) package. For each implementation, we computed permutation-based VI scores 100 times using the [microbenchmark](https://cran.r-project.org/package=microbenchmark) package [@R-microbenchmark]. 
For this benchmark we did not use any of the parallel processing capability available in the [iml](https://cran.r-project.org/package=iml) and [vip](https://cran.r-project.org/package=vip) implementations. The results from [microbenchmark](https://cran.r-project.org/package=microbenchmark) are displayed in Figure \@ref(fig:benchmark) and summarized in the output below. In this case, the [vip](https://cran.r-project.org/package=vip) package (version 0.4.1) was the fastest, followed closely by [ingredients](https://cran.r-project.org/package=ingredients) and [mmpf](https://cran.r-project.org/package=mmpf). It should be noted, however, that the implementations in [vip](https://cran.r-project.org/package=vip) and [iml](https://cran.r-project.org/package=iml) can be parallelized. To the best of our knowledge, this is not the case for [ingredients](https://cran.r-project.org/package=ingredients) or [mmpf](https://cran.r-project.org/package=mmpf) (although it would not be difficult to write a simple parallel wrapper for either). The code used to generate these benchmarks can be found at https://github.com/koalaverse/vip/blob/master/slowtests/slowtests-benchmarks.R. Violin plots comparing the computation time from three different implementations of permutation-based VI scores across 100 simulations. @@ -819,7 +831,7 @@ Although [vip](https://cran.r-project.org/package=vip) focuses on global VI meth Obtaining a global VI score from Shapley values requires aggregating the Shapley values for each feature across the entire training set (or at least a reasonable sample thereof). In particular, we use the mean of the absolute value of the individual Shapley values for each feature. Unfortunately, Shapley values can be computationally expensive, and therefore this approach may not be feasible for large training sets (say, >3000 observations). 
The [fastshap](https://cran.r-project.org/package=fastshap) package provides some relief by exploiting a few computational tricks, including the option to perform computations in parallel (see \code{?fastshap::explain} for details). Also, fast and exact algorithms \citep{lundberg-explainable-2019} can be exploited for certain classes of models. -Starting with [vip](https://cran.r-project.org/package=vip) version 0.4.0 you can now use `method = "shap"` in the call to `vi()` (or use `vi_shap()` directly) to compute global Shapley-based VI scores using the method described above (provided you have the [fastshap](https://cran.r-project.org/package=fastshap) package installed)---see `?vip::vi_shap` for details. To illustrate, we compute Shapley-based VI scores from an [xgboost](https://cran.r-project.org/package=xgboost) model [R-xgboost] using the Friedman data from earlier; the results are displayed in Figure \ref@(fig:vi-shap).^[Note that the `exact = TRUE` option is only available if you have [fastshap](https://cran.r-project.org/package=fastshap) version 0.0.4 or later.] (**{Note:** specifying `include_type = TRUE` in the call to `vip()` causes the type of VI computed to be displayed as part of the axis label.) +Starting with [vip](https://cran.r-project.org/package=vip) version 0.4.1 you can now use `method = "shap"` in the call to `vi()` (or use `vi_shap()` directly) to compute global Shapley-based VI scores using the method described above (provided you have the [fastshap](https://cran.r-project.org/package=fastshap) package installed)---see `?vip::vi_shap` for details. To illustrate, we compute Shapley-based VI scores from an [xgboost](https://cran.r-project.org/package=xgboost) model [R-xgboost] using the Friedman data from earlier; the results are displayed in Figure \ref@(fig:vi-shap).^[Note that the `exact = TRUE` option is only available if you have [fastshap](https://cran.r-project.org/package=fastshap) version 0.0.4 or later.] 
(**{Note:** specifying `include_type = TRUE` in the call to `vip()` causes the type of VI computed to be displayed as part of the axis label.) ```r @@ -883,7 +895,7 @@ We already mentioned that PDPs can be misleading in the presence of strong inter # Summary -VIPs help to visualize the strength of the relationship between each feature and the predicted response, while accounting for all the other features in the model. We've discussed two types of VI: model-specific and model-agnostic, as well as some of their strengths and weaknesses. In this paper, we showed how to construct VIPs for various types of "black box" models in R using the [vip](https://cran.r-project.org/package=vip) package. We also briefly discussed related approaches available in a number of other R packages. Suggestions to avoid high execution times were discussed and demonstrated via examples. This paper is based on [vip](https://cran.r-project.org/package=vip) version 0.4.0. In terms of future development, [vip](https://cran.r-project.org/package=vip) can be expanded in a number of ways. For example, we plan to incorporate the option to compute group-based and conditional permutation scores. Although not discussed in this paper, [vip](https://cran.r-project.org/package=vip) also includes a promising statistic (similar to the variance-based VI scores previously discussed) for measuring the relative strength of interaction between features. Although VIPs can help understand which features are driving the model's predictions, ML practitioners should be cognizant of the fact that none of the methods discussed in this paper are uniformly best across all situations; they require an accurate model that has been properly tuned, and should be checked for consistency with human domain knowledge. +VIPs help to visualize the strength of the relationship between each feature and the predicted response, while accounting for all the other features in the model. 
We've discussed two types of VI: model-specific and model-agnostic, as well as some of their strengths and weaknesses. In this paper, we showed how to construct VIPs for various types of "black box" models in R using the [vip](https://cran.r-project.org/package=vip) package. We also briefly discussed related approaches available in a number of other R packages. Suggestions to avoid high execution times were discussed and demonstrated via examples. This paper is based on [vip](https://cran.r-project.org/package=vip) version 0.4.1. In terms of future development, [vip](https://cran.r-project.org/package=vip) can be expanded in a number of ways. For example, we plan to incorporate the option to compute group-based and conditional permutation scores. Although not discussed in this paper, [vip](https://cran.r-project.org/package=vip) also includes a promising statistic (similar to the variance-based VI scores previously discussed) for measuring the relative strength of interaction between features. Although VIPs can help understand which features are driving the model's predictions, ML practitioners should be cognizant of the fact that none of the methods discussed in this paper are uniformly best across all situations; they require an accurate model that has been properly tuned, and should be checked for consistency with human domain knowledge. # Acknowledgments diff --git a/vignettes/vip.Rmd.orig b/vignettes/vip.Rmd.orig index 9c5f0b7..e718528 100644 --- a/vignettes/vip.Rmd.orig +++ b/vignettes/vip.Rmd.orig @@ -32,14 +32,12 @@ knitr::opts_chunk$set( out.width = "70%" ) -# Execute the code from the vignette -# knitr::knit("vignettes/vip.Rmd.orig", output = "vignettes/vip.Rmd") -# + # TODO: -# * Don't forget to replace "man/" with "../man/" in `vip.Rmd` file. -# * Don't forget to manually add -# '`r system.file("references.bib", package = "vip")`' to the `bibliography` -# field in `vip.Rmd`. 
+# * Run knitr::knit("vignettes/vip.Rmd.orig", output = "vignettes/vip.Rmd") +# * Replace "man/" with "../man/" in `vip.Rmd` file. +# * Add '`r system.file("references.bib", package = "vip")`' to the +# `bibliography` field in `vip.Rmd`. ``` This vignette is essentially an up-to-date version of @RJ-2020-013. Please use that if you'd like to cite our work. @@ -310,8 +308,8 @@ Next, we compute PDP-based VI scores for the fitted PPR and NN models. The PDP m pp <- ppr(y ~ ., data = trn, nterms = 11) # Construct VIPs -p1 <- vip(pp, method = "firm") + ggtitle("PPR") -p2 <- vip(nn, method = "firm") + ggtitle("NN") +p1 <- vip(pp, method = "firm", train = trn) + ggtitle("PPR") +p2 <- vip(nn, method = "firm", train = trn) + ggtitle("NN") # Display plots in a grid (Figure 7) p1 + p2 @@ -340,8 +338,8 @@ Obtaining the ICE-based feature importance scores is also straightforward, just ```{r vip-ice-ppr-nn, fig.cap="ICE-based feature importance for the PPR and NN models fit to the simulated Friedman data."} # Construct VIPs -p1 <- vip(pp, method = "firm", ice = TRUE) + ggtitle("PPR") -p2 <- vip(nn, method = "firm", ice = TRUE) + ggtitle("NN") +p1 <- vip(pp, method = "firm", ice = TRUE, train = trn) + ggtitle("PPR") +p2 <- vip(nn, method = "firm", ice = TRUE, train = trn) + ggtitle("NN") # Display plots in a grid (Figure 9) p1 + p2 @@ -351,7 +349,7 @@ When using `method = "firm"`, the feature effect values are stored in an attribu ```{r pdp-from-attr, fig.width=10, fig.asp=0.5, out.width="100%", fig.cap="PDPs for all ten features reconstructed from the \\code{pdp} attribute of the \\code{vis} object."} # Construct PDP-based VI scores -(vis <- vi(pp, method = "firm")) +(vis <- vi(pp, method = "firm", train = trn)) # Reconstruct PDPs for all 10 features (Figure 10) par(mfrow = c(2, 5)) @@ -394,9 +392,9 @@ pfun_nnet <- function(object, newdata) { # needs to return a numeric vector # Plot VI scores set.seed(2021) # for reproducibility -p1 <- vip(pp, method = "permute", target = "y", 
metric = "rsq", +p1 <- vip(pp, method = "permute", train = trn, target = "y", metric = "rsq", pred_wrapper = pfun_ppr) + ggtitle("PPR") -p2 <- vip(nn, method = "permute", target = "y", metric = "rsq", +p2 <- vip(nn, method = "permute", train = trn, target = "y", metric = "rsq", pred_wrapper = pfun_nnet) + ggtitle("NN") # Display plots in a grid (Figure 11) @@ -408,7 +406,7 @@ The permutation approach introduces randomness into the procedure and therefore ```{r vip-boxplots, fig.cap="Boxplots of VI scores using the permutation method with 15 Monte Carlo repetitions."} # Use 10 Monte Carlo reps set.seed(403) # for reproducibility -vis <- vi(pp, method = "permute", target = "y", metric = "rsq", +vis <- vi(pp, method = "permute", train = trn, target = "y", metric = "rsq", pred_wrapper = pfun_ppr, nsim = 30) vip(vis, geom = "boxplot") # Figure 12 ``` @@ -438,12 +436,13 @@ To use this for computing permutation-based VI scores just pass it via the `metr ```{r vip-nn-mae, fig.cap="Permutation-based VI scores for the NN model fit to the simulated Friedman data. 
In this example, permutation importance is based on the MAE metric."} # Construct VIP (Figure 13) set.seed(2321) # for reproducibility -p1 <- vip(nn, method = "permute", target = "y", metric = mae, - smaller_is_better = TRUE, pred_wrapper = pfun_nnet) + +p1 <- vip(nn, method = "permute", train = trn, target = "y", metric = mae, + smaller_is_better = TRUE, pred_wrapper = pfun_nnet) + ggtitle("Custom loss function: MAE") set.seed(2321) # for reproducibility -p2 <- vip(nn, method = "permute", target = "y", metric = yardstick::mae_vec, - smaller_is_better = TRUE, pred_wrapper = pfun_nnet) + +p2 <- vip(nn, method = "permute", train = trn, target = "y", + metric = yardstick::mae_vec, smaller_is_better = TRUE, + pred_wrapper = pfun_nnet) + ggtitle("Using `yardstick`'s MAE function") p1 + p2 ``` @@ -464,8 +463,8 @@ When using the permutation method with `nsim > 1`, the default is to keep all th ```{r vip-nn-mae-all, fig.cap="Permutation-based feature importance for the NN model fit to the simulated Friedman data. In this example, all the permutation importance scores (points) are displayed for each feature along with their average (bars)."} # Construct VIP (Figure 15) set.seed(8264) # for reproducibility -vip(nn, method = "permute", pred_wrapper = pfun_nnet, target = "y", - metric = "mae", nsim = 10, geom = "point", +vip(nn, method = "permute", pred_wrapper = pfun_nnet, train = trn, + target = "y", metric = "mae", nsim = 10, geom = "point", all_permutations = TRUE, jitter = TRUE) + ggtitle("Plotting all permutation scores") ```