Skip to content

Commit

Permalink
Merge pull request #65 from bgreenwell/devel
Browse files Browse the repository at this point in the history
tweak tests
  • Loading branch information
bgreenwell committed May 10, 2023
2 parents fa8f18d + c209517 commit 0f32d30
Show file tree
Hide file tree
Showing 15 changed files with 94 additions and 168 deletions.
4 changes: 3 additions & 1 deletion .Rproj.user/shared/notebooks/paths
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/Users/bgreenwell/.R/Makevars="EEC1896A"
/Users/bgreenwell/Dropbox/devel/fastshap/.Rbuildignore="B0549DA2"
/Users/bgreenwell/Dropbox/devel/fastshap/.github/workflows/R-CMD-check.yaml="8B7AA513"
/Users/bgreenwell/Dropbox/devel/fastshap/DESCRIPTION="300503D2"
/Users/bgreenwell/Dropbox/devel/fastshap/NAMESPACE="0716B2F8"
/Users/bgreenwell/Dropbox/devel/fastshap/NEWS.md="708C4D66"
Expand All @@ -19,9 +20,10 @@
/Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test-exact.R="2A0B1DDE"
/Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test-fastshap-vs-other-pkgs.R="F70E1DED"
/Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test-genOMat.R="54612D12"
/Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test_fastshap_ames.R="F485343A"
/Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test-shapviz.R="29D8DEE8"
/Users/bgreenwell/Dropbox/devel/fastshap/slowtests/fastshap-genOMat.cpp="99FEC81E"
/Users/bgreenwell/Dropbox/devel/fastshap/slowtests/slowtest-benchmark.R="29ADFB84"
/Users/bgreenwell/Dropbox/devel/fastshap/slowtests/slowtest-parallel.R="7B058F98"
/Users/bgreenwell/Dropbox/devel/fastshap/vignettes/fastshap.Rmd="536A2979"
/Users/bgreenwell/Dropbox/devel/fastshap/vignettes/fastshap.Rmd.orig="EB3E698E"
/Users/bgreenwell/Dropbox/trees/book.tex="4ECC8BA9"
9 changes: 5 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,18 @@ Imports:
foreach,
Rcpp (>= 1.0.1),
utils
Enhances:
lightgbm,
xgboost
Suggests:
AmesHousing,
covr,
knitr,
lightgbm,
ranger,
rmarkdown,
shapviz,
shapviz (>= 0.8.0),
tibble,
tinytest,
xgboost
tinytest (>= 1.4.1)
LinkingTo:
Rcpp,
RcppArmadillo
Expand Down
7 changes: 4 additions & 3 deletions docs/authors.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion docs/news/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ pkgdown: 2.0.7
pkgdown_sha: ~
articles:
fastshap: fastshap.html
last_built: 2023-05-05T16:21Z
last_built: 2023-05-05T18:01Z

16 changes: 16 additions & 0 deletions docs/reference/explain.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions docs/reference/fastshap-package.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 15 additions & 17 deletions inst/tinytest/test-adjust.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
# Exits
if (!requireNamespace("lightgbm", quietly = TRUE)) {
exit_file("Package 'lightgbm' missing")
}
if (!requireNamespace("xgboost", quietly = TRUE)) {
exit_file("Package 'xgboost' missing")
}

library(fastshap)
exit_if_not(
requireNamespace("lightgbm", quietly = TRUE),
requireNamespace("xgboost", quietly = TRUE)
)

# Use one of the available (imputed) versions of the Titanic data
titanic <- titanic_mice[[1L]]
titanic <- fastshap::titanic_mice[[1L]]

# Packages 'lightgbm' and 'xgboost' require numeric values
titanic$survived <- ifelse(titanic$survived == "yes", 1, 0)
Expand All @@ -34,6 +29,7 @@ jack.dawson <- data.matrix(data.frame(
# Package: lightgbm
################################################################################

# lightgbm params
params.lgb <- list(
num_leaves = 4L,
learning_rate = 0.1,
Expand Down Expand Up @@ -63,17 +59,18 @@ diff.lgb <- jack.logit.lgb - baseline.lgb

# Compute feature contributions using MC SHAP using the fastshap package
set.seed(1306) # for reproducibility
ex.fastshap <- explain(bst.lgb, X = X, nsim = 1000, pred_wrapper = pfun.lgb,
newdata = jack.dawson, adjust = TRUE)
ex.fastshap <- fastshap::explain(bst.lgb, X = X, nsim = 1000,
pred_wrapper = pfun.lgb, newdata = jack.dawson,
adjust = TRUE)

# Expectations
expect_equal(sum(ex.fastshap), jack.logit.lgb - baseline.lgb, tolerance = 1e-06)

# Explain a few rows of the training data
X.new <- X[1L:5L, ]
set.seed(2033) # for reproducibility
ex.new <- explain(bst.lgb, X = X, nsim = 2, pred_wrapper = pfun.lgb,
newdata = X.new, adjust = TRUE) # nsim = 2 here ONLY for speed
ex.new <- fastshap::explain(bst.lgb, X = X, nsim = 2, pred_wrapper = pfun.lgb,
newdata = X.new, adjust = TRUE) # `nsim = 2` here ONLY for speed

# Expectations
expect_equal(rowSums(ex.new), pfun.lgb(bst.lgb, newdata = X.new) - baseline.lgb,
Expand All @@ -86,7 +83,7 @@ expect_equal(rowSums(ex.new), pfun.lgb(bst.lgb, newdata = X.new) - baseline.lgb,

for (obj in c("binary:logistic", "binary:logitraw")) {

# Set task parameters
# xgboost params
params.xgb <- list(
max_depth = 2L,
eta = 0.1,
Expand Down Expand Up @@ -115,8 +112,9 @@ for (obj in c("binary:logistic", "binary:logitraw")) {

# Compute feature contributions using MC SHAP using the fastshap package
set.seed(2026) # for reproducibility
ex.fastshap <- explain(bst.xgb, X = X, nsim = 1000, pred_wrapper = pfun.xgb,
newdata = jack.dawson, adjust = TRUE)
ex.fastshap <- fastshap::explain(bst.xgb, X = X, nsim = 1000,
pred_wrapper = pfun.xgb,
newdata = jack.dawson, adjust = TRUE)

# Expectations
expect_equal(sum(ex.fastshap), jack.logit.xgb - baseline.xgb,
Expand Down
16 changes: 7 additions & 9 deletions inst/tinytest/test-baseline.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# Exits
if (!requireNamespace("lightgbm", quietly = TRUE)) {
exit_file("Package 'lightgbm' missing")
}
exit_if_not(requireNamespace("lightgbm", quietly = TRUE))

# Use one of the available (imputed) versions of the Titanic data
titanic <- titanic_mice[[1L]]
titanic <- fastshap::titanic_mice[[1L]]

# Packages 'lightgbm' and 'xgboost' require numeric values
titanic$survived <- ifelse(titanic$survived == "yes", 1, 0)
Expand All @@ -24,7 +21,7 @@ jack.dawson <- data.matrix(data.frame(
parch = 0L
))

# LightGBM parameter list
# lightgbm parameters
params.lgb <- list(
num_leaves = 4L,
learning_rate = 0.1,
Expand Down Expand Up @@ -61,8 +58,9 @@ diff.lgb <- jack.logit.lgb - baseline.lgb

# Compute feature contributions using MC SHAP using the fastshap package
set.seed(1306) # for reproducibility
ex.fastshap <- explain(bst.lgb, X = X, nsim = 1000, pred_wrapper = pfun.lgb,
newdata = jack.dawson, adjust = TRUE, shap_only = FALSE)
ex.fastshap <- fastshap::explain(bst.lgb, X = X, nsim = 1000,
pred_wrapper = pfun.lgb, newdata = jack.dawson,
adjust = TRUE, shap_only = FALSE)

# Expect Shapley values to have additivity property
expect_equal(sum(ex.fastshap$shapley_values), jack.logit.lgb - baseline.lgb,
Expand All @@ -73,7 +71,7 @@ expect_equal(ex.fastshap$baseline, ex.lgb[1L, 6L])

# Compute feature contributions with a different baseline
set.seed(1308) # for reproducibility
ex.fastshap.baseline <- explain(
ex.fastshap.baseline <- fastshap::explain(
object = bst.lgb,
X = X,
nsim = 1000,
Expand Down
13 changes: 5 additions & 8 deletions inst/tinytest/test-convergence.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
# Exits
exit_file("Test not ready")

library(fastshap)

# Generate some training data
set.seed(1318)
trn <- gen_friedman(1000)
trn <- fastshap::gen_friedman(1000)
X <- subset(trn, select = -y)

# Fit an additive ML
Expand All @@ -18,15 +15,15 @@ pfun <- function(object, newdata) { # prediction wrapper

# Generate a new instance to explain
set.seed(1319)
newx <- subset(gen_friedman(1), select = -y)
newx <- subset(fastshap::gen_friedman(1), select = -y)

# Exact Shapley values
ex.exact <- explain(fit, newdata = newx, exact = TRUE)
ex.exact <- fastshap::explain(fit, newdata = newx, exact = TRUE)

# Approximate Shapley values
set.seed(1306) # for reproducibility
ex.fastshap <- explain(fit, X = X, nsim = 10, pred_wrapper = pfun,
newdata = newx, adjust = FALSE)
ex.fastshap <- fastshap::explain(fit, X = X, nsim = 10, pred_wrapper = pfun,
newdata = newx, adjust = FALSE)

# plot(ex.exact, ex.fastshap)
# abline(0, 1)
Expand Down
19 changes: 8 additions & 11 deletions inst/tinytest/test-exact.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# Exits
if (!requireNamespace("xgboost", quietly = TRUE)) {
exit_file("Package xgboost missing")
}
exit_if_not(requireNamespace("xgboost", quietly = TRUE))

# Generate training data from the Friedman 1 benchmark problem
trn <- gen_friedman(500, seed = 101)
trn <- fastshap::gen_friedman(500, seed = 101)
X <- subset(trn, select = -y)
x <- X[1L, , drop = FALSE]

Expand All @@ -17,10 +14,10 @@ pfun <- function(object, newdata) {
}

# Generate exact and approximate Shapley values for entire training set
ex_exact <- explain(fit_lm, exact = TRUE, newdata = x)
ex_exact <- fastshap::explain(fit_lm, exact = TRUE, newdata = x)
set.seed(102)
ex_apprx <- explain(fit_lm, X = X, pred_wrapper = pfun, nsim = 1000,
newdata = x, adjust = TRUE)
ex_apprx <- fastshap::explain(fit_lm, X = X, pred_wrapper = pfun, nsim = 1000,
newdata = x, adjust = TRUE)

# Check accuracy
expect_true(cor(as.numeric(ex_exact), as.numeric((ex_apprx))) > 0.999)
Expand Down Expand Up @@ -56,10 +53,10 @@ fit_xgb <- xgboost::xgboost(

# Generate exact and approximate Shapley values for entire training set
x <- data.matrix(X)[1L, , drop = FALSE]
ex_exact <- explain(fit_xgb, X = x, exact = TRUE)
ex_exact <- fastshap::explain(fit_xgb, X = x, exact = TRUE)
set.seed(132)
ex_apprx <- explain(fit_xgb, X = data.matrix(X), newdata = x, adjust = TRUE,
pred_wrapper = pfun, nsim = 1000)
ex_apprx <- fastshap::explain(fit_xgb, X = data.matrix(X), newdata = x,
adjust = TRUE, pred_wrapper = pfun, nsim = 1000)

# Check accuracy
expect_true(cor(as.numeric(ex_exact), as.numeric((ex_apprx))) > 0.999)
Expand Down
39 changes: 13 additions & 26 deletions inst/tinytest/test-fastshap-vs-other-pkgs.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
# Exits
if (!requireNamespace("iml", quietly = TRUE)) {
exit_file("Package 'iml' missing")
}
if (!requireNamespace("lightgbm", quietly = TRUE)) {
exit_file("Package 'lightgbm' missing")
}

library(fastshap)
library(iml)
library(lightgbm)
library(ranger)
exit_if_not(
requireNamespace("iml", quietly = TRUE),
requireNamespace("lightgbm", quietly = TRUE)
)

# Use one of the available (imputed) versions of the Titanic data
titanic <- titanic_mice[[1L]]
titanic <- fastshap::titanic_mice[[1L]]

# Package 'lightgbm' requires numeric values
titanic$survived <- ifelse(titanic$survived == "yes", 1, 0)
Expand All @@ -21,6 +13,7 @@ titanic$sex <- ifelse(titanic$sex == "male", 1, 0)
# Matrix of only predictor values
X <- data.matrix(subset(titanic, select = -survived))

# lightgbm params
params <- list(
num_leaves = 10L,
learning_rate = 0.1,
Expand All @@ -29,14 +22,8 @@ params <- list(
)

set.seed(1420) # for reproducibility
bst <- lightgbm(X, label = titanic$survived, params = params, nrounds = 45,
verbose = 0)


# Fit a default random forest
set.seed(1250) # for reproducibility
rfo <- ranger(survived ~ ., data = titanic, probability = TRUE,
respect.unordered.factors = "partition")
bst <- lightgbm::lightgbm(X, label = titanic$survived, params = params,
nrounds = 45, verbose = 0)

# Prediction wrapper for computing predicted probability of surviving
pfun <- function(object, newdata) { # prediction wrapper
Expand All @@ -63,14 +50,14 @@ jack.dawson <- data.matrix(jack.dawson)

# Compute feature contributions using MC SHAP using the fastshap package
set.seed(1306) # for reproducibility
ex.fastshap <- explain(bst, X = X, nsim = 1000, pred_wrapper = pfun,
newdata = jack.dawson, adjust = FALSE)
ex.fastshap <- fastshap::explain(bst, X = X, nsim = 1000, pred_wrapper = pfun,
newdata = jack.dawson, adjust = FALSE)

# Compute feature contributions using MC SHAP using the iml package
pred <- Predictor$new(bst, data = as.data.frame(X), predict.fun = pfun)
pred <- iml::Predictor$new(bst, data = as.data.frame(X), predict.fun = pfun)
set.seed(1316) # for reproducibility
ex.iml <- Shapley$new(pred, x.interest = data.frame(jack.dawson),
sample.size = 1000)
ex.iml <- iml::Shapley$new(pred, x.interest = data.frame(jack.dawson),
sample.size = 1000)

# Compare results
res <- cbind(
Expand Down
Loading

0 comments on commit 0f32d30

Please sign in to comment.