Skip to content

Commit

Permalink
Merge pull request #65 from bgreenwell/devel
Browse files Browse the repository at this point in the history
tweak tests
  • Loading branch information
bgreenwell committed May 10, 2023
2 parents fa8f18d + c209517 commit 0f32d30
Show file tree
Hide file tree
Showing 15 changed files with 94 additions and 168 deletions.
4 changes: 3 additions & 1 deletion .Rproj.user/shared/notebooks/paths
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/Users/bgreenwell/.R/Makevars="EEC1896A"
/Users/bgreenwell/Dropbox/devel/fastshap/.Rbuildignore="B0549DA2"
/Users/bgreenwell/Dropbox/devel/fastshap/.github/workflows/R-CMD-check.yaml="8B7AA513"
/Users/bgreenwell/Dropbox/devel/fastshap/DESCRIPTION="300503D2"
/Users/bgreenwell/Dropbox/devel/fastshap/NAMESPACE="0716B2F8"
/Users/bgreenwell/Dropbox/devel/fastshap/NEWS.md="708C4D66"
Expand All @@ -19,9 +20,10 @@
/Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test-exact.R="2A0B1DDE"
/Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test-fastshap-vs-other-pkgs.R="F70E1DED"
/Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test-genOMat.R="54612D12"
/Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test_fastshap_ames.R="F485343A"
/Users/bgreenwell/Dropbox/devel/fastshap/inst/tinytest/test-shapviz.R="29D8DEE8"
/Users/bgreenwell/Dropbox/devel/fastshap/slowtests/fastshap-genOMat.cpp="99FEC81E"
/Users/bgreenwell/Dropbox/devel/fastshap/slowtests/slowtest-benchmark.R="29ADFB84"
/Users/bgreenwell/Dropbox/devel/fastshap/slowtests/slowtest-parallel.R="7B058F98"
/Users/bgreenwell/Dropbox/devel/fastshap/vignettes/fastshap.Rmd="536A2979"
/Users/bgreenwell/Dropbox/devel/fastshap/vignettes/fastshap.Rmd.orig="EB3E698E"
/Users/bgreenwell/Dropbox/trees/book.tex="4ECC8BA9"
9 changes: 5 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,18 @@ Imports:
foreach,
Rcpp (>= 1.0.1),
utils
Enhances:
lightgbm,
xgboost
Suggests:
AmesHousing,
covr,
knitr,
lightgbm,
ranger,
rmarkdown,
shapviz,
shapviz (>= 0.8.0),
tibble,
tinytest,
xgboost
tinytest (>= 1.4.1)
LinkingTo:
Rcpp,
RcppArmadillo
Expand Down
7 changes: 4 additions & 3 deletions docs/authors.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion docs/news/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ pkgdown: 2.0.7
pkgdown_sha: ~
articles:
fastshap: fastshap.html
last_built: 2023-05-05T16:21Z
last_built: 2023-05-05T18:01Z

16 changes: 16 additions & 0 deletions docs/reference/explain.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions docs/reference/fastshap-package.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 15 additions & 17 deletions inst/tinytest/test-adjust.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
# Exits
if (!requireNamespace("lightgbm", quietly = TRUE)) {
exit_file("Package 'lightgbm' missing")
}
if (!requireNamespace("xgboost", quietly = TRUE)) {
exit_file("Package 'xgboost' missing")
}

library(fastshap)
exit_if_not(
requireNamespace("lightgbm", quietly = TRUE),
requireNamespace("xgboost", quietly = TRUE)
)

# Use one of the available (imputed) versions of the Titanic data
titanic <- titanic_mice[[1L]]
titanic <- fastshap::titanic_mice[[1L]]

# Packages 'lightgbm' and 'xgboost' require numeric values
titanic$survived <- ifelse(titanic$survived == "yes", 1, 0)
Expand All @@ -34,6 +29,7 @@ jack.dawson <- data.matrix(data.frame(
# Package: lightgbm
################################################################################

# lightgbm params
params.lgb <- list(
num_leaves = 4L,
learning_rate = 0.1,
Expand Down Expand Up @@ -63,17 +59,18 @@ diff.lgb <- jack.logit.lgb - baseline.lgb

# Compute feature contributions using MC SHAP using the fastshap package
set.seed(1306) # for reproducibility
ex.fastshap <- explain(bst.lgb, X = X, nsim = 1000, pred_wrapper = pfun.lgb,
newdata = jack.dawson, adjust = TRUE)
ex.fastshap <- fastshap::explain(bst.lgb, X = X, nsim = 1000,
pred_wrapper = pfun.lgb, newdata = jack.dawson,
adjust = TRUE)

# Expectations
expect_equal(sum(ex.fastshap), jack.logit.lgb - baseline.lgb, tolerance = 1e-06)

# Explain a few rows of the training data
X.new <- X[1L:5L, ]
set.seed(2033) # for reproducibility
ex.new <- explain(bst.lgb, X = X, nsim = 2, pred_wrapper = pfun.lgb,
newdata = X.new, adjust = TRUE) # nsim = 2 here ONLY for speed
ex.new <- fastshap::explain(bst.lgb, X = X, nsim = 2, pred_wrapper = pfun.lgb,
newdata = X.new, adjust = TRUE) # `nsim = 2` here ONLY for speed

# Expectations
expect_equal(rowSums(ex.new), pfun.lgb(bst.lgb, newdata = X.new) - baseline.lgb,
Expand All @@ -86,7 +83,7 @@ expect_equal(rowSums(ex.new), pfun.lgb(bst.lgb, newdata = X.new) - baseline.lgb,

for (obj in c("binary:logistic", "binary:logitraw")) {

# Set task parameters
# xgboost params
params.xgb <- list(
max_depth = 2L,
eta = 0.1,
Expand Down Expand Up @@ -115,8 +112,9 @@ for (obj in c("binary:logistic", "binary:logitraw")) {

# Compute feature contributions using MC SHAP using the fastshap package
set.seed(2026) # for reproducibility
ex.fastshap <- explain(bst.xgb, X = X, nsim = 1000, pred_wrapper = pfun.xgb,
newdata = jack.dawson, adjust = TRUE)
ex.fastshap <- fastshap::explain(bst.xgb, X = X, nsim = 1000,
pred_wrapper = pfun.xgb,
newdata = jack.dawson, adjust = TRUE)

# Expectations
expect_equal(sum(ex.fastshap), jack.logit.xgb - baseline.xgb,
Expand Down
16 changes: 7 additions & 9 deletions inst/tinytest/test-baseline.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# Exits
if (!requireNamespace("lightgbm", quietly = TRUE)) {
exit_file("Package 'lightgbm' missing")
}
exit_if_not(requireNamespace("lightgbm", quietly = TRUE))

# Use one of the available (imputed) versions of the Titanic data
titanic <- titanic_mice[[1L]]
titanic <- fastshap::titanic_mice[[1L]]

# Packages 'lightgbm' and 'xgboost' require numeric values
titanic$survived <- ifelse(titanic$survived == "yes", 1, 0)
Expand All @@ -24,7 +21,7 @@ jack.dawson <- data.matrix(data.frame(
parch = 0L
))

# LightGBM parameter list
# lightgbm parameters
params.lgb <- list(
num_leaves = 4L,
learning_rate = 0.1,
Expand Down Expand Up @@ -61,8 +58,9 @@ diff.lgb <- jack.logit.lgb - baseline.lgb

# Compute feature contributions using MC SHAP using the fastshap package
set.seed(1306) # for reproducibility
ex.fastshap <- explain(bst.lgb, X = X, nsim = 1000, pred_wrapper = pfun.lgb,
newdata = jack.dawson, adjust = TRUE, shap_only = FALSE)
ex.fastshap <- fastshap::explain(bst.lgb, X = X, nsim = 1000,
pred_wrapper = pfun.lgb, newdata = jack.dawson,
adjust = TRUE, shap_only = FALSE)

# Expect Shapley values to have additivity property
expect_equal(sum(ex.fastshap$shapley_values), jack.logit.lgb - baseline.lgb,
Expand All @@ -73,7 +71,7 @@ expect_equal(ex.fastshap$baseline, ex.lgb[1L, 6L])

# Compute feature contributions with a different baseline
set.seed(1308) # for reproducibility
ex.fastshap.baseline <- explain(
ex.fastshap.baseline <- fastshap::explain(
object = bst.lgb,
X = X,
nsim = 1000,
Expand Down
13 changes: 5 additions & 8 deletions inst/tinytest/test-convergence.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
# Exits
exit_file("Test not ready")

library(fastshap)

# Generate some training data
set.seed(1318)
trn <- gen_friedman(1000)
trn <- fastshap::gen_friedman(1000)
X <- subset(trn, select = -y)

# Fit an additive ML
Expand All @@ -18,15 +15,15 @@ pfun <- function(object, newdata) { # prediction wrapper

# Generate a new instance to explain
set.seed(1319)
newx <- subset(gen_friedman(1), select = -y)
newx <- subset(fastshap::gen_friedman(1), select = -y)

# Exact Shapley values
ex.exact <- explain(fit, newdata = newx, exact = TRUE)
ex.exact <- fastshap::explain(fit, newdata = newx, exact = TRUE)

# Approximate Shapley values
set.seed(1306) # for reproducibility
ex.fastshap <- explain(fit, X = X, nsim = 10, pred_wrapper = pfun,
newdata = newx, adjust = FALSE)
ex.fastshap <- fastshap::explain(fit, X = X, nsim = 10, pred_wrapper = pfun,
newdata = newx, adjust = FALSE)

# plot(ex.exact, ex.fastshap)
# abline(0, 1)
Expand Down
19 changes: 8 additions & 11 deletions inst/tinytest/test-exact.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# Exits
if (!requireNamespace("xgboost", quietly = TRUE)) {
exit_file("Package xgboost missing")
}
exit_if_not(requireNamespace("xgboost", quietly = TRUE))

# Generate training data from the Friedman 1 benchmark problem
trn <- gen_friedman(500, seed = 101)
trn <- fastshap::gen_friedman(500, seed = 101)
X <- subset(trn, select = -y)
x <- X[1L, , drop = FALSE]

Expand All @@ -17,10 +14,10 @@ pfun <- function(object, newdata) {
}

# Generate exact and approximate Shapley values for entire training set
ex_exact <- explain(fit_lm, exact = TRUE, newdata = x)
ex_exact <- fastshap::explain(fit_lm, exact = TRUE, newdata = x)
set.seed(102)
ex_apprx <- explain(fit_lm, X = X, pred_wrapper = pfun, nsim = 1000,
newdata = x, adjust = TRUE)
ex_apprx <- fastshap::explain(fit_lm, X = X, pred_wrapper = pfun, nsim = 1000,
newdata = x, adjust = TRUE)

# Check accuracy
expect_true(cor(as.numeric(ex_exact), as.numeric((ex_apprx))) > 0.999)
Expand Down Expand Up @@ -56,10 +53,10 @@ fit_xgb <- xgboost::xgboost(

# Generate exact and approximate Shapley values for entire training set
x <- data.matrix(X)[1L, , drop = FALSE]
ex_exact <- explain(fit_xgb, X = x, exact = TRUE)
ex_exact <- fastshap::explain(fit_xgb, X = x, exact = TRUE)
set.seed(132)
ex_apprx <- explain(fit_xgb, X = data.matrix(X), newdata = x, adjust = TRUE,
pred_wrapper = pfun, nsim = 1000)
ex_apprx <- fastshap::explain(fit_xgb, X = data.matrix(X), newdata = x,
adjust = TRUE, pred_wrapper = pfun, nsim = 1000)

# Check accuracy
expect_true(cor(as.numeric(ex_exact), as.numeric((ex_apprx))) > 0.999)
Expand Down
39 changes: 13 additions & 26 deletions inst/tinytest/test-fastshap-vs-other-pkgs.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
# Exits
if (!requireNamespace("iml", quietly = TRUE)) {
exit_file("Package 'iml' missing")
}
if (!requireNamespace("lightgbm", quietly = TRUE)) {
exit_file("Package 'lightgbm' missing")
}

library(fastshap)
library(iml)
library(lightgbm)
library(ranger)
exit_if_not(
requireNamespace("iml", quietly = TRUE),
requireNamespace("lightgbm", quietly = TRUE)
)

# Use one of the available (imputed) versions of the Titanic data
titanic <- titanic_mice[[1L]]
titanic <- fastshap::titanic_mice[[1L]]

# Package 'lightgbm' requires numeric values
titanic$survived <- ifelse(titanic$survived == "yes", 1, 0)
Expand All @@ -21,6 +13,7 @@ titanic$sex <- ifelse(titanic$sex == "male", 1, 0)
# Matrix of only predictor values
X <- data.matrix(subset(titanic, select = -survived))

# lightgbm params
params <- list(
num_leaves = 10L,
learning_rate = 0.1,
Expand All @@ -29,14 +22,8 @@ params <- list(
)

set.seed(1420) # for reproducibility
bst <- lightgbm(X, label = titanic$survived, params = params, nrounds = 45,
verbose = 0)


# Fit a default random forest
set.seed(1250) # for reproducibility
rfo <- ranger(survived ~ ., data = titanic, probability = TRUE,
respect.unordered.factors = "partition")
bst <- lightgbm::lightgbm(X, label = titanic$survived, params = params,
nrounds = 45, verbose = 0)

# Prediction wrapper for computing predicted probability of surviving
pfun <- function(object, newdata) { # prediction wrapper
Expand All @@ -63,14 +50,14 @@ jack.dawson <- data.matrix(jack.dawson)

# Compute feature contributions using MC SHAP using the fastshap package
set.seed(1306) # for reproducibility
ex.fastshap <- explain(bst, X = X, nsim = 1000, pred_wrapper = pfun,
newdata = jack.dawson, adjust = FALSE)
ex.fastshap <- fastshap::explain(bst, X = X, nsim = 1000, pred_wrapper = pfun,
newdata = jack.dawson, adjust = FALSE)

# Compute feature contributions using MC SHAP using the iml package
pred <- Predictor$new(bst, data = as.data.frame(X), predict.fun = pfun)
pred <- iml::Predictor$new(bst, data = as.data.frame(X), predict.fun = pfun)
set.seed(1316) # for reproducibility
ex.iml <- Shapley$new(pred, x.interest = data.frame(jack.dawson),
sample.size = 1000)
ex.iml <- iml::Shapley$new(pred, x.interest = data.frame(jack.dawson),
sample.size = 1000)

# Compare results
res <- cbind(
Expand Down
Loading

0 comments on commit 0f32d30

Please sign in to comment.