From 4272cecf5ed5e379143645c5fcc65c0ff702bed9 Mon Sep 17 00:00:00 2001 From: mayer79 Date: Tue, 28 Dec 2021 16:56:25 +0100 Subject: [PATCH 1/7] added argument eval_train_metric --- R-package/R/lgb.cv.R | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index e5a638854de9..3caccfca6b2c 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -31,6 +31,9 @@ CVBooster <- R6::R6Class( #' @param showsd \code{boolean}, whether to show standard deviation of cross validation. #' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a #' slight speedup by avoiding unnecessary computation. +#' @param eval_train_metric \code{boolean}, whether to add the cross validation results on the +#' training data. This parameter defaults to \code{FALSE}. Setting it to \code{TRUE} +#' will increase run time. #' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified #' by the values of outcome labels. #' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds @@ -78,6 +81,7 @@ lgb.cv <- function(params = list() , record = TRUE , eval_freq = 1L , showsd = TRUE + , eval_train_metric = FALSE , stratified = TRUE , folds = NULL , init_model = NULL @@ -336,6 +340,9 @@ lgb.cv <- function(params = list() } booster <- Booster$new(params = params, train_set = dtrain) + if (isTRUE(eval_train_metric)) { + booster$add_valid(data = dtrain, name = "train") + } booster$add_valid(data = dtest, name = "valid") return( list(booster = booster) @@ -372,20 +379,21 @@ lgb.cv <- function(params = list() } return(out) }) - + # Prepare collection of evaluation results merged_msg <- lgb.merge.cv.result( msg = msg , showsd = showsd ) - + # Write evaluation result in environment env$eval_list <- merged_msg$eval_list - + # Check for standard deviation requirement if (showsd) { env$eval_err_list <- merged_msg$eval_err_list - } + } + # Loop through env for (f in cb$post_iter) { From 86d90777d79d34f2de9181caec08d6ce45046d0b Mon Sep 17 00:00:00 2001 From: mayer79 Date: Tue, 28 Dec 2021 17:24:35 +0100 Subject: [PATCH 2/7] remove unnecessary whitespace --- R-package/R/lgb.cv.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 3caccfca6b2c..563c565d36d5 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -379,21 +379,20 @@ lgb.cv <- function(params = list() } return(out) }) - + # Prepare collection of evaluation results merged_msg <- lgb.merge.cv.result( msg = msg , showsd = showsd ) - + # Write evaluation result in environment env$eval_list <- merged_msg$eval_list - + # Check for standard deviation requirement if (showsd) { env$eval_err_list <- merged_msg$eval_err_list - } - + } # Loop through env for (f in cb$post_iter) { From 1c156639e016dac975b7cad85f098ba0408adff3 Mon Sep 17 00:00:00 2001 From: mayer79 Date: Tue, 28 Dec 2021 17:32:47 +0100 Subject: [PATCH 3/7] removed further trailing whitespace --- R-package/R/lgb.cv.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 563c565d36d5..6eeae5af36f8 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -31,7 +31,7 @@ CVBooster <- R6::R6Class( #' @param showsd \code{boolean}, whether to show standard deviation of cross validation. #' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a #' slight speedup by avoiding unnecessary computation. -#' @param eval_train_metric \code{boolean}, whether to add the cross validation results on the +#' @param eval_train_metric \code{boolean}, whether to add the cross validation results on the #' training data. This parameter defaults to \code{FALSE}. Setting it to \code{TRUE} #' will increase run time. #' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified @@ -341,7 +341,7 @@ lgb.cv <- function(params = list() booster <- Booster$new(params = params, train_set = dtrain) if (isTRUE(eval_train_metric)) { - booster$add_valid(data = dtrain, name = "train") + booster$add_valid(data = dtrain, name = "train") } booster$add_valid(data = dtest, name = "valid") return( From 987c51d344a29abfbe651098d602552bffcafe89 Mon Sep 17 00:00:00 2001 From: mayer79 Date: Wed, 29 Dec 2021 10:35:41 +0100 Subject: [PATCH 4/7] move new argument to the last position --- R-package/R/lgb.cv.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 6eeae5af36f8..025c2e56f10f 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -31,9 +31,6 @@ CVBooster <- R6::R6Class( #' @param showsd \code{boolean}, whether to show standard deviation of cross validation. #' This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a #' slight speedup by avoiding unnecessary computation. -#' @param eval_train_metric \code{boolean}, whether to add the cross validation results on the -#' training data. This parameter defaults to \code{FALSE}. Setting it to \code{TRUE} -#' will increase run time. #' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified #' by the values of outcome labels. #' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds @@ -46,6 +43,9 @@ CVBooster <- R6::R6Class( #' @param callbacks List of callback functions that are applied at each iteration. #' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model #' into a predictor model which frees up memory and the original datasets +#' @param eval_train_metric \code{boolean}, whether to add the cross validation results on the +#' training data. This parameter defaults to \code{FALSE}. Setting it to \code{TRUE} +#' will increase run time. #' @inheritSection lgb_shared_params Early Stopping #' @return a trained model \code{lgb.CVBooster}. #' @@ -81,7 +81,6 @@ lgb.cv <- function(params = list() , record = TRUE , eval_freq = 1L , showsd = TRUE - , eval_train_metric = FALSE , stratified = TRUE , folds = NULL , init_model = NULL @@ -91,6 +90,7 @@ lgb.cv <- function(params = list() , callbacks = list() , reset_data = FALSE , serializable = TRUE + , eval_train_metric = FALSE ) { if (nrounds <= 0L) { From 2b0524334e8fe178987ce8f2247305e9da1d85da Mon Sep 17 00:00:00 2001 From: mayer79 Date: Wed, 29 Dec 2021 10:55:15 +0100 Subject: [PATCH 5/7] update R docu --- R-package/man/lgb.cv.Rd | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index f240a241b7ac..94fb385b0d10 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -25,7 +25,8 @@ lgb.cv( early_stopping_rounds = NULL, callbacks = list(), reset_data = FALSE, - serializable = TRUE + serializable = TRUE, + eval_train_metric = FALSE ) } \arguments{ @@ -120,6 +121,10 @@ into a predictor model which frees up memory and the original datasets} \item{serializable}{whether to make the resulting objects serializable through functions such as \code{save} or \code{saveRDS} (see section "Model serialization").} + +\item{eval_train_metric}{\code{boolean}, whether to add the cross validation results on the +training data. This parameter defaults to \code{FALSE}. Setting it to \code{TRUE} +will increase run time.} } \value{ a trained model \code{lgb.CVBooster}. From 21dc2d776ae42517679474afeb709e4b2c7b6040 Mon Sep 17 00:00:00 2001 From: mayer79 Date: Wed, 29 Dec 2021 11:32:38 +0100 Subject: [PATCH 6/7] unit tests for eval_train_metric --- R-package/tests/testthat/test_basic.R | 39 +++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 3efa9a21777b..e882f894343b 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -554,6 +554,45 @@ test_that("lgb.cv() respects showsd argument", { expect_identical(evals_no_showsd[["eval_err"]], list()) }) +test_that("lgb.cv() respects eval_train_metric argument", { + dtrain <- lgb.Dataset(train$data, label = train$label) + params <- list( + objective = "regression" + , metric = "l2" + , min_data = 1L + ) + nrounds <- 5L + set.seed(708L) + bst_train <- lgb.cv( + params = params + , data = dtrain + , nrounds = nrounds + , nfold = 3L + , showsd = FALSE + , eval_train_metric = TRUE + ) + set.seed(708L) + bst_no_train <- lgb.cv( + params = params + , data = dtrain + , nrounds = nrounds + , nfold = 3L + , showsd = FALSE + , eval_train_metric = FALSE + ) + expect_equal( + bst_train$record_evals[["valid"]][["l2"]] + , bst_no_train$record_evals[["valid"]][["l2"]] + ) + expect_true("train" %in% names(bst_train$record_evals)) + expect_false("train" %in% names(bst_no_train$record_evals)) + expect_is(bst_train$record_evals[["train"]][["l2"]][["eval"]], "list") + expect_equal( + length(bst_train$record_evals[["train"]][["l2"]][["eval"]]) + , nrounds + ) +}) + context("lgb.train()") test_that("lgb.train() works as expected with multiple eval metrics", { From a82e0631733623477d4d8215d0d1259b69ec1bb6 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Wed, 29 Dec 2021 18:32:00 +0100 Subject: [PATCH 7/7] Update R-package/tests/testthat/test_basic.R Co-authored-by: James Lamb --- R-package/tests/testthat/test_basic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index e882f894343b..a55af2ab8150 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -586,7 +586,7 @@ test_that("lgb.cv() respects eval_train_metric argument", { ) expect_true("train" %in% names(bst_train$record_evals)) expect_false("train" %in% names(bst_no_train$record_evals)) - expect_is(bst_train$record_evals[["train"]][["l2"]][["eval"]], "list") + expect_true(methods::is(bst_train$record_evals[["train"]][["l2"]][["eval"]], "list")) expect_equal( length(bst_train$record_evals[["train"]][["l2"]][["eval"]]) , nrounds