From 4272cecf5ed5e379143645c5fcc65c0ff702bed9 Mon Sep 17 00:00:00 2001
From: mayer79 <mayermichael79@gmail.com>
Date: Tue, 28 Dec 2021 16:56:25 +0100
Subject: [PATCH 1/7] added argument eval_train_metric

---
 R-package/R/lgb.cv.R | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R
index e5a638854de9..3caccfca6b2c 100644
--- a/R-package/R/lgb.cv.R
+++ b/R-package/R/lgb.cv.R
@@ -31,6 +31,9 @@ CVBooster <- R6::R6Class(
 #' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
 #'               This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
 #'               slight speedup by avoiding unnecessary computation.
+#' @param eval_train_metric \code{boolean}, whether to add the cross validation results on the 
+#'               training data. This parameter defaults to \code{FALSE}. Setting it to \code{TRUE}
+#'               will increase run time.
 #' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
 #'                   by the values of outcome labels.
 #' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
@@ -78,6 +81,7 @@ lgb.cv <- function(params = list()
                    , record = TRUE
                    , eval_freq = 1L
                    , showsd = TRUE
+                   , eval_train_metric = FALSE
                    , stratified = TRUE
                    , folds = NULL
                    , init_model = NULL
@@ -336,6 +340,9 @@ lgb.cv <- function(params = list()
       }
 
       booster <- Booster$new(params = params, train_set = dtrain)
+      if (isTRUE(eval_train_metric)) {
+        booster$add_valid(data = dtrain, name = "train")  
+      }
       booster$add_valid(data = dtest, name = "valid")
       return(
         list(booster = booster)
@@ -372,20 +379,21 @@ lgb.cv <- function(params = list()
       }
       return(out)
     })
-
+    
     # Prepare collection of evaluation results
     merged_msg <- lgb.merge.cv.result(
       msg = msg
       , showsd = showsd
     )
-
+    
     # Write evaluation result in environment
     env$eval_list <- merged_msg$eval_list
-
+    
     # Check for standard deviation requirement
     if (showsd) {
       env$eval_err_list <- merged_msg$eval_err_list
-    }
+    } 
+    
 
     # Loop through env
     for (f in cb$post_iter) {

From 86d90777d79d34f2de9181caec08d6ce45046d0b Mon Sep 17 00:00:00 2001
From: mayer79 <mayermichael79@gmail.com>
Date: Tue, 28 Dec 2021 17:24:35 +0100
Subject: [PATCH 2/7] remove unnecessary whitespace

---
 R-package/R/lgb.cv.R | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R
index 3caccfca6b2c..563c565d36d5 100644
--- a/R-package/R/lgb.cv.R
+++ b/R-package/R/lgb.cv.R
@@ -379,21 +379,20 @@ lgb.cv <- function(params = list()
       }
       return(out)
     })
-    
+
     # Prepare collection of evaluation results
     merged_msg <- lgb.merge.cv.result(
       msg = msg
       , showsd = showsd
     )
-    
+
     # Write evaluation result in environment
     env$eval_list <- merged_msg$eval_list
-    
+
     # Check for standard deviation requirement
     if (showsd) {
       env$eval_err_list <- merged_msg$eval_err_list
-    } 
-    
+    }
 
     # Loop through env
     for (f in cb$post_iter) {

From 1c156639e016dac975b7cad85f098ba0408adff3 Mon Sep 17 00:00:00 2001
From: mayer79 <mayermichael79@gmail.com>
Date: Tue, 28 Dec 2021 17:32:47 +0100
Subject: [PATCH 3/7] removed further trailing whitespace

---
 R-package/R/lgb.cv.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R
index 563c565d36d5..6eeae5af36f8 100644
--- a/R-package/R/lgb.cv.R
+++ b/R-package/R/lgb.cv.R
@@ -31,7 +31,7 @@ CVBooster <- R6::R6Class(
 #' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
 #'               This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
 #'               slight speedup by avoiding unnecessary computation.
-#' @param eval_train_metric \code{boolean}, whether to add the cross validation results on the 
+#' @param eval_train_metric \code{boolean}, whether to add the cross validation results on the
 #'               training data. This parameter defaults to \code{FALSE}. Setting it to \code{TRUE}
 #'               will increase run time.
 #' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
@@ -341,7 +341,7 @@ lgb.cv <- function(params = list()
 
       booster <- Booster$new(params = params, train_set = dtrain)
       if (isTRUE(eval_train_metric)) {
-        booster$add_valid(data = dtrain, name = "train")  
+        booster$add_valid(data = dtrain, name = "train")
       }
       booster$add_valid(data = dtest, name = "valid")
       return(

From 987c51d344a29abfbe651098d602552bffcafe89 Mon Sep 17 00:00:00 2001
From: mayer79 <mayermichael79@gmail.com>
Date: Wed, 29 Dec 2021 10:35:41 +0100
Subject: [PATCH 4/7] move new argument to the last position

---
 R-package/R/lgb.cv.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R
index 6eeae5af36f8..025c2e56f10f 100644
--- a/R-package/R/lgb.cv.R
+++ b/R-package/R/lgb.cv.R
@@ -31,9 +31,6 @@ CVBooster <- R6::R6Class(
 #' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
 #'               This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
 #'               slight speedup by avoiding unnecessary computation.
-#' @param eval_train_metric \code{boolean}, whether to add the cross validation results on the
-#'               training data. This parameter defaults to \code{FALSE}. Setting it to \code{TRUE}
-#'               will increase run time.
 #' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
 #'                   by the values of outcome labels.
 #' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
@@ -46,6 +43,9 @@ CVBooster <- R6::R6Class(
 #' @param callbacks List of callback functions that are applied at each iteration.
 #' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
 #'                   into a predictor model which frees up memory and the original datasets
+#' @param eval_train_metric \code{boolean}, whether to add the cross validation results on the
+#'               training data. This parameter defaults to \code{FALSE}. Setting it to \code{TRUE}
+#'               will increase run time.
 #' @inheritSection lgb_shared_params Early Stopping
 #' @return a trained model \code{lgb.CVBooster}.
 #'
@@ -81,7 +81,6 @@ lgb.cv <- function(params = list()
                    , record = TRUE
                    , eval_freq = 1L
                    , showsd = TRUE
-                   , eval_train_metric = FALSE
                    , stratified = TRUE
                    , folds = NULL
                    , init_model = NULL
@@ -91,6 +90,7 @@ lgb.cv <- function(params = list()
                    , callbacks = list()
                    , reset_data = FALSE
                    , serializable = TRUE
+                   , eval_train_metric = FALSE
                    ) {
 
   if (nrounds <= 0L) {

From 2b0524334e8fe178987ce8f2247305e9da1d85da Mon Sep 17 00:00:00 2001
From: mayer79 <mayermichael79@gmail.com>
Date: Wed, 29 Dec 2021 10:55:15 +0100
Subject: [PATCH 5/7] update R documentation

---
 R-package/man/lgb.cv.Rd | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd
index f240a241b7ac..94fb385b0d10 100644
--- a/R-package/man/lgb.cv.Rd
+++ b/R-package/man/lgb.cv.Rd
@@ -25,7 +25,8 @@ lgb.cv(
   early_stopping_rounds = NULL,
   callbacks = list(),
   reset_data = FALSE,
-  serializable = TRUE
+  serializable = TRUE,
+  eval_train_metric = FALSE
 )
 }
 \arguments{
@@ -120,6 +121,10 @@ into a predictor model which frees up memory and the original datasets}
 
 \item{serializable}{whether to make the resulting objects serializable through functions such as
 \code{save} or \code{saveRDS} (see section "Model serialization").}
+
+\item{eval_train_metric}{\code{boolean}, whether to add the cross validation results on the
+training data. This parameter defaults to \code{FALSE}. Setting it to \code{TRUE}
+will increase run time.}
 }
 \value{
 a trained model \code{lgb.CVBooster}.

From 21dc2d776ae42517679474afeb709e4b2c7b6040 Mon Sep 17 00:00:00 2001
From: mayer79 <mayermichael79@gmail.com>
Date: Wed, 29 Dec 2021 11:32:38 +0100
Subject: [PATCH 6/7] unit tests for eval_train_metric

---
 R-package/tests/testthat/test_basic.R | 39 +++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 3efa9a21777b..e882f894343b 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -554,6 +554,45 @@ test_that("lgb.cv() respects showsd argument", {
   expect_identical(evals_no_showsd[["eval_err"]], list())
 })
 
+test_that("lgb.cv() respects eval_train_metric argument", {
+  dtrain <- lgb.Dataset(train$data, label = train$label)
+  params <- list(
+    objective = "regression"
+    , metric = "l2"
+    , min_data = 1L
+  )
+  nrounds <- 5L
+  set.seed(708L)
+  bst_train <- lgb.cv(
+    params = params
+    , data = dtrain
+    , nrounds = nrounds
+    , nfold = 3L
+    , showsd = FALSE
+    , eval_train_metric = TRUE
+  )
+  set.seed(708L)
+  bst_no_train <- lgb.cv(
+    params = params
+    , data = dtrain
+    , nrounds = nrounds
+    , nfold = 3L
+    , showsd = FALSE
+    , eval_train_metric = FALSE
+  )
+  expect_equal(
+    bst_train$record_evals[["valid"]][["l2"]]
+    , bst_no_train$record_evals[["valid"]][["l2"]]
+  )
+  expect_true("train" %in% names(bst_train$record_evals))
+  expect_false("train" %in% names(bst_no_train$record_evals))
+  expect_is(bst_train$record_evals[["train"]][["l2"]][["eval"]], "list")
+  expect_equal(
+    length(bst_train$record_evals[["train"]][["l2"]][["eval"]])
+    , nrounds
+  )
+})
+
 context("lgb.train()")
 
 test_that("lgb.train() works as expected with multiple eval metrics", {

From a82e0631733623477d4d8215d0d1259b69ec1bb6 Mon Sep 17 00:00:00 2001
From: Michael Mayer <mayermichael79@gmail.com>
Date: Wed, 29 Dec 2021 18:32:00 +0100
Subject: [PATCH 7/7] Update R-package/tests/testthat/test_basic.R

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 R-package/tests/testthat/test_basic.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index e882f894343b..a55af2ab8150 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -586,7 +586,7 @@ test_that("lgb.cv() respects eval_train_metric argument", {
   )
   expect_true("train" %in% names(bst_train$record_evals))
   expect_false("train" %in% names(bst_no_train$record_evals))
-  expect_is(bst_train$record_evals[["train"]][["l2"]][["eval"]], "list")
+  expect_true(methods::is(bst_train$record_evals[["train"]][["l2"]][["eval"]], "list"))
   expect_equal(
     length(bst_train$record_evals[["train"]][["l2"]][["eval"]])
     , nrounds