From 11bd7e7fb6baf5d09ae1711f9f6348e629238da8 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 27 Aug 2020 10:54:55 -0500 Subject: [PATCH 01/37] [R-package] update DESCRIPTION per CRAN comments --- R-package/DESCRIPTION | 7 ++++-- R-package/LICENSE | 23 ++--------------- R-package/cran-comments.md | 51 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 23 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index d0c5a429198c..0528ae6c0d9c 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -9,7 +9,8 @@ Authors@R: c( person("Yachen", "Yan", role = c("ctb")), person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")) ) -Description: Tree based algorithms can be improved by introducing boosting frameworks. 'LightGBM' is one such framework, and this package offers an R interface to work with it. +Description: Tree based algorithms can be improved by introducing boosting frameworks. 'LightGBM' is one such framework, based on Ke, Guolin et al. (2017). See the paper "LightGBM: a highly efficient gradient boosting decision tree", linked below. + This package offers an R interface to work with it. It is designed to be distributed and efficient with the following advantages: 1. Faster training speed and higher efficiency. 2. Lower memory usage. @@ -20,7 +21,9 @@ Description: Tree based algorithms can be improved by introducing boosting frame Comparison experiments on public datasets suggest that 'LightGBM' can outperform existing boosting frameworks on both efficiency and accuracy, with significantly lower memory consumption. In addition, parallel experiments suggest that in certain circumstances, 'LightGBM' can achieve a linear speed-up in training time by using multiple machines. 
Encoding: UTF-8 License: MIT + file LICENSE -URL: https://github.com/Microsoft/LightGBM +URL: + https://github.com/Microsoft/LightGBM + http://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision BugReports: https://github.com/Microsoft/LightGBM/issues NeedsCompilation: yes Biarch: true diff --git a/R-package/LICENSE b/R-package/LICENSE index 5ae193c94d0c..f8c67ce8d1d2 100644 --- a/R-package/LICENSE +++ b/R-package/LICENSE @@ -1,21 +1,2 @@ -The MIT License (MIT) - -Copyright (c) Microsoft Corporation - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
+YEAR: 2016 +COPYRIGHT HOLDER: Microsoft Corporation \ No newline at end of file diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index a405da6a792b..ba68fa7ef34b 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -1,5 +1,56 @@ # CRAN Submission History +## v3.0.0 - Submission 2 - (August 28, 2020) + +## v3.0.0 - Submission 1 - (August 24, 2020) + +NOTE: 3.0.0-1 was never released to CRAN. CRAN was on vacation August 14-24, 2020, and in that time version 3.0.0-1 (a release candidate) because 3.0.0. + +### CRAN respoonse + +> Please only ship the CRAN template fior the MIT license. + +> Is there some reference about the method you can add in the Description field in the form Authors (year) doi:.....? + +> Please fix and resubmit. + +### `R CMD check` results + +* Debian: 1 NOTE + + ```text + * checking CRAN incoming feasibility ... NOTE + Maintainer: ‘Guolin Ke ’ + + New submission + + License components with restrictions and base license permitting such: + MIT + file LICENSE + ``` + +* Windows: 1 NOTE + + ```text + * checking CRAN incoming feasibility ... NOTE + Maintainer: 'Guolin Ke ' + + New submission + + License components with restrictions and base license permitting such: + MIT + file LICENSE + ``` + +### Maintainer Notes + +Tried updating `LICENSE` file to this template: + +```yaml +YEAR: 2016 +COPYRIGHT HOLDER: Microsoft Corporation +``` + +Added a citation and link for [the main paper](http://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision) in `DESCRIPTION`. 
+ ## v3.0.0-1 - Submission 3 - (August 12, 2020) ### CRAN response From aa1ad24219b289762a2fef385fc3b5aa722afc5c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 27 Aug 2020 10:58:36 -0500 Subject: [PATCH 02/37] newlines --- R-package/DESCRIPTION | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 0528ae6c0d9c..46d80c70af99 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -9,7 +9,9 @@ Authors@R: c( person("Yachen", "Yan", role = c("ctb")), person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")) ) -Description: Tree based algorithms can be improved by introducing boosting frameworks. 'LightGBM' is one such framework, based on Ke, Guolin et al. (2017). See the paper "LightGBM: a highly efficient gradient boosting decision tree", linked below. +Description: Tree based algorithms can be improved by introducing boosting frameworks. + 'LightGBM' is one such framework, based on Ke, Guolin et al. (2017). + See the paper "LightGBM: a highly efficient gradient boosting decision tree", linked below. This package offers an R interface to work with it. It is designed to be distributed and efficient with the following advantages: 1. Faster training speed and higher efficiency. 
@@ -22,7 +24,7 @@ Description: Tree based algorithms can be improved by introducing boosting frame Encoding: UTF-8 License: MIT + file LICENSE URL: - https://github.com/Microsoft/LightGBM + https://github.com/Microsoft/LightGBM, http://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision BugReports: https://github.com/Microsoft/LightGBM/issues NeedsCompilation: yes From c41a9471039edf17bfadd1a8e3d98f2bd841d3a2 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 27 Aug 2020 22:34:23 +0100 Subject: [PATCH 03/37] Apply suggestions from code review Co-authored-by: Nikita Titov --- R-package/DESCRIPTION | 2 +- R-package/cran-comments.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 46d80c70af99..203e5d5e4339 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -25,7 +25,7 @@ Encoding: UTF-8 License: MIT + file LICENSE URL: https://github.com/Microsoft/LightGBM, - http://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision + https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision BugReports: https://github.com/Microsoft/LightGBM/issues NeedsCompilation: yes Biarch: true diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index ba68fa7ef34b..3996acbddd6b 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -4,7 +4,7 @@ ## v3.0.0 - Submission 1 - (August 24, 2020) -NOTE: 3.0.0-1 was never released to CRAN. CRAN was on vacation August 14-24, 2020, and in that time version 3.0.0-1 (a release candidate) because 3.0.0. +NOTE: 3.0.0-1 was never released to CRAN. CRAN was on vacation August 14-24, 2020, and in that time version 3.0.0-1 (a release candidate) became 3.0.0. 
### CRAN respoonse @@ -49,7 +49,7 @@ YEAR: 2016 COPYRIGHT HOLDER: Microsoft Corporation ``` -Added a citation and link for [the main paper](http://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision) in `DESCRIPTION`. +Added a citation and link for [the main paper](https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision) in `DESCRIPTION`. ## v3.0.0-1 - Submission 3 - (August 12, 2020) From e80cb543982eb486d8838472657ad06cd3bd24ad Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 28 Aug 2020 09:31:04 -0500 Subject: [PATCH 04/37] more fixes --- .ci/test_r_package.sh | 4 +++ .ci/test_r_package_windows.ps1 | 4 +++ R-package/cran-comments.md | 54 +++++++++++++++++++++++++++++++++- build-cran-package.sh | 1 + 4 files changed, 62 insertions(+), 1 deletion(-) diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh index 0d65a5dcec8a..2f429df85ea4 100755 --- a/.ci/test_r_package.sh +++ b/.ci/test_r_package.sh @@ -7,6 +7,10 @@ mkdir -p $R_LIB_PATH export R_LIBS=$R_LIB_PATH export PATH="$R_LIB_PATH/R/bin:$PATH" +# hack to get around this: +# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html +export _R_CHECK_SYSTEM_CLOCK_=0 + # Get details needed for installing R components # # NOTES: diff --git a/.ci/test_r_package_windows.ps1 b/.ci/test_r_package_windows.ps1 index 04ed1b062c07..6b97b4b88fd8 100644 --- a/.ci/test_r_package_windows.ps1 +++ b/.ci/test_r_package_windows.ps1 @@ -74,6 +74,10 @@ $env:CTAN_MIRROR = "https://ctan.math.illinois.edu/systems/win32/miktex" $env:CTAN_MIKTEX_ARCHIVE = "$env:CTAN_MIRROR/setup/windows-x64/" $env:CTAN_PACKAGE_ARCHIVE = "$env:CTAN_MIRROR/tm/packages/" +# hack to get around this: +# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html +$env:_R_CHECK_SYSTEM_CLOCK_ = 0 + if (($env:COMPILER -eq "MINGW") -and ($env:R_BUILD_TYPE -eq "cmake")) { $env:CXX = "$env:RTOOLS_MINGW_BIN/g++.exe" $env:CC = "$env:RTOOLS_MINGW_BIN/gcc.exe" diff --git 
a/R-package/cran-comments.md b/R-package/cran-comments.md index 3996acbddd6b..62bbfba67908 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -2,13 +2,65 @@ ## v3.0.0 - Submission 2 - (August 28, 2020) +### CRAN response + +Failing pre-checks. + +### `R CMD check` results + +* Debian: 2 NOTEs + + ```text + * checking CRAN incoming feasibility ... NOTE + Maintainer: 'Guolin Ke ' + + New submission + + Possibly mis-spelled words in DESCRIPTION: + Guolin (13:52) + Ke (13:48) + LightGBM (14:20) + al (13:62) + et (13:59) + + * checking top-level files ... NOTE + Non-standard files/directories found at top level: + 'docs' 'lightgbm-hex-logo.png' 'lightgbm-hex-logo.svg' + ``` + +* Windows: 2 NOTEs + + ```text + * checking CRAN incoming feasibility ... NOTE + Maintainer: 'Guolin Ke ' + + New submission + + Possibly mis-spelled words in DESCRIPTION: + Guolin (13:52) + Ke (13:48) + LightGBM (14:20) + al (13:62) + et (13:59) + + * checking top-level files ... NOTE + Non-standard files/directories found at top level: + 'docs' 'lightgbm-hex-logo.png' 'lightgbm-hex-logo.svg' + ``` + +### Maintainer Notes + +We should tell them the misspellings note is a false positive. + +For the note about included files, that is my fault. I had extra files lying around when I generated the package. I'm surprised to see `docs/` in that list, since it is ignored in `.Rbuildignore`. I even tested that with [the exact code Rbuildignore uses](https://github.com/wch/r-source/blob/9d13622f41cfa0f36db2595bd6a5bf93e2010e21/src/library/tools/R/build.R#L85). For now, I added `rm -r docs/` to `build-cran-package.sh`. We can figure out what is happening with `.Rbuildignore` in the future, but it shouldn't block a release. + ## v3.0.0 - Submission 1 - (August 24, 2020) NOTE: 3.0.0-1 was never released to CRAN. CRAN was on vacation August 14-24, 2020, and in that time version 3.0.0-1 (a release candidate) became 3.0.0.
### CRAN respoonse -> Please only ship the CRAN template fior the MIT license. +> Please only ship the CRAN template for the MIT license. > Is there some reference about the method you can add in the Description field in the form Authors (year) doi:.....? diff --git a/build-cran-package.sh b/build-cran-package.sh index eb48bde1e563..d0a025239035 100755 --- a/build-cran-package.sh +++ b/build-cran-package.sh @@ -33,6 +33,7 @@ cd ${TEMP_R_DIR} # Remove files not needed for CRAN echo "Removing files not needed for CRAN" rm src/install.libs.R + rm -r docs/ rm -r src/cmake/ rm -r inst/ rm -r pkgdown/ From 400c35bd066a37e28b1e3b91e83c265b6064f6e2 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 29 Aug 2020 00:01:56 -0500 Subject: [PATCH 05/37] update Rbuildignore --- R-package/.Rbuildignore | 2 +- build-cran-package.sh | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/R-package/.Rbuildignore b/R-package/.Rbuildignore index 7eb89dac330b..5437db0dd4dc 100644 --- a/R-package/.Rbuildignore +++ b/R-package/.Rbuildignore @@ -3,7 +3,7 @@ AUTOCONF_UBUNTU_VERSION ^.*\.bin ^build_r.R$ ^cran-comments\.md$ -^docs/.*$ +^docs$ ^.*\.dll \.gitkeep$ ^Makefile$ diff --git a/build-cran-package.sh b/build-cran-package.sh index d0a025239035..eb48bde1e563 100755 --- a/build-cran-package.sh +++ b/build-cran-package.sh @@ -33,7 +33,6 @@ cd ${TEMP_R_DIR} # Remove files not needed for CRAN echo "Removing files not needed for CRAN" rm src/install.libs.R - rm -r docs/ rm -r src/cmake/ rm -r inst/ rm -r pkgdown/ From 541d2d96146de2ab14883acc37b81f4990bd15da Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 4 Sep 2020 10:17:07 -0500 Subject: [PATCH 06/37] more changes --- R-package/DESCRIPTION | 18 ++++++++++++------ R-package/configure | 18 +++++++++--------- R-package/cran-comments.md | 38 ++++++++++++++++++++++++++++++++++++++ VERSION.txt | 2 +- 4 files changed, 60 insertions(+), 16 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 
203e5d5e4339..d68c42a09150 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -7,11 +7,19 @@ Authors@R: c( person("Guolin", "Ke", email = "guolin.ke@microsoft.com", role = c("aut", "cre")), person("Damien", "Soukhavong", email = "damien.soukhavong@skema.edu", role = c("aut")), person("Yachen", "Yan", role = c("ctb")), - person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")) + person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")), + person("Qi", "Meng", role = c("aut")), + person("Thomas", "Finley", role = c("aut")), + person("Taifeng", "Wang", role = c("aut")), + person("Wei", "Chen", role = c("aut")), + person("Weidong", "Ma", role = c("aut")), + person("Qiwei", "Ye", role = c("aut")), + person("Tie-Yan", "Liu", role = c("aut")), + person("Microsoft Corporation", role = c("cph")), + person("Dropbox, Inc.", role = c("cph")) ) Description: Tree based algorithms can be improved by introducing boosting frameworks. - 'LightGBM' is one such framework, based on Ke, Guolin et al. (2017). - See the paper "LightGBM: a highly efficient gradient boosting decision tree", linked below. + 'LightGBM' is one such framework, based on Ke, Guolin et al. (2017) <https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision>. This package offers an R interface to work with it. It is designed to be distributed and efficient with the following advantages: 1. Faster training speed and higher efficiency. @@ -23,9 +31,7 @@ Description: Tree based algorithms can be improved by introducing boosting frame Comparison experiments on public datasets suggest that 'LightGBM' can outperform existing boosting frameworks on both efficiency and accuracy, with significantly lower memory consumption. In addition, parallel experiments suggest that in certain circumstances, 'LightGBM' can achieve a linear speed-up in training time by using multiple machines.
Encoding: UTF-8 License: MIT + file LICENSE -URL: - https://github.com/Microsoft/LightGBM, - https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision +URL: https://github.com/Microsoft/LightGBM BugReports: https://github.com/Microsoft/LightGBM/issues NeedsCompilation: yes Biarch: true diff --git a/R-package/configure b/R-package/configure index 02fcfa028b0d..c5ed06752af3 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for lightgbm 3.0.0.99. +# Generated by GNU Autoconf 2.69 for lightgbm 3.0.0. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -576,8 +576,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='lightgbm' PACKAGE_TARNAME='lightgbm' -PACKAGE_VERSION='3.0.0.99' -PACKAGE_STRING='lightgbm 3.0.0.99' +PACKAGE_VERSION='3.0.0' +PACKAGE_STRING='lightgbm 3.0.0' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1182,7 +1182,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures lightgbm 3.0.0.99 to adapt to many kinds of systems. +\`configure' configures lightgbm 3.0.0 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1244,7 +1244,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of lightgbm 3.0.0.99:";; + short | recursive ) echo "Configuration of lightgbm 3.0.0:";; esac cat <<\_ACEOF @@ -1311,7 +1311,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -lightgbm configure 3.0.0.99 +lightgbm configure 3.0.0 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. 
@@ -1328,7 +1328,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by lightgbm $as_me 3.0.0.99, which was +It was created by lightgbm $as_me 3.0.0, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2377,7 +2377,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by lightgbm $as_me 3.0.0.99, which was +This file was extended by lightgbm $as_me 3.0.0, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -2430,7 +2430,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -lightgbm config.status 3.0.0.99 +lightgbm config.status 3.0.0 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index 62bbfba67908..6931b05d7539 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -1,5 +1,43 @@ # CRAN Submission History +## v3.0.0 - Submission 3 - (August 29, 2020) + +### CRAN response + +* Please write references in the description of the DESCRIPTION file in +the form + - authors (year) doi:... + - authors (year) arXiv:... + - authors (year, ISBN:...) +* if those are not available: authors (year) https:... with no space after 'doi:', 'arXiv:', 'https:' and angle brackets for auto-linking. +* (If you want to add a title as well please put it in quotes: "Title") + +* \dontrun{} should only be used if the example really cannot be executed (e.g. because of missing additional software, missing API keys, ...) by the user. That's why wrapping examples in \dontrun{} adds the comment ("# Not run:") as a warning for the user. 
Does not seem necessary. Please unwrap the examples if they are executable in < 5 sec, or replace +\dontrun{} with \donttest{}. + +* Please do not modify the global environment (e.g. by using <<-) in your +functions. This is not allowed by the CRAN policies. + +* Please always add all authors, contributors and copyright holders in the Authors@R field with the appropriate roles. From CRAN policies you agreed to: "The ownership of copyright and intellectual property rights of all components of the package must be clear and unambiguous (including from the authors specification in the DESCRIPTION file). Where code is copied (or derived) from the work of others (including from R itself), care must be taken that any copyright/license statements are preserved and authorship is not misrepresented." e.g.: Microsoft Corporation, Dropbox Inc. Please explain in the submission comments what you did about this issue. + +Please fix and resubmit + +### Maintainer Notes + +Responded to CRAN with the following: + +The paper citation has been adjusted as requested. We were using 'glmnet' as a guide on how to include the URL but maybe they are no longer in compliance with CRAN policies: https://github.com/cran/glmnet/blob/b1a4b50de01e0cd24343959d7cf86452bac17b26/DESCRIPTION + +All authors from the original LightGBM paper have been added to Authors@R as `"aut"`. We have also added Microsoft and Dropbox, Inc. as `"cph"` (copyright holders). These roles were chosen based on the guidance in https://journal.r-project.org/archive/2012-1/RJournal_2012-1_Hornik~et~al.pdf. + +lightgbm's code does use `<<-`, but it does not modify the global environment. The uses of `<<-` in R/lgb.interprete.R and R/callback.R are in functions which are called in an environment created by the lightgbm functions that call them, and this operator is used to reach one level up into the calling function's environment.
+ +We chose to wrap our examples in `\dontrun{}` because we found, through testing on https://builder.r-hub.io/ and in our own continuous integration environments, that their run time varies a lot between platforms, and we cannot guarantee that all examples will run in under 5 seconds. We intentionally chose `\dontrun{}` over `\donttest{}` because this item in the R 4.0.0 changelog (https://cran.r-project.org/doc/manuals/r-devel/NEWS.html) seems to indicate that \donttest will be ignored by CRAN's automated checks: + +> "`R CMD check --as-cran` now runs \donttest examples (which are run by example()) instead of instructing the tester to do so. This can be temporarily circumvented during development by setting environment variable `_R_CHECK_DONTTEST_EXAMPLES_` to a false value." + +We run all examples with `R CMD check --as-cran --run-dontrun` in our continuous integration tests on every commit to the package, so we have high confidence that they are working correctly. + ## v3.0.0 - Submission 2 - (August 28, 2020) ### CRAN response diff --git a/VERSION.txt b/VERSION.txt index 96533ec0ba84..4a36342fcab7 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -3.0.0.99 +3.0.0 From d16bafc28492404385d05ad0eb759dd8a3345dd6 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 11 Sep 2020 09:53:15 -0500 Subject: [PATCH 07/37] more changes per CRAN response --- .ci/lint_r_code.R | 5 ++- R-package/R/callback.R | 15 ++++--- R-package/R/lgb.Booster.R | 12 +++--- R-package/R/lgb.Dataset.R | 36 +++++++++------- R-package/R/lgb.convert_with_rules.R | 2 +- R-package/R/lgb.cv.R | 2 +- R-package/R/lgb.importance.R | 2 +- R-package/R/lgb.interprete.R | 2 +- R-package/R/lgb.model.dt.tree.R | 2 +- R-package/R/lgb.plot.importance.R | 2 +- R-package/R/lgb.plot.interpretation.R | 2 +- R-package/R/lgb.train.R | 2 +- R-package/R/lgb.unloader.R | 2 +- R-package/R/lightgbm.R | 1 + R-package/R/readRDS.lgb.Booster.R | 4 +- R-package/R/removed.R | 8 +++- R-package/R/saveRDS.lgb.Booster.R | 2 +- 
R-package/cran-comments.md | 44 ++++++++++++++++++-- R-package/man/dim.Rd | 2 +- R-package/man/dimnames.lgb.Dataset.Rd | 7 +++- R-package/man/getinfo.Rd | 4 +- R-package/man/lgb.Dataset.Rd | 2 +- R-package/man/lgb.Dataset.construct.Rd | 5 ++- R-package/man/lgb.Dataset.create.valid.Rd | 2 +- R-package/man/lgb.Dataset.save.Rd | 4 +- R-package/man/lgb.Dataset.set.categorical.Rd | 4 +- R-package/man/lgb.Dataset.set.reference.Rd | 4 +- R-package/man/lgb.convert_with_rules.Rd | 2 +- R-package/man/lgb.cv.Rd | 2 +- R-package/man/lgb.dump.Rd | 2 +- R-package/man/lgb.get.eval.result.Rd | 4 +- R-package/man/lgb.importance.Rd | 2 +- R-package/man/lgb.interprete.Rd | 2 +- R-package/man/lgb.load.Rd | 2 +- R-package/man/lgb.model.dt.tree.Rd | 2 +- R-package/man/lgb.plot.importance.Rd | 2 +- R-package/man/lgb.plot.interpretation.Rd | 2 +- R-package/man/lgb.prepare.Rd | 3 ++ R-package/man/lgb.prepare2.Rd | 3 ++ R-package/man/lgb.prepare_rules.Rd | 5 ++- R-package/man/lgb.prepare_rules2.Rd | 5 ++- R-package/man/lgb.save.Rd | 2 +- R-package/man/lgb.train.Rd | 2 +- R-package/man/lgb.unloader.Rd | 2 +- R-package/man/lightgbm.Rd | 3 ++ R-package/man/predict.lgb.Booster.Rd | 2 +- R-package/man/readRDS.lgb.Booster.Rd | 4 +- R-package/man/saveRDS.lgb.Booster.Rd | 2 +- R-package/man/setinfo.Rd | 6 ++- R-package/man/slice.Rd | 4 +- 50 files changed, 163 insertions(+), 81 deletions(-) diff --git a/.ci/lint_r_code.R b/.ci/lint_r_code.R index ce4bad696dc6..91b72432e489 100755 --- a/.ci/lint_r_code.R +++ b/.ci/lint_r_code.R @@ -53,7 +53,8 @@ LINTERS_TO_USE <- list( , "true_false" = lintr::T_and_F_symbol_linter , "undesirable_function" = lintr::undesirable_function_linter( fun = c( - "cbind" = paste0( + "cat" = "CRAN forbids the use of cat() in packages except in special cases. Use message() or warning()." + , "cbind" = paste0( "cbind is an unsafe way to build up a data frame. merge() or direct " , "column assignment is preferred." 
) @@ -85,7 +86,7 @@ LINTERS_TO_USE <- list( , "unneeded_concatenation" = lintr::unneeded_concatenation_linter ) -cat(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT))) +print(sprintf("Found %i R files to lint\n", length(FILES_TO_LINT))) results <- NULL diff --git a/R-package/R/callback.R b/R-package/R/callback.R index 9f56a445db25..a01ace2144c8 100644 --- a/R-package/R/callback.R +++ b/R-package/R/callback.R @@ -175,7 +175,7 @@ cb.print.evaluation <- function(period = 1L) { # Check if message is existing if (nchar(msg) > 0L) { - cat(merge.eval.string(env), "\n") + print(merge.eval.string(env)) } } @@ -290,7 +290,12 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose = # Check if verbose or not if (isTRUE(verbose)) { - cat("Will train until there is no improvement in ", stopping_rounds, " rounds.\n\n", sep = "") + msg <- paste0( + "Will train until there is no improvement in " + , stopping_rounds + , " rounds." + ) + print(msg) } # Internally treat everything as a maximization task @@ -365,8 +370,7 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose = # Print message if verbose if (isTRUE(verbose)) { - cat("Early stopping, best iteration is:", "\n") - cat(best_msg[[i]], "\n") + print(paste0("Early stopping, best iteration is: ", best_msg[[i]])) } @@ -386,8 +390,7 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose = # Print message if verbose if (isTRUE(verbose)) { - cat("Did not meet early stopping, best iteration is:", "\n") - cat(best_msg[[i]], "\n") + print(paste0("Did not meet early stopping, best iteration is: ", best_msg[[i]])) } # Store best iteration and stop diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 8765214ddcb6..625bd9314445 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -730,7 +730,7 @@ Booster <- R6::R6Class( #' number of columns corresponding to the number of trees. 
#' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -790,7 +790,7 @@ predict.lgb.Booster <- function(object, #' @return lgb.Booster #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -850,7 +850,7 @@ lgb.load <- function(filename = NULL, model_str = NULL) { #' @return lgb.Booster #' #' @examples -#' \dontrun{ +#' \donttest{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -898,7 +898,7 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { #' @return json format of model #' #' @examples -#' \dontrun{ +#' \donttest{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -943,10 +943,10 @@ lgb.dump <- function(booster, num_iteration = NULL) { #' (the default), evaluation results for all iterations will be returned. 
#' @param is_err TRUE will return evaluation error instead #' -#' @return vector of evaluation result +#' @return numeric vector of evaluation result #' #' @examples -#' \dontrun{ +#' \donttest{ #' # train a regression model #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index d531bd5fa236..f8666b1dd853 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -725,7 +725,7 @@ Dataset <- R6::R6Class( #' @return constructed dataset #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -771,7 +771,7 @@ lgb.Dataset <- function(data, #' @return constructed dataset #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -798,12 +798,13 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) { #' @param dataset Object of class \code{lgb.Dataset} #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) #' lgb.Dataset.construct(dtrain) #' } +#' @return constructed dataset #' @export lgb.Dataset.construct <- function(dataset) { @@ -829,7 +830,7 @@ lgb.Dataset.construct <- function(dataset) { #' be directly used with an \code{lgb.Dataset} object. #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -864,7 +865,7 @@ dim.lgb.Dataset <- function(x, ...) { #' Since row names are irrelevant, it is recommended to use \code{colnames} directly. 
#' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -875,6 +876,7 @@ dim.lgb.Dataset <- function(x, ...) { #' print(dtrain, verbose = TRUE) #' } #' @rdname dimnames.lgb.Dataset +#' @return A list with the dimension names of the dataset #' @export dimnames.lgb.Dataset <- function(x) { @@ -889,6 +891,7 @@ dimnames.lgb.Dataset <- function(x) { } #' @rdname dimnames.lgb.Dataset +#' @return A list with the dimension names of the dataset #' @export `dimnames<-.lgb.Dataset` <- function(x, value) { @@ -937,7 +940,7 @@ dimnames.lgb.Dataset <- function(x) { #' @return constructed sub dataset #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -952,6 +955,7 @@ slice <- function(dataset, ...) { } #' @rdname slice +#' @return constructed sub dataset #' @export slice.lgb.Dataset <- function(dataset, idxset, ...) { @@ -984,7 +988,7 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) { #' } #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1002,6 +1006,7 @@ getinfo <- function(dataset, ...) { } #' @rdname getinfo +#' @return info data #' @export getinfo.lgb.Dataset <- function(dataset, name, ...) { @@ -1022,7 +1027,7 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) { #' @param name the name of the field to get #' @param info the specific field of information to set #' @param ... other parameters -#' @return passed object +#' @return the dataset you passed in #' #' @details #' The \code{name} field can be one of the following: @@ -1038,7 +1043,7 @@ getinfo.lgb.Dataset <- function(dataset, name, ...)
{ #' } #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1056,6 +1061,7 @@ setinfo <- function(dataset, ...) { } #' @rdname setinfo +#' @return the dataset you passed in #' @export setinfo.lgb.Dataset <- function(dataset, name, info, ...) { @@ -1076,10 +1082,10 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) { #' @param categorical_feature categorical features. This can either be a character vector of feature #' names or an integer vector with the indices of the features (e.g. #' \code{c(1L, 10L)} to say "the first and tenth columns"). -#' @return passed dataset +#' @return the dataset you passed in #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1108,10 +1114,10 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) { #' @param dataset object of class \code{lgb.Dataset} #' @param reference object of class \code{lgb.Dataset} #' -#' @return passed dataset +#' @return the dataset you passed in #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package ="lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) @@ -1140,10 +1146,10 @@ lgb.Dataset.set.reference <- function(dataset, reference) { #' @param dataset object of class \code{lgb.Dataset} #' @param fname object filename of output file #' -#' @return passed dataset +#' @return the dataset you passed in #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.convert_with_rules.R b/R-package/R/lgb.convert_with_rules.R index 8aa98a5a44c2..b4d7d8983cb1 100644 --- a/R-package/R/lgb.convert_with_rules.R +++ 
b/R-package/R/lgb.convert_with_rules.R @@ -68,7 +68,7 @@ #' \code{lgb.Dataset}. #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(iris) #' #' str(iris) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 671c0f10a850..b778ee1fe2aa 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -52,7 +52,7 @@ CVBooster <- R6::R6Class( #' @return a trained model \code{lgb.CVBooster}. #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index 764e11cd5948..aad674d5a105 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -13,7 +13,7 @@ #' } #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.interprete.R b/R-package/R/lgb.interprete.R index 1885cf666ae7..594d71094837 100644 --- a/R-package/R/lgb.interprete.R +++ b/R-package/R/lgb.interprete.R @@ -16,7 +16,7 @@ #' Contribution columns to each class. 
#' #' @examples -#' \dontrun{ +#' \donttest{ #' Logit <- function(x) log(x / (1.0 - x)) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train diff --git a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index 436fe5109ff5..b29d2f981591 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -28,7 +28,7 @@ #' } #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.plot.importance.R b/R-package/R/lgb.plot.importance.R index 109c43633320..fe2782794a64 100644 --- a/R-package/R/lgb.plot.importance.R +++ b/R-package/R/lgb.plot.importance.R @@ -18,7 +18,7 @@ #' and silently returns a processed data.table with \code{top_n} features sorted by defined importance. #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.plot.interpretation.R b/R-package/R/lgb.plot.interpretation.R index 6752edc197a2..486b80dd46dd 100644 --- a/R-package/R/lgb.plot.interpretation.R +++ b/R-package/R/lgb.plot.interpretation.R @@ -15,7 +15,7 @@ #' The \code{lgb.plot.interpretation} function creates a \code{barplot}. #' #' @examples -#' \dontrun{ +#' \donttest{ #' Logit <- function(x) { #' log(x / (1.0 - x)) #' } diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index e1637ed3c007..038ef48df2f7 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -26,7 +26,7 @@ #' @return a trained booster model \code{lgb.Booster}. 
#' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lgb.unloader.R b/R-package/R/lgb.unloader.R index a018222d93c2..a24f622cf787 100644 --- a/R-package/R/lgb.unloader.R +++ b/R-package/R/lgb.unloader.R @@ -14,7 +14,7 @@ #' @return NULL invisibly. #' #' @examples -#' \dontrun{ +#' \donttest{ #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index 2ea789278af6..2aa053671b4a 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -98,6 +98,7 @@ NULL #' CPU using hyper-threading to generate 2 threads per CPU core).} #' } #' @inheritSection lgb_shared_params Early Stopping +#' @return a trained \code{lgb.Booster} #' @export lightgbm <- function(data, label = NULL, diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R index b14e77c2df86..220edd1f28e5 100644 --- a/R-package/R/readRDS.lgb.Booster.R +++ b/R-package/R/readRDS.lgb.Booster.R @@ -4,10 +4,10 @@ #' @param file a connection or the name of the file where the R object is saved to or read from. #' @param refhook a hook function for handling reference objects. #' -#' @return \code{lgb.Booster}. +#' @return \code{lgb.Booster} #' #' @examples -#' \dontrun{ +#' \donttest{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train diff --git a/R-package/R/removed.R b/R-package/R/removed.R index 3fda5f27c07b..43a2395d38ee 100644 --- a/R-package/R/removed.R +++ b/R-package/R/removed.R @@ -2,6 +2,7 @@ #' @name lgb.prepare #' @description removed functions #' @param ... catch-all too match old calls +#' @return Nothing. This function always raises an exception #' @export lgb.prepare <- function(...) { stop("lgb.prepare() was removed in LightGBM 3.0.0. 
Please use lgb.convert_with_rules()") @@ -11,6 +12,7 @@ lgb.prepare <- function(...) { #' @name lgb.prepare2 #' @description removed functions #' @param ... catch-all too match old calls +#' @return Nothing. This function always raises an exception #' @export lgb.prepare2 <- function(...) { stop("lgb.prepare2() was removed in LightGBM 3.0.0. Please use lgb.convert_with_rules()") @@ -19,7 +21,8 @@ lgb.prepare2 <- function(...) { #' @title removed functions #' @name lgb.prepare_rules #' @description removed functions -#' @param ... catch-all too match old calls +#' @param ... catch-all to match old calls +#' @return Nothing. This function always raises an exception #' @export lgb.prepare_rules <- function(...) { stop("lgb.prepare_rules() was removed in LightGBM 3.0.0. Please use lgb.convert_with_rules()") @@ -28,7 +31,8 @@ lgb.prepare_rules <- function(...) { #' @title removed functions #' @name lgb.prepare_rules2 #' @description removed functions -#' @param ... catch-all too match old calls +#' @param ... catch-all to match old calls +#' @return Nothing. This function always raises an exception #' @export lgb.prepare_rules2 <- function(...) { stop("lgb.prepare_rules2() was removed in LightGBM 3.0.0. Please use lgb.convert_with_rules()") diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index f54e3645d463..7a2f838e9ff5 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -18,7 +18,7 @@ #' @return NULL invisibly. 
#' #' @examples -#' \dontrun{ +#' \donttest{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index 6931b05d7539..e852808a2bca 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -1,5 +1,43 @@ # CRAN Submission History +## v3.0.0 - Submission 5 - (September 11, 2020) + +### CRAN Response + +### Maintainer Notes + +## v3.0.0 - Submission 4 - (September 4, 2020) + +### CRAN Response + +> Thanks, if the running time is the only reason to wrap the examples in +\dontrun, please replace \dontrun by \donttest (\donttest examples are +not executed in the CRAN checks). + +> Please replace cat() by message() or warning() in your functions (except +for print() and summary() functions). Messages and warnings can be +suppressed if needed. + +> Missing Rd-tags: + lightgbm/man/dimnames.lgb.Dataset.Rd: \value + lightgbm/man/lgb.Dataset.construct.Rd: \value + lightgbm/man/lgb.prepare.Rd: \value + ... + +> Please add the tag and explain in detail the returned objects. + +### Maintainer Notes + +responded to CRAN with the following: + +All examples have been wrapped with `\donttest` as requested. We have replied to Swetlana Herbrandt asking for clarification on what this item in the R 4.0.2 changelog (https://cran.r-project.org/doc/manuals/r-devel/NEWS.html) means: + +> "`R CMD check --as-cran` now runs \donttest examples (which are run by example()) instead of instructing the tester to do so. This can be temporarily circumvented during development by setting environment variable `_R_CHECK_DONTTEST_EXAMPLES_` to a false value." + +All uses of `cat()` have been replaced with `print()`. We chose `print()` over `message()` because it's important that they be written to stdout alongside all the other logs coming from the library's C++ code. `message()` and `warning()` write to stderr.
+ +All exported objects now have `\value{}` statements in their documentation files in `man/`. + ## v3.0.0 - Submission 3 - (August 29, 2020) ### CRAN response @@ -12,8 +50,8 @@ the form * if those are not available: authors (year) https:... with no space after 'doi:', 'arXiv:', 'https:' and angle brackets for auto-linking. * (If you want to add a title as well please put it in quotes: "Title") -* \dontrun{} should only be used if the example really cannot be executed (e.g. because of missing additional software, missing API keys, ...) by the user. That's why wrapping examples in \dontrun{} adds the comment ("# Not run:") as a warning for the user. Does not seem necessary. Please unwrap the examples if they are executable in < 5 sec, or replace -\dontrun{} with \donttest{}. +* \dontrun{} should only be used if the example really cannot be executed (e.g. because of missing additional software, missing API keys, ...) by the user. That's why wrapping examples in \dontrun{} adds the comment ("# Not run:") as a warning for the user. Does not seem necessary. Please unwrap the examples if they are executable in < 5 sec, or replace +\dontrun{} with \donttest{}. * Please do not modify the global environment (e.g. by using <<-) in your functions. This is not allowed by the CRAN policies. @@ -32,7 +70,7 @@ All authors from the original LightGBM paper have been added to Authors@R as `"a lightgbm's code does use `<<-`, but it does not modify the global environment. The uses of `<<-` in R/lgb.interprete.R and R/callback.R are in functions which are called in an environment created by the lightgbm functions that call them, and this operator is used to reach one level up into the calling function's environment.
-We chose to wrap our examples in `\dontrun{}` because we found, through testing on https://builder.r-hub.io/ and in our own continuous integration environments, that their run time varies a lot between platforms, and we cannot guarantee that all examples will run in under 5 seconds. We intentionally chose `\dontrun{}` over `\donttest{}` because this item in the R 4.0.0 changelog (https://cran.r-project.org/doc/manuals/r-devel/NEWS.html) seems to indicate that \donttest will be ignored by CRAN's automated checks: +We chose to wrap our examples in `\dontrun{}` because we found, through testing on https://builder.r-hub.io/ and in our own continuous integration environments, that their run time varies a lot between platforms, and we cannot guarantee that all examples will run in under 5 seconds. We intentionally chose `\dontrun{}` over `\donttest{}` because this item in the R 4.0.0 changelog (https://cran.r-project.org/doc/manuals/r-devel/NEWS.html) seems to indicate that \donttest will be ignored by CRAN's automated checks: > "`R CMD check --as-cran` now runs \donttest examples (which are run by example()) instead of instructing the tester to do so. This can be temporarily circumvented during development by setting environment variable `_R_CHECK_DONTTEST_EXAMPLES_` to a false value." diff --git a/R-package/man/dim.Rd b/R-package/man/dim.Rd index 5361b5c0c467..a1059353a6ed 100644 --- a/R-package/man/dim.Rd +++ b/R-package/man/dim.Rd @@ -22,7 +22,7 @@ Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also be directly used with an \code{lgb.Dataset} object.
} \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/dimnames.lgb.Dataset.Rd b/R-package/man/dimnames.lgb.Dataset.Rd index 5fb3edcefba5..a968fcac4663 100644 --- a/R-package/man/dimnames.lgb.Dataset.Rd +++ b/R-package/man/dimnames.lgb.Dataset.Rd @@ -15,6 +15,11 @@ \item{value}{a list of two elements: the first one is ignored and the second one is column names} } +\value{ +A list with the dimension names of the dataset + +A list with the dimension names of the dataset +} \description{ Only column names are supported for \code{lgb.Dataset}, thus setting of row names would have no effect and returned row names would be NULL. @@ -24,7 +29,7 @@ Generic \code{dimnames} methods are used by \code{colnames}. Since row names are irrelevant, it is recommended to use \code{colnames} directly. } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index f12e8b39e871..11a489112d1d 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -17,6 +17,8 @@ getinfo(dataset, ...) \item{name}{the name of the information field to get (see details)} } \value{ +info data + info data } \description{ @@ -33,7 +35,7 @@ The \code{name} field can be one of the following: } } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.Rd b/R-package/man/lgb.Dataset.Rd index 771efff6f1db..67f129bdcdf6 100644 --- a/R-package/man/lgb.Dataset.Rd +++ b/R-package/man/lgb.Dataset.Rd @@ -40,7 +40,7 @@ Construct \code{lgb.Dataset} object from dense matrix, sparse matrix or local file (that was created previously by saving an \code{lgb.Dataset}).
} \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.construct.Rd b/R-package/man/lgb.Dataset.construct.Rd index 4bceed705773..97c9e7887602 100644 --- a/R-package/man/lgb.Dataset.construct.Rd +++ b/R-package/man/lgb.Dataset.construct.Rd @@ -9,11 +9,14 @@ lgb.Dataset.construct(dataset) \arguments{ \item{dataset}{Object of class \code{lgb.Dataset}} } +\value{ +constructed dataset +} \description{ Construct Dataset explicitly } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.create.valid.Rd b/R-package/man/lgb.Dataset.create.valid.Rd index 0c6efcff1f96..ce34908e1828 100644 --- a/R-package/man/lgb.Dataset.create.valid.Rd +++ b/R-package/man/lgb.Dataset.create.valid.Rd @@ -22,7 +22,7 @@ constructed dataset Construct validation data according to training data } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.save.Rd b/R-package/man/lgb.Dataset.save.Rd index fc5f765138bb..5ea38227ba66 100644 --- a/R-package/man/lgb.Dataset.save.Rd +++ b/R-package/man/lgb.Dataset.save.Rd @@ -12,14 +12,14 @@ lgb.Dataset.save(dataset, fname) \item{fname}{object filename of output file} } \value{ -passed dataset +the dataset you passed in } \description{ Please note that \code{init_score} is not saved in binary file. If you need it, please set it again after loading Dataset. 
} \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.set.categorical.Rd b/R-package/man/lgb.Dataset.set.categorical.Rd index 5b935791b8a1..26eb10770e47 100644 --- a/R-package/man/lgb.Dataset.set.categorical.Rd +++ b/R-package/man/lgb.Dataset.set.categorical.Rd @@ -14,14 +14,14 @@ names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to say "the first and tenth columns").} } \value{ -passed dataset +the dataset you passed in } \description{ Set the categorical features of an \code{lgb.Dataset} object. Use this function to tell LightGBM which features should be treated as categorical. } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.Dataset.set.reference.Rd b/R-package/man/lgb.Dataset.set.reference.Rd index 882254c53456..ee945b4c3bda 100644 --- a/R-package/man/lgb.Dataset.set.reference.Rd +++ b/R-package/man/lgb.Dataset.set.reference.Rd @@ -12,13 +12,13 @@ lgb.Dataset.set.reference(dataset, reference) \item{reference}{object of class \code{lgb.Dataset}} } \value{ -passed dataset +the dataset you passed in } \description{ If you want to use validation data, you should set reference to training data } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package ="lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.convert_with_rules.Rd b/R-package/man/lgb.convert_with_rules.Rd index 610fe10403dc..a8a6a00b9ce6 100644 --- a/R-package/man/lgb.convert_with_rules.Rd +++ b/R-package/man/lgb.convert_with_rules.Rd @@ -34,7 +34,7 @@ Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. NOTE: In previous releases of LightGBM, this function was called \code{lgb.prepare_rules2}. 
} \examples{ -\dontrun{ +\donttest{ data(iris) str(iris) diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index 987b04ec1a0a..b502faf4f837 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -149,7 +149,7 @@ Cross validation logic used by LightGBM } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.dump.Rd b/R-package/man/lgb.dump.Rd index bdcdcda6c237..6fbc5cbe9b43 100644 --- a/R-package/man/lgb.dump.Rd +++ b/R-package/man/lgb.dump.Rd @@ -18,7 +18,7 @@ json format of model Dump LightGBM model to json } \examples{ -\dontrun{ +\donttest{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train diff --git a/R-package/man/lgb.get.eval.result.Rd b/R-package/man/lgb.get.eval.result.Rd index ac88b8292315..917e0a403a18 100644 --- a/R-package/man/lgb.get.eval.result.Rd +++ b/R-package/man/lgb.get.eval.result.Rd @@ -25,14 +25,14 @@ lgb.get.eval.result( \item{is_err}{TRUE will return evaluation error instead} } \value{ -vector of evaluation result +numeric vector of evaluation result } \description{ Given a \code{lgb.Booster}, return evaluation results for a particular metric on a particular dataset. } \examples{ -\dontrun{ +\donttest{ # train a regression model data(agaricus.train, package = "lightgbm") train <- agaricus.train diff --git a/R-package/man/lgb.importance.Rd b/R-package/man/lgb.importance.Rd index b1d450815a0f..2fd5d4938de5 100644 --- a/R-package/man/lgb.importance.Rd +++ b/R-package/man/lgb.importance.Rd @@ -24,7 +24,7 @@ For a tree model, a \code{data.table} with the following columns: Creates a \code{data.table} of feature importances in a model. 
} \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.interprete.Rd b/R-package/man/lgb.interprete.Rd index a167de62ac6c..c1905282623d 100644 --- a/R-package/man/lgb.interprete.Rd +++ b/R-package/man/lgb.interprete.Rd @@ -29,7 +29,7 @@ For regression, binary classification and lambdarank model, a \code{list} of \co Computes feature contribution components of rawscore prediction. } \examples{ -\dontrun{ +\donttest{ Logit <- function(x) log(x / (1.0 - x)) data(agaricus.train, package = "lightgbm") train <- agaricus.train diff --git a/R-package/man/lgb.load.Rd b/R-package/man/lgb.load.Rd index 399f588db4b9..72633e7baef8 100644 --- a/R-package/man/lgb.load.Rd +++ b/R-package/man/lgb.load.Rd @@ -19,7 +19,7 @@ Load LightGBM takes in either a file path or model string. If both are provided, Load will default to loading from file } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd index 6ef028868b9e..c5c88156ff4d 100644 --- a/R-package/man/lgb.model.dt.tree.Rd +++ b/R-package/man/lgb.model.dt.tree.Rd @@ -39,7 +39,7 @@ The columns of the \code{data.table} are: Parse a LightGBM model json dump into a \code{data.table} structure. } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.plot.importance.Rd b/R-package/man/lgb.plot.importance.Rd index da3914d6daaa..4b915e35fc86 100644 --- a/R-package/man/lgb.plot.importance.Rd +++ b/R-package/man/lgb.plot.importance.Rd @@ -37,7 +37,7 @@ The graph represents each feature as a horizontal bar of length proportional to Features are shown ranked in a decreasing importance order. 
} \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.plot.interpretation.Rd b/R-package/man/lgb.plot.interpretation.Rd index 6f1cc5b8fc80..f8266308552d 100644 --- a/R-package/man/lgb.plot.interpretation.Rd +++ b/R-package/man/lgb.plot.interpretation.Rd @@ -34,7 +34,7 @@ The graph represents each feature as a horizontal bar of length proportional to contribution of a feature. Features are shown ranked in a decreasing contribution order. } \examples{ -\dontrun{ +\donttest{ Logit <- function(x) { log(x / (1.0 - x)) } diff --git a/R-package/man/lgb.prepare.Rd b/R-package/man/lgb.prepare.Rd index 7b534bae9047..50d1a48deb6b 100644 --- a/R-package/man/lgb.prepare.Rd +++ b/R-package/man/lgb.prepare.Rd @@ -9,6 +9,9 @@ lgb.prepare(...) \arguments{ \item{...}{catch-all too match old calls} } +\value{ +Nothing. This function always raises an exception +} \description{ removed functions } diff --git a/R-package/man/lgb.prepare2.Rd b/R-package/man/lgb.prepare2.Rd index 033c2f546684..5a15b1ec64d9 100644 --- a/R-package/man/lgb.prepare2.Rd +++ b/R-package/man/lgb.prepare2.Rd @@ -9,6 +9,9 @@ lgb.prepare2(...) \arguments{ \item{...}{catch-all too match old calls} } +\value{ +Nothing. This function always raises an exception +} \description{ removed functions } diff --git a/R-package/man/lgb.prepare_rules.Rd b/R-package/man/lgb.prepare_rules.Rd index ee16f9cf95af..95fd56fdeaad 100644 --- a/R-package/man/lgb.prepare_rules.Rd +++ b/R-package/man/lgb.prepare_rules.Rd @@ -7,7 +7,10 @@ lgb.prepare_rules(...) } \arguments{ -\item{...}{catch-all too match old calls} +\item{...}{catch-all to match old calls} +} +\value{ +Nothing. 
This function always raises an exception } \description{ removed functions diff --git a/R-package/man/lgb.prepare_rules2.Rd b/R-package/man/lgb.prepare_rules2.Rd index 02bee7c7dcf1..9378a649b8c2 100644 --- a/R-package/man/lgb.prepare_rules2.Rd +++ b/R-package/man/lgb.prepare_rules2.Rd @@ -7,7 +7,10 @@ lgb.prepare_rules2(...) } \arguments{ -\item{...}{catch-all too match old calls} +\item{...}{catch-all to match old calls} +} +\value{ +Nothing. This function always raises an exception } \description{ removed functions diff --git a/R-package/man/lgb.save.Rd b/R-package/man/lgb.save.Rd index 119aaec8c231..9ac19eadb3fc 100644 --- a/R-package/man/lgb.save.Rd +++ b/R-package/man/lgb.save.Rd @@ -20,7 +20,7 @@ lgb.Booster Save LightGBM model } \examples{ -\dontrun{ +\donttest{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index 199644458bf7..77c3ee79c523 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -132,7 +132,7 @@ Logic to train with LightGBM } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.unloader.Rd b/R-package/man/lgb.unloader.Rd index 8aae35a98836..d3a0bbd01f74 100644 --- a/R-package/man/lgb.unloader.Rd +++ b/R-package/man/lgb.unloader.Rd @@ -26,7 +26,7 @@ Attempts to unload LightGBM packages so you can remove objects cleanly without apparent reason and you do not want to restart R to fix the lost object. 
} \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index 13806fbc05a7..22554771eaf8 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -71,6 +71,9 @@ List of callback functions that are applied at each iteration.} CPU using hyper-threading to generate 2 threads per CPU core).} }} } +\value{ +a trained \code{lgb.Booster} +} \description{ Simple interface for training a LightGBM model. } diff --git a/R-package/man/predict.lgb.Booster.Rd b/R-package/man/predict.lgb.Booster.Rd index 3f56d0886648..c1c4cfb0cc77 100644 --- a/R-package/man/predict.lgb.Booster.Rd +++ b/R-package/man/predict.lgb.Booster.Rd @@ -61,7 +61,7 @@ For regression or binary classification, it returns a vector of length \code{nro Predicted values based on class \code{lgb.Booster} } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd index 69eda2e989eb..aff69a01b313 100644 --- a/R-package/man/readRDS.lgb.Booster.Rd +++ b/R-package/man/readRDS.lgb.Booster.Rd @@ -12,13 +12,13 @@ readRDS.lgb.Booster(file = "", refhook = NULL) \item{refhook}{a hook function for handling reference objects.} } \value{ -\code{lgb.Booster}. 
+\code{lgb.Booster} } \description{ Attempts to load a model stored in a \code{.rds} file, using \code{\link[base]{readRDS}} } \examples{ -\dontrun{ +\donttest{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train diff --git a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd index 87b095a9c924..f267293f9d96 100644 --- a/R-package/man/saveRDS.lgb.Booster.Rd +++ b/R-package/man/saveRDS.lgb.Booster.Rd @@ -42,7 +42,7 @@ Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides whether to save the raw model or not. } \examples{ -\dontrun{ +\donttest{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd index e38811978073..b96cd9c4e83b 100644 --- a/R-package/man/setinfo.Rd +++ b/R-package/man/setinfo.Rd @@ -19,7 +19,9 @@ setinfo(dataset, ...) \item{info}{the specific field of information to set} } \value{ -passed object +the dataset you passed in + +the dataset you passed in } \description{ Set one attribute of a \code{lgb.Dataset} @@ -38,7 +40,7 @@ The \code{name} field can be one of the following: } } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/slice.Rd b/R-package/man/slice.Rd index 587f77c80849..1595ec9efaa4 100644 --- a/R-package/man/slice.Rd +++ b/R-package/man/slice.Rd @@ -17,6 +17,8 @@ slice(dataset, ...) 
\item{idxset}{an integer vector of indices of rows needed} } \value{ +constructed sub dataset + constructed sub dataset } \description{ @@ -24,7 +26,7 @@ Get a new \code{lgb.Dataset} containing the specified rows of original \code{lgb.Dataset} object } \examples{ -\dontrun{ +\donttest{ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) From 4a9a6f8bcbbe6718c802e430acb0b8a2295a0d69 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 11 Sep 2020 10:03:24 -0500 Subject: [PATCH 08/37] add email --- R-package/cran-comments.md | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index e852808a2bca..4775f94b85b8 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -30,14 +30,25 @@ suppressed if needed. responded to CRAN with the following: -All examples have been wrapped with `\donttest` as requested. We have replied to Swetlana Herbrandt asking for clarification on what this item in the R 4.0.2 changelog (https://cran.r-project.org/doc/manuals/r-devel/NEWS.html) means: - -> "`R CMD check --as-cran` now runs \donttest examples (which are run by example()) instead of instructing the tester to do so. This can be temporarily circumvented during development by setting environment variable `_R_CHECK_DONTTEST_EXAMPLES_` to a false value." +All examples have been wrapped with `\donttest` as requested. We have replied to Swetlana Herbrandt asking for clarification on the donttest news item in the R 4.0.2 changelog (https://cran.r-project.org/doc/manuals/r-devel/NEWS.html). All uses of `cat()` have been replaced with `print()`. We chose `print()` over `message()` because it's important that they be written to stdout alongside all the other logs coming from the library's C++ code. `message()` and `warning()` write to stderr. 
All exported objects now have `\value{}` statements in their documentation files in `man/`. +**we also replied directly to CRAN's feedback email** + +> Swetlana, + +> Thank you for your comments. I've just created a new submission that I believe addresses them. + +> Can you help us understand something? In your message you said "\donttest examples are +not executed in the CRAN checks)", but in https://cran.r-project.org/doc/manuals/r-devel/NEWS.html we see the following: + +> > "`R CMD check --as-cran` now runs \donttest examples (which are run by example()) instead of instructing the tester to do so. This can be temporarily circumvented during development by setting environment variable `_R_CHECK_DONTTEST_EXAMPLES_` to a false value." + +> Could you help us understand how both of those statements can be true? + ## v3.0.0 - Submission 3 - (August 29, 2020) ### CRAN response @@ -134,7 +145,7 @@ For the note about included files, that is my fault. I had extra files laying ar NOTE: 3.0.0-1 was never released to CRAN. CRAN was on vacation August 14-24, 2020, and in that time version 3.0.0-1 (a release candidate) became 3.0.0. -### CRAN respoonse +### CRAN response > Please only ship the CRAN template for the MIT license. 
From 173c7dd9260c7639358f0b3f1ab61349f9482e4a Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 11 Sep 2020 10:05:18 -0500 Subject: [PATCH 09/37] run examples in CI --- .ci/test_r_package.sh | 2 +- .ci/test_r_package_windows.ps1 | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh index 2f429df85ea4..5d9fa7b0245d 100755 --- a/.ci/test_r_package.sh +++ b/.ci/test_r_package.sh @@ -171,7 +171,7 @@ check_succeeded="yes" ( R CMD check ${PKG_TARBALL} \ --as-cran \ - --run-dontrun \ + --run-donttest \ || check_succeeded="no" ) & diff --git a/.ci/test_r_package_windows.ps1 b/.ci/test_r_package_windows.ps1 index 6b97b4b88fd8..9d5cc1895ba4 100644 --- a/.ci/test_r_package_windows.ps1 +++ b/.ci/test_r_package_windows.ps1 @@ -165,9 +165,9 @@ if ($env:COMPILER -ne "MSVC") { Write-Output "Running R CMD check" if ($env:R_BUILD_TYPE -eq "cran") { # CRAN packages must pass without --no-multiarch (build on 64-bit and 32-bit) - $check_args = "c('CMD', 'check', '--as-cran', '--run-dontrun', '$PKG_FILE_NAME')" + $check_args = "c('CMD', 'check', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" } else { - $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-dontrun', '$PKG_FILE_NAME')" + $check_args = "c('CMD', 'check', '--no-multiarch', '--as-cran', '--run-donttest', '$PKG_FILE_NAME')" } Run-R-Code-Redirect-Stderr "result <- processx::run(command = 'R.exe', args = $check_args, echo = TRUE, windows_verbatim_args = FALSE)" ; $check_succeeded = $? 
From 3a0de88359b9af6a3f8948154e0107937e66ca01 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 22 Sep 2020 20:53:15 -0500 Subject: [PATCH 10/37] add newest CRAN response --- R-package/cran-comments.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index 4775f94b85b8..e445995f83ed 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -1,11 +1,47 @@ # CRAN Submission History +## v3.0.0 - Submission 6 - (TBD) + +### CRAN Response + +### Maintainer Notes + ## v3.0.0 - Submission 5 - (September 11, 2020) ### CRAN Response +Accepted to CRAN! + +Please correct the problems below before 2020-10-05 to safely retain your package on CRAN: + +```text +checking installed package size ... NOTE + installed size is 49.7Mb + sub-directories of 1Mb or more: + libs 49.1Mb + +"network/socket_wrapper.hpp", line 30: Error: Could not open include file. +"network/socket_wrapper.hpp", line 216: Error: The type "ifaddrs" is incomplete. +"network/socket_wrapper.hpp", line 217: Error: The type "ifaddrs" is incomplete. +"network/socket_wrapper.hpp", line 220: Error: The type "ifaddrs" is incomplete. +"network/socket_wrapper.hpp", line 222: Error: The type "ifaddrs" is incomplete. +"network/socket_wrapper.hpp", line 214: Error: The function "getifaddrs" must have a prototype. +"network/socket_wrapper.hpp", line 228: Error: The function "freeifaddrs" must have a prototype. +"network/linkers_socket.cpp", line 76: Warning: A non-POD object of type "std::chrono::duration>" passed as a variable argument to function "static LightGBM::Log::Info(const char*, ...)". +7 Error(s) and 1 Warning(s) detected. 
+*** Error code 2 +make: Fatal error: Command failed for target `network/linkers_socket.o' +Current working directory /tmp/RtmpNfaavG/R.INSTALL40a84f70130a/lightgbm/src +ERROR: compilation failed for package ‘lightgbm’ +* removing ‘/home/ripley/R/Lib32/lightgbm’ +``` + ### Maintainer Notes +Will try using a patch that `psutil` has used to fix missing `ifaddrs.h` on Solaris 10: https://github.com/microsoft/LightGBM/issues/629#issuecomment-665091451. + +If that doesn't work, we can detect Solaris and disable distributed training on that operating system. + ## v3.0.0 - Submission 4 - (September 4, 2020) ### CRAN Response From 8b6a3a711cdaca343f72f7c03598948837dc90f2 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 23 Sep 2020 22:14:50 -0500 Subject: [PATCH 11/37] add Solaris patch --- src/network/ifaddrs_patch.h | 30 ++++++++ src/network/socket_wrapper.hpp | 126 ++++++++++++++++++++++++++++++++- 2 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 src/network/ifaddrs_patch.h diff --git a/src/network/ifaddrs_patch.h b/src/network/ifaddrs_patch.h new file mode 100644 index 000000000000..02c1ff6ae267 --- /dev/null +++ b/src/network/ifaddrs_patch.h @@ -0,0 +1,30 @@ +/* + +- https://lists.samba.org/archive/samba-technical/2009-February/063079.html +- https://github.com/giampaolo/psutil/blob/master/psutil/arch/solaris/v10/ifaddrs.h + +*/ + +#ifndef __IFADDRS_H__ +#define __IFADDRS_H__ + +#include +#include + +#undef ifa_dstaddr +#undef ifa_broadaddr +#define ifa_broadaddr ifa_dstaddr + +struct ifaddrs { + struct ifaddrs *ifa_next; + char *ifa_name; + unsigned int ifa_flags; + struct sockaddr *ifa_addr; + struct sockaddr *ifa_netmask; + struct sockaddr *ifa_dstaddr; +}; + +extern int getifaddrs(struct ifaddrs **); +extern void freeifaddrs(struct ifaddrs *); + +#endif diff --git a/src/network/socket_wrapper.hpp b/src/network/socket_wrapper.hpp index 70f9586b99c5..d82990125ce8 100644 --- a/src/network/socket_wrapper.hpp +++ b/src/network/socket_wrapper.hpp 
@@ -25,9 +25,14 @@ #else +// #include +// #include +#include +#include + #include #include -#include +// #include #include #include #include @@ -36,6 +41,8 @@ #include #include +#include "ifaddrs_patch.h" + #endif // defined(_WIN32) #ifdef _MSC_VER @@ -53,6 +60,123 @@ const int INVALID_SOCKET = -1; #endif +// ------------------------------------------------------------------ +// TERRIBLE IFADDRS HACK +// ------------------------------------------------------------------ + +#define MAX(x,y) ((x)>(y)?(x):(y)) +#define SIZE(p) MAX((p).ss_len,sizeof(p)) + + +static struct sockaddr * +sa_dup (struct sockaddr_storage *sa1) +{ + struct sockaddr *sa2; + size_t sz = sizeof(struct sockaddr_storage); + sa2 = (struct sockaddr *) calloc(1,sz); + memcpy(sa2,sa1,sz); + return(sa2); +} + + +void freeifaddrs (struct ifaddrs *ifp) +{ + if (NULL == ifp) return; + free(ifp->ifa_name); + free(ifp->ifa_addr); + free(ifp->ifa_netmask); + free(ifp->ifa_dstaddr); + freeifaddrs(ifp->ifa_next); + free(ifp); +} + + +int getifaddrs (struct ifaddrs **ifap) +{ + int sd = -1; + char *ccp, *ecp; + struct lifconf ifc; + struct lifreq *ifr; + struct lifnum lifn; + struct ifaddrs *cifa = NULL; /* current */ + struct ifaddrs *pifa = NULL; /* previous */ + const size_t IFREQSZ = sizeof(struct lifreq); + + sd = socket(AF_INET, SOCK_STREAM, 0); + if (sd < 0) + goto error; + + ifc.lifc_buf = NULL; + *ifap = NULL; + /* find how much memory to allocate for the SIOCGLIFCONF call */ + lifn.lifn_family = AF_UNSPEC; + lifn.lifn_flags = 0; + if (ioctl(sd, SIOCGLIFNUM, &lifn) < 0) + goto error; + + /* Sun and Apple code likes to pad the interface count here in case interfaces + * are coming up between calls */ + lifn.lifn_count += 4; + + ifc.lifc_family = AF_UNSPEC; + ifc.lifc_len = lifn.lifn_count * sizeof(struct lifreq); + ifc.lifc_buf = static_cast(calloc(1, ifc.lifc_len)); + if (ioctl(sd, SIOCGLIFCONF, &ifc) < 0) + goto error; + + ccp = reinterpret_cast(ifc.lifc_req); + ecp = ccp + ifc.lifc_len; + + while 
(ccp < ecp) { + + ifr = (struct lifreq *) ccp; + cifa = (struct ifaddrs *) calloc(1, sizeof(struct ifaddrs)); + cifa->ifa_next = NULL; + cifa->ifa_name = strdup(ifr->lifr_name); + + if (pifa == NULL) *ifap = cifa; /* first one */ + else pifa->ifa_next = cifa; + + if (ioctl(sd, SIOCGLIFADDR, ifr, IFREQSZ) < 0) + goto error; + cifa->ifa_addr = sa_dup(&ifr->lifr_addr); + + if (ioctl(sd, SIOCGLIFNETMASK, ifr, IFREQSZ) < 0) + goto error; + cifa->ifa_netmask = sa_dup(&ifr->lifr_addr); + + cifa->ifa_flags = 0; + cifa->ifa_dstaddr = NULL; + + if (0 == ioctl(sd, SIOCGLIFFLAGS, ifr)) /* optional */ + cifa->ifa_flags = ifr->lifr_flags; + + if (ioctl(sd, SIOCGLIFDSTADDR, ifr, IFREQSZ) < 0) { + if (0 == ioctl(sd, SIOCGLIFBRDADDR, ifr, IFREQSZ)) + cifa->ifa_dstaddr = sa_dup(&ifr->lifr_addr); + } + else cifa->ifa_dstaddr = sa_dup(&ifr->lifr_addr); + + pifa = cifa; + ccp += IFREQSZ; + } + free(ifc.lifc_buf); + close(sd); + return 0; +error: + if (ifc.lifc_buf != NULL) + free(ifc.lifc_buf); + if (sd != -1) + close(sd); + freeifaddrs(*ifap); + return (-1); +} + +// ------------------------------------------------------------------ +// ------------------------------------------------------------------ +// ------------------------------------------------------------------ + + #ifdef _WIN32 #ifndef _MSC_VER // not using visual studio in windows From e7eb0c4ccded1fa189a8440c93ae6b4bf95173ec Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 24 Sep 2020 18:42:15 -0500 Subject: [PATCH 12/37] update patch --- R-package/src/Makevars.in | 1 + R-package/src/Makevars.win.in | 1 + src/network/ifaddrs_patch.cpp | 129 +++++++++++++++++++++++++++++++++ src/network/ifaddrs_patch.h | 7 +- src/network/socket_wrapper.hpp | 121 +------------------------------ 5 files changed, 139 insertions(+), 120 deletions(-) create mode 100644 src/network/ifaddrs_patch.cpp diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 4663c1480281..934a77337324 100644 --- a/R-package/src/Makevars.in +++ 
b/R-package/src/Makevars.in @@ -40,6 +40,7 @@ OBJECTS = \ metric/dcg_calculator.o \ metric/metric.o \ objective/objective_function.o \ + network/ifaddrs_patch.o \ network/linker_topo.o \ network/linkers_mpi.o \ network/linkers_socket.o \ diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in index 340afad3002a..c8d1be11f4bc 100644 --- a/R-package/src/Makevars.win.in +++ b/R-package/src/Makevars.win.in @@ -41,6 +41,7 @@ OBJECTS = \ metric/dcg_calculator.o \ metric/metric.o \ objective/objective_function.o \ + network/ifaddrs_patch.o \ network/linker_topo.o \ network/linkers_mpi.o \ network/linkers_socket.o \ diff --git a/src/network/ifaddrs_patch.cpp b/src/network/ifaddrs_patch.cpp new file mode 100644 index 000000000000..87b983466898 --- /dev/null +++ b/src/network/ifaddrs_patch.cpp @@ -0,0 +1,129 @@ +/*! +* Copyright (c) 2009, Jay Loden, Dave Daeschler, Giampaolo Rodola. + * Licensed under the BSD 3-Clause License. + * See https://github.com/giampaolo/psutil/blob/master/LICENSE + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ifaddrs_patch.h" + +#define MAX(x, y) ((x) > (y)?(x):(y)) +#define SIZE(p) MAX((p).ss_len, sizeof(p)) + + +static struct sockaddr * +sa_dup(struct sockaddr_storage *sa1) { + struct sockaddr *sa2; + size_t sz = sizeof(struct sockaddr_storage); + sa2 = (struct sockaddr *) calloc(1, sz); + memcpy(sa2, sa1, sz); + return(sa2); +} + + +void freeifaddrs(struct ifaddrs *ifp) { + if (NULL == ifp) return; + free(ifp->ifa_name); + free(ifp->ifa_addr); + free(ifp->ifa_netmask); + free(ifp->ifa_dstaddr); + freeifaddrs(ifp->ifa_next); + free(ifp); +} + + +int getifaddrs(struct ifaddrs **ifap) { + int sd = -1; + char *ccp, *ecp; + struct lifconf ifc; + struct lifreq *ifr; + struct lifnum lifn; + struct ifaddrs *cifa = NULL; /* current */ + struct ifaddrs *pifa = NULL; /* previous */ + const size_t IFREQSZ = sizeof(struct lifreq); + + sd = socket(AF_INET, SOCK_STREAM, 0); + if 
(sd < 0) + goto error; + + ifc.lifc_buf = NULL; + *ifap = NULL; + /* find how much memory to allocate for the SIOCGLIFCONF call */ + lifn.lifn_family = AF_UNSPEC; + lifn.lifn_flags = 0; + if (ioctl(sd, SIOCGLIFNUM, &lifn) < 0) + goto error; + + /* Sun and Apple code likes to pad the interface count here in case interfaces + * are coming up between calls */ + lifn.lifn_count += 4; + + ifc.lifc_family = AF_UNSPEC; + ifc.lifc_len = lifn.lifn_count * sizeof(struct lifreq); + ifc.lifc_buf = static_cast(calloc(1, ifc.lifc_len)); + if (ioctl(sd, SIOCGLIFCONF, &ifc) < 0) + goto error; + + ccp = reinterpret_cast(ifc.lifc_req); + ecp = ccp + ifc.lifc_len; + + while (ccp < ecp) { + ifr = (struct lifreq *) ccp; + cifa = (struct ifaddrs *) calloc(1, sizeof(struct ifaddrs)); + cifa->ifa_next = NULL; + cifa->ifa_name = strdup(ifr->lifr_name); + + if (pifa == NULL) { + *ifap = cifa; /* first one */ + } else { + pifa->ifa_next = cifa; + } + + if (ioctl(sd, SIOCGLIFADDR, ifr, IFREQSZ) < 0) + goto error; + cifa->ifa_addr = sa_dup(&ifr->lifr_addr); + + if (ioctl(sd, SIOCGLIFNETMASK, ifr, IFREQSZ) < 0) + goto error; + cifa->ifa_netmask = sa_dup(&ifr->lifr_addr); + + cifa->ifa_flags = 0; + cifa->ifa_dstaddr = NULL; + + if (0 == ioctl(sd, SIOCGLIFFLAGS, ifr)) /* optional */ + cifa->ifa_flags = ifr->lifr_flags; + + if (ioctl(sd, SIOCGLIFDSTADDR, ifr, IFREQSZ) < 0) { + if (0 == ioctl(sd, SIOCGLIFBRDADDR, ifr, IFREQSZ)) + cifa->ifa_dstaddr = sa_dup(&ifr->lifr_addr); + } else { + cifa->ifa_dstaddr = sa_dup(&ifr->lifr_addr); + } + + pifa = cifa; + ccp += IFREQSZ; + } + free(ifc.lifc_buf); + close(sd); + return 0; +error: + if (ifc.lifc_buf != NULL) + free(ifc.lifc_buf); + if (sd != -1) + close(sd); + freeifaddrs(*ifap); + return (-1); +} + +// ------------------------------------------------------------------ +// ------------------------------------------------------------------ +// ------------------------------------------------------------------ diff --git a/src/network/ifaddrs_patch.h 
b/src/network/ifaddrs_patch.h index 02c1ff6ae267..d4e0620adaa1 100644 --- a/src/network/ifaddrs_patch.h +++ b/src/network/ifaddrs_patch.h @@ -1,8 +1,13 @@ /* +/*! +* Copyright (c) 2009, Jay Loden, Dave Daeschler, Giampaolo Rodola. + * Licensed under the BSD 3-Clause License. + * See https://github.com/giampaolo/psutil/blob/master/LICENSE + */ +/* - https://lists.samba.org/archive/samba-technical/2009-February/063079.html - https://github.com/giampaolo/psutil/blob/master/psutil/arch/solaris/v10/ifaddrs.h - */ #ifndef __IFADDRS_H__ diff --git a/src/network/socket_wrapper.hpp b/src/network/socket_wrapper.hpp index d82990125ce8..c5b00a2409d5 100644 --- a/src/network/socket_wrapper.hpp +++ b/src/network/socket_wrapper.hpp @@ -25,8 +25,8 @@ #else -// #include -// #include +#include +#include #include #include @@ -60,123 +60,6 @@ const int INVALID_SOCKET = -1; #endif -// ------------------------------------------------------------------ -// TERRIBLE IFADDRS HACK -// ------------------------------------------------------------------ - -#define MAX(x,y) ((x)>(y)?(x):(y)) -#define SIZE(p) MAX((p).ss_len,sizeof(p)) - - -static struct sockaddr * -sa_dup (struct sockaddr_storage *sa1) -{ - struct sockaddr *sa2; - size_t sz = sizeof(struct sockaddr_storage); - sa2 = (struct sockaddr *) calloc(1,sz); - memcpy(sa2,sa1,sz); - return(sa2); -} - - -void freeifaddrs (struct ifaddrs *ifp) -{ - if (NULL == ifp) return; - free(ifp->ifa_name); - free(ifp->ifa_addr); - free(ifp->ifa_netmask); - free(ifp->ifa_dstaddr); - freeifaddrs(ifp->ifa_next); - free(ifp); -} - - -int getifaddrs (struct ifaddrs **ifap) -{ - int sd = -1; - char *ccp, *ecp; - struct lifconf ifc; - struct lifreq *ifr; - struct lifnum lifn; - struct ifaddrs *cifa = NULL; /* current */ - struct ifaddrs *pifa = NULL; /* previous */ - const size_t IFREQSZ = sizeof(struct lifreq); - - sd = socket(AF_INET, SOCK_STREAM, 0); - if (sd < 0) - goto error; - - ifc.lifc_buf = NULL; - *ifap = NULL; - /* find how much memory to allocate 
for the SIOCGLIFCONF call */ - lifn.lifn_family = AF_UNSPEC; - lifn.lifn_flags = 0; - if (ioctl(sd, SIOCGLIFNUM, &lifn) < 0) - goto error; - - /* Sun and Apple code likes to pad the interface count here in case interfaces - * are coming up between calls */ - lifn.lifn_count += 4; - - ifc.lifc_family = AF_UNSPEC; - ifc.lifc_len = lifn.lifn_count * sizeof(struct lifreq); - ifc.lifc_buf = static_cast(calloc(1, ifc.lifc_len)); - if (ioctl(sd, SIOCGLIFCONF, &ifc) < 0) - goto error; - - ccp = reinterpret_cast(ifc.lifc_req); - ecp = ccp + ifc.lifc_len; - - while (ccp < ecp) { - - ifr = (struct lifreq *) ccp; - cifa = (struct ifaddrs *) calloc(1, sizeof(struct ifaddrs)); - cifa->ifa_next = NULL; - cifa->ifa_name = strdup(ifr->lifr_name); - - if (pifa == NULL) *ifap = cifa; /* first one */ - else pifa->ifa_next = cifa; - - if (ioctl(sd, SIOCGLIFADDR, ifr, IFREQSZ) < 0) - goto error; - cifa->ifa_addr = sa_dup(&ifr->lifr_addr); - - if (ioctl(sd, SIOCGLIFNETMASK, ifr, IFREQSZ) < 0) - goto error; - cifa->ifa_netmask = sa_dup(&ifr->lifr_addr); - - cifa->ifa_flags = 0; - cifa->ifa_dstaddr = NULL; - - if (0 == ioctl(sd, SIOCGLIFFLAGS, ifr)) /* optional */ - cifa->ifa_flags = ifr->lifr_flags; - - if (ioctl(sd, SIOCGLIFDSTADDR, ifr, IFREQSZ) < 0) { - if (0 == ioctl(sd, SIOCGLIFBRDADDR, ifr, IFREQSZ)) - cifa->ifa_dstaddr = sa_dup(&ifr->lifr_addr); - } - else cifa->ifa_dstaddr = sa_dup(&ifr->lifr_addr); - - pifa = cifa; - ccp += IFREQSZ; - } - free(ifc.lifc_buf); - close(sd); - return 0; -error: - if (ifc.lifc_buf != NULL) - free(ifc.lifc_buf); - if (sd != -1) - close(sd); - freeifaddrs(*ifap); - return (-1); -} - -// ------------------------------------------------------------------ -// ------------------------------------------------------------------ -// ------------------------------------------------------------------ - - #ifdef _WIN32 #ifndef _MSC_VER // not using visual studio in windows From 86552d258c280cce278e442ee5ee26ebd1205c3a Mon Sep 17 00:00:00 2001 From: James Lamb 
Date: Thu, 24 Sep 2020 19:19:10 -0500 Subject: [PATCH 13/37] another attempt at ifaddrs patch --- R-package/DESCRIPTION | 7 +++++-- R-package/configure | 10 ++++++++++ R-package/configure.ac | 10 ++++++++++ src/network/ifaddrs_patch.cpp | 6 +++--- src/network/socket_wrapper.hpp | 13 ++++++------- 5 files changed, 34 insertions(+), 12 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index d68c42a09150..74d89daec20b 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -6,7 +6,6 @@ Date: ~~DATE~~ Authors@R: c( person("Guolin", "Ke", email = "guolin.ke@microsoft.com", role = c("aut", "cre")), person("Damien", "Soukhavong", email = "damien.soukhavong@skema.edu", role = c("aut")), - person("Yachen", "Yan", role = c("ctb")), person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")), person("Qi", "Meng", role = c("aut")), person("Thomas", "Finley", role = c("aut")), @@ -15,8 +14,12 @@ Authors@R: c( person("Weidong", "Ma", role = c("aut")), person("Qiwei", "Ye", role = c("aut")), person("Tie-Yan", "Liu", role = c("aut")), + person("Yachen", "Yan", role = c("ctb")), person("Microsoft Corporation", role = c("cph")), - person("Dropbox, Inc.", role = c("cph")) + person("Dropbox, Inc.", role = c("cph")), + person("Jay", "Loden", role = c("cph")), + person("Dave", "Daeschler", role = c("cph")), + person("Giampaolo", "Rodola", role = c("cph")) ) Description: Tree based algorithms can be improved by introducing boosting frameworks. 'LightGBM' is one such framework, based on Ke, Guolin et al. (2017) . 
diff --git a/R-package/configure b/R-package/configure index c5ed06752af3..abc53fab69b1 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1827,6 +1827,16 @@ $as_echo "${ac_pkg_openmp}" >&6; } fi fi +########### +# Solaris # +########### + +# some headers used in lib_lightgbm are not available on the Solaris +# systems CRAN tests on +if $(uname) = "SunOS" ; then + LGB_CPPFLAGS+=" -DON_SOLARIS=1" +fi + # substitute variables from this script into Makevars.in diff --git a/R-package/configure.ac b/R-package/configure.ac index 20182666b502..fe74731be98b 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -127,6 +127,16 @@ then fi fi +########### +# Solaris # +########### + +# some headers used in lib_lightgbm are not available on the Solaris +# systems CRAN tests on +if [ $(uname) = "SunOS" ]; then + LGB_CPPFLAGS+=" -DON_SOLARIS=1" +fi + # substitute variables from this script into Makevars.in AC_SUBST(OPENMP_CXXFLAGS) AC_SUBST(OPENMP_LIB) diff --git a/src/network/ifaddrs_patch.cpp b/src/network/ifaddrs_patch.cpp index 87b983466898..31ca37300db0 100644 --- a/src/network/ifaddrs_patch.cpp +++ b/src/network/ifaddrs_patch.cpp @@ -3,6 +3,7 @@ * Licensed under the BSD 3-Clause License. 
* See https://github.com/giampaolo/psutil/blob/master/LICENSE */ +#ifdef ON_SOLARIS #include #include @@ -124,6 +125,5 @@ int getifaddrs(struct ifaddrs **ifap) { return (-1); } -// ------------------------------------------------------------------ -// ------------------------------------------------------------------ -// ------------------------------------------------------------------ +#endif +// ON_SOLARIS diff --git a/src/network/socket_wrapper.hpp b/src/network/socket_wrapper.hpp index c5b00a2409d5..06a8e5448c51 100644 --- a/src/network/socket_wrapper.hpp +++ b/src/network/socket_wrapper.hpp @@ -25,14 +25,8 @@ #else -#include -#include -#include -#include - #include #include -// #include #include #include #include @@ -41,7 +35,12 @@ #include #include -#include "ifaddrs_patch.h" +// ifaddrs.h is not available on Solaris 10 +#ifndef ON_SOLARIS + #include +#else + #include "ifaddrs_patch.h" +#endif #endif // defined(_WIN32) From e1415f705bb0b5800508059f9fdee63847a5fc96 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 24 Sep 2020 19:26:13 -0500 Subject: [PATCH 14/37] fix unnecessary comment --- src/network/ifaddrs_patch.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/network/ifaddrs_patch.h b/src/network/ifaddrs_patch.h index d4e0620adaa1..ab023ef03c27 100644 --- a/src/network/ifaddrs_patch.h +++ b/src/network/ifaddrs_patch.h @@ -1,4 +1,3 @@ -/* /*! * Copyright (c) 2009, Jay Loden, Dave Daeschler, Giampaolo Rodola. * Licensed under the BSD 3-Clause License. 
From f7f3f41d3bb0bf160496f57e742f4702c77d99ad Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 24 Sep 2020 19:57:25 -0500 Subject: [PATCH 15/37] update configure --- R-package/configure | 5 +++-- R-package/configure.ac | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R-package/configure b/R-package/configure index abc53fab69b1..5f4e135120d5 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1833,8 +1833,9 @@ fi # some headers used in lib_lightgbm are not available on the Solaris # systems CRAN tests on -if $(uname) = "SunOS" ; then - LGB_CPPFLAGS+=" -DON_SOLARIS=1" +if test `uname -s` = "SunOS" +then + LGB_CPPFLAGS+=" -DON_SOLARIS=1" fi # substitute variables from this script into Makevars.in diff --git a/R-package/configure.ac b/R-package/configure.ac index fe74731be98b..da7df65a7fb2 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -133,8 +133,9 @@ fi # some headers used in lib_lightgbm are not available on the Solaris # systems CRAN tests on -if [ $(uname) = "SunOS" ]; then - LGB_CPPFLAGS+=" -DON_SOLARIS=1" +if test `uname -s` = "SunOS" +then + LGB_CPPFLAGS+=" -DON_SOLARIS=1" fi # substitute variables from this script into Makevars.in From a9c301a0ede01afac965dad9b2dc718c60c39e72 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 24 Sep 2020 20:32:19 -0500 Subject: [PATCH 16/37] comments --- R-package/cran-comments.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index e445995f83ed..4afed5c5b451 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -1,6 +1,6 @@ # CRAN Submission History -## v3.0.0 - Submission 6 - (TBD) +## v3.0.0 - Submission 6 - (September 24, 2020) ### CRAN Response @@ -38,9 +38,7 @@ ERROR: compilation failed for package ‘lightgbm’ ### Maintainer Notes -Will try using a patch that `psutil` has used to fix missing `ifaddrs.h` on Solaris 10: 
https://github.com/microsoft/LightGBM/issues/629#issuecomment-665091451. - -If that doesn't work, we can detect Solaris and disable distributed training on that operating system. +Added a patch that `psutil` has used to fix missing `ifaddrs.h` on Solaris 10: https://github.com/microsoft/LightGBM/issues/629#issuecomment-665091451. ## v3.0.0 - Submission 4 - (September 4, 2020) From a75a0e8d655b4719e66a8111dceb9a49b9c4c130 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 25 Sep 2020 00:28:13 -0500 Subject: [PATCH 17/37] bump version --- R-package/configure | 18 +++++++++--------- R-package/cran-comments.md | 25 ++++++++++++++++++++++++- VERSION.txt | 2 +- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/R-package/configure b/R-package/configure index 5f4e135120d5..42734236e289 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for lightgbm 3.0.0. +# Generated by GNU Autoconf 2.69 for lightgbm 3.0.0.1. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -576,8 +576,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='lightgbm' PACKAGE_TARNAME='lightgbm' -PACKAGE_VERSION='3.0.0' -PACKAGE_STRING='lightgbm 3.0.0' +PACKAGE_VERSION='3.0.0.1' +PACKAGE_STRING='lightgbm 3.0.0.1' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1182,7 +1182,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures lightgbm 3.0.0 to adapt to many kinds of systems. +\`configure' configures lightgbm 3.0.0.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1244,7 +1244,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of lightgbm 3.0.0:";; + short | recursive ) echo "Configuration of lightgbm 3.0.0.1:";; esac cat <<\_ACEOF @@ -1311,7 +1311,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -lightgbm configure 3.0.0 +lightgbm configure 3.0.0.1 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1328,7 +1328,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by lightgbm $as_me 3.0.0, which was +It was created by lightgbm $as_me 3.0.0.1, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2388,7 +2388,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by lightgbm $as_me 3.0.0, which was +This file was extended by lightgbm $as_me 3.0.0.1, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -2441,7 +2441,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -lightgbm config.status 3.0.0 +lightgbm config.status 3.0.0.1 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index 4afed5c5b451..124ec213b17d 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -1,11 +1,34 @@ # CRAN Submission History +## v3.0.0 - Submission 7 - (September 24, 2020) + +### CRAN response + +### Maintainer Notes + ## v3.0.0 - Submission 6 - (September 24, 2020) -### CRAN Response +### CRAN response + +Failing pre-checks. 
+ +### `R CMD check` results + +```text +* checking CRAN incoming feasibility ... WARNING +Maintainer: ‘Guolin Ke ’ + +Insufficient package version (submitted: 3.0.0, existing: 3.0.0) + +Days since last update: 4 +``` ### Maintainer Notes +Did not think the version needed to be incremented if submitting a package in response to CRAN saying "you are failing checks and will be kicked off if you don't fix it", but I guess you do! + +This can be fixed by just re-submitting but with the version changed froom `3.0.0` to `3.0.0.1`. + ## v3.0.0 - Submission 5 - (September 11, 2020) ### CRAN Response diff --git a/VERSION.txt b/VERSION.txt index 4a36342fcab7..527e56990a37 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -3.0.0 +3.0.0.1 From 6c37a57637596d181310aabc6fffb2dd7ebe346f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 28 Sep 2020 23:31:48 -0500 Subject: [PATCH 18/37] tabs --- R-package/R/callback.R | 2 +- R-package/R/lgb.cv.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R-package/R/callback.R b/R-package/R/callback.R index eb807606f99c..a41a03ffe23b 100644 --- a/R-package/R/callback.R +++ b/R-package/R/callback.R @@ -150,7 +150,7 @@ merge.eval.string <- function(env) { } - paste0(msg, collapse = "\t") + paste0(msg, collapse = " ") } diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 17be56c5f780..45a11a46f78d 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -466,7 +466,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) { # When doing group, stratified is not possible (only random selection) if (nfold > length(group)) { - stop("\n\tYou requested too many folds for the number of available groups.\n") + stop("\nYou requested too many folds for the number of available groups.\n") } # Degroup the groups From 3aa79b42c6137a98d63ca8e9636f84c635af8da9 Mon Sep 17 00:00:00 2001 From: Guolin Ke Date: Wed, 30 Sep 2020 09:37:32 +0800 Subject: [PATCH 19/37] fix address alignment, 
required by cran (#3415) * fix dataset binary file alignment * many fixes * fix warnings * fix bug * Update file_io.cpp * Update file_io.cpp * simplify code * Apply suggestions from code review * general * remove unneeded alignment * Update file_io.h * int32 to byte8 alignment * Apply suggestions from code review * Apply suggestions from code review --- include/LightGBM/feature_group.h | 16 +++---- include/LightGBM/utils/file_io.h | 19 +++++++++ src/io/bin.cpp | 57 ++++++++++++++----------- src/io/dataset.cpp | 72 +++++++++++++++++++------------- src/io/dataset_loader.cpp | 61 ++++++++++++++++----------- src/io/dense_bin.hpp | 6 ++- src/io/metadata.cpp | 38 +++++++++-------- src/io/sparse_bin.hpp | 15 +++---- 8 files changed, 173 insertions(+), 111 deletions(-) diff --git a/include/LightGBM/feature_group.h b/include/LightGBM/feature_group.h index 2b17e98bb9c1..21ea927f187d 100644 --- a/include/LightGBM/feature_group.h +++ b/include/LightGBM/feature_group.h @@ -93,11 +93,11 @@ class FeatureGroup { const char* memory_ptr = reinterpret_cast(memory); // get is_sparse is_multi_val_ = *(reinterpret_cast(memory_ptr)); - memory_ptr += sizeof(is_multi_val_); + memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_multi_val_)); is_sparse_ = *(reinterpret_cast(memory_ptr)); - memory_ptr += sizeof(is_sparse_); + memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_sparse_)); num_feature_ = *(reinterpret_cast(memory_ptr)); - memory_ptr += sizeof(num_feature_); + memory_ptr += VirtualFileWriter::AlignedSize(sizeof(num_feature_)); // get bin mapper bin_mappers_.clear(); bin_offsets_.clear(); @@ -290,9 +290,9 @@ class FeatureGroup { * \param file File want to write */ void SaveBinaryToFile(const VirtualFileWriter* writer) const { - writer->Write(&is_multi_val_, sizeof(is_multi_val_)); - writer->Write(&is_sparse_, sizeof(is_sparse_)); - writer->Write(&num_feature_, sizeof(num_feature_)); + writer->AlignedWrite(&is_multi_val_, sizeof(is_multi_val_)); + 
writer->AlignedWrite(&is_sparse_, sizeof(is_sparse_)); + writer->AlignedWrite(&num_feature_, sizeof(num_feature_)); for (int i = 0; i < num_feature_; ++i) { bin_mappers_[i]->SaveBinaryToFile(writer); } @@ -309,7 +309,9 @@ class FeatureGroup { * \brief Get sizes in byte of this object */ size_t SizesInByte() const { - size_t ret = sizeof(is_multi_val_) + sizeof(is_sparse_) + sizeof(num_feature_); + size_t ret = VirtualFileWriter::AlignedSize(sizeof(is_multi_val_)) + + VirtualFileWriter::AlignedSize(sizeof(is_sparse_)) + + VirtualFileWriter::AlignedSize(sizeof(num_feature_)); for (int i = 0; i < num_feature_; ++i) { ret += bin_mappers_[i]->SizesInByte(); } diff --git a/include/LightGBM/utils/file_io.h b/include/LightGBM/utils/file_io.h index 64d7487a3d3f..62ec3dbdc326 100644 --- a/include/LightGBM/utils/file_io.h +++ b/include/LightGBM/utils/file_io.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace LightGBM { @@ -31,6 +32,16 @@ struct VirtualFileWriter { * \return Number of bytes written */ virtual size_t Write(const void* data, size_t bytes) const = 0; + + size_t AlignedWrite(const void* data, size_t bytes, size_t alignment = 8) const { + auto ret = Write(data, bytes); + if (bytes % alignment != 0) { + size_t padding = AlignedSize(bytes, alignment) - bytes; + std::vector tmp(padding, 0); + ret += Write(tmp.data(), padding); + } + return ret; + } /*! 
* \brief Create appropriate writer for filename * \param filename Filename of the data @@ -43,6 +54,14 @@ struct VirtualFileWriter { * \return True when the file exists */ static bool Exists(const std::string& filename); + + static size_t AlignedSize(size_t bytes, size_t alignment = 8) { + if (bytes % alignment == 0) { + return bytes; + } else { + return bytes / alignment * alignment + alignment; + } + } }; /** diff --git a/src/io/bin.cpp b/src/io/bin.cpp index 4be390404383..c0005007ad9d 100644 --- a/src/io/bin.cpp +++ b/src/io/bin.cpp @@ -522,36 +522,37 @@ namespace LightGBM { int BinMapper::SizeForSpecificBin(int bin) { int size = 0; - size += sizeof(int); - size += sizeof(MissingType); - size += sizeof(bool); + size += static_cast(VirtualFileWriter::AlignedSize(sizeof(int))); + size += + static_cast(VirtualFileWriter::AlignedSize(sizeof(MissingType))); + size += static_cast(VirtualFileWriter::AlignedSize(sizeof(bool))); size += sizeof(double); - size += sizeof(BinType); + size += static_cast(VirtualFileWriter::AlignedSize(sizeof(BinType))); size += 2 * sizeof(double); size += bin * sizeof(double); - size += sizeof(uint32_t) * 2; + size += static_cast(VirtualFileWriter::AlignedSize(sizeof(uint32_t))) * 2; return size; } void BinMapper::CopyTo(char * buffer) const { std::memcpy(buffer, &num_bin_, sizeof(num_bin_)); - buffer += sizeof(num_bin_); + buffer += VirtualFileWriter::AlignedSize(sizeof(num_bin_)); std::memcpy(buffer, &missing_type_, sizeof(missing_type_)); - buffer += sizeof(missing_type_); + buffer += VirtualFileWriter::AlignedSize(sizeof(missing_type_)); std::memcpy(buffer, &is_trivial_, sizeof(is_trivial_)); - buffer += sizeof(is_trivial_); + buffer += VirtualFileWriter::AlignedSize(sizeof(is_trivial_)); std::memcpy(buffer, &sparse_rate_, sizeof(sparse_rate_)); buffer += sizeof(sparse_rate_); std::memcpy(buffer, &bin_type_, sizeof(bin_type_)); - buffer += sizeof(bin_type_); + buffer += VirtualFileWriter::AlignedSize(sizeof(bin_type_)); 
std::memcpy(buffer, &min_val_, sizeof(min_val_)); buffer += sizeof(min_val_); std::memcpy(buffer, &max_val_, sizeof(max_val_)); buffer += sizeof(max_val_); std::memcpy(buffer, &default_bin_, sizeof(default_bin_)); - buffer += sizeof(default_bin_); + buffer += VirtualFileWriter::AlignedSize(sizeof(default_bin_)); std::memcpy(buffer, &most_freq_bin_, sizeof(most_freq_bin_)); - buffer += sizeof(most_freq_bin_); + buffer += VirtualFileWriter::AlignedSize(sizeof(most_freq_bin_)); if (bin_type_ == BinType::NumericalBin) { std::memcpy(buffer, bin_upper_bound_.data(), num_bin_ * sizeof(double)); } else { @@ -561,23 +562,23 @@ namespace LightGBM { void BinMapper::CopyFrom(const char * buffer) { std::memcpy(&num_bin_, buffer, sizeof(num_bin_)); - buffer += sizeof(num_bin_); + buffer += VirtualFileWriter::AlignedSize(sizeof(num_bin_)); std::memcpy(&missing_type_, buffer, sizeof(missing_type_)); - buffer += sizeof(missing_type_); + buffer += VirtualFileWriter::AlignedSize(sizeof(missing_type_)); std::memcpy(&is_trivial_, buffer, sizeof(is_trivial_)); - buffer += sizeof(is_trivial_); + buffer += VirtualFileWriter::AlignedSize(sizeof(is_trivial_)); std::memcpy(&sparse_rate_, buffer, sizeof(sparse_rate_)); buffer += sizeof(sparse_rate_); std::memcpy(&bin_type_, buffer, sizeof(bin_type_)); - buffer += sizeof(bin_type_); + buffer += VirtualFileWriter::AlignedSize(sizeof(bin_type_)); std::memcpy(&min_val_, buffer, sizeof(min_val_)); buffer += sizeof(min_val_); std::memcpy(&max_val_, buffer, sizeof(max_val_)); buffer += sizeof(max_val_); std::memcpy(&default_bin_, buffer, sizeof(default_bin_)); - buffer += sizeof(default_bin_); + buffer += VirtualFileWriter::AlignedSize(sizeof(default_bin_)); std::memcpy(&most_freq_bin_, buffer, sizeof(most_freq_bin_)); - buffer += sizeof(most_freq_bin_); + buffer += VirtualFileWriter::AlignedSize(sizeof(most_freq_bin_)); if (bin_type_ == BinType::NumericalBin) { bin_upper_bound_ = std::vector(num_bin_); std::memcpy(bin_upper_bound_.data(), buffer, 
num_bin_ * sizeof(double)); @@ -592,15 +593,15 @@ namespace LightGBM { } void BinMapper::SaveBinaryToFile(const VirtualFileWriter* writer) const { - writer->Write(&num_bin_, sizeof(num_bin_)); - writer->Write(&missing_type_, sizeof(missing_type_)); - writer->Write(&is_trivial_, sizeof(is_trivial_)); + writer->AlignedWrite(&num_bin_, sizeof(num_bin_)); + writer->AlignedWrite(&missing_type_, sizeof(missing_type_)); + writer->AlignedWrite(&is_trivial_, sizeof(is_trivial_)); writer->Write(&sparse_rate_, sizeof(sparse_rate_)); - writer->Write(&bin_type_, sizeof(bin_type_)); + writer->AlignedWrite(&bin_type_, sizeof(bin_type_)); writer->Write(&min_val_, sizeof(min_val_)); writer->Write(&max_val_, sizeof(max_val_)); - writer->Write(&default_bin_, sizeof(default_bin_)); - writer->Write(&most_freq_bin_, sizeof(most_freq_bin_)); + writer->AlignedWrite(&default_bin_, sizeof(default_bin_)); + writer->AlignedWrite(&most_freq_bin_, sizeof(most_freq_bin_)); if (bin_type_ == BinType::NumericalBin) { writer->Write(bin_upper_bound_.data(), sizeof(double) * num_bin_); } else { @@ -609,8 +610,14 @@ namespace LightGBM { } size_t BinMapper::SizesInByte() const { - size_t ret = sizeof(num_bin_) + sizeof(missing_type_) + sizeof(is_trivial_) + sizeof(sparse_rate_) - + sizeof(bin_type_) + sizeof(min_val_) + sizeof(max_val_) + sizeof(default_bin_) + sizeof(most_freq_bin_); + size_t ret = VirtualFileWriter::AlignedSize(sizeof(num_bin_)) + + VirtualFileWriter::AlignedSize(sizeof(missing_type_)) + + VirtualFileWriter::AlignedSize(sizeof(is_trivial_)) + + sizeof(sparse_rate_) + + VirtualFileWriter::AlignedSize(sizeof(bin_type_)) + + sizeof(min_val_) + sizeof(max_val_) + + VirtualFileWriter::AlignedSize(sizeof(default_bin_)) + + VirtualFileWriter::AlignedSize(sizeof(most_freq_bin_)); if (bin_type_ == BinType::NumericalBin) { ret += sizeof(double) * num_bin_; } else { diff --git a/src/io/dataset.cpp b/src/io/dataset.cpp index 6e17eeb8917c..e30f1f6f6e38 100644 --- a/src/io/dataset.cpp +++ 
b/src/io/dataset.cpp @@ -912,47 +912,61 @@ void Dataset::SaveBinaryFile(const char* bin_filename) { } Log::Info("Saving data to binary file %s", bin_filename); size_t size_of_token = std::strlen(binary_file_token); - writer->Write(binary_file_token, size_of_token); + writer->AlignedWrite(binary_file_token, size_of_token); // get size of header - size_t size_of_header = sizeof(num_data_) + sizeof(num_features_) + sizeof(num_total_features_) - + sizeof(int) * num_total_features_ + sizeof(label_idx_) + sizeof(num_groups_) - + 3 * sizeof(int) * num_features_ + sizeof(uint64_t) * (num_groups_ + 1) + 2 * sizeof(int) * num_groups_ - + sizeof(int32_t) * num_total_features_ + sizeof(int) * 3 + sizeof(bool) * 2; + size_t size_of_header = + VirtualFileWriter::AlignedSize(sizeof(num_data_)) + + VirtualFileWriter::AlignedSize(sizeof(num_features_)) + + VirtualFileWriter::AlignedSize(sizeof(num_total_features_)) + + VirtualFileWriter::AlignedSize(sizeof(int) * num_total_features_) + + VirtualFileWriter::AlignedSize(sizeof(label_idx_)) + + VirtualFileWriter::AlignedSize(sizeof(num_groups_)) + + 3 * VirtualFileWriter::AlignedSize(sizeof(int) * num_features_) + + sizeof(uint64_t) * (num_groups_ + 1) + + 2 * VirtualFileWriter::AlignedSize(sizeof(int) * num_groups_) + + VirtualFileWriter::AlignedSize(sizeof(int32_t) * num_total_features_) + + VirtualFileWriter::AlignedSize(sizeof(int)) * 3 + + VirtualFileWriter::AlignedSize(sizeof(bool)) * 2; // size of feature names for (int i = 0; i < num_total_features_; ++i) { - size_of_header += feature_names_[i].size() + sizeof(int); + size_of_header += + VirtualFileWriter::AlignedSize(feature_names_[i].size()) + + VirtualFileWriter::AlignedSize(sizeof(int)); } // size of forced bins for (int i = 0; i < num_total_features_; ++i) { - size_of_header += - forced_bin_bounds_[i].size() * sizeof(double) + sizeof(int); + size_of_header += forced_bin_bounds_[i].size() * sizeof(double) + + VirtualFileWriter::AlignedSize(sizeof(int)); } 
writer->Write(&size_of_header, sizeof(size_of_header)); // write header - writer->Write(&num_data_, sizeof(num_data_)); - writer->Write(&num_features_, sizeof(num_features_)); - writer->Write(&num_total_features_, sizeof(num_total_features_)); - writer->Write(&label_idx_, sizeof(label_idx_)); - writer->Write(&max_bin_, sizeof(max_bin_)); - writer->Write(&bin_construct_sample_cnt_, - sizeof(bin_construct_sample_cnt_)); - writer->Write(&min_data_in_bin_, sizeof(min_data_in_bin_)); - writer->Write(&use_missing_, sizeof(use_missing_)); - writer->Write(&zero_as_missing_, sizeof(zero_as_missing_)); - writer->Write(used_feature_map_.data(), sizeof(int) * num_total_features_); - writer->Write(&num_groups_, sizeof(num_groups_)); - writer->Write(real_feature_idx_.data(), sizeof(int) * num_features_); - writer->Write(feature2group_.data(), sizeof(int) * num_features_); - writer->Write(feature2subfeature_.data(), sizeof(int) * num_features_); + writer->AlignedWrite(&num_data_, sizeof(num_data_)); + writer->AlignedWrite(&num_features_, sizeof(num_features_)); + writer->AlignedWrite(&num_total_features_, sizeof(num_total_features_)); + writer->AlignedWrite(&label_idx_, sizeof(label_idx_)); + writer->AlignedWrite(&max_bin_, sizeof(max_bin_)); + writer->AlignedWrite(&bin_construct_sample_cnt_, + sizeof(bin_construct_sample_cnt_)); + writer->AlignedWrite(&min_data_in_bin_, sizeof(min_data_in_bin_)); + writer->AlignedWrite(&use_missing_, sizeof(use_missing_)); + writer->AlignedWrite(&zero_as_missing_, sizeof(zero_as_missing_)); + writer->AlignedWrite(used_feature_map_.data(), + sizeof(int) * num_total_features_); + writer->AlignedWrite(&num_groups_, sizeof(num_groups_)); + writer->AlignedWrite(real_feature_idx_.data(), sizeof(int) * num_features_); + writer->AlignedWrite(feature2group_.data(), sizeof(int) * num_features_); + writer->AlignedWrite(feature2subfeature_.data(), + sizeof(int) * num_features_); writer->Write(group_bin_boundaries_.data(), sizeof(uint64_t) * (num_groups_ + 
1)); - writer->Write(group_feature_start_.data(), sizeof(int) * num_groups_); - writer->Write(group_feature_cnt_.data(), sizeof(int) * num_groups_); + writer->AlignedWrite(group_feature_start_.data(), + sizeof(int) * num_groups_); + writer->AlignedWrite(group_feature_cnt_.data(), sizeof(int) * num_groups_); if (max_bin_by_feature_.empty()) { ArrayArgs::Assign(&max_bin_by_feature_, -1, num_total_features_); } - writer->Write(max_bin_by_feature_.data(), + writer->AlignedWrite(max_bin_by_feature_.data(), sizeof(int32_t) * num_total_features_); if (ArrayArgs::CheckAll(max_bin_by_feature_, -1)) { max_bin_by_feature_.clear(); @@ -960,14 +974,14 @@ void Dataset::SaveBinaryFile(const char* bin_filename) { // write feature names for (int i = 0; i < num_total_features_; ++i) { int str_len = static_cast(feature_names_[i].size()); - writer->Write(&str_len, sizeof(int)); + writer->AlignedWrite(&str_len, sizeof(int)); const char* c_str = feature_names_[i].c_str(); - writer->Write(c_str, sizeof(char) * str_len); + writer->AlignedWrite(c_str, sizeof(char) * str_len); } // write forced bins for (int i = 0; i < num_total_features_; ++i) { int num_bounds = static_cast(forced_bin_bounds_[i].size()); - writer->Write(&num_bounds, sizeof(int)); + writer->AlignedWrite(&num_bounds, sizeof(int)); for (size_t j = 0; j < forced_bin_bounds_[i].size(); ++j) { writer->Write(&forced_bin_bounds_[i][j], sizeof(double)); diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp index 6d8e73af193a..df0e0a115280 100644 --- a/src/io/dataset_loader.cpp +++ b/src/io/dataset_loader.cpp @@ -286,8 +286,10 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b // check token size_t size_of_token = std::strlen(Dataset::binary_file_token); - size_t read_cnt = reader->Read(buffer.data(), sizeof(char) * size_of_token); - if (read_cnt != sizeof(char) * size_of_token) { + size_t read_cnt = reader->Read( + buffer.data(), + VirtualFileWriter::AlignedSize(sizeof(char) * 
size_of_token)); + if (read_cnt < sizeof(char) * size_of_token) { Log::Fatal("Binary file error: token has the wrong size"); } if (std::string(buffer.data()) != std::string(Dataset::binary_file_token)) { @@ -317,53 +319,59 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b // get header const char* mem_ptr = buffer.data(); dataset->num_data_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(dataset->num_data_); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(dataset->num_data_)); dataset->num_features_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(dataset->num_features_); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(dataset->num_features_)); dataset->num_total_features_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(dataset->num_total_features_); + mem_ptr += + VirtualFileWriter::AlignedSize(sizeof(dataset->num_total_features_)); dataset->label_idx_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(dataset->label_idx_); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(dataset->label_idx_)); dataset->max_bin_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(dataset->max_bin_); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(dataset->max_bin_)); dataset->bin_construct_sample_cnt_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(dataset->bin_construct_sample_cnt_); + mem_ptr += VirtualFileWriter::AlignedSize( + sizeof(dataset->bin_construct_sample_cnt_)); dataset->min_data_in_bin_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(dataset->min_data_in_bin_); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(dataset->min_data_in_bin_)); dataset->use_missing_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(dataset->use_missing_); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(dataset->use_missing_)); dataset->zero_as_missing_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(dataset->zero_as_missing_); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(dataset->zero_as_missing_)); const int* 
tmp_feature_map = reinterpret_cast(mem_ptr); dataset->used_feature_map_.clear(); for (int i = 0; i < dataset->num_total_features_; ++i) { dataset->used_feature_map_.push_back(tmp_feature_map[i]); } - mem_ptr += sizeof(int) * dataset->num_total_features_; + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(int) * + dataset->num_total_features_); // num_groups dataset->num_groups_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(dataset->num_groups_); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(dataset->num_groups_)); // real_feature_idx_ const int* tmp_ptr_real_feature_idx_ = reinterpret_cast(mem_ptr); dataset->real_feature_idx_.clear(); for (int i = 0; i < dataset->num_features_; ++i) { dataset->real_feature_idx_.push_back(tmp_ptr_real_feature_idx_[i]); } - mem_ptr += sizeof(int) * dataset->num_features_; + mem_ptr += + VirtualFileWriter::AlignedSize(sizeof(int) * dataset->num_features_); // feature2group const int* tmp_ptr_feature2group = reinterpret_cast(mem_ptr); dataset->feature2group_.clear(); for (int i = 0; i < dataset->num_features_; ++i) { dataset->feature2group_.push_back(tmp_ptr_feature2group[i]); } - mem_ptr += sizeof(int) * dataset->num_features_; + mem_ptr += + VirtualFileWriter::AlignedSize(sizeof(int) * dataset->num_features_); // feature2subfeature const int* tmp_ptr_feature2subfeature = reinterpret_cast(mem_ptr); dataset->feature2subfeature_.clear(); for (int i = 0; i < dataset->num_features_; ++i) { dataset->feature2subfeature_.push_back(tmp_ptr_feature2subfeature[i]); } - mem_ptr += sizeof(int) * dataset->num_features_; + mem_ptr += + VirtualFileWriter::AlignedSize(sizeof(int) * dataset->num_features_); // group_bin_boundaries const uint64_t* tmp_ptr_group_bin_boundaries = reinterpret_cast(mem_ptr); dataset->group_bin_boundaries_.clear(); @@ -378,7 +386,8 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b for (int i = 0; i < dataset->num_groups_; ++i) { 
dataset->group_feature_start_.push_back(tmp_ptr_group_feature_start[i]); } - mem_ptr += sizeof(int) * (dataset->num_groups_); + mem_ptr += + VirtualFileWriter::AlignedSize(sizeof(int) * (dataset->num_groups_)); // group_feature_cnt_ const int* tmp_ptr_group_feature_cnt = reinterpret_cast(mem_ptr); @@ -386,7 +395,8 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b for (int i = 0; i < dataset->num_groups_; ++i) { dataset->group_feature_cnt_.push_back(tmp_ptr_group_feature_cnt[i]); } - mem_ptr += sizeof(int) * (dataset->num_groups_); + mem_ptr += + VirtualFileWriter::AlignedSize(sizeof(int) * (dataset->num_groups_)); if (!config_.max_bin_by_feature.empty()) { CHECK_EQ(static_cast(dataset->num_total_features_), config_.max_bin_by_feature.size()); @@ -400,7 +410,8 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b dataset->max_bin_by_feature_.push_back(tmp_ptr_max_bin_by_feature[i]); } } - mem_ptr += sizeof(int32_t) * (dataset->num_total_features_); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(int32_t) * + (dataset->num_total_features_)); if (ArrayArgs::CheckAll(dataset->max_bin_by_feature_, -1)) { dataset->max_bin_by_feature_.clear(); } @@ -410,22 +421,24 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b // write feature names for (int i = 0; i < dataset->num_total_features_; ++i) { int str_len = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(int); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(int)); std::stringstream str_buf; + auto tmp_arr = reinterpret_cast(mem_ptr); for (int j = 0; j < str_len; ++j) { - char tmp_char = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(char); + char tmp_char = tmp_arr[j]; str_buf << tmp_char; } + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(char) * str_len); dataset->feature_names_.emplace_back(str_buf.str()); } // get forced_bin_bounds_ dataset->forced_bin_bounds_ = std::vector>(dataset->num_total_features_, 
std::vector()); for (int i = 0; i < dataset->num_total_features_; ++i) { int num_bounds = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(int); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(int)); dataset->forced_bin_bounds_[i] = std::vector(); - const double* tmp_ptr_forced_bounds = reinterpret_cast(mem_ptr); + const double* tmp_ptr_forced_bounds = + reinterpret_cast(mem_ptr); for (int j = 0; j < num_bounds; ++j) { double bound = tmp_ptr_forced_bounds[j]; dataset->forced_bin_bounds_[i].push_back(bound); diff --git a/src/io/dense_bin.hpp b/src/io/dense_bin.hpp index e821fe32f08d..7d356970aee8 100644 --- a/src/io/dense_bin.hpp +++ b/src/io/dense_bin.hpp @@ -449,10 +449,12 @@ class DenseBin : public Bin { } void SaveBinaryToFile(const VirtualFileWriter* writer) const override { - writer->Write(data_.data(), sizeof(VAL_T) * data_.size()); + writer->AlignedWrite(data_.data(), sizeof(VAL_T) * data_.size()); } - size_t SizesInByte() const override { return sizeof(VAL_T) * data_.size(); } + size_t SizesInByte() const override { + return VirtualFileWriter::AlignedSize(sizeof(VAL_T) * data_.size()); + } DenseBin* Clone() override; diff --git a/src/io/metadata.cpp b/src/io/metadata.cpp index ea0d5b08def8..8ab4da8d74f2 100644 --- a/src/io/metadata.cpp +++ b/src/io/metadata.cpp @@ -472,44 +472,46 @@ void Metadata::LoadFromMemory(const void* memory) { const char* mem_ptr = reinterpret_cast(memory); num_data_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(num_data_); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(num_data_)); num_weights_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(num_weights_); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(num_weights_)); num_queries_ = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(num_queries_); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(num_queries_)); if (!label_.empty()) { label_.clear(); } label_ = std::vector(num_data_); std::memcpy(label_.data(), mem_ptr, sizeof(label_t) * num_data_); - mem_ptr 
+= sizeof(label_t) * num_data_; + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(label_t) * num_data_); if (num_weights_ > 0) { if (!weights_.empty()) { weights_.clear(); } weights_ = std::vector(num_weights_); std::memcpy(weights_.data(), mem_ptr, sizeof(label_t) * num_weights_); - mem_ptr += sizeof(label_t) * num_weights_; + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(label_t) * num_weights_); weight_load_from_file_ = true; } if (num_queries_ > 0) { if (!query_boundaries_.empty()) { query_boundaries_.clear(); } query_boundaries_ = std::vector(num_queries_ + 1); std::memcpy(query_boundaries_.data(), mem_ptr, sizeof(data_size_t) * (num_queries_ + 1)); - mem_ptr += sizeof(data_size_t) * (num_queries_ + 1); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(data_size_t) * + (num_queries_ + 1)); query_load_from_file_ = true; } LoadQueryWeights(); } void Metadata::SaveBinaryToFile(const VirtualFileWriter* writer) const { - writer->Write(&num_data_, sizeof(num_data_)); - writer->Write(&num_weights_, sizeof(num_weights_)); - writer->Write(&num_queries_, sizeof(num_queries_)); - writer->Write(label_.data(), sizeof(label_t) * num_data_); + writer->AlignedWrite(&num_data_, sizeof(num_data_)); + writer->AlignedWrite(&num_weights_, sizeof(num_weights_)); + writer->AlignedWrite(&num_queries_, sizeof(num_queries_)); + writer->AlignedWrite(label_.data(), sizeof(label_t) * num_data_); if (!weights_.empty()) { - writer->Write(weights_.data(), sizeof(label_t) * num_weights_); + writer->AlignedWrite(weights_.data(), sizeof(label_t) * num_weights_); } if (!query_boundaries_.empty()) { - writer->Write(query_boundaries_.data(), sizeof(data_size_t) * (num_queries_ + 1)); + writer->AlignedWrite(query_boundaries_.data(), + sizeof(data_size_t) * (num_queries_ + 1)); } if (num_init_score_ > 0) { Log::Warning("Please note that `init_score` is not saved in binary file.\n" @@ -518,14 +520,16 @@ void Metadata::SaveBinaryToFile(const VirtualFileWriter* writer) const { } size_t 
Metadata::SizesInByte() const { - size_t size = sizeof(num_data_) + sizeof(num_weights_) - + sizeof(num_queries_); - size += sizeof(label_t) * num_data_; + size_t size = VirtualFileWriter::AlignedSize(sizeof(num_data_)) + + VirtualFileWriter::AlignedSize(sizeof(num_weights_)) + + VirtualFileWriter::AlignedSize(sizeof(num_queries_)); + size += VirtualFileWriter::AlignedSize(sizeof(label_t) * num_data_); if (!weights_.empty()) { - size += sizeof(label_t) * num_weights_; + size += VirtualFileWriter::AlignedSize(sizeof(label_t) * num_weights_); } if (!query_boundaries_.empty()) { - size += sizeof(data_size_t) * (num_queries_ + 1); + size += VirtualFileWriter::AlignedSize(sizeof(data_size_t) * + (num_queries_ + 1)); } return size; } diff --git a/src/io/sparse_bin.hpp b/src/io/sparse_bin.hpp index 07f57c4480a2..1be48c5508fc 100644 --- a/src/io/sparse_bin.hpp +++ b/src/io/sparse_bin.hpp @@ -501,14 +501,15 @@ class SparseBin : public Bin { } void SaveBinaryToFile(const VirtualFileWriter* writer) const override { - writer->Write(&num_vals_, sizeof(num_vals_)); - writer->Write(deltas_.data(), sizeof(uint8_t) * (num_vals_ + 1)); - writer->Write(vals_.data(), sizeof(VAL_T) * num_vals_); + writer->AlignedWrite(&num_vals_, sizeof(num_vals_)); + writer->AlignedWrite(deltas_.data(), sizeof(uint8_t) * (num_vals_ + 1)); + writer->AlignedWrite(vals_.data(), sizeof(VAL_T) * num_vals_); } size_t SizesInByte() const override { - return sizeof(num_vals_) + sizeof(uint8_t) * (num_vals_ + 1) + - sizeof(VAL_T) * num_vals_; + return VirtualFileWriter::AlignedSize(sizeof(num_vals_)) + + VirtualFileWriter::AlignedSize(sizeof(uint8_t) * (num_vals_ + 1)) + + VirtualFileWriter::AlignedSize(sizeof(VAL_T) * num_vals_); } void LoadFromMemory( @@ -516,9 +517,9 @@ class SparseBin : public Bin { const std::vector& local_used_indices) override { const char* mem_ptr = reinterpret_cast(memory); data_size_t tmp_num_vals = *(reinterpret_cast(mem_ptr)); - mem_ptr += sizeof(tmp_num_vals); + mem_ptr += 
VirtualFileWriter::AlignedSize(sizeof(tmp_num_vals)); const uint8_t* tmp_delta = reinterpret_cast(mem_ptr); - mem_ptr += sizeof(uint8_t) * (tmp_num_vals + 1); + mem_ptr += VirtualFileWriter::AlignedSize(sizeof(uint8_t) * (tmp_num_vals + 1)); const VAL_T* tmp_vals = reinterpret_cast(mem_ptr); deltas_.clear(); From 5c1929601074561f1df4c2c875aedd22cc46b718 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 26 Sep 2020 19:12:15 +0100 Subject: [PATCH 20/37] [R-package] add new copyright holder in DESCRIPTION (#3409) * [R-package] add new copyright holder in DESCRIPTION * fix role --- R-package/DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 74d89daec20b..6e969b087c30 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -19,7 +19,8 @@ Authors@R: c( person("Dropbox, Inc.", role = c("cph")), person("Jay", "Loden", role = c("cph")), person("Dave", "Daeschler", role = c("cph")), - person("Giampaolo", "Rodola", role = c("cph")) + person("Giampaolo", "Rodola", role = c("cph")), + person("IBM Corporation", role = c("ctb")) ) Description: Tree based algorithms can be improved by introducing boosting frameworks. 'LightGBM' is one such framework, based on Ke, Guolin et al. (2017) . 
From 1ea66bff82d7a661e4c06a3ca3aa20d1885d0b63 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 29 Sep 2020 22:42:10 -0500 Subject: [PATCH 21/37] fixing conflicts --- R-package/DESCRIPTION | 3 +-- R-package/configure | 18 +++++++++--------- VERSION.txt | 2 +- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 6e969b087c30..74d89daec20b 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -19,8 +19,7 @@ Authors@R: c( person("Dropbox, Inc.", role = c("cph")), person("Jay", "Loden", role = c("cph")), person("Dave", "Daeschler", role = c("cph")), - person("Giampaolo", "Rodola", role = c("cph")), - person("IBM Corporation", role = c("ctb")) + person("Giampaolo", "Rodola", role = c("cph")) ) Description: Tree based algorithms can be improved by introducing boosting frameworks. 'LightGBM' is one such framework, based on Ke, Guolin et al. (2017) . diff --git a/R-package/configure b/R-package/configure index 42734236e289..4cb5afa82921 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for lightgbm 3.0.0.1. +# Generated by GNU Autoconf 2.69 for lightgbm 3.0.0.2. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -576,8 +576,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='lightgbm' PACKAGE_TARNAME='lightgbm' -PACKAGE_VERSION='3.0.0.1' -PACKAGE_STRING='lightgbm 3.0.0.1' +PACKAGE_VERSION='3.0.0.2' +PACKAGE_STRING='lightgbm 3.0.0.2' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1182,7 +1182,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures lightgbm 3.0.0.1 to adapt to many kinds of systems. +\`configure' configures lightgbm 3.0.0.2 to adapt to many kinds of systems. 
Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1244,7 +1244,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of lightgbm 3.0.0.1:";; + short | recursive ) echo "Configuration of lightgbm 3.0.0.2:";; esac cat <<\_ACEOF @@ -1311,7 +1311,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -lightgbm configure 3.0.0.1 +lightgbm configure 3.0.0.2 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1328,7 +1328,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by lightgbm $as_me 3.0.0.1, which was +It was created by lightgbm $as_me 3.0.0.2, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2388,7 +2388,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by lightgbm $as_me 3.0.0.1, which was +This file was extended by lightgbm $as_me 3.0.0.2, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -2441,7 +2441,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -lightgbm config.status 3.0.0.1 +lightgbm config.status 3.0.0.2 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/VERSION.txt b/VERSION.txt index 527e56990a37..57f1ce9bd58e 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -3.0.0.1 +3.0.0.2 From cac7cba5080df65b26963bf9899c88619ba6f627 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 26 Sep 2020 19:12:15 +0100 Subject: [PATCH 22/37] [R-package] add new copyright holder in DESCRIPTION (#3409) * [R-package] add new copyright holder in DESCRIPTION * fix role --- R-package/DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 74d89daec20b..6e969b087c30 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -19,7 +19,8 @@ Authors@R: c( person("Dropbox, Inc.", role = c("cph")), person("Jay", "Loden", role = c("cph")), person("Dave", "Daeschler", role = c("cph")), - person("Giampaolo", "Rodola", role = c("cph")) + person("Giampaolo", "Rodola", role = c("cph")), + person("IBM Corporation", role = c("ctb")) ) Description: Tree based algorithms can be improved by introducing boosting frameworks. 'LightGBM' is one such framework, based on Ke, Guolin et al. (2017) . 
From 4fbf9338fc9160949178a934f45a2cef74a28e18 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 29 Sep 2020 22:47:49 -0500 Subject: [PATCH 23/37] trying to fix conflicts --- R-package/DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 6e969b087c30..58d6dee76466 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -15,6 +15,7 @@ Authors@R: c( person("Qiwei", "Ye", role = c("aut")), person("Tie-Yan", "Liu", role = c("aut")), person("Yachen", "Yan", role = c("ctb")), + person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")), person("Microsoft Corporation", role = c("cph")), person("Dropbox, Inc.", role = c("cph")), person("Jay", "Loden", role = c("cph")), From d9aa856056388c488127c44bc29697d01d5b70ee Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 29 Sep 2020 22:48:51 -0500 Subject: [PATCH 24/37] more fixes --- R-package/DESCRIPTION | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 58d6dee76466..9b2f10dcfcf7 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -15,13 +15,7 @@ Authors@R: c( person("Qiwei", "Ye", role = c("aut")), person("Tie-Yan", "Liu", role = c("aut")), person("Yachen", "Yan", role = c("ctb")), - person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")), - person("Microsoft Corporation", role = c("cph")), - person("Dropbox, Inc.", role = c("cph")), - person("Jay", "Loden", role = c("cph")), - person("Dave", "Daeschler", role = c("cph")), - person("Giampaolo", "Rodola", role = c("cph")), - person("IBM Corporation", role = c("ctb")) + person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")) ) Description: Tree based algorithms can be improved by introducing boosting frameworks. 'LightGBM' is one such framework, based on Ke, Guolin et al. (2017) . 
From 82bb6c7db53d35c273e459f9055eace0c6f1bbd2 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 29 Sep 2020 22:49:37 -0500 Subject: [PATCH 25/37] this will work --- R-package/DESCRIPTION | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 9b2f10dcfcf7..6e969b087c30 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -15,7 +15,12 @@ Authors@R: c( person("Qiwei", "Ye", role = c("aut")), person("Tie-Yan", "Liu", role = c("aut")), person("Yachen", "Yan", role = c("ctb")), - person("James", "Lamb", email="jaylamb20@gmail.com", role = c("aut")) + person("Microsoft Corporation", role = c("cph")), + person("Dropbox, Inc.", role = c("cph")), + person("Jay", "Loden", role = c("cph")), + person("Dave", "Daeschler", role = c("cph")), + person("Giampaolo", "Rodola", role = c("cph")), + person("IBM Corporation", role = c("ctb")) ) Description: Tree based algorithms can be improved by introducing boosting frameworks. 'LightGBM' is one such framework, based on Ke, Guolin et al. (2017) . From 240fbfe05cbe7c4db844900c55553797ba254c8a Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 3 Oct 2020 00:07:49 -0500 Subject: [PATCH 26/37] update cran-comments --- R-package/cran-comments.md | 57 +++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index 124ec213b17d..37ce52ebdb31 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -1,11 +1,66 @@ # CRAN Submission History -## v3.0.0 - Submission 7 - (September 24, 2020) +## v3.0.0.2 - Submission 1 - (September 29, 2020) ### CRAN response +First response was a message talking about failing checks on 3.0.0. + +```text +package lightgbm_3.0.0.2.tar.gz has been auto-processed. 
+The auto-check found additional issues for the last version released on CRAN: +gcc-UBSAN +valgrind +CRAN incoming checks do not test for these additional issues and you will need an appropriately instrumented build of R to reproduce these. +Hence please reply-all and explain: Have these been fixed? + +Please correct before 2020-10-05 to safely retain your package on CRAN. + +There is still a valgrind error. This did not happen when tested on +submission, but the tests did run until timeout at 4 hours. When you +write illegally, corruption is common. + +Illegal writes are serious errors. +``` + +Then in later responses to email correspondence with CRAN, CRAN expressed frustration with the number of failed submissions and banned this package from new submissions for a month. + +The content of that frustrated message was regrettable and it does not need to be preserved forever in this file. + +### Maintainer Notes + +The 3.0.0.x series is officially not making it to CRAN. We will wait until November, and try again. + +Detailed plan about what will be tried before November 2020 to increase the likelihood of success for that package: https://github.com/microsoft/LightGBM/pull/3338#issuecomment-702756840. 
+ +## v3.0.0.1 - Submission 1 - (September 24, 2020) + +### CRAN response + +```text +Thanks, we see: + +Still lots of alignment errors, such as + +lightgbm.Rcheck/tests/testthat.Rout:io/dataset_loader.cpp:340:59: +runtime error: reference binding to misaligned address 0x7f51fefad81e for type 'const value_type', which requires 4 byte alignment +lightgbm.Rcheck/tests/testthat.Rout:/usr/include/c++/10/bits/stl_vector.h:1198:21: +runtime error: reference binding to misaligned address 0x7f51fefad81e for type 'const int', which requires 4 byte alignment lightgbm.Rcheck/tests/testthat.Rout:/usr/include/c++/10/bits/vector.tcc:449:28:runtime +error: reference binding to misaligned address 0x7f51fefad81e for type 'const type', which requires 4 byte alignment +lightgbm.Rcheck/tests/testthat.Rout:/usr/include/c++/10/bits/move.h:77:36: +runtime error: reference binding to misaligned address 0x7f51fefad81e for type 'const int', which requires 4 byte alignment +lightgbm.Rcheck/tests/testthat.Rout:/usr/include/c++/10/bits/alloc_traits.h:512:17: +runtime error: reference binding to misaligned address 0x7f51fefad81e for type 'const type', which requires 4 byte alignment + +Please fix and resubmit. +``` + ### Maintainer Notes +Ok, these are the notes from the UBSAN tests. Was able to reproduce them with https://github.com/microsoft/LightGBM/pull/3338#issuecomment-700399862, and they were fixed in https://github.com/microsoft/LightGBM/pull/3415. + +Struggling to replicate the valgrind result (running `R CMD check --use-valgrind` returns no issues), so trying submission again. Hoping that the fixes for mis-alignment fix the other errors too. 
+ ## v3.0.0 - Submission 6 - (September 24, 2020) ### CRAN response From c10e23dd9284ee8e703e9d7324b4bb2c947f7f29 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 5 Oct 2020 20:56:57 -0500 Subject: [PATCH 27/37] simplify solaris, add more testing docs --- R-package/README.md | 91 ++++++++++++++++++++++++++++++++++ R-package/configure | 11 ---- R-package/configure.ac | 11 ---- src/network/ifaddrs_patch.cpp | 4 +- src/network/socket_wrapper.hpp | 6 +-- 5 files changed, 96 insertions(+), 27 deletions(-) diff --git a/R-package/README.md b/R-package/README.md index f3151c972288..cfeecd0d801c 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -338,6 +338,97 @@ mv \ lightgbm-${LGB_VERSION}-r40-windows.zip ``` +### Testing the CRAN Package + +`{lightgbm}` is tested automatically on every commit, across many combinations of operating system, R version, and compiler. This section describes how to test the package loccally while you are developing. + +#### Windows, Mac, and Linux + +```shell +sh build-cran-package.sh +R CMD check --as-cran lightgbm_*.tar.gz +``` + +#### Solaris + +All packages uploaded to CRAN must pass `R CMD check` on Solaris 10. To test LightGBM on this operating system, you can use [R Hub](https://builder.r-hub.io/), a free service generously provided by the R Consortium. + +```shell +sh build-cran-package.sh +``` + +```r +package_tarball <- paste0("lightgbm_", readLines("VERSION.txt")[1], ".tar.gz") +rhub::check( + path = package_tarball + , email = "jaylamb20@gmail.com" + , check_args = "--as-cran" + , platform = c( + "solaris-x86-patched" + , "solaris-x86-patched-ods" + ) + , env_vars = c( + "R_COMPILE_AND_INSTALL_PACKAGES" = "always" + ) +) +``` + +#### UBSAN + +All packages uploaded to CRAN must pass a build using `gcc` instrumented with two sanitizers: the Address Sanitizer (ASAN) and the Undefined Behavior Sanitizer (UBSAN). For more background, see [this blog post](http://dirk.eddelbuettel.com/code/sanitizers.html). 
+ +You can replicate these checks locally using Docker. + +```shell +docker run \ + -v $(pwd):/opt/LightGBM \ + -it rhub/rocker-gcc-san \ + /bin/bash + +cd /opt/LightGBM +Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'testthat'), repos = 'http://cran.rstudio.com')" + +sh build-cran-package.sh + +Rdevel CMD install lightgbm_*.tar.gz +cd R-package/tests +Rscriptdevel ttestthat.R +``` + +#### Valgrind + +All packages uplooaded to CRAN must be built and tested without raising any issues from `valgrind`. `valgrind` is a profiler that can catch serious issues like memory leaks and illegal writes. For more information, see [this blog post](https://reside-ic.github.io/blog/debugging-and-fixing-crans-additional-checks-errors/). + +You can replicate these checks locally using Docker. Note that instrumented versions of R built to use `valgrind` run much slower, and these tests may take as long as 20 minutes to run. + +```shell +docker run \ + -v $(pwd):/opt/LightGBM \ + -it \ + wch1/r-debug + +cd /opt/LightGBM +RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'jsonlite', 'testthat'), repos = 'http://cran.rstudio.com')" + +sh build-cran-package.sh + +RDvalgrind CMD INSTALL \ + --preclean \ + --install-tests \ + lightgbm_*.tar.gz + +cd R-package/tests + +RDvalgrind \ + --no-readline \ + --vanilla \ + -d valgrind \ + -f testthat.R \ +2>&1 \ +| tee out.log \ +| cat +``` + External (Unofficial) Repositories ---------------------------------- diff --git a/R-package/configure b/R-package/configure index e8f316adf664..02fcfa028b0d 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1827,17 +1827,6 @@ $as_echo "${ac_pkg_openmp}" >&6; } fi fi -########### -# Solaris # -########### - -# some headers used in lib_lightgbm are not available on the Solaris -# systems CRAN tests on -if test `uname -s` = "SunOS" -then - LGB_CPPFLAGS+=" -DON_SOLARIS=1" -fi - # substitute variables from this script into Makevars.in diff --git a/R-package/configure.ac 
b/R-package/configure.ac index da7df65a7fb2..20182666b502 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -127,17 +127,6 @@ then fi fi -########### -# Solaris # -########### - -# some headers used in lib_lightgbm are not available on the Solaris -# systems CRAN tests on -if test `uname -s` = "SunOS" -then - LGB_CPPFLAGS+=" -DON_SOLARIS=1" -fi - # substitute variables from this script into Makevars.in AC_SUBST(OPENMP_CXXFLAGS) AC_SUBST(OPENMP_LIB) diff --git a/src/network/ifaddrs_patch.cpp b/src/network/ifaddrs_patch.cpp index 31ca37300db0..f77bd7d24fb9 100644 --- a/src/network/ifaddrs_patch.cpp +++ b/src/network/ifaddrs_patch.cpp @@ -3,7 +3,7 @@ * Licensed under the BSD 3-Clause License. * See https://github.com/giampaolo/psutil/blob/master/LICENSE */ -#ifdef ON_SOLARIS +#if defined(sun) || defined(__sun) #include #include @@ -126,4 +126,4 @@ int getifaddrs(struct ifaddrs **ifap) { } #endif -// ON_SOLARIS +// defined(sun) || defined(__sun) diff --git a/src/network/socket_wrapper.hpp b/src/network/socket_wrapper.hpp index 06a8e5448c51..7afa1e696853 100644 --- a/src/network/socket_wrapper.hpp +++ b/src/network/socket_wrapper.hpp @@ -36,10 +36,10 @@ #include // ifaddrs.h is not available on Solaris 10 -#ifndef ON_SOLARIS - #include -#else +#if defined(sun) || defined(__sun) #include "ifaddrs_patch.h" +#else + #include #endif #endif // defined(_WIN32) From 35955846ece589c02adeece0f37c44387c9fb2d7 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 5 Oct 2020 21:36:21 -0500 Subject: [PATCH 28/37] stuff --- R-package/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R-package/README.md b/R-package/README.md index cfeecd0d801c..cb3aa0e156b3 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -429,6 +429,10 @@ RDvalgrind \ | cat ``` +#### rchk + +[`rchk`](https://github.com/kalibera/rchk) is a static analyzer for C/C++ code used in R packages. 
It can catch issues like failing to `PROTECT` / `UNPROTECT` calls correctly, which can result in a failure to protect memory from R's garbage collector. + External (Unofficial) Repositories ---------------------------------- From fab12346dd21a0e3b9318a48ebe3d2b758c81f09 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 5 Oct 2020 21:53:24 -0500 Subject: [PATCH 29/37] remove rchck docs --- R-package/README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R-package/README.md b/R-package/README.md index cb3aa0e156b3..cfeecd0d801c 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -429,10 +429,6 @@ RDvalgrind \ | cat ``` -#### rchk - -[`rchk`](https://github.com/kalibera/rchk) is a static analyzer for C/C++ code used in R packages. It can catch issues like failing to `PROTECT` / `UNPROTECT` calls correctly, which can result in a failure to protect memory from R's garbage collector. - External (Unofficial) Repositories ---------------------------------- From bc5decf0247fa5d05db70dd2e9ac05bf793eeab4 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 7 Oct 2020 05:44:27 +0100 Subject: [PATCH 30/37] Apply suggestions from code review Co-authored-by: Nikita Titov --- R-package/R/lgb.Dataset.R | 4 ++-- R-package/README.md | 12 ++++++------ R-package/cran-comments.md | 10 +++++----- src/network/ifaddrs_patch.h | 14 +++++++------- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index 165d27eceb57..9bbe83340a6a 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -870,7 +870,7 @@ dim.lgb.Dataset <- function(x, ...) 
{ #' print(dtrain, verbose = TRUE) #' } #' @rdname dimnames.lgb.Dataset -#' @return A list with the dimensioon names of the dataset +#' @return A list with the dimension names of the dataset #' @export dimnames.lgb.Dataset <- function(x) { @@ -885,7 +885,7 @@ dimnames.lgb.Dataset <- function(x) { } #' @rdname dimnames.lgb.Dataset -#' @return A list with the dimensioon names of the dataset +#' @return A list with the dimension names of the dataset #' @export `dimnames<-.lgb.Dataset` <- function(x, value) { diff --git a/R-package/README.md b/R-package/README.md index cfeecd0d801c..a7accdecd863 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -340,7 +340,7 @@ mv \ ### Testing the CRAN Package -`{lightgbm}` is tested automatically on every commit, across many combinations of operating system, R version, and compiler. This section describes how to test the package loccally while you are developing. +`{lightgbm}` is tested automatically on every commit, across many combinations of operating system, R version, and compiler. This section describes how to test the package locally while you are developing. #### Windows, Mac, and Linux @@ -361,7 +361,7 @@ sh build-cran-package.sh package_tarball <- paste0("lightgbm_", readLines("VERSION.txt")[1], ".tar.gz") rhub::check( path = package_tarball - , email = "jaylamb20@gmail.com" + , email = "your_email_here" , check_args = "--as-cran" , platform = c( "solaris-x86-patched" @@ -386,18 +386,18 @@ docker run \ /bin/bash cd /opt/LightGBM -Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'testthat'), repos = 'http://cran.rstudio.com')" +Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'testthat'), repos = 'https://cran.rstudio.com')" sh build-cran-package.sh Rdevel CMD install lightgbm_*.tar.gz cd R-package/tests -Rscriptdevel ttestthat.R +Rscriptdevel testthat.R ``` #### Valgrind -All packages uplooaded to CRAN must be built and tested without raising any issues from `valgrind`. 
`valgrind` is a profiler that can catch serious issues like memory leaks and illegal writes. For more information, see [this blog post](https://reside-ic.github.io/blog/debugging-and-fixing-crans-additional-checks-errors/). +All packages uploaded to CRAN must be built and tested without raising any issues from `valgrind`. `valgrind` is a profiler that can catch serious issues like memory leaks and illegal writes. For more information, see [this blog post](https://reside-ic.github.io/blog/debugging-and-fixing-crans-additional-checks-errors/). You can replicate these checks locally using Docker. Note that instrumented versions of R built to use `valgrind` run much slower, and these tests may take as long as 20 minutes to run. @@ -408,7 +408,7 @@ docker run \ wch1/r-debug cd /opt/LightGBM -RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'jsonlite', 'testthat'), repos = 'http://cran.rstudio.com')" +RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'jsonlite', 'testthat'), repos = 'https://cran.rstudio.com')" sh build-cran-package.sh diff --git a/R-package/cran-comments.md b/R-package/cran-comments.md index 37ce52ebdb31..e6387f65a125 100644 --- a/R-package/cran-comments.md +++ b/R-package/cran-comments.md @@ -82,7 +82,7 @@ Days since last update: 4 Did not think the version needed to be incremented if submitting a package in response to CRAN saying "you are failing checks and will be kicked off if you don't fix it", but I guess you do! -This can be fixed by just re-submitting but with the version changed froom `3.0.0` to `3.0.0.1`. +This can be fixed by just re-submitting but with the version changed from `3.0.0` to `3.0.0.1`. ## v3.0.0 - Submission 5 - (September 11, 2020) @@ -140,7 +140,7 @@ suppressed if needed. ### Maintainer Notes -responded to CRAN with the following: +Responded to CRAN with the following: All examples have been wrapped with `\donttest` as requested. 
We have replied to Swetlana Herbrandt asking for clarification on the donttest news item in the R 4.0.2 changelog (https://cran.r-project.org/doc/manuals/r-devel/NEWS.html). @@ -148,7 +148,7 @@ All uses of `cat()` have been replaced with `print()`. We chose `print()` over ` All exported objects now have `\value{}` statements in their documentation files in `man/`. -**we also replied directly to CRAN's feedback email** +**We also replied directly to CRAN's feedback email** > Swetlana, @@ -185,13 +185,13 @@ Please fix and resubmit ### Maintainer Notes -responded to CRAN with the following: +Responded to CRAN with the following: The paper citation has been adjusted as requested. We were using 'glmnet' as a guide on how to include the URL but maybe they are no longer in compliance with CRAN policies: https://github.com/cran/glmnet/blob/b1a4b50de01e0cd24343959d7cf86452bac17b26/DESCRIPTION All authors from the original LightGBM paper have been added to Authors@R as `"aut"`. We have also added Microsoft and DropBox, Inc. as `"cph"` (copyright holders). These roles were chosen based on the guidance in https://journal.r-project.org/archive/2012-1/RJournal_2012-1_Hornik~et~al.pdf. -lightgbm's code does use `<<-`, but it does not modify the global environment. The uses of `<<-` in R/lgb.interprete.R and R/callback.R are in functions which are called in an environment created by the lightgbm functions that call them, and this operator is used to reach one level up into the calling function's environment. +lightgbm's code does use `<<-`, but it does not modify the global environment. The uses of `<<-` in R/lgb.interprete.R and R/callback.R are in functions which are called in an environment created by the lightgbm functions that call them, and this operator is used to reach one level up into the calling function's environment. 
We chose to wrap our examples in `\donttest{}` because we found, through testing on https://builder.r-hub.io/ and in our own continuous integration environments, that their run time varies a lot between platforms, and we cannot guarantee that all examples will run in under 5 seconds. We intentionally chose `\donttest{}` over `\donttest{}` because this item in the R 4.0.0 changelog (https://cran.r-project.org/doc/manuals/r-devel/NEWS.html) seems to indicate that \donttest will be ignored by CRAN's automated checks: diff --git a/src/network/ifaddrs_patch.h b/src/network/ifaddrs_patch.h index ab023ef03c27..1b9f60d7c4b1 100644 --- a/src/network/ifaddrs_patch.h +++ b/src/network/ifaddrs_patch.h @@ -1,16 +1,16 @@ /*! -* Copyright (c) 2009, Jay Loden, Dave Daeschler, Giampaolo Rodola. + * Copyright (c) 2009, Jay Loden, Dave Daeschler, Giampaolo Rodola. * Licensed under the BSD 3-Clause License. * See https://github.com/giampaolo/psutil/blob/master/LICENSE */ /* -- https://lists.samba.org/archive/samba-technical/2009-February/063079.html -- https://github.com/giampaolo/psutil/blob/master/psutil/arch/solaris/v10/ifaddrs.h -*/ + * - https://lists.samba.org/archive/samba-technical/2009-February/063079.html + * - https://github.com/giampaolo/psutil/blob/master/psutil/arch/solaris/v10/ifaddrs.h + */ -#ifndef __IFADDRS_H__ -#define __IFADDRS_H__ +#ifndef LIGHTGBM_NETWORK_IFADDRS_PATCH_H_ +#define LIGHTGBM_NETWORK_IFADDRS_PATCH_H_ #include #include @@ -31,4 +31,4 @@ struct ifaddrs { extern int getifaddrs(struct ifaddrs **); extern void freeifaddrs(struct ifaddrs *); -#endif +#endif // LIGHTGBM_NETWORK_IFADDRS_PATCH_H_ From 9d9165aaea55bfcc33811502b2a4ff02c4b59fd1 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 6 Oct 2020 23:52:25 -0500 Subject: [PATCH 31/37] remove extra use of cat() --- R-package/R/callback.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R-package/R/callback.R b/R-package/R/callback.R index 60e0ebc437f4..e7d052f1ebc2 100644 --- a/R-package/R/callback.R 
+++ b/R-package/R/callback.R @@ -174,7 +174,6 @@ cb.print.evaluation <- function(period = 1L) { # Check if message is existing if (nchar(msg) > 0L) { print(merge.eval.string(env = env)) - cat(merge.eval.string(env = env), "\n") } } From 465fbc4d982185eb74706fc6eba17ca341b1d7e4 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 6 Oct 2020 23:56:03 -0500 Subject: [PATCH 32/37] change solaris check --- src/network/ifaddrs_patch.cpp | 6 +++--- src/network/socket_wrapper.hpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/network/ifaddrs_patch.cpp b/src/network/ifaddrs_patch.cpp index f77bd7d24fb9..095752b87f91 100644 --- a/src/network/ifaddrs_patch.cpp +++ b/src/network/ifaddrs_patch.cpp @@ -3,7 +3,7 @@ * Licensed under the BSD 3-Clause License. * See https://github.com/giampaolo/psutil/blob/master/LICENSE */ -#if defined(sun) || defined(__sun) +#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__)) #include #include @@ -125,5 +125,5 @@ int getifaddrs(struct ifaddrs **ifap) { return (-1); } -#endif -// defined(sun) || defined(__sun) +#endif // (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__)) + diff --git a/src/network/socket_wrapper.hpp b/src/network/socket_wrapper.hpp index 7afa1e696853..b02a7c532137 100644 --- a/src/network/socket_wrapper.hpp +++ b/src/network/socket_wrapper.hpp @@ -36,7 +36,7 @@ #include // ifaddrs.h is not available on Solaris 10 -#if defined(sun) || defined(__sun) +#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__)) #include "ifaddrs_patch.h" #else #include From 988bbf2b57fbc600ca9ac160f47a74dde5e907ae Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 7 Oct 2020 00:07:30 -0500 Subject: [PATCH 33/37] update docs --- R-package/R/lgb.cv.R | 1 + R-package/man/dimnames.lgb.Dataset.Rd | 4 ++-- R-package/man/lgb.cv.Rd | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 
cc248abfa0ce..ac42ddb6bab6 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -53,6 +53,7 @@ CVBooster <- R6::R6Class( #' #' @examples #' \donttest{ +#' stop("I am an error") #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/dimnames.lgb.Dataset.Rd b/R-package/man/dimnames.lgb.Dataset.Rd index a968fcac4663..58a753ca98fe 100644 --- a/R-package/man/dimnames.lgb.Dataset.Rd +++ b/R-package/man/dimnames.lgb.Dataset.Rd @@ -16,9 +16,9 @@ and the second one is column names} } \value{ -A list with the dimensioon names of the dataset +A list with the dimension names of the dataset -A list with the dimensioon names of the dataset +A list with the dimension names of the dataset } \description{ Only column names are supported for \code{lgb.Dataset}, thus setting of diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index b502faf4f837..6bea9041a9ec 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -150,6 +150,7 @@ Cross validation logic used by LightGBM \examples{ \donttest{ +stop("I am an error") data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) From 57d9879f2bfa1caa9e0e5f7686c46a0aab2d594c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 7 Oct 2020 00:16:02 -0500 Subject: [PATCH 34/37] remove testing code --- R-package/R/lgb.cv.R | 1 - R-package/man/lgb.cv.Rd | 1 - 2 files changed, 2 deletions(-) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index ac42ddb6bab6..cc248abfa0ce 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -53,7 +53,6 @@ CVBooster <- R6::R6Class( #' #' @examples #' \donttest{ -#' stop("I am an error") #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index 
6bea9041a9ec..b502faf4f837 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -150,7 +150,6 @@ Cross validation logic used by LightGBM \examples{ \donttest{ -stop("I am an error") data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) From 46f4c1f0f61e82ad275da925ce204ed4430ba61d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 7 Oct 2020 00:39:07 -0500 Subject: [PATCH 35/37] fix warning about cleanup not having execute permissions --- R-package/cleanup | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 R-package/cleanup diff --git a/R-package/cleanup b/R-package/cleanup old mode 100644 new mode 100755 From c6ff472016c9ad9e6e66bf4a5cc0e53712561b39 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 7 Oct 2020 00:56:15 -0500 Subject: [PATCH 36/37] fix cmake builds --- R-package/cleanup | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 R-package/cleanup diff --git a/R-package/cleanup b/R-package/cleanup old mode 100755 new mode 100644 From 6c423a54c0019545c05dd7d3861e9e70ecd41fb5 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 7 Oct 2020 20:53:49 -0500 Subject: [PATCH 37/37] remove blank line --- src/network/ifaddrs_patch.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/network/ifaddrs_patch.cpp b/src/network/ifaddrs_patch.cpp index 095752b87f91..f7f30a8e4770 100644 --- a/src/network/ifaddrs_patch.cpp +++ b/src/network/ifaddrs_patch.cpp @@ -126,4 +126,3 @@ int getifaddrs(struct ifaddrs **ifap) { } #endif // (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__)) -