From d2e83a56fe7debfa8e891b09176dce730c5bae6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 28 Mar 2021 23:06:57 +0200 Subject: [PATCH 1/5] fix should stop --- pytorch_lightning/trainer/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index f0f1d3e6b11e1..3ebff4c8dcaa0 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -614,6 +614,7 @@ def run_train(self) -> None: f' ({self.min_epochs}) or minimum steps ({self.min_steps}) has' ' not been met. Training will continue...' ) + self.should_stop = False # hook self.train_loop.on_train_end() From 2813e772d2b7c23ac9eea834f2f3728400a7f593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Mon, 29 Mar 2021 00:42:09 +0200 Subject: [PATCH 2/5] add test --- tests/trainer/test_trainer.py | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 4ca2f737f5106..f66e211e2dc8f 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging import math import os import pickle @@ -546,6 +547,41 @@ def test_trainer_min_steps_and_epochs(tmpdir): assert trainer.global_step >= math.floor(num_train_samples * 1.5), "Model did not train for at least min_steps" +class TestModel(BoringModel): + training_step_invoked = 0 + + def training_step(self, batch, batch_idx): + output = super().training_step(batch, batch_idx) + output["loss"] = output["loss"] * 0.0 # force minimal loss to trigger early stopping + self.log("loss", output["loss"]) + self.training_step_invoked += 1 + # print(batch_idx, self.trainer.current_epoch) + assert not self.trainer.should_stop + return output + + +def test_trainer_min_steps_and_min_epochs_not_reached(tmpdir, caplog): + """ Test that min_epochs/min_steps in Trainer are enforced even if EarlyStopping is triggered. """ + model = TestModel() + early_stop = EarlyStopping(monitor="loss", patience=0) + min_epochs = 5 + trainer = Trainer( + default_root_dir=tmpdir, + progress_bar_refresh_rate=0, + min_epochs=min_epochs, + limit_val_batches=0, + limit_train_batches=2, + callbacks=[early_stop] + ) + with caplog.at_level(logging.INFO, logger="pytorch_lightning.trainer.trainer"): + trainer.fit(model) + + message = f"minimum epochs ({min_epochs}) or minimum steps (None) has not been met. Training will continue" + num_messages = len([record.message for record in caplog.records if message in record.message]) + assert num_messages == min_epochs - 2 + assert model.training_step_invoked == min_epochs * 2 + + def test_trainer_max_steps_accumulate_batches(tmpdir): """Verify model trains according to specified max steps with grad accumulated batches""" model = BoringModel() From 991c650af85075eff0ae2dde24525c5a5e0da67c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Mon, 29 Mar 2021 00:49:18 +0200 Subject: [PATCH 3/5] add chlog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5076d2e5ad848..49a92d9bbbf41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -214,6 +214,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed a bug where gradients were disabled after calling `Trainer.predict` ([#6657](https://github.com/PyTorchLightning/pytorch-lightning/pull/6657)) +- Fixed `EarlyStopping` logic when `min_epochs` or `min_steps` requirement is not met ([#6705](https://github.com/PyTorchLightning/pytorch-lightning/pull/6705)) + + ## [1.2.4] - 2021-03-16 ### Changed From bcb5d3eafbbbba5e5a7339bce0ad4c08aa93dff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Mon, 29 Mar 2021 01:09:06 +0200 Subject: [PATCH 4/5] move model inside test --- tests/trainer/test_trainer.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index f66e211e2dc8f..1437229a19b36 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -547,21 +547,21 @@ def test_trainer_min_steps_and_epochs(tmpdir): assert trainer.global_step >= math.floor(num_train_samples * 1.5), "Model did not train for at least min_steps" -class TestModel(BoringModel): - training_step_invoked = 0 +def test_trainer_min_steps_and_min_epochs_not_reached(tmpdir, caplog): + """ Test that min_epochs/min_steps in Trainer are enforced even if EarlyStopping is triggered. """ - def training_step(self, batch, batch_idx): - output = super().training_step(batch, batch_idx) - output["loss"] = output["loss"] * 0.0 # force minimal loss to trigger early stopping - self.log("loss", output["loss"]) - self.training_step_invoked += 1 - # print(batch_idx, self.trainer.current_epoch) - assert not self.trainer.should_stop - return output + class TestModel(BoringModel): + training_step_invoked = 0 + def training_step(self, batch, batch_idx): + output = super().training_step(batch, batch_idx) + output["loss"] = output["loss"] * 0.0 # force minimal loss to trigger early stopping + self.log("loss", output["loss"]) + self.training_step_invoked += 1 + # print(batch_idx, self.trainer.current_epoch) + assert not self.trainer.should_stop + return output -def test_trainer_min_steps_and_min_epochs_not_reached(tmpdir, caplog): - """ Test that min_epochs/min_steps in Trainer are enforced even if EarlyStopping is triggered. """ model = TestModel() early_stop = EarlyStopping(monitor="loss", patience=0) min_epochs = 5 From ec984e55cbc162b7a34ece30e87f3893360bf5c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Mon, 29 Mar 2021 17:40:45 +0200 Subject: [PATCH 5/5] Update tests/trainer/test_trainer.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos MocholĂ­ --- tests/trainer/test_trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 1437229a19b36..ecb75b6fcb27c 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -558,7 +558,6 @@ def training_step(self, batch, batch_idx): output["loss"] = output["loss"] * 0.0 # force minimal loss to trigger early stopping self.log("loss", output["loss"]) self.training_step_invoked += 1 - # print(batch_idx, self.trainer.current_epoch) assert not self.trainer.should_stop return output