From c021dc04181e93c6bd1f23dce690b25274a0e45f Mon Sep 17 00:00:00 2001 From: ayush chaurasia Date: Tue, 28 Dec 2021 18:38:02 +0530 Subject: [PATCH 1/7] log best.pt metrics at train end --- utils/loggers/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 7a1df2a45ea7..233d6d655402 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -147,9 +147,11 @@ def on_train_end(self, last, best, plots, epoch, results): self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') if self.wandb: + x = {k: v for k, v in zip(self.keys[3:10], results)} # dict + self.wandb.log(x) self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]}) # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model - if not self.opt.evolve: + if not self.opt.evolve: wandb.log_artifact(str(best if best.exists() else last), type='model', name='run_' + self.wandb.wandb_run.id + '_model', aliases=['latest', 'best', 'stripped']) From 3a552ff3f92ae515e2c9a4cd464bbe704b497a81 Mon Sep 17 00:00:00 2001 From: ayush chaurasia Date: Tue, 28 Dec 2021 18:43:21 +0530 Subject: [PATCH 2/7] update --- utils/loggers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 233d6d655402..3c95dcaeb07c 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -151,7 +151,7 @@ def on_train_end(self, last, best, plots, epoch, results): self.wandb.log(x) self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]}) # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model - if not self.opt.evolve: + if not self.opt.evolve: wandb.log_artifact(str(best if best.exists() else last), type='model', name='run_' + self.wandb.wandb_run.id + '_model', aliases=['latest', 'best', 'stripped']) From 83ae5da1d70c86f7b41c4b680cad35d2d717d864 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 30 Dec 2021 11:42:25 -0800 Subject: [PATCH 3/7] Update __init__.py --- utils/loggers/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 3c95dcaeb07c..8af5c402d5ee 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -147,8 +147,7 @@ def on_train_end(self, last, best, plots, epoch, results): self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') if self.wandb: - x = {k: v for k, v in zip(self.keys[3:10], results)} # dict - self.wandb.log(x) + self.wandb.log({k: v for k, v in zip(self.keys[3:10], results)}) # log best.pt val results self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]}) # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model if not self.opt.evolve: From 4fb9d7f70f1a60fc977c2b4e845d95556e7e2002 Mon Sep 17 00:00:00 2001 From: ayush chaurasia Date: Fri, 21 Jan 2022 03:19:19 +0530 Subject: [PATCH 4/7] flush callbacks when using evolve --- train.py | 2 +- utils/loggers/__init__.py | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/train.py b/train.py index ebe6c2e8f5f9..b20b7dbb2dda 100644 --- a/train.py +++ b/train.py @@ -612,7 +612,7 @@ def main(opt, callbacks=Callbacks()): # Train mutation results = train(hyp.copy(), opt, device, callbacks) - + callbacks = Callbacks() # Write mutation results print_mutation(results, hyp.copy(), save_dir, opt.bucket) diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 7679ee70f176..86ccf38443a9 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -159,10 +159,7 @@ def on_train_end(self, last, best, plots, epoch, results): wandb.log_artifact(str(best if best.exists() else last), type='model', name='run_' + self.wandb.wandb_run.id + '_model', aliases=['latest', 'best', 'stripped']) - self.wandb.finish_run() - else: - self.wandb.finish_run() - self.wandb = WandbLogger(self.opt) + self.wandb.finish_run() def on_params_update(self, params): # Update hyperparams or configs of the experiment From 405c5a496269bc70da98a10b5d36ab35285b80e8 Mon Sep 17 00:00:00 2001 From: ayush chaurasia Date: Wed, 2 Feb 2022 22:57:26 +0530 Subject: [PATCH 5/7] remember batch size on resuming --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index b20b7dbb2dda..b0fe2134c9af 100644 --- a/train.py +++ b/train.py @@ -96,7 +96,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary if loggers.wandb: data_dict = loggers.wandb.data_dict if resume: - weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp + weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, batch_size # Register actions for k in methods(loggers): From b3b86c89303518526a0737644bad6b3f36eec2fa Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 3 Feb 2022 02:00:58 +0530 Subject: [PATCH 6/7] Update train.py --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 900b535c872b..2a973fb7164b 100644 --- a/train.py +++ b/train.py @@ -96,7 +96,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary if loggers.wandb: data_dict = loggers.wandb.data_dict if resume: - weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, batch_size + weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size # Register actions for k in methods(loggers): From c56fbac74158fadd8529b0d88d0a4fc3ed6902e0 Mon Sep 17 00:00:00 2001 From: ayush chaurasia Date: Thu, 10 Feb 2022 19:57:30 +0530 Subject: [PATCH 7/7] improve stability of resume --- utils/loggers/wandb/wandb_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/loggers/wandb/wandb_utils.py b/utils/loggers/wandb/wandb_utils.py index 221d3c88c56e..701caf66cc4b 100644 --- a/utils/loggers/wandb/wandb_utils.py +++ b/utils/loggers/wandb/wandb_utils.py @@ -225,9 +225,9 @@ def setup_training(self, opt): if modeldir: self.weights = Path(modeldir) / "last.pt" config = self.wandb_run.config - opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp = str( - self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs, \ - config.hyp + opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp, opt.imgsz = str( + self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs,\ + config.hyp, config.imgsz data_dict = self.data_dict if self.val_artifact is None: # If --upload_dataset is set, use the existing artifact, don't download self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(data_dict.get('train'),