From 4b3ec0a60431d92a97c45477cea19c57ee3d9ddd Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sun, 14 Nov 2021 17:56:53 +0530 Subject: [PATCH] W&B refactor, handle exceptions, CI example (#5618) * handle exceptions| attempt CI * update * Pre-commit manual run * yaml one-liner * Update ci-testing.yml * Comment W&B CI Leave as example for future separate CI * Update ci-testing.yml Co-authored-by: Glenn Jocher --- .github/workflows/ci-testing.yml | 5 ++++- utils/loggers/wandb/log_dataset.py | 4 ++++ utils/loggers/wandb/wandb_utils.py | 16 +++++++++++----- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index abfe21ef8726..5db6d41f4bcc 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -51,12 +51,15 @@ jobs: run: | python -m pip install --upgrade pip pip install -qr requirements.txt -f https://download.pytorch.org/whl/cpu/torch_stable.html - pip install -q onnx tensorflow-cpu keras==2.6.0 # for export + pip install -q onnx tensorflow-cpu keras==2.6.0 # wandb # extras python --version pip --version pip list shell: bash + # - name: W&B login + # run: wandb login 345011b3fb26dc8337fd9b20e53857c1d403f2aa + - name: Download data run: | # curl -L -o tmp.zip https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip diff --git a/utils/loggers/wandb/log_dataset.py b/utils/loggers/wandb/log_dataset.py index 8447272cdb48..06e81fb69307 100644 --- a/utils/loggers/wandb/log_dataset.py +++ b/utils/loggers/wandb/log_dataset.py @@ -2,11 +2,15 @@ from wandb_utils import WandbLogger +from utils.general import LOGGER + WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' def create_dataset_artifact(opt): logger = WandbLogger(opt, None, job_type='Dataset Creation') # TODO: return value unused + if not logger.wandb: + LOGGER.info("install wandb using `pip install wandb` to log the dataset") if __name__ == '__main__': diff --git a/utils/loggers/wandb/wandb_utils.py b/utils/loggers/wandb/wandb_utils.py index a71bc6ce96d2..47757dd1a74e 100644 --- a/utils/loggers/wandb/wandb_utils.py +++ b/utils/loggers/wandb/wandb_utils.py @@ -17,7 +17,7 @@ sys.path.append(str(ROOT)) # add ROOT to PATH from utils.datasets import LoadImagesAndLabels, img2label_paths -from utils.general import check_dataset, check_file +from utils.general import LOGGER, check_dataset, check_file try: import wandb @@ -203,7 +203,7 @@ def check_and_upload_dataset(self, opt): config_path = self.log_dataset_artifact(opt.data, opt.single_cls, 'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem) - print("Created dataset config file ", config_path) + LOGGER.info(f"Created dataset config file {config_path}") with open(config_path, errors='ignore') as f: wandb_data_dict = yaml.safe_load(f) return wandb_data_dict @@ -316,7 +316,7 @@ def log_model(self, path, opt, epoch, fitness_score, best_model=False): model_artifact.add_file(str(path / 'last.pt'), name='last.pt') wandb.log_artifact(model_artifact, aliases=['latest', 'last', 'epoch ' + str(self.current_epoch), 'best' if best_model else '']) - print("Saving model artifact on epoch ", epoch + 1) + LOGGER.info(f"Saving model artifact on epoch {epoch + 1}") def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False): """ @@ -368,7 +368,7 @@ def map_val_table_path(self): Useful for - referencing artifacts for evaluation. """ self.val_table_path_map = {} - print("Mapping dataset") + LOGGER.info("Mapping dataset") for i, data in enumerate(tqdm(self.val_table.data)): self.val_table_path_map[data[3]] = data[0] @@ -488,7 +488,13 @@ def end_epoch(self, best_result=False): with all_logging_disabled(): if self.bbox_media_panel_images: self.log_dict["BoundingBoxDebugger"] = self.bbox_media_panel_images - wandb.log(self.log_dict) + try: + wandb.log(self.log_dict) + except BaseException as e: + LOGGER.info(f"An error occurred in wandb logger. The training will proceed without interruption. More info\n{e}") + self.wandb_run.finish() + self.wandb_run = None + self.log_dict = {} self.bbox_media_panel_images = [] if self.result_artifact: