Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

W&B refactor, handle exceptions, CI example #5618

Merged
merged 9 commits into from
Nov 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/ci-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,15 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -qr requirements.txt -f https://download.pytorch.org/whl/cpu/torch_stable.html
pip install -q onnx tensorflow-cpu keras==2.6.0 # for export
pip install -q onnx tensorflow-cpu keras==2.6.0 # wandb # extras
python --version
pip --version
pip list
shell: bash

# - name: W&B login
# run: wandb login 345011b3fb26dc8337fd9b20e53857c1d403f2aa

- name: Download data
run: |
# curl -L -o tmp.zip https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
Expand Down
4 changes: 4 additions & 0 deletions utils/loggers/wandb/log_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@

from wandb_utils import WandbLogger

from utils.general import LOGGER

WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'


def create_dataset_artifact(opt):
    """Build a ``WandbLogger`` for a 'Dataset Creation' job and hint at installation if needed.

    Args:
        opt: Options object handed straight through to ``WandbLogger``.

    Returns:
        None. When the logger's ``wandb`` attribute is falsy, an install hint
        is written to ``LOGGER`` at INFO level; otherwise nothing is logged here.
    """
    wandb_logger = WandbLogger(opt, None, job_type='Dataset Creation')
    if wandb_logger.wandb:
        # Logger reports a usable wandb handle, so there is nothing to tell the user.
        return
    LOGGER.info("install wandb using `pip install wandb` to log the dataset")


if __name__ == '__main__':
Expand Down
16 changes: 11 additions & 5 deletions utils/loggers/wandb/wandb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
sys.path.append(str(ROOT)) # add ROOT to PATH

from utils.datasets import LoadImagesAndLabels, img2label_paths
from utils.general import check_dataset, check_file
from utils.general import LOGGER, check_dataset, check_file

try:
import wandb
Expand Down Expand Up @@ -203,7 +203,7 @@ def check_and_upload_dataset(self, opt):
config_path = self.log_dataset_artifact(opt.data,
opt.single_cls,
'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem)
print("Created dataset config file ", config_path)
LOGGER.info(f"Created dataset config file {config_path}")
with open(config_path, errors='ignore') as f:
wandb_data_dict = yaml.safe_load(f)
return wandb_data_dict
Expand Down Expand Up @@ -316,7 +316,7 @@ def log_model(self, path, opt, epoch, fitness_score, best_model=False):
model_artifact.add_file(str(path / 'last.pt'), name='last.pt')
wandb.log_artifact(model_artifact,
aliases=['latest', 'last', 'epoch ' + str(self.current_epoch), 'best' if best_model else ''])
print("Saving model artifact on epoch ", epoch + 1)
LOGGER.info(f"Saving model artifact on epoch {epoch + 1}")

def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
"""
Expand Down Expand Up @@ -368,7 +368,7 @@ def map_val_table_path(self):
Useful for - referencing artifacts for evaluation.
"""
self.val_table_path_map = {}
print("Mapping dataset")
LOGGER.info("Mapping dataset")
for i, data in enumerate(tqdm(self.val_table.data)):
self.val_table_path_map[data[3]] = data[0]

Expand Down Expand Up @@ -488,7 +488,13 @@ def end_epoch(self, best_result=False):
with all_logging_disabled():
if self.bbox_media_panel_images:
self.log_dict["BoundingBoxDebugger"] = self.bbox_media_panel_images
wandb.log(self.log_dict)
try:
wandb.log(self.log_dict)
except BaseException as e:
LOGGER.info(f"An error occurred in wandb logger. The training will proceed without interruption. More info\n{e}")
self.wandb_run.finish()
self.wandb_run = None

self.log_dict = {}
self.bbox_media_panel_images = []
if self.result_artifact:
Expand Down