Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Weights & Biases (W&B) Feature Addition #1235

Merged
merged 18 commits into from
Oct 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,6 +13,9 @@ torch>=1.6.0
torchvision>=0.7.0
tqdm>=4.41.0

# logging -------------------------------------
# wandb

# coco ----------------------------------------
# pycocotools>=2.0

Expand Down
25 changes: 23 additions & 2 deletions test.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,7 +33,9 @@ def test(data,
save_dir=Path(''), # for saving images
save_txt=False, # for auto-labelling
save_conf=False,
plots=True):
plots=True,
log_imgs=0): # number of logged images

# Initialize/load model and set device
training = model is not None
if training: # called by train.py
Expand Down Expand Up @@ -77,6 +79,13 @@ def test(data,
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
niou = iouv.numel()

# Logging
log_imgs = min(log_imgs, 100) # ceil
try:
import wandb # Weights & Biases
except ImportError:
log_imgs = 0

# Dataloader
if not training:
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
Expand All @@ -91,7 +100,7 @@ def test(data,
s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
loss = torch.zeros(3, device=device)
jdict, stats, ap, ap_class = [], [], [], []
jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
img = img.to(device, non_blocking=True)
img = img.half() if half else img.float() # uint8 to fp16/32
Expand Down Expand Up @@ -139,6 +148,14 @@ def test(data,
with open(str(out / Path(paths[si]).stem) + '.txt', 'a') as f:
f.write(('%g ' * len(line) + '\n') % line)

# W&B logging
if len(wandb_images) < log_imgs:
bbox_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
"class_id": int(cls),
"scores": {"class_score": conf},
"domain": "pixel"} for *xyxy, conf, cls in pred.clone().tolist()]
wandb_images.append(wandb.Image(img[si], boxes={"predictions": {"box_data": bbox_data}}))

# Clip boxes to image bounds
clip_coords(pred, (height, width))

Expand Down Expand Up @@ -196,6 +213,10 @@ def test(data,
f = save_dir / f'test_batch{batch_i}_pred.jpg'
plot_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions

# W&B logging
if wandb_images:
wandb.log({"outputs": wandb_images})

# Compute statistics
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
if len(stats) and stats[0].any():
Expand Down
48 changes: 35 additions & 13 deletions train.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,7 +33,7 @@
logger = logging.getLogger(__name__)


def train(hyp, opt, device, tb_writer=None):
def train(hyp, opt, device, tb_writer=None, wandb=None):
logger.info(f'Hyperparameters {hyp}')
log_dir = Path(tb_writer.log_dir) if tb_writer else Path(opt.logdir) / 'evolve' # logging directory
wdir = log_dir / 'weights' # weights directory
Expand Down Expand Up @@ -118,6 +118,11 @@ def train(hyp, opt, device, tb_writer=None):
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
# plot_lr_scheduler(optimizer, scheduler, epochs)

# Logging
if wandb and wandb.run is None:
id = ckpt.get('wandb_id') if 'ckpt' in locals() else None
wandb_run = wandb.init(config=opt, resume="allow", project=os.path.basename(log_dir), id=id)

glenn-jocher marked this conversation as resolved.
Show resolved Hide resolved
# Resume
start_epoch, best_fitness = 0, 0.0
if pretrained:
Expand Down Expand Up @@ -317,22 +322,25 @@ def train(hyp, opt, device, tb_writer=None):
single_cls=opt.single_cls,
dataloader=testloader,
save_dir=log_dir,
plots=epoch == 0 or final_epoch) # plot first and last
plots=epoch == 0 or final_epoch, # plot first and last
log_imgs=opt.log_imgs)

# Write
with open(results_file, 'a') as f:
f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
if len(opt.name) and opt.bucket:
os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))

# Tensorboard
if tb_writer:
tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss
'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss
'x/lr0', 'x/lr1', 'x/lr2'] # params
for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
tb_writer.add_scalar(tag, x, epoch)
# Log
tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss', # train loss
'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
'val/giou_loss', 'val/obj_loss', 'val/cls_loss', # val loss
'x/lr0', 'x/lr1', 'x/lr2'] # params
for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
if tb_writer:
tb_writer.add_scalar(tag, x, epoch) # tensorboard
if wandb:
wandb.log({tag: x}) # W&B

# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
Expand All @@ -347,7 +355,8 @@ def train(hyp, opt, device, tb_writer=None):
'best_fitness': best_fitness,
'training_results': f.read(),
'model': ema.ema,
'optimizer': None if final_epoch else optimizer.state_dict()}
'optimizer': None if final_epoch else optimizer.state_dict(),
'wandb_id': wandb_run.id if wandb else None}

# Save last, best and delete
torch.save(ckpt, last)
Expand Down Expand Up @@ -403,7 +412,9 @@ def train(hyp, opt, device, tb_writer=None):
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
parser.add_argument('--logdir', type=str, default='runs/', help='logging directory')
parser.add_argument('--log-imgs', type=int, default=10, help='number of images for W&B logging, max 100')
parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')

opt = parser.parse_args()

# Set DDP variables
Expand Down Expand Up @@ -452,12 +463,23 @@ def train(hyp, opt, device, tb_writer=None):
# Train
logger.info(opt)
if not opt.evolve:
tb_writer = None
tb_writer, wandb = None, None # init loggers
if opt.global_rank in [-1, 0]:
# Tensorboard
logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.logdir}", view at http://localhost:6006/')
tb_writer = SummaryWriter(log_dir=log_dir) # runs/exp0

train(hyp, opt, device, tb_writer)
# W&B
try:
import wandb

assert os.environ.get('WANDB_DISABLED') != 'true'
logger.info("Weights & Biases logging enabled, to disable set os.environ['WANDB_DISABLED'] = 'true'")
except (ImportError, AssertionError):
opt.log_imgs = 0
logger.info("Install Weights & Biases for experiment logging via 'pip install wandb' (recommended)")

train(hyp, opt, device, tb_writer, wandb)

# Evolve hyperparameters (optional)
else:
Expand Down