From f7bc685c2c0f57005b83355715cb7282e61416eb Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Wed, 21 Apr 2021 14:34:45 +0200
Subject: [PATCH] Implement yaml.safe_load() (#2876)

* Implement yaml.safe_load()

* Implement yaml.safe_dump()

* Store opt.save_dir as str so yaml.safe_dump(vars(opt)) can serialize it
---
 data/coco.yaml                     |  2 +-
 models/yolo.py                     |  2 +-
 test.py                            |  2 +-
 train.py                           | 19 ++++++++++---------
 utils/autoanchor.py                |  2 +-
 utils/aws/resume.py                |  2 +-
 utils/general.py                   |  2 +-
 utils/plots.py                     |  2 +-
 utils/wandb_logging/log_dataset.py |  2 +-
 utils/wandb_logging/wandb_utils.py | 10 +++++-----
 10 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/data/coco.yaml b/data/coco.yaml
index b9da2bf5919b..fa33a1210004 100644
--- a/data/coco.yaml
+++ b/data/coco.yaml
@@ -30,6 +30,6 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', '
 
 # Print classes
 # with open('data/coco.yaml') as f:
-#   d = yaml.load(f, Loader=yaml.FullLoader)  # dict
+#   d = yaml.safe_load(f)  # dict
 #   for i, x in enumerate(d['names']):
 #     print(i, x)
diff --git a/models/yolo.py b/models/yolo.py
index f730a1efa3b3..7db0e7da2629 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -72,7 +72,7 @@ def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, i
             import yaml  # for torch hub
             self.yaml_file = Path(cfg).name
             with open(cfg) as f:
-                self.yaml = yaml.load(f, Loader=yaml.SafeLoader)  # model dict
+                self.yaml = yaml.safe_load(f)  # model dict
 
         # Define model
         ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
diff --git a/test.py b/test.py
index db1651d07f65..43c03cf0e094 100644
--- a/test.py
+++ b/test.py
@@ -71,7 +71,7 @@ def test(data,
     if isinstance(data, str):
         is_coco = data.endswith('coco.yaml')
         with open(data) as f:
-            data = yaml.load(f, Loader=yaml.SafeLoader)
+            data = yaml.safe_load(f)
     check_dataset(data)  # check
     nc = 1 if single_cls else int(data['nc'])  # number of classes
     iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
diff --git a/train.py b/train.py
index 17b5ac5dda50..acfc9ef5527b 100644
--- a/train.py
+++ b/train.py
@@ -41,7 +41,7 @@
 def train(hyp, opt, device, tb_writer=None):
     logger.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
     save_dir, epochs, batch_size, total_batch_size, weights, rank = \
-        opt.save_dir, opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank
+        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank
 
     # Directories
     wdir = save_dir / 'weights'
@@ -52,16 +52,16 @@ def train(hyp, opt, device, tb_writer=None):
 
     # Save run settings
     with open(save_dir / 'hyp.yaml', 'w') as f:
-        yaml.dump(hyp, f, sort_keys=False)
+        yaml.safe_dump(hyp, f, sort_keys=False)
     with open(save_dir / 'opt.yaml', 'w') as f:
-        yaml.dump(vars(opt), f, sort_keys=False)
+        yaml.safe_dump(vars(opt), f, sort_keys=False)
 
     # Configure
     plots = not opt.evolve  # create plots
     cuda = device.type != 'cpu'
     init_seeds(2 + rank)
     with open(opt.data) as f:
-        data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
+        data_dict = yaml.safe_load(f)  # data dict
     is_coco = opt.data.endswith('coco.yaml')
 
     # Logging- Doing this before checking the dataset. Might update data_dict
@@ -506,8 +506,9 @@ def train(hyp, opt, device, tb_writer=None):
         assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
         apriori = opt.global_rank, opt.local_rank
         with open(Path(ckpt).parent.parent / 'opt.yaml') as f:
-            opt = argparse.Namespace(**yaml.load(f, Loader=yaml.SafeLoader))  # replace
-        opt.cfg, opt.weights, opt.resume, opt.batch_size, opt.global_rank, opt.local_rank = '', ckpt, True, opt.total_batch_size, *apriori  # reinstate
+            opt = argparse.Namespace(**yaml.safe_load(f))  # replace
+        opt.cfg, opt.weights, opt.resume, opt.batch_size, opt.global_rank, opt.local_rank = \
+            '', ckpt, True, opt.total_batch_size, *apriori  # reinstate
         logger.info('Resuming training from %s' % ckpt)
     else:
         # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
@@ -515,7 +516,7 @@ def train(hyp, opt, device, tb_writer=None):
         assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
         opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
         opt.name = 'evolve' if opt.evolve else opt.name
-        opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve)  # increment run
+        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve))
 
     # DDP mode
     opt.total_batch_size = opt.batch_size
@@ -530,7 +531,7 @@ def train(hyp, opt, device, tb_writer=None):
 
     # Hyperparameters
     with open(opt.hyp) as f:
-        hyp = yaml.load(f, Loader=yaml.SafeLoader)  # load hyps
+        hyp = yaml.safe_load(f)  # load hyps
 
     # Train
     logger.info(opt)
@@ -577,7 +578,7 @@ def train(hyp, opt, device, tb_writer=None):
         assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
         opt.notest, opt.nosave = True, True  # only test/save final epoch
         # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
-        yaml_file = opt.save_dir / 'hyp_evolved.yaml'  # save best result here
+        yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml'  # save best result here
         if opt.bucket:
             os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket)  # download evolve.txt if exists
 
diff --git a/utils/autoanchor.py b/utils/autoanchor.py
index 57777462e89f..75b350da729c 100644
--- a/utils/autoanchor.py
+++ b/utils/autoanchor.py
@@ -102,7 +102,7 @@ def print_results(k):
 
     if isinstance(path, str):  # *.yaml file
         with open(path) as f:
-            data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # model dict
+            data_dict = yaml.safe_load(f)  # model dict
         from utils.datasets import LoadImagesAndLabels
         dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
     else:
diff --git a/utils/aws/resume.py b/utils/aws/resume.py
index faad8d247411..4b0d4246b594 100644
--- a/utils/aws/resume.py
+++ b/utils/aws/resume.py
@@ -19,7 +19,7 @@
 
     # Load opt.yaml
     with open(last.parent.parent / 'opt.yaml') as f:
-        opt = yaml.load(f, Loader=yaml.SafeLoader)
+        opt = yaml.safe_load(f)
 
     # Get device count
     d = opt['device'].split(',')  # devices
diff --git a/utils/general.py b/utils/general.py
index 817023f33dd3..9898549d3eaf 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -550,7 +550,7 @@ def print_mutation(hyp, results, yaml_file='hyp_evolved.yaml', bucket=''):
         results = tuple(x[0, :7])
         c = '%10.4g' * len(results) % results  # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3)
         f.write('# Hyperparameter Evolution Results\n# Generations: %g\n# Metrics: ' % len(x) + c + '\n\n')
-        yaml.dump(hyp, f, sort_keys=False)
+        yaml.safe_dump(hyp, f, sort_keys=False)
 
     if bucket:
         os.system('gsutil cp evolve.txt %s gs://%s' % (yaml_file, bucket))  # upload
diff --git a/utils/plots.py b/utils/plots.py
index 09b6bcd15a9f..f24513c6998d 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -323,7 +323,7 @@ def plot_labels(labels, names=(), save_dir=Path(''), loggers=None):
 def plot_evolution(yaml_file='data/hyp.finetune.yaml'):  # from utils.plots import *; plot_evolution()
     # Plot hyperparameter evolution results in evolve.txt
     with open(yaml_file) as f:
-        hyp = yaml.load(f, Loader=yaml.SafeLoader)
+        hyp = yaml.safe_load(f)
     x = np.loadtxt('evolve.txt', ndmin=2)
     f = fitness(x)
     # weights = (f - f.min()) ** 2  # for weighted results
diff --git a/utils/wandb_logging/log_dataset.py b/utils/wandb_logging/log_dataset.py
index d7a521f1414b..f45a23011f15 100644
--- a/utils/wandb_logging/log_dataset.py
+++ b/utils/wandb_logging/log_dataset.py
@@ -9,7 +9,7 @@
 
 def create_dataset_artifact(opt):
     with open(opt.data) as f:
-        data = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
+        data = yaml.safe_load(f)  # data dict
     logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation')
 
 
diff --git a/utils/wandb_logging/wandb_utils.py b/utils/wandb_logging/wandb_utils.py
index d8f50ae8a80e..d8fbd1ef42aa 100644
--- a/utils/wandb_logging/wandb_utils.py
+++ b/utils/wandb_logging/wandb_utils.py
@@ -55,7 +55,7 @@ def check_wandb_resume(opt):
 
 def process_wandb_config_ddp_mode(opt):
     with open(opt.data) as f:
-        data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
+        data_dict = yaml.safe_load(f)  # data dict
     train_dir, val_dir = None, None
     if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX):
         api = wandb.Api()
@@ -73,7 +73,7 @@ def process_wandb_config_ddp_mode(opt):
     if train_dir or val_dir:
         ddp_data_path = str(Path(val_dir) / 'wandb_local_data.yaml')
         with open(ddp_data_path, 'w') as f:
-            yaml.dump(data_dict, f)
+            yaml.safe_dump(data_dict, f)
         opt.data = ddp_data_path
 
 
@@ -120,7 +120,7 @@ def check_and_upload_dataset(self, opt):
                                                 'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem)
         print("Created dataset config file ", config_path)
         with open(config_path) as f:
-            wandb_data_dict = yaml.load(f, Loader=yaml.SafeLoader)
+            wandb_data_dict = yaml.safe_load(f)
         return wandb_data_dict
 
     def setup_training(self, opt, data_dict):
@@ -192,7 +192,7 @@ def log_model(self, path, opt, epoch, fitness_score, best_model=False):
 
     def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
         with open(data_file) as f:
-            data = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
+            data = yaml.safe_load(f)  # data dict
         nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names'])
         names = {k: v for k, v in enumerate(names)}  # to index dictionary
         self.train_artifact = self.create_dataset_table(LoadImagesAndLabels(
@@ -206,7 +206,7 @@ def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=
         path = data_file if overwrite_config else '_wandb.'.join(data_file.rsplit('.', 1))  # updated data.yaml path
         data.pop('download', None)
         with open(path, 'w') as f:
-            yaml.dump(data, f)
+            yaml.safe_dump(data, f)
 
         if self.job_type == 'Training':  # builds correct artifact pipeline graph
             self.wandb_run.use_artifact(self.val_artifact)