From 7dd30307a7dae985eac64574d703140699e16b26 Mon Sep 17 00:00:00 2001
From: Jason <jasoli@nvidia.com>
Date: Tue, 14 Jan 2020 15:00:24 -0800
Subject: [PATCH 01/10] add nemo.logging; get_logger() now returns nemo.logging

Signed-off-by: Jason <jasoli@nvidia.com>
---
 nemo/nemo/__init__.py          |  4 ++++
 nemo/nemo/utils/exp_logging.py | 20 ++++++--------------
 2 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/nemo/nemo/__init__.py b/nemo/nemo/__init__.py
index c6a6136cac6f..7d322052dc6e 100644
--- a/nemo/nemo/__init__.py
+++ b/nemo/nemo/__init__.py
@@ -12,9 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
+import logging
+
 from . import backends
 from . import core
 from . import utils
 from .backends.pytorch import tutorials
 
 __version__ = "0.9.0"
+
+logging = logging.getLogger()
diff --git a/nemo/nemo/utils/exp_logging.py b/nemo/nemo/utils/exp_logging.py
index 2de523e15318..e613500472c7 100644
--- a/nemo/nemo/utils/exp_logging.py
+++ b/nemo/nemo/utils/exp_logging.py
@@ -6,7 +6,11 @@
 import subprocess
 import sys
 
-loggers = {}
+import nemo
+
+
+def get_logger(name):
+    return nemo.logging
 
 
 class ContextFilter(logging.Filter):
@@ -31,18 +35,6 @@ def filter(self, record):
         return True
 
 
-def get_logger(name):
-    """ A wrapper function around logging.getLogger
-    to ensure that we don't create duplicate loggers
-    """
-    global loggers
-
-    if name not in loggers:
-        loggers[name] = logging.getLogger(name)
-
-    return loggers[name]
-
-
 class ExpManager:
     """ Note: Users should not have to call ExpManager as it is done
     automically inside NeuralFactory. Not all defaults match NeuralFactory
@@ -160,7 +152,7 @@ def __init__(
             self.make_dir(self.ckpt_dir, exist_ok)
 
     def create_logger(self, name='', level=logging.INFO, log_file=True):
-        logger = get_logger(name)
+        logger = nemo.logging
         tmp = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 
         if self.global_rank == 0:

From 9da624ce59c23f05bf25c31f60cb37f4ad0add5a Mon Sep 17 00:00:00 2001
From: Jason <jasoli@nvidia.com>
Date: Tue, 14 Jan 2020 16:47:01 -0800
Subject: [PATCH 02/10] add deprecation warning to get_logger()

Signed-off-by: Jason <jasoli@nvidia.com>
---
 nemo/nemo/utils/exp_logging.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nemo/nemo/utils/exp_logging.py b/nemo/nemo/utils/exp_logging.py
index e613500472c7..96d41b7e2442 100644
--- a/nemo/nemo/utils/exp_logging.py
+++ b/nemo/nemo/utils/exp_logging.py
@@ -5,11 +5,14 @@
 from shutil import copyfile
 import subprocess
 import sys
+import warnings
 
 import nemo
 
 
 def get_logger(name):
+    warnings.warn("This function will be deprecated in the future. You "
+                  "can just use nemo.logging instead")
     return nemo.logging
 
 

From 620594851e8d7762af218f47b6013390d654bcef Mon Sep 17 00:00:00 2001
From: Jason <jasoli@nvidia.com>
Date: Tue, 14 Jan 2020 17:22:06 -0800
Subject: [PATCH 03/10] Part 1 of removing logger in favour of nemo.logging

Signed-off-by: Jason <jasoli@nvidia.com>
---
 .../nemo_asr/nemo_asr/parts/dataset.py        |   3 +-
 .../nemo_asr/nemo_asr/parts/manifest.py       |  23 ++--
 .../nemo_nlp/nemo_nlp/data/datasets/glue.py   |  22 ++--
 .../data/datasets/joint_intent_slot.py        |   9 +-
 .../datasets/punctuation_capitalization.py    |  65 +++++----
 .../data/datasets/sentence_classification.py  |  25 ++--
 .../nemo_nlp/nemo_nlp/data/datasets/utils.py  | 123 ++++++++++--------
 .../utils/callbacks/bert_pretraining.py       |   7 +-
 .../nemo_nlp/nemo_nlp/utils/callbacks/glue.py |  13 +-
 .../utils/callbacks/joint_intent_slot.py      |  29 ++---
 .../utils/callbacks/language_modeling.py      |  14 +-
 .../callbacks/punctuation_capitalization.py   |  15 +--
 .../callbacks/sentence_classification.py      |  12 +-
 .../utils/callbacks/token_classification.py   |  15 +--
 .../nemo_nlp/nemo_nlp/utils/nlp_utils.py      |  13 +-
 nemo/nemo/__init__.py                         |   2 +-
 nemo/nemo/backends/pytorch/actions.py         |  26 ++--
 nemo/nemo/core/callbacks.py                   |  70 +++++-----
 nemo/nemo/core/neural_factory.py              |  21 ++-
 nemo/nemo/utils/exp_logging.py                |   2 +-
 20 files changed, 241 insertions(+), 268 deletions(-)

diff --git a/collections/nemo_asr/nemo_asr/parts/dataset.py b/collections/nemo_asr/nemo_asr/parts/dataset.py
index d1df0ab71696..ef29c83a6895 100644
--- a/collections/nemo_asr/nemo_asr/parts/dataset.py
+++ b/collections/nemo_asr/nemo_asr/parts/dataset.py
@@ -141,8 +141,7 @@ def __init__(
                                        max_utts=max_utts,
                                        blank_index=blank_index,
                                        unk_index=unk_index,
-                                       normalize=normalize,
-                                       logger=logger)
+                                       normalize=normalize)
         self.featurizer = featurizer
         self.trim = trim
         self.eos_id = eos_id
diff --git a/collections/nemo_asr/nemo_asr/parts/manifest.py b/collections/nemo_asr/nemo_asr/parts/manifest.py
index 5609529a0591..03f5a52bd26f 100644
--- a/collections/nemo_asr/nemo_asr/parts/manifest.py
+++ b/collections/nemo_asr/nemo_asr/parts/manifest.py
@@ -1,9 +1,9 @@
 # Taken straight from Patter https://github.com/ryanleary/patter
 # TODO: review, and copyright and fix/add comments
 import json
+import nemo
 import string
 
-from nemo.utils import get_logger
 from .cleaners import clean_text
 
 
@@ -17,8 +17,7 @@ def __init__(self,
                  max_utts=0,
                  blank_index=-1,
                  unk_index=-1,
-                 normalize=True,
-                 logger=None):
+                 normalize=True):
         self.min_duration = min_duration
         self.max_duration = max_duration
         self.sort_by_duration = sort_by_duration
@@ -27,9 +26,6 @@ def __init__(self,
         self.unk_index = unk_index
         self.normalize = normalize
         self.labels_map = {label: i for i, label in enumerate(labels)}
-        self.logger = logger
-        if logger is None:
-            self.logger = get_logger('')
 
         data = []
         duration = 0.0
@@ -53,9 +49,9 @@ def __init__(self,
                 filtered_duration += item['duration']
                 continue
             if normalize:
-                text = self.normalize_text(text, labels, logger=self.logger)
+                text = self.normalize_text(text, labels)
             if not isinstance(text, str):
-                self.logger.warning(
+                nemo.logging.warning(
                     "WARNING: Got transcript: {}. It is not a "
                     "string. Dropping data point".format(text)
                 )
@@ -69,7 +65,7 @@ def __init__(self,
 
             # support files using audio_filename
             if 'audio_filename' in item and 'audio_filepath' not in item:
-                self.logger.warning(
+                nemo.logging.warning(
                     "Malformed manifest: The key audio_filepath was not "
                     "found in the manifest. Using audio_filename instead."
                 )
@@ -79,7 +75,7 @@ def __init__(self,
             duration += item['duration']
 
             if max_utts > 0 and len(data) >= max_utts:
-                self.logger.info(
+                nemo.logging.info(
                     'Stop parsing due to max_utts ({})'.format(max_utts))
                 break
 
@@ -155,7 +151,7 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
     @staticmethod
-    def normalize_text(text, labels, logger=None):
+    def normalize_text(text, labels):
         # Punctuation to remove
         punctuation = string.punctuation
         # Define punctuation that will be handled by text cleaner
@@ -183,10 +179,7 @@ def normalize_text(text, labels, logger=None):
         try:
             text = clean_text(text, table, punctuation_to_replace)
         except BaseException:
-            if logger:
-                logger.warning("WARNING: Normalizing {} failed".format(text))
-            else:
-                print("WARNING: Normalizing {} failed".format(text))
+            nemo.logging.warning("WARNING: Normalizing {} failed".format(text))
             return None
 
         return text
diff --git a/collections/nemo_nlp/nemo_nlp/data/datasets/glue.py b/collections/nemo_nlp/nemo_nlp/data/datasets/glue.py
index ad56847b15b7..fa70f776b184 100644
--- a/collections/nemo_nlp/nemo_nlp/data/datasets/glue.py
+++ b/collections/nemo_nlp/nemo_nlp/data/datasets/glue.py
@@ -20,13 +20,10 @@
 https://github.com/huggingface/transformers
 """
 
+import nemo
 import numpy as np
 from torch.utils.data import Dataset
 
-from nemo.utils.exp_logging import get_logger
-
-logger = get_logger('')
-
 
 class GLUEDataset(Dataset):
     def __init__(self,
@@ -112,7 +109,8 @@ def convert_examples_to_features(examples,
     features = []
     for ex_index, example in enumerate(examples):
         if ex_index % 10000 == 0:
-            logger.info("Writing example %d of %d" % (ex_index, len(examples)))
+            nemo.logging.info(
+                "Writing example %d of %d" % (ex_index, len(examples)))
 
         tokens_a = tokenizer.text_to_tokens(example.text_a)
 
@@ -199,17 +197,17 @@ def convert_examples_to_features(examples,
             raise KeyError(output_mode)
 
         if ex_index < 5:
-            logger.info("*** Example ***")
-            logger.info("guid: %s" % (example.guid))
-            logger.info(
+            nemo.logging.info("*** Example ***")
+            nemo.logging.info("guid: %s" % (example.guid))
+            nemo.logging.info(
                 "tokens: %s" % " ".join(list(map(str, tokens))))
-            logger.info(
+            nemo.logging.info(
                 "input_ids: %s" % " ".join(list(map(str, input_ids))))
-            logger.info(
+            nemo.logging.info(
                 "input_mask: %s" % " ".join(list(map(str, input_mask))))
-            logger.info(
+            nemo.logging.info(
                 "segment_ids: %s" % " ".join(list(map(str, segment_ids))))
-            logger.info(
+            nemo.logging.info(
                 "label: %s (id = %d)" % (example.label, label_id))
 
         features.append(
diff --git a/collections/nemo_nlp/nemo_nlp/data/datasets/joint_intent_slot.py b/collections/nemo_nlp/nemo_nlp/data/datasets/joint_intent_slot.py
index 3d6c4f370ee2..0b028a0e20e7 100644
--- a/collections/nemo_nlp/nemo_nlp/data/datasets/joint_intent_slot.py
+++ b/collections/nemo_nlp/nemo_nlp/data/datasets/joint_intent_slot.py
@@ -25,14 +25,9 @@
 import numpy as np
 from torch.utils.data import Dataset
 
-from nemo.utils.exp_logging import get_logger
-
 from . import utils
 
 
-logger = get_logger('')
-
-
 def get_features(queries,
                  max_seq_length,
                  tokenizer,
@@ -88,7 +83,7 @@ def get_features(queries,
             all_slots.append(slots)
 
     max_seq_length = min(max_seq_length, max(sent_lengths))
-    logger.info(f'Max length: {max_seq_length}')
+    nemo.logging.info(f'Max length: {max_seq_length}')
     utils.get_stats(sent_lengths)
     too_long_count = 0
 
@@ -120,7 +115,7 @@ def get_features(queries,
 
         all_segment_ids.append([0] * max_seq_length)
 
-    logger.info(f'{too_long_count} are longer than {max_seq_length}')
+    nemo.logging.info(f'{too_long_count} are longer than {max_seq_length}')
 
     return (all_input_ids,
             all_segment_ids,
diff --git a/collections/nemo_nlp/nemo_nlp/data/datasets/punctuation_capitalization.py b/collections/nemo_nlp/nemo_nlp/data/datasets/punctuation_capitalization.py
index 95f7b165c88e..78dd8e2e7e5f 100644
--- a/collections/nemo_nlp/nemo_nlp/data/datasets/punctuation_capitalization.py
+++ b/collections/nemo_nlp/nemo_nlp/data/datasets/punctuation_capitalization.py
@@ -24,17 +24,13 @@
 import pickle
 import random
 
+import nemo
 import numpy as np
 from torch.utils.data import Dataset
 
-from nemo.utils.exp_logging import get_logger
-
 from . import utils
 
 
-logger = get_logger('')
-
-
 def get_features(queries,
                  max_seq_length,
                  tokenizer,
@@ -128,7 +124,7 @@ def get_features(queries,
             capit_all_labels.append(capit_labels)
 
     max_seq_length = min(max_seq_length, max(sent_lengths))
-    logger.info(f'Max length: {max_seq_length}')
+    nemo.logging.info(f'Max length: {max_seq_length}')
     utils.get_stats(sent_lengths)
     too_long_count = 0
 
@@ -164,25 +160,25 @@ def get_features(queries,
 
         all_segment_ids.append([0] * max_seq_length)
 
-    logger.info(f'{too_long_count} are longer than {max_seq_length}')
+    nemo.logging.info(f'{too_long_count} are longer than {max_seq_length}')
 
     for i in range(min(len(all_input_ids), 5)):
-        logger.info("*** Example ***")
-        logger.info("i: %s" % (i))
-        logger.info(
+        nemo.logging.info("*** Example ***")
+        nemo.logging.info("i: %s" % (i))
+        nemo.logging.info(
             "subtokens: %s" % " ".join(list(map(str, all_subtokens[i]))))
-        logger.info(
+        nemo.logging.info(
             "loss_mask: %s" % " ".join(list(map(str, all_loss_mask[i]))))
-        logger.info(
+        nemo.logging.info(
             "input_mask: %s" % " ".join(list(map(str, all_input_mask[i]))))
-        logger.info(
+        nemo.logging.info(
             "subtokens_mask: %s" % " ".join(list(map(
                 str, all_subtokens_mask[i]))))
         if with_label:
-            logger.info("punct_labels: %s" %
-                        " ".join(list(map(str, punct_all_labels[i]))))
-            logger.info("capit_labels: %s" %
-                        " ".join(list(map(str, capit_all_labels[i]))))
+            nemo.logging.info("punct_labels: %s" %
+                              " ".join(list(map(str, punct_all_labels[i]))))
+            nemo.logging.info("capit_labels: %s" %
+                              " ".join(list(map(str, capit_all_labels[i]))))
 
     return (all_input_ids,
             all_segment_ids,
@@ -258,7 +254,7 @@ def __init__(self,
         if use_cache and os.path.exists(features_pkl):
             # If text_file was already processed, load from pickle
             features = pickle.load(open(features_pkl, 'rb'))
-            logger.info(f'features restored from {features_pkl}')
+            nemo.logging.info(f'features restored from {features_pkl}')
         else:
             if num_samples == 0:
                 raise ValueError("num_samples has to be positive", num_samples)
@@ -304,19 +300,22 @@ def __init__(self,
             # for dev/test sets use label mapping from training set
             if punct_label_ids:
                 if len(punct_label_ids) != len(punct_unique_labels):
-                    logger.info(f'Not all labels from the specified' +
-                                'label_ids dictionary are present in the' +
-                                'current dataset. Using the provided' +
-                                'label_ids dictionary.')
+                    nemo.logging.info(
+                        'Not all labels from the specified' +
+                        'label_ids dictionary are present in the' +
+                        'current dataset. Using the provided' +
+                        'label_ids dictionary.')
                 else:
-                    logger.info(f'Using the provided label_ids dictionary.')
+                    nemo.logging.info(
+                        'Using the provided label_ids dictionary.')
             else:
-                logger.info(f'Creating a new label to label_id dictionary.' +
-                            ' It\'s recommended to use label_ids generated' +
-                            ' during training for dev/test sets to avoid' +
-                            ' errors if some labels are not' +
-                            ' present in the dev/test sets.' +
-                            ' For training set label_ids should be None.')
+                nemo.logging.info(
+                    'Creating a new label to label_id dictionary.' +
+                    ' It\'s recommended to use label_ids generated' +
+                    ' during training for dev/test sets to avoid' +
+                    ' errors if some labels are not' +
+                    ' present in the dev/test sets.' +
+                    ' For training set label_ids should be None.')
 
                 def create_label_ids(unique_labels, pad_label=pad_label):
                     label_ids = {pad_label: 0}
@@ -342,7 +341,7 @@ def create_label_ids(unique_labels, pad_label=pad_label):
 
             if use_cache:
                 pickle.dump(features, open(features_pkl, "wb"))
-                logger.info(f'features saved to {features_pkl}')
+                nemo.logging.info(f'features saved to {features_pkl}')
 
         self.all_input_ids = features[0]
         self.all_segment_ids = features[1]
@@ -358,7 +357,7 @@ def create_label_ids(unique_labels, pad_label=pad_label):
         def get_stats_and_save(all_labels, label_ids, name):
             infold = text_file[:text_file.rfind('/')]
             merged_labels = itertools.chain.from_iterable(all_labels)
-            logger.info('Three most popular labels')
+            nemo.logging.info('Three most popular labels')
             _, label_frequencies = \
                 utils.get_label_stats(merged_labels,
                                       infold + '/label_count_' + name + '.tsv')
@@ -366,8 +365,8 @@ def get_stats_and_save(all_labels, label_ids, name):
             out = open(os.path.join(infold, name + '_label_ids.csv'), 'w')
             labels, _ = zip(*sorted(label_ids.items(),  key=lambda x: x[1]))
             out.write('\n'.join(labels))
-            logger.info(f'Labels: {label_ids}')
-            logger.info(f'Labels mapping saved to : {out.name}')
+            nemo.logging.info(f'Labels: {label_ids}')
+            nemo.logging.info(f'Labels mapping saved to : {out.name}')
 
             return label_frequencies
 
diff --git a/collections/nemo_nlp/nemo_nlp/data/datasets/sentence_classification.py b/collections/nemo_nlp/nemo_nlp/data/datasets/sentence_classification.py
index ad9ddde3e4fe..87e71be853e7 100644
--- a/collections/nemo_nlp/nemo_nlp/data/datasets/sentence_classification.py
+++ b/collections/nemo_nlp/nemo_nlp/data/datasets/sentence_classification.py
@@ -20,18 +20,14 @@
 https://github.com/huggingface/pytorch-pretrained-BERT
 """
 
-import logging
 import random
-import string
-import time
 
+import nemo
 import numpy as np
 from torch.utils.data import Dataset
 
 from . import utils
 
-logger = logging.getLogger('log')
-
 
 class BertSentenceClassificationDataset(Dataset):
     """A dataset class that converts from raw data to
@@ -60,7 +56,7 @@ def __init__(self,
             too_long_count = 0
 
             lines = f.readlines()[1:]
-            logger.info(f'{input_file}: {len(lines)}')
+            nemo.logging.info(f'{input_file}: {len(lines)}')
 
             if shuffle or num_samples > -1:
                 random.seed(0)
@@ -70,7 +66,7 @@ def __init__(self,
 
             for index, line in enumerate(lines):
                 if index % 20000 == 0:
-                    logger.debug(f"Processing line {index}/{len(lines)}")
+                    nemo.logging.debug(f"Processing line {index}/{len(lines)}")
 
                 sent_label = int(line.split()[-1])
                 sent_labels.append(sent_label)
@@ -95,7 +91,7 @@ def __init__(self,
                 all_sent_subtokens[i] = ['[CLS]'] + shorten_sent
                 too_long_count += 1
 
-        logger.info(f'{too_long_count} out of {len(sent_lengths)} \
+        nemo.logging.info(f'{too_long_count} out of {len(sent_lengths)} \
                        sentencess with more than {max_seq_length} subtokens.')
 
         self.convert_sequences_to_features(all_sent_subtokens,
@@ -149,12 +145,13 @@ def convert_sequences_to_features(self,
             assert len(input_mask) == max_seq_length
 
             if sent_id == 0:
-                logger.info("*** Example ***")
-                logger.info("example_index: %s" % sent_id)
-                logger.info("subtokens: %s" % " ".join(sent_subtokens))
-                logger.info("sent_label: %s" % sent_label)
-                logger.info("input_ids: %s" % utils.list2str(input_ids))
-                logger.info("input_mask: %s" % utils.list2str(input_mask))
+                nemo.logging.info("*** Example ***")
+                nemo.logging.info("example_index: %s" % sent_id)
+                nemo.logging.info("subtokens: %s" % " ".join(sent_subtokens))
+                nemo.logging.info("sent_label: %s" % sent_label)
+                nemo.logging.info("input_ids: %s" % utils.list2str(input_ids))
+                nemo.logging.info(
+                    "input_mask: %s" % utils.list2str(input_mask))
 
             self.features.append(InputFeatures(
                 sent_id=sent_id,
diff --git a/collections/nemo_nlp/nemo_nlp/data/datasets/utils.py b/collections/nemo_nlp/nemo_nlp/data/datasets/utils.py
index 7dd661f79caf..c1cf244343e8 100644
--- a/collections/nemo_nlp/nemo_nlp/data/datasets/utils.py
+++ b/collections/nemo_nlp/nemo_nlp/data/datasets/utils.py
@@ -11,18 +11,17 @@
 import subprocess
 import sys
 
+import nemo
 import numpy as np
 from sentencepiece import SentencePieceTrainer as SPT
 from tqdm import tqdm
 
-from nemo.utils.exp_logging import get_logger
 from ...utils.nlp_utils import (get_vocab,
                                 write_vocab,
                                 write_vocab_in_order,
                                 label2idx)
 
 
-logger = get_logger('')
 DATABASE_EXISTS_TMP = '{} dataset has already been processed and stored at {}'
 MODE_EXISTS_TMP = \
     '{} mode of {} dataset has already been processed and stored at {}'
@@ -30,12 +29,12 @@
 
 def get_stats(lengths):
     lengths = np.asarray(lengths)
-    logger.info(f'Min: {np.min(lengths)} | \
+    nemo.logging.info(f'Min: {np.min(lengths)} | \
                  Max: {np.max(lengths)} | \
                  Mean: {np.mean(lengths)} | \
                  Median: {np.median(lengths)}')
-    logger.info(f'75 percentile: {np.percentile(lengths, 75)}')
-    logger.info(f'99 percentile: {np.percentile(lengths, 99)}')
+    nemo.logging.info(f'75 percentile: {np.percentile(lengths, 75)}')
+    nemo.logging.info(f'99 percentile: {np.percentile(lengths, 99)}')
 
 
 def get_label_stats(labels, outfile='stats.tsv'):
@@ -47,7 +46,7 @@ def get_label_stats(labels, outfile='stats.tsv'):
     for k, v in label_frequencies:
         out.write(f'{k}\t{v/total}\n')
         if i < 3:
-            logger.info(f'{i} item: {k}, {v} out of {total}, {v/total}.')
+            nemo.logging.info(f'{i} item: {k}, {v} out of {total}, {v/total}.')
         i += 1
     return total, label_frequencies
 
@@ -74,7 +73,8 @@ def process_sst_2(data_dir):
         link = 'https://gluebenchmark.com/tasks'
         raise ValueError(f'Data not found at {data_dir}. '
                          f'Please download SST-2 from {link}.')
-    logger.info('Keep in mind that SST-2 is only available in lower case.')
+    nemo.logging.info(
+        'Keep in mind that SST-2 is only available in lower case.')
     return data_dir
 
 
@@ -90,9 +90,9 @@ def process_imdb(data_dir, uncased, modes=['train', 'test']):
         outfold = f'{outfold}_uncased'
 
     if if_exist(outfold, [f'{mode}.tsv' for mode in modes]):
-        logger.info(DATABASE_EXISTS_TMP.format('IMDB', outfold))
+        nemo.logging.info(DATABASE_EXISTS_TMP.format('IMDB', outfold))
         return outfold
-    logger.info(f'Processing IMDB dataset and store at {outfold}')
+    nemo.logging.info(f'Processing IMDB dataset and store at {outfold}')
 
     os.makedirs(outfold, exist_ok=True)
 
@@ -131,9 +131,9 @@ def process_thucnews(data_dir):
     outfold = f'{data_dir}/nemo-processed-thucnews'
 
     if if_exist(outfold, [f'{mode}.tsv' for mode in modes]):
-        logger.info(DATABASE_EXISTS_TMP.format('THUCNews', outfold))
+        nemo.logging.info(DATABASE_EXISTS_TMP.format('THUCNews', outfold))
         return outfold
-    logger.info(f'Processing THUCNews dataset and store at {outfold}')
+    nemo.logging.info(f'Processing THUCNews dataset and store at {outfold}')
 
     os.makedirs(outfold, exist_ok=True)
 
@@ -152,7 +152,8 @@ def process_thucnews(data_dir):
         test_files = category_files[:test_num]
         train_files = category_files[test_num:]
         for mode in modes:
-            logger.info(f'Processing {mode} data of the category {category}')
+            nemo.logging.info(
+                f'Processing {mode} data of the category {category}')
             if mode == 'test':
                 files = test_files
             else:
@@ -210,9 +211,10 @@ def process_nlu(filename,
         outfold = f'{outfold}_uncased'
 
     if if_exist(outfold, [f'{mode}.tsv' for mode in modes]):
-        logger.info(DATABASE_EXISTS_TMP.format(dataset_name.upper(), outfold))
+        nemo.logging.info(
+            DATABASE_EXISTS_TMP.format(dataset_name.upper(), outfold))
         return outfold
-    logger.info(f'Processing data and store at {outfold}')
+    nemo.logging.info(f'Processing data and store at {outfold}')
 
     os.makedirs(outfold, exist_ok=True)
 
@@ -274,9 +276,9 @@ def process_atis(infold, uncased, modes=['train', 'test'], dev_split=0):
         outfold = f'{outfold}-uncased'
 
     if if_exist(outfold, [f'{mode}.tsv' for mode in modes]):
-        logger.info(DATABASE_EXISTS_TMP.format('ATIS', outfold))
+        nemo.logging.info(DATABASE_EXISTS_TMP.format('ATIS', outfold))
         return outfold
-    logger.info(f'Processing ATIS dataset and store at {outfold}')
+    nemo.logging.info(f'Processing ATIS dataset and store at {outfold}')
 
     os.makedirs(outfold, exist_ok=True)
 
@@ -319,10 +321,11 @@ def process_jarvis_datasets(infold, uncased, dataset_name,
         outfold = f'{outfold}-uncased'
 
     if if_exist(outfold, ['dict.intents.csv', 'dict.slots.csv']):
-        logger.info(DATABASE_EXISTS_TMP.format(dataset_name, outfold))
+        nemo.logging.info(DATABASE_EXISTS_TMP.format(dataset_name, outfold))
         return outfold
 
-    logger.info(f'Processing {dataset_name} dataset and store at {outfold}')
+    nemo.logging.info(
+        f'Processing {dataset_name} dataset and store at {outfold}')
 
     os.makedirs(outfold, exist_ok=True)
 
@@ -340,13 +343,13 @@ def process_jarvis_datasets(infold, uncased, dataset_name,
 
     for mode in modes:
         if if_exist(outfold, [f'{mode}.tsv']):
-            logger.info(
+            nemo.logging.info(
                 MODE_EXISTS_TMP.format(mode, dataset_name, outfold, mode))
             continue
 
         if not if_exist(infold, [f'{mode}.tsv']):
-            logger.info(f'{mode} mode of {dataset_name}'
-                        f' is skipped as it was not found.')
+            nemo.logging.info(f'{mode} mode of {dataset_name}'
+                              f' is skipped as it was not found.')
             continue
 
         outfiles[mode] = open(os.path.join(outfold, mode + '.tsv'), 'w')
@@ -549,14 +552,14 @@ def process_snips(data_dir, uncased, modes=['train', 'test'], dev_split=0.1):
     exist = True
     for dataset in ['light', 'speak', 'all']:
         if if_exist(f'{outfold}/{dataset}', [f'{mode}.tsv' for mode in modes]):
-            logger.info(DATABASE_EXISTS_TMP.format(
+            nemo.logging.info(DATABASE_EXISTS_TMP.format(
                 'SNIPS-' + dataset.upper(), outfold))
         else:
             exist = False
     if exist:
         return outfold
 
-    logger.info(f'Processing SNIPS dataset and store at {outfold}')
+    nemo.logging.info(f'Processing SNIPS dataset and store at {outfold}')
 
     os.makedirs(outfold, exist_ok=True)
 
@@ -590,7 +593,7 @@ def list2str(nums):
 def merge(data_dir, subdirs, dataset_name, modes=['train', 'test']):
     outfold = f'{data_dir}/{dataset_name}'
     if if_exist(outfold, [f'{mode}.tsv' for mode in modes]):
-        logger.info(DATABASE_EXISTS_TMP.format('SNIPS-ATIS', outfold))
+        nemo.logging.info(DATABASE_EXISTS_TMP.format('SNIPS-ATIS', outfold))
         slots = get_vocab(f'{outfold}/dict.slots.csv')
         none_slot = 0
         for key in slots:
@@ -916,10 +919,11 @@ def process_mturk(
     outfold = f'{data_dir}/nemo-processed'
 
     if if_exist(outfold, [f'{mode}.tsv' for mode in modes]):
-        logger.info(DATABASE_EXISTS_TMP.format('mturk', outfold))
+        nemo.logging.info(DATABASE_EXISTS_TMP.format('mturk', outfold))
         return outfold
 
-    logger.info(f'Processing dataset from mturk and storing at {outfold}')
+    nemo.logging.info(
+        f'Processing dataset from mturk and storing at {outfold}')
 
     os.makedirs(outfold, exist_ok=True)
 
@@ -1094,8 +1098,8 @@ def __init__(self,
         for mode in ['train', 'test', 'eval']:
 
             if not if_exist(self.data_dir, [f'{mode}.tsv']):
-                logger.info(f' Stats calculation for {mode} mode'
-                            f' is skipped as {mode}.tsv was not found.')
+                nemo.logging.info(f' Stats calculation for {mode} mode'
+                                  f' is skipped as {mode}.tsv was not found.')
                 continue
 
             slot_file = f'{self.data_dir}/{mode}_slots.tsv'
@@ -1124,27 +1128,28 @@ def __init__(self,
 
             infold = input_file[:input_file.rfind('/')]
 
-            logger.info(f'Three most popular intents during {mode}ing')
+            nemo.logging.info(f'Three most popular intents during {mode}ing')
             total_intents, intent_label_freq = get_label_stats(
                 raw_intents, infold + f'/{mode}_intent_stats.tsv')
             merged_slots = itertools.chain.from_iterable(raw_slots)
 
-            logger.info(f'Three most popular slots during {mode}ing')
+            nemo.logging.info(f'Three most popular slots during {mode}ing')
             slots_total, slots_label_freq = get_label_stats(
                 merged_slots, infold + f'/{mode}_slot_stats.tsv')
 
             if mode == 'train':
 
                 self.slot_weights = calc_class_weights(slots_label_freq)
-                logger.info(f'Slot weights are - {self.slot_weights}')
+                nemo.logging.info(f'Slot weights are - {self.slot_weights}')
 
                 self.intent_weights = calc_class_weights(intent_label_freq)
-                logger.info(f'Intent weights are - {self.intent_weights}')
+                nemo.logging.info(
+                    f'Intent weights are - {self.intent_weights}')
 
-            logger.info(f'Total intents - {total_intents}')
-            logger.info(f'Intent label frequency - {intent_label_freq}')
-            logger.info(f'Total Slots - {slots_total}')
-            logger.info(f'Slots label frequency - {slots_label_freq}')
+            nemo.logging.info(f'Total intents - {total_intents}')
+            nemo.logging.info(f'Intent label frequency - {intent_label_freq}')
+            nemo.logging.info(f'Total Slots - {slots_total}')
+            nemo.logging.info(f'Slots label frequency - {slots_label_freq}')
 
         if pad_label != -1:
             self.pad_label = pad_label
@@ -1204,8 +1209,8 @@ def __init__(self, dataset_name, data_dir, do_lower_case):
         for mode in ['train', 'test', 'eval']:
 
             if not if_exist(self.data_dir, [f'{mode}.tsv']):
-                logger.info(f' Stats calculation for {mode} mode'
-                            f' is skipped as {mode}.tsv was not found.')
+                nemo.logging.info(f' Stats calculation for {mode} mode'
+                                  f' is skipped as {mode}.tsv was not found.')
                 continue
 
             input_file = f'{self.data_dir}/{mode}.tsv'
@@ -1220,27 +1225,28 @@ def __init__(self, dataset_name, data_dir, do_lower_case):
 
             infold = input_file[:input_file.rfind('/')]
 
-            logger.info(f'Three most popular classes during {mode}ing')
+            nemo.logging.info(f'Three most popular classes during {mode}ing')
             total_sents, sent_label_freq = get_label_stats(
                 raw_sentences, infold + f'/{mode}_sentence_stats.tsv')
 
             if mode == 'train':
 
                 self.class_weights = calc_class_weights(sent_label_freq)
-                logger.info(f'Class weights are - {self.class_weights}')
+                nemo.logging.info(f'Class weights are - {self.class_weights}')
 
-            logger.info(f'Total Sentences - {total_sents}')
-            logger.info(f'Sentence class frequencies - {sent_label_freq}')
+            nemo.logging.info(f'Total Sentences - {total_sents}')
+            nemo.logging.info(
+                f'Sentence class frequencies - {sent_label_freq}')
 
 
 def create_vocab_lm(data_dir, do_lower_case):
     if if_exist(data_dir, ['train.txt', 'vocab.txt']):
-        logger.info("Vocabulary has been created.")
+        nemo.logging.info("Vocabulary has been created.")
         with open(os.path.join(data_dir, 'vocab.txt'), 'r') as f:
             vocab_size = len(f.readlines())
         return vocab_size
 
-    logger.info(f'Creating vocabulary from training data at {data_dir}')
+    nemo.logging.info(f'Creating vocabulary from training data at {data_dir}')
 
     with open(f'{data_dir}/train.txt', 'r') as f:
         txt = f.read()
@@ -1260,15 +1266,15 @@ def create_vocab_lm(data_dir, do_lower_case):
     with open(f'{data_dir}/vocab.txt', 'w') as f:
         for word in sorted(vocab.keys()):
             f.write(word + '\n')
-    logger.info(f"Created vocabulary of size {len(vocab)}")
+    nemo.logging.info(f"Created vocabulary of size {len(vocab)}")
 
     return len(vocab)
 
 
 def download_wkt2(data_dir):
     os.makedirs('data/lm', exist_ok=True)
-    logger.warning(f'Data not found at {data_dir}. '
-                   f'Download {dataset_name} to data/lm')
+    nemo.logging.warning(f'Data not found at {data_dir}. '
+                         'Downloading wikitext-2 to data/lm')
     data_dir = 'data/lm/wikitext-2'
     subprocess.call('scripts/get_wkt2.sh')
     return data_dir
@@ -1282,9 +1288,10 @@ def __init__(self, dataset_name, data_dir, do_lower_case):
             self.vocab_size = create_vocab_lm(data_dir, do_lower_case)
             self.data_dir = data_dir
         else:
-            logger.info("Looks like you passed a dataset name that isn't "
-                        "already supported by NeMo. Please make sure that "
-                        "you build the preprocessing method for it.")
+            nemo.logging.info(
+                "Looks like you passed a dataset name that isn't "
+                "already supported by NeMo. Please make sure that "
+                "you build the preprocessing method for it.")
 
 
 def create_vocab_mlm(data_dir,
@@ -1296,15 +1303,16 @@ def create_vocab_mlm(data_dir,
     vocab = special_tokens[:]
     bert_dir = f'{data_dir}/bert'
     if if_exist(bert_dir, ['tokenizer.model']):
-        logger.info(DATABASE_EXISTS_TMP.format('WikiText_BERT', bert_dir))
+        nemo.logging.info(
+            DATABASE_EXISTS_TMP.format('WikiText_BERT', bert_dir))
         return data_dir, f'{bert_dir}/tokenizer.model'
-    logger.info(f'Processing WikiText dataset and store at {bert_dir}')
+    nemo.logging.info(f'Processing WikiText dataset and store at {bert_dir}')
     os.makedirs(bert_dir, exist_ok=True)
 
     if not train_file:
         files = glob.glob(f'{data_dir}/*.txt')
         train_file = f'{bert_dir}/merged.txt'
-        logger.info(f"Merging {len(files)} txt files into {train_file}")
+        nemo.logging.info(f"Merging {len(files)} txt files into {train_file}")
 
         with open(train_file, "w") as merged:
             for file in tqdm(files):
@@ -1360,9 +1368,10 @@ def __init__(self,
                 special_tokens,
                 train_file)
         else:
-            logger.info("Looks like you passed a dataset name that isn't "
-                        "already supported by NeMo. Please make sure that "
-                        "you build the preprocessing method for it.")
+            nemo.logging.info(
+                "Looks like you passed a dataset name that isn't "
+                "already supported by NeMo. Please make sure that "
+                "you build the preprocessing method for it.")
 
         self.train_file = f'{data_dir}/train.txt'
         self.eval_file = f'{data_dir}/valid.txt'
@@ -1430,7 +1439,7 @@ class MrpcProcessor(DataProcessor):
 
     def get_train_examples(self, data_dir):
         """See base class."""
-        logger.info(f'LOOKING AT {os.path.join(data_dir, "train.tsv")}')
+        nemo.logging.info(f'LOOKING AT {os.path.join(data_dir, "train.tsv")}')
         return self._create_examples(
             self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
 
diff --git a/collections/nemo_nlp/nemo_nlp/utils/callbacks/bert_pretraining.py b/collections/nemo_nlp/nemo_nlp/utils/callbacks/bert_pretraining.py
index 261339bfe656..1b0c1bcbf3b7 100644
--- a/collections/nemo_nlp/nemo_nlp/utils/callbacks/bert_pretraining.py
+++ b/collections/nemo_nlp/nemo_nlp/utils/callbacks/bert_pretraining.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2019 NVIDIA Corporation
 __all__ = ['eval_iter_callback', 'eval_epochs_done_callback']
 
+import nemo
 import numpy as np
 
 
@@ -22,14 +23,16 @@ def eval_iter_callback(tensors, global_vars):
 def eval_epochs_done_callback(global_vars):
     if 'dev_mlm_loss' in global_vars:
         mlm_loss = np.mean(global_vars["dev_mlm_loss"])
-        print("Dev MLM perplexity: {0}".format(np.round(np.exp(mlm_loss), 3)))
+        nemo.logging.info(
+            "Dev MLM perplexity: {0}".format(np.round(np.exp(mlm_loss), 3)))
         global_vars["dev_mlm_loss"] = []
     else:
         mlm_loss = -123.0
 
     if 'dev_nsp_loss' in global_vars:
         nsp_loss = np.mean(global_vars["dev_nsp_loss"])
-        print("Dev NSP perplexity: {0}".format(np.round(np.exp(nsp_loss), 3)))
+        nemo.logging.info(
+            "Dev NSP perplexity: {0}".format(np.round(np.exp(nsp_loss), 3)))
         global_vars["dev_nsp_loss"] = []
     else:
         nsp_loss = -123.0
diff --git a/collections/nemo_nlp/nemo_nlp/utils/callbacks/glue.py b/collections/nemo_nlp/nemo_nlp/utils/callbacks/glue.py
index deba55d7a7b2..e1749f1279b5 100644
--- a/collections/nemo_nlp/nemo_nlp/utils/callbacks/glue.py
+++ b/collections/nemo_nlp/nemo_nlp/utils/callbacks/glue.py
@@ -24,14 +24,11 @@
 import os
 import random
 
+import nemo
 import numpy as np
 from scipy.stats import pearsonr, spearmanr
 from sklearn.metrics import matthews_corrcoef, f1_score
 
-from nemo.utils.exp_logging import get_logger
-
-logger = get_logger('')
-
 
 def eval_iter_callback(tensors, global_vars):
     if "all_preds" not in global_vars.keys():
@@ -80,9 +77,9 @@ def eval_epochs_done_callback(global_vars, output_dir, task_name):
     if preds.shape[0] > 21:
         i = random.randint(0, preds.shape[0] - 21)
 
-    logger.info("Task name: %s" % task_name.upper())
-    logger.info("Sampled preds: [%s]" % list2str(preds[i:i+20]))
-    logger.info("Sampled labels: [%s]" % list2str(labels[i:i+20]))
+    nemo.logging.info("Task name: %s" % task_name.upper())
+    nemo.logging.info("Sampled preds: [%s]" % list2str(preds[i:i+20]))
+    nemo.logging.info("Sampled labels: [%s]" % list2str(labels[i:i+20]))
 
     results = compute_metrics(task_name, preds, labels)
 
@@ -91,7 +88,7 @@ def eval_epochs_done_callback(global_vars, output_dir, task_name):
         f.write('labels\t' + list2str(labels) + '\n')
         f.write('preds\t' + list2str(preds) + '\n')
 
-    logger.info(results)
+    nemo.logging.info(results)
 
     return results
 
diff --git a/collections/nemo_nlp/nemo_nlp/utils/callbacks/joint_intent_slot.py b/collections/nemo_nlp/nemo_nlp/utils/callbacks/joint_intent_slot.py
index f81127a77437..4ada1d2ba5b8 100644
--- a/collections/nemo_nlp/nemo_nlp/utils/callbacks/joint_intent_slot.py
+++ b/collections/nemo_nlp/nemo_nlp/utils/callbacks/joint_intent_slot.py
@@ -6,17 +6,13 @@
 
 import matplotlib
 from matplotlib import pyplot as plt
+import nemo
 import numpy as np
 from sklearn.metrics import confusion_matrix, classification_report
 
-from nemo.utils.exp_logging import get_logger
-
 __all__ = ['eval_iter_callback', 'eval_epochs_done_callback']
 
 
-logger = get_logger('')
-
-
 def tensor2list(tensor):
     return tensor.detach().cpu().tolist()
 
@@ -92,12 +88,13 @@ def eval_epochs_done_callback(global_vars, graph_fold):
     i = 0
     if intent_preds.shape[0] > 21:
         i = random.randint(0, intent_preds.shape[0] - 21)
-    logger.info("Sampled i_preds: [%s]" % list2str(intent_preds[i:i+20]))
-    logger.info("Sampled intents: [%s]" % list2str(intent_labels[i:i+20]))
-    logger.info("Sampled s_preds: [%s]" % list2str(slot_preds[i:i+20]))
-    logger.info("Sampled slots: [%s]" % list2str(slot_labels[i:i+20]))
+    nemo.logging.info("Sampled i_preds: [%s]" % list2str(intent_preds[i:i+20]))
+    nemo.logging.info(
+        "Sampled intents: [%s]" % list2str(intent_labels[i:i+20]))
+    nemo.logging.info("Sampled s_preds: [%s]" % list2str(slot_preds[i:i+20]))
+    nemo.logging.info("Sampled slots: [%s]" % list2str(slot_labels[i:i+20]))
     cm = confusion_matrix(intent_labels, intent_preds)
-    logger.info(f'Confusion matrix:\n{cm}')
+    nemo.logging.info(f'Confusion matrix:\n{cm}')
     fig = plt.figure()
     ax = fig.add_subplot(111)
     cax = ax.matshow(cm)
@@ -108,17 +105,17 @@ def eval_epochs_done_callback(global_vars, graph_fold):
     os.makedirs(graph_fold, exist_ok=True)
     plt.savefig(os.path.join(graph_fold, time.strftime('%Y%m%d-%H%M%S')))
 
-    logger.info('Intent prediction results')
+    nemo.logging.info('Intent prediction results')
     correct_preds = sum(intent_labels == intent_preds)
     intent_accuracy = correct_preds / intent_labels.shape[0]
-    logger.info(f'Intent accuracy: {intent_accuracy}')
-    logger.info(f'Classification report:\n \
+    nemo.logging.info(f'Intent accuracy: {intent_accuracy}')
+    nemo.logging.info(f'Classification report:\n \
         {classification_report(intent_labels, intent_preds)}')
 
-    logger.info('Slot prediction results')
+    nemo.logging.info('Slot prediction results')
     slot_accuracy = sum(slot_labels == slot_preds) / slot_labels.shape[0]
-    logger.info(f'Slot accuracy: {slot_accuracy}')
-    logger.info(f'Classification report:\n \
+    nemo.logging.info(f'Slot accuracy: {slot_accuracy}')
+    nemo.logging.info(f'Classification report:\n \
         {classification_report(slot_labels[:-2], slot_preds[:-2])}')
 
     return dict({'intent_accuracy': intent_accuracy,
diff --git a/collections/nemo_nlp/nemo_nlp/utils/callbacks/language_modeling.py b/collections/nemo_nlp/nemo_nlp/utils/callbacks/language_modeling.py
index fdcfb03e2a76..48ca507482f7 100644
--- a/collections/nemo_nlp/nemo_nlp/utils/callbacks/language_modeling.py
+++ b/collections/nemo_nlp/nemo_nlp/utils/callbacks/language_modeling.py
@@ -1,17 +1,13 @@
 # Copyright (c) 2019 NVIDIA Corporation
 __all__ = ['eval_iter_callback', 'eval_epochs_done_callback']
 
+import nemo
 import numpy as np
 
-from nemo.utils.exp_logging import get_logger
-
 GLOBAL_KEYS = ["eval_loss", "sys"]
 
-logger = get_logger('')
-
 
 def eval_iter_callback(tensors, global_vars):
-
     for key in GLOBAL_KEYS:
         if key not in global_vars.keys():
             global_vars[key] = []
@@ -26,10 +22,10 @@ def eval_epochs_done_callback(global_vars):
     eval_loss = np.mean(global_vars["eval_loss"])
     eval_ppl = np.exp(eval_loss)
 
-    logger.info("------------------------------------------------------------")
-    logger.info("Eval loss: {0}".format(np.round(eval_loss, 3)))
-    logger.info("Eval  ppl: {0}".format(np.round(eval_ppl, 3)))
-    logger.info("------------------------------------------------------------")
+    nemo.logging.info("------------------------------------------------------")
+    nemo.logging.info("Eval loss: {0}".format(np.round(eval_loss, 3)))
+    nemo.logging.info("Eval  ppl: {0}".format(np.round(eval_ppl, 3)))
+    nemo.logging.info("------------------------------------------------------")
     for key in GLOBAL_KEYS:
         global_vars[key] = []
     return dict({"Eval_loss": eval_loss, "Eval_ppl": eval_ppl})
diff --git a/collections/nemo_nlp/nemo_nlp/utils/callbacks/punctuation_capitalization.py b/collections/nemo_nlp/nemo_nlp/utils/callbacks/punctuation_capitalization.py
index 4ae0dc76650f..b6c5e6b0e9e8 100644
--- a/collections/nemo_nlp/nemo_nlp/utils/callbacks/punctuation_capitalization.py
+++ b/collections/nemo_nlp/nemo_nlp/utils/callbacks/punctuation_capitalization.py
@@ -3,15 +3,12 @@
 
 import random
 
+import nemo
 import numpy as np
 from sklearn.metrics import classification_report
 
 from nemo_nlp.data.datasets.utils import list2str, tensor2list
 from nemo_nlp.utils.nlp_utils import plot_confusion_matrix
-from nemo.utils.exp_logging import get_logger
-
-
-logger = get_logger('')
 
 
 def eval_iter_callback(tensors, global_vars):
@@ -112,22 +109,24 @@ def _eval_epochs_done_callback(task_name,
     preds = preds[subtokens_mask]
 
     accuracy = sum(labels == preds) / labels.shape[0]
-    logger.info(f'Accuracy for task {task_name}: {accuracy}')
+    nemo.logging.info(f'Accuracy for task {task_name}: {accuracy}')
 
     # print predictions and labels for a small random subset of data
     sample_size = 20
     i = 0
     if preds.shape[0] > sample_size + 1:
         i = random.randint(0, preds.shape[0] - sample_size - 1)
-    logger.info("Sampled preds: [%s]" % list2str(preds[i:i+sample_size]))
-    logger.info("Sampled labels: [%s]" % list2str(labels[i:i+sample_size]))
+    nemo.logging.info("Sampled preds: [%s]" % list2str(preds[i:i+sample_size]))
+    nemo.logging.info(
+        "Sampled labels: [%s]" % list2str(labels[i:i+sample_size]))
 
     # remove labels from label_ids that don't appear in the dev set
     used_labels = set(labels) | set(preds)
     label_ids = \
         {k: label_ids[k] for k, v in label_ids.items() if v in used_labels}
 
-    logger.info(classification_report(labels, preds, target_names=label_ids))
+    nemo.logging.info(
+        classification_report(labels, preds, target_names=label_ids))
 
     # calculate and plot confusion_matrix
     if graph_fold:
diff --git a/collections/nemo_nlp/nemo_nlp/utils/callbacks/sentence_classification.py b/collections/nemo_nlp/nemo_nlp/utils/callbacks/sentence_classification.py
index f6d0e6496d61..959fb8f6fd99 100644
--- a/collections/nemo_nlp/nemo_nlp/utils/callbacks/sentence_classification.py
+++ b/collections/nemo_nlp/nemo_nlp/utils/callbacks/sentence_classification.py
@@ -5,15 +5,12 @@
 import random
 import time
 
-import logging
-
 import matplotlib
 from matplotlib import pyplot as plt  # nopep8
+import nemo
 import numpy as np  # nopep8
 from sklearn.metrics import confusion_matrix, classification_report  # nopep8
 
-logger = logging.getLogger('log')
-
 __all__ = ['eval_iter_callback', 'eval_epochs_done_callback']
 
 
@@ -52,12 +49,12 @@ def eval_epochs_done_callback(global_vars, graph_fold):
     labels = np.asarray(global_vars['all_labels'])
     preds = np.asarray(global_vars['all_preds'])
     accuracy = sum(labels == preds) / labels.shape[0]
-    logger.info(f'Accuracy: {accuracy}')
+    nemo.logging.info(f'Accuracy: {accuracy}')
     i = 0
     if preds.shape[0] > 21:
         i = random.randint(0, preds.shape[0] - 21)
-    logger.info("Sampled preds: [%s]" % list2str(preds[i:i+20]))
-    logger.info("Sampled labels: [%s]" % list2str(labels[i:i+20]))
+    nemo.logging.info("Sampled preds: [%s]" % list2str(preds[i:i+20]))
+    nemo.logging.info("Sampled labels: [%s]" % list2str(labels[i:i+20]))
     cm = confusion_matrix(labels, preds)
     fig = plt.figure()
     ax = fig.add_subplot(111)
@@ -69,6 +66,6 @@ def eval_epochs_done_callback(global_vars, graph_fold):
     os.makedirs(graph_fold, exist_ok=True)
     plt.savefig(os.path.join(graph_fold, time.strftime('%Y%m%d-%H%M%S')))
 
-    logger.info(classification_report(labels, preds))
+    nemo.logging.info(classification_report(labels, preds))
 
     return dict({"accuracy": accuracy})
diff --git a/collections/nemo_nlp/nemo_nlp/utils/callbacks/token_classification.py b/collections/nemo_nlp/nemo_nlp/utils/callbacks/token_classification.py
index 90c7b74a1868..a12a2f74f77d 100644
--- a/collections/nemo_nlp/nemo_nlp/utils/callbacks/token_classification.py
+++ b/collections/nemo_nlp/nemo_nlp/utils/callbacks/token_classification.py
@@ -6,12 +6,9 @@
 import numpy as np
 from sklearn.metrics import classification_report
 
+import nemo
 from nemo_nlp.data.datasets.utils import list2str, tensor2list
 from nemo_nlp.utils.nlp_utils import plot_confusion_matrix
-from nemo.utils.exp_logging import get_logger
-
-
-logger = get_logger('')
 
 
 def eval_iter_callback(tensors, global_vars):
@@ -60,22 +57,24 @@ def eval_epochs_done_callback(global_vars,
     preds = preds[subtokens_mask]
 
     accuracy = sum(labels == preds) / labels.shape[0]
-    logger.info(f'Accuracy: {accuracy}')
+    nemo.logging.info(f'Accuracy: {accuracy}')
 
     # print predictions and labels for a small random subset of data
     sample_size = 20
     i = 0
     if preds.shape[0] > sample_size + 1:
         i = random.randint(0, preds.shape[0] - sample_size - 1)
-    logger.info("Sampled preds: [%s]" % list2str(preds[i:i+sample_size]))
-    logger.info("Sampled labels: [%s]" % list2str(labels[i:i+sample_size]))
+    nemo.logging.info("Sampled preds: [%s]" % list2str(preds[i:i+sample_size]))
+    nemo.logging.info(
+        "Sampled labels: [%s]" % list2str(labels[i:i+sample_size]))
 
     # remove labels from label_ids that don't appear in the dev set
     used_labels = set(labels) | set(preds)
     label_ids = \
         {k: label_ids[k] for k, v in label_ids.items() if v in used_labels}
 
-    logger.info(classification_report(labels, preds, target_names=label_ids))
+    nemo.logging.info(
+        classification_report(labels, preds, target_names=label_ids))
 
     # calculate and plot confusion_matrix
     if graph_fold:
diff --git a/collections/nemo_nlp/nemo_nlp/utils/nlp_utils.py b/collections/nemo_nlp/nemo_nlp/utils/nlp_utils.py
index 8baf9f5b9c95..04fab5de7e3f 100644
--- a/collections/nemo_nlp/nemo_nlp/utils/nlp_utils.py
+++ b/collections/nemo_nlp/nemo_nlp/utils/nlp_utils.py
@@ -2,13 +2,10 @@
 import time
 
 from matplotlib import pyplot as plt
+import nemo
 import numpy as np
 from sklearn.metrics import confusion_matrix
 
-from nemo.utils.exp_logging import get_logger
-
-logger = get_logger('')
-
 
 def mask_padded_tokens(tokens, pad_id):
     mask = (tokens != pad_id)
@@ -29,11 +26,11 @@ def read_intent_slot_outputs(queries,
     pred_slots = np.argmax(slot_logits, axis=2)
     slot_masks = slot_masks > 0.5
     for i, query in enumerate(queries):
-        logger.info(f'Query: {query}')
+        nemo.logging.info(f'Query: {query}')
         pred = pred_intents[i]
-        logger.info(f'Predicted intent:\t{pred}\t{intent_dict[pred]}')
+        nemo.logging.info(f'Predicted intent:\t{pred}\t{intent_dict[pred]}')
         if intents is not None:
-            logger.info(
+            nemo.logging.info(
                 f'True intent:\t{intents[i]}\t{intent_dict[intents[i]]}')
 
         pred_slot = pred_slots[i][slot_masks[i]]
@@ -46,7 +43,7 @@ def read_intent_slot_outputs(queries,
             output = f'{token}\t{slot_dict[pred_slot[j]]}'
             if slots is not None:
                 output = f'{output}\t{slot_dict[slots[i][j]]}'
-            logger.info(output)
+            nemo.logging.info(output)
 
 
 def get_vocab(file):
diff --git a/nemo/nemo/__init__.py b/nemo/nemo/__init__.py
index 7d322052dc6e..ed63f759f04f 100644
--- a/nemo/nemo/__init__.py
+++ b/nemo/nemo/__init__.py
@@ -21,4 +21,4 @@
 
 __version__ = "0.9.0"
 
-logging = logging.getLogger()
+logging = logging.getLogger(__name__)
diff --git a/nemo/nemo/backends/pytorch/actions.py b/nemo/nemo/backends/pytorch/actions.py
index 2e76c50c3f6c..f1f0a2f7d6eb 100644
--- a/nemo/nemo/backends/pytorch/actions.py
+++ b/nemo/nemo/backends/pytorch/actions.py
@@ -13,8 +13,9 @@
 import torch.distributed as dist
 import torch.nn as nn
 import torch.optim as optim
-from nemo.backends.pytorch.nm import TrainableNM
 
+import nemo
+from nemo.backends.pytorch.nm import TrainableNM
 from .module_wrapper import TrainableNeuralModuleWrapper
 from .nm import DataLayerNM
 from .optimizers import Novograd, AdamW, master_params
@@ -93,8 +94,7 @@ def __init__(
         super(PtActions, self).__init__(
             local_rank=local_rank,
             global_rank=global_rank,
-            optimization_level=optimization_level,
-            logger=logger)
+            optimization_level=optimization_level)
 
         # will be [unique_instance_id -> (NMModule, PTModule)]
         self.module_reference_table = {}
@@ -369,7 +369,7 @@ def __setup_optimizer(optimizer_instance,
                     "Unknown optimizer class: {0}".format(optimizer_class))
 
             if optimization_params.get("larc", False):
-                self.logger.info("Enabling larc")
+                nemo.logging.info("Enabling larc")
                 optimizer = LARC(
                     optimizer,
                     trust_coefficient=optimization_params.get("larc_eta", 2e-2)
@@ -555,7 +555,7 @@ def _eval(self, tensors_2_evaluate, callback, step, verbose=False):
                 assert dist.is_initialized()
                 is_distributed = True
                 world_size = torch.distributed.get_world_size()
-                # self.logger.info(
+                # nemo.logging.info(
                 #     "Doing distributed evaluation. Rank {0} of {1}".format(
                 #         self.local_rank, world_size
                 #     )
@@ -608,7 +608,7 @@ def _eval(self, tensors_2_evaluate, callback, step, verbose=False):
                         num_batches < 10 or (
                         epoch_i % int(num_batches / 10) == 0)
                 ):
-                    self.logger.info(
+                    nemo.logging.info(
                         f"Evaluating batch {epoch_i} out of {num_batches}")
                 tensors = []
                 if isinstance(data, torch.Tensor):
@@ -636,7 +636,7 @@ def _eval(self, tensors_2_evaluate, callback, step, verbose=False):
                 for t2e in tensors_2_evaluate:
                     key = t2e.unique_name
                     if key not in registered_e_tensors.keys():
-                        self.logger.info(
+                        nemo.logging.info(
                             "WARNING: Tensor {} was not found during "
                             "eval".format(
                                 key)
@@ -756,7 +756,7 @@ def _infer(self,
                 assert dist.is_initialized()
                 is_distributed = True
                 world_size = torch.distributed.get_world_size()
-                # self.logger.info(
+                # nemo.logging.info(
                 #     "Doing distributed evaluation. Rank {0} of {1}".format(
                 #         self.local_rank, world_size
                 #     )
@@ -819,7 +819,7 @@ def _infer(self,
                         num_batches < 10 or (
                         epoch_i % int(num_batches / 10) == 0)
                 ):
-                    self.logger.info(
+                    nemo.logging.info(
                         f"Evaluating batch {epoch_i} out of {num_batches}")
                 tensors = []
                 if use_cache:
@@ -864,7 +864,7 @@ def _infer(self,
                 for t2e in tensors_to_return:
                     key = t2e.unique_name
                     if key not in registered_e_tensors.keys():
-                        self.logger.info(
+                        nemo.logging.info(
                             "WARNING: Tensor {} was not found during "
                             "eval".format(
                                 key)
@@ -1337,7 +1337,7 @@ def train(self,
             #     raise NotImplementedError(
             #         "Distributed training does nor work with multiple "
             #         "optimizers")
-            self.logger.info("Doing distributed training")
+            nemo.logging.info("Doing distributed training")
             if t_dataset is not None:
                 train_sampler = \
                     torch.utils.data.distributed.DistributedSampler(
@@ -1489,7 +1489,7 @@ def train(self,
                             registered_tensors[tensor.unique_name]).any():
                         if stop_on_nan_loss:
                             raise ValueError('Loss is NaN or inf - exiting')
-                        self.logger.warning('WARNING: Loss is NaN or inf')
+                        nemo.logging.warning('WARNING: Loss is NaN or inf')
                         curr_optimizer.zero_grad()
                         nan = True
                         break
@@ -1508,7 +1508,7 @@ def train(self,
                             if stop_on_nan_loss:
                                 raise ValueError('Loss is NaN or inf -'
                                                  ' exiting')
-                            self.logger.warning('WARNING: Loss is NaN or inf')
+                            nemo.logging.warning('WARNING: Loss is NaN or inf')
                             curr_optimizer.zero_grad()
                             continue
                         scaled_loss.backward(
diff --git a/nemo/nemo/core/callbacks.py b/nemo/nemo/core/callbacks.py
index 3f19a57eaad7..4695971c22de 100644
--- a/nemo/nemo/core/callbacks.py
+++ b/nemo/nemo/core/callbacks.py
@@ -5,8 +5,11 @@
 import os
 import sys
 import time
+import warnings
 
-from ..utils import get_logger, get_checkpoint_from_dir
+import nemo
+
+from ..utils import get_checkpoint_from_dir
 
 
 class ActionCallback(ABC):
@@ -47,10 +50,9 @@ def action(self, action_obj):
 
     @property
     def logger(self):
-        if self.action is None or self.action.logger is None:
-            return get_logger('')
-        else:
-            return self.action.logger
+        warnings.warn("This will be deprecated in future releases. Please use "
+                      "nemo.logging instead", stacklevel=2)
+        return nemo.logging  # backward-compat shim for self.logger users
 
     def on_action_start(self):
         pass
@@ -106,9 +108,9 @@ def on_iteration_end(self):
                     file_name = fn
                 else:
                     file_name = os.path.join(self._folder, fn)
-                self.logger.info(f"Saving module {class_name} in {file_name}")
+                nemo.logging.info(f"Saving module {class_name} in {file_name}")
                 m.save_to(file_name)
-                self.logger.info("Saved.")
+                nemo.logging.info("Saved.")
             self._saved_ckpts.append(f'-{self.step}.pt')
             if len(self._saved_ckpts) > self._ckpt2keep:
                 for end in self._saved_ckpts[:-self._ckpt2keep]:
@@ -127,9 +129,9 @@ def on_action_end(self):
                     file_name = fn
                 else:
                     file_name = os.path.join(self._folder, fn)
-                self.logger.info(f"Saving module {class_name} in {file_name}")
+                nemo.logging.info(f"Saving module {class_name} in {file_name}")
                 m.save_to(file_name)
-                self.logger.info("Saved.")
+                nemo.logging.info("Saved.")
 
 
 class SimpleLossLoggerCallback(ActionCallback):
@@ -165,25 +167,25 @@ def tensors(self):
 
     def on_action_start(self):
         if self.global_rank is None or self.global_rank == 0:
-            self.logger.info("Starting .....")
+            nemo.logging.info("Starting .....")
             self._start_time = time.time()
 
     def on_action_end(self):
         if self.global_rank is None or self.global_rank == 0:
             if self._swriter is not None:
                 self._swriter.close()
-            self.logger.info(f"Done in {time.time() - self._start_time}")
+            nemo.logging.info(f"Done in {time.time() - self._start_time}")
 
     def on_epoch_start(self):
         if self.global_rank is None or self.global_rank == 0:
-            self.logger.info(f"Starting epoch {self.epoch_num}")
+            nemo.logging.info(f"Starting epoch {self.epoch_num}")
             self._last_epoch_start = time.time()
 
     def on_epoch_end(self):
         if self.global_rank is None or self.global_rank == 0:
             step = self.step
             run_time = time.time() - self._last_epoch_start
-            self.logger.info(f"Finished epoch {self.epoch_num} in {run_time}")
+            nemo.logging.info(f"Finished epoch {self.epoch_num} in {run_time}")
             if self._swriter is not None:
                 value = self.epoch_num
                 self._swriter.add_scalar('misc/epoch', value, step)
@@ -203,7 +205,7 @@ def on_iteration_end(self):
                     for t in self.tensors
                 ]
 
-                self.logger.info(f"Step: {step}")
+                nemo.logging.info(f"Step: {step}")
                 if self._print_func:
                     self._print_func(tensor_values)
                 sys.stdout.flush()
@@ -219,7 +221,7 @@ def on_iteration_end(self):
                     run_time = time.time() - self._last_iter_start
                     self._swriter.add_scalar('misc/step_time', run_time, step)
                 run_time = time.time() - self._last_iter_start
-                self.logger.info(f"Step time: {run_time} seconds")
+                nemo.logging.info(f"Step time: {run_time} seconds")
 
 
 class CheckpointCallback(ActionCallback):
@@ -232,13 +234,13 @@ def __init__(self, folder, load_from_folder=None, step_freq=-1,
                  epoch_freq=-1, checkpoints_to_keep=4, force_load=False):
         super().__init__()
         if step_freq == -1 and epoch_freq == -1:
-            self.logger.warning(
+            nemo.logging.warning(
                 "No checkpoints will be saved because step_freq and "
                 "epoch_freq are both -1."
             )
 
         if step_freq > -1 and epoch_freq > -1:
-            self.logger.warning(
+            nemo.logging.warning(
                 "You config the model to save by both steps and epochs. "
                 "Save by step_freq only"
             )
@@ -258,7 +260,7 @@ def __save_to(self, path):
         if self.global_rank is not None and self.global_rank != 0:
             return
         if not os.path.isdir(path):
-            self.logger.info(f"Creating {path} folder")
+            nemo.logging.info(f"Creating {path} folder")
             os.makedirs(path, exist_ok=True)
         unique_mod_names = set()
         for module in self.action.modules:
@@ -288,16 +290,16 @@ def __save_to(self, path):
                 for file in glob.glob(f'{path}/*{end}'):
                     os.remove(file)
             self._saved_ckpts = self._saved_ckpts[-self._ckpt2keep:]
-        self.logger.info(f'Saved checkpoint: {path}/{filename}')
+        nemo.logging.info(f'Saved checkpoint: {path}/{filename}')
 
     def __restore_from(self, path):
         if not os.path.isdir(path):
             if self._force_load:
                 raise ValueError("force_load was set to True for checkpoint "
                                  "callback but a checkpoint was not found.")
-            self.logger.warning(f"Checkpoint folder {path} not found!")
+            nemo.logging.warning(f"Checkpoint folder {path} not found!")
         else:
-            self.logger.info(f"Restoring checkpoint from folder {path} ...")
+            nemo.logging.info(f"Restoring checkpoint from folder {path} ...")
             modules_to_restore = []
             modules_to_restore_name = []
             for module in self.action.modules:
@@ -317,8 +319,8 @@ def __restore_from(self, path):
                     raise ValueError(
                         "force_load was set to True for checkpoint callback"
                         "but a checkpoint was not found.")
-                self.logger.warning(e)
-                self.logger.warning(
+                nemo.logging.warning(e)
+                nemo.logging.warning(
                     f"Checkpoint folder {path} present but did not restore")
                 return
 
@@ -328,8 +330,8 @@ def __restore_from(self, path):
                 for tr, checkpoint in zip([self.action], trainer_checkpoints):
                     tr.restore_state_from(checkpoint)
             except (BaseException, ValueError) as e:
-                self.logger.warning(e)
-                self.logger.warning("Trainer state wasn't restored")
+                nemo.logging.warning(e)
+                nemo.logging.warning("Trainer state wasn't restored")
                 return
 
     def on_action_start(self):
@@ -343,11 +345,11 @@ def on_action_start(self):
                         "overwrite __str__() of one of the modules.")
                 unique_mod_names.add(str(module))
                 num_parameters += module.num_weights
-        self.logger.info(f"Found {len(unique_mod_names)} modules with "
-                         f"weights:")
+        nemo.logging.info(f"Found {len(unique_mod_names)} modules with "
+                          f"weights:")
         for name in unique_mod_names:
-            self.logger.info(f"{name}")
-        self.logger.info(f"Total model parameters: {num_parameters}")
+            nemo.logging.info(f"{name}")
+        nemo.logging.info(f"Total model parameters: {num_parameters}")
         self.__restore_from(path=self._load_from_folder)
 
     def on_iteration_end(self):
@@ -366,7 +368,7 @@ def on_epoch_end(self):
         if self._epoch_freq > 0:
             if self.global_rank is None or self.global_rank == 0:
                 run_time = time.time() - self._last_epoch_start
-                self.logger.info(
+                nemo.logging.info(
                     f'Finished epoch {self.epoch_num} in {run_time}')
                 if (self.epoch_num + 1) % self._epoch_freq == 0:
                     self.__save_to(path=self._folder)
@@ -430,22 +432,22 @@ def on_iteration_end(self):
         step = self.step
         if step % self._eval_frequency == 0:
             if self.global_rank == 0 or self.global_rank is None:
-                self.logger.info('Doing Evaluation ' + '.' * 30)
+                nemo.logging.info('Doing Evaluation ' + '.' * 30)
             start_time = time.time()
             self.action._eval(self._eval_tensors, self, step)
             elapsed_time = time.time() - start_time
             if self.global_rank == 0 or self.global_rank is None:
-                self.logger.info(f'Evaluation time: {elapsed_time} seconds')
+                nemo.logging.info(f'Evaluation time: {elapsed_time} seconds')
 
     def on_action_end(self):
         step = self.step
         if self.global_rank == 0 or self.global_rank is None:
-            self.logger.info('Final Evaluation ' + '.' * 30)
+            nemo.logging.info('Final Evaluation ' + '.' * 30)
         start_time = time.time()
         self.action._eval(self._eval_tensors, self, step)
         elapsed_time = time.time() - start_time
         if self.global_rank == 0 or self.global_rank is None:
-            self.logger.info(f'Evaluation time: {elapsed_time} seconds')
+            nemo.logging.info(f'Evaluation time: {elapsed_time} seconds')
 
     def clear_global_var_dict(self):
         self._global_var_dict = {}
diff --git a/nemo/nemo/core/neural_factory.py b/nemo/nemo/core/neural_factory.py
index c6a6f653b112..1c0a6f409b52 100644
--- a/nemo/nemo/core/neural_factory.py
+++ b/nemo/nemo/core/neural_factory.py
@@ -10,13 +10,15 @@
 from abc import ABC, abstractmethod
 import random
 from typing import List, Optional
+import warnings
 
 from enum import Enum
 import numpy as np
 
+import nemo
 from .callbacks import ActionCallback, EvaluatorCallback
 from .neural_types import *
-from ..utils import ExpManager, get_logger
+from ..utils import ExpManager
 
 
 class DeploymentFormat(Enum):
@@ -66,16 +68,12 @@ def __init__(
             self,
             local_rank,
             global_rank,
-            optimization_level=Optimization.mxprO0,
-            logger=None):
+            optimization_level=Optimization.mxprO0):
         self._local_rank = local_rank
         self._global_rank = global_rank
         self._optim_level = optimization_level
         self.step = None
         self.epoch_num = None
-        self.logger = logger
-        if logger is None:
-            self.logger = get_logger('')
 
     @property
     def local_rank(self):
@@ -670,8 +668,7 @@ def deployment_export(self,
             output=output,
             d_format=d_format,
             input_example=input_example,
-            output_example=output_example,
-            logger=self.logger,
+            output_example=output_example
         )
 
     def infer(self,
@@ -716,7 +713,6 @@ def infer(self,
             checkpoint_dir=checkpoint_dir,
             ckpt_pattern=ckpt_pattern,
             verbose=verbose,
-            logger=self.logger,
             cache=cache,
             use_cache=use_cache,
             offload_to_cpu=offload_to_cpu,
@@ -735,8 +731,7 @@ def _get_trainer(self, tb_writer=None):
                 local_rank=self._local_rank,
                 global_rank=self._global_rank,
                 tb_writer=tb_writer,
-                optimization_level=self._optim_level,
-                logger=self.logger)
+                optimization_level=self._optim_level)
             return instance
         else:
             raise ValueError("Only PyTorch backend is currently supported.")
@@ -805,7 +800,9 @@ def optim_level(self):
 
     @property
     def logger(self):
-        return self._exp_manager.logger
+        warnings.warn("This will be deprecated in future releases. Please use "
+                      "nemo.logging instead")
+        return nemo.logging
 
     @property
     def checkpoint_dir(self):
diff --git a/nemo/nemo/utils/exp_logging.py b/nemo/nemo/utils/exp_logging.py
index 96d41b7e2442..cb82afd8aba8 100644
--- a/nemo/nemo/utils/exp_logging.py
+++ b/nemo/nemo/utils/exp_logging.py
@@ -10,7 +10,7 @@
 import nemo
 
 
-def get_logger(name):
+def get_logger(unused):
     warnings.warn("This function will be deprecated in the future. You "
                   "can just use nemo.logging instead")
     return nemo.logging

From a11faf7087fdd34025b3a83accc7c18aad3948a8 Mon Sep 17 00:00:00 2001
From: Jason <jasoli@nvidia.com>
Date: Tue, 14 Jan 2020 17:35:45 -0800
Subject: [PATCH 04/10] update unittests

Signed-off-by: Jason <jasoli@nvidia.com>
---
 collections/nemo_asr/nemo_asr/parts/dataset.py |  2 +-
 nemo/nemo/utils/exp_logging.py                 | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/collections/nemo_asr/nemo_asr/parts/dataset.py b/collections/nemo_asr/nemo_asr/parts/dataset.py
index ef29c83a6895..efcaac410944 100644
--- a/collections/nemo_asr/nemo_asr/parts/dataset.py
+++ b/collections/nemo_asr/nemo_asr/parts/dataset.py
@@ -264,7 +264,7 @@ def __init__(
                     text = line[split_idx:].strip()
                     if normalize:
                         text = ManifestEN.normalize_text(
-                            text, labels, logger=logger)
+                            text, labels)
                     dur = id2dur[utt_id] if id2dur else None
 
                     # Filter by duration if specified & utt2dur exists
diff --git a/nemo/nemo/utils/exp_logging.py b/nemo/nemo/utils/exp_logging.py
index cb82afd8aba8..52987000d703 100644
--- a/nemo/nemo/utils/exp_logging.py
+++ b/nemo/nemo/utils/exp_logging.py
@@ -154,7 +154,7 @@ def __init__(
         if self.ckpt_dir:
             self.make_dir(self.ckpt_dir, exist_ok)
 
-    def create_logger(self, name='', level=logging.INFO, log_file=True):
+    def create_logger(self, level=logging.INFO, log_file=True):
         logger = nemo.logging
         tmp = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 
@@ -194,20 +194,20 @@ def get_tb_writer(self, tb_dir=None, exist_ok=True):
                 self.tb_writer = SummaryWriter(self.tb_dir)
             except ImportError:
                 self.tb_writer = None
-                if self.logger is not None:
-                    self.logger.info('Not using TensorBoard.')
-                    self.logger.info('Install tensorboardX to use TensorBoard')
+                nemo.logging.info('Not using TensorBoard.')
+                nemo.logging.info('Install tensorboardX to use TensorBoard')
         return self.tb_writer
 
     def log_exp_info(self, params, print_everywhere=False):
         if print_everywhere or self.global_rank == 0:
-            self.logger.info("NEMO MODEL'S PARAMETERS")
+            nemo.logging.info("NEMO MODEL'S PARAMETERS")
             for key in params:
-                self.logger.info(f'{key}\t{params[key]}')
-            self.logger.info(f'Experiment output is stored in {self.work_dir}')
+                nemo.logging.info(f'{key}\t{params[key]}')
+            nemo.logging.info(
+                f'Experiment output is stored in {self.work_dir}')
 
     def reset_loggers(self):
-        self.logger.handlers = []
+        nemo.logging.handlers = []
 
 
 def get_git_hash():

From 00d3c84d4ea4e04c4f611996bf9395dd01224b96 Mon Sep 17 00:00:00 2001
From: Jason <jasoli@nvidia.com>
Date: Tue, 14 Jan 2020 17:55:34 -0800
Subject: [PATCH 05/10] fix issue with double printing

Signed-off-by: Jason <jasoli@nvidia.com>
---
 nemo/nemo/backends/pytorch/actions.py |  1 -
 nemo/nemo/backends/pytorch/nm.py      | 32 +++++++++++++--------------
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/nemo/nemo/backends/pytorch/actions.py b/nemo/nemo/backends/pytorch/actions.py
index f1f0a2f7d6eb..4f424b69142e 100644
--- a/nemo/nemo/backends/pytorch/actions.py
+++ b/nemo/nemo/backends/pytorch/actions.py
@@ -1236,7 +1236,6 @@ def train(self,
                 self.__get_top_sorted_modules_and_dataloader(
                     hook=tensors_to_optimize
                 )
-
             # Extract trainable weights which will be optimized
             params_list = [
                 p[0].parameters() for p in opt_call_chain
diff --git a/nemo/nemo/backends/pytorch/nm.py b/nemo/nemo/backends/pytorch/nm.py
index 3a19b1617fb0..b2b2693321b1 100644
--- a/nemo/nemo/backends/pytorch/nm.py
+++ b/nemo/nemo/backends/pytorch/nm.py
@@ -1,11 +1,11 @@
 # Copyright (c) 2019 NVIDIA Corporation
-import logging
 from abc import abstractmethod
 from typing import Dict, Set, Tuple, Optional, List
 
 import torch as t
 import torch.nn as nn
 
+import nemo
 from ...core import NeuralModule, DeviceType, WeightShareTransform
 from ...utils.helpers import rgetattr, rsetattr, get_cuda_device
 
@@ -193,7 +193,7 @@ class DataLayerNM(NeuralModule):
 
     def __init__(self, **kwargs):
         # if 'batch_size' not in kwargs:
-        #    logging.warning("No batch_size specified in the data layer. "
+        #    nemo.logging.warning("No batch_size specified in the data layer. "
         #                    "Setting batch_size to 1.")
         #    kwargs['batch_size'] = 1
         NeuralModule.__init__(self, **kwargs)  # For NeuralModule API
@@ -209,7 +209,7 @@ def input_ports(self):
         return {}
 
     def get_weights(self):
-        logging.warning(
+        nemo.logging.warning(
             "Data Layer does not have any weights to return. "
             "This get_weights call returns None."
         )
@@ -217,21 +217,21 @@ def get_weights(self):
 
     def set_weights(self, name2weight: Dict[(str, bool)],
                     name2name_and_transform):
-        logging.warning(
+        nemo.logging.warning(
             "Data Layer does not have any weights to set. "
             "This set_weights call is ignored."
         )
         return None
 
     def tie_weights_with(self, module, weight_names):
-        logging.warning(
+        nemo.logging.warning(
             "Data Layer does not have any weights to tie. "
             "This tie_weights_with call is ignored."
         )
         return None
 
     def save_to(self, path):
-        logging.warning(
+        nemo.logging.warning(
             "Data Layer does not have any state to save. "
             "This save_to call is ignored."
         )
@@ -244,14 +244,14 @@ def restore_from(self, path):
         return None
 
     def freeze(self, weights: Set[str] = None):
-        logging.warning(
+        nemo.logging.warning(
             "Data Layer does not have any weights to freeze. "
             "This freeze call is ignored."
         )
         return None
 
     def unfreeze(self, weights: Set[str] = None):
-        logging.warning(
+        nemo.logging.warning(
             "Data Layer does not have any weights to unfreeze. "
             "This unfreeze call is ignored."
         )
@@ -295,28 +295,28 @@ def __init__(self, **kwargs):
         self._device = get_cuda_device(self.placement)
 
     def get_weights(self):
-        # logging.warning("Loss function module does not have any weights to
-        # return. "
-        #                "This get_weights call returns None.")
+        # nemo.logging.warning(
+        #     "Loss function module does not have any weights "
+        #     "to return. This get_weights call returns None.")
         return None
 
     def set_weights(self, name2weight: Dict[(str, bool)],
                     name2name_and_transform):
-        logging.warning(
+        nemo.logging.warning(
             "Loss function module does not have any weights to set. "
             "This set_weights call is ignored."
         )
         return None
 
     def tie_weights_with(self, module, weight_names):
-        logging.warning(
+        nemo.logging.warning(
             "Loss function module does not have any weights to tie. "
             "This tie_weights_with call is ignored."
         )
         return None
 
     def save_to(self, path):
-        logging.warning(
+        nemo.logging.warning(
             "Loss function module does not have any state to save. "
             "This save_to call is ignored."
         )
@@ -330,14 +330,14 @@ def restore_from(self, path):
         return None
 
     def freeze(self, weights: Set[str] = None):
-        logging.warning(
+        nemo.logging.warning(
             "Loss function module does not have any weights to freeze. "
             "This freeze call is ignored."
         )
         return None
 
     def unfreeze(self, weights: Set[str] = None):
-        logging.warning(
+        nemo.logging.warning(
             "Loss function module does not have any weights to "
             "unfreeze. This unfreeze call is ignored."
         )

From 852b4d991b967e43a0b93916cfe8a6c5b8a62ca2 Mon Sep 17 00:00:00 2001
From: Jason <jasoli@nvidia.com>
Date: Wed, 15 Jan 2020 18:01:39 -0800
Subject: [PATCH 06/10] refactor code to use nemo.logging

Signed-off-by: Jason <jasoli@nvidia.com>
---
 .../nemo_asr/nemo_asr/audio_preprocessing.py  |  3 +-
 collections/nemo_asr/nemo_asr/data_layer.py   |  9 ++-
 collections/nemo_asr/nemo_asr/helpers.py      | 44 ++++---------
 collections/nemo_asr/nemo_asr/las/helpers.py  | 13 ++--
 .../nemo_asr/nemo_asr/parts/dataset.py        | 25 ++++---
 .../nemo_asr/nemo_asr/parts/features.py       | 13 ++--
 .../data/datasets/token_classification.py     | 65 ++++++++++---------
 collections/nemo_nlp/nemo_nlp/data/utils.py   | 13 ++--
 .../nemo_nlp/utils/metrics/sacrebleu.py       |  4 +-
 collections/nemo_tts/nemo_tts/data_layers.py  |  4 +-
 .../nemo_tts/nemo_tts/parts/datasets.py       | 13 ++--
 .../nemo_tts/nemo_tts/parts/helpers.py        | 10 ++-
 examples/asr/jasper.py                        | 24 +++----
 examples/asr/jasper_aishell.py                | 24 +++----
 examples/asr/jasper_aishell_infer.py          | 19 +++---
 examples/asr/jasper_an4.py                    | 21 +++---
 examples/asr/jasper_eval.py                   | 33 +++++-----
 examples/asr/quartznet.py                     | 11 ++--
 examples/image/gan.py                         |  8 +--
 examples/nlp/asr_postprocessor.py             |  2 +-
 examples/nlp/bert_pretraining.py              |  6 +-
 examples/nlp/glue_with_BERT.py                |  2 +-
 examples/nlp/joint_intent_slot_infer.py       | 14 ++--
 examples/nlp/joint_intent_slot_with_bert.py   |  8 +--
 examples/nlp/nmt_tutorial.py                  |  2 +-
 examples/nlp/punctuation_capitalization.py    | 10 +--
 .../nlp/punctuation_capitalization_infer.py   |  4 +-
 .../nlp/sentence_classification_with_bert.py  |  8 +--
 examples/nlp/token_classification.py          | 12 ++--
 examples/nlp/token_classification_infer.py    |  4 +-
 examples/tts/tacotron2.py                     | 26 ++++----
 examples/tts/waveglow.py                      | 17 +++--
 nemo/nemo/backends/pytorch/actions.py         | 39 ++++-------
 nemo/nemo/core/neural_factory.py              | 20 +++---
 nemo/nemo/core/neural_modules.py              |  7 +-
 nemo/nemo/utils/helpers.py                    |  6 +-
 36 files changed, 240 insertions(+), 303 deletions(-)

diff --git a/collections/nemo_asr/nemo_asr/audio_preprocessing.py b/collections/nemo_asr/nemo_asr/audio_preprocessing.py
index 653624bcfb8e..650d7d651ad1 100644
--- a/collections/nemo_asr/nemo_asr/audio_preprocessing.py
+++ b/collections/nemo_asr/nemo_asr/audio_preprocessing.py
@@ -384,8 +384,7 @@ def __init__(
             frame_splicing=frame_splicing,
             stft_conv=stft_conv,
             pad_value=pad_value,
-            mag_power=mag_power,
-            logger=self._logger
+            mag_power=mag_power
         )
         self.featurizer.to(self._device)
 
diff --git a/collections/nemo_asr/nemo_asr/data_layer.py b/collections/nemo_asr/nemo_asr/data_layer.py
index 119c3b45fded..399be6467ddc 100644
--- a/collections/nemo_asr/nemo_asr/data_layer.py
+++ b/collections/nemo_asr/nemo_asr/data_layer.py
@@ -10,6 +10,7 @@
 from functools import partial
 import torch
 
+import nemo
 from nemo.backends.pytorch import DataLayerNM
 from nemo.core import DeviceType
 from nemo.core.neural_types import *
@@ -146,14 +147,13 @@ def __init__(
                           'trim': trim_silence,
                           'bos_id': bos_id,
                           'eos_id': eos_id,
-                          'logger': self._logger,
                           'load_audio': load_audio}
 
         self._dataset = AudioDataset(**dataset_params)
 
         # Set up data loader
         if self._placement == DeviceType.AllGpu:
-            self._logger.info('Parallelizing DATALAYER')
+            nemo.logging.info('Parallelizing DATALAYER')
             sampler = torch.utils.data.distributed.DistributedSampler(
                 self._dataset)
         else:
@@ -272,13 +272,12 @@ def __init__(
                           'labels': labels,
                           'min_duration': min_duration,
                           'max_duration': max_duration,
-                          'normalize': normalize_transcripts,
-                          'logger': self._logger}
+                          'normalize': normalize_transcripts}
         self._dataset = KaldiFeatureDataset(**dataset_params)
 
         # Set up data loader
         if self._placement == DeviceType.AllGpu:
-            self._logger.info('Parallelizing DATALAYER')
+            nemo.logging.info('Parallelizing DATALAYER')
             sampler = torch.utils.data.distributed.DistributedSampler(
                 self._dataset)
         else:
diff --git a/collections/nemo_asr/nemo_asr/helpers.py b/collections/nemo_asr/nemo_asr/helpers.py
index 5b74016a0a4a..33c8e9a5d6ab 100644
--- a/collections/nemo_asr/nemo_asr/helpers.py
+++ b/collections/nemo_asr/nemo_asr/helpers.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2019 NVIDIA Corporation
 
 import torch
+import nemo
 
 from .metrics import word_error_rate
 
@@ -31,8 +32,7 @@ def __ctc_decoder_predictions_tensor(tensor, labels):
 def monitor_asr_train_progress(tensors: list,
                                labels: list,
                                eval_metric='WER',
-                               tb_logger=None,
-                               logger=None):
+                               tb_logger=None):
     """
     Takes output of greedy ctc decoder and performs ctc decoding algorithm to
     remove duplicates and special symbol. Prints sample to screen, computes
@@ -42,7 +42,6 @@ def monitor_asr_train_progress(tensors: list,
       labels: A list of labels
       eval_metric: An optional string from 'WER', 'CER'. Defaults to 'WER'.
       tb_logger: Tensorboard logging object
-      logger:
     Returns:
       None
     """
@@ -72,16 +71,10 @@ def monitor_asr_train_progress(tensors: list,
     wer = word_error_rate(hypotheses, references, use_cer=use_cer)
     if tb_logger is not None:
         tb_logger.add_scalar(tag, wer)
-    if logger:
-        logger.info(f'Loss: {tensors[0]}')
-        logger.info(f'{tag}: {wer*100 : 5.2f}%')
-        logger.info(f'Prediction: {hypotheses[0]}')
-        logger.info(f'Reference: {references[0]}')
-    else:
-        print(f'Loss: {tensors[0]}')
-        print(f'{tag}: {wer*100 : 5.2f}%')
-        print(f'Prediction: {hypotheses[0]}')
-        print(f'Reference: {references[0]}')
+    nemo.logging.info(f'Loss: {tensors[0]}')
+    nemo.logging.info(f'{tag}: {wer*100 : 5.2f}%')
+    nemo.logging.info(f'Prediction: {hypotheses[0]}')
+    nemo.logging.info(f'Reference: {references[0]}')
 
 
 def __gather_losses(losses_list: list) -> list:
@@ -146,8 +139,7 @@ def process_evaluation_batch(tensors: dict, global_vars: dict, labels: list):
 
 def process_evaluation_epoch(global_vars: dict,
                              eval_metric='WER',
-                             tag=None,
-                             logger=None):
+                             tag=None):
     """
     Calculates the aggregated loss and WER across the entire evaluation dataset
     """
@@ -165,24 +157,14 @@ def process_evaluation_epoch(global_vars: dict,
                           use_cer=use_cer)
 
     if tag is None:
-        if logger:
-            logger.info(f"==========>>>>>>Evaluation Loss: {eloss}")
-            logger.info(f"==========>>>>>>Evaluation {eval_metric}: "
-                        f"{wer*100 : 5.2f}%")
-        else:
-            print(f"==========>>>>>>Evaluation Loss: {eloss}")
-            print(f"==========>>>>>>Evaluation {eval_metric}: "
-                  f"{wer*100 : 5.2f}%")
+        nemo.logging.info(f"==========>>>>>>Evaluation Loss: {eloss}")
+        nemo.logging.info(f"==========>>>>>>Evaluation {eval_metric}: "
+                          f"{wer*100 : 5.2f}%")
         return {"Evaluation_Loss": eloss, f"Evaluation_{eval_metric}": wer}
     else:
-        if logger:
-            logger.info(f"==========>>>>>>Evaluation Loss {tag}: {eloss}")
-            logger.info(f"==========>>>>>>Evaluation {eval_metric} {tag}: "
-                        f"{wer*100 : 5.2f}%")
-        else:
-            print(f"==========>>>>>>Evaluation Loss {tag}: {eloss}")
-            print(f"==========>>>>>>Evaluation {eval_metric} {tag}:"
-                  f" {wer*100 : 5.2f}%")
+        nemo.logging.info(f"==========>>>>>>Evaluation Loss {tag}: {eloss}")
+        nemo.logging.info(f"==========>>>>>>Evaluation {eval_metric} {tag}: "
+                          f"{wer*100 : 5.2f}%")
         return {f"Evaluation_Loss_{tag}": eloss,
                 f"Evaluation_{eval_metric}_{tag}": wer}
 
diff --git a/collections/nemo_asr/nemo_asr/las/helpers.py b/collections/nemo_asr/nemo_asr/las/helpers.py
index 993bf3385605..bcba3ec28402 100644
--- a/collections/nemo_asr/nemo_asr/las/helpers.py
+++ b/collections/nemo_asr/nemo_asr/las/helpers.py
@@ -2,6 +2,7 @@
 from pprint import pformat
 
 import torch
+import nemo
 from nemo.backends.pytorch.common.metrics import char_lm_metrics
 
 from nemo_asr.metrics import word_error_rate
@@ -55,7 +56,7 @@ def process_evaluation_batch(tensors, global_vars, labels, specials,
 
 def process_evaluation_epoch(global_vars,
                              metrics=('loss', 'bpc', 'ppl'), calc_wer=False,
-                             logger=None, mode='eval', tag='none'):
+                             mode='eval', tag='none'):
     tag = '_'.join(tag.lower().strip().split())
     return_dict = {}
     for metric in metrics:
@@ -70,17 +71,15 @@ def process_evaluation_epoch(global_vars,
         transcript_texts = list(chain(*global_vars['transcript_texts']))
         prediction_texts = list(chain(*global_vars['prediction_texts']))
 
-        if logger:
-            logger.info(f'Ten examples (transcripts and predictions)')
-            logger.info(transcript_texts[:10])
-            logger.info(prediction_texts[:10])
+        nemo.logging.info(f'Ten examples (transcripts and predictions)')
+        nemo.logging.info(transcript_texts[:10])
+        nemo.logging.info(prediction_texts[:10])
 
         wer = word_error_rate(hypotheses=prediction_texts,
                               references=transcript_texts)
         return_dict[f'metric/{mode}_wer_{tag}'] = wer
 
-    if logger:
-        logger.info(pformat(return_dict))
+    nemo.logging.info(pformat(return_dict))
 
     return return_dict
 
diff --git a/collections/nemo_asr/nemo_asr/parts/dataset.py b/collections/nemo_asr/nemo_asr/parts/dataset.py
index efcaac410944..1eb78a2763c1 100644
--- a/collections/nemo_asr/nemo_asr/parts/dataset.py
+++ b/collections/nemo_asr/nemo_asr/parts/dataset.py
@@ -8,6 +8,8 @@
 import torch
 from torch.utils.data import Dataset
 
+import nemo
+
 from .manifest import ManifestBase, ManifestEN
 
 
@@ -131,7 +133,6 @@ def __init__(
             trim=False,
             bos_id=None,
             eos_id=None,
-            logger=False,
             load_audio=True,
             manifest_class=ManifestEN):
         m_paths = manifest_filepath.split(',')
@@ -147,12 +148,11 @@ def __init__(
         self.eos_id = eos_id
         self.bos_id = bos_id
         self.load_audio = load_audio
-        if logger:
-            logger.info(
-                "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} "
-                "hours.".format(
-                    self.manifest.duration / 3600,
-                    self.manifest.filtered_duration / 3600))
+        nemo.logging.info(
+            "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} "
+            "hours.".format(
+                self.manifest.duration / 3600,
+                self.manifest.filtered_duration / 3600))
 
     def __getitem__(self, index):
         sample = self.manifest[index]
@@ -213,8 +213,7 @@ def __init__(
             unk_index=-1,
             blank_index=-1,
             normalize=True,
-            eos_id=None,
-            logger=None):
+            eos_id=None):
         self.eos_id = eos_id
         self.unk_index = unk_index
         self.blank_index = blank_index
@@ -244,8 +243,8 @@ def __init__(
                 f"KaldiFeatureDataset max_duration or min_duration is set but"
                 f" utt2dur file not found in {kaldi_dir}."
             )
-        elif logger:
-            logger.info(
+        else:
+            nemo.logging.info(
                 f"Did not find utt2dur when loading data from "
                 f"{kaldi_dir}. Skipping dataset duration calculations."
             )
@@ -294,9 +293,9 @@ def __init__(
                         print(f"Stop parsing due to max_utts ({max_utts})")
                         break
 
-        if logger and id2dur:
+        if id2dur:
             # utt2dur durations are in seconds
-            logger.info(
+            nemo.logging.info(
                     f"Dataset loaded with {duration/60 : .2f} hours. "
                     f"Filtered {filtered_duration/60 : .2f} hours.")
 
diff --git a/collections/nemo_asr/nemo_asr/parts/features.py b/collections/nemo_asr/nemo_asr/parts/features.py
index 6abfbb5a08ee..679b38d114f8 100644
--- a/collections/nemo_asr/nemo_asr/parts/features.py
+++ b/collections/nemo_asr/nemo_asr/parts/features.py
@@ -8,6 +8,8 @@
 from .segment import AudioSegment
 from torch_stft import STFT
 
+import nemo
+
 CONSTANT = 1e-5
 
 
@@ -127,7 +129,6 @@ def __init__(
             stft_conv=False,
             pad_value=0,
             mag_power=2.,
-            logger=None
     ):
         super(FilterbankFeatures, self).__init__()
         if (n_window_size is None or n_window_stride is None
@@ -137,10 +138,7 @@ def __init__(
             raise ValueError(
                 f"{self} got an invalid value for either n_window_size or "
                 f"n_window_stride. Both must be positive ints.")
-        if logger:
-            logger.info(f"PADDING: {pad_to}")
-        else:
-            print(f"PADDING: {pad_to}")
+        nemo.logging.info(f"PADDING: {pad_to}")
 
         self.win_length = n_window_size
         self.hop_length = n_window_stride
@@ -148,10 +146,7 @@ def __init__(
         self.stft_conv = stft_conv
 
         if stft_conv:
-            if logger:
-                logger.info("STFT using conv")
-            else:
-                print("STFT using conv")
+            nemo.logging.info("STFT using conv")
 
             # Create helper class to patch forward func for use with AMP
             class STFTPatch(STFT):
diff --git a/collections/nemo_nlp/nemo_nlp/data/datasets/token_classification.py b/collections/nemo_nlp/nemo_nlp/data/datasets/token_classification.py
index b941f21f5c5d..3a7dd7974da6 100644
--- a/collections/nemo_nlp/nemo_nlp/data/datasets/token_classification.py
+++ b/collections/nemo_nlp/nemo_nlp/data/datasets/token_classification.py
@@ -24,17 +24,13 @@
 import pickle
 import random
 
+import nemo
 import numpy as np
 from torch.utils.data import Dataset
 
-from nemo.utils.exp_logging import get_logger
-
 from . import utils
 
 
-logger = get_logger('')
-
-
 def get_features(queries,
                  max_seq_length,
                  tokenizer,
@@ -112,7 +108,7 @@ def get_features(queries,
             all_labels.append(labels)
 
     max_seq_length = min(max_seq_length, max(sent_lengths))
-    logger.info(f'Max length: {max_seq_length}')
+    nemo.logging.info(f'Max length: {max_seq_length}')
     utils.get_stats(sent_lengths)
     too_long_count = 0
 
@@ -144,22 +140,22 @@ def get_features(queries,
 
         all_segment_ids.append([0] * max_seq_length)
 
-    logger.info(f'{too_long_count} are longer than {max_seq_length}')
+    nemo.logging.info(f'{too_long_count} are longer than {max_seq_length}')
 
     for i in range(min(len(all_input_ids), 5)):
-        logger.info("*** Example ***")
-        logger.info("i: %s" % (i))
-        logger.info(
+        nemo.logging.info("*** Example ***")
+        nemo.logging.info("i: %s" % (i))
+        nemo.logging.info(
             "subtokens: %s" % " ".join(list(map(str, all_subtokens[i]))))
-        logger.info(
+        nemo.logging.info(
             "loss_mask: %s" % " ".join(list(map(str, all_loss_mask[i]))))
-        logger.info(
+        nemo.logging.info(
             "input_mask: %s" % " ".join(list(map(str, all_input_mask[i]))))
-        logger.info(
+        nemo.logging.info(
             "subtokens_mask: %s" % " ".join(list(map(
                 str, all_subtokens_mask[i]))))
         if with_label:
-            logger.info(
+            nemo.logging.info(
                 "labels: %s" % " ".join(list(map(str, all_labels[i]))))
     return (all_input_ids,
             all_segment_ids,
@@ -232,10 +228,11 @@ def __init__(self,
                 os.path.exists(features_pkl) and os.path.exists(label_ids_pkl):
             # If text_file was already processed, load from pickle
             features = pickle.load(open(features_pkl, 'rb'))
-            logger.info(f'features restored from {features_pkl}')
+            nemo.logging.info(f'features restored from {features_pkl}')
 
             label_ids = pickle.load(open(label_ids_pkl, 'rb'))
-            logger.info(f'Labels to ids dict restored from {label_ids_pkl}')
+            nemo.logging.info(
+                f'Labels to ids dict restored from {label_ids_pkl}')
         else:
             if num_samples == 0:
                 raise ValueError("num_samples has to be positive", num_samples)
@@ -270,19 +267,22 @@ def __init__(self,
             # for dev/test sets use label mapping from training set
             if label_ids:
                 if len(label_ids) != len(unique_labels):
-                    logger.info(f'Not all labels from the specified' +
-                                ' label_ids dictionary are present in the' +
-                                ' current dataset. Using the provided' +
-                                ' label_ids dictionary.')
+                    nemo.logging.info(
+                        f'Not all labels from the specified' +
+                        ' label_ids dictionary are present in the' +
+                        ' current dataset. Using the provided' +
+                        ' label_ids dictionary.')
                 else:
-                    logger.info(f'Using the provided label_ids dictionary.')
+                    nemo.logging.info(
+                        f'Using the provided label_ids dictionary.')
             else:
-                logger.info(f'Creating a new label to label_id dictionary.' +
-                            ' It\'s recommended to use label_ids generated' +
-                            ' during training for dev/test sets to avoid' +
-                            ' errors if some labels are not' +
-                            ' present in the dev/test sets.' +
-                            ' For training set label_ids should be None.')
+                nemo.logging.info(
+                    f'Creating a new label to label_id dictionary.' +
+                    ' It\'s recommended to use label_ids generated' +
+                    ' during training for dev/test sets to avoid' +
+                    ' errors if some labels are not' +
+                    ' present in the dev/test sets.' +
+                    ' For training set label_ids should be None.')
 
                 label_ids = {pad_label: 0}
                 if pad_label in unique_labels:
@@ -301,10 +301,11 @@ def __init__(self,
 
             if use_cache:
                 pickle.dump(features, open(features_pkl, "wb"))
-                logger.info(f'features saved to {features_pkl}')
+                nemo.logging.info(f'features saved to {features_pkl}')
 
                 pickle.dump(label_ids, open(label_ids_pkl, "wb"))
-                logger.info(f'labels to ids dict saved to {label_ids_pkl}')
+                nemo.logging.info(
+                    f'labels to ids dict saved to {label_ids_pkl}')
 
         self.all_input_ids = features[0]
         self.all_segment_ids = features[1]
@@ -316,7 +317,7 @@ def __init__(self,
 
         infold = text_file[:text_file.rfind('/')]
         merged_labels = itertools.chain.from_iterable(self.all_labels)
-        logger.info('Three most popular labels')
+        nemo.logging.info('Three most popular labels')
         _, self.label_frequencies = \
             utils.get_label_stats(merged_labels, infold + '/label_stats.tsv')
 
@@ -324,8 +325,8 @@ def __init__(self,
         out = open(infold + '/label_ids.csv', 'w')
         labels, _ = zip(*sorted(self.label_ids.items(),  key=lambda x: x[1]))
         out.write('\n'.join(labels))
-        logger.info(f'Labels: {self.label_ids}')
-        logger.info(f'Labels mapping saved to : {out.name}')
+        nemo.logging.info(f'Labels: {self.label_ids}')
+        nemo.logging.info(f'Labels mapping saved to : {out.name}')
 
     def __len__(self):
         return len(self.all_input_ids)
diff --git a/collections/nemo_nlp/nemo_nlp/data/utils.py b/collections/nemo_nlp/nemo_nlp/data/utils.py
index 59e1c1cf6df9..e0bab809c939 100644
--- a/collections/nemo_nlp/nemo_nlp/data/utils.py
+++ b/collections/nemo_nlp/nemo_nlp/data/utils.py
@@ -1,12 +1,11 @@
 import os
 import pickle
+import re
+import string
 
+import nemo
 import numpy as np
 
-from nemo.utils.exp_logging import get_logger
-
-logger = get_logger('')
-
 
 def dataset_to_ids(dataset, tokenizer, cache_ids=False, add_bos_eos=True):
     """
@@ -25,10 +24,10 @@ def dataset_to_ids(dataset, tokenizer, cache_ids=False, add_bos_eos=True):
 
     cached_ids_dataset = dataset + str(".pkl")
     if os.path.isfile(cached_ids_dataset):
-        logger.info("Loading cached tokenized dataset ...")
+        nemo.logging.info("Loading cached tokenized dataset ...")
         ids = pickle.load(open(cached_ids_dataset, "rb"))
     else:
-        logger.info("Tokenizing dataset ...")
+        nemo.logging.info("Tokenizing dataset ...")
         data = open(dataset, "rb").readlines()
         ids = []
         for sentence in data:
@@ -38,7 +37,7 @@ def dataset_to_ids(dataset, tokenizer, cache_ids=False, add_bos_eos=True):
                            [tokenizer.eos_id()]
             ids.append(sent_ids)
         if cache_ids:
-            logger.info("Caching tokenized dataset ...")
+            nemo.logging.info("Caching tokenized dataset ...")
             pickle.dump(ids, open(cached_ids_dataset, "wb"))
     return ids
 
diff --git a/collections/nemo_nlp/nemo_nlp/utils/metrics/sacrebleu.py b/collections/nemo_nlp/nemo_nlp/utils/metrics/sacrebleu.py
index 4034b3151dff..28e1218c8705 100755
--- a/collections/nemo_nlp/nemo_nlp/utils/metrics/sacrebleu.py
+++ b/collections/nemo_nlp/nemo_nlp/utils/metrics/sacrebleu.py
@@ -25,11 +25,11 @@
 import gzip
 import hashlib
 import io
-import logging
 import math
 import os
 import re
+import ssl
 import sys
 import unicodedata
 import urllib.request
 
@@ -37,6 +37,7 @@
 from itertools import zip_longest
 from typing import List, Iterable, Tuple, Union
 
+from nemo import logging
 from .fairseq_tokenizer import tokenize_en
 
 
@@ -1315,6 +1316,7 @@ class UnicodeRegex:
 
     without depending on https://pypi.python.org/pypi/regex/."""
 
+    @staticmethod
     def _property_chars(prefix):
         return ''.join(
             chr(x) for x in range(sys.maxunicode)
diff --git a/collections/nemo_tts/nemo_tts/data_layers.py b/collections/nemo_tts/nemo_tts/data_layers.py
index d1a61c6c614d..402cfa4bc538 100644
--- a/collections/nemo_tts/nemo_tts/data_layers.py
+++ b/collections/nemo_tts/nemo_tts/data_layers.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2019 NVIDIA Corporation
 import torch
 
+import nemo
 from nemo.backends.pytorch.nm import DataLayerNM
 from nemo.core import DeviceType
 from nemo.core.neural_types import *
@@ -83,13 +84,12 @@ def __init__(
             max_duration=max_duration,
             min_duration=min_duration,
             trim=trim_silence,
-            logger=self._logger,
             n_segments=n_segments
         )
 
         sampler = None
         if self._placement == DeviceType.AllGpu:
-            self._logger.info('Parallelizing DATALAYER')
+            nemo.logging.info('Parallelizing DATALAYER')
             sampler = torch.utils.data.distributed.DistributedSampler(
                 self._dataset)
 
diff --git a/collections/nemo_tts/nemo_tts/parts/datasets.py b/collections/nemo_tts/nemo_tts/parts/datasets.py
index 8b9e42e21aef..ea780171e91d 100644
--- a/collections/nemo_tts/nemo_tts/parts/datasets.py
+++ b/collections/nemo_tts/nemo_tts/parts/datasets.py
@@ -2,6 +2,7 @@
 import torch
 from torch.utils.data import Dataset
 
+import nemo
 from nemo_asr.parts.segment import AudioSegment
 from .manifest import AudioManifest
 
@@ -12,8 +13,7 @@ def __init__(self,
                  n_segments=0,
                  max_duration=None,
                  min_duration=None,
-                 trim=False,
-                 logger=False):
+                 trim=False):
         """See AudioDataLayer"""
         m_paths = manifest_filepath.split(',')
         self.manifest = AudioManifest(m_paths,
@@ -21,11 +21,10 @@ def __init__(self,
                                       min_duration=min_duration)
         self.trim = trim
         self.n_segments = n_segments
-        if logger:
-            logger.info(
-                f"Dataset loaded with {self.manifest.duration / 3600:.2f} "
-                f"hours. Filtered {self.manifest.filtered_duration / 3600:.2f}"
-                f" hours.")
+        nemo.logging.info(
+            f"Dataset loaded with {self.manifest.duration / 3600:.2f} "
+            f"hours. Filtered {self.manifest.filtered_duration / 3600:.2f}"
+            f" hours.")
 
     def AudioCollateFunc(self, batch):
         def find_max_len(seq, index):
diff --git a/collections/nemo_tts/nemo_tts/parts/helpers.py b/collections/nemo_tts/nemo_tts/parts/helpers.py
index 0dcd2bf6462d..f8197bf25853 100644
--- a/collections/nemo_tts/nemo_tts/parts/helpers.py
+++ b/collections/nemo_tts/nemo_tts/parts/helpers.py
@@ -1,10 +1,11 @@
 # Copyright (c) 2019 NVIDIA Corporation
 import librosa
-import matplotlib
 import matplotlib.pylab as plt
 import numpy as np
 import torch
 
+import nemo
+
 
 def waveglow_log_to_tb_func(swriter,
                             tensors,
@@ -123,13 +124,10 @@ def tacotron2_process_eval_batch(tensors: dict, global_vars: dict):
     global_vars['EvalLoss'].append(torch.mean(torch.stack(tensors[loss_key])))
 
 
-def tacotron2_process_final_eval(global_vars: dict, tag=None, logger=None):
+def tacotron2_process_final_eval(global_vars: dict, tag=None):
     eloss = torch.mean(torch.stack(global_vars['EvalLoss'])).item()
     global_vars['EvalLoss'] = eloss
-    if logger:
-        logger.info(f"==========>>>>>>Evaluation Loss {tag}: {eloss}")
-    else:
-        print(f"==========>>>>>>Evaluation Loss {tag}: {eloss}")
+    nemo.logging.info(f"==========>>>>>>Evaluation Loss {tag}: {eloss}")
     return global_vars
 
 
diff --git a/examples/asr/jasper.py b/examples/asr/jasper.py
index 61b68eaf3ce2..8c00db9b279a 100644
--- a/examples/asr/jasper.py
+++ b/examples/asr/jasper.py
@@ -74,7 +74,6 @@ def construct_name(name, lr, batch_size, max_steps, num_epochs, wd, optimizer,
 
 
 def create_all_dags(args, neural_factory):
-    logger = neural_factory.logger
     yaml = YAML(typ="safe")
     with open(args.model_config) as f:
         jasper_params = yaml.load(f)
@@ -105,7 +104,7 @@ def create_all_dags(args, neural_factory):
     N = len(data_layer)
     steps_per_epoch = math.ceil(
         N / (args.batch_size * args.iter_per_step * args.num_gpus))
-    logger.info('Have {0} examples to train on.'.format(N))
+    nemo.logging.info('Have {0} examples to train on.'.format(N))
 
     data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
         sample_rate=sample_rate,
@@ -139,7 +138,7 @@ def create_all_dags(args, neural_factory):
 
             data_layers_eval.append(data_layer_eval)
     else:
-        neural_factory.logger.info("There were no val datasets passed")
+        nemo.logging.info("There were no val datasets passed")
 
     jasper_encoder = nemo_asr.JasperEncoder(
         feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"],
@@ -155,15 +154,15 @@ def create_all_dags(args, neural_factory):
 
     greedy_decoder = nemo_asr.GreedyCTCDecoder()
 
-    logger.info('================================')
-    logger.info(
+    nemo.logging.info('================================')
+    nemo.logging.info(
         f"Number of parameters in encoder: {jasper_encoder.num_weights}")
-    logger.info(
+    nemo.logging.info(
         f"Number of parameters in decoder: {jasper_decoder.num_weights}")
-    logger.info(
+    nemo.logging.info(
         f"Total number of parameters in model: "
         f"{jasper_decoder.num_weights + jasper_encoder.num_weights}")
-    logger.info('================================')
+    nemo.logging.info('================================')
 
     # Train DAG
     audio_signal_t, a_sig_length_t, \
@@ -199,8 +198,7 @@ def create_all_dags(args, neural_factory):
         tensors=[loss_t, predictions_t, transcript_t, transcript_len_t],
         print_func=partial(
             monitor_asr_train_progress,
-            labels=vocab,
-            logger=logger),
+            labels=vocab),
         get_tb_values=lambda x: [("loss", x[0])],
         tb_writer=neural_factory.tb_writer,
     )
@@ -240,8 +238,7 @@ def create_all_dags(args, neural_factory):
                 labels=vocab),
             user_epochs_done_callback=partial(
                 process_evaluation_epoch,
-                tag=tagname,
-                logger=logger),
+                tag=tagname),
             eval_step=args.eval_freq,
             tb_writer=neural_factory.tb_writer)
 
@@ -278,10 +275,9 @@ def main():
         tensorboard_dir=args.tensorboard_dir)
     args.num_gpus = neural_factory.world_size
 
-    logger = neural_factory.logger
     checkpoint_dir = neural_factory.checkpoint_dir
     if args.local_rank is not None:
-        logger.info('Doing ALL GPU')
+        nemo.logging.info('Doing ALL GPU')
 
     # build dags
     train_loss, callbacks, steps_per_epoch = \
diff --git a/examples/asr/jasper_aishell.py b/examples/asr/jasper_aishell.py
index f9900d962aa4..fec8ea185640 100644
--- a/examples/asr/jasper_aishell.py
+++ b/examples/asr/jasper_aishell.py
@@ -76,7 +76,6 @@ def load_vocab(vocab_file):
 
 
 def create_all_dags(args, neural_factory):
-    logger = neural_factory.logger
     yaml = YAML(typ="safe")
     with open(args.model_config) as f:
         jasper_params = yaml.load(f)
@@ -105,7 +104,7 @@ def create_all_dags(args, neural_factory):
 
     N = len(data_layer)
     steps_per_epoch = int(N / (args.batch_size * args.num_gpus))
-    logger.info('Have {0} examples to train on.'.format(N))
+    nemo.logging.info('Have {0} examples to train on.'.format(N))
 
     data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
         sample_rate=sample_rate,
@@ -140,7 +139,7 @@ def create_all_dags(args, neural_factory):
 
             data_layers_eval.append(data_layer_eval)
     else:
-        neural_factory.logger.info("There were no val datasets passed")
+        nemo.logging.info("There were no val datasets passed")
 
     jasper_encoder = nemo_asr.JasperEncoder(
         feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"],
@@ -156,15 +155,15 @@ def create_all_dags(args, neural_factory):
 
     greedy_decoder = nemo_asr.GreedyCTCDecoder()
 
-    logger.info('================================')
-    logger.info(
+    nemo.logging.info('================================')
+    nemo.logging.info(
         f"Number of parameters in encoder: {jasper_encoder.num_weights}")
-    logger.info(
+    nemo.logging.info(
         f"Number of parameters in decoder: {jasper_decoder.num_weights}")
-    logger.info(
+    nemo.logging.info(
         f"Total number of parameters in model: "
         f"{jasper_decoder.num_weights + jasper_encoder.num_weights}")
-    logger.info('================================')
+    nemo.logging.info('================================')
 
     # Train DAG
     audio_signal_t, a_sig_length_t, \
@@ -201,8 +200,7 @@ def create_all_dags(args, neural_factory):
         print_func=partial(
             monitor_asr_train_progress,
             labels=vocab,
-            eval_metric='CER',
-            logger=logger),
+            eval_metric='CER'),
         step_freq=args.train_eval_freq,
         get_tb_values=lambda x: [("loss", x[0])],
         tb_writer=neural_factory.tb_writer,
@@ -243,8 +241,7 @@ def create_all_dags(args, neural_factory):
             user_epochs_done_callback=partial(
                 process_evaluation_epoch,
                 eval_metric='CER',
-                tag=tagname,
-                logger=logger),
+                tag=tagname),
             eval_step=args.eval_freq,
             tb_writer=neural_factory.tb_writer)
 
@@ -279,10 +276,9 @@ def main():
         tensorboard_dir=args.tensorboard_dir)
     args.num_gpus = neural_factory.world_size
 
-    logger = neural_factory.logger
     checkpoint_dir = neural_factory.checkpoint_dir
     if args.local_rank is not None:
-        logger.info('Doing ALL GPU')
+        nemo.logging.info('Doing ALL GPU')
 
     # build dags
     train_loss, callbacks, steps_per_epoch = \
diff --git a/examples/asr/jasper_aishell_infer.py b/examples/asr/jasper_aishell_infer.py
index fff58e0626cc..bb19b8e48759 100644
--- a/examples/asr/jasper_aishell_infer.py
+++ b/examples/asr/jasper_aishell_infer.py
@@ -60,10 +60,9 @@ def main():
         local_rank=args.local_rank,
         optimization_level=nemo.core.Optimization.mxprO1,
         placement=device)
-    logger = neural_factory.logger
 
     if args.local_rank is not None:
-        logger.info('Doing ALL GPU')
+        nemo.logging.info('Doing ALL GPU')
 
     yaml = YAML(typ="safe")
     with open(args.model_config) as f:
@@ -88,7 +87,7 @@ def main():
         **eval_dl_params)
 
     n = len(data_layer)
-    logger.info('Evaluating {0} examples'.format(n))
+    nemo.logging.info('Evaluating {0} examples'.format(n))
 
     data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
         sample_rate=sample_rate,
@@ -118,15 +117,15 @@ def main():
             lm_path=args.lm_path,
             num_cpus=max(os.cpu_count(), 1))
 
-    logger.info('================================')
-    logger.info(
+    nemo.logging.info('================================')
+    nemo.logging.info(
         f"Number of parameters in encoder: {jasper_encoder.num_weights}")
-    logger.info(
+    nemo.logging.info(
         f"Number of parameters in decoder: {jasper_decoder.num_weights}")
-    logger.info(
+    nemo.logging.info(
         f"Total number of parameters in model: "
         f"{jasper_decoder.num_weights + jasper_encoder.num_weights}")
-    logger.info('================================')
+    nemo.logging.info('================================')
 
     audio_signal_e1, a_sig_length_e1, transcript_e1, transcript_len_e1 = \
         data_layer()
@@ -158,7 +157,7 @@ def main():
     cer = word_error_rate(hypotheses=greedy_hypotheses,
                           references=references,
                           use_cer=True)
-    logger.info("Greedy CER {:.2f}%".format(cer * 100))
+    nemo.logging.info("Greedy CER {:.2f}%".format(cer * 100))
 
     if args.lm_path:
         beam_hypotheses = []
@@ -170,7 +169,7 @@ def main():
 
         cer = word_error_rate(
             hypotheses=beam_hypotheses, references=references, use_cer=True)
-        logger.info("Beam CER {:.2f}".format(cer * 100))
+        nemo.logging.info("Beam CER {:.2f}".format(cer * 100))
 
     if args.save_logprob:
         # Convert logits to list of numpy arrays
diff --git a/examples/asr/jasper_an4.py b/examples/asr/jasper_an4.py
index 813b6dcc8d3f..d01151a1e1cb 100644
--- a/examples/asr/jasper_an4.py
+++ b/examples/asr/jasper_an4.py
@@ -59,7 +59,7 @@ def create_dags(jasper_params, args, nf):
     )
 
     num_samples = len(data_layer_eval)
-    nf.logger.info(f"Eval samples={num_samples}")
+    nemo.logging.info(f"Eval samples={num_samples}")
 
     jasper_encoder = nemo_asr.JasperEncoder(**jasper_params["JasperEncoder"])
 
@@ -93,7 +93,7 @@ def create_dags(jasper_params, args, nf):
     loss_e = ctc_loss(log_probs=log_probs_e, targets=transcript_e,
                       input_length=encoded_len_e,
                       target_length=transcript_len_e)
-    nf.logger.info(
+    nemo.logging.info(
         "Num of params in encoder: {0}".format(jasper_encoder.num_weights))
 
     # Callbacks to print info to console and Tensorboard
@@ -101,8 +101,7 @@ def create_dags(jasper_params, args, nf):
         tensors=[loss, predictions, transcript, transcript_len],
         print_func=partial(
             monitor_asr_train_progress,
-            labels=vocab,
-            logger=nf.logger),
+            labels=vocab),
         get_tb_values=lambda x: [["loss", x[0]]],
         tb_writer=nf.tb_writer,
     )
@@ -116,9 +115,7 @@ def create_dags(jasper_params, args, nf):
         user_iter_callback=partial(
             process_evaluation_batch,
             labels=vocab),
-        user_epochs_done_callback=partial(
-            process_evaluation_epoch,
-            logger=nf.logger),
+        user_epochs_done_callback=process_evaluation_epoch,
         eval_step=args.eval_freq,
         tb_writer=nf.tb_writer)
     callbacks = [train_callback, checkpointer_callback, eval_callback]
@@ -205,10 +202,10 @@ def main():
     )
 
     if args.test_after_training:
-        nf.logger.info("Testing greedy and beam search with LM WER.")
+        nemo.logging.info("Testing greedy and beam search with LM WER.")
         # Create BeamSearch NM
         if nf.world_size > 1:
-            nf.logger.warning("Skipping beam search WER as it does not work "
+            nemo.logging.warning("Skipping beam search WER as it does not work "
                               "if doing distributed training.")
         else:
             beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM(
@@ -230,7 +227,7 @@ def main():
                 evaluated_tensors[2], evaluated_tensors[3], vocab)
             wer = word_error_rate(
                 hypotheses=greedy_hypotheses, references=references)
-            nf.logger.info("Greedy WER: {:.2f}%".format(wer * 100))
+            nemo.logging.info("Greedy WER: {:.2f}%".format(wer * 100))
             if wer > wer_thr:
                 nf.sync_all_processes(False)
                 raise ValueError(f"Final eval greedy WER {wer*100:.2f}% > :"
@@ -247,7 +244,7 @@ def main():
 
             beam_wer = word_error_rate(
                 hypotheses=beam_hypotheses, references=references)
-            nf.logger.info("Beam WER {:.2f}%".format(beam_wer * 100))
+            nemo.logging.info("Beam WER {:.2f}%".format(beam_wer * 100))
             assert beam_wer <= beam_wer_thr, (
                 "Final eval beam WER {:.2f}%  > than {:.2f}%".format(
                     beam_wer*100, beam_wer_thr*100))
@@ -293,7 +290,7 @@ def main():
                 evaluated_tensors[2], evaluated_tensors[3], vocab)
             wer_new = word_error_rate(
                 hypotheses=greedy_hypotheses, references=references)
-            nf.logger.info("New greedy WER: {:.2f}%".format(wer_new * 100))
+            nemo.logging.info("New greedy WER: {:.2f}%".format(wer_new * 100))
             if wer_new > wer * 1.1:
                 nf.sync_all_processes(False)
                 raise ValueError(
diff --git a/examples/asr/jasper_eval.py b/examples/asr/jasper_eval.py
index 1b39ad7a079c..69f47586bf3b 100644
--- a/examples/asr/jasper_eval.py
+++ b/examples/asr/jasper_eval.py
@@ -71,10 +71,9 @@ def main():
         local_rank=args.local_rank,
         optimization_level=nemo.core.Optimization.mxprO1,
         placement=device)
-    logger = neural_factory.logger
 
     if args.local_rank is not None:
-        logger.info('Doing ALL GPU')
+        nemo.logging.info('Doing ALL GPU')
 
     yaml = YAML(typ="safe")
     with open(args.model_config) as f:
@@ -96,7 +95,7 @@ def main():
         **eval_dl_params)
 
     N = len(data_layer)
-    logger.info('Evaluating {0} examples'.format(N))
+    nemo.logging.info('Evaluating {0} examples'.format(N))
 
     data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
         sample_rate=sample_rate,
@@ -110,15 +109,15 @@ def main():
         num_classes=len(vocab))
     greedy_decoder = nemo_asr.GreedyCTCDecoder()
 
-    logger.info('================================')
-    logger.info(
+    nemo.logging.info('================================')
+    nemo.logging.info(
         f"Number of parameters in encoder: {jasper_encoder.num_weights}")
-    logger.info(
+    nemo.logging.info(
         f"Number of parameters in decoder: {jasper_decoder.num_weights}")
-    logger.info(
+    nemo.logging.info(
         f"Total number of parameters in model: "
         f"{jasper_decoder.num_weights + jasper_encoder.num_weights}")
-    logger.info('================================')
+    nemo.logging.info('================================')
 
     audio_signal_e1, a_sig_length_e1, transcript_e1, transcript_len_e1 =\
         data_layer()
@@ -144,7 +143,7 @@ def main():
     references = post_process_transcripts(
         evaluated_tensors[2], evaluated_tensors[3], vocab)
     wer = word_error_rate(hypotheses=greedy_hypotheses, references=references)
-    logger.info("Greedy WER {:.2f}%".format(wer*100))
+    nemo.logging.info("Greedy WER {:.2f}%".format(wer*100))
 
     if args.lm_path:
         if args.alpha_max is None:
@@ -161,8 +160,8 @@ def main():
 
         for alpha in np.arange(args.alpha, args.alpha_max, args.alpha_step):
             for beta in np.arange(args.beta, args.beta_max, args.beta_step):
-                logger.info('================================')
-                logger.info(f'Infering with (alpha, beta): ({alpha}, {beta})')
+                nemo.logging.info('================================')
+                nemo.logging.info(f'Infering with (alpha, beta): ({alpha}, {beta})')
                 beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM(
                     vocab=vocab,
                     beam_width=args.beam_width,
@@ -188,15 +187,15 @@ def main():
 
                 wer = word_error_rate(
                     hypotheses=beam_hypotheses, references=references)
-                logger.info("Beam WER {:.2f}%".format(wer*100))
+                nemo.logging.info("Beam WER {:.2f}%".format(wer*100))
                 beam_wers.append(((alpha, beta), wer*100))
 
-        logger.info('Beam WER for (alpha, beta)')
-        logger.info('================================')
-        logger.info('\n' + '\n'.join([str(e) for e in beam_wers]))
-        logger.info('================================')
+        nemo.logging.info('Beam WER for (alpha, beta)')
+        nemo.logging.info('================================')
+        nemo.logging.info('\n' + '\n'.join([str(e) for e in beam_wers]))
+        nemo.logging.info('================================')
         best_beam_wer = min(beam_wers, key=lambda x: x[1])
-        logger.info('Best (alpha, beta): '
+        nemo.logging.info('Best (alpha, beta): '
                     f'{best_beam_wer[0]}, '
                     f'WER: {best_beam_wer[1]:.2f}%')
 
diff --git a/examples/asr/quartznet.py b/examples/asr/quartznet.py
index 72a78385bc1c..5ab384245202 100644
--- a/examples/asr/quartznet.py
+++ b/examples/asr/quartznet.py
@@ -123,7 +123,7 @@ def create_all_dags(args, neural_factory):
 
             data_layers_eval.append(data_layer_eval)
     else:
-        neural_factory.logger.info("There were no val datasets passed")
+        nemo.logging.info("There were no val datasets passed")
 
     # create shared modules
 
@@ -193,8 +193,7 @@ def create_all_dags(args, neural_factory):
         tensors=[loss_t, predictions_t, transcript_t, transcript_len_t],
         print_func=partial(
             monitor_asr_train_progress,
-            labels=vocab,
-            logger=neural_factory.logger),
+            labels=vocab),
         get_tb_values=lambda x: [["loss", x[0]]],
         tb_writer=neural_factory.tb_writer)
 
@@ -238,8 +237,7 @@ def create_all_dags(args, neural_factory):
                 labels=vocab),
             user_epochs_done_callback=partial(
                 process_evaluation_epoch,
-                tag=tagname,
-                logger=neural_factory.logger),
+                tag=tagname),
             eval_step=args.eval_freq,
             tb_writer=neural_factory.tb_writer)
 
@@ -276,11 +274,10 @@ def main():
         tensorboard_dir=args.tensorboard_dir)
     args.num_gpus = neural_factory.world_size
 
-    logger = neural_factory.logger
     args.checkpoint_dir = neural_factory.checkpoint_dir
 
     if args.local_rank is not None:
-        logger.info('Doing ALL GPU')
+        nemo.logging.info('Doing ALL GPU')
 
     # build dags
     train_loss, callbacks, steps_per_epoch = \
diff --git a/examples/image/gan.py b/examples/image/gan.py
index e8bfef5aa615..9f5a93486cf3 100644
--- a/examples/image/gan.py
+++ b/examples/image/gan.py
@@ -121,10 +121,10 @@ def put_tensor_in_dict(tensors, global_vars):
 
 def print_losses(tensors):
     g_loss, i_loss, r_loss, grad_p = tensors
-    neural_factory.logger.info(f"Generator Loss: {g_loss}")
-    neural_factory.logger.info(f"Interpolated Loss: {i_loss}")
-    neural_factory.logger.info(f"Real Loss: {r_loss}")
-    neural_factory.logger.info(f"Grad Penalty: {grad_p}")
+    nemo.logging.info(f"Generator Loss: {g_loss}")
+    nemo.logging.info(f"Interpolated Loss: {i_loss}")
+    nemo.logging.info(f"Real Loss: {r_loss}")
+    nemo.logging.info(f"Grad Penalty: {grad_p}")
 
 
 def get_tb_name_value(tensors):
diff --git a/examples/nlp/asr_postprocessor.py b/examples/nlp/asr_postprocessor.py
index 9ce21edbc651..1df03e98e8c8 100644
--- a/examples/nlp/asr_postprocessor.py
+++ b/examples/nlp/asr_postprocessor.py
@@ -162,7 +162,7 @@ def create_pipeline(dataset, tokens_in_batch, clean=False, training=True):
 
 def print_loss(x):
     loss = x[0].item()
-    nf.logger.info("Training loss: {:.4f}".format(loss))
+    nemo.logging.info("Training loss: {:.4f}".format(loss))
 
 
 # callbacks
diff --git a/examples/nlp/bert_pretraining.py b/examples/nlp/bert_pretraining.py
index 4292182acb43..5cdd5550ff9d 100644
--- a/examples/nlp/bert_pretraining.py
+++ b/examples/nlp/bert_pretraining.py
@@ -156,12 +156,12 @@
                                         special_tokens,
                                         'train.txt')
     if args.tokenizer == "sentence-piece":
-        nf.logger.info("To use SentencePieceTokenizer.")
+        nemo.logging.info("To use SentencePieceTokenizer.")
         tokenizer = nemo_nlp.SentencePieceTokenizer(
             model_path=data_desc.tokenizer_model)
         tokenizer.add_special_tokens(special_tokens)
     elif args.tokenizer == "nemo-bert":
-        nf.logger.info("To use NemoBertTokenizer.")
+        nemo.logging.info("To use NemoBertTokenizer.")
         vocab_file = os.path.join(args.data_dir, 'vocab.txt')
         # To train on a Chinese dataset, use NemoBertTokenizer
         tokenizer = nemo_nlp.NemoBertTokenizer(vocab_file=vocab_file)
@@ -292,7 +292,7 @@ def create_pipeline(data_file,
 train_callback = nemo.core.SimpleLossLoggerCallback(
     tensors=log_tensors,
     step_freq=args.print_step_freq,
-    print_func=lambda x: nf.logger.info(
+    print_func=lambda x: nemo.logging.info(
         print_msg.format(
             *[y.item() for y in x])),
     get_tb_values=lambda x: [["loss", x[0]]],
diff --git a/examples/nlp/glue_with_BERT.py b/examples/nlp/glue_with_BERT.py
index 21a1cf800392..cf6388d895bf 100644
--- a/examples/nlp/glue_with_BERT.py
+++ b/examples/nlp/glue_with_BERT.py
@@ -292,7 +292,7 @@ def create_pipeline(max_seq_length=args.max_seq_length,
         tb_writer=nf.tb_writer,
         eval_step=steps_per_epoch))
 
-nf.logger.info(f"steps_per_epoch = {steps_per_epoch}")
+nemo.logging.info(f"steps_per_epoch = {steps_per_epoch}")
 callback_train = nemo.core.SimpleLossLoggerCallback(
     tensors=[train_loss],
     print_func=lambda x: print("Loss: {:.3f}".format(x[0].item())),
diff --git a/examples/nlp/joint_intent_slot_infer.py b/examples/nlp/joint_intent_slot_infer.py
index 104b7f308429..ebeada220fae 100644
--- a/examples/nlp/joint_intent_slot_infer.py
+++ b/examples/nlp/joint_intent_slot_infer.py
@@ -54,7 +54,7 @@
                                     args.dataset_name)
 
 # Evaluation pipeline
-nf.logger.info("Loading eval data...")
+nemo.logging.info("Loading eval data...")
 data_layer = nemo_nlp.BertJointIntentSlotDataLayer(
     input_file=f'{data_desc.data_dir}/{args.eval_file_prefix}.tsv',
     slot_file=f'{data_desc.data_dir}/{args.eval_file_prefix}_slots.tsv',
@@ -105,13 +105,13 @@ def get_preds(logits):
 
 
 pred_intents = np.argmax(intent_logits, 1)
-nf.logger.info('Intent prediction results')
+nemo.logging.info('Intent prediction results')
 
 intents = np.asarray(intents)
 pred_intents = np.asarray(pred_intents)
 intent_accuracy = sum(intents == pred_intents) / len(pred_intents)
-nf.logger.info(f'Intent accuracy: {intent_accuracy}')
-nf.logger.info(classification_report(intents, pred_intents))
+nemo.logging.info(f'Intent accuracy: {intent_accuracy}')
+nemo.logging.info(classification_report(intents, pred_intents))
 
 
 slot_preds = np.argmax(slot_logits, axis=2)
@@ -121,10 +121,10 @@ def get_preds(logits):
     slot_preds_list.extend(list(slot_preds[i][subtokens_mask[i]]))
     slot_labels_list.extend(list(slot_labels[i][subtokens_mask[i]]))
 
-nf.logger.info('Slot prediction results')
+nemo.logging.info('Slot prediction results')
 slot_labels_list = np.asarray(slot_labels_list)
 slot_preds_list = np.asarray(slot_preds_list)
 slot_accuracy = sum(slot_labels_list == slot_preds_list) / \
     len(slot_labels_list)
-nf.logger.info(f'Slot accuracy: {slot_accuracy}')
-nf.logger.info(classification_report(slot_labels_list, slot_preds_list))
+nemo.logging.info(f'Slot accuracy: {slot_accuracy}')
+nemo.logging.info(classification_report(slot_labels_list, slot_preds_list))
diff --git a/examples/nlp/joint_intent_slot_with_bert.py b/examples/nlp/joint_intent_slot_with_bert.py
index a6ab675cf619..f33d53c123d7 100644
--- a/examples/nlp/joint_intent_slot_with_bert.py
+++ b/examples/nlp/joint_intent_slot_with_bert.py
@@ -115,7 +115,7 @@ def create_pipeline(num_samples=-1,
                     num_gpus=1,
                     local_rank=0,
                     mode='train'):
-    nf.logger.info(f"Loading {mode} data...")
+    nemo.logging.info(f"Loading {mode} data...")
     data_file = f'{data_desc.data_dir}/{mode}.tsv'
     slot_file = f'{data_desc.data_dir}/{mode}_slots.tsv'
     shuffle = args.shuffle_data if mode == 'train' else False
@@ -142,12 +142,12 @@ def create_pipeline(num_samples=-1,
     print(f'The length of data layer is {data_size}')
 
     if data_size < batch_size:
-        nf.logger.warning("Batch_size is larger than the dataset size")
-        nf.logger.warning("Reducing batch_size to dataset size")
+        nemo.logging.warning("Batch_size is larger than the dataset size")
+        nemo.logging.warning("Reducing batch_size to dataset size")
         batch_size = data_size
 
     steps_per_epoch = math.ceil(data_size / (batch_size * num_gpus))
-    nf.logger.info(f"Steps_per_epoch = {steps_per_epoch}")
+    nemo.logging.info(f"Steps_per_epoch = {steps_per_epoch}")
 
     hidden_states = pretrained_bert_model(input_ids=ids,
                                           token_type_ids=type_ids,
diff --git a/examples/nlp/nmt_tutorial.py b/examples/nlp/nmt_tutorial.py
index 0715166777df..4430f8d5e1f0 100644
--- a/examples/nlp/nmt_tutorial.py
+++ b/examples/nlp/nmt_tutorial.py
@@ -104,7 +104,7 @@
     # source and target use different tokenizers, set tie_weight to False
     tie_weight = False
 else:
-    nf.logger.info(
+    nemo.logging.info(
         f"Unsupported language pair:{args.src_lang}-{args.tgt_lang}.")
     exit(1)
 
diff --git a/examples/nlp/punctuation_capitalization.py b/examples/nlp/punctuation_capitalization.py
index 3c431709055a..c12c3ad43911 100644
--- a/examples/nlp/punctuation_capitalization.py
+++ b/examples/nlp/punctuation_capitalization.py
@@ -81,7 +81,7 @@
                                    files_to_copy=[__file__],
                                    add_time_to_log_dir=True)
 
-nf.logger.info(args)
+nemo.logging.info(args)
 
 output_file = f'{nf.work_dir}/output.txt'
 
@@ -112,7 +112,7 @@
             pretrained_model_name=args.pretrained_bert_model)
 
     model.restore_from(args.bert_checkpoint)
-    nf.logger.info(f"Model restored from {args.bert_checkpoint}")
+    nemo.logging.info(f"Model restored from {args.bert_checkpoint}")
 
 
 hidden_size = model.local_parameters["hidden_size"]
@@ -143,7 +143,7 @@ def create_pipeline(num_samples=-1,
     global punct_classifier, punct_loss, \
         capit_classifier, capit_loss, task_loss
 
-    nf.logger.info(f"Loading {mode} data...")
+    nemo.logging.info(f"Loading {mode} data...")
     shuffle = args.shuffle_data if mode == 'train' else False
 
     text_file = f'{args.data_dir}/text_{mode}.txt'
@@ -185,7 +185,7 @@ def create_pipeline(num_samples=-1,
         class_weights = None
 
         if args.use_weighted_loss_punct:
-            nf.logger.info(f"Using weighted loss for punctuation task")
+            nemo.logging.info(f"Using weighted loss for punctuation task")
             punct_label_freqs = data_layer.dataset.punct_label_frequencies
             class_weights = utils.calc_class_weights(punct_label_freqs)
 
@@ -252,7 +252,7 @@ def create_pipeline(num_samples=-1,
                                            punct_label_ids=punct_label_ids,
                                            capit_label_ids=capit_label_ids)
 
-nf.logger.info(f"steps_per_epoch = {steps_per_epoch}")
+nemo.logging.info(f"steps_per_epoch = {steps_per_epoch}")
 
 # Create trainer and execute training action
 train_callback = nemo.core.SimpleLossLoggerCallback(
diff --git a/examples/nlp/punctuation_capitalization_infer.py b/examples/nlp/punctuation_capitalization_infer.py
index 520b54968aff..b9078bd9a1ac 100644
--- a/examples/nlp/punctuation_capitalization_infer.py
+++ b/examples/nlp/punctuation_capitalization_infer.py
@@ -121,7 +121,7 @@ def get_preds(logits):
 capit_preds = np.argmax(capit_logits, axis=2)
 
 for i, query in enumerate(args.queries):
-    nf.logger.info(f'Query: {query}')
+    nemo.logging.info(f'Query: {query}')
 
     punct_pred = punct_preds[i][subtokens_mask[i] > 0.5]
     capit_pred = capit_preds[i][subtokens_mask[i] > 0.5]
@@ -140,4 +140,4 @@ def get_preds(logits):
         if punct_label != args.none_label:
             output += punct_label
         output += ' '
-    nf.logger.info(f'Combined: {output.strip()}\n')
+    nemo.logging.info(f'Combined: {output.strip()}\n')
diff --git a/examples/nlp/sentence_classification_with_bert.py b/examples/nlp/sentence_classification_with_bert.py
index 19bf2303ac41..9f1ac416e8ea 100644
--- a/examples/nlp/sentence_classification_with_bert.py
+++ b/examples/nlp/sentence_classification_with_bert.py
@@ -97,7 +97,7 @@ def create_pipeline(num_samples=-1,
                     num_gpus=1,
                     local_rank=0,
                     mode='train'):
-    nf.logger.info(f"Loading {mode} data...")
+    nemo.logging.info(f"Loading {mode} data...")
     data_file = f'{data_desc.data_dir}/{mode}.tsv'
     shuffle = args.shuffle_data if mode == 'train' else False
 
@@ -115,12 +115,12 @@ def create_pipeline(num_samples=-1,
     data_size = len(data_layer)
 
     if data_size < batch_size:
-        nf.logger.warning("Batch_size is larger than the dataset size")
-        nf.logger.warning("Reducing batch_size to dataset size")
+        nemo.logging.warning("Batch_size is larger than the dataset size")
+        nemo.logging.warning("Reducing batch_size to dataset size")
         batch_size = data_size
 
     steps_per_epoch = math.ceil(data_size / (batch_size * num_gpus))
-    nf.logger.info(f"Steps_per_epoch = {steps_per_epoch}")
+    nemo.logging.info(f"Steps_per_epoch = {steps_per_epoch}")
 
     hidden_states = pretrained_bert_model(input_ids=ids,
                                           token_type_ids=type_ids,
diff --git a/examples/nlp/token_classification.py b/examples/nlp/token_classification.py
index f5138d290214..5309107d7dda 100644
--- a/examples/nlp/token_classification.py
+++ b/examples/nlp/token_classification.py
@@ -81,7 +81,7 @@
                                    files_to_copy=[__file__],
                                    add_time_to_log_dir=True)
 
-nf.logger.info(args)
+nemo.logging.info(args)
 
 output_file = f'{nf.work_dir}/output.txt'
 
@@ -112,7 +112,7 @@
             pretrained_model_name=args.pretrained_bert_model)
 
     model.restore_from(args.bert_checkpoint)
-    nf.logger.info(f"Model restored from {args.bert_checkpoint}")
+    nemo.logging.info(f"Model restored from {args.bert_checkpoint}")
 
 
 hidden_size = model.local_parameters["hidden_size"]
@@ -137,7 +137,7 @@ def create_pipeline(num_samples=-1,
 
     global classifier, task_loss
 
-    nf.logger.info(f"Loading {mode} data...")
+    nemo.logging.info(f"Loading {mode} data...")
     shuffle = args.shuffle_data if mode == 'train' else False
 
     text_file = f'{args.data_dir}/text_{mode}.txt'
@@ -177,11 +177,11 @@ def create_pipeline(num_samples=-1,
         class_weights = None
 
         if args.use_weighted_loss:
-            nf.logger.info(f"Using weighted loss")
+            nemo.logging.info(f"Using weighted loss")
             label_freqs = data_layer.dataset.label_frequencies
             class_weights = utils.calc_class_weights(label_freqs)
 
-            nf.logger.info(f"class_weights: {class_weights}")
+            nemo.logging.info(f"class_weights: {class_weights}")
 
         classifier = getattr(sys.modules[__name__], classifier)
         classifier = classifier(hidden_size=hidden_size,
@@ -214,7 +214,7 @@ def create_pipeline(num_samples=-1,
 eval_tensors, _, _, _, data_layer = create_pipeline(mode='dev',
                                                     label_ids=label_ids)
 
-nf.logger.info(f"steps_per_epoch = {steps_per_epoch}")
+nemo.logging.info(f"steps_per_epoch = {steps_per_epoch}")
 
 # Create trainer and execute training action
 train_callback = nemo.core.SimpleLossLoggerCallback(
diff --git a/examples/nlp/token_classification_infer.py b/examples/nlp/token_classification_infer.py
index c03bed04bbd4..981ab55f0b7b 100644
--- a/examples/nlp/token_classification_infer.py
+++ b/examples/nlp/token_classification_infer.py
@@ -99,7 +99,7 @@ def add_brackets(text, add=args.add_brackets):
 preds = np.argmax(logits, axis=2)
 
 for i, query in enumerate(args.queries):
-    nf.logger.info(f'Query: {query}')
+    nemo.logging.info(f'Query: {query}')
 
     pred = preds[i][subtokens_mask[i] > 0.5]
     words = query.strip().split()
@@ -114,4 +114,4 @@ def add_brackets(text, add=args.add_brackets):
             label = add_brackets(label)
             output += label
         output += ' '
-    nf.logger.info(f'Combined: {output.strip()}')
+    nemo.logging.info(f'Combined: {output.strip()}')
diff --git a/examples/tts/tacotron2.py b/examples/tts/tacotron2.py
index 9fb7005c2592..958102ff5738 100644
--- a/examples/tts/tacotron2.py
+++ b/examples/tts/tacotron2.py
@@ -72,7 +72,7 @@ def parse_args():
     return args, "".join(exp_directory)
 
 
-def create_NMs(tacotron2_params, logger=None, decoder_infer=False):
+def create_NMs(tacotron2_params, decoder_infer=False):
     data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
         **tacotron2_params["AudioToMelSpectrogramPreprocessor"])
     text_embedding = nemo_tts.TextEmbedding(
@@ -90,13 +90,12 @@ def create_NMs(tacotron2_params, logger=None, decoder_infer=False):
     t2_loss = nemo_tts.Tacotron2Loss(**tacotron2_params["Tacotron2Loss"])
     makegatetarget = nemo_tts.MakeGate()
 
-    if logger:
-        total_weights = (text_embedding.num_weights + t2_enc.num_weights
-                         + t2_dec.num_weights + t2_postnet.num_weights)
+    total_weights = (text_embedding.num_weights + t2_enc.num_weights
+                     + t2_dec.num_weights + t2_postnet.num_weights)
 
-        logger.info('================================')
-        logger.info(f"Total number of parameters: {total_weights}")
-        logger.info('================================')
+    nemo.logging.info('================================')
+    nemo.logging.info(f"Total number of parameters: {total_weights}")
+    nemo.logging.info('================================')
     return (data_preprocessor, text_embedding, t2_enc, t2_dec, t2_postnet,
             t2_loss, makegatetarget)
 
@@ -130,7 +129,7 @@ def create_train_dag(neural_factory,
 
     N = len(data_layer)
     steps_per_epoch = math.ceil(N / (batch_size * neural_factory.world_size))
-    neural_factory.logger.info(f'Have {N} examples to train on.')
+    nemo.logging.info(f'Have {N} examples to train on.')
 
     # Train DAG
     audio, audio_len, transcript, transcript_len = data_layer()
@@ -162,7 +161,7 @@ def create_train_dag(neural_factory,
     train_callback = nemo.core.SimpleLossLoggerCallback(
         tensors=[loss_t, spec_target, mel_postnet, gate, gate_target,
                  alignments],
-        print_func=lambda x: neural_factory.logger.info(f"Loss: {x[0].data}"),
+        print_func=lambda x: nemo.logging.info(f"Loss: {x[0].data}"),
         log_to_tb_func=partial(
             tacotron2_log_to_tb_func, log_images=True,
             log_images_freq=log_freq),
@@ -241,8 +240,7 @@ def create_eval_dags(neural_factory,
             user_iter_callback=tacotron2_process_eval_batch,
             user_epochs_done_callback=partial(
                 tacotron2_process_final_eval,
-                tag=tagname,
-                logger=neural_factory.logger),
+                tag=tagname),
             tb_writer_func=partial(
                 tacotron2_eval_log_to_tb_func,
                 tag=tagname),
@@ -286,7 +284,7 @@ def create_all_dags(neural_factory,
             eval_freq=eval_freq,
             cpu_per_dl=cpu_per_dl)
     else:
-        neural_factory.logger.info("There were no val datasets passed")
+        nemo.logging.info("There were no val datasets passed")
 
     callbacks = training_callbacks + eval_callbacks
     return training_loss, callbacks, steps_per_epoch
@@ -312,13 +310,13 @@ def main():
         tensorboard_dir=args.tensorboard_dir)
 
     if args.local_rank is not None:
-        neural_factory.logger.info('Doing ALL GPU')
+        nemo.logging.info('Doing ALL GPU')
 
     yaml = YAML(typ="safe")
     with open(args.model_config) as file:
         tacotron2_params = yaml.load(file)
     # instantiate neural modules
-    neural_modules = create_NMs(tacotron2_params, neural_factory.logger)
+    neural_modules = create_NMs(tacotron2_params)
 
     # build dags
     train_loss, callbacks, steps_per_epoch = create_all_dags(
diff --git a/examples/tts/waveglow.py b/examples/tts/waveglow.py
index 1daeb862d2a5..46fe671ee7e1 100644
--- a/examples/tts/waveglow.py
+++ b/examples/tts/waveglow.py
@@ -65,16 +65,15 @@ def parse_args():
     return args, "".join(exp_directory)
 
 
-def create_NMs(waveglow_params, logger=None):
+def create_NMs(waveglow_params):
     data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
         **waveglow_params["AudioToMelSpectrogramPreprocessor"])
     waveglow = nemo_tts.WaveGlowNM(**waveglow_params["WaveGlowNM"])
     waveglow_loss = nemo_tts.WaveGlowLoss()
 
-    if logger:
-        logger.info('================================')
-        logger.info(f"Total number of parameters: {waveglow.num_weights}")
-        logger.info('================================')
+    nemo.logging.info('================================')
+    nemo.logging.info(f"Total number of parameters: {waveglow.num_weights}")
+    nemo.logging.info('================================')
     return (data_preprocessor, waveglow, waveglow_loss)
 
 
@@ -101,7 +100,7 @@ def create_train_dag(neural_factory,
 
     N = len(data_layer)
     steps_per_epoch = int(N / (batch_size * neural_factory.world_size))
-    neural_factory.logger.info('Have {0} examples to train on.'.format(N))
+    nemo.logging.info('Have {0} examples to train on.'.format(N))
 
     # Train DAG
     audio, audio_len, = data_layer()
@@ -215,7 +214,7 @@ def create_all_dags(neural_factory,
             eval_freq=eval_freq,
             cpu_per_dl=cpu_per_dl)
     else:
-        neural_factory.logger.info("There were no val datasets passed")
+        nemo.logging.info("There were no val datasets passed")
 
     callbacks = training_callbacks + eval_callbacks
     return training_loss, callbacks, steps_per_epoch
@@ -241,13 +240,13 @@ def main():
         tensorboard_dir=args.tensorboard_dir)
 
     if args.local_rank is not None:
-        neural_factory.logger.info('Doing ALL GPU')
+        nemo.logging.info('Doing ALL GPU')
 
     yaml = YAML(typ="safe")
     with open(args.model_config) as file:
         waveglow_params = yaml.load(file)
     # instantiate neural modules
-    neural_modules = create_NMs(waveglow_params, neural_factory.logger)
+    neural_modules = create_NMs(waveglow_params)
 
     # build dags
     train_loss, callbacks, steps_per_epoch = create_all_dags(
diff --git a/nemo/nemo/backends/pytorch/actions.py b/nemo/nemo/backends/pytorch/actions.py
index 4f424b69142e..e38d6cf901f8 100644
--- a/nemo/nemo/backends/pytorch/actions.py
+++ b/nemo/nemo/backends/pytorch/actions.py
@@ -2,7 +2,6 @@
 import importlib
 import itertools
 import json
-import logging
 import os
 from pathlib import Path
 from typing import List, Optional, Dict
@@ -55,8 +54,7 @@ def __init__(
             local_rank=None,
             global_rank=None,
             tb_writer=None,
-            optimization_level=Optimization.mxprO0,
-            logger=None):
+            optimization_level=Optimization.mxprO0):
         need_apex = local_rank is not None or \
             optimization_level != Optimization.mxprO0
         if need_apex:
@@ -375,14 +373,15 @@ def __setup_optimizer(optimizer_instance,
                     trust_coefficient=optimization_params.get("larc_eta", 2e-2)
                 )
         else:
-            logging.info("Optimizer instance: {0} is provided.")
+            nemo.logging.info("Optimizer instance: {0} is provided.")
             if optimizer_class is not None and optimizer_class != "":
-                logging.warning("Ignoring `optimizer_class` parameter because"
-                                "`optimizer_instance` is provided")
+                nemo.logging.warning(
+                    "Ignoring `optimizer_class` parameter because"
+                    "`optimizer_instance` is provided")
             if optimization_params is not None and optimization_params != {}:
-                logging.warning("Ignoring `optimization_params` parameter for "
-                                "optimizer because `optimizer_instance` "
-                                "is provided")
+                nemo.logging.warning(
+                    "Ignoring `optimization_params` parameter for "
+                    "optimizer because `optimizer_instance` is provided")
             optimizer = optimizer_instance
         return optimizer
 
@@ -1040,8 +1039,7 @@ def __module_export(module,
                         output,
                         d_format: DeploymentFormat,
                         input_example=None,
-                        output_example=None,
-                        logger=None):
+                        output_example=None):
         # Check if output already exists
         destination = Path(output)
         if destination.exists():
@@ -1095,7 +1093,6 @@ def __extract_dynamic_axes(port_name: str, ntype: NeuralType,
         # in the finally block
         type(module).__call__ = torch.nn.Module.__call__
         module._local_parameters = None
-        module._logger = None
         module._placement = None
         module._factory = None
         module._device = None
@@ -1151,12 +1148,8 @@ def __extract_dynamic_axes(port_name: str, ntype: NeuralType,
                 raise NotImplementedError(
                     f"Not supported deployment format: {d_format}")
         except Exception as e:  # nopep8
-            if logger:
-                logger.error(f'ERROR: module export failed for {module} with '
-                             f'exception {e}')
-            else:
-                print(f'ERROR: module export failed for {module} with '
-                      f'exception {e}')
+            nemo.logging.error(f'ERROR: module export failed for {module} '
+                               f'with exception {e}')
         finally:
             def __old_call__(self, force_pt=False, *input, **kwargs):
                 pt_call = len(input) > 0 or force_pt
@@ -1172,8 +1165,7 @@ def deployment_export(module,
                           output: str,
                           d_format: DeploymentFormat,
                           input_example=None,
-                          output_example=None,
-                          logger=None):
+                          output_example=None):
         """Exports Neural Module instance for deployment.
 
         Args:
@@ -1191,8 +1183,7 @@ def deployment_export(module,
                 output=output,
                 d_format=d_format,
                 input_example=input_example,
-                output_example=output_example,
-                logger=logger)
+                output_example=output_example)
 
     def train(self,
               tensors_to_optimize,
@@ -1548,7 +1539,6 @@ def infer(self,
               tensors,
               checkpoint_dir=None,
               ckpt_pattern='',
-              logger=None,
               verbose=True,
               cache=False,
               use_cache=False,
@@ -1586,8 +1576,7 @@ def infer(self,
             )
 
             for mod, checkpoint in zip(modules_to_restore, module_checkpoints):
-                if logger:
-                    logger.info(f"Restoring {mod} from {checkpoint}")
+                nemo.logging.info(f"Restoring {mod} from {checkpoint}")
                 mod.restore_from(checkpoint, self._local_rank)
 
         # Init Amp
diff --git a/nemo/nemo/core/neural_factory.py b/nemo/nemo/core/neural_factory.py
index 1c0a6f409b52..eefe394c7120 100644
--- a/nemo/nemo/core/neural_factory.py
+++ b/nemo/nemo/core/neural_factory.py
@@ -555,7 +555,7 @@ def get_module(self, name, params, collection, pretrained=False):
         """
         if params is not None and "optimization_level" in params:
             if params["optimization_level"] != self._optim_level:
-                self.logger.warning(
+                nemo.logging.warning(
                     "Module's {0} requested optimization level {1} is"
                     "different from the one specified by factory - {2}."
                     "Using: {3} for this module".format(
@@ -649,9 +649,9 @@ def deployment_export(self,
         # Custom hacks: These will be put into a proper place soon
         # We are checking type like this to avoid taking dependency on nemo_asr
         if type(module).__name__ == "JasperEncoder":
-            # self.logger.warning(f"Module is JasperEncoder. We are removing"
-            #                    f"input and output length ports since they "
-            #                    f"are not needed for deployment")
+            # nemo.logging.warning(f"Module is JasperEncoder. We are removing"
+            #                     f"input and output length ports since they "
+            #                     f"are not needed for deployment")
             # del module._input_ports['length']
             # del module._output_ports['encoded_lengths']
 
@@ -661,7 +661,7 @@ def deployment_export(self,
                 if type(m).__name__ == "MaskedConv1d":
                     m.use_mask = False
                     m_count += 1
-            self.logger.warning(f"Turned off {m_count} masked convolutions")
+            nemo.logging.warning(f"Turned off {m_count} masked convolutions")
 
         return self._trainer.deployment_export(
             module=module,
@@ -737,14 +737,14 @@ def _get_trainer(self, tb_writer=None):
             raise ValueError("Only PyTorch backend is currently supported.")
 
     def get_trainer(self, tb_writer=None):
-        self.logger.warning(
+        nemo.logging.warning(
             f"This function is deprecated and will be removed"
             f"in future versions of NeMo."
             f"Please use .train(...), .eval(...), .infer(...) and "
             f".create_optimizer(...) directly methods from "
             f"NeuralModuleFactory instance.")
         if self._trainer:
-            self.logger.warning(
+            nemo.logging.warning(
                 "The trainer instance was created during initialization of "
                 "Neural factory, using the already created instance.")
             return self._trainer
@@ -766,8 +766,8 @@ def sync_all_processes(self, status=True):
                 message on its own and exit
         """
         if self._world_size == 1:
-            self.logger.info("sync_all_processes does nothing if there is "
-                             "one process")
+            nemo.logging.info("sync_all_processes does nothing if there is "
+                              "one process")
             return
         if self._backend == Backend.PyTorch:
             import torch
@@ -775,7 +775,7 @@ def sync_all_processes(self, status=True):
             torch.distributed.all_reduce(
                 status_tensor, op=torch.distributed.ReduceOp.MIN)
             if status_tensor.item() == 0:
-                self.logger.error("At least one process had a failure")
+                nemo.logging.error("At least one process had a failure")
                 if status:
                     raise ValueError(
                         f"Process with global rank {self._global_rank} entered"
diff --git a/nemo/nemo/core/neural_modules.py b/nemo/nemo/core/neural_modules.py
index fbd30809bc03..9ffba4d5d7d6 100644
--- a/nemo/nemo/core/neural_modules.py
+++ b/nemo/nemo/core/neural_modules.py
@@ -8,7 +8,6 @@
 from collections import namedtuple
 from enum import Enum
 from inspect import getargvalues, stack
-import logging
 from typing import Optional, Dict, Set, Tuple, List
 import uuid
 
@@ -66,8 +65,6 @@ def __init__(
             else DeviceType.GPU
         self._opt_level = factory.optim_level if factory is not None\
             else Optimization.mxprO0
-        self._logger = factory.logger if factory is not None\
-            else logging
 
         # Update module properties using overrides if overrides exist
         if placement is not None:
@@ -77,11 +74,11 @@ def __init__(
         self._uuid = str(uuid.uuid4())
 
         # if kwargs:
-        #    self._logger.warning(
+        #    nemo.logging.warning(
         #        "When constructing {}. The base "
         #        "NeuralModule class received the following unused "
         #        "arguments:".format(self.__class__.__name__))
-        #    self._logger.warning("{}".format(kwargs.keys()))
+        #    nemo.logging.warning("{}".format(kwargs.keys()))
 
     @staticmethod
     def create_ports(**kwargs):
diff --git a/nemo/nemo/utils/helpers.py b/nemo/nemo/utils/helpers.py
index 236ef0daeca1..3104f76a8516 100644
--- a/nemo/nemo/utils/helpers.py
+++ b/nemo/nemo/utils/helpers.py
@@ -122,14 +122,13 @@ def get_cuda_device(placement):
 #                                          placement=device)
 
 
-def maybe_download_from_cloud(url, filename, logger=None) -> str:
+def maybe_download_from_cloud(url, filename) -> str:
     """
     Helper function to download pre-trained weights from the cloud
     Args:
         url: (str) URL of storage
         filename: (str) what to download. The request will be issued
         to url/filename or url/filename.tar.gz
-        logger: logger to log stuff
 
     Returns:
         If successful - absolute local path to the directory where
@@ -160,6 +159,5 @@ def maybe_download_from_cloud(url, filename, logger=None) -> str:
         else:
             return ""
     except (FileNotFoundError, ConnectionError, OSError):
-        if logger is not None:
-            logger.info(f"Could not obtain {filename} from the cloud")
+        nemo.logging.info(f"Could not obtain {filename} from the cloud")
         return ""

From e4434a37b2856115623023ccf5996869a474cec4 Mon Sep 17 00:00:00 2001
From: Jason <jasoli@nvidia.com>
Date: Wed, 15 Jan 2020 18:12:54 -0800
Subject: [PATCH 07/10] update docs

Signed-off-by: Jason <jasoli@nvidia.com>
---
 docs/docs_zh/sources/source/asr/tutorial.rst         |  6 ++----
 .../sources/source/nlp/joint_intent_slot_filling.rst |  2 +-
 docs/docs_zh/sources/source/tutorials/callbacks.rst  | 12 ++++--------
 docs/sources/source/asr/tutorial.rst                 |  6 ++----
 .../sources/source/nlp/joint_intent_slot_filling.rst |  8 ++++----
 docs/sources/source/nlp/punctuation.rst              |  4 ++--
 docs/sources/source/tutorials/callbacks.rst          | 12 ++++--------
 7 files changed, 19 insertions(+), 31 deletions(-)

diff --git a/docs/docs_zh/sources/source/asr/tutorial.rst b/docs/docs_zh/sources/source/asr/tutorial.rst
index 91943636d9f3..b32943dbc3c6 100644
--- a/docs/docs_zh/sources/source/asr/tutorial.rst
+++ b/docs/docs_zh/sources/source/asr/tutorial.rst
@@ -86,7 +86,6 @@ Jasper 家族的模型的结构可以这样表示 Jasper_[BxR] 其中 B 是块
         log_dir='jasper12x1SEP',
         create_tb_writer=True)
     tb_writer = nf.tb_writer
-    logger = nf.logger
 
     # 到训练列表文件的路径
     train_dataset = "<path_to_where_you_put_data>/train_clean_100.json"
@@ -167,8 +166,7 @@ Jasper 家族的模型的结构可以这样表示 Jasper_[BxR] 其中 B 是块
         # 为了能把日志打印到屏幕，定义一个 print_func 函数
         print_func=partial(
             monitor_asr_train_progress,
-            labels=labels,
-            logger=logger
+            labels=labels
         ))
 
     saver_callback = nemo.core.CheckpointCallback(
@@ -188,7 +186,7 @@ Jasper 家族的模型的结构可以这样表示 Jasper_[BxR] 其中 B 是块
             ),
         # 如何把每个 batch 的验证集统计指标（比如WER）合并起来
         user_epochs_done_callback=partial(
-            process_evaluation_epoch, tag="DEV-CLEAN", logger=logger
+            process_evaluation_epoch, tag="DEV-CLEAN"
             ),
         eval_step=500,
         tb_writer=tb_writer)
diff --git a/docs/docs_zh/sources/source/nlp/joint_intent_slot_filling.rst b/docs/docs_zh/sources/source/nlp/joint_intent_slot_filling.rst
index 8041190e90da..905b2a0400a0 100644
--- a/docs/docs_zh/sources/source/nlp/joint_intent_slot_filling.rst
+++ b/docs/docs_zh/sources/source/nlp/joint_intent_slot_filling.rst
@@ -66,7 +66,7 @@
     .. code-block:: python
 
         def get_dataset(data_desc, mode, num_samples):
-            nf.logger.info(f"Loading {mode} data...")
+            nemo.logging.info(f"Loading {mode} data...")
             data_file = getattr(data_desc, mode + '_file')
             slot_file = getattr(data_desc, mode + '_slot_file')
             shuffle = args.shuffle_data if mode == 'train' else False
diff --git a/docs/docs_zh/sources/source/tutorials/callbacks.rst b/docs/docs_zh/sources/source/tutorials/callbacks.rst
index 18728b575ee9..ff935f8e18b9 100644
--- a/docs/docs_zh/sources/source/tutorials/callbacks.rst
+++ b/docs/docs_zh/sources/source/tutorials/callbacks.rst
@@ -24,16 +24,13 @@ SimpleLossLoggerCallback 是用来记录训练过程中的一些指标数据比
 get_tb_values() 和 log_to_tb_func() 函数的输入。两个推荐重写的参数是 print_func() 和
 get_tb_values() 或者 log_to_tb_func() 任选其一。
 
-print_func() 应该用来记录打印到屏幕上的值。我们推荐使用 neural_factory.logger.info()
+print_func() 应该用来记录打印到屏幕上的值。我们推荐使用 nemo.logging.info()
 来取代 print() 函数。比如，可以这么打印 loss 值：
 
 .. code-block:: python
 
-    def my_print_func(tensors, logger=None):
-        if logger:
-            logger.info(f"Loss {tensors[0]}")
-        else:
-            print(f"Loss {tensors[0]}")
+    def my_print_func(tensors):
+        nemo.logging.info(f"Loss {tensors[0]}")
 
 我们提供了两个方法来打印到 tensorboard: get_tb_values() 和
 log_to_tb_func()。对于记录标量的简单用例，我们推荐使用 get_tb_values()。
@@ -69,8 +66,7 @@ SimpleLossLoggerCallback可以像下面这样创建:
         # 定义我们想要传给print_func和get_tb_values的张量
         tensors=[train_loss],
         # 传入我们想要用的打印函数
-        # 注意我们用 partial 来指定可以额外传入的参数
-        print_func=partial(my_print_func, logger=neural_factory.logger),
+        print_func=my_print_func,
         # 传入可以返回tensorboard标签和张量的函数
         get_tb_values=my_get_tb_values,
         # 我们想要回调这个函数的频次
diff --git a/docs/sources/source/asr/tutorial.rst b/docs/sources/source/asr/tutorial.rst
index 025868ada51b..dc04e17a6920 100644
--- a/docs/sources/source/asr/tutorial.rst
+++ b/docs/sources/source/asr/tutorial.rst
@@ -94,7 +94,6 @@ The script below does both training (on `train_clean_100.json`) and evaluation (
         log_dir='jasper12x1SEP',
         create_tb_writer=True)
     tb_writer = nf.tb_writer
-    logger = nf.logger
 
     # Path to our training manifest
     train_dataset = "<path_to_where_you_put_data>/train_clean_100.json"
@@ -177,8 +176,7 @@ The script below does both training (on `train_clean_100.json`) and evaluation (
         # To print logs to screen, define a print_func
         print_func=partial(
             monitor_asr_train_progress,
-            labels=labels,
-            logger=logger
+            labels=labels
         ))
 
     saver_callback = nemo.core.CheckpointCallback(
@@ -199,7 +197,7 @@ The script below does both training (on `train_clean_100.json`) and evaluation (
             ),
         # how to aggregate statistics (e.g. WER) for the evaluation epoch
         user_epochs_done_callback=partial(
-            process_evaluation_epoch, tag="DEV-CLEAN", logger=logger
+            process_evaluation_epoch, tag="DEV-CLEAN"
             ),
         eval_step=500,
         tb_writer=tb_writer)
diff --git a/docs/sources/source/nlp/joint_intent_slot_filling.rst b/docs/sources/source/nlp/joint_intent_slot_filling.rst
index 89a3437f26d8..7d11f8e7b364 100644
--- a/docs/sources/source/nlp/joint_intent_slot_filling.rst
+++ b/docs/sources/source/nlp/joint_intent_slot_filling.rst
@@ -109,7 +109,7 @@ Next, we define all Neural Modules participating in our joint intent slot fillin
                             num_gpus=1,
                             local_rank=0,
                             mode='train'):
-            nf.logger.info(f"Loading {mode} data...")
+            nemo.logging.info(f"Loading {mode} data...")
             data_file = f'{data_desc.data_dir}/{mode}.tsv'
             slot_file = f'{data_desc.data_dir}/{mode}_slots.tsv'
             shuffle = args.shuffle_data if mode == 'train' else False
@@ -136,12 +136,12 @@ Next, we define all Neural Modules participating in our joint intent slot fillin
             print(f'The length of data layer is {data_size}')
 
             if data_size < batch_size:
-                nf.logger.warning("Batch_size is larger than the dataset size")
-                nf.logger.warning("Reducing batch_size to dataset size")
+                nemo.logging.warning("Batch_size is larger than the dataset size")
+                nemo.logging.warning("Reducing batch_size to dataset size")
                 batch_size = data_size
 
             steps_per_epoch = math.ceil(data_size / (batch_size * num_gpus))
-            nf.logger.info(f"Steps_per_epoch = {steps_per_epoch}")
+            nemo.logging.info(f"Steps_per_epoch = {steps_per_epoch}")
 
             hidden_states = pretrained_bert_model(input_ids=ids,
                                                   token_type_ids=type_ids,
diff --git a/docs/sources/source/nlp/punctuation.rst b/docs/sources/source/nlp/punctuation.rst
index f5063767cf1b..36833ac898e2 100644
--- a/docs/sources/source/nlp/punctuation.rst
+++ b/docs/sources/source/nlp/punctuation.rst
@@ -293,7 +293,7 @@ Run inference, append punctuation and capitalize words based on the generated pr
     capit_preds = np.argmax(capit_logits, axis=2)
 
     for i, query in enumerate(queries):
-        nf.logger.info(f'Query: {query}')
+        nemo.logging.info(f'Query: {query}')
 
         punct_pred = punct_preds[i][subtokens_mask[i] > 0.5]
         capit_pred = capit_preds[i][subtokens_mask[i] > 0.5]
@@ -312,7 +312,7 @@ Run inference, append punctuation and capitalize words based on the generated pr
             if punct_label != 'O':
                 output += punct_label
             output += ' '
-        nf.logger.info(f'Combined: {output.strip()}\n')
+        nemo.logging.info(f'Combined: {output.strip()}\n')
 
 Inference results:
     
diff --git a/docs/sources/source/tutorials/callbacks.rst b/docs/sources/source/tutorials/callbacks.rst
index 1906442079ef..0a9ee1941060 100644
--- a/docs/sources/source/tutorials/callbacks.rst
+++ b/docs/sources/source/tutorials/callbacks.rst
@@ -28,16 +28,13 @@ training. The two reccomended arguments to override are print_func(), and
 either get_tb_values() or log_to_tb_func().
 
 print_func() should be used to log values to screen. We recommend using
-neural_factory.logger.info() in place
+nemo.logging.info() in place
 of print(). For example, it can be used to print the loss value:
 
 .. code-block:: python
 
-    def my_print_func(tensors, logger=None):
-        if logger:
-            logger.info(f"Loss {tensors[0]}")
-        else:
-            print(f"Loss {tensors[0]}")
+    def my_print_func(tensors):
+        nemo.logging.info(f"Loss {tensors[0]}")
 
 We provide two methods to log to tensorboard: get_tb_values() and
 log_to_tb_func(). For simple use case of logging scalars, we recommend
@@ -76,8 +73,7 @@ SimpleLossLoggerCallback can be constructed as follows:
         # Define tensors that we want to pass to print_func, and get_tb_values
         tensors=[train_loss],
         # Pass the print function that we want to use
-        # Note we use partial to specify additional parameters
-        print_func=partial(my_print_func, logger=neural_factory.logger),
+        print_func=my_print_func,
         # Pass the function that returns tensorboard tags and scalars
         get_tb_values=my_get_tb_values,
         # How often we want to call this callback

From 5bac6004c22ba94ed497991d3deed72fbe14cfd2 Mon Sep 17 00:00:00 2001
From: Jason <jasoli@nvidia.com>
Date: Wed, 15 Jan 2020 18:15:00 -0800
Subject: [PATCH 08/10] pep8

Signed-off-by: Jason <jasoli@nvidia.com>
---
 examples/asr/jasper_an4.py  | 4 ++--
 examples/asr/jasper_eval.py | 7 ++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/examples/asr/jasper_an4.py b/examples/asr/jasper_an4.py
index d01151a1e1cb..5a3bdd7ed976 100644
--- a/examples/asr/jasper_an4.py
+++ b/examples/asr/jasper_an4.py
@@ -205,8 +205,8 @@ def main():
         nemo.logging.info("Testing greedy and beam search with LM WER.")
         # Create BeamSearch NM
         if nf.world_size > 1:
-            nemo.logging.warning("Skipping beam search WER as it does not work "
-                              "if doing distributed training.")
+            nemo.logging.warning("Skipping beam search WER as it does not "
+                                 "work if doing distributed training.")
         else:
             beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM(
                 vocab=vocab,
diff --git a/examples/asr/jasper_eval.py b/examples/asr/jasper_eval.py
index 69f47586bf3b..fefd92f0ba17 100644
--- a/examples/asr/jasper_eval.py
+++ b/examples/asr/jasper_eval.py
@@ -161,7 +161,8 @@ def main():
         for alpha in np.arange(args.alpha, args.alpha_max, args.alpha_step):
             for beta in np.arange(args.beta, args.beta_max, args.beta_step):
                 nemo.logging.info('================================')
-                nemo.logging.info(f'Infering with (alpha, beta): ({alpha}, {beta})')
+                nemo.logging.info(
+                    f'Infering with (alpha, beta): ({alpha}, {beta})')
                 beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM(
                     vocab=vocab,
                     beam_width=args.beam_width,
@@ -196,8 +197,8 @@ def main():
         nemo.logging.info('================================')
         best_beam_wer = min(beam_wers, key=lambda x: x[1])
         nemo.logging.info('Best (alpha, beta): '
-                    f'{best_beam_wer[0]}, '
-                    f'WER: {best_beam_wer[1]:.2f}%')
+                          f'{best_beam_wer[0]}, '
+                          f'WER: {best_beam_wer[1]:.2f}%')
 
     if args.save_logprob:
         # Convert logits to list of numpy arrays

From 9f83e1afb1202c0d5c14a2b8f812288ce0034411 Mon Sep 17 00:00:00 2001
From: Jason <jasoli@nvidia.com>
Date: Wed, 15 Jan 2020 18:29:07 -0800
Subject: [PATCH 09/10] bug fix: drop @staticmethod from UnicodeRegex._property_chars

Signed-off-by: Jason <jasoli@nvidia.com>
---
 collections/nemo_nlp/nemo_nlp/utils/metrics/sacrebleu.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/collections/nemo_nlp/nemo_nlp/utils/metrics/sacrebleu.py b/collections/nemo_nlp/nemo_nlp/utils/metrics/sacrebleu.py
index 28e1218c8705..88735750237e 100755
--- a/collections/nemo_nlp/nemo_nlp/utils/metrics/sacrebleu.py
+++ b/collections/nemo_nlp/nemo_nlp/utils/metrics/sacrebleu.py
@@ -1316,7 +1316,6 @@ class UnicodeRegex:
 
     without depending on https://pypi.python.org/pypi/regex/."""
 
-    @staticmethod
     def _property_chars(prefix):
         return ''.join(
             chr(x) for x in range(sys.maxunicode)

From 0200c9e7222a4ded6d4ac716bd85078076bdaeed Mon Sep 17 00:00:00 2001
From: Jason <jasoli@nvidia.com>
Date: Tue, 21 Jan 2020 10:30:59 -0800
Subject: [PATCH 10/10] address PR comments

Signed-off-by: Jason <jasoli@nvidia.com>
---
 .../data/datasets/token_classification.py     | 28 +++---
 .../nemo_nlp/nemo_nlp/data/datasets/utils.py  |  4 +-
 .../callbacks/sentence_classification.py      |  3 +-
 examples/asr/jasper.py                        |  2 +-
 examples/asr/jasper_aishell.py                |  2 +-
 examples/asr/quartznet.py                     |  2 +-
 examples/nlp/nmt_tutorial.py                  |  3 +-
 nemo/nemo/backends/pytorch/nm.py              | 88 +++++++++----------
 8 files changed, 66 insertions(+), 66 deletions(-)

diff --git a/collections/nemo_nlp/nemo_nlp/data/datasets/token_classification.py b/collections/nemo_nlp/nemo_nlp/data/datasets/token_classification.py
index 3a7dd7974da6..de3688b4b5cf 100644
--- a/collections/nemo_nlp/nemo_nlp/data/datasets/token_classification.py
+++ b/collections/nemo_nlp/nemo_nlp/data/datasets/token_classification.py
@@ -140,23 +140,23 @@ def get_features(queries,
 
         all_segment_ids.append([0] * max_seq_length)
 
-    nemo.logging.info(f'{too_long_count} are longer than {max_seq_length}')
+    nemo.logging.warning(f'{too_long_count} are longer than {max_seq_length}')
 
     for i in range(min(len(all_input_ids), 5)):
-        nemo.logging.info("*** Example ***")
-        nemo.logging.info("i: %s" % (i))
-        nemo.logging.info(
-            "subtokens: %s" % " ".join(list(map(str, all_subtokens[i]))))
-        nemo.logging.info(
-            "loss_mask: %s" % " ".join(list(map(str, all_loss_mask[i]))))
-        nemo.logging.info(
-            "input_mask: %s" % " ".join(list(map(str, all_input_mask[i]))))
-        nemo.logging.info(
-            "subtokens_mask: %s" % " ".join(list(map(
+        nemo.logging.debug("*** Example ***")
+        nemo.logging.debug("i: %s", i)
+        nemo.logging.debug(
+            "subtokens: %s", " ".join(list(map(str, all_subtokens[i]))))
+        nemo.logging.debug(
+            "loss_mask: %s", " ".join(list(map(str, all_loss_mask[i]))))
+        nemo.logging.debug(
+            "input_mask: %s", " ".join(list(map(str, all_input_mask[i]))))
+        nemo.logging.debug(
+            "subtokens_mask: %s", " ".join(list(map(
                 str, all_subtokens_mask[i]))))
         if with_label:
-            nemo.logging.info(
-                "labels: %s" % " ".join(list(map(str, all_labels[i]))))
+            nemo.logging.debug(
+                "labels: %s", " ".join(list(map(str, all_labels[i]))))
     return (all_input_ids,
             all_segment_ids,
             all_input_mask,
@@ -267,7 +267,7 @@ def __init__(self,
             # for dev/test sets use label mapping from training set
             if label_ids:
                 if len(label_ids) != len(unique_labels):
-                    nemo.logging.info(
+                    nemo.logging.warning(
                         f'Not all labels from the specified' +
                         ' label_ids dictionary are present in the' +
                         ' current dataset. Using the provided' +
diff --git a/collections/nemo_nlp/nemo_nlp/data/datasets/utils.py b/collections/nemo_nlp/nemo_nlp/data/datasets/utils.py
index c1cf244343e8..aed728ffd9b9 100644
--- a/collections/nemo_nlp/nemo_nlp/data/datasets/utils.py
+++ b/collections/nemo_nlp/nemo_nlp/data/datasets/utils.py
@@ -1288,7 +1288,7 @@ def __init__(self, dataset_name, data_dir, do_lower_case):
             self.vocab_size = create_vocab_lm(data_dir, do_lower_case)
             self.data_dir = data_dir
         else:
-            nemo.logging.info(
+            nemo.logging.warning(
                 "Looks like you passed a dataset name that isn't "
                 "already supported by NeMo. Please make sure that "
                 "you build the preprocessing method for it.")
@@ -1368,7 +1368,7 @@ def __init__(self,
                 special_tokens,
                 train_file)
         else:
-            nemo.logging.info(
+            nemo.logging.warning(
                 "Looks like you passed a dataset name that isn't "
                 "already supported by NeMo. Please make sure that "
                 "you build the preprocessing method for it.")
diff --git a/collections/nemo_nlp/nemo_nlp/utils/callbacks/sentence_classification.py b/collections/nemo_nlp/nemo_nlp/utils/callbacks/sentence_classification.py
index 959fb8f6fd99..feb73faa004c 100644
--- a/collections/nemo_nlp/nemo_nlp/utils/callbacks/sentence_classification.py
+++ b/collections/nemo_nlp/nemo_nlp/utils/callbacks/sentence_classification.py
@@ -5,11 +5,12 @@
 import random
 import time
 
-import matplotlib
 from matplotlib import pyplot as plt  # nopep8
 import numpy as np  # nopep8
 from sklearn.metrics import confusion_matrix, classification_report  # nopep8
 
+import nemo
+
 __all__ = ['eval_iter_callback', 'eval_epochs_done_callback']
 
 
diff --git a/examples/asr/jasper.py b/examples/asr/jasper.py
index 8c00db9b279a..7b32349e8db8 100644
--- a/examples/asr/jasper.py
+++ b/examples/asr/jasper.py
@@ -138,7 +138,7 @@ def create_all_dags(args, neural_factory):
 
             data_layers_eval.append(data_layer_eval)
     else:
-        neural_factory.nemo.logging.info("There were no val datasets passed")
+        nemo.logging.warning("There were no val datasets passed")
 
     jasper_encoder = nemo_asr.JasperEncoder(
         feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"],
diff --git a/examples/asr/jasper_aishell.py b/examples/asr/jasper_aishell.py
index fec8ea185640..1f45d4b52a83 100644
--- a/examples/asr/jasper_aishell.py
+++ b/examples/asr/jasper_aishell.py
@@ -139,7 +139,7 @@ def create_all_dags(args, neural_factory):
 
             data_layers_eval.append(data_layer_eval)
     else:
-        nemo.logging.info("There were no val datasets passed")
+        nemo.logging.warning("There were no val datasets passed")
 
     jasper_encoder = nemo_asr.JasperEncoder(
         feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"],
diff --git a/examples/asr/quartznet.py b/examples/asr/quartznet.py
index 5ab384245202..e1f77d415a81 100644
--- a/examples/asr/quartznet.py
+++ b/examples/asr/quartznet.py
@@ -123,7 +123,7 @@ def create_all_dags(args, neural_factory):
 
             data_layers_eval.append(data_layer_eval)
     else:
-        nemo.logging.info("There were no val datasets passed")
+        nemo.logging.warning("There were no val datasets passed")
 
     # create shared modules
 
diff --git a/examples/nlp/nmt_tutorial.py b/examples/nlp/nmt_tutorial.py
index 4430f8d5e1f0..c0bb62587518 100644
--- a/examples/nlp/nmt_tutorial.py
+++ b/examples/nlp/nmt_tutorial.py
@@ -104,9 +104,8 @@
     # source and target use different tokenizers, set tie_weight to False
     tie_weight = False
 else:
-    nemo.logging.info(
+    raise ValueError(
         f"Unsupported language pair:{args.src_lang}-{args.tgt_lang}.")
-    exit(1)
 
 # instantiate necessary modules for the whole translation pipeline, namely
 # data layers, encoder, decoder, output log_softmax, beam_search_translator
diff --git a/nemo/nemo/backends/pytorch/nm.py b/nemo/nemo/backends/pytorch/nm.py
index b2b2693321b1..2266bde78506 100644
--- a/nemo/nemo/backends/pytorch/nm.py
+++ b/nemo/nemo/backends/pytorch/nm.py
@@ -209,32 +209,32 @@ def input_ports(self):
         return {}
 
     def get_weights(self):
-        nemo.logging.warning(
-            "Data Layer does not have any weights to return. "
-            "This get_weights call returns None."
-        )
+        # nemo.logging.warning(
+        #     "Data Layer does not have any weights to return. "
+        #     "This get_weights call returns None."
+        # )
         return None
 
     def set_weights(self, name2weight: Dict[(str, bool)],
                     name2name_and_transform):
-        nemo.logging.warning(
-            "Data Layer does not have any weights to set. "
-            "This set_weights call is ignored."
-        )
+        # nemo.logging.warning(
+        #     "Data Layer does not have any weights to set. "
+        #     "This set_weights call is ignored."
+        # )
         return None
 
     def tie_weights_with(self, module, weight_names):
-        nemo.logging.warning(
-            "Data Layer does not have any weights to tie. "
-            "This tie_weights_with call is ignored."
-        )
+        # nemo.logging.warning(
+        #     "Data Layer does not have any weights to tie. "
+        #     "This tie_weights_with call is ignored."
+        # )
         return None
 
     def save_to(self, path):
-        nemo.logging.warning(
-            "Data Layer does not have any state to save. "
-            "This save_to call is ignored."
-        )
+        # nemo.logging.warning(
+        #     "Data Layer does not have any state to save. "
+        #     "This save_to call is ignored."
+        # )
         return None
 
     def restore_from(self, path):
@@ -244,17 +244,17 @@ def restore_from(self, path):
         return None
 
     def freeze(self, weights: Set[str] = None):
-        nemo.logging.warning(
-            "Data Layer does not have any weights to freeze. "
-            "This freeze call is ignored."
-        )
+        # nemo.logging.warning(
+        #     "Data Layer does not have any weights to freeze. "
+        #     "This freeze call is ignored."
+        # )
         return None
 
     def unfreeze(self, weights: Set[str] = None):
-        nemo.logging.warning(
-            "Data Layer does not have any weights to unfreeze. "
-            "This unfreeze call is ignored."
-        )
+        # nemo.logging.warning(
+        #     "Data Layer does not have any weights to unfreeze. "
+        #     "This unfreeze call is ignored."
+        # )
         return None
 
     @property
@@ -302,24 +302,24 @@ def get_weights(self):
 
     def set_weights(self, name2weight: Dict[(str, bool)],
                     name2name_and_transform):
-        nemo.logging.warning(
-            "Loss function module does not have any weights to set. "
-            "This set_weights call is ignored."
-        )
+        # nemo.logging.warning(
+        #     "Loss function module does not have any weights to set. "
+        #     "This set_weights call is ignored."
+        # )
         return None
 
     def tie_weights_with(self, module, weight_names):
-        nemo.logging.warning(
-            "Loss function module does not have any weights to tie. "
-            "This tie_weights_with call is ignored."
-        )
+        # nemo.logging.warning(
+        #     "Loss function module does not have any weights to tie. "
+        #     "This tie_weights_with call is ignored."
+        # )
         return None
 
     def save_to(self, path):
-        nemo.logging.warning(
-            "Loss function module does not have any state to save. "
-            "This save_to call is ignored."
-        )
+        # nemo.logging.warning(
+        #     "Loss function module does not have any state to save. "
+        #     "This save_to call is ignored."
+        # )
         return None
 
     def restore_from(self, path):
@@ -330,17 +330,17 @@ def restore_from(self, path):
         return None
 
     def freeze(self, weights: Set[str] = None):
-        nemo.logging.warning(
-            "Loss function module does not have any weights to freeze. "
-            "This freeze call is ignored."
-        )
+        # nemo.logging.warning(
+        #     "Loss function module does not have any weights to freeze. "
+        #     "This freeze call is ignored."
+        # )
         return None
 
     def unfreeze(self, weights: Set[str] = None):
-        nemo.logging.warning(
-            "Loss function module does not have any weights to "
-            "unfreeze. This unfreeze call is ignored."
-        )
+        # nemo.logging.warning(
+        #     "Loss function module does not have any weights to "
+        #     "unfreeze. This unfreeze call is ignored."
+        # )
         return None
 
     @property