Neural Modules Configuration - stage 1: init parameters cleanup #309

Merged
merged 40 commits on Feb 3, 2020
Changes from 16 commits

Commits (40)
75a7d4f
Deprecated rewritten, relies on wrapt package - help from Jonathan DE…
tkornuta-nvidia Jan 29, 2020
f5f5d24
work in progress, cleaning up the arguments passed along NMs hierarch…
tkornuta-nvidia Jan 29, 2020
603284a
merged fix-cpu
tkornuta-nvidia Jan 29, 2020
11e3977
cleared up parameters of NMs base classes, unit tests working - initi…
tkornuta-nvidia Jan 29, 2020
44406e1
adding wrapt to test requirements
tkornuta-nvidia Jan 29, 2020
b41f660
Fixing unit tests after NeuralModule cleanup
tkornuta-nvidia Jan 29, 2020
439c938
Removed all kwargs from init() constructors of Neural Modules
tkornuta-nvidia Jan 30, 2020
1468c55
Argument order fix in image_folder
tkornuta-nvidia Jan 30, 2020
7460430
Adding batch_size property to DataLayer, with default value 1 (as sug…
tkornuta-nvidia Jan 30, 2020
3a5127e
Fixed the references to local_parameters, including tutorials
tkornuta-nvidia Jan 31, 2020
4a202fd
Polishing of code
tkornuta-nvidia Jan 31, 2020
2e85582
removed nf passed as argument to BERT in example
tkornuta-nvidia Jan 31, 2020
ab9482b
Cleanup related to batch_size, shuffle and num_workers properties of …
tkornuta-nvidia Jan 31, 2020
69b3852
actions fix - cleanup related to local_parameters, adding properties …
tkornuta-nvidia Jan 31, 2020
c2f1db8
Fixed or commented ASR tests - found many issues there, related to pa…
tkornuta-nvidia Jan 31, 2020
0b88437
__callll__ fix
tkornuta-nvidia Jan 31, 2020
3f39ad7
typo fix
tkornuta-nvidia Jan 31, 2020
066a423
minor fixes, tests on GPU
tkornuta-nvidia Jan 31, 2020
daf1e7a
extract init params -> private
tkornuta-nvidia Jan 31, 2020
67caf4f
super()
tkornuta-nvidia Jan 31, 2020
dff77a6
super() cnt
tkornuta-nvidia Jan 31, 2020
4f515c8
kwargs from 2 online asr ipynb
tkornuta-nvidia Jan 31, 2020
38da379
removed tensor.to(tensor) from final_loss
tkornuta-nvidia Jan 31, 2020
8ad281c
minor fix
okuchaiev Jan 31, 2020
ed8d80d
fix black in actions.py
okuchaiev Jan 31, 2020
62e2c64
Merge branch 'master' into dev-config-nm
okuchaiev Jan 31, 2020
c86ad6c
Merge branch 'master' of github.com:NVIDIA/NeMo
tkornuta-nvidia Jan 31, 2020
c8b0610
Fixed the chatbot_example
tkornuta-nvidia Jan 31, 2020
db7a89d
gan example fix
tkornuta-nvidia Jan 31, 2020
4b1ef90
logger merge
tkornuta-nvidia Jan 31, 2020
c8a14c6
merge with Jason's PR
tkornuta-nvidia Jan 31, 2020
90e9eef
fixes after merge
tkornuta-nvidia Jan 31, 2020
f8cf8f8
LGTM warning fix
tkornuta-nvidia Jan 31, 2020
2d1a678
tts config fix
tkornuta-nvidia Jan 31, 2020
75ad001
lgtm import cleanup
tkornuta-nvidia Jan 31, 2020
91b36c4
LGTM fix
tkornuta-nvidia Feb 1, 2020
c9d16b2
LGTM-related fixes - wrong params fixes
tkornuta-nvidia Feb 1, 2020
443b3ef
updated the changelog
tkornuta-nvidia Feb 1, 2020
ec1f360
Merge branch 'master' into dev-config-nm
okuchaiev Feb 3, 2020
e13d39e
fixing some unittests
okuchaiev Feb 3, 2020
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -48,6 +48,6 @@ There are several tools to automatically format your code to be PEP 8 compliant,

## Nemo style
1. If you import a module from the same collection, use a relative path instead of an absolute path. For example, inside ``nemo_nlp``, use ``.utils`` instead of ``nemo_nlp.utils``.
1. Before accessing something, always make sure that it exists. E.g. right now, in ``actions.py``, there's this line of code ``batch_size=dl_nm.local_parameters["batch_size"]`` but nowhere in the codebase do we check that ``batch_size`` is passed into the datalayer.
1. Before accessing something, always make sure that it exists.
1. Right inheritance. For example, if a module doesn't have any trainable weights, don't inherit from TrainableNM.
1. Naming consistency, both within NeMo and between NeMo and external literature. E.g. use the name ``logits`` for ``log_probs``, ``hidden_size`` for ``d_model``.
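
For context, the ``batch_size`` example removed from the rule above is exactly what this PR addresses: instead of reading untyped entries out of ``local_parameters``, data layers now expose an explicit property with a default. A minimal sketch of that pattern (a simplified stand-in, not the actual NeMo base class):

.. code-block:: python

    class DataLayerNM:
        """Simplified stand-in for NeMo's data-layer base class."""

        def __init__(self, batch_size=1):
            # Explicit constructor argument with a default of 1, so code
            # such as actions.py can read it without an existence check.
            self._batch_size = batch_size

        @property
        def batch_size(self):
            return self._batch_size

    dl = DataLayerNM()
    assert dl.batch_size == 1  # always present, no KeyError possible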
6 changes: 3 additions & 3 deletions docs/docs_zh/sources/source/nlp/ner.rst
@@ -90,7 +90,7 @@ Each line of text.txt contains a text sequence, with words separated by spaces. label
label_ids = train_data_layer.dataset.label_ids
num_classes = len(label_ids)

hidden_size = bert_model.local_parameters["hidden_size"]
hidden_size = bert_model.hidden_size
ner_classifier = nemo_nlp.TokenClassifier(hidden_size=hidden_size,
num_classes=num_classes,
dropout=CLASSIFICATION_DROPOUT)
@@ -217,8 +217,8 @@ Each line of text.txt contains a text sequence, with words separated by spaces. label

tokenizer = NemoBertTokenizer(pretrained_model="scibert_scivocab_cased")
bert_model = nemo_nlp.huggingface.BERT(
pretrained_model_name="scibert_scivocab_cased",
factory=neural_factory)
pretrained_model_name="scibert_scivocab_cased"
)

If you want to use a model trained with TensorFlow, such as BioBERT, you first need to convert it with the `model conversion script`_ provided by Hugging Face, and then you can use it in NeMo.

28 changes: 13 additions & 15 deletions docs/docs_zh/sources/source/tutorials/custommodules.rst
@@ -48,8 +48,8 @@

.. code-block:: python

def __init__(self, *, module_params, ..., **kwargs)
super().__init__(**kwargs)
def __init__(self, module_params, ...)
super().__init__()

(4) Implement the ``forward`` method from ``torch.nn.Module``

@@ -76,11 +76,11 @@
0: AxisType(BatchTag),
1: AxisType(ChannelTag)})}

def __init__(self, **kwargs):
def __init__(self, dim):
# (3) Call the base class constructor
TrainableNM.__init__(self, **kwargs)
TrainableNM.__init__(self)
# End of the Neural Modules specific part; the rest is PyTorch code
self._dim = self.local_parameters["dim"]
self._dim = dim
self.fc1 = nn.Linear(self._dim, 1)
t.nn.init.xavier_uniform_(self.fc1.weight)
self._device = t.device(
@@ -115,8 +115,8 @@
def output_ports(self):
return {...}

def __init__(self, *, module_params, .., **kwargs)
TrainableNM.__init__(self, **kwargs)
def __init__(self, module_params, ...)
TrainableNM.__init__(self)

(4) Modify the ``forward`` method so that its input arguments match your input port names.

@@ -162,11 +162,11 @@
"label": NeuralType({0: AxisType(BatchTag)}),
}

def __init__(self, **kwargs):
DataLayerNM.__init__(self, **kwargs)
def __init__(self, input_size, path):
DataLayerNM.__init__(self)

self._input_size = kwargs["input_size"]
self._path = kwargs["path"]
self._input_size = input_size
self._path = path

self._transforms = transforms.Compose([
transforms.RandomResizedCrop(self._input_size),
@@ -216,15 +216,13 @@ Example
def output_ports(self):
return {"loss": NeuralType(None)}

def __init__(self, **kwargs):
def __init__(self):
# Neural Module API specific
super().__init__(**kwargs)
super().__init__()

# End of Neural Module API specific
self._criterion = torch.nn.CrossEntropyLoss()

# You need to implement this method
def _loss_function(self, **kwargs):
return self._criterion(*(kwargs.values()))
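
Taken together, the tutorial changes above reduce to one pattern: explicit constructor parameters plus a no-argument base-class call. A self-contained sketch of a trainable module in the new style (``TrainableNM`` is stubbed with a plain ``torch.nn.Module`` so the snippet runs outside NeMo; the class name is illustrative):

.. code-block:: python

    import torch
    import torch.nn as nn

    class TrainableNM(nn.Module):
        """Stand-in for NeMo's TrainableNM; the real base class also
        manages ports and device placement."""

    class MyTrainableModule(TrainableNM):
        def __init__(self, dim):
            # (3) Call the base constructor -- no **kwargs forwarding.
            TrainableNM.__init__(self)
            # End of the Neural Modules specific part; the rest is PyTorch.
            self._dim = dim
            self.fc1 = nn.Linear(self._dim, 1)
            torch.nn.init.xavier_uniform_(self.fc1.weight)

        def forward(self, x):
            return self.fc1(x)

    net = MyTrainableModule(dim=4)
    print(net(torch.randn(2, 4)).shape)  # torch.Size([2, 1])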


7 changes: 4 additions & 3 deletions docs/sources/source/nlp/joint_intent_slot_filling.rst
@@ -59,7 +59,7 @@ This will tokenize text following the mapping of the original BERT model.
.. code-block:: python

from transformers import BertTokenizer
hidden_size = pretrained_bert_model.local_parameters["hidden_size"]
hidden_size = pretrained_bert_model.hidden_size
tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model)

Next, we define all Neural Modules participating in our joint intent slot filling classification pipeline.
@@ -79,7 +79,8 @@ Next, we define all Neural Modules participating in our joint intent slot fillin
.. code-block:: python

pretrained_bert_model = nemo_nlp.huggingface.BERT(
pretrained_model_name=args.pretrained_bert_model, factory=nf)
pretrained_model_name=args.pretrained_bert_model
)
hidden_states = pretrained_bert_model(input_ids=ids,
token_type_ids=type_ids,
attention_mask=input_mask)
@@ -256,4 +257,4 @@ References
.. bibliography:: nlp_all.bib
:style: plain
:labelprefix: NLP-SLOT
:keyprefix: nlp-slot-
:keyprefix: nlp-slot-
2 changes: 1 addition & 1 deletion docs/sources/source/nlp/ner.rst
@@ -94,7 +94,7 @@ We need to create the classifier to sit on top of the pretrained model and defin

.. code-block:: python

hidden_size = bert_model.local_parameters["hidden_size"]
hidden_size = bert_model.hidden_size
ner_classifier = nemo_nlp.TokenClassifier(hidden_size=hidden_size,
num_classes=num_classes,
dropout=CLASSIFICATION_DROPOUT)
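
For contrast, a hedged before/after sketch of the access pattern this hunk changes (the model name and hyperparameter values are illustrative, and the tutorial's earlier setup is assumed):

.. code-block:: python

    import nemo.collections.nlp as nemo_nlp

    bert_model = nemo_nlp.huggingface.BERT(
        pretrained_model_name="bert-base-cased"  # illustrative model name
    )

    # Old (pre-PR): untyped dict lookup that could raise KeyError.
    # hidden_size = bert_model.local_parameters["hidden_size"]

    # New (this PR): an explicit property on the module.
    hidden_size = bert_model.hidden_size

    ner_classifier = nemo_nlp.TokenClassifier(
        hidden_size=hidden_size,
        num_classes=9,  # illustrative: size of the NER tag set
        dropout=0.1,    # illustrative dropout value
    )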
2 changes: 1 addition & 1 deletion docs/sources/source/nlp/punctuation.rst
@@ -116,7 +116,7 @@ Now, create the train and evaluation data layers:
punct_label_ids = train_data_layer.dataset.punct_label_ids
capit_label_ids = train_data_layer.dataset.capit_label_ids

hidden_size = bert_model.local_parameters["hidden_size"]
hidden_size = bert_model.hidden_size

# Note that you need to specify punct_label_ids and capit_label_ids - mapping form labels
# to label_ids generated during creation of the train_data_layer to make sure that
2 changes: 1 addition & 1 deletion docs/sources/source/nlp/question_answering.rst
@@ -61,7 +61,7 @@ This will tokenize text following the mapping of the original BERT model.
.. code-block:: python

from nemo.collections.nlp import NemoBertTokenizer
hidden_size = pretrained_bert_model.local_parameters["hidden_size"]
hidden_size = pretrained_bert_model.hidden_size
tokenizer = NemoBertTokenizer(args.pretrained_bert_model)

Next, we define all Neural Modules participating in our question answering classification pipeline.
28 changes: 13 additions & 15 deletions docs/sources/source/tutorials/custommodules.rst
@@ -48,8 +48,8 @@ Defining a module from scratch

.. code-block:: python

def __init__(self, *, module_params, ..., **kwargs)
super().__init__(**kwargs)
def __init__(self, module_params, ...)
super().__init__()

(4) Implement ``forward`` method from ``torch.nn.Module``

@@ -76,11 +76,11 @@ Example 1
0: AxisType(BatchTag),
1: AxisType(ChannelTag)})}

def __init__(self, **kwargs):
def __init__(self, dim):
# (3) Call base constructor
TrainableNM.__init__(self, **kwargs)
TrainableNM.__init__(self)
# End of Neural Modules specific part. Rest is PyTorch code
self._dim = self.local_parameters["dim"]
self._dim = dim
self.fc1 = nn.Linear(self._dim, 1)
t.nn.init.xavier_uniform_(self.fc1.weight)
self._device = t.device(
@@ -116,8 +116,8 @@ Converting from PyTorch's nn.Module
def output_ports(self):
return {...}

def __init__(self, *, module_params, .., **kwargs)
TrainableNM.__init__(self, **kwargs)
def __init__(self, module_params, ...)
TrainableNM.__init__(self)

(4) Modify ``forward`` method so that its input arguments match your input port names exactly.

@@ -167,11 +167,11 @@ This example wraps PyTorch's *ImageFolder* dataset into a neural module data lay
"label": NeuralType({0: AxisType(BatchTag)}),
}

def __init__(self, **kwargs):
DataLayerNM.__init__(self, **kwargs)
def __init__(self, input_size, path):
DataLayerNM.__init__(self)

self._input_size = kwargs["input_size"]
self._path = kwargs["path"]
self._input_size = input_size
self._path = path

self._transforms = transforms.Compose([
transforms.RandomResizedCrop(self._input_size),
@@ -223,15 +223,13 @@ Example
def output_ports(self):
return {"loss": NeuralType(None)}

def __init__(self, **kwargs):
def __init__(self):
# Neural Module API specific
super().__init__(**kwargs)
super().__init__()

# End of Neural Module API specific
self._criterion = torch.nn.CrossEntropyLoss()

# You need to implement this function
def _loss_function(self, **kwargs):
return self._criterion(*(kwargs.values()))
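
And a runnable version of the *ImageFolder* data-layer example above in the new explicit-argument style (``DataLayerNM`` is stubbed so the sketch works outside NeMo; ``torchvision`` is assumed to be installed):

.. code-block:: python

    from torchvision import datasets, transforms

    class DataLayerNM:
        """Simplified stand-in for NeMo's DataLayerNM base class."""

        def __init__(self):
            pass

    class ImageFolderDataLayer(DataLayerNM):
        def __init__(self, input_size, path):
            DataLayerNM.__init__(self)
            # Explicit, named arguments replace kwargs["input_size"] etc.
            self._input_size = input_size
            self._path = path
            self._transforms = transforms.Compose([
                transforms.RandomResizedCrop(self._input_size),
                transforms.ToTensor(),
            ])
            self._dataset = datasets.ImageFolder(self._path, self._transforms)

        @property
        def dataset(self):
            return self._dataset

    # Usage (requires an image folder on disk):
    # dl = ImageFolderDataLayer(input_size=224, path="path/to/images")
    # print(len(dl.dataset))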


4 changes: 1 addition & 3 deletions examples/asr/jasper.py
@@ -136,9 +136,7 @@ def create_all_dags(args, neural_factory):
)

jasper_decoder = nemo_asr.JasperDecoderForCTC(
feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"],
num_classes=len(vocab),
factory=neural_factory,
feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"], num_classes=len(vocab)
)

ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab))
4 changes: 1 addition & 3 deletions examples/asr/jasper_aishell.py
@@ -137,9 +137,7 @@ def create_all_dags(args, neural_factory):
)

jasper_decoder = nemo_asr.JasperDecoderForCTC(
feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"],
num_classes=len(vocab),
factory=neural_factory,
feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"], num_classes=len(vocab)
)

ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab))
4 changes: 2 additions & 2 deletions examples/asr/notebooks/2_Online_ASR_Microphone_Demo.ipynb
@@ -133,7 +133,7 @@
" }\n",
"\n",
" def __init__(self, **kwargs):\n",
" DataLayerNM.__init__(self, **kwargs)\n",
" DataLayerNM.__init__(self)\n",
" self.output = True\n",
" \n",
" def __iter__(self):\n",
@@ -393,4 +393,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
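
The notebook hunk above keeps a streaming data layer whose base constructor no longer receives kwargs. A minimal sketch of that shape (the buffering and method names are illustrative, not the notebook's exact code):

.. code-block:: python

    import numpy as np

    class DataLayerNM:
        """Simplified stand-in for NeMo's DataLayerNM base class."""

        def __init__(self):
            pass

    class AudioDataLayer(DataLayerNM):
        def __init__(self):
            DataLayerNM.__init__(self)  # no **kwargs forwarded anymore
            self.output = True
            self._buffer = []

        def set_signal(self, signal):
            # Queue a chunk of audio for the next pass over the layer.
            self._buffer.append(np.asarray(signal, dtype=np.float32))

        def __iter__(self):
            while self._buffer:
                yield self._buffer.pop(0)

    dl = AudioDataLayer()
    dl.set_signal([0.0, 0.1, -0.1])
    for chunk in dl:
        print(chunk.shape)  # (3,)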
15 changes: 7 additions & 8 deletions examples/nlp/BERTPretrainingTutorial.ipynb
@@ -133,8 +133,8 @@
" num_attention_heads=NUM_HEADS,\n",
" intermediate_size=D_INNER,\n",
" max_position_embeddings=MAX_SEQ_LENGTH,\n",
" hidden_act=HIDDEN_ACT,\n",
" factory=neural_factory)"
" hidden_act=HIDDEN_ACT\n",
")"
]
},
{
@@ -167,22 +167,21 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"train_data_layer = nemo_nlp.BertPretrainingDataLayer(\n",
" tokenizer=tokenizer,\n",
" dataset=os.path.join(\"data/lm/wikitext-2\", \"train.txt\"),\n",
" max_seq_length=MAX_SEQ_LENGTH,\n",
" mask_probability=MASK_PROBABILITY,\n",
" batch_size=BATCH_SIZE,\n",
" factory=neural_factory)\n",
" batch_size=BATCH_SIZE\n",
")\n",
"\n",
"eval_data_layer = nemo_nlp.BertPretrainingDataLayer(\n",
" tokenizer=tokenizer,\n",
" dataset=os.path.join(\"data/lm/wikitext-2\", \"valid.txt\"),\n",
" max_seq_length=MAX_SEQ_LENGTH,\n",
" mask_probability=MASK_PROBABILITY,\n",
" batch_size=BATCH_SIZE_EVAL,\n",
" factory=neural_factory)"
" batch_size=BATCH_SIZE_EVAL\n",
")"
]
},
{
@@ -301,7 +300,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
"version": "3.7.4"
}
},
"nbformat": 4,
7 changes: 3 additions & 4 deletions examples/nlp/NERWithBERT.ipynb
@@ -99,8 +99,7 @@
"label_ids = train_data_layer.dataset.label_ids\n",
"num_classes = len(label_ids)\n",
"\n",
"hidden_size = bert_model.local_parameters[\"hidden_size\"]\n",
"ner_classifier = nemo_nlp.TokenClassifier(hidden_size=hidden_size,\n",
"ner_classifier = nemo_nlp.TokenClassifier(hidden_size=bert_model.hidden_size,\n",
" num_classes=num_classes,\n",
" dropout=CLASSIFICATION_DROPOUT)\n",
"\n",
@@ -204,9 +203,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.7.4 64-bit",
"language": "python",
"name": "python3"
"name": "python37464bitc56e562f54084a24b5afed5459c99218"
},
"language_info": {
"codemirror_mode": {
6 changes: 2 additions & 4 deletions examples/nlp/PunctuationWithBERT.ipynb
@@ -142,19 +142,17 @@
"punct_label_ids = train_data_layer.dataset.punct_label_ids\n",
"capit_label_ids = train_data_layer.dataset.capit_label_ids\n",
"\n",
"hidden_size = bert_model.local_parameters[\"hidden_size\"]\n",
"\n",
"\n",
"# Define classifier for Punctuation and Capitalization tasks\n",
"punct_classifier = nemo_nlp.TokenClassifier(\n",
" hidden_size=hidden_size,\n",
" hidden_size=bert_model.hidden_size,\n",
" num_classes=len(punct_label_ids),\n",
" dropout=CLASSIFICATION_DROPOUT,\n",
" num_layers=PUNCT_NUM_FC_LAYERS,\n",
" name='Punctuation')\n",
"\n",
"capit_classifier = nemo_nlp.TokenClassifier(\n",
" hidden_size=hidden_size,\n",
" hidden_size=bert_model.hidden_size,\n",
" num_classes=len(capit_label_ids),\n",
" dropout=CLASSIFICATION_DROPOUT,\n",
" name='Capitalization')\n",
2 changes: 1 addition & 1 deletion examples/nlp/asr_postprocessor.py
@@ -92,7 +92,7 @@

t_log_softmax = nemo_nlp.TokenClassifier(args.d_model, num_classes=vocab_size, num_layers=1, log_softmax=True)

loss_fn = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(pad_id=tokenizer.pad_id(), smoothing=0.1)
loss_fn = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(pad_id=tokenizer.pad_id(), label_smoothing=0.1)

beam_search = nemo_nlp.BeamSearchTranslatorNM(
decoder=decoder,
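
The keyword rename above (``smoothing`` to ``label_smoothing``) refers to standard label smoothing. For reference, a NumPy sketch of the target distribution it implies (the textbook formula, not NeMo's internal code):

.. code-block:: python

    import numpy as np

    def smooth_targets(labels, num_classes, label_smoothing=0.1):
        """Smoothed one-hot targets: the true class gets 1 - label_smoothing
        and the remaining mass is spread uniformly over the other classes."""
        labels = np.asarray(labels)
        targets = np.full((len(labels), num_classes),
                          label_smoothing / (num_classes - 1))
        targets[np.arange(len(labels)), labels] = 1.0 - label_smoothing
        return targets

    print(smooth_targets([2], num_classes=4))
    # [[0.03333333 0.03333333 0.9        0.03333333]]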
2 changes: 1 addition & 1 deletion examples/nlp/glue_with_BERT.py
@@ -240,7 +240,7 @@

model.restore_from(args.bert_checkpoint)

hidden_size = model.local_parameters["hidden_size"]
hidden_size = model.hidden_size

# uses [CLS] token for classification (the first token)
if args.task_name == 'sts-b':
2 changes: 1 addition & 1 deletion examples/nlp/joint_intent_slot_infer.py
@@ -36,7 +36,7 @@
nemo_nlp.huggingface.BERT.list_pretrained_models()
"""
pretrained_bert_model = nemo_nlp.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model)
hidden_size = pretrained_bert_model.local_parameters["hidden_size"]
hidden_size = pretrained_bert_model.hidden_size
tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model)

data_desc = JointIntentSlotDataDesc(args.data_dir, args.do_lower_case, args.dataset_name)
4 changes: 2 additions & 2 deletions examples/nlp/joint_intent_slot_infer_b1.py
@@ -30,9 +30,9 @@
See the list of pretrained models, call:
nemo_nlp.huggingface.BERT.list_pretrained_models()
"""
pretrained_bert_model = nemo_nlp.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model, factory=nf)
pretrained_bert_model = nemo_nlp.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model)
tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model)
hidden_size = pretrained_bert_model.local_parameters["hidden_size"]
hidden_size = pretrained_bert_model.hidden_size

data_desc = JointIntentSlotDataDesc(args.data_dir, args.do_lower_case, args.dataset_name)

2 changes: 1 addition & 1 deletion examples/nlp/joint_intent_slot_with_bert.py
@@ -76,7 +76,7 @@
else:
pretrained_bert_model = nemo_nlp.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model, factory=nf)

hidden_size = pretrained_bert_model.local_parameters["hidden_size"]
hidden_size = pretrained_bert_model.hidden_size

data_desc = JointIntentSlotDataDesc(
args.data_dir, args.do_lower_case, args.dataset_name, args.none_slot_label, args.pad_label,