diff --git a/Jenkinsfile b/Jenkinsfile
index d0d2b0eaa5b1..7c2ee564dac4 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -24,9 +24,9 @@ pipeline {
         sh 'python setup.py style'
       }
     }
-    stage('Unittests general') {
+    stage('Unittests Core') {
       steps {
-        sh './reinstall.sh && python -m unittest tests/*.py'
+        sh './reinstall.sh && python -m unittest tests/core/*.py'
       }
     }
     stage('Unittests ASR') {
diff --git a/nemo/backends/pytorch/common/zero_data.py b/nemo/backends/pytorch/common/zero_data.py
index 0c7b14fe1a11..18f366c46140 100644
--- a/nemo/backends/pytorch/common/zero_data.py
+++ b/nemo/backends/pytorch/common/zero_data.py
@@ -18,11 +18,11 @@ def neuralType2TensorShape(neural_type: NeuralType, default_dim=32, skip_batch_a
       torch.Size
     """
     dims = []
-    for axis_ind, axis_type in neural_type.axis2type.items():
-        if axis_type._semantics == BatchTag and skip_batch_axis:
+    for axis in neural_type.axes:
+        if axis.kind == AxisKind.Batch and skip_batch_axis:
             continue
-        if axis_type.dim is not None:
-            dims.append(axis_type.dim)
+        if axis.size is not None:
+            dims.append(axis.size)
         else:
             dims.append(default_dim)
     return torch.Size(dims)
diff --git a/nemo/core/neural_types/axes.py b/nemo/core/neural_types/axes.py
index 5efba5d20ca7..acb9a27646f2 100644
--- a/nemo/core/neural_types/axes.py
+++ b/nemo/core/neural_types/axes.py
@@ -69,7 +69,7 @@ class AxisType(object):
            is_list (bool, default=False):
     """
 
-    def __init__(self, kind: AxisKindAbstract, size: Optional[int], is_list=False):
+    def __init__(self, kind: AxisKindAbstract, size: Optional[int] = None, is_list=False):
         if size is not None and is_list:
             raise ValueError("The axis can't be list and have a fixed size")
         self.kind = kind
diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py
index f4c4d12445b4..37f35867a159 100644
--- a/nemo/core/neural_types/elements.py
+++ b/nemo/core/neural_types/elements.py
@@ -28,6 +28,9 @@
     'LabelsType',
     'LossType',
     'RegressionValuesType',
+    'PredictionsType',
+    'LogprobsType',
+    'LengthsType',
 ]
 import abc
 from abc import ABC, abstractmethod
@@ -55,7 +58,6 @@ def fields(self) -> Optional[Tuple]:
 
     def compare(self, second) -> NeuralTypeComparisonResult:
         # First, check general compatibility
-        result = NeuralTypeComparisonResult.SAME
         first_t = type(self)
         second_t = type(second)
 
@@ -108,11 +110,21 @@ def __str__(self):
         return "neural type representing logits"
 
 
+class LogprobsType(ElementType):
+    def __str__(self):
+        return "neural type representing log probabilities"
+
+
 class LabelsType(ElementType):
     def __str__(self):
         return "neural type representing labels"
 
 
+class LengthsType(ElementType):
+    def __str__(self):
+        return "neural type representing lengths of something"
+
+
 class LossType(ElementType):
     def __str__(self):
         return "neural type representing loss value"
@@ -151,6 +163,11 @@ def __str__(self):
         return "mfcc spectorgram type"
 
 
-class RegressionValuesType(ElementType):
+class PredictionsType(ElementType):
+    def __str__(self):
+        return "predictions values type"
+
+
+class RegressionValuesType(PredictionsType):
     def __str__(self):
         return "regression values type"
diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py
index a2df777c9296..9cb7513963e4 100644
--- a/nemo/core/neural_types/neural_type.py
+++ b/nemo/core/neural_types/neural_type.py
@@ -44,7 +44,7 @@ def __init__(self, elements_type: ElementType = VoidType(), axes: Optional[Tuple
             )
         self.elements_type = elements_type
         if axes is not None:
-            self.__check_sanity(axes)
+            NeuralType.__check_sanity(axes)
             axes_list = []
             for axis in axes:
                 if isinstance(axis, str):
@@ -63,9 +63,7 @@ def compare(self, second) -> NeuralTypeComparisonResult:
         axes_a = self.axes
         axes_b = second.axes
 
-        kinds_a = dict()
-        kinds_b = dict()
-
+        # "Big void" type
         if isinstance(self.elements_type, VoidType) and self.axes is None:
             return NeuralTypeComparisonResult.SAME
 
@@ -75,28 +73,29 @@ def compare(self, second) -> NeuralTypeComparisonResult:
             else:
                 return NeuralTypeComparisonResult.INCOMPATIBLE
 
-        dimensions_pass = True
-        for axis_a, axis_b in zip(axes_a, axes_b):
-            kinds_a[axis_a.kind] = axis_a.size
-            kinds_b[axis_b.kind] = axis_b.size
-            if axis_a.kind != axis_b.kind or axis_a.is_list != axis_b.is_list:
-                dimensions_pass = False
-
-        if kinds_a.keys() != kinds_b.keys():
-            return NeuralTypeComparisonResult.INCOMPATIBLE
-        for kind, size in kinds_a.items():
-            if size != kinds_b[kind]:
-                return NeuralTypeComparisonResult.DIM_INCOMPATIBLE
-
+        dimensions_pass = NeuralType.__compare_axes(axes_a, axes_b)
         element_comparison_result = self.elements_type.compare(second.elements_type)
-        if dimensions_pass:
+
+        # SAME DIMS
+        if dimensions_pass == 0:
             return element_comparison_result
-        elif element_comparison_result == NeuralTypeComparisonResult.SAME:
-            return NeuralTypeComparisonResult.TRANSPOSE_SAME
+        # TRANSPOSE_SAME DIMS
+        elif dimensions_pass == 1:
+            if element_comparison_result == NeuralTypeComparisonResult.SAME:
+                return NeuralTypeComparisonResult.TRANSPOSE_SAME
+            else:
+                return NeuralTypeComparisonResult.INCOMPATIBLE
+        # DIM_INCOMPATIBLE DIMS
+        elif dimensions_pass == 2:
+            if element_comparison_result == NeuralTypeComparisonResult.SAME:
+                return NeuralTypeComparisonResult.DIM_INCOMPATIBLE
+            else:
+                return NeuralTypeComparisonResult.INCOMPATIBLE
         else:
             return NeuralTypeComparisonResult.INCOMPATIBLE
 
-    def __check_sanity(self, axes):
+    @staticmethod
+    def __check_sanity(axes):
         # check that list come before any tensor dimension
         are_strings = True
         for axis in axes:
@@ -119,6 +118,50 @@ def __check_sanity(self, axes):
                 "You have list dimension after Tensor dimension. All list dimensions must preceed Tensor dimensions"
             )
 
+    @staticmethod
+    def __compare_axes(axes_a, axes_b) -> int:
+        """
+        Compares axes_a and axes_b
+        Args:
+            axes_a: first axes tuple (may be None)
+            axes_b: second axes tuple (may be None)
+
+        Returns:
+            0 - if they are exactly the same
+            1 - if they are "TRANSPOSE_SAME"
+            2 - if they are "DIM_INCOMPATIBLE"
+            3 - if they are different
+        """
+        # Imported locally so the module-level imports stay untouched.
+        from collections import Counter
+
+        if axes_a is None and axes_b is None:
+            return 0
+        elif axes_a is None or axes_b is None:
+            return 3
+        elif len(axes_a) != len(axes_b):
+            return 3
+        # After these ifs we know that len(axes_a) == len(axes_b)
+
+        same = all(
+            axis_a.kind == axis_b.kind and axis_a.is_list == axis_b.is_list and axis_a.size == axis_b.size
+            for axis_a, axis_b in zip(axes_a, axes_b)
+        )
+        if same:
+            return 0
+
+        # Not identical: can be TRANSPOSE_SAME, DIM_INCOMPATIBLE or different.
+        # Axes are compared as multisets rather than through a kind -> size dict,
+        # because one kind may label several axes (e.g. multiple Dimension axes)
+        # and a dict would silently keep only the last size seen for that kind.
+        if Counter(axis.kind for axis in axes_a) != Counter(axis.kind for axis in axes_b):
+            return 3
+        sizes_a = Counter((axis.kind, axis.size) for axis in axes_a)
+        sizes_b = Counter((axis.kind, axis.size) for axis in axes_b)
+        if sizes_a != sizes_b:
+            return 2
+        return 1
+
 
 class NmTensor(NeuralType):
     """Class representing data which flows between NeuralModules' ports.
diff --git a/requirements/requirements_test.txt b/requirements/requirements_test.txt
index 493b8268cfd1..544127fca734 100644
--- a/requirements/requirements_test.txt
+++ b/requirements/requirements_test.txt
@@ -4,3 +4,4 @@ pytest-runner
 black
 isort[requirements]
 wrapt
+onnxruntime
diff --git a/tests/test_actions_api.py b/tests/core/test_actions_api.py
similarity index 100%
rename from tests/test_actions_api.py
rename to tests/core/test_actions_api.py
diff --git a/tests/core/test_deploy_export.py b/tests/core/test_deploy_export.py
new file mode 100644
index 000000000000..6ef415c6c8cf
--- /dev/null
+++ b/tests/core/test_deploy_export.py
@@ -0,0 +1,149 @@
+# ! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 NVIDIA. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+# import os
+# from pathlib import Path
+#
+# # git clone git@github.com:microsoft/onnxruntime.git
+# # cd onnxruntime
+# # ./build.sh --update --build --config RelWithDebInfo --build_shared_lib --parallel --use_cuda \
+# #            --cudnn_home /usr/lib/x86_64-linux-gnu --cuda_home /usr/local/cuda --enable_pybind --build_wheel
+# # pip install --upgrade ./build/Linux/RelWithDebInfo/dist/onnxruntime_gpu-1.1.0-cp37-cp37m-linux_x86_64.whl
+# import onnxruntime as ort
+# import torch
+# from ruamel.yaml import YAML
+#
+# import nemo
+# import nemo.collections.asr as nemo_asr
+# import nemo.collections.nlp as nemo_nlp
+# import nemo.collections.nlp.nm.trainables.common.token_classification_nm
+# from tests.common_setup import NeMoUnitTest
+#
+#
+# class TestDeployExport(NeMoUnitTest):
+#     def setUp(self):
+#         """ Setups neural factory so it will use GPU instead of CPU. """
+#         NeMoUnitTest.setUp(self)
+#
+#         # Perform computations on GPU.
+#         self.nf._placement = nemo.core.DeviceType.GPU
+#
+#     def __test_export_route(self, module, out_name, mode, input_example=None):
+#         out = Path(out_name)
+#         if out.exists():
+#             os.remove(out)
+#
+#         self.nf.deployment_export(module=module, output=out_name, input_example=input_example, d_format=mode)
+#
+#         self.assertTrue(out.exists())
+#         if mode == nemo.core.DeploymentFormat.ONNX:
+#             if isinstance(input_example, tuple):
+#                 outputs_fwd = module.forward(*input_example)
+#             else:
+#                 outputs_fwd = module.forward(input_example)
+#             sess_options = ort.SessionOptions()
+#             sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
+#             ort_session = ort.InferenceSession(out_name, sess_options)
+#             inputs = dict()
+#             input_names = list(module.input_ports)
+#             for i in range(len(input_names)):
+#                 input_name = (
+#                     "encoded_lengths"
+#                     if type(module).__name__ == "JasperEncoder" and input_names[i] == "length"
+#                     else input_names[i]
+#                 )
+#                 inputs[input_name] = (
+#                     input_example[i].cpu().numpy() if isinstance(input_example, tuple) else input_example.cpu().numpy()
+#                 )
+#             outputs_ort = ort_session.run(None, inputs)
+#             outputs_ort = torch.from_numpy(outputs_ort[0]).cuda()
+#             self.assertLess(
+#                 (outputs_ort - (outputs_fwd[0] if isinstance(outputs_fwd, tuple) else outputs_fwd)).norm(p=2), 5.0e-4
+#             )
+#         if out.exists():
+#             os.remove(out)
+#
+#     def test_simple_module_export(self):
+#         simplest_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4)
+#         self.__test_export_route(
+#             module=simplest_module,
+#             out_name="simple.pt",
+#             mode=nemo.core.DeploymentFormat.TORCHSCRIPT,
+#             input_example=None,
+#         )
+#
+#     def test_TokenClassifier_module_export(self):
+#         t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier(
+#             hidden_size=512, num_classes=16, use_transformer_pretrained=False
+#         )
+#         self.__test_export_route(
+#             module=t_class,
+#             out_name="t_class.pt",
+#             mode=nemo.core.DeploymentFormat.TORCHSCRIPT,
+#             input_example=torch.randn(16, 16, 512).cuda(),
+#         )
+#
+#     def test_TokenClassifier_module_onnx_export(self):
+#         t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier(
+#             hidden_size=512, num_classes=16, use_transformer_pretrained=False
+#         )
+#         self.__test_export_route(
+#             module=t_class,
+#             out_name="t_class.onnx",
+#             mode=nemo.core.DeploymentFormat.ONNX,
+#             input_example=torch.randn(16, 16, 512).cuda(),
+#         )
+#
+#     def test_jasper_decoder_export_ts(self):
+#         j_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=33)
+#         self.__test_export_route(
+#             module=j_decoder, out_name="j_decoder.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=None
+#         )
+#
+#     def test_hf_bert_ts(self):
+#         bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased")
+#         input_example = (
+#             torch.randint(low=0, high=16, size=(2, 16)).cuda(),
+#             torch.randint(low=0, high=1, size=(2, 16)).cuda(),
+#             torch.randint(low=0, high=1, size=(2, 16)).cuda(),
+#         )
+#         self.__test_export_route(
+#             module=bert, out_name="bert.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=input_example
+#         )
+#
+#     def test_hf_bert_pt(self):
+#         bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased")
+#         self.__test_export_route(module=bert, out_name="bert.pt", mode=nemo.core.DeploymentFormat.PYTORCH)
+#
+#     def test_jasper_encoder_to_onnx(self):
+#         with open("tests/data/jasper_smaller.yaml") as file:
+#             yaml = YAML(typ="safe")
+#             jasper_model_definition = yaml.load(file)
+#
+#         jasper_encoder = nemo_asr.JasperEncoder(
+#             conv_mask=False,
+#             feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'],
+#             **jasper_model_definition['JasperEncoder']
+#         )
+#
+#         self.__test_export_route(
+#             module=jasper_encoder,
+#             out_name="jasper_encoder.onnx",
+#             mode=nemo.core.DeploymentFormat.ONNX,
+#             input_example=(torch.randn(16, 64, 256).cuda(), torch.randn(256).cuda()),
+#         )
diff --git a/tests/test_deprecated.py b/tests/core/test_deprecated.py
similarity index 100%
rename from tests/test_deprecated.py
rename to tests/core/test_deprecated.py
diff --git a/tests/test_infer.py b/tests/core/test_infer.py
similarity index 81%
rename from tests/test_infer.py
rename to tests/core/test_infer.py
index 05cec60c6fb9..e9611ea43967 100644
--- a/tests/test_infer.py
+++ b/tests/core/test_infer.py
@@ -30,11 +30,13 @@ def __init__(self):
 
     @property
     def input_ports(self):
-        return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})}
+        # return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})}
+        return {"mod_in": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))}
 
     @property
     def output_ports(self):
-        return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})}
+        # return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})}
+        return {"mod_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))}
 
     def forward(self, mod_in):
         return mod_in + 10
@@ -46,11 +48,11 @@ def __init__(self):
 
     @property
     def input_ports(self):
-        return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})}
+        return {"mod_in": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))}
 
     @property
     def output_ports(self):
-        return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})}
+        return {"mod_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))}
 
     def forward(self, mod_in):
         return mod_in - 10
@@ -66,7 +68,9 @@ def test_infer_caching(self):
             size=1,
             dtype=torch.FloatTensor,
             batch_size=1,
-            output_ports={"dl_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})},
+            output_ports={
+                "dl_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))
+            },
         )
         addten = AddsTen()
         minusten = SubtractsTen()
@@ -93,7 +97,9 @@ def test_infer_errors(self):
             size=1,
             dtype=torch.FloatTensor,
             batch_size=1,
-            output_ports={"dl_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})},
+            output_ports={
+                "dl_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))
+            },
         )
         addten = AddsTen()
         minusten = SubtractsTen()
diff --git a/tests/test_neural_factory.py b/tests/core/test_neural_factory.py
similarity index 100%
rename from tests/test_neural_factory.py
rename to tests/core/test_neural_factory.py
diff --git a/tests/test_neural_modules_initialization.py b/tests/core/test_neural_modules_initialization.py
similarity index 100%
rename from tests/test_neural_modules_initialization.py
rename to tests/core/test_neural_modules_initialization.py
diff --git a/tests/test_neural_modules_pytorch.py b/tests/core/test_neural_modules_pytorch.py
similarity index 90%
rename from tests/test_neural_modules_pytorch.py
rename to tests/core/test_neural_modules_pytorch.py
index 13ff0226262b..236844031e25 100644
--- a/tests/test_neural_modules_pytorch.py
+++ b/tests/core/test_neural_modules_pytorch.py
@@ -17,10 +17,13 @@
 # limitations under the License.
 # =============================================================================
 
+# TODO: These tests look bad/useless - redo
+
 import unittest
 
 import nemo
 from nemo.backends.pytorch.nm import TrainableNM
+from nemo.core.neural_types import ChannelType, NeuralType
 from tests.common_setup import NeMoUnitTest
 
 
@@ -67,15 +70,7 @@ def test_constructor_TaylorNet(self):
 
     def test_call_TaylorNet(self):
         x_tg = nemo.core.neural_modules.NmTensor(
-            producer=None,
-            producer_args=None,
-            name=None,
-            ntype=nemo.core.neural_types.NeuralType(
-                {
-                    0: nemo.core.neural_types.AxisType(nemo.core.neural_types.BatchTag),
-                    1: nemo.core.neural_types.AxisType(nemo.core.neural_types.ChannelTag),
-                }
-            ),
+            producer=None, producer_args=None, name=None, ntype=NeuralType(ChannelType(), ('B', 'D'))
         )
 
         tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4)
diff --git a/tests/core/test_neural_types.py b/tests/core/test_neural_types.py
index 51a1c9b18044..537813b76f07 100644
--- a/tests/core/test_neural_types.py
+++ b/tests/core/test_neural_types.py
@@ -15,7 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
-
+import nemo
 from nemo.core.neural_types import (
     AcousticEncodedRepresentation,
     AudioSignal,
@@ -24,9 +24,11 @@
     ChannelType,
     MelSpectrogramType,
     MFCCSpectrogramType,
+    NeuralPortNmTensorMismatchError,
     NeuralType,
     NeuralTypeComparisonResult,
     SpectrogramType,
+    VoidType,
 )
 from tests.common_setup import NeMoUnitTest
 
@@ -102,3 +104,66 @@ def test_list_of_lists(self):
         )
         # TODO: should this be incompatible instead???
         self.assertEqual(T1.compare(T2), NeuralTypeComparisonResult.TRANSPOSE_SAME)
+
+    def test_void(self):
+        btc_spctr = NeuralType(SpectrogramType(), ('B', 'T', 'C'))
+        btc_spct_bad = NeuralType(SpectrogramType(), ('B', 'T'))
+        btc_void = NeuralType(VoidType(), ('B', 'T', 'C'))
+        self.assertEqual(btc_void.compare(btc_spctr), NeuralTypeComparisonResult.SAME)
+        self.assertEqual(btc_spctr.compare(btc_void), NeuralTypeComparisonResult.INCOMPATIBLE)
+        self.assertEqual(btc_void.compare(btc_spct_bad), NeuralTypeComparisonResult.INCOMPATIBLE)
+
+    def test_big_void(self):
+        big_void_1 = NeuralType(VoidType())
+        big_void_2 = NeuralType()
+
+        btc_spctr = NeuralType(SpectrogramType(), ('B', 'T', 'C'))
+        btc_spct_bad = NeuralType(SpectrogramType(), ('B', 'T'))
+        t1 = NeuralType(
+            elements_type=ChannelType(),
+            axes=(
+                AxisType(kind=AxisKind.Batch, size=None, is_list=True),
+                AxisType(kind=AxisKind.Time, size=None, is_list=True),
+                AxisType(kind=AxisKind.Dimension, size=32, is_list=False),
+                AxisType(kind=AxisKind.Dimension, size=128, is_list=False),
+                AxisType(kind=AxisKind.Dimension, size=256, is_list=False),
+            ),
+        )
+        t2 = NeuralType(
+            elements_type=ChannelType(),
+            axes=(
+                AxisType(kind=AxisKind.Batch, size=None, is_list=False),
+                AxisType(kind=AxisKind.Time, size=None, is_list=False),
+                AxisType(kind=AxisKind.Dimension, size=32, is_list=False),
+                AxisType(kind=AxisKind.Dimension, size=128, is_list=False),
+                AxisType(kind=AxisKind.Dimension, size=256, is_list=False),
+            ),
+        )
+
+        self.assertEqual(big_void_1.compare(btc_spctr), NeuralTypeComparisonResult.SAME)
+        self.assertEqual(big_void_1.compare(btc_spct_bad), NeuralTypeComparisonResult.SAME)
+        self.assertEqual(big_void_1.compare(t1), NeuralTypeComparisonResult.SAME)
+        self.assertEqual(big_void_1.compare(t2), NeuralTypeComparisonResult.SAME)
+
+        self.assertEqual(big_void_2.compare(btc_spctr), NeuralTypeComparisonResult.SAME)
+        self.assertEqual(big_void_2.compare(btc_spct_bad), NeuralTypeComparisonResult.SAME)
+        self.assertEqual(big_void_2.compare(t1), NeuralTypeComparisonResult.SAME)
+        self.assertEqual(big_void_2.compare(t2), NeuralTypeComparisonResult.SAME)
+
+    def test_dag(self):
+        data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=128)
+        trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4)
+        loss = nemo.backends.pytorch.tutorials.MSELoss()
+        x, y = data_source()
+        y_pred = trainable_module(x=x)
+        _ = loss(predictions=y_pred, target=y)
+
+        def wrong():
+            data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=128)
+            trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4)
+            loss = nemo.backends.pytorch.tutorials.MSELoss()
+            x, y = data_source()
+            loss_tensor = loss(predictions=x, target=x)
+            _ = trainable_module(x=loss_tensor)
+
+        self.assertRaises(NeuralPortNmTensorMismatchError, wrong)
diff --git a/tests/test_policies.py b/tests/core/test_policies.py
similarity index 100%
rename from tests/test_policies.py
rename to tests/core/test_policies.py
diff --git a/tests/test_deploy_export.py b/tests/test_deploy_export.py
deleted file mode 100644
index be6a1a39573c..000000000000
--- a/tests/test_deploy_export.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# ! /usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright 2019 NVIDIA. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-import os
-from pathlib import Path
-
-# git clone git@github.com:microsoft/onnxruntime.git
-# cd onnxruntime
-# ./build.sh --update --build --config RelWithDebInfo --build_shared_lib --parallel --use_cuda \
-#            --cudnn_home /usr/lib/x86_64-linux-gnu --cuda_home /usr/local/cuda --enable_pybind --build_wheel
-# pip install --upgrade ./build/Linux/RelWithDebInfo/dist/onnxruntime_gpu-1.1.0-cp37-cp37m-linux_x86_64.whl
-import onnxruntime as ort
-import torch
-from ruamel.yaml import YAML
-
-import nemo
-import nemo.collections.asr as nemo_asr
-import nemo.collections.nlp as nemo_nlp
-import nemo.collections.nlp.nm.trainables.common.token_classification_nm
-from tests.common_setup import NeMoUnitTest
-
-
-class TestDeployExport(NeMoUnitTest):
-    def setUp(self):
-        """ Setups neural factory so it will use GPU instead of CPU. """
-        NeMoUnitTest.setUp(self)
-
-        # Perform computations on GPU.
-        self.nf._placement = nemo.core.DeviceType.GPU
-
-    def __test_export_route(self, module, out_name, mode, input_example=None):
-        out = Path(out_name)
-        if out.exists():
-            os.remove(out)
-
-        self.nf.deployment_export(module=module, output=out_name, input_example=input_example, d_format=mode)
-
-        self.assertTrue(out.exists())
-        if mode == nemo.core.DeploymentFormat.ONNX:
-            if isinstance(input_example, tuple):
-                outputs_fwd = module.forward(*input_example)
-            else:
-                outputs_fwd = module.forward(input_example)
-            sess_options = ort.SessionOptions()
-            sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
-            ort_session = ort.InferenceSession(out_name, sess_options)
-            inputs = dict()
-            input_names = list(module.input_ports)
-            for i in range(len(input_names)):
-                input_name = (
-                    "encoded_lengths"
-                    if type(module).__name__ == "JasperEncoder" and input_names[i] == "length"
-                    else input_names[i]
-                )
-                inputs[input_name] = (
-                    input_example[i].cpu().numpy() if isinstance(input_example, tuple) else input_example.cpu().numpy()
-                )
-            outputs_ort = ort_session.run(None, inputs)
-            outputs_ort = torch.from_numpy(outputs_ort[0]).cuda()
-            self.assertLess(
-                (outputs_ort - (outputs_fwd[0] if isinstance(outputs_fwd, tuple) else outputs_fwd)).norm(p=2), 5.0e-4
-            )
-        if out.exists():
-            os.remove(out)
-
-    def test_simple_module_export(self):
-        simplest_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4)
-        self.__test_export_route(
-            module=simplest_module,
-            out_name="simple.pt",
-            mode=nemo.core.DeploymentFormat.TORCHSCRIPT,
-            input_example=None,
-        )
-
-    def test_TokenClassifier_module_export(self):
-        t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier(
-            hidden_size=512, num_classes=16, use_transformer_pretrained=False
-        )
-        self.__test_export_route(
-            module=t_class,
-            out_name="t_class.pt",
-            mode=nemo.core.DeploymentFormat.TORCHSCRIPT,
-            input_example=torch.randn(16, 16, 512).cuda(),
-        )
-
-    def test_TokenClassifier_module_onnx_export(self):
-        t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier(
-            hidden_size=512, num_classes=16, use_transformer_pretrained=False
-        )
-        self.__test_export_route(
-            module=t_class,
-            out_name="t_class.onnx",
-            mode=nemo.core.DeploymentFormat.ONNX,
-            input_example=torch.randn(16, 16, 512).cuda(),
-        )
-
-    def test_jasper_decoder_export_ts(self):
-        j_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=33)
-        self.__test_export_route(
-            module=j_decoder, out_name="j_decoder.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=None
-        )
-
-    def test_hf_bert_ts(self):
-        bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased")
-        input_example = (
-            torch.randint(low=0, high=16, size=(2, 16)).cuda(),
-            torch.randint(low=0, high=1, size=(2, 16)).cuda(),
-            torch.randint(low=0, high=1, size=(2, 16)).cuda(),
-        )
-        self.__test_export_route(
-            module=bert, out_name="bert.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=input_example
-        )
-
-    def test_hf_bert_pt(self):
-        bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased")
-        self.__test_export_route(module=bert, out_name="bert.pt", mode=nemo.core.DeploymentFormat.PYTORCH)
-
-    def test_jasper_encoder_to_onnx(self):
-        with open("tests/data/jasper_smaller.yaml") as file:
-            yaml = YAML(typ="safe")
-            jasper_model_definition = yaml.load(file)
-
-        jasper_encoder = nemo_asr.JasperEncoder(
-            conv_mask=False,
-            feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'],
-            **jasper_model_definition['JasperEncoder']
-        )
-
-        self.__test_export_route(
-            module=jasper_encoder,
-            out_name="jasper_encoder.onnx",
-            mode=nemo.core.DeploymentFormat.ONNX,
-            input_example=(torch.randn(16, 64, 256).cuda(), torch.randn(256).cuda()),
-        )
diff --git a/tests/test_neural_types.py b/tests/test_neural_types.py
deleted file mode 100644
index c2741ca3d7c6..000000000000
--- a/tests/test_neural_types.py
+++ /dev/null
@@ -1,258 +0,0 @@
-# ! /usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright 2019 NVIDIA. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-import tarfile
-
-from ruamel.yaml import YAML
-
-import nemo.collections.asr as nemo_asr
-from nemo import logging
-from nemo.core import *
-from tests.common_setup import NeMoUnitTest
-
-
-class TestNeuralTypes(NeMoUnitTest):
-    manifest_filepath = "tests/data/asr/an4_train.json"
-    yaml = YAML(typ="safe")
-
-    def setUp(self) -> None:
-        super().setUp()
-        data_folder = "tests/data/"
-        logging.info("Looking up for test ASR data")
-        if not os.path.exists(data_folder + "asr"):
-            logging.info("Extracting ASR data to: {0}".format(data_folder + "asr"))
-            tar = tarfile.open("tests/data/asr.tar.gz", "r:gz")
-            tar.extractall(path=data_folder)
-            tar.close()
-        else:
-            logging.info("ASR data found in: {0}".format(data_folder + "asr"))
-
-    def test_same(self):
-        btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})
-        btc2 = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})
-        self.assertEqual(btc2.compare(btc), NeuralTypeComparisonResult.SAME)
-
-    def test_transpose_same(self):
-        btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})
-        tbc = NeuralType(axis2type={1: AxisType(BatchTag), 0: AxisType(TimeTag), 2: AxisType(ChannelTag)})
-
-        self.assertEqual(btc.compare(tbc), NeuralTypeComparisonResult.TRANSPOSE_SAME)
-        self.assertEqual(tbc.compare(btc), NeuralTypeComparisonResult.TRANSPOSE_SAME)
-
-    def test_dim_incompatible(self):
-        nchw1 = NeuralType(
-            axis2type={
-                0: AxisType(BatchTag),
-                1: AxisType(ChannelTag),
-                2: AxisType(HeightTag, 224),
-                3: AxisType(WidthTag, 224),
-            }
-        )
-        nchw2 = NeuralType(
-            axis2type={
-                0: AxisType(BatchTag),
-                1: AxisType(ChannelTag),
-                2: AxisType(HeightTag, 256),
-                3: AxisType(WidthTag, 256),
-            }
-        )
-        self.assertEqual(nchw1.compare(nchw2), NeuralTypeComparisonResult.DIM_INCOMPATIBLE)
-
-    def test_rank_incompatible(self):
-        btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})
-        nchw = NeuralType(
-            axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(HeightTag), 3: AxisType(WidthTag)}
-        )
-        self.assertEqual(nchw.compare(btc), NeuralTypeComparisonResult.INCOMPATIBLE)
-
-    def test_axis_type(self):
-        ax1 = AxisType(BatchTag)
-        ax2 = AxisType(TimeTag)
-        ax3 = AxisType(ProcessedTimeTag)
-        self.assertEqual(ax1.compare_to(ax2), NeuralTypeComparisonResult.INCOMPATIBLE)
-        self.assertEqual(ax3.compare_to(ax2), NeuralTypeComparisonResult.LESS)
-        self.assertEqual(ax2.compare_to(ax3), NeuralTypeComparisonResult.GREATER)
-        self.assertEqual(ax2.compare_to(AxisType(TimeTag)), NeuralTypeComparisonResult.SAME)
-
-    def test_semantic_incompatible(self):
-        nchw = NeuralType(
-            axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(HeightTag), 3: AxisType(WidthTag)}
-        )
-        badd = NeuralType(
-            axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag), 3: AxisType(WidthTag)}
-        )
-        self.assertEqual(nchw.compare(badd), NeuralTypeComparisonResult.INCOMPATIBLE)
-        self.assertEqual(badd.compare(nchw), NeuralTypeComparisonResult.INCOMPATIBLE)
-
-    def test_root(self):
-        root = NeuralType({})
-        non_tensor = NeuralType(None)
-        btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})
-        nchw = NeuralType(
-            axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(HeightTag), 3: AxisType(WidthTag)}
-        )
-        self.assertEqual(root.compare(btc), NeuralTypeComparisonResult.SAME)
-        self.assertEqual(root.compare(nchw), NeuralTypeComparisonResult.SAME)
-        self.assertEqual(root.compare(non_tensor), NeuralTypeComparisonResult.SAME)
-
-        self.assertEqual(non_tensor.compare(root), NeuralTypeComparisonResult.INCOMPATIBLE)
-        self.assertEqual(btc.compare(root), NeuralTypeComparisonResult.INCOMPATIBLE)
-        self.assertEqual(nchw.compare(root), NeuralTypeComparisonResult.INCOMPATIBLE)
-
-    def test_combiner_type_infer(self):
-        combiner = nemo.backends.pytorch.common.SimpleCombiner(mode="add")
-        x_tg = nemo.core.NmTensor(
-            producer=None, producer_args=None, name=None, ntype=NeuralType({0: AxisType(BatchTag)})
-        )
-        y_tg = nemo.core.NmTensor(
-            producer=None, producer_args=None, name=None, ntype=NeuralType({0: AxisType(BatchTag)})
-        )
-        res = combiner(x1=y_tg, x2=x_tg)
-        self.assertEqual(res.compare(x_tg), NeuralTypeComparisonResult.SAME)
-        self.assertEqual(res.compare(y_tg), NeuralTypeComparisonResult.SAME)
-        self.assertEqual(x_tg.compare(res), NeuralTypeComparisonResult.SAME)
-        self.assertEqual(y_tg.compare(res), NeuralTypeComparisonResult.SAME)
-
-        combiner1 = nemo.backends.pytorch.common.SimpleCombiner(mode="add")
-        x_tg1 = NmTensor(
-            producer=None,
-            producer_args=None,
-            name=None,
-            ntype=NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
-        )
-        y_tg1 = NmTensor(
-            producer=None,
-            producer_args=None,
-            name=None,
-            ntype=NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
-        )
-        res1 = combiner1(x1=y_tg1, x2=x_tg1)
-        self.assertEqual(res1.compare(x_tg1), NeuralTypeComparisonResult.SAME)
-        self.assertEqual(res1.compare(y_tg1), NeuralTypeComparisonResult.SAME)
-        self.assertEqual(x_tg1.compare(res1), NeuralTypeComparisonResult.SAME)
-        self.assertEqual(y_tg1.compare(res1), NeuralTypeComparisonResult.SAME)
-
-    def test_optional_input_no_input(self):
-        data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=128)
-        trainable_module = nemo.backends.pytorch.tutorials.TaylorNetO(dim=4)
-        loss = nemo.backends.pytorch.tutorials.MSELoss()
-        x, y = data_source()
-        y_pred = trainable_module(x=x)
-        loss_tensor = loss(predictions=y_pred, target=y)
-
-        optimizer = nemo.backends.pytorch.actions.PtActions()
-        optimizer.train(
-            tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1}
-        )
-
-    def test_optional_input_no_with_input(self):
-        data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=128)
-        trainable_module = nemo.backends.pytorch.tutorials.TaylorNetO(dim=4)
-        loss = nemo.backends.pytorch.tutorials.MSELoss()
-        x, y = data_source()
-        y_pred = trainable_module(x=x, o=x)
-        loss_tensor = loss(predictions=y_pred, target=y)
-        optimizer = nemo.backends.pytorch.actions.PtActions()
-        optimizer.train(
-            tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1}
-        )
-
-    def test_optional_input_no_with_wrong_input(self):
-        def wrong_fn():
-            data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=128)
-            trainable_module = nemo.backends.pytorch.tutorials.TaylorNetO(dim=4)
-            loss = nemo.backends.pytorch.tutorials.MSELoss()
-            x, y = data_source()
-            wrong_optional = NmTensor(
-                producer=None,
-                producer_args=None,
-                name=None,
-                ntype=NeuralType({0: AxisType(ChannelTag), 1: AxisType(BatchTag)}),
-            )
-            y_pred = trainable_module(x=x, o=wrong_optional)
-            loss_tensor = loss(predictions=y_pred, target=y)
-            optimizer = nemo.backends.pytorch.actions.PtActions()
-            optimizer.train(
-                tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1}
-            )
-
-        self.assertRaises(NeuralPortNmTensorMismatchError, wrong_fn)
-
-    def test_simple_dags(self):
-        # module instantiation
-        with open("tests/data/jasper_smaller.yaml") as file:
-            jasper_model_definition = self.yaml.load(file)
-        labels = jasper_model_definition['labels']
-
-        data_layer = nemo_asr.AudioToTextDataLayer(
-            manifest_filepath=self.manifest_filepath, labels=labels, batch_size=4
-        )
-        data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
-            **jasper_model_definition['AudioToMelSpectrogramPreprocessor']
-        )
-        jasper_encoder = nemo_asr.JasperEncoder(
-            feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'],
-            **jasper_model_definition['JasperEncoder'],
-        )
-        jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(labels))
-        ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels))
-        greedy_decoder = nemo_asr.GreedyCTCDecoder()
-
-        # DAG definition
-        (audio_signal, audio_signal_len, transcript, transcript_len) = data_layer()
-        processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len)
-
-        spec_augment = nemo_asr.SpectrogramAugmentation(rect_masks=5)
-        aug_signal = spec_augment(input_spec=processed_signal)
-
-        encoded, encoded_len = jasper_encoder(audio_signal=aug_signal, length=processed_signal_len)
-        log_probs = jasper_decoder(encoder_output=encoded)
-        predictions = greedy_decoder(log_probs=log_probs)
-        loss = ctc_loss(
-            log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len
-        )
-
-        def wrong():
-            with open("tests/data/jasper_smaller.yaml") as file:
-                jasper_config = self.yaml.load(file)
-            labels = jasper_config['labels']
-
-            data_layer = nemo_asr.AudioToTextDataLayer(
-                manifest_filepath=self.manifest_filepath, labels=labels, batch_size=4
-            )
-            data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
-                **jasper_config['AudioToMelSpectrogramPreprocessor']
-            )
-            jasper_encoder = nemo_asr.JasperEncoder(
-                feat_in=jasper_config['AudioToMelSpectrogramPreprocessor']['features'],
-                **jasper_config['JasperEncoder'],
-            )
-            jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(labels))
-            # DAG definition
-            (audio_signal, audio_signal_len, transcript, transcript_len) = data_layer()
-            processed_signal, processed_signal_len = data_preprocessor(
-                input_signal=audio_signal, length=audio_signal_len
-            )
-
-            spec_augment = nemo_asr.SpectrogramAugmentation(rect_masks=5)
-            aug_signal = spec_augment(input_spec=processed_signal)
-
-            encoded, encoded_len = jasper_encoder(audio_signal=aug_signal, length=processed_signal_len)
-            log_probs = jasper_decoder(encoder_output=processed_signal)
-
-        self.assertRaises(NeuralPortNmTensorMismatchError, wrong)
diff --git a/tests/test_tutorials_pytorch.py b/tests/test_tutorials_pytorch.py
deleted file mode 100644
index 183fd67e1d1b..000000000000
--- a/tests/test_tutorials_pytorch.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# # ! /usr/bin/python
-# # -*- coding: utf-8 -*-
-#
-# # Copyright 2019 NVIDIA. All Rights Reserved.
-# #
-# # Licensed under the Apache License, Version 2.0 (the "License");
-# # you may not use this file except in compliance with the License.
-# # You may obtain a copy of the License at
-# #
-# #     http://www.apache.org/licenses/LICENSE-2.0
-# #
-# # Unless required by applicable law or agreed to in writing, software
-# # distributed under the License is distributed on an "AS IS" BASIS,
-# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# # See the License for the specific language governing permissions and
-# # limitations under the License.
-# # =============================================================================
-#
-# from .common_setup import NeMoUnitTest
-# from nemo.backends.pytorch.tutorials.chatbot.data import loadPrepareData
-#
-#
-# class TestPytorchChatBotTutorial(NeMoUnitTest):
-#     def test_simple_train(self):
-#         datafile = "tests/data/dialog_sample.txt"
-#         logging.info(datafile)
-#         voc, pairs = loadPrepareData("cornell", datafile=datafile)
-#         self.assertEqual(voc.name, 'cornell')
-#         self.assertEqual(voc.num_words, 675)