diff --git a/Jenkinsfile b/Jenkinsfile index d0d2b0eaa5b1..7c2ee564dac4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -24,9 +24,9 @@ pipeline { sh 'python setup.py style' } } - stage('Unittests general') { + stage('Unittests Core') { steps { - sh './reinstall.sh && python -m unittest tests/*.py' + sh './reinstall.sh && python -m unittest tests/core/*.py' } } stage('Unittests ASR') { diff --git a/nemo/backends/pytorch/common/zero_data.py b/nemo/backends/pytorch/common/zero_data.py index 0c7b14fe1a11..18f366c46140 100644 --- a/nemo/backends/pytorch/common/zero_data.py +++ b/nemo/backends/pytorch/common/zero_data.py @@ -18,11 +18,11 @@ def neuralType2TensorShape(neural_type: NeuralType, default_dim=32, skip_batch_a torch.Size """ dims = [] - for axis_ind, axis_type in neural_type.axis2type.items(): - if axis_type._semantics == BatchTag and skip_batch_axis: + for axis in neural_type.axes: + if axis.kind == AxisKind.Batch and skip_batch_axis: continue - if axis_type.dim is not None: - dims.append(axis_type.dim) + if axis.size is not None: + dims.append(axis.size) else: dims.append(default_dim) return torch.Size(dims) diff --git a/nemo/core/neural_types/axes.py b/nemo/core/neural_types/axes.py index 5efba5d20ca7..acb9a27646f2 100644 --- a/nemo/core/neural_types/axes.py +++ b/nemo/core/neural_types/axes.py @@ -69,7 +69,7 @@ class AxisType(object): is_list (bool, default=False): """ - def __init__(self, kind: AxisKindAbstract, size: Optional[int], is_list=False): + def __init__(self, kind: AxisKindAbstract, size: Optional[int] = None, is_list=False): if size is not None and is_list: raise ValueError("The axis can't be list and have a fixed size") self.kind = kind diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index f4c4d12445b4..37f35867a159 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -28,6 +28,9 @@ 'LabelsType', 'LossType', 'RegressionValuesType', + 'PredictionsType', + 'LogprobsType', + 
'LengthsType', ] import abc from abc import ABC, abstractmethod @@ -55,7 +58,6 @@ def fields(self) -> Optional[Tuple]: def compare(self, second) -> NeuralTypeComparisonResult: # First, check general compatibility - result = NeuralTypeComparisonResult.SAME first_t = type(self) second_t = type(second) @@ -108,11 +110,21 @@ def __str__(self): return "neural type representing logits" +class LogprobsType(ElementType): + def __str__(self): + return "neural type representing log probabilities" + + class LabelsType(ElementType): def __str__(self): return "neural type representing labels" +class LengthsType(ElementType): + def __str__(self): + return "neural type representing lengths of something" + + class LossType(ElementType): def __str__(self): return "neural type representing loss value" @@ -151,6 +163,11 @@ def __str__(self): return "mfcc spectorgram type" -class RegressionValuesType(ElementType): +class PredictionsType(ElementType): + def __str__(self): + return "predictions values type" + + +class RegressionValuesType(PredictionsType): def __str__(self): return "regression values type" diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py index a2df777c9296..9cb7513963e4 100644 --- a/nemo/core/neural_types/neural_type.py +++ b/nemo/core/neural_types/neural_type.py @@ -44,7 +44,7 @@ def __init__(self, elements_type: ElementType = VoidType(), axes: Optional[Tuple ) self.elements_type = elements_type if axes is not None: - self.__check_sanity(axes) + NeuralType.__check_sanity(axes) axes_list = [] for axis in axes: if isinstance(axis, str): @@ -63,9 +63,7 @@ def compare(self, second) -> NeuralTypeComparisonResult: axes_a = self.axes axes_b = second.axes - kinds_a = dict() - kinds_b = dict() - + # "Big void" type if isinstance(self.elements_type, VoidType) and self.axes is None: return NeuralTypeComparisonResult.SAME @@ -75,28 +73,29 @@ def compare(self, second) -> NeuralTypeComparisonResult: else: return 
NeuralTypeComparisonResult.INCOMPATIBLE - dimensions_pass = True - for axis_a, axis_b in zip(axes_a, axes_b): - kinds_a[axis_a.kind] = axis_a.size - kinds_b[axis_b.kind] = axis_b.size - if axis_a.kind != axis_b.kind or axis_a.is_list != axis_b.is_list: - dimensions_pass = False - - if kinds_a.keys() != kinds_b.keys(): - return NeuralTypeComparisonResult.INCOMPATIBLE - for kind, size in kinds_a.items(): - if size != kinds_b[kind]: - return NeuralTypeComparisonResult.DIM_INCOMPATIBLE - + dimensions_pass = NeuralType.__compare_axes(axes_a, axes_b) element_comparison_result = self.elements_type.compare(second.elements_type) - if dimensions_pass: + + # SAME DIMS + if dimensions_pass == 0: return element_comparison_result - elif element_comparison_result == NeuralTypeComparisonResult.SAME: - return NeuralTypeComparisonResult.TRANSPOSE_SAME + # TRANSPOSE_SAME DIMS + elif dimensions_pass == 1: + if element_comparison_result == NeuralTypeComparisonResult.SAME: + return NeuralTypeComparisonResult.TRANSPOSE_SAME + else: + return NeuralTypeComparisonResult.INCOMPATIBLE + # DIM_INCOMPATIBLE DIMS + elif dimensions_pass == 2: + if element_comparison_result == NeuralTypeComparisonResult.SAME: + return NeuralTypeComparisonResult.DIM_INCOMPATIBLE + else: + return NeuralTypeComparisonResult.INCOMPATIBLE else: return NeuralTypeComparisonResult.INCOMPATIBLE - def __check_sanity(self, axes): + @staticmethod + def __check_sanity(axes): # check that list come before any tensor dimension are_strings = True for axis in axes: @@ -119,6 +118,50 @@ def __check_sanity(self, axes): "You have list dimension after Tensor dimension. 
All list dimensions must preceed Tensor dimensions" ) + @staticmethod + def __compare_axes(axes_a, axes_b) -> int: + """ + Compares axes_a and axes_b + Args: + axes_a: first axes tuple + axes_b: second axes tuple + + Returns: + 0 - if they are exactly the same + 1 - if they are "TRANSPOSE_SAME" + 2 - if they are "DIM_INCOMPATIBLE" + 3 - if they are different + """ + if axes_a is None and axes_b is None: + return 0 + elif axes_a is None and axes_b is not None: + return 3 + elif axes_a is not None and axes_b is None: + return 3 + elif len(axes_a) != len(axes_b): + return 3 + # After these ifs we know that len(axes_a) == len(axes_b) + + same = True + kinds_a = dict() + kinds_b = dict() + for axis_a, axis_b in zip(axes_a, axes_b): + kinds_a[axis_a.kind] = axis_a.size + kinds_b[axis_b.kind] = axis_b.size + if axis_a.kind != axis_b.kind or axis_a.is_list != axis_b.is_list or axis_a.size != axis_b.size: + same = False + if same: + return 0 + else: + # can be TRANSPOSE_SAME, DIM_INCOMPATIBLE + if kinds_a.keys() == kinds_b.keys(): + for key, value in kinds_a.items(): + if kinds_b[key] != value: + return 2 + return 1 + else: + return 3 + class NmTensor(NeuralType): """Class representing data which flows between NeuralModules' ports. diff --git a/requirements/requirements_test.txt b/requirements/requirements_test.txt index 493b8268cfd1..544127fca734 100644 --- a/requirements/requirements_test.txt +++ b/requirements/requirements_test.txt @@ -4,3 +4,4 @@ pytest-runner black isort[requirements] wrapt +onnxruntime diff --git a/tests/test_actions_api.py b/tests/core/test_actions_api.py similarity index 100% rename from tests/test_actions_api.py rename to tests/core/test_actions_api.py diff --git a/tests/core/test_deploy_export.py b/tests/core/test_deploy_export.py new file mode 100644 index 000000000000..6ef415c6c8cf --- /dev/null +++ b/tests/core/test_deploy_export.py @@ -0,0 +1,149 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2019 NVIDIA. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# import os +# from pathlib import Path +# +# # git clone git@github.com:microsoft/onnxruntime.git +# # cd onnxruntime +# # ./build.sh --update --build --config RelWithDebInfo --build_shared_lib --parallel --use_cuda \ +# # --cudnn_home /usr/lib/x86_64-linux-gnu --cuda_home /usr/local/cuda --enable_pybind --build_wheel +# # pip install --upgrade ./build/Linux/RelWithDebInfo/dist/onnxruntime_gpu-1.1.0-cp37-cp37m-linux_x86_64.whl +# import onnxruntime as ort +# import torch +# from ruamel.yaml import YAML +# +# import nemo +# import nemo.collections.asr as nemo_asr +# import nemo.collections.nlp as nemo_nlp +# import nemo.collections.nlp.nm.trainables.common.token_classification_nm +# from tests.common_setup import NeMoUnitTest +# +# +# class TestDeployExport(NeMoUnitTest): +# def setUp(self): +# """ Setups neural factory so it will use GPU instead of CPU. """ +# NeMoUnitTest.setUp(self) +# +# # Perform computations on GPU. 
+# self.nf._placement = nemo.core.DeviceType.GPU +# +# def __test_export_route(self, module, out_name, mode, input_example=None): +# out = Path(out_name) +# if out.exists(): +# os.remove(out) +# +# self.nf.deployment_export(module=module, output=out_name, input_example=input_example, d_format=mode) +# +# self.assertTrue(out.exists()) +# if mode == nemo.core.DeploymentFormat.ONNX: +# if isinstance(input_example, tuple): +# outputs_fwd = module.forward(*input_example) +# else: +# outputs_fwd = module.forward(input_example) +# sess_options = ort.SessionOptions() +# sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED +# ort_session = ort.InferenceSession(out_name, sess_options) +# inputs = dict() +# input_names = list(module.input_ports) +# for i in range(len(input_names)): +# input_name = ( +# "encoded_lengths" +# if type(module).__name__ == "JasperEncoder" and input_names[i] == "length" +# else input_names[i] +# ) +# inputs[input_name] = ( +# input_example[i].cpu().numpy() if isinstance(input_example, tuple) else input_example.cpu().numpy() +# ) +# outputs_ort = ort_session.run(None, inputs) +# outputs_ort = torch.from_numpy(outputs_ort[0]).cuda() +# self.assertLess( +# (outputs_ort - (outputs_fwd[0] if isinstance(outputs_fwd, tuple) else outputs_fwd)).norm(p=2), 5.0e-4 +# ) +# if out.exists(): +# os.remove(out) +# +# def test_simple_module_export(self): +# simplest_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) +# self.__test_export_route( +# module=simplest_module, +# out_name="simple.pt", +# mode=nemo.core.DeploymentFormat.TORCHSCRIPT, +# input_example=None, +# ) +# +# def test_TokenClassifier_module_export(self): +# t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( +# hidden_size=512, num_classes=16, use_transformer_pretrained=False +# ) +# self.__test_export_route( +# module=t_class, +# out_name="t_class.pt", +# mode=nemo.core.DeploymentFormat.TORCHSCRIPT, +# 
input_example=torch.randn(16, 16, 512).cuda(), +# ) +# +# def test_TokenClassifier_module_onnx_export(self): +# t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( +# hidden_size=512, num_classes=16, use_transformer_pretrained=False +# ) +# self.__test_export_route( +# module=t_class, +# out_name="t_class.onnx", +# mode=nemo.core.DeploymentFormat.ONNX, +# input_example=torch.randn(16, 16, 512).cuda(), +# ) +# +# def test_jasper_decoder_export_ts(self): +# j_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=33) +# self.__test_export_route( +# module=j_decoder, out_name="j_decoder.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=None +# ) +# +# def test_hf_bert_ts(self): +# bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") +# input_example = ( +# torch.randint(low=0, high=16, size=(2, 16)).cuda(), +# torch.randint(low=0, high=1, size=(2, 16)).cuda(), +# torch.randint(low=0, high=1, size=(2, 16)).cuda(), +# ) +# self.__test_export_route( +# module=bert, out_name="bert.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=input_example +# ) +# +# def test_hf_bert_pt(self): +# bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") +# self.__test_export_route(module=bert, out_name="bert.pt", mode=nemo.core.DeploymentFormat.PYTORCH) +# +# def test_jasper_encoder_to_onnx(self): +# with open("tests/data/jasper_smaller.yaml") as file: +# yaml = YAML(typ="safe") +# jasper_model_definition = yaml.load(file) +# +# jasper_encoder = nemo_asr.JasperEncoder( +# conv_mask=False, +# feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], +# **jasper_model_definition['JasperEncoder'] +# ) +# +# self.__test_export_route( +# module=jasper_encoder, +# out_name="jasper_encoder.onnx", +# mode=nemo.core.DeploymentFormat.ONNX, +# input_example=(torch.randn(16, 64, 
256).cuda(), torch.randn(256).cuda()), +# ) diff --git a/tests/test_deprecated.py b/tests/core/test_deprecated.py similarity index 100% rename from tests/test_deprecated.py rename to tests/core/test_deprecated.py diff --git a/tests/test_infer.py b/tests/core/test_infer.py similarity index 81% rename from tests/test_infer.py rename to tests/core/test_infer.py index 05cec60c6fb9..e9611ea43967 100644 --- a/tests/test_infer.py +++ b/tests/core/test_infer.py @@ -30,11 +30,13 @@ def __init__(self): @property def input_ports(self): - return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + # return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + return {"mod_in": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))} @property def output_ports(self): - return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + # return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + return {"mod_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))} def forward(self, mod_in): return mod_in + 10 @@ -46,11 +48,11 @@ def __init__(self): @property def input_ports(self): - return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + return {"mod_in": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))} @property def output_ports(self): - return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + return {"mod_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))} def forward(self, mod_in): return mod_in - 10 @@ -66,7 +68,9 @@ def test_infer_caching(self): size=1, dtype=torch.FloatTensor, batch_size=1, - output_ports={"dl_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})}, + output_ports={ + "dl_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), 
AxisType(AxisKind.Dimension, 1))) + }, ) addten = AddsTen() minusten = SubtractsTen() @@ -93,7 +97,9 @@ def test_infer_errors(self): size=1, dtype=torch.FloatTensor, batch_size=1, - output_ports={"dl_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})}, + output_ports={ + "dl_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1))) + }, ) addten = AddsTen() minusten = SubtractsTen() diff --git a/tests/test_neural_factory.py b/tests/core/test_neural_factory.py similarity index 100% rename from tests/test_neural_factory.py rename to tests/core/test_neural_factory.py diff --git a/tests/test_neural_modules_initialization.py b/tests/core/test_neural_modules_initialization.py similarity index 100% rename from tests/test_neural_modules_initialization.py rename to tests/core/test_neural_modules_initialization.py diff --git a/tests/test_neural_modules_pytorch.py b/tests/core/test_neural_modules_pytorch.py similarity index 90% rename from tests/test_neural_modules_pytorch.py rename to tests/core/test_neural_modules_pytorch.py index 13ff0226262b..236844031e25 100644 --- a/tests/test_neural_modules_pytorch.py +++ b/tests/core/test_neural_modules_pytorch.py @@ -17,10 +17,13 @@ # limitations under the License. 
# ============================================================================= +# TODO: These tests look bad/useless - redo + import unittest import nemo from nemo.backends.pytorch.nm import TrainableNM +from nemo.core.neural_types import ChannelType, NeuralType from tests.common_setup import NeMoUnitTest @@ -67,15 +70,7 @@ def test_constructor_TaylorNet(self): def test_call_TaylorNet(self): x_tg = nemo.core.neural_modules.NmTensor( - producer=None, - producer_args=None, - name=None, - ntype=nemo.core.neural_types.NeuralType( - { - 0: nemo.core.neural_types.AxisType(nemo.core.neural_types.BatchTag), - 1: nemo.core.neural_types.AxisType(nemo.core.neural_types.ChannelTag), - } - ), + producer=None, producer_args=None, name=None, ntype=NeuralType(ChannelType(), ('B', 'D')) ) tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) diff --git a/tests/core/test_neural_types.py b/tests/core/test_neural_types.py index 51a1c9b18044..537813b76f07 100644 --- a/tests/core/test_neural_types.py +++ b/tests/core/test_neural_types.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= - +import nemo from nemo.core.neural_types import ( AcousticEncodedRepresentation, AudioSignal, @@ -24,9 +24,11 @@ ChannelType, MelSpectrogramType, MFCCSpectrogramType, + NeuralPortNmTensorMismatchError, NeuralType, NeuralTypeComparisonResult, SpectrogramType, + VoidType, ) from tests.common_setup import NeMoUnitTest @@ -102,3 +104,66 @@ def test_list_of_lists(self): ) # TODO: should this be incompatible instead??? 
self.assertEqual(T1.compare(T2), NeuralTypeComparisonResult.TRANSPOSE_SAME) + + def test_void(self): + btc_spctr = NeuralType(SpectrogramType(), ('B', 'T', 'C')) + btc_spct_bad = NeuralType(SpectrogramType(), ('B', 'T')) + btc_void = NeuralType(VoidType(), ('B', 'T', 'C')) + self.assertEqual(btc_void.compare(btc_spctr), NeuralTypeComparisonResult.SAME) + self.assertEqual(btc_spctr.compare(btc_void), NeuralTypeComparisonResult.INCOMPATIBLE) + self.assertEqual(btc_void.compare(btc_spct_bad), NeuralTypeComparisonResult.INCOMPATIBLE) + + def test_big_void(self): + big_void_1 = NeuralType(VoidType()) + big_void_2 = NeuralType() + + btc_spctr = NeuralType(SpectrogramType(), ('B', 'T', 'C')) + btc_spct_bad = NeuralType(SpectrogramType(), ('B', 'T')) + t1 = NeuralType( + elements_type=ChannelType(), + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind.Time, size=None, is_list=True), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + ) + t2 = NeuralType( + elements_type=ChannelType(), + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + ) + + self.assertEqual(big_void_1.compare(btc_spctr), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_1.compare(btc_spct_bad), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_1.compare(t1), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_1.compare(t2), NeuralTypeComparisonResult.SAME) + + self.assertEqual(big_void_2.compare(btc_spctr), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_2.compare(btc_spct_bad), NeuralTypeComparisonResult.SAME) + 
self.assertEqual(big_void_2.compare(t1), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_2.compare(t2), NeuralTypeComparisonResult.SAME) + + def test_dag(self): + data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=128) + trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) + loss = nemo.backends.pytorch.tutorials.MSELoss() + x, y = data_source() + y_pred = trainable_module(x=x) + _ = loss(predictions=y_pred, target=y) + + def wrong(): + data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=128) + trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) + loss = nemo.backends.pytorch.tutorials.MSELoss() + x, y = data_source() + loss_tensor = loss(predictions=x, target=x) + _ = trainable_module(x=loss_tensor) + + self.assertRaises(NeuralPortNmTensorMismatchError, wrong) diff --git a/tests/test_policies.py b/tests/core/test_policies.py similarity index 100% rename from tests/test_policies.py rename to tests/core/test_policies.py diff --git a/tests/test_deploy_export.py b/tests/test_deploy_export.py deleted file mode 100644 index be6a1a39573c..000000000000 --- a/tests/test_deploy_export.py +++ /dev/null @@ -1,149 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2019 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= - -import os -from pathlib import Path - -# git clone git@github.com:microsoft/onnxruntime.git -# cd onnxruntime -# ./build.sh --update --build --config RelWithDebInfo --build_shared_lib --parallel --use_cuda \ -# --cudnn_home /usr/lib/x86_64-linux-gnu --cuda_home /usr/local/cuda --enable_pybind --build_wheel -# pip install --upgrade ./build/Linux/RelWithDebInfo/dist/onnxruntime_gpu-1.1.0-cp37-cp37m-linux_x86_64.whl -import onnxruntime as ort -import torch -from ruamel.yaml import YAML - -import nemo -import nemo.collections.asr as nemo_asr -import nemo.collections.nlp as nemo_nlp -import nemo.collections.nlp.nm.trainables.common.token_classification_nm -from tests.common_setup import NeMoUnitTest - - -class TestDeployExport(NeMoUnitTest): - def setUp(self): - """ Setups neural factory so it will use GPU instead of CPU. """ - NeMoUnitTest.setUp(self) - - # Perform computations on GPU. - self.nf._placement = nemo.core.DeviceType.GPU - - def __test_export_route(self, module, out_name, mode, input_example=None): - out = Path(out_name) - if out.exists(): - os.remove(out) - - self.nf.deployment_export(module=module, output=out_name, input_example=input_example, d_format=mode) - - self.assertTrue(out.exists()) - if mode == nemo.core.DeploymentFormat.ONNX: - if isinstance(input_example, tuple): - outputs_fwd = module.forward(*input_example) - else: - outputs_fwd = module.forward(input_example) - sess_options = ort.SessionOptions() - sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED - ort_session = ort.InferenceSession(out_name, sess_options) - inputs = dict() - input_names = list(module.input_ports) - for i in range(len(input_names)): - input_name = ( - "encoded_lengths" - if type(module).__name__ == "JasperEncoder" and input_names[i] == "length" - else input_names[i] - ) - inputs[input_name] = ( - input_example[i].cpu().numpy() if 
isinstance(input_example, tuple) else input_example.cpu().numpy() - ) - outputs_ort = ort_session.run(None, inputs) - outputs_ort = torch.from_numpy(outputs_ort[0]).cuda() - self.assertLess( - (outputs_ort - (outputs_fwd[0] if isinstance(outputs_fwd, tuple) else outputs_fwd)).norm(p=2), 5.0e-4 - ) - if out.exists(): - os.remove(out) - - def test_simple_module_export(self): - simplest_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - self.__test_export_route( - module=simplest_module, - out_name="simple.pt", - mode=nemo.core.DeploymentFormat.TORCHSCRIPT, - input_example=None, - ) - - def test_TokenClassifier_module_export(self): - t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( - hidden_size=512, num_classes=16, use_transformer_pretrained=False - ) - self.__test_export_route( - module=t_class, - out_name="t_class.pt", - mode=nemo.core.DeploymentFormat.TORCHSCRIPT, - input_example=torch.randn(16, 16, 512).cuda(), - ) - - def test_TokenClassifier_module_onnx_export(self): - t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( - hidden_size=512, num_classes=16, use_transformer_pretrained=False - ) - self.__test_export_route( - module=t_class, - out_name="t_class.onnx", - mode=nemo.core.DeploymentFormat.ONNX, - input_example=torch.randn(16, 16, 512).cuda(), - ) - - def test_jasper_decoder_export_ts(self): - j_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=33) - self.__test_export_route( - module=j_decoder, out_name="j_decoder.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=None - ) - - def test_hf_bert_ts(self): - bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") - input_example = ( - torch.randint(low=0, high=16, size=(2, 16)).cuda(), - torch.randint(low=0, high=1, size=(2, 16)).cuda(), - torch.randint(low=0, high=1, size=(2, 16)).cuda(), - ) - self.__test_export_route( - module=bert, 
out_name="bert.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=input_example - ) - - def test_hf_bert_pt(self): - bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") - self.__test_export_route(module=bert, out_name="bert.pt", mode=nemo.core.DeploymentFormat.PYTORCH) - - def test_jasper_encoder_to_onnx(self): - with open("tests/data/jasper_smaller.yaml") as file: - yaml = YAML(typ="safe") - jasper_model_definition = yaml.load(file) - - jasper_encoder = nemo_asr.JasperEncoder( - conv_mask=False, - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'] - ) - - self.__test_export_route( - module=jasper_encoder, - out_name="jasper_encoder.onnx", - mode=nemo.core.DeploymentFormat.ONNX, - input_example=(torch.randn(16, 64, 256).cuda(), torch.randn(256).cuda()), - ) diff --git a/tests/test_neural_types.py b/tests/test_neural_types.py deleted file mode 100644 index c2741ca3d7c6..000000000000 --- a/tests/test_neural_types.py +++ /dev/null @@ -1,258 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2019 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= - -import tarfile - -from ruamel.yaml import YAML - -import nemo.collections.asr as nemo_asr -from nemo import logging -from nemo.core import * -from tests.common_setup import NeMoUnitTest - - -class TestNeuralTypes(NeMoUnitTest): - manifest_filepath = "tests/data/asr/an4_train.json" - yaml = YAML(typ="safe") - - def setUp(self) -> None: - super().setUp() - data_folder = "tests/data/" - logging.info("Looking up for test ASR data") - if not os.path.exists(data_folder + "asr"): - logging.info("Extracting ASR data to: {0}".format(data_folder + "asr")) - tar = tarfile.open("tests/data/asr.tar.gz", "r:gz") - tar.extractall(path=data_folder) - tar.close() - else: - logging.info("ASR data found in: {0}".format(data_folder + "asr")) - - def test_same(self): - btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - btc2 = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - self.assertEqual(btc2.compare(btc), NeuralTypeComparisonResult.SAME) - - def test_transpose_same(self): - btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - tbc = NeuralType(axis2type={1: AxisType(BatchTag), 0: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - - self.assertEqual(btc.compare(tbc), NeuralTypeComparisonResult.TRANSPOSE_SAME) - self.assertEqual(tbc.compare(btc), NeuralTypeComparisonResult.TRANSPOSE_SAME) - - def test_dim_incompatible(self): - nchw1 = NeuralType( - axis2type={ - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224), - } - ) - nchw2 = NeuralType( - axis2type={ - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 256), - 3: AxisType(WidthTag, 256), - } - ) - self.assertEqual(nchw1.compare(nchw2), NeuralTypeComparisonResult.DIM_INCOMPATIBLE) - - def test_rank_incompatible(self): - btc = 
NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - nchw = NeuralType( - axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(HeightTag), 3: AxisType(WidthTag)} - ) - self.assertEqual(nchw.compare(btc), NeuralTypeComparisonResult.INCOMPATIBLE) - - def test_axis_type(self): - ax1 = AxisType(BatchTag) - ax2 = AxisType(TimeTag) - ax3 = AxisType(ProcessedTimeTag) - self.assertEqual(ax1.compare_to(ax2), NeuralTypeComparisonResult.INCOMPATIBLE) - self.assertEqual(ax3.compare_to(ax2), NeuralTypeComparisonResult.LESS) - self.assertEqual(ax2.compare_to(ax3), NeuralTypeComparisonResult.GREATER) - self.assertEqual(ax2.compare_to(AxisType(TimeTag)), NeuralTypeComparisonResult.SAME) - - def test_semantic_incompatible(self): - nchw = NeuralType( - axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(HeightTag), 3: AxisType(WidthTag)} - ) - badd = NeuralType( - axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag), 3: AxisType(WidthTag)} - ) - self.assertEqual(nchw.compare(badd), NeuralTypeComparisonResult.INCOMPATIBLE) - self.assertEqual(badd.compare(nchw), NeuralTypeComparisonResult.INCOMPATIBLE) - - def test_root(self): - root = NeuralType({}) - non_tensor = NeuralType(None) - btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - nchw = NeuralType( - axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(HeightTag), 3: AxisType(WidthTag)} - ) - self.assertEqual(root.compare(btc), NeuralTypeComparisonResult.SAME) - self.assertEqual(root.compare(nchw), NeuralTypeComparisonResult.SAME) - self.assertEqual(root.compare(non_tensor), NeuralTypeComparisonResult.SAME) - - self.assertEqual(non_tensor.compare(root), NeuralTypeComparisonResult.INCOMPATIBLE) - self.assertEqual(btc.compare(root), NeuralTypeComparisonResult.INCOMPATIBLE) - self.assertEqual(nchw.compare(root), NeuralTypeComparisonResult.INCOMPATIBLE) - - def 
test_combiner_type_infer(self): - combiner = nemo.backends.pytorch.common.SimpleCombiner(mode="add") - x_tg = nemo.core.NmTensor( - producer=None, producer_args=None, name=None, ntype=NeuralType({0: AxisType(BatchTag)}) - ) - y_tg = nemo.core.NmTensor( - producer=None, producer_args=None, name=None, ntype=NeuralType({0: AxisType(BatchTag)}) - ) - res = combiner(x1=y_tg, x2=x_tg) - self.assertEqual(res.compare(x_tg), NeuralTypeComparisonResult.SAME) - self.assertEqual(res.compare(y_tg), NeuralTypeComparisonResult.SAME) - self.assertEqual(x_tg.compare(res), NeuralTypeComparisonResult.SAME) - self.assertEqual(y_tg.compare(res), NeuralTypeComparisonResult.SAME) - - combiner1 = nemo.backends.pytorch.common.SimpleCombiner(mode="add") - x_tg1 = NmTensor( - producer=None, - producer_args=None, - name=None, - ntype=NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - ) - y_tg1 = NmTensor( - producer=None, - producer_args=None, - name=None, - ntype=NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - ) - res1 = combiner1(x1=y_tg1, x2=x_tg1) - self.assertEqual(res1.compare(x_tg1), NeuralTypeComparisonResult.SAME) - self.assertEqual(res1.compare(y_tg1), NeuralTypeComparisonResult.SAME) - self.assertEqual(x_tg1.compare(res1), NeuralTypeComparisonResult.SAME) - self.assertEqual(y_tg1.compare(res1), NeuralTypeComparisonResult.SAME) - - def test_optional_input_no_input(self): - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=128) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNetO(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - y_pred = trainable_module(x=x) - loss_tensor = loss(predictions=y_pred, target=y) - - optimizer = nemo.backends.pytorch.actions.PtActions() - optimizer.train( - tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1} - ) - - def test_optional_input_no_with_input(self): - data_source = 
nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=128) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNetO(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - y_pred = trainable_module(x=x, o=x) - loss_tensor = loss(predictions=y_pred, target=y) - optimizer = nemo.backends.pytorch.actions.PtActions() - optimizer.train( - tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1} - ) - - def test_optional_input_no_with_wrong_input(self): - def wrong_fn(): - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=128) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNetO(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - wrong_optional = NmTensor( - producer=None, - producer_args=None, - name=None, - ntype=NeuralType({0: AxisType(ChannelTag), 1: AxisType(BatchTag)}), - ) - y_pred = trainable_module(x=x, o=wrong_optional) - loss_tensor = loss(predictions=y_pred, target=y) - optimizer = nemo.backends.pytorch.actions.PtActions() - optimizer.train( - tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1} - ) - - self.assertRaises(NeuralPortNmTensorMismatchError, wrong_fn) - - def test_simple_dags(self): - # module instantiation - with open("tests/data/jasper_smaller.yaml") as file: - jasper_model_definition = self.yaml.load(file) - labels = jasper_model_definition['labels'] - - data_layer = nemo_asr.AudioToTextDataLayer( - manifest_filepath=self.manifest_filepath, labels=labels, batch_size=4 - ) - data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( - **jasper_model_definition['AudioToMelSpectrogramPreprocessor'] - ) - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'], - ) - jasper_decoder = 
nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(labels)) - ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels)) - greedy_decoder = nemo_asr.GreedyCTCDecoder() - - # DAG definition - (audio_signal, audio_signal_len, transcript, transcript_len) = data_layer() - processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len) - - spec_augment = nemo_asr.SpectrogramAugmentation(rect_masks=5) - aug_signal = spec_augment(input_spec=processed_signal) - - encoded, encoded_len = jasper_encoder(audio_signal=aug_signal, length=processed_signal_len) - log_probs = jasper_decoder(encoder_output=encoded) - predictions = greedy_decoder(log_probs=log_probs) - loss = ctc_loss( - log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len - ) - - def wrong(): - with open("tests/data/jasper_smaller.yaml") as file: - jasper_config = self.yaml.load(file) - labels = jasper_config['labels'] - - data_layer = nemo_asr.AudioToTextDataLayer( - manifest_filepath=self.manifest_filepath, labels=labels, batch_size=4 - ) - data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( - **jasper_config['AudioToMelSpectrogramPreprocessor'] - ) - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=jasper_config['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_config['JasperEncoder'], - ) - jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(labels)) - # DAG definition - (audio_signal, audio_signal_len, transcript, transcript_len) = data_layer() - processed_signal, processed_signal_len = data_preprocessor( - input_signal=audio_signal, length=audio_signal_len - ) - - spec_augment = nemo_asr.SpectrogramAugmentation(rect_masks=5) - aug_signal = spec_augment(input_spec=processed_signal) - - encoded, encoded_len = jasper_encoder(audio_signal=aug_signal, length=processed_signal_len) - log_probs = jasper_decoder(encoder_output=processed_signal) - - 
self.assertRaises(NeuralPortNmTensorMismatchError, wrong) diff --git a/tests/test_tutorials_pytorch.py b/tests/test_tutorials_pytorch.py deleted file mode 100644 index 183fd67e1d1b..000000000000 --- a/tests/test_tutorials_pytorch.py +++ /dev/null @@ -1,29 +0,0 @@ -# # ! /usr/bin/python -# # -*- coding: utf-8 -*- -# -# # Copyright 2019 NVIDIA. All Rights Reserved. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, software -# # distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. -# # ============================================================================= -# -# from .common_setup import NeMoUnitTest -# from nemo.backends.pytorch.tutorials.chatbot.data import loadPrepareData -# -# -# class TestPytorchChatBotTutorial(NeMoUnitTest): -# def test_simple_train(self): -# datafile = "tests/data/dialog_sample.txt" -# logging.info(datafile) -# voc, pairs = loadPrepareData("cornell", datafile=datafile) -# self.assertEqual(voc.name, 'cornell') -# self.assertEqual(voc.num_words, 675)