From 72094a9b6b7c9e9739880f622b70985759c7e1d1 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Tue, 28 Jan 2020 16:18:35 -0800 Subject: [PATCH 01/30] initial draft Signed-off-by: Oleksii Kuchaiev --- nemo/core/neural_types/__init__.py | 21 ++ nemo/core/neural_types/axes.py | 77 +++++++ nemo/core/neural_types/comparison.py | 34 ++++ nemo/core/neural_types/elements.py | 83 ++++++++ nemo/core/neural_types/neural_type.py | 190 ++++++++++++++++++ .../{neural_types.py => old_neural_types.py} | 0 6 files changed, 405 insertions(+) create mode 100644 nemo/core/neural_types/__init__.py create mode 100644 nemo/core/neural_types/axes.py create mode 100644 nemo/core/neural_types/comparison.py create mode 100644 nemo/core/neural_types/elements.py create mode 100644 nemo/core/neural_types/neural_type.py rename nemo/core/{neural_types.py => old_neural_types.py} (100%) diff --git a/nemo/core/neural_types/__init__.py b/nemo/core/neural_types/__init__.py new file mode 100644 index 000000000000..92c9b37c32b6 --- /dev/null +++ b/nemo/core/neural_types/__init__.py @@ -0,0 +1,21 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .elements import * +from .axes import * +from .comparison import * +from .neural_type import * \ No newline at end of file diff --git a/nemo/core/neural_types/axes.py b/nemo/core/neural_types/axes.py new file mode 100644 index 000000000000..5efba5d20ca7 --- /dev/null +++ b/nemo/core/neural_types/axes.py @@ -0,0 +1,77 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ['AxisKindAbstract', 'AxisKind', 'AxisType'] + +from enum import Enum +from typing import Optional + + +class AxisKindAbstract(Enum): + """This is an abstract Enum to represents what does varying axis dimension mean. + In practice, you will almost always use AxisKind Enum. This Enum should be inherited by + your OWN Enum if you aren't satisfied with AxisKind. Then your own Enum can be used + instead of AxisKind.""" + + pass + + +class AxisKind(AxisKindAbstract): + """This Enum represents what does varying axis dimension mean. + For example, does this dimension correspond to width, batch, time, etc.""" + + Batch = 0 + Time = 1 + Dimension = 2 + Width = 3 + Height = 4 + + def __str__(self): + return str(self.name).lower() + + @staticmethod + def from_str(label): + """Returns AxisKind instance based on short string representation""" + _label = label.lower().strip() + if _label == "b" or _label == "n" or _label == "batch": + return AxisKind.Batch + elif _label == "t" or _label == "time": + return AxisKind.Time + elif _label == "d" or _label == "c" or _label == "channel": + return AxisKind.Dimension + elif _label == "w" or _label == "width": + return AxisKind.Width + elif _label == "h" or _label == "height": + return AxisKind.Height + else: + raise ValueError(f"Can't create AxisKind from {label}") + + +class AxisType(object): + """This class represents axis semantics and (optionally) it's dimensionality + Args: + kind (AxisKindAbstract): + size (int, optional): + is_list (bool, default=False): + """ + + def __init__(self, kind: AxisKindAbstract, size: Optional[int], is_list=False): + if size is not None and is_list: + raise ValueError("The axis can't be list and have a fixed size") + self.kind = kind + self.size = size + self.is_list = is_list diff --git a/nemo/core/neural_types/comparison.py b/nemo/core/neural_types/comparison.py new file mode 100644 index 000000000000..6cbb9661a0e2 --- /dev/null +++ b/nemo/core/neural_types/comparison.py @@ -0,0 +1,34 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ['NeuralTypeComparisonResult'] + +from enum import Enum + + +class NeuralTypeComparisonResult(Enum): + """The result of comparing two neural type objects for compatibility. + When comparing A.compare_to(B):""" + + SAME = 0 + LESS = 1 # A is B + GREATER = 2 # B is A + DIM_INCOMPATIBLE = 3 # Resize connector might fix incompatibility + TRANSPOSE_SAME = 4 # A transpose and/or converting between lists and tensors will make them same + CONTAINER_SIZE_MISMATCH = 5 # A and B contain different number of elements + INCOMPATIBLE = 6 # A and B are incompatible + SAME_TYPE_INCOMPATIBLE_PARAMS = 7 # A and B are of the same type but parametrized differently diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py new file mode 100644 index 000000000000..0b3626556b96 --- /dev/null +++ b/nemo/core/neural_types/elements.py @@ -0,0 +1,83 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ['ElementType', 'VoidType'] +import abc +from abc import ABC, abstractmethod +from typing import Tuple, Optional, Dict +from .comparison import NeuralTypeComparisonResult + + +class ElementType(ABC): + """Abstract class defining semantics of the tensor elements. + We are replying on Python for inheritance checking""" + + @abstractmethod + def __str__(cls): + pass + + @property + def type_parameters(self) -> Dict: + """Override this property to parametrize your type""" + return {} + + @property + def fields(self) -> Optional[Tuple]: + return None + + def compare(self, second) -> NeuralTypeComparisonResult: + # First, check general compatibility + result = NeuralTypeComparisonResult.SAME + first_t = type(self) + second_t = type(second) + + if first_t == second_t: + result = NeuralTypeComparisonResult.SAME + elif issubclass(first_t, second_t): + result = NeuralTypeComparisonResult.LESS + elif issubclass(second_t, first_t): + result = NeuralTypeComparisonResult.GREATER + else: + result = NeuralTypeComparisonResult.INCOMPATIBLE + + if result != NeuralTypeComparisonResult.SAME: + return result + else: + # now check that all parameters match + check_params = set(self.type_parameters.keys()) == set(second.type_parameters.keys()) + if check_params is False: + return NeuralTypeComparisonResult.SAME_TYPE_INCOMPATIBLE_PARAMS + else: + for k1, v1 in self.type_parameters.items(): + if v1 != second.type_parameters[k1]: + return NeuralTypeComparisonResult.SAME_TYPE_INCOMPATIBLE_PARAMS + # check that all fields match + if self.fields == second.fields: + return NeuralTypeComparisonResult.SAME + else: + return NeuralTypeComparisonResult.INCOMPATIBLE + + +class VoidType(ElementType): + """Void-like type which is compatible with everything + """ + + def __str__(self): + return str("void type. compatible with everything") + + def compare(cls, second: abc.ABCMeta) -> NeuralTypeComparisonResult: + return NeuralTypeComparisonResult.SAME diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py new file mode 100644 index 000000000000..cbb216ef80ef --- /dev/null +++ b/nemo/core/neural_types/neural_type.py @@ -0,0 +1,190 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = [ + 'NeuralType', + 'NmTensor', + 'NeuralTypeError', + 'NeuralPortNameMismatchError', + 'NeuralPortNmTensorMismatchError', + 'NeuralPortNmTensorMismatchError', + 'CanNotInferResultNeuralType', +] +import uuid +from typing import Tuple +from .comparison import NeuralTypeComparisonResult +from .axes import AxisType, AxisKind +from .elements import * + + +class NeuralType(object): + """This is the main class which would represent neural type concept. + nmTensors derives from this. It is used to represent *the types* of inputs and outputs.""" + + def __init__(self, elements_type: ElementType, axes: Tuple, optional=False): + self.__check_sanity(axes) + self.elements_type = elements_type + axes_list = [] + for axis in axes: + if isinstance(axis, str): + axes_list.append(AxisType(AxisKind.from_str(axis), None)) + elif isinstance(axis, AxisType): + axes_list.append(axis) + else: + raise ValueError(f"axis type must be either str or AxisType instance") + self.axes_tuple = tuple(axes_list) + self.optional = optional + + def compare(self, second) -> NeuralTypeComparisonResult: + # First, handle dimensionality + axes_a = self.axes_tuple + axes_b = second.axes_tuple + + kinds_a = dict() + kinds_b = dict() + + dimensions_pass = True + for axis_a, axis_b in zip(axes_a, axes_b): + kinds_a[axis_a.kind] = axis_a.size + kinds_b[axis_b.kind] = axis_b.size + if axis_a.kind != axis_b.kind or axis_a.is_list != axis_b.is_list: + dimensions_pass = False + + if kinds_a.keys() != kinds_b.keys(): + return NeuralTypeComparisonResult.INCOMPATIBLE + for kind, size in kinds_a.items(): + if size != kinds_b[kind]: + return NeuralTypeComparisonResult.DIM_INCOMPATIBLE + + element_comparison_result = self.elements_type.compare(second.elements_type) + if dimensions_pass: + return element_comparison_result + elif element_comparison_result == NeuralTypeComparisonResult.SAME: + return NeuralTypeComparisonResult.TRANSPOSE_SAME + else: + return NeuralTypeComparisonResult.INCOMPATIBLE + + def __check_sanity(self, axes): + # check that list come before any tensor dimension + are_strings = True + for axis in axes: + if not isinstance(axis, str): + are_strings = False + if isinstance(axis, str) and not are_strings: + raise ValueError("Either use full class names or all strings") + if are_strings: + return + checks_passed = True + saw_tensor_dim = False + for axis in axes: + if not axis.is_list: + saw_tensor_dim = True + else: # current axis is a list + if saw_tensor_dim: # which is preceded by tensor dim + checks_passed = False + if not checks_passed: + raise ValueError( + "You have list dimension after Tensor dimension. All list dimensions must preceed Tensor dimensions" + ) + + +class NmTensor(NeuralType): + """Class representing data which flows between NeuralModules' ports. + It also has a type of NeuralType represented by inheriting from NeuralType + object.""" + + def __init__(self, producer, producer_args, name, ntype=None): + """NmTensor constructor. + + Args: + producer (NeuralModule): object which produced this + producer_args (dict): a dictionary of port_name->NmTensor value + of arguments which were sent to producer to create this + """ + super(NmTensor, self).__init__(elements_type=ntype.elemts_type, axes=ntype.axes, optional=ntype.optional) + self._producer = producer + self._producer_args = producer_args + self._name = name + self._uuid = str(uuid.uuid4()) + + @property + def producer(self): + """ + Returns: + NeuralModule object which produced this NmTensor. + """ + return self._producer + + @property + def producer_args(self): + """ + Returns: + a dictionary of port_name->NmTensor value + of arguments which were sent to producer to create this object + """ + return self._producer_args + + @property + def name(self): + """ + Returns: + A NmTensor's name which should be equal to + the NeuralModule's output port's name which created it + """ + return self._name + + @property + def unique_name(self): + """Unique NMTensor name. + It is composed of non-unique name (self.name) and uuid of NeuralModule + which created this tensor. + + Returns: + str: unique name + """ + if self._producer is None: + raise ValueError("This NmTensor does not have a unique name") + return f"{self._name}~~~{self.producer}~~~{self._uuid}" + + +class NeuralTypeError(Exception): + """Base class for neural type related exceptions.""" + + pass + + +class NeuralPortNameMismatchError(NeuralTypeError): + """Exception raised when neural module is called with incorrect port + names.""" + + def __init__(self, message): + self.message = message + + +class NeuralPortNmTensorMismatchError(NeuralTypeError): + """Exception raised when a port is fed with a NmTensor of incompatible + type.""" + + def __init__(self, message): + self.message = message + + +class CanNotInferResultNeuralType(NeuralTypeError): + """Exception raised when NeuralType of output can not be inferred.""" + + def __init__(self, message): + self.message = message diff --git a/nemo/core/neural_types.py b/nemo/core/old_neural_types.py similarity index 100% rename from nemo/core/neural_types.py rename to nemo/core/old_neural_types.py From 21e7f319c69c82975be6c13102eaf2a2ad60d6e0 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Tue, 28 Jan 2020 17:01:23 -0800 Subject: [PATCH 02/30] fixing some unittests Signed-off-by: Oleksii Kuchaiev --- nemo/backends/pytorch/common/losses.py | 2 +- nemo/backends/pytorch/common/rnn.py | 2 +- nemo/backends/pytorch/common/search.py | 2 +- nemo/core/__init__.py | 20 +++++- nemo/core/callbacks.py | 18 ++++- nemo/core/neural_factory.py | 18 ++++- nemo/core/neural_modules.py | 18 ++++- nemo/core/neural_types/__init__.py | 4 +- nemo/core/neural_types/elements.py | 53 ++++++++++++++- nemo/core/neural_types/neural_type.py | 3 +- tests/core/__init__.py | 0 tests/core/test_neural_types.py | 92 ++++++++++++++++++++++++++ 12 files changed, 219 insertions(+), 13 deletions(-) create mode 100644 tests/core/__init__.py create mode 100644 tests/core/test_neural_types.py diff --git a/nemo/backends/pytorch/common/losses.py b/nemo/backends/pytorch/common/losses.py index 295c09ba1ce4..90a20a633c81 100644 --- a/nemo/backends/pytorch/common/losses.py +++ b/nemo/backends/pytorch/common/losses.py @@ -2,7 +2,7 @@ from torch import nn from nemo.backends.pytorch.nm import LossNM -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, RegressionTag, TimeTag +from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, RegressionTag, TimeTag __all__ = ['SequenceLoss', 'CrossEntropyLoss', 'MSELoss'] diff --git a/nemo/backends/pytorch/common/rnn.py b/nemo/backends/pytorch/common/rnn.py index c7f6fc66f5bc..c171ad7e00fd 100644 --- a/nemo/backends/pytorch/common/rnn.py +++ b/nemo/backends/pytorch/common/rnn.py @@ -8,7 +8,7 @@ from nemo.backends.pytorch.common.parts import Attention from nemo.backends.pytorch.nm import TrainableNM -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag from nemo.utils.misc import pad_to diff --git a/nemo/backends/pytorch/common/search.py b/nemo/backends/pytorch/common/search.py index 812c22ce2cfd..7b449acdd0d3 100644 --- a/nemo/backends/pytorch/common/search.py +++ b/nemo/backends/pytorch/common/search.py @@ -3,7 +3,7 @@ import torch from nemo.backends.pytorch.nm import NonTrainableNM -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag INF = float('inf') BIG_NUM = 1e4 diff --git a/nemo/core/__init__.py b/nemo/core/__init__.py index 7b13691e476a..06a0050f1b7e 100644 --- a/nemo/core/__init__.py +++ b/nemo/core/__init__.py @@ -1,5 +1,21 @@ -# Copyright (c) 2019 NVIDIA Corporation +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .callbacks import * from .neural_factory import * from .neural_modules import * -from .neural_types import * +from .old_neural_types import * diff --git a/nemo/core/callbacks.py b/nemo/core/callbacks.py index 4f6c94ba01dc..1ebf3675e270 100644 --- a/nemo/core/callbacks.py +++ b/nemo/core/callbacks.py @@ -1,4 +1,20 @@ -# Copyright (c) 2019 NVIDIA Corporation +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import glob import os import sys diff --git a/nemo/core/neural_factory.py b/nemo/core/neural_factory.py index 086af2a04fbf..9f61c086b58e 100644 --- a/nemo/core/neural_factory.py +++ b/nemo/core/neural_factory.py @@ -1,4 +1,20 @@ -# Copyright (c) 2019 NVIDIA Corporation +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + __all__ = [ 'Backend', 'ModelMode', diff --git a/nemo/core/neural_modules.py b/nemo/core/neural_modules.py index 663bb3da3184..373839ee93b2 100644 --- a/nemo/core/neural_modules.py +++ b/nemo/core/neural_modules.py @@ -1,4 +1,20 @@ -# Copyright (c) 2019 NVIDIA Corporation +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """This file contains NeuralModule and NmTensor classes.""" __all__ = ['WeightShareTransform', 'NeuralModule'] diff --git a/nemo/core/neural_types/__init__.py b/nemo/core/neural_types/__init__.py index 92c9b37c32b6..124adc132c72 100644 --- a/nemo/core/neural_types/__init__.py +++ b/nemo/core/neural_types/__init__.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .elements import * from .axes import * from .comparison import * -from .neural_type import * \ No newline at end of file +from .elements import * +from .neural_type import * diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index 0b3626556b96..b806280677f5 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -15,10 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -__all__ = ['ElementType', 'VoidType'] +__all__ = [ + 'ElementType', + 'VoidType', + 'ChannelType', + 'AcousticEncodedRepresentation', + 'AudioSignal', + 'SpectrogramType', + 'MelSpectrogramType', + 'MFCCSpectrogramType', +] import abc from abc import ABC, abstractmethod -from typing import Tuple, Optional, Dict +from typing import Dict, Optional, Tuple + from .comparison import NeuralTypeComparisonResult @@ -81,3 +91,42 @@ def __str__(self): def compare(cls, second: abc.ABCMeta) -> NeuralTypeComparisonResult: return NeuralTypeComparisonResult.SAME + + +# TODO: Consider moving these files elsewhere +class ChannelType(ElementType): + def __str__(self): + return "convolutional channel value" + + +class AcousticEncodedRepresentation(ChannelType): + def __str__(self): + return "encoded representation returned by the acoustic encoder model" + + +class AudioSignal(ElementType): + def __str__(self): + return "encoded representation returned by the acoustic encoder model" + + def __init__(self, freq=16000): + self._params = {} + self._params['freq'] = freq + + @property + def type_parameters(self): + return self._params + + +class SpectrogramType(ChannelType): + def __str__(self): + return "generic spectorgram type" + + +class MelSpectrogramType(SpectrogramType): + def __str__(self): + return "mel spectorgram type" + + +class MFCCSpectrogramType(SpectrogramType): + def __str__(self): + return "mfcc spectorgram type" diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py index cbb216ef80ef..346668e7d303 100644 --- a/nemo/core/neural_types/neural_type.py +++ b/nemo/core/neural_types/neural_type.py @@ -26,8 +26,9 @@ ] import uuid from typing import Tuple + +from .axes import AxisKind, AxisType from .comparison import NeuralTypeComparisonResult -from .axes import AxisType, AxisKind from .elements import * diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/core/test_neural_types.py b/tests/core/test_neural_types.py new file mode 100644 index 000000000000..bffdf705bd56 --- /dev/null +++ b/tests/core/test_neural_types.py @@ -0,0 +1,92 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2019 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from nemo.core.neural_types import ( + AcousticEncodedRepresentation, + AudioSignal, + AxisKind, + AxisType, + ChannelType, + MelSpectrogramType, + MFCCSpectrogramType, + NeuralType, + NeuralTypeComparisonResult, + SpectrogramType, +) +from tests.common_setup import NeMoUnitTest + + +class NeuralTypeSystemTests(NeMoUnitTest): + def test_short_vs_long_version(self): + long_version = NeuralType( + elements_type=AcousticEncodedRepresentation(), + axes=(AxisType(AxisKind.Batch, None), AxisType(AxisKind.Dimension, None), AxisType(AxisKind.Time, None)), + ) + short_version = NeuralType(AcousticEncodedRepresentation(), ('B', 'D', 'T')) + self.assertEqual(long_version.compare(short_version), NeuralTypeComparisonResult.SAME) + self.assertEqual(short_version.compare(long_version), NeuralTypeComparisonResult.SAME) + + def test_parameterized_type_audio_sampling_frequency(self): + audio16K = NeuralType(AudioSignal(16000), axes=('B', 'T')) + audio8K = NeuralType(AudioSignal(8000), axes=('B', 'T')) + another16K = NeuralType(AudioSignal(16000), axes=('B', 'T')) + + self.assertEqual(audio8K.compare(audio16K), NeuralTypeComparisonResult.SAME_TYPE_INCOMPATIBLE_PARAMS) + self.assertEqual(audio16K.compare(audio8K), NeuralTypeComparisonResult.SAME_TYPE_INCOMPATIBLE_PARAMS) + self.assertEqual(another16K.compare(audio16K), NeuralTypeComparisonResult.SAME) + self.assertEqual(audio16K.compare(another16K), NeuralTypeComparisonResult.SAME) + + def test_transpose_same(self): + audio16K = NeuralType(AudioSignal(16000), axes=('B', 'T')) + audio16K_t = NeuralType(AudioSignal(16000), axes=('T', 'B')) + self.assertEqual(audio16K.compare(audio16K_t), NeuralTypeComparisonResult.TRANSPOSE_SAME) + + def test_inheritance_spec_augment_example(self): + input = NeuralType(SpectrogramType(), ('B', 'D', 'T')) + out1 = NeuralType(MelSpectrogramType(), ('B', 'D', 'T')) + out2 = NeuralType(MFCCSpectrogramType(), ('B', 'D', 'T')) + self.assertEqual(out1.compare(out2), NeuralTypeComparisonResult.INCOMPATIBLE) + self.assertEqual(out2.compare(out1), NeuralTypeComparisonResult.INCOMPATIBLE) + self.assertEqual(input.compare(out1), NeuralTypeComparisonResult.GREATER) + self.assertEqual(input.compare(out2), NeuralTypeComparisonResult.GREATER) + self.assertEqual(out1.compare(input), NeuralTypeComparisonResult.LESS) + self.assertEqual(out2.compare(input), NeuralTypeComparisonResult.LESS) + + def test_list_of_lists(self): + T1 = NeuralType( + elements_type=ChannelType(), + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind.Time, size=None, is_list=True), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + ) + T2 = NeuralType( + elements_type=ChannelType(), + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + ) + # TODO: should this be incompatible instead??? + self.assertEqual(T1.compare(T2), NeuralTypeComparisonResult.TRANSPOSE_SAME) From 881d4bb6e89d6c919753eaf884c585644793c30c Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Thu, 30 Jan 2020 14:19:38 -0800 Subject: [PATCH 03/30] fixing some files Signed-off-by: Oleksii Kuchaiev --- nemo/backends/pytorch/common/losses.py | 33 +- nemo/backends/pytorch/common/rnn.py | 2 +- nemo/backends/pytorch/common/search.py | 2 +- nemo/core/__init__.py | 2 +- nemo/core/neural_types/elements.py | 24 ++ nemo/core/neural_types/neural_type.py | 33 +- nemo/core/old_neural_types.py | 410 ------------------------- tests/core/test_neural_types.py | 14 +- 8 files changed, 71 insertions(+), 449 deletions(-) delete mode 100644 nemo/core/old_neural_types.py diff --git a/nemo/backends/pytorch/common/losses.py b/nemo/backends/pytorch/common/losses.py index 90a20a633c81..1c2d4b2b1524 100644 --- a/nemo/backends/pytorch/common/losses.py +++ b/nemo/backends/pytorch/common/losses.py @@ -2,7 +2,7 @@ from torch import nn from nemo.backends.pytorch.nm import LossNM -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, RegressionTag, TimeTag +from nemo.core.neural_types import NeuralType, LogitsType, LabelsType, LossType, RegressionValuesType __all__ = ['SequenceLoss', 'CrossEntropyLoss', 'MSELoss'] @@ -34,23 +34,10 @@ class SequenceLoss(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - log_probs: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - targets: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - """ return { - 'log_probs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}), - 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + 'log_probs': NeuralType(axes=('B', 'T', 'D')), + 'targets': NeuralType(axes=('B', 'T')) } @property @@ -61,7 +48,7 @@ def output_ports(self): NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType)} def __init__( self, @@ -139,8 +126,8 @@ def input_ports(self): """ return { - "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "labels": NeuralType({0: AxisType(BatchTag),}), + "logits": NeuralType(elements_type=LogitsType, axes=('B', 'D')), + "labels": NeuralType(elements_type=LabelsType, axes=tuple('B')) } @property @@ -150,7 +137,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType)} def __init__(self, weight=None, **kwargs): LossNM.__init__(self, **kwargs) @@ -175,8 +162,8 @@ def input_ports(self): 0: AxisType(RegressionTag) """ return { - "preds": NeuralType({0: AxisType(RegressionTag)}), - "labels": NeuralType({0: AxisType(RegressionTag)}), + "preds": NeuralType(RegressionValuesType, tuple('B')), + "labels": NeuralType(LabelsType, tuple('B')), } @property @@ -186,7 +173,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType)} def __init__(self, **kwargs): LossNM.__init__(self, **kwargs) diff --git a/nemo/backends/pytorch/common/rnn.py b/nemo/backends/pytorch/common/rnn.py index c171ad7e00fd..247e8043879d 100644 --- a/nemo/backends/pytorch/common/rnn.py +++ b/nemo/backends/pytorch/common/rnn.py @@ -8,7 +8,7 @@ from nemo.backends.pytorch.common.parts import Attention from nemo.backends.pytorch.nm import TrainableNM -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import AxisType from nemo.utils.misc import pad_to diff --git a/nemo/backends/pytorch/common/search.py b/nemo/backends/pytorch/common/search.py index 7b449acdd0d3..7ddc8d553dd1 100644 --- a/nemo/backends/pytorch/common/search.py +++ b/nemo/backends/pytorch/common/search.py @@ -3,7 +3,7 @@ import torch from nemo.backends.pytorch.nm import NonTrainableNM -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import AxisType INF = float('inf') BIG_NUM = 1e4 diff --git a/nemo/core/__init__.py b/nemo/core/__init__.py index 06a0050f1b7e..e48567b139a6 100644 --- a/nemo/core/__init__.py +++ b/nemo/core/__init__.py @@ -18,4 +18,4 @@ from .callbacks import * from .neural_factory import * from .neural_modules import * -from .old_neural_types import * +from .neural_types import * diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index b806280677f5..b1a171e9507f 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -24,6 +24,10 @@ 'SpectrogramType', 'MelSpectrogramType', 'MFCCSpectrogramType', + 'LogitsType', + 'LabelsType', + 'LossType', + 'RegressionValuesType' ] import abc from abc import ABC, abstractmethod @@ -99,6 +103,21 @@ def __str__(self): return "convolutional channel value" +class LogitsType(ElementType): + def __str__(self): + return "neural type representing logits" + + +class LabelsType(ElementType): + def __str__(self): + return "neural type representing labels" + + +class LossType(ElementType): + def __str__(self): + return "neural type representing loss value" + + class AcousticEncodedRepresentation(ChannelType): def __str__(self): return "encoded representation returned by the acoustic encoder model" @@ -130,3 +149,8 @@ def __str__(self): class MFCCSpectrogramType(SpectrogramType): def __str__(self): return "mfcc spectorgram type" + + +class RegressionValuesType(ElementType): + def __str__(self): + return "regression values type" diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py index 346668e7d303..fede950785b7 100644 --- a/nemo/core/neural_types/neural_type.py +++ b/nemo/core/neural_types/neural_type.py @@ -25,7 +25,7 @@ 'CanNotInferResultNeuralType', ] import uuid -from typing import Tuple +from typing import Tuple, Optional from .axes import AxisKind, AxisType from .comparison import NeuralTypeComparisonResult @@ -36,18 +36,21 @@ class NeuralType(object): """This is the main class which would represent neural type concept. nmTensors derives from this. It is used to represent *the types* of inputs and outputs.""" - def __init__(self, elements_type: ElementType, axes: Tuple, optional=False): - self.__check_sanity(axes) + def __init__(self, elements_type: ElementType = VoidType(), axes: Optional[Tuple] = None, optional=False): self.elements_type = elements_type - axes_list = [] - for axis in axes: - if isinstance(axis, str): - axes_list.append(AxisType(AxisKind.from_str(axis), None)) - elif isinstance(axis, AxisType): - axes_list.append(axis) - else: - raise ValueError(f"axis type must be either str or AxisType instance") - self.axes_tuple = tuple(axes_list) + if axes is not None: + self.__check_sanity(axes) + axes_list = [] + for axis in axes: + if isinstance(axis, str): + axes_list.append(AxisType(AxisKind.from_str(axis), None)) + elif isinstance(axis, AxisType): + axes_list.append(axis) + else: + raise ValueError(f"axis type must be either str or AxisType instance") + self.axes_tuple = tuple(axes_list) + else: + self.axes_tuple = None self.optional = optional def compare(self, second) -> NeuralTypeComparisonResult: @@ -58,6 +61,12 @@ def compare(self, second) -> NeuralTypeComparisonResult: kinds_a = dict() kinds_b = dict() + if self.axes_tuple is None: + if second.axes_tuple is None: + return self.elements_type.compare(second.elements_type) + else: + return NeuralTypeComparisonResult.INCOMPATIBLE + dimensions_pass = True for axis_a, axis_b in zip(axes_a, axes_b): kinds_a[axis_a.kind] = axis_a.size diff --git a/nemo/core/old_neural_types.py b/nemo/core/old_neural_types.py deleted file mode 100644 index 38b606fc5b9e..000000000000 --- a/nemo/core/old_neural_types.py +++ /dev/null @@ -1,410 +0,0 @@ -# Copyright (c) 2019 NVIDIA Corporation -"""This module contains Tags, AxisTypes, NeuralTypes and NmTensors. -Every NmTensor is of a particular Neural Type. -Neural Modules' input and output ports are also of Neural Type. - -An exception will be raised when a NmTensor and input port where it goes are -of incompatible types. -""" -__all__ = [ - 'BaseTag', - 'BatchTag', - 'TimeTag', - 'ProcessedTimeTag', - 'ChannelTag', - 'EmbeddedTextTag', - 'SpectrogramSignalTag', - 'MelSpectrogramSignalTag', - 'MFCCSignalTag', - 'EncodedRepresentationTag', - 'ClassTag', - 'WidthTag', - 'HeightTag', - 'CategoricalTag', - 'RegressionTag', - 'NeuralTypeComparisonResult', - 'AxisType', - 'NeuralType', - 'NmTensor', - 'NeuralTypeError', - 'NeuralPortNameMismatchError', - 'NeuralPortNmTensorMismatchError', - 'CanNotInferResultNeuralType', -] - -import uuid -from enum import Enum - - -class BaseTag(object): - """Base Neural Tag. All Tags should inherit from this.""" - - def __str__(self): - return "base" - - -class BatchTag(BaseTag): - """Tag for batch dimension.""" - - def __str__(self): - return "batch" - - -class TimeTag(BaseTag): - """Tag for time dimension.""" - - def __str__(self): - return "time" - - -class ProcessedTimeTag(TimeTag): - """Tag for processed time dimension. - For example: after pre-processing, or augmentation.""" - - def __str__(self): - return "processed_time" - - -class ChannelTag(BaseTag): - """Tag for channel dimension.""" - - def __str__(self): - return "channel" - - -class EmbeddedTextTag(ChannelTag): - """Tag for any dimensions that contains text that goes through an - enbedding layer.""" - - def __str__(self): - return "embedded_text" - - -class SpectrogramSignalTag(ChannelTag): - """Tag for spectrogram signal dimension.""" - - def __str__(self): - return "spectrogram_signal" - - -class MelSpectrogramSignalTag(SpectrogramSignalTag): - """Tag for mel spectrogram signal dimension.""" - - def __str__(self): - return "mel_spectrogram_signal" - - -class MFCCSignalTag(SpectrogramSignalTag): - """Tag for MFCC signal dimension.""" - - def __str__(self): - return "mfcc_signal" - - -class EncodedRepresentationTag(ChannelTag): - """Tag for encoded representation. This should be used to - denote encoders' outputs.""" - - def __str__(self): - return "encoded_representation" - - -class ClassTag(BaseTag): - """Tag for class dimension. - For example, number of classes in classification problem, - vocabuary size or num of characters for ASR.""" - - def __str__(self): - return "channel" - - -class WidthTag(BaseTag): - """Tag for width dimension.""" - - def __str__(self): - return "width" - - -class HeightTag(BaseTag): - """Tag for width dimension.""" - - def __str__(self): - return "height" - - -class CategoricalTag(BatchTag): - """Tag for labels for classification tasks.""" - - def __str__(self): - return "category" - - -class RegressionTag(BatchTag): - """Tag for labels for regression tasks. - For example, this should be used in STS-B task, where labels - represent semantic semilarity score (float)""" - - def __str__(self): - return "regression" - - -class NeuralTypeComparisonResult(Enum): - """The result of comparing two neural type objects for compatibility. - When comparing A.compare_to(B):""" - - SAME = 0 - LESS = 1 # A is B - GREATER = 2 # B is A - DIM_INCOMPATIBLE = 3 # Resize connector might fix incompatibility - TRANSPOSE_SAME = 4 # A transpose will make them same - INCOMPATIBLE = 5 # A and B are incompatible. Can't fix incompatibility automatically - - -class AxisType(object): - """Every tensor's axis has semantics, dimension and descriptor. - It's semantics is a Neural Tag (inherited from BaseTag) - dimension (dim) is (optional) int and descriptor is (optional) string""" - - def __init__(self, semantics, dim: int = None, descriptor: str = None): - self._semantics = semantics - self._dim = dim - self._descriptor = descriptor - - def __eq__(self, other): - return self.semantics == other.semantics and self.dim == other.dim and self.descriptor == other.descriptor - - def __str__(self): - return "{0}:{1}:{2}".format(self.semantics, self.dim, self.descriptor) - - def __hash__(self): - return hash(self.__str__()) - - def compare_to(self, other): - """ - Compares current AxisType object to other - - Args: - other (AxisType): other AxisType object to compare with - - Returns: - Results of a comparison (NeuralTypeComparisonResult) - """ - if (self.dim is None or self.dim == other.dim) and self.descriptor == other.descriptor: - if self.semantics == other.semantics: - return NeuralTypeComparisonResult.SAME - elif issubclass(self.semantics, other.semantics): - return NeuralTypeComparisonResult.LESS - elif issubclass(other.semantics, self.semantics): - return NeuralTypeComparisonResult.GREATER - else: - return NeuralTypeComparisonResult.INCOMPATIBLE - elif self.descriptor == other.descriptor and self.semantics == other.semantics: - return NeuralTypeComparisonResult.DIM_INCOMPATIBLE - else: - return NeuralTypeComparisonResult.INCOMPATIBLE - - @property - def semantics(self): - return self._semantics - - @property - def dim(self): - return self._dim - - @property - def descriptor(self): - return self._descriptor - - -class NeuralType(object): - """Neural Type: a type for NmTensor. - - Note: This type mechanism is represented by Python inheritance. That is, - NmTensor - class inherits from NeuralType class. - - A Neural Type is a mapping from Tensor's axis number to it's type ( - AxisType). - - To instantiate a NeuralType you should pass it a dictionary (axis2type) - which - will map axis to it's AxisType. You can also pass optional argument when - describing input ports. - - For example, a ResNet18 input can be described as: - - .. code-block:: python - - NeuralType({0: AxisType(BatchTag, None, None), - 1: AxisType(ChannelTag, None, None), - 2: AxisType(HeightTag, 224, None), - 3: AxisType(WidthTag, 224, None)}) - - Special cases: - - non-tensor objects should be denoted as NeuralType(None) - - root type is denoted by NeuralType({}). A port of NeuralType({}) must - - accept NmTensors of any NeuralType. More specifically: - root_type = NeuralType({}) - root_type.compare(any_other_neural_type) == - NeuralTypeComparisonResult.SAME - - - See "nemo/tests/test_neural_types.py" for more examples. - - """ - - # def __init__(self, axis2type=None): - def __init__(self, axis2type={}, optional=False): - """ - Constructor - Args: - axis2type: mapping axises to it's AxisType - optional: (default: False). If this port is optional - """ - self._axis2type = axis2type - self._optional = optional - - def __str__(self): - if self._axis2type is None: - return "(Optional) " if self._optional else "" + "non-tensor " "object" - elif len(self._axis2type) == 0: - return "(Optional) " if self._optional else "" + "Root NeuralType" - return ( - "(Optional)" - if self._optional - else "" + "\n".join(["{0}->{1}".format(axis, tag) for axis, tag in self._axis2type.items()]) - ) - - def compare(self, n_type2) -> NeuralTypeComparisonResult: - """Compares if current object's NeuralType semantics is compatible - with n_type2 - - Args: - n_type2 (NeuralType): a type to compare with - - Returns: - Results of a comparison (NeuralTypeComparisonResult) - """ - # self is a root type - if self.axis2type is not None and len(self.axis2type) == 0: - return NeuralTypeComparisonResult.SAME - # n_type2 is root type but self is not - elif n_type2.axis2type is not None and len(n_type2.axis2type) == 0: - return NeuralTypeComparisonResult.INCOMPATIBLE - # one is None while other is not - elif self._axis2type is None and n_type2._axis2type is not None: - return NeuralTypeComparisonResult.INCOMPATIBLE - elif self._axis2type is not None and n_type2._axis2type is None: - return NeuralTypeComparisonResult.INCOMPATIBLE - # same neural type - elif self._axis2type == n_type2._axis2type: - return NeuralTypeComparisonResult.SAME - # same set of keys and set of values => TRANSPOSE_SAME - elif set(self._axis2type.keys()) == set(n_type2._axis2type.keys()) and set(self._axis2type.values()) == set( - n_type2._axis2type.values() - ): - return NeuralTypeComparisonResult.TRANSPOSE_SAME - - elif set(self._axis2type.keys()) == set(n_type2._axis2type.keys()): - # comparison_result = 1 - comparison_result = 0 - for key in self._axis2type.keys(): - comparison_result = max( - self._axis2type[key].compare_to(n_type2._axis2type[key]).value, comparison_result, - ) - return NeuralTypeComparisonResult(comparison_result) - else: - return NeuralTypeComparisonResult.INCOMPATIBLE - - @property - def axis2type(self): - return self._axis2type - - -class NmTensor(NeuralType): - """Class representing data which flows between NeuralModules' ports. - It also has a type of NeuralType represented by inheriting from NeuralType - object.""" - - def __init__(self, producer, producer_args, name, ntype=None): - """NmTensor constructor. - - Args: - producer (NeuralModule): object which produced this - producer_args (dict): a dictionary of port_name->NmTensor value - of arguments which were sent to producer to create this - """ - super(NmTensor, self).__init__(axis2type=ntype._axis2type) - self._producer = producer - self._producer_args = producer_args - self._name = name - self._uuid = str(uuid.uuid4()) - - @property - def producer(self): - """ - Returns: - NeuralModule object which produced this NmTensor. - """ - return self._producer - - @property - def producer_args(self): - """ - Returns: - a dictionary of port_name->NmTensor value - of arguments which were sent to producer to create this object - """ - return self._producer_args - - @property - def name(self): - """ - Returns: - A NmTensor's name which should be equal to - the NeuralModule's output port's name which created it - """ - return self._name - - @property - def unique_name(self): - """Unique NMTensor name. - It is composed of non-unique name (self.name) and uuid of NeuralModule - which created this tensor. - - Returns: - str: unique name - """ - if self._producer is None: - raise ValueError("This NmTensor does not have a unique name") - return f"{self._name}~~~{self.producer}~~~{self._uuid}" - - -class NeuralTypeError(Exception): - """Base class for neural type related exceptions.""" - - pass - - -class NeuralPortNameMismatchError(NeuralTypeError): - """Exception raised when neural module is called with incorrect port - names.""" - - def __init__(self, message): - self.message = message - - -class NeuralPortNmTensorMismatchError(NeuralTypeError): - """Exception raised when a port is fed with a NmTensor of incompatible - type.""" - - def __init__(self, message): - self.message = message - - -class CanNotInferResultNeuralType(NeuralTypeError): - """Exception raised when NeuralType of output can not be inferred.""" - - def __init__(self, message): - self.message = message diff --git a/tests/core/test_neural_types.py b/tests/core/test_neural_types.py index bffdf705bd56..51a1c9b18044 100644 --- a/tests/core/test_neural_types.py +++ b/tests/core/test_neural_types.py @@ -51,7 +51,13 @@ def test_parameterized_type_audio_sampling_frequency(self): self.assertEqual(another16K.compare(audio16K), NeuralTypeComparisonResult.SAME) self.assertEqual(audio16K.compare(another16K), NeuralTypeComparisonResult.SAME) - def test_transpose_same(self): + def test_transpose_same_1(self): + type1 = NeuralType(axes=('B', 'T', 'C')) + type2 = NeuralType(axes=('T', 'B', 'C')) + self.assertEqual(type1.compare(type2), NeuralTypeComparisonResult.TRANSPOSE_SAME) + self.assertEqual(type2.compare(type1), NeuralTypeComparisonResult.TRANSPOSE_SAME) + + def test_transpose_same_2(self): audio16K = NeuralType(AudioSignal(16000), axes=('B', 'T')) audio16K_t = NeuralType(AudioSignal(16000), axes=('T', 'B')) self.assertEqual(audio16K.compare(audio16K_t), NeuralTypeComparisonResult.TRANSPOSE_SAME) @@ -67,6 +73,12 @@ def test_inheritance_spec_augment_example(self): self.assertEqual(out1.compare(input), NeuralTypeComparisonResult.LESS) self.assertEqual(out2.compare(input), NeuralTypeComparisonResult.LESS) + def test_singletone(self): + loss_output1 = NeuralType(axes=None) + loss_output2 = NeuralType(axes=None) + self.assertEqual(loss_output1.compare(loss_output2), NeuralTypeComparisonResult.SAME) + self.assertEqual(loss_output2.compare(loss_output1), NeuralTypeComparisonResult.SAME) + def test_list_of_lists(self): T1 = NeuralType( elements_type=ChannelType(), From b8f633fba19b9afdb13e94e84a800ae0e9f27dd7 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Fri, 31 Jan 2020 13:05:03 -0800 Subject: [PATCH 04/30] simplest examples working Signed-off-by: Oleksii Kuchaiev --- nemo/backends/pytorch/common/other.py | 10 +- .../pytorch/tutorials/chatbot/data.py | 2 +- .../pytorch/tutorials/chatbot/modules.py | 138 +++-------------- nemo/backends/pytorch/tutorials/toys.py | 140 +++--------------- nemo/core/neural_modules.py | 54 ++++--- nemo/core/neural_types/neural_type.py | 20 ++- tests/core/test_neural_modules.py | 57 +++++++ tests/{ => core}/test_pytorch_trainers.py | 0 8 files changed, 142 insertions(+), 279 deletions(-) create mode 100644 tests/core/test_neural_modules.py rename tests/{ => core}/test_pytorch_trainers.py (100%) diff --git a/nemo/backends/pytorch/common/other.py b/nemo/backends/pytorch/common/other.py index 982abd100446..408dc40613dc 100644 --- a/nemo/backends/pytorch/common/other.py +++ b/nemo/backends/pytorch/common/other.py @@ -33,14 +33,8 @@ class SimpleCombiner(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - x1: - Empty?!? - - x2: - Empty?!? """ - return {"x1": NeuralType({}), "x2": NeuralType({})} + return {"x1": NeuralType(VoidType()), "x2": NeuralType(VoidType())} @property def output_ports(self): @@ -49,7 +43,7 @@ def output_ports(self): combined: None """ - return {"combined": None} + return {"combined": NeuralType(VoidType())} def __init__(self, mode="add", **kwargs): TrainableNM.__init__(self, **kwargs) diff --git a/nemo/backends/pytorch/tutorials/chatbot/data.py b/nemo/backends/pytorch/tutorials/chatbot/data.py index a4ea9124e4cb..6f53877c7754 100644 --- a/nemo/backends/pytorch/tutorials/chatbot/data.py +++ b/nemo/backends/pytorch/tutorials/chatbot/data.py @@ -213,7 +213,7 @@ def outputVar(l, voc): max_target_len = max([len(indexes) for indexes in indexes_batch]) padList = zeroPadding(indexes_batch) mask = binaryMatrix(padList) - mask = t.ByteTensor(mask) + mask = t.ByteTensor(mask).to(t.bool) padVar = t.LongTensor(padList) return padVar, mask, max_target_len diff --git a/nemo/backends/pytorch/tutorials/chatbot/modules.py b/nemo/backends/pytorch/tutorials/chatbot/modules.py index de98c5799edb..0386a2560323 100644 --- a/nemo/backends/pytorch/tutorials/chatbot/modules.py +++ b/nemo/backends/pytorch/tutorials/chatbot/modules.py @@ -20,34 +20,13 @@ class DialogDataLayer(DataLayerNM): @property def output_ports(self): """Returns definitions of module output ports. - - src: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - - src_lengths: - 0: AxisType(BatchTag) - - tgt: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - - mask: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - - max_tgt_lengths: - None """ return { - "src": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}), - "src_lengths": NeuralType({0: AxisType(BatchTag)}), - "tgt": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}), - "mask": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}), - "max_tgt_lengths": NeuralType(None), + "src": NeuralType(ChannelType(), ('T', 'B')), + "src_lengths": NeuralType(ChannelType(), tuple('B')), + "tgt": NeuralType(LabelsType(), ('T', 'B')), + "mask": NeuralType(ChannelType(), ('T', 'B')), + "max_tgt_lengths": NeuralType(axes=None), } def __init__(self, *, batch_size, corpus_name, datafile, min_count=3, **kwargs): @@ -94,39 +73,19 @@ class EncoderRNN(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - input_seq: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - - input_lengths: - 0: AxisType(BatchTag) """ return { - "input_seq": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}), - "input_lengths": NeuralType({0: AxisType(BatchTag)}), + "input_seq": NeuralType(ChannelType(), ('T', 'B')), + "input_lengths": NeuralType(ChannelType(), tuple('B')), } @property def output_ports(self): """Returns definitions of module output ports. - - outputs: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - - 2: AxisType(ChannelTag) - - hidden: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) """ return { - "outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),}), - "hidden": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + "outputs": NeuralType(ChannelType(), ('T', 'B', 'D')), + "hidden": NeuralType(ChannelType(), ('B', 'D')), } def __init__(self, *, voc_size, encoder_n_layers, hidden_size, dropout, bidirectional=True, **kwargs): @@ -174,26 +133,11 @@ class LuongAttnDecoderRNN(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - targets: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - - encoder_outputs: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - - 2: AxisType(ChannelTag) - - max_target_len: - None """ return { - "targets": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}), - "encoder_outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),}), - "max_target_len": NeuralType(None), + "targets": NeuralType(LabelsType(), ('T', 'B')), + "encoder_outputs": NeuralType(ChannelType(), ('T', 'B', 'D')), + "max_target_len": NeuralType(axes=None), } @property @@ -213,8 +157,8 @@ def output_ports(self): 1: AxisType(ChannelTag) """ return { - "outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),}), - "hidden": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + "outputs": NeuralType(ChannelType(), ('T', 'B', 'D')), + "hidden": NeuralType(ChannelType(), ('B', 'D')), } def __init__(self, *, attn_model, hidden_size, voc_size, decoder_n_layers, dropout, **kwargs): @@ -327,28 +271,11 @@ class MaskedXEntropyLoss(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - predictions - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - - 2: AxisType(ChannelTag)} - - target: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - - mask: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) """ return { - "predictions": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),}), - "target": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}), - "mask": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}), + "predictions": NeuralType(ChannelType(), ('T', 'B', 'D')), + "target": NeuralType(LabelsType(), ('T', 'B')), + "mask": NeuralType(ChannelType(), ('T', 'B')), } @property @@ -358,7 +285,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType(), axes=None)} def __init__(self, **kwargs): LossNM.__init__(self, **kwargs) @@ -381,39 +308,16 @@ class GreedyLuongAttnDecoderRNN(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - encoder_outputs: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - - 2: AxisType(ChannelTag) """ - return {"encoder_outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),})} + return {"encoder_outputs": NeuralType(ChannelType(), ('T', 'B', 'D'))} @property def output_ports(self): """Returns definitions of module output ports. - - outputs: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - - hidden: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) """ return { - "outputs": NeuralType( - { - 0: AxisType(TimeTag), - 1: AxisType(BatchTag), - # 2: AxisType(ChannelTag) - } - ), - "hidden": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + "outputs": NeuralType(ChannelType(), ('T', 'B')), + "hidden": NeuralType(ChannelType(), ('B', 'D')), } def __init__(self, *, attn_model, hidden_size, voc_size, decoder_n_layers, dropout, max_dec_steps=10, **kwargs): diff --git a/nemo/backends/pytorch/tutorials/toys.py b/nemo/backends/pytorch/tutorials/toys.py index b2449c5ddfd5..55e83aaf986d 100644 --- a/nemo/backends/pytorch/tutorials/toys.py +++ b/nemo/backends/pytorch/tutorials/toys.py @@ -6,7 +6,7 @@ import torch.utils.data as t_utils from ....core import DeviceType, NeuralModule -from ....core.neural_types import * +from ....core.neural_types import NeuralType, ChannelType, LabelsType from ..nm import DataLayerNM, LossNM, TrainableNM @@ -20,7 +20,7 @@ def input_ports(self): Returns: A (dict) of module's input ports names to NeuralTypes mapping """ - return {"x": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} + return {"x": NeuralType(ChannelType(), ('B', 'D'))} @property def output_ports(self): @@ -29,7 +29,7 @@ def output_ports(self): Returns: A (dict) of module's output ports names to NeuralTypes mapping """ - return {"y_pred": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} + return {"y_pred": NeuralType(ChannelType(), ('B', 'D'))} def __init__(self, *, dim, **kwargs): # Part specific for Neural Modules API: @@ -60,31 +60,17 @@ class TaylorNetO(TrainableNM): # Note inheritance from TrainableNM def input_ports(self): """Returns definitions of module input ports. - x: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - o: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) """ return { - "x": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "o": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + "x": NeuralType(ChannelType(), ('B', 'D')), + "o": NeuralType(ChannelType(), ('B', 'D')), } @property def output_ports(self): """Returns definitions of module output ports. - - y_pred: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) """ - return {"y_pred": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}, optional=True)} + return {"y_pred": NeuralType(ChannelType(), ('B', 'D'), optional=True)} def __init__(self, *, dim, **kwargs): # Part specific for Neural Modules API: @@ -133,20 +119,10 @@ def __len__(self): @property def output_ports(self): """Returns definitions of module output ports - - x: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - y: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) """ return { - "x": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "y": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + "x": NeuralType(ChannelType(), ('B', 'D')), + "y": NeuralType(LabelsType(), ('B', 'D')), } def __init__(self, *, n, batch_size, f=t.sin, x_lo=-4, x_hi=4, **kwargs): @@ -188,8 +164,8 @@ def input_ports(self): 1: AxisType(ChannelTag) """ return { - "predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "target": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + "predictions": NeuralType(ChannelType(), ('B', 'D')), + "target": NeuralType(LabelsType(), ('B', 'D')), } @property @@ -199,7 +175,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(axes=None)} def __init__(self, **kwargs): LossNM.__init__(self, **kwargs) @@ -213,20 +189,10 @@ class L1Loss(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - predictions: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - target: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) """ return { - "predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "target": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + "predictions": NeuralType(ChannelType(), ('B', 'D')), + "target": NeuralType(LabelsType(), ('B', 'D')), } @property @@ -236,7 +202,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(axes=None)} def __init__(self, **kwargs): LossNM.__init__(self, **kwargs) @@ -250,18 +216,10 @@ class CrossEntropyLoss(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - predictions: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - labels: - 0: AxisType(BatchTag) """ return { - "predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "labels": NeuralType({0: AxisType(BatchTag)}), + "predictions": NeuralType(ChannelType(), ('B', 'D')), + "labels": NeuralType(LabelsType(), tuple('B')), } @property @@ -271,7 +229,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(axes=None)} def __init__(self, **kwargs): # Neural Module API specific @@ -282,67 +240,3 @@ def __init__(self, **kwargs): # You need to implement this function def _loss_function(self, **kwargs): return self._criterion(*(kwargs.values())) - - -class DopeDualLoss(LossNM): - """ - The dual loss function that DOPE uses - """ - - @property - def input_ports(self): - """Returns definitions of module input ports. - - belief_predictions: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - belief_labels: - 0: AxisType(BatchTag) - - affinity_predictions: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - affinity_labels: - 0: AxisType(BatchTag) - """ - return { - "belief_predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "belief_labels": NeuralType({0: AxisType(BatchTag)}), - "affinity_predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "affinity_labels": NeuralType({0: AxisType(BatchTag)}), - } - - @property - def output_ports(self): - """Returns definitions of module output ports. - - loss: - NeuralType(None) - """ - return {"loss": NeuralType(None)} - - def __init__(self, **kwargs): - # Neural Module API specific - NeuralModule.__init__(self, **kwargs) - - # You need to implement this function - def _loss_function(self, **kwargs): - loss = 0.0 - - # Belief maps loss - # output, each belief map layers. - for l in kwargs["belief_predictions"]: - loss_tmp = ((l - kwargs["belief_labels"]) * (l - kwargs["belief_labels"])).mean() - loss += loss_tmp - - # Affinities loss - # output, each belief map layers. - for l in kwargs["affinity_predictions"]: - loss_tmp = ((l - kwargs["affinity_labels"]) * (l - kwargs["affinity_labels"])).mean() - loss += loss_tmp - - return loss diff --git a/nemo/core/neural_modules.py b/nemo/core/neural_modules.py index 373839ee93b2..20faafbbaf19 100644 --- a/nemo/core/neural_modules.py +++ b/nemo/core/neural_modules.py @@ -22,6 +22,7 @@ import uuid from abc import ABC, abstractmethod from collections import namedtuple +from copy import deepcopy from enum import Enum from inspect import getargvalues, stack from typing import Dict, List, Optional, Set, Tuple @@ -142,10 +143,6 @@ def __call__(self, **kwargs): Returns: NmTensor object or tuple of NmTensor objects """ - # if self._assigned_top_order is not None: - # raise ValueError("We currently do not support calling same NM" - # "more than once") - # Get input and output ports definitions. input_port_defs = self.input_ports output_port_defs = self.output_ports @@ -153,34 +150,45 @@ def __call__(self, **kwargs): first_input_nmtensor_type = None input_nmtensors_are_of_same_type = True for port_name, tgv in kwargs.items(): + # make sure that passed arguments correspond to input port names if port_name not in input_port_defs.keys(): raise NeuralPortNameMismatchError("Wrong input port name: {0}".format(port_name)) - type_comatibility = input_port_defs[port_name].compare(tgv) - - if first_input_nmtensor_type is None: - first_input_nmtensor_type = NeuralType(tgv._axis2type) - else: - if first_input_nmtensor_type._axis2type is None: - input_nmtensors_are_of_same_type = True - else: - input_nmtensors_are_of_same_type = first_input_nmtensor_type.compare( - tgv - ) == NeuralTypeComparisonResult.SAME and len(first_input_nmtensor_type._axis2type) - if not ( - type_comatibility == NeuralTypeComparisonResult.SAME - or type_comatibility == NeuralTypeComparisonResult.GREATER - ): + input_port = input_port_defs[port_name] + type_comatibility = input_port.compare(tgv) + if type_comatibility != NeuralTypeComparisonResult.SAME and type_comatibility != \ + NeuralTypeComparisonResult.GREATER: raise NeuralPortNmTensorMismatchError( "\n\nIn {0}. \n" "Port: {1} and a NmTensor it was fed are \n" "of incompatible neural types:\n\n{2} \n\n and \n\n{3}" "\n\nType comparison result: {4}".format( self.__class__.__name__, port_name, input_port_defs[port_name], tgv, type_comatibility, - ) - ) - if type_comatibility == NeuralTypeComparisonResult.LESS: - print('Types were raised') + )) + + # if first_input_nmtensor_type is None: + # first_input_nmtensor_type = NeuralType(tgv._axis2type) + # else: + # if first_input_nmtensor_type._axis2type is None: + # input_nmtensors_are_of_same_type = True + # else: + # input_nmtensors_are_of_same_type = first_input_nmtensor_type.compare( + # tgv + # ) == NeuralTypeComparisonResult.SAME and len(first_input_nmtensor_type._axis2type) + # if not ( + # type_comatibility == NeuralTypeComparisonResult.SAME + # or type_comatibility == NeuralTypeComparisonResult.GREATER + # ): + # raise NeuralPortNmTensorMismatchError( + # "\n\nIn {0}. \n" + # "Port: {1} and a NmTensor it was fed are \n" + # "of incompatible neural types:\n\n{2} \n\n and \n\n{3}" + # "\n\nType comparison result: {4}".format( + # self.__class__.__name__, port_name, input_port_defs[port_name], tgv, type_comatibility, + # ) + # ) + # if type_comatibility == NeuralTypeComparisonResult.LESS: + # print('Types were raised') if len(output_port_defs) == 1: out_name = list(output_port_defs)[0] diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py index fede950785b7..83d12ba179a8 100644 --- a/nemo/core/neural_types/neural_type.py +++ b/nemo/core/neural_types/neural_type.py @@ -37,6 +37,9 @@ class NeuralType(object): nmTensors derives from this. It is used to represent *the types* of inputs and outputs.""" def __init__(self, elements_type: ElementType = VoidType(), axes: Optional[Tuple] = None, optional=False): + if not isinstance(elements_type, ElementType): + raise ValueError(f"elements_type of NeuralType must be an instance of a class derived from ElementType." + f"Did you pass a class instead?") self.elements_type = elements_type if axes is not None: self.__check_sanity(axes) @@ -48,21 +51,24 @@ def __init__(self, elements_type: ElementType = VoidType(), axes: Optional[Tuple axes_list.append(axis) else: raise ValueError(f"axis type must be either str or AxisType instance") - self.axes_tuple = tuple(axes_list) + self.axes = tuple(axes_list) else: - self.axes_tuple = None + self.axes = None self.optional = optional def compare(self, second) -> NeuralTypeComparisonResult: # First, handle dimensionality - axes_a = self.axes_tuple - axes_b = second.axes_tuple + axes_a = self.axes + axes_b = second.axes kinds_a = dict() kinds_b = dict() - if self.axes_tuple is None: - if second.axes_tuple is None: + if isinstance(self.elements_type, VoidType) and self.axes is None: + return NeuralTypeComparisonResult.SAME + + if self.axes is None: + if second.axes is None: return self.elements_type.compare(second.elements_type) else: return NeuralTypeComparisonResult.INCOMPATIBLE @@ -125,7 +131,7 @@ def __init__(self, producer, producer_args, name, ntype=None): producer_args (dict): a dictionary of port_name->NmTensor value of arguments which were sent to producer to create this """ - super(NmTensor, self).__init__(elements_type=ntype.elemts_type, axes=ntype.axes, optional=ntype.optional) + super(NmTensor, self).__init__(elements_type=ntype.elements_type, axes=ntype.axes, optional=ntype.optional) self._producer = producer self._producer_args = producer_args self._name = name diff --git a/tests/core/test_neural_modules.py b/tests/core/test_neural_modules.py new file mode 100644 index 000000000000..7b8a39bdf405 --- /dev/null +++ b/tests/core/test_neural_modules.py @@ -0,0 +1,57 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2019 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import nemo +from nemo.core.neural_types import NeuralType, ChannelType +from tests.common_setup import NeMoUnitTest + + +class NeuralModulesTests(NeMoUnitTest): + def test_call_TaylorNet(self): + x_tg = nemo.core.neural_modules.NmTensor( + producer=None, + producer_args=None, + name=None, + ntype=NeuralType(ChannelType(), ('B', 'D')) + ) + + tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) + # note that real port's name: x was used + y_pred = tn(x=x_tg) + self.assertEqual(y_pred.producer, tn) + self.assertEqual(y_pred.producer_args.get("x"), x_tg) + + def test_simplest_example_chain(self): + data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=1) + trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) + loss = nemo.backends.pytorch.tutorials.MSELoss() + x, y = data_source() + y_pred = trainable_module(x=x) + loss_tensor = loss(predictions=y_pred, target=y) + + # check producers' bookkeeping + self.assertEqual(loss_tensor.producer, loss) + self.assertEqual(loss_tensor.producer_args, {"predictions": y_pred, "target": y}) + self.assertEqual(y_pred.producer, trainable_module) + self.assertEqual(y_pred.producer_args, {"x": x}) + self.assertEqual(y.producer, data_source) + self.assertEqual(y.producer_args, {}) + self.assertEqual(x.producer, data_source) + self.assertEqual(x.producer_args, {}) + + diff --git a/tests/test_pytorch_trainers.py b/tests/core/test_pytorch_trainers.py similarity index 100% rename from tests/test_pytorch_trainers.py rename to tests/core/test_pytorch_trainers.py From 0cec89514dd143e38e0106d1a71d6ba9aeebbf10 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Mon, 3 Feb 2020 16:52:47 -0800 Subject: [PATCH 05/30] fix codestyle Signed-off-by: Oleksii Kuchaiev --- nemo/backends/pytorch/common/losses.py | 9 +++------ nemo/core/neural_types/elements.py | 2 +- nemo/core/neural_types/neural_type.py | 8 +++++--- tests/core/test_neural_modules.py | 11 +++-------- 4 files changed, 12 insertions(+), 18 deletions(-) diff --git a/nemo/backends/pytorch/common/losses.py b/nemo/backends/pytorch/common/losses.py index dbed3ff5ee0c..633eee772b66 100644 --- a/nemo/backends/pytorch/common/losses.py +++ b/nemo/backends/pytorch/common/losses.py @@ -2,7 +2,7 @@ from torch import nn from nemo.backends.pytorch.nm import LossNM -from nemo.core.neural_types import NeuralType, LogitsType, LabelsType, LossType, RegressionValuesType +from nemo.core.neural_types import LabelsType, LogitsType, LossType, NeuralType, RegressionValuesType __all__ = ['SequenceLoss', 'CrossEntropyLoss', 'MSELoss'] @@ -35,10 +35,7 @@ class SequenceLoss(LossNM): def input_ports(self): """Returns definitions of module input ports. """ - return { - 'log_probs': NeuralType(axes=('B', 'T', 'D')), - 'targets': NeuralType(axes=('B', 'T')) - } + return {'log_probs': NeuralType(axes=('B', 'T', 'D')), 'targets': NeuralType(axes=('B', 'T'))} @property def output_ports(self): @@ -120,7 +117,7 @@ def input_ports(self): """ return { "logits": NeuralType(elements_type=LogitsType, axes=('B', 'D')), - "labels": NeuralType(elements_type=LabelsType, axes=tuple('B')) + "labels": NeuralType(elements_type=LabelsType, axes=tuple('B')), } @property diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index b1a171e9507f..f4c4d12445b4 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -27,7 +27,7 @@ 'LogitsType', 'LabelsType', 'LossType', - 'RegressionValuesType' + 'RegressionValuesType', ] import abc from abc import ABC, abstractmethod diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py index 83d12ba179a8..a2df777c9296 100644 --- a/nemo/core/neural_types/neural_type.py +++ b/nemo/core/neural_types/neural_type.py @@ -25,7 +25,7 @@ 'CanNotInferResultNeuralType', ] import uuid -from typing import Tuple, Optional +from typing import Optional, Tuple from .axes import AxisKind, AxisType from .comparison import NeuralTypeComparisonResult @@ -38,8 +38,10 @@ class NeuralType(object): def __init__(self, elements_type: ElementType = VoidType(), axes: Optional[Tuple] = None, optional=False): if not isinstance(elements_type, ElementType): - raise ValueError(f"elements_type of NeuralType must be an instance of a class derived from ElementType." - f"Did you pass a class instead?") + raise ValueError( + f"elements_type of NeuralType must be an instance of a class derived from ElementType." + f"Did you pass a class instead?" + ) self.elements_type = elements_type if axes is not None: self.__check_sanity(axes) diff --git a/tests/core/test_neural_modules.py b/tests/core/test_neural_modules.py index 7b8a39bdf405..92dd80237d91 100644 --- a/tests/core/test_neural_modules.py +++ b/tests/core/test_neural_modules.py @@ -17,18 +17,15 @@ # ============================================================================= import nemo -from nemo.core.neural_types import NeuralType, ChannelType +from nemo.core.neural_types import ChannelType, NeuralType from tests.common_setup import NeMoUnitTest class NeuralModulesTests(NeMoUnitTest): def test_call_TaylorNet(self): x_tg = nemo.core.neural_modules.NmTensor( - producer=None, - producer_args=None, - name=None, - ntype=NeuralType(ChannelType(), ('B', 'D')) - ) + producer=None, producer_args=None, name=None, ntype=NeuralType(ChannelType(), ('B', 'D')) + ) tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) # note that real port's name: x was used @@ -53,5 +50,3 @@ def test_simplest_example_chain(self): self.assertEqual(y.producer_args, {}) self.assertEqual(x.producer, data_source) self.assertEqual(x.producer_args, {}) - - From ce5cb0777715ba3759071e92271f92c46a99525b Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Wed, 5 Feb 2020 16:07:31 -0800 Subject: [PATCH 06/30] passing core tests ? Signed-off-by: Oleksii Kuchaiev --- Jenkinsfile | 4 +- nemo/backends/pytorch/common/zero_data.py | 8 +- nemo/core/neural_types/axes.py | 2 +- nemo/core/neural_types/elements.py | 21 +- nemo/core/neural_types/neural_type.py | 85 ++++-- requirements/requirements_test.txt | 1 + tests/{ => core}/test_actions_api.py | 0 tests/core/test_deploy_export.py | 149 ++++++++++ tests/{ => core}/test_deprecated.py | 0 tests/{ => core}/test_infer.py | 18 +- tests/{ => core}/test_neural_factory.py | 0 .../test_neural_modules_initialization.py | 0 .../{ => core}/test_neural_modules_pytorch.py | 13 +- tests/core/test_neural_types.py | 67 ++++- tests/{ => core}/test_policies.py | 0 tests/test_deploy_export.py | 149 ---------- tests/test_neural_types.py | 258 ------------------ tests/test_tutorials_pytorch.py | 29 -- 18 files changed, 322 insertions(+), 482 deletions(-) rename tests/{ => core}/test_actions_api.py (100%) create mode 100644 tests/core/test_deploy_export.py rename tests/{ => core}/test_deprecated.py (100%) rename tests/{ => core}/test_infer.py (81%) rename tests/{ => core}/test_neural_factory.py (100%) rename tests/{ => core}/test_neural_modules_initialization.py (100%) rename tests/{ => core}/test_neural_modules_pytorch.py (90%) rename tests/{ => core}/test_policies.py (100%) delete mode 100644 tests/test_deploy_export.py delete mode 100644 tests/test_neural_types.py delete mode 100644 tests/test_tutorials_pytorch.py diff --git a/Jenkinsfile b/Jenkinsfile index d0d2b0eaa5b1..7c2ee564dac4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -24,9 +24,9 @@ pipeline { sh 'python setup.py style' } } - stage('Unittests general') { + stage('Unittests Core') { steps { - sh './reinstall.sh && python -m unittest tests/*.py' + sh './reinstall.sh && python -m unittest tests/core/*.py' } } stage('Unittests ASR') { diff --git a/nemo/backends/pytorch/common/zero_data.py b/nemo/backends/pytorch/common/zero_data.py index 0c7b14fe1a11..18f366c46140 100644 --- a/nemo/backends/pytorch/common/zero_data.py +++ b/nemo/backends/pytorch/common/zero_data.py @@ -18,11 +18,11 @@ def neuralType2TensorShape(neural_type: NeuralType, default_dim=32, skip_batch_a torch.Size """ dims = [] - for axis_ind, axis_type in neural_type.axis2type.items(): - if axis_type._semantics == BatchTag and skip_batch_axis: + for axis in neural_type.axes: + if axis.kind == AxisKind.Batch and skip_batch_axis: continue - if axis_type.dim is not None: - dims.append(axis_type.dim) + if axis.size is not None: + dims.append(axis.size) else: dims.append(default_dim) return torch.Size(dims) diff --git a/nemo/core/neural_types/axes.py b/nemo/core/neural_types/axes.py index 5efba5d20ca7..acb9a27646f2 100644 --- a/nemo/core/neural_types/axes.py +++ b/nemo/core/neural_types/axes.py @@ -69,7 +69,7 @@ class AxisType(object): is_list (bool, default=False): """ - def __init__(self, kind: AxisKindAbstract, size: Optional[int], is_list=False): + def __init__(self, kind: AxisKindAbstract, size: Optional[int] = None, is_list=False): if size is not None and is_list: raise ValueError("The axis can't be list and have a fixed size") self.kind = kind diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index f4c4d12445b4..37f35867a159 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -28,6 +28,9 @@ 'LabelsType', 'LossType', 'RegressionValuesType', + 'PredictionsType', + 'LogprobsType', + 'LengthsType', ] import abc from abc import ABC, abstractmethod @@ -55,7 +58,6 @@ def fields(self) -> Optional[Tuple]: def compare(self, second) -> NeuralTypeComparisonResult: # First, check general compatibility - result = NeuralTypeComparisonResult.SAME first_t = type(self) second_t = type(second) @@ -108,11 +110,21 @@ def __str__(self): return "neural type representing logits" +class LogprobsType(ElementType): + def __str__(self): + return "neural type representing log probabilities" + + class LabelsType(ElementType): def __str__(self): return "neural type representing labels" +class LengthsType(ElementType): + def __str__(self): + return "neural type representing lengths of something" + + class LossType(ElementType): def __str__(self): return "neural type representing loss value" @@ -151,6 +163,11 @@ def __str__(self): return "mfcc spectorgram type" -class RegressionValuesType(ElementType): +class PredictionsType(ElementType): + def __str__(self): + return "predictions values type" + + +class RegressionValuesType(PredictionsType): def __str__(self): return "regression values type" diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py index a2df777c9296..9cb7513963e4 100644 --- a/nemo/core/neural_types/neural_type.py +++ b/nemo/core/neural_types/neural_type.py @@ -44,7 +44,7 @@ def __init__(self, elements_type: ElementType = VoidType(), axes: Optional[Tuple ) self.elements_type = elements_type if axes is not None: - self.__check_sanity(axes) + NeuralType.__check_sanity(axes) axes_list = [] for axis in axes: if isinstance(axis, str): @@ -63,9 +63,7 @@ def compare(self, second) -> NeuralTypeComparisonResult: axes_a = self.axes axes_b = second.axes - kinds_a = dict() - kinds_b = dict() - + # "Big void" type if isinstance(self.elements_type, VoidType) and self.axes is None: return NeuralTypeComparisonResult.SAME @@ -75,28 +73,29 @@ def compare(self, second) -> NeuralTypeComparisonResult: else: return NeuralTypeComparisonResult.INCOMPATIBLE - dimensions_pass = True - for axis_a, axis_b in zip(axes_a, axes_b): - kinds_a[axis_a.kind] = axis_a.size - kinds_b[axis_b.kind] = axis_b.size - if axis_a.kind != axis_b.kind or axis_a.is_list != axis_b.is_list: - dimensions_pass = False - - if kinds_a.keys() != kinds_b.keys(): - return NeuralTypeComparisonResult.INCOMPATIBLE - for kind, size in kinds_a.items(): - if size != kinds_b[kind]: - return NeuralTypeComparisonResult.DIM_INCOMPATIBLE - + dimensions_pass = NeuralType.__compare_axes(axes_a, axes_b) element_comparison_result = self.elements_type.compare(second.elements_type) - if dimensions_pass: + + # SAME DIMS + if dimensions_pass == 0: return element_comparison_result - elif element_comparison_result == NeuralTypeComparisonResult.SAME: - return NeuralTypeComparisonResult.TRANSPOSE_SAME + # TRANSPOSE_SAME DIMS + elif dimensions_pass == 1: + if element_comparison_result == NeuralTypeComparisonResult.SAME: + return NeuralTypeComparisonResult.TRANSPOSE_SAME + else: + return NeuralTypeComparisonResult.INCOMPATIBLE + # DIM_INCOMPATIBLE DIMS + elif dimensions_pass == 2: + if element_comparison_result == NeuralTypeComparisonResult.SAME: + return NeuralTypeComparisonResult.DIM_INCOMPATIBLE + else: + return NeuralTypeComparisonResult.INCOMPATIBLE else: return NeuralTypeComparisonResult.INCOMPATIBLE - def __check_sanity(self, axes): + @staticmethod + def __check_sanity(axes): # check that list come before any tensor dimension are_strings = True for axis in axes: @@ -119,6 +118,50 @@ def __check_sanity(self, axes): "You have list dimension after Tensor dimension. All list dimensions must preceed Tensor dimensions" ) + @staticmethod + def __compare_axes(axes_a, axes_b) -> int: + """ + Compares axes_a and axes_b + Args: + axes_a: first axes tuple + axes_b: second axes tuple + + Returns: + 0 - if they are exactly the same + 1 - if they are "TRANSPOSE_SAME" + 2 - if the are "DIM_INCOMPATIBLE" + 3 - if they are different + """ + if axes_a is None and axes_b is None: + return 0 + elif axes_a is None and axes_b is not None: + return 3 + elif axes_a is not None and axes_b is None: + return 3 + elif len(axes_a) != len(axes_b): + return 3 + # After these ifs we know that len(axes_a) == len(axes_b) + + same = True + kinds_a = dict() + kinds_b = dict() + for axis_a, axis_b in zip(axes_a, axes_b): + kinds_a[axis_a.kind] = axis_a.size + kinds_b[axis_b.kind] = axis_b.size + if axis_a.kind != axis_b.kind or axis_a.is_list != axis_b.is_list or axis_a.size != axis_b.size: + same = False + if same: + return 0 + else: + # can be TRANSPOSE_SAME, DIM_INCOMPATIBLE + if kinds_a.keys() == kinds_b.keys(): + for key, value in kinds_a.items(): + if kinds_b[key] != value: + return 2 + return 1 + else: + return 3 + class NmTensor(NeuralType): """Class representing data which flows between NeuralModules' ports. diff --git a/requirements/requirements_test.txt b/requirements/requirements_test.txt index 493b8268cfd1..544127fca734 100644 --- a/requirements/requirements_test.txt +++ b/requirements/requirements_test.txt @@ -4,3 +4,4 @@ pytest-runner black isort[requirements] wrapt +onnxruntime diff --git a/tests/test_actions_api.py b/tests/core/test_actions_api.py similarity index 100% rename from tests/test_actions_api.py rename to tests/core/test_actions_api.py diff --git a/tests/core/test_deploy_export.py b/tests/core/test_deploy_export.py new file mode 100644 index 000000000000..6ef415c6c8cf --- /dev/null +++ b/tests/core/test_deploy_export.py @@ -0,0 +1,149 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2019 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# import os +# from pathlib import Path +# +# # git clone git@github.com:microsoft/onnxruntime.git +# # cd onnxruntime +# # ./build.sh --update --build --config RelWithDebInfo --build_shared_lib --parallel --use_cuda \ +# # --cudnn_home /usr/lib/x86_64-linux-gnu --cuda_home /usr/local/cuda --enable_pybind --build_wheel +# # pip install --upgrade ./build/Linux/RelWithDebInfo/dist/onnxruntime_gpu-1.1.0-cp37-cp37m-linux_x86_64.whl +# import onnxruntime as ort +# import torch +# from ruamel.yaml import YAML +# +# import nemo +# import nemo.collections.asr as nemo_asr +# import nemo.collections.nlp as nemo_nlp +# import nemo.collections.nlp.nm.trainables.common.token_classification_nm +# from tests.common_setup import NeMoUnitTest +# +# +# class TestDeployExport(NeMoUnitTest): +# def setUp(self): +# """ Setups neural factory so it will use GPU instead of CPU. """ +# NeMoUnitTest.setUp(self) +# +# # Perform computations on GPU. +# self.nf._placement = nemo.core.DeviceType.GPU +# +# def __test_export_route(self, module, out_name, mode, input_example=None): +# out = Path(out_name) +# if out.exists(): +# os.remove(out) +# +# self.nf.deployment_export(module=module, output=out_name, input_example=input_example, d_format=mode) +# +# self.assertTrue(out.exists()) +# if mode == nemo.core.DeploymentFormat.ONNX: +# if isinstance(input_example, tuple): +# outputs_fwd = module.forward(*input_example) +# else: +# outputs_fwd = module.forward(input_example) +# sess_options = ort.SessionOptions() +# sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED +# ort_session = ort.InferenceSession(out_name, sess_options) +# inputs = dict() +# input_names = list(module.input_ports) +# for i in range(len(input_names)): +# input_name = ( +# "encoded_lengths" +# if type(module).__name__ == "JasperEncoder" and input_names[i] == "length" +# else input_names[i] +# ) +# inputs[input_name] = ( +# input_example[i].cpu().numpy() if isinstance(input_example, tuple) else input_example.cpu().numpy() +# ) +# outputs_ort = ort_session.run(None, inputs) +# outputs_ort = torch.from_numpy(outputs_ort[0]).cuda() +# self.assertLess( +# (outputs_ort - (outputs_fwd[0] if isinstance(outputs_fwd, tuple) else outputs_fwd)).norm(p=2), 5.0e-4 +# ) +# if out.exists(): +# os.remove(out) +# +# def test_simple_module_export(self): +# simplest_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) +# self.__test_export_route( +# module=simplest_module, +# out_name="simple.pt", +# mode=nemo.core.DeploymentFormat.TORCHSCRIPT, +# input_example=None, +# ) +# +# def test_TokenClassifier_module_export(self): +# t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( +# hidden_size=512, num_classes=16, use_transformer_pretrained=False +# ) +# self.__test_export_route( +# module=t_class, +# out_name="t_class.pt", +# mode=nemo.core.DeploymentFormat.TORCHSCRIPT, +# input_example=torch.randn(16, 16, 512).cuda(), +# ) +# +# def test_TokenClassifier_module_onnx_export(self): +# t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( +# hidden_size=512, num_classes=16, use_transformer_pretrained=False +# ) +# self.__test_export_route( +# module=t_class, +# out_name="t_class.onnx", +# mode=nemo.core.DeploymentFormat.ONNX, +# input_example=torch.randn(16, 16, 512).cuda(), +# ) +# +# def test_jasper_decoder_export_ts(self): +# j_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=33) +# self.__test_export_route( +# module=j_decoder, out_name="j_decoder.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=None +# ) +# +# def test_hf_bert_ts(self): +# bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") +# input_example = ( +# torch.randint(low=0, high=16, size=(2, 16)).cuda(), +# torch.randint(low=0, high=1, size=(2, 16)).cuda(), +# torch.randint(low=0, high=1, size=(2, 16)).cuda(), +# ) +# self.__test_export_route( +# module=bert, out_name="bert.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=input_example +# ) +# +# def test_hf_bert_pt(self): +# bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") +# self.__test_export_route(module=bert, out_name="bert.pt", mode=nemo.core.DeploymentFormat.PYTORCH) +# +# def test_jasper_encoder_to_onnx(self): +# with open("tests/data/jasper_smaller.yaml") as file: +# yaml = YAML(typ="safe") +# jasper_model_definition = yaml.load(file) +# +# jasper_encoder = nemo_asr.JasperEncoder( +# conv_mask=False, +# feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], +# **jasper_model_definition['JasperEncoder'] +# ) +# +# self.__test_export_route( +# module=jasper_encoder, +# out_name="jasper_encoder.onnx", +# mode=nemo.core.DeploymentFormat.ONNX, +# input_example=(torch.randn(16, 64, 256).cuda(), torch.randn(256).cuda()), +# ) diff --git a/tests/test_deprecated.py b/tests/core/test_deprecated.py similarity index 100% rename from tests/test_deprecated.py rename to tests/core/test_deprecated.py diff --git a/tests/test_infer.py b/tests/core/test_infer.py similarity index 81% rename from tests/test_infer.py rename to tests/core/test_infer.py index 05cec60c6fb9..e9611ea43967 100644 --- a/tests/test_infer.py +++ b/tests/core/test_infer.py @@ -30,11 +30,13 @@ def __init__(self): @property def input_ports(self): - return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + # return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + return {"mod_in": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))} @property def output_ports(self): - return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + # return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + return {"mod_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))} def forward(self, mod_in): return mod_in + 10 @@ -46,11 +48,11 @@ def __init__(self): @property def input_ports(self): - return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + return {"mod_in": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))} @property def output_ports(self): - return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + return {"mod_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))} def forward(self, mod_in): return mod_in - 10 @@ -66,7 +68,9 @@ def test_infer_caching(self): size=1, dtype=torch.FloatTensor, batch_size=1, - output_ports={"dl_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})}, + output_ports={ + "dl_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1))) + }, ) addten = AddsTen() minusten = SubtractsTen() @@ -93,7 +97,9 @@ def test_infer_errors(self): size=1, dtype=torch.FloatTensor, batch_size=1, - output_ports={"dl_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})}, + output_ports={ + "dl_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1))) + }, ) addten = AddsTen() minusten = SubtractsTen() diff --git a/tests/test_neural_factory.py b/tests/core/test_neural_factory.py similarity index 100% rename from tests/test_neural_factory.py rename to tests/core/test_neural_factory.py diff --git a/tests/test_neural_modules_initialization.py b/tests/core/test_neural_modules_initialization.py similarity index 100% rename from tests/test_neural_modules_initialization.py rename to tests/core/test_neural_modules_initialization.py diff --git a/tests/test_neural_modules_pytorch.py b/tests/core/test_neural_modules_pytorch.py similarity index 90% rename from tests/test_neural_modules_pytorch.py rename to tests/core/test_neural_modules_pytorch.py index 13ff0226262b..236844031e25 100644 --- a/tests/test_neural_modules_pytorch.py +++ b/tests/core/test_neural_modules_pytorch.py @@ -17,10 +17,13 @@ # limitations under the License. # ============================================================================= +# TODO: These test look bad/useless - redo + import unittest import nemo from nemo.backends.pytorch.nm import TrainableNM +from nemo.core.neural_types import ChannelType, NeuralType from tests.common_setup import NeMoUnitTest @@ -67,15 +70,7 @@ def test_constructor_TaylorNet(self): def test_call_TaylorNet(self): x_tg = nemo.core.neural_modules.NmTensor( - producer=None, - producer_args=None, - name=None, - ntype=nemo.core.neural_types.NeuralType( - { - 0: nemo.core.neural_types.AxisType(nemo.core.neural_types.BatchTag), - 1: nemo.core.neural_types.AxisType(nemo.core.neural_types.ChannelTag), - } - ), + producer=None, producer_args=None, name=None, ntype=NeuralType(ChannelType(), ('B', 'D')) ) tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) diff --git a/tests/core/test_neural_types.py b/tests/core/test_neural_types.py index 51a1c9b18044..537813b76f07 100644 --- a/tests/core/test_neural_types.py +++ b/tests/core/test_neural_types.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= - +import nemo from nemo.core.neural_types import ( AcousticEncodedRepresentation, AudioSignal, @@ -24,9 +24,11 @@ ChannelType, MelSpectrogramType, MFCCSpectrogramType, + NeuralPortNmTensorMismatchError, NeuralType, NeuralTypeComparisonResult, SpectrogramType, + VoidType, ) from tests.common_setup import NeMoUnitTest @@ -102,3 +104,66 @@ def test_list_of_lists(self): ) # TODO: should this be incompatible instead??? self.assertEqual(T1.compare(T2), NeuralTypeComparisonResult.TRANSPOSE_SAME) + + def test_void(self): + btc_spctr = NeuralType(SpectrogramType(), ('B', 'T', 'C')) + btc_spct_bad = NeuralType(SpectrogramType(), ('B', 'T')) + btc_void = NeuralType(VoidType(), ('B', 'T', 'C')) + self.assertEqual(btc_void.compare(btc_spctr), NeuralTypeComparisonResult.SAME) + self.assertEqual(btc_spctr.compare(btc_void), NeuralTypeComparisonResult.INCOMPATIBLE) + self.assertEqual(btc_void.compare(btc_spct_bad), NeuralTypeComparisonResult.INCOMPATIBLE) + + def test_big_void(self): + big_void_1 = NeuralType(VoidType()) + big_void_2 = NeuralType() + + btc_spctr = NeuralType(SpectrogramType(), ('B', 'T', 'C')) + btc_spct_bad = NeuralType(SpectrogramType(), ('B', 'T')) + t1 = NeuralType( + elements_type=ChannelType(), + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind.Time, size=None, is_list=True), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + ) + t2 = NeuralType( + elements_type=ChannelType(), + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + ) + + self.assertEqual(big_void_1.compare(btc_spctr), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_1.compare(btc_spct_bad), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_1.compare(t1), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_1.compare(t2), NeuralTypeComparisonResult.SAME) + + self.assertEqual(big_void_2.compare(btc_spctr), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_2.compare(btc_spct_bad), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_2.compare(t1), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_2.compare(t2), NeuralTypeComparisonResult.SAME) + + def test_dag(self): + data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=128) + trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) + loss = nemo.backends.pytorch.tutorials.MSELoss() + x, y = data_source() + y_pred = trainable_module(x=x) + _ = loss(predictions=y_pred, target=y) + + def wrong(): + data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=128) + trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) + loss = nemo.backends.pytorch.tutorials.MSELoss() + x, y = data_source() + loss_tensor = loss(predictions=x, target=x) + _ = trainable_module(x=loss_tensor) + + self.assertRaises(NeuralPortNmTensorMismatchError, wrong) diff --git a/tests/test_policies.py b/tests/core/test_policies.py similarity index 100% rename from tests/test_policies.py rename to tests/core/test_policies.py diff --git a/tests/test_deploy_export.py b/tests/test_deploy_export.py deleted file mode 100644 index be6a1a39573c..000000000000 --- a/tests/test_deploy_export.py +++ /dev/null @@ -1,149 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2019 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import os -from pathlib import Path - -# git clone git@github.com:microsoft/onnxruntime.git -# cd onnxruntime -# ./build.sh --update --build --config RelWithDebInfo --build_shared_lib --parallel --use_cuda \ -# --cudnn_home /usr/lib/x86_64-linux-gnu --cuda_home /usr/local/cuda --enable_pybind --build_wheel -# pip install --upgrade ./build/Linux/RelWithDebInfo/dist/onnxruntime_gpu-1.1.0-cp37-cp37m-linux_x86_64.whl -import onnxruntime as ort -import torch -from ruamel.yaml import YAML - -import nemo -import nemo.collections.asr as nemo_asr -import nemo.collections.nlp as nemo_nlp -import nemo.collections.nlp.nm.trainables.common.token_classification_nm -from tests.common_setup import NeMoUnitTest - - -class TestDeployExport(NeMoUnitTest): - def setUp(self): - """ Setups neural factory so it will use GPU instead of CPU. """ - NeMoUnitTest.setUp(self) - - # Perform computations on GPU. - self.nf._placement = nemo.core.DeviceType.GPU - - def __test_export_route(self, module, out_name, mode, input_example=None): - out = Path(out_name) - if out.exists(): - os.remove(out) - - self.nf.deployment_export(module=module, output=out_name, input_example=input_example, d_format=mode) - - self.assertTrue(out.exists()) - if mode == nemo.core.DeploymentFormat.ONNX: - if isinstance(input_example, tuple): - outputs_fwd = module.forward(*input_example) - else: - outputs_fwd = module.forward(input_example) - sess_options = ort.SessionOptions() - sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED - ort_session = ort.InferenceSession(out_name, sess_options) - inputs = dict() - input_names = list(module.input_ports) - for i in range(len(input_names)): - input_name = ( - "encoded_lengths" - if type(module).__name__ == "JasperEncoder" and input_names[i] == "length" - else input_names[i] - ) - inputs[input_name] = ( - input_example[i].cpu().numpy() if isinstance(input_example, tuple) else input_example.cpu().numpy() - ) - outputs_ort = ort_session.run(None, inputs) - outputs_ort = torch.from_numpy(outputs_ort[0]).cuda() - self.assertLess( - (outputs_ort - (outputs_fwd[0] if isinstance(outputs_fwd, tuple) else outputs_fwd)).norm(p=2), 5.0e-4 - ) - if out.exists(): - os.remove(out) - - def test_simple_module_export(self): - simplest_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - self.__test_export_route( - module=simplest_module, - out_name="simple.pt", - mode=nemo.core.DeploymentFormat.TORCHSCRIPT, - input_example=None, - ) - - def test_TokenClassifier_module_export(self): - t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( - hidden_size=512, num_classes=16, use_transformer_pretrained=False - ) - self.__test_export_route( - module=t_class, - out_name="t_class.pt", - mode=nemo.core.DeploymentFormat.TORCHSCRIPT, - input_example=torch.randn(16, 16, 512).cuda(), - ) - - def test_TokenClassifier_module_onnx_export(self): - t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( - hidden_size=512, num_classes=16, use_transformer_pretrained=False - ) - self.__test_export_route( - module=t_class, - out_name="t_class.onnx", - mode=nemo.core.DeploymentFormat.ONNX, - input_example=torch.randn(16, 16, 512).cuda(), - ) - - def test_jasper_decoder_export_ts(self): - j_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=33) - self.__test_export_route( - module=j_decoder, out_name="j_decoder.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=None - ) - - def test_hf_bert_ts(self): - bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") - input_example = ( - torch.randint(low=0, high=16, size=(2, 16)).cuda(), - torch.randint(low=0, high=1, size=(2, 16)).cuda(), - torch.randint(low=0, high=1, size=(2, 16)).cuda(), - ) - self.__test_export_route( - module=bert, out_name="bert.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=input_example - ) - - def test_hf_bert_pt(self): - bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") - self.__test_export_route(module=bert, out_name="bert.pt", mode=nemo.core.DeploymentFormat.PYTORCH) - - def test_jasper_encoder_to_onnx(self): - with open("tests/data/jasper_smaller.yaml") as file: - yaml = YAML(typ="safe") - jasper_model_definition = yaml.load(file) - - jasper_encoder = nemo_asr.JasperEncoder( - conv_mask=False, - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'] - ) - - self.__test_export_route( - module=jasper_encoder, - out_name="jasper_encoder.onnx", - mode=nemo.core.DeploymentFormat.ONNX, - input_example=(torch.randn(16, 64, 256).cuda(), torch.randn(256).cuda()), - ) diff --git a/tests/test_neural_types.py b/tests/test_neural_types.py deleted file mode 100644 index c2741ca3d7c6..000000000000 --- a/tests/test_neural_types.py +++ /dev/null @@ -1,258 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2019 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import tarfile - -from ruamel.yaml import YAML - -import nemo.collections.asr as nemo_asr -from nemo import logging -from nemo.core import * -from tests.common_setup import NeMoUnitTest - - -class TestNeuralTypes(NeMoUnitTest): - manifest_filepath = "tests/data/asr/an4_train.json" - yaml = YAML(typ="safe") - - def setUp(self) -> None: - super().setUp() - data_folder = "tests/data/" - logging.info("Looking up for test ASR data") - if not os.path.exists(data_folder + "asr"): - logging.info("Extracting ASR data to: {0}".format(data_folder + "asr")) - tar = tarfile.open("tests/data/asr.tar.gz", "r:gz") - tar.extractall(path=data_folder) - tar.close() - else: - logging.info("ASR data found in: {0}".format(data_folder + "asr")) - - def test_same(self): - btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - btc2 = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - self.assertEqual(btc2.compare(btc), NeuralTypeComparisonResult.SAME) - - def test_transpose_same(self): - btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - tbc = NeuralType(axis2type={1: AxisType(BatchTag), 0: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - - self.assertEqual(btc.compare(tbc), NeuralTypeComparisonResult.TRANSPOSE_SAME) - self.assertEqual(tbc.compare(btc), NeuralTypeComparisonResult.TRANSPOSE_SAME) - - def test_dim_incompatible(self): - nchw1 = NeuralType( - axis2type={ - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224), - } - ) - nchw2 = NeuralType( - axis2type={ - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 256), - 3: AxisType(WidthTag, 256), - } - ) - self.assertEqual(nchw1.compare(nchw2), NeuralTypeComparisonResult.DIM_INCOMPATIBLE) - - def test_rank_incompatible(self): - btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - nchw = NeuralType( - axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(HeightTag), 3: AxisType(WidthTag)} - ) - self.assertEqual(nchw.compare(btc), NeuralTypeComparisonResult.INCOMPATIBLE) - - def test_axis_type(self): - ax1 = AxisType(BatchTag) - ax2 = AxisType(TimeTag) - ax3 = AxisType(ProcessedTimeTag) - self.assertEqual(ax1.compare_to(ax2), NeuralTypeComparisonResult.INCOMPATIBLE) - self.assertEqual(ax3.compare_to(ax2), NeuralTypeComparisonResult.LESS) - self.assertEqual(ax2.compare_to(ax3), NeuralTypeComparisonResult.GREATER) - self.assertEqual(ax2.compare_to(AxisType(TimeTag)), NeuralTypeComparisonResult.SAME) - - def test_semantic_incompatible(self): - nchw = NeuralType( - axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(HeightTag), 3: AxisType(WidthTag)} - ) - badd = NeuralType( - axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag), 3: AxisType(WidthTag)} - ) - self.assertEqual(nchw.compare(badd), NeuralTypeComparisonResult.INCOMPATIBLE) - self.assertEqual(badd.compare(nchw), NeuralTypeComparisonResult.INCOMPATIBLE) - - def test_root(self): - root = NeuralType({}) - non_tensor = NeuralType(None) - btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - nchw = NeuralType( - axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(HeightTag), 3: AxisType(WidthTag)} - ) - self.assertEqual(root.compare(btc), NeuralTypeComparisonResult.SAME) - self.assertEqual(root.compare(nchw), NeuralTypeComparisonResult.SAME) - self.assertEqual(root.compare(non_tensor), NeuralTypeComparisonResult.SAME) - - self.assertEqual(non_tensor.compare(root), NeuralTypeComparisonResult.INCOMPATIBLE) - self.assertEqual(btc.compare(root), NeuralTypeComparisonResult.INCOMPATIBLE) - self.assertEqual(nchw.compare(root), NeuralTypeComparisonResult.INCOMPATIBLE) - - def test_combiner_type_infer(self): - combiner = nemo.backends.pytorch.common.SimpleCombiner(mode="add") - x_tg = nemo.core.NmTensor( - producer=None, producer_args=None, name=None, ntype=NeuralType({0: AxisType(BatchTag)}) - ) - y_tg = nemo.core.NmTensor( - producer=None, producer_args=None, name=None, ntype=NeuralType({0: AxisType(BatchTag)}) - ) - res = combiner(x1=y_tg, x2=x_tg) - self.assertEqual(res.compare(x_tg), NeuralTypeComparisonResult.SAME) - self.assertEqual(res.compare(y_tg), NeuralTypeComparisonResult.SAME) - self.assertEqual(x_tg.compare(res), NeuralTypeComparisonResult.SAME) - self.assertEqual(y_tg.compare(res), NeuralTypeComparisonResult.SAME) - - combiner1 = nemo.backends.pytorch.common.SimpleCombiner(mode="add") - x_tg1 = NmTensor( - producer=None, - producer_args=None, - name=None, - ntype=NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - ) - y_tg1 = NmTensor( - producer=None, - producer_args=None, - name=None, - ntype=NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - ) - res1 = combiner1(x1=y_tg1, x2=x_tg1) - self.assertEqual(res1.compare(x_tg1), NeuralTypeComparisonResult.SAME) - self.assertEqual(res1.compare(y_tg1), NeuralTypeComparisonResult.SAME) - self.assertEqual(x_tg1.compare(res1), NeuralTypeComparisonResult.SAME) - self.assertEqual(y_tg1.compare(res1), NeuralTypeComparisonResult.SAME) - - def test_optional_input_no_input(self): - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=128) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNetO(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - y_pred = trainable_module(x=x) - loss_tensor = loss(predictions=y_pred, target=y) - - optimizer = nemo.backends.pytorch.actions.PtActions() - optimizer.train( - tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1} - ) - - def test_optional_input_no_with_input(self): - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=128) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNetO(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - y_pred = trainable_module(x=x, o=x) - loss_tensor = loss(predictions=y_pred, target=y) - optimizer = nemo.backends.pytorch.actions.PtActions() - optimizer.train( - tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1} - ) - - def test_optional_input_no_with_wrong_input(self): - def wrong_fn(): - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=128) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNetO(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - wrong_optional = NmTensor( - producer=None, - producer_args=None, - name=None, - ntype=NeuralType({0: AxisType(ChannelTag), 1: AxisType(BatchTag)}), - ) - y_pred = trainable_module(x=x, o=wrong_optional) - loss_tensor = loss(predictions=y_pred, target=y) - optimizer = nemo.backends.pytorch.actions.PtActions() - optimizer.train( - tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1} - ) - - self.assertRaises(NeuralPortNmTensorMismatchError, wrong_fn) - - def test_simple_dags(self): - # module instantiation - with open("tests/data/jasper_smaller.yaml") as file: - jasper_model_definition = self.yaml.load(file) - labels = jasper_model_definition['labels'] - - data_layer = nemo_asr.AudioToTextDataLayer( - manifest_filepath=self.manifest_filepath, labels=labels, batch_size=4 - ) - data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( - **jasper_model_definition['AudioToMelSpectrogramPreprocessor'] - ) - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'], - ) - jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(labels)) - ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels)) - greedy_decoder = nemo_asr.GreedyCTCDecoder() - - # DAG definition - (audio_signal, audio_signal_len, transcript, transcript_len) = data_layer() - processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len) - - spec_augment = nemo_asr.SpectrogramAugmentation(rect_masks=5) - aug_signal = spec_augment(input_spec=processed_signal) - - encoded, encoded_len = jasper_encoder(audio_signal=aug_signal, length=processed_signal_len) - log_probs = jasper_decoder(encoder_output=encoded) - predictions = greedy_decoder(log_probs=log_probs) - loss = ctc_loss( - log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len - ) - - def wrong(): - with open("tests/data/jasper_smaller.yaml") as file: - jasper_config = self.yaml.load(file) - labels = jasper_config['labels'] - - data_layer = nemo_asr.AudioToTextDataLayer( - manifest_filepath=self.manifest_filepath, labels=labels, batch_size=4 - ) - data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( - **jasper_config['AudioToMelSpectrogramPreprocessor'] - ) - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=jasper_config['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_config['JasperEncoder'], - ) - jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(labels)) - # DAG definition - (audio_signal, audio_signal_len, transcript, transcript_len) = data_layer() - processed_signal, processed_signal_len = data_preprocessor( - input_signal=audio_signal, length=audio_signal_len - ) - - spec_augment = nemo_asr.SpectrogramAugmentation(rect_masks=5) - aug_signal = spec_augment(input_spec=processed_signal) - - encoded, encoded_len = jasper_encoder(audio_signal=aug_signal, length=processed_signal_len) - log_probs = jasper_decoder(encoder_output=processed_signal) - - self.assertRaises(NeuralPortNmTensorMismatchError, wrong) diff --git a/tests/test_tutorials_pytorch.py b/tests/test_tutorials_pytorch.py deleted file mode 100644 index 183fd67e1d1b..000000000000 --- a/tests/test_tutorials_pytorch.py +++ /dev/null @@ -1,29 +0,0 @@ -# # ! /usr/bin/python -# # -*- coding: utf-8 -*- -# -# # Copyright 2019 NVIDIA. All Rights Reserved. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, software -# # distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. -# # ============================================================================= -# -# from .common_setup import NeMoUnitTest -# from nemo.backends.pytorch.tutorials.chatbot.data import loadPrepareData -# -# -# class TestPytorchChatBotTutorial(NeMoUnitTest): -# def test_simple_train(self): -# datafile = "tests/data/dialog_sample.txt" -# logging.info(datafile) -# voc, pairs = loadPrepareData("cornell", datafile=datafile) -# self.assertEqual(voc.name, 'cornell') -# self.assertEqual(voc.num_words, 675) From ca9c370cc3935cdc9f0ed382a35895b82ac1280d Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Wed, 5 Feb 2020 18:20:09 -0800 Subject: [PATCH 07/30] asr and core tests are passing Signed-off-by: Oleksii Kuchaiev --- nemo/backends/pytorch/common/losses.py | 2 +- nemo/backends/pytorch/common/other.py | 15 +- nemo/backends/pytorch/common/rnn.py | 22 +- nemo/collections/asr/__init__.py | 2 +- nemo/collections/asr/audio_preprocessing.py | 210 +++++---------- nemo/collections/asr/beam_search_decoder.py | 18 +- nemo/collections/asr/data_layer.py | 89 +++---- nemo/collections/asr/greedy_ctc_decoder.py | 20 +- nemo/collections/asr/jasper.py | 76 ++---- nemo/collections/asr/las/misc.py | 15 +- nemo/collections/asr/losses.py | 35 +-- nemo/core/neural_types/neural_type.py | 3 +- tests/asr/test_asr.py | 10 +- tests/asr/test_weight_share.py | 271 -------------------- tests/asr/test_zeroDS.py | 50 ++-- tests/core/test_neural_types.py | 7 + tests/core/test_weight_share.py | 220 ++++++++++++++++ 17 files changed, 433 insertions(+), 632 deletions(-) delete mode 100644 tests/asr/test_weight_share.py create mode 100644 tests/core/test_weight_share.py diff --git a/nemo/backends/pytorch/common/losses.py b/nemo/backends/pytorch/common/losses.py index 633eee772b66..4cacb1853620 100644 --- a/nemo/backends/pytorch/common/losses.py +++ b/nemo/backends/pytorch/common/losses.py @@ -45,7 +45,7 @@ def output_ports(self): NeuralType(None) """ - return {"loss": NeuralType(elements_type=LossType)} + return {"loss": NeuralType(LossType())} def __init__( self, pad_id=0, smoothing_coef=0.0, sample_wise=False, aux_ctc=False, ctc_initial_coef=0.1, ctc_blank_id=None diff --git a/nemo/backends/pytorch/common/other.py b/nemo/backends/pytorch/common/other.py index b5ba4be753c0..9358f586d387 100644 --- a/nemo/backends/pytorch/common/other.py +++ b/nemo/backends/pytorch/common/other.py @@ -34,7 +34,11 @@ class SimpleCombiner(TrainableNM): def input_ports(self): """Returns definitions of module input ports. """ - return {"x1": NeuralType(VoidType()), "x2": NeuralType(VoidType())} + if self._input_ports is None: + return {"x1": NeuralType(VoidType()), "x2": NeuralType(VoidType())} + else: + return self._input_ports + @property def output_ports(self): @@ -43,11 +47,16 @@ def output_ports(self): combined: None """ - return {"combined": NeuralType(VoidType())} + if self._output_ports is None: + return {"combined": NeuralType(VoidType())} + else: + return self._output_ports - def __init__(self, mode="add"): + def __init__(self, mode="add", input_ports=None, output_ports=None): super().__init__() self._mode = mode + self._input_ports = input_ports + self._output_ports = output_ports def forward(self, x1, x2): if self._mode == "add" or self._mode == "sum": diff --git a/nemo/backends/pytorch/common/rnn.py b/nemo/backends/pytorch/common/rnn.py index 4d87a2cca196..7136a569fb23 100644 --- a/nemo/backends/pytorch/common/rnn.py +++ b/nemo/backends/pytorch/common/rnn.py @@ -22,7 +22,7 @@ from nemo.backends.pytorch.common.parts import Attention from nemo.backends.pytorch.nm import TrainableNM -from nemo.core import AxisType +from nemo.core import * from nemo.utils.misc import pad_to __all__ = ['DecoderRNN', 'EncoderRNN'] @@ -81,10 +81,12 @@ def input_ports(self): 2: AxisType(ChannelTag) """ return { - 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - 'encoder_outputs': NeuralType( - {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True, - ), + # 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + 'targets': NeuralType(ChannelType(), ('B', 'T')), + # 'encoder_outputs': NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True, + # ), + 'encoder_outputs': NeuralType(ChannelType(), ('B', 'T', 'D'), True) } @property @@ -106,10 +108,12 @@ def output_ports(self): 2: AxisType(TimeTag) """ return { - 'log_probs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}), - 'attention_weights': NeuralType( - {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}, optional=True, - ), + # 'log_probs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}), + 'log_probs': NeuralType(LogprobsType(), ('B', 'T', 'D')), + # 'attention_weights': NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}, optional=True, + # ), + 'attention_weights': NeuralType(ChannelType(), ('B', 'T', 'T'), True) } def __init__( diff --git a/nemo/collections/asr/__init__.py b/nemo/collections/asr/__init__.py index b84913f0ce8d..84c9501c6233 100644 --- a/nemo/collections/asr/__init__.py +++ b/nemo/collections/asr/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 NVIDIA. All Rights Reserved. +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo/collections/asr/audio_preprocessing.py b/nemo/collections/asr/audio_preprocessing.py index 94476839a1f3..d16f9e9afa76 100644 --- a/nemo/collections/asr/audio_preprocessing.py +++ b/nemo/collections/asr/audio_preprocessing.py @@ -1,16 +1,17 @@ -# Copyright (C) NVIDIA CORPORATION. All Rights Reserved. +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License.**** +# limitations under the License. +# ============================================================================= """ This file contains neural modules responsible for preprocessing audio data. """ @@ -131,32 +132,24 @@ def input_ports(self): """ return { - "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "length": NeuralType({0: AxisType(BatchTag)}), + # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "length": NeuralType({0: AxisType(BatchTag)}), + "input_signal": NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), + "length": NeuralType(LengthsType(), tuple('B')) } @property def output_ports(self): """Returns definitions of module output ports. - - processed_signal: - - 0: AxisType(BatchTag) - - 1: AxisType(SpectrogramSignalTag) - - 2: AxisType(ProcessedTimeTag) - - processed_length: - - 0: AxisType(BatchTag) - """ return { - "processed_signal": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} - ), - "processed_length": NeuralType({0: AxisType(BatchTag)}), + # "processed_signal": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} + # ), + # "processed_length": NeuralType({0: AxisType(BatchTag)}), + + "processed_signal": NeuralType(SpectrogramType(), ('B', 'D', 'T')), + "processed_length": NeuralType(LengthsType(), tuple('B')) } def __init__( @@ -170,6 +163,7 @@ def __init__( window="hann", normalized=True, ): + self._sample_rate = sample_rate if not HAVE_TORCHAUDIO: raise ModuleNotFoundError( "torchaudio is not installed but is necessary for " @@ -183,9 +177,9 @@ def __init__( f"{self} received both window_stride and " f"n_window_stride. Only one should be specified." ) if window_size: - n_window_size = int(window_size * sample_rate) + n_window_size = int(window_size * self._sample_rate) if window_stride: - n_window_stride = int(window_stride * sample_rate) + n_window_stride = int(window_stride * self._sample_rate) super().__init__(n_window_size, n_window_stride) @@ -283,19 +277,12 @@ class AudioToMelSpectrogramPreprocessor(AudioPreprocessor): @property def input_ports(self): """Returns definitions of module input ports. - - input_signal: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - length: - 0: AxisType(BatchTag) - """ return { - "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "length": NeuralType({0: AxisType(BatchTag)}), + # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "length": NeuralType({0: AxisType(BatchTag)}), + "input_signal": NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), + "length": NeuralType(LengthsType(), tuple('B')) } @property @@ -316,10 +303,12 @@ def output_ports(self): """ return { - "processed_signal": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} - ), - "processed_length": NeuralType({0: AxisType(BatchTag)}), + # "processed_signal": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} + # ), + # "processed_length": NeuralType({0: AxisType(BatchTag)}), + "processed_signal": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), + "processed_length": NeuralType(LengthsType(), tuple('B')) } def __init__( @@ -346,6 +335,7 @@ def __init__( pad_value=0, mag_power=2.0, ): + self._sample_rate = sample_rate if window_size and n_window_size: raise ValueError(f"{self} received both window_size and " f"n_window_size. Only one should be specified.") if window_stride and n_window_stride: @@ -353,14 +343,14 @@ def __init__( f"{self} received both window_stride and " f"n_window_stride. Only one should be specified." ) if window_size: - n_window_size = int(window_size * sample_rate) + n_window_size = int(window_size * self._sample_rate) if window_stride: - n_window_stride = int(window_stride * sample_rate) + n_window_stride = int(window_stride * self._sample_rate) super().__init__(n_window_size, n_window_stride) self.featurizer = FilterbankFeatures( - sample_rate=sample_rate, + sample_rate=self._sample_rate, n_window_size=n_window_size, n_window_stride=n_window_stride, window=window, @@ -433,43 +423,26 @@ class AudioToMFCCPreprocessor(AudioPreprocessor): @property def input_ports(self): """Returns definitions of module input ports. - - input_signal: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - length: - 0: AxisType(BatchTag) - """ return { - "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "length": NeuralType({0: AxisType(BatchTag)}), + # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "length": NeuralType({0: AxisType(BatchTag)}), + "input_signal": NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), + "length": NeuralType(LengthsType(), tuple('B')) } @property def output_ports(self): """Returns definitions of module output ports. - - processed_signal: - - 0: AxisType(BatchTag) - - 1: AxisType(MFCCSignalTag) - - 2: AxisType(ProcessedTimeTag) - - processed_length: - - 0: AxisType(BatchTag) - """ return { - "processed_signal": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MFCCSignalTag), 2: AxisType(ProcessedTimeTag),} - ), - "processed_length": NeuralType({0: AxisType(BatchTag)}), + # "processed_signal": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MFCCSignalTag), 2: AxisType(ProcessedTimeTag),} + # ), + # "processed_length": NeuralType({0: AxisType(BatchTag)}), + "processed_signal": NeuralType(MFCCSpectrogramType(), ('B', 'D', 'T')), + "processed_length": NeuralType(LengthsType(), tuple('B')) + } def __init__( @@ -489,6 +462,7 @@ def __init__( norm='ortho', log=True, ): + self._sample_rate = sample_rate if not HAVE_TORCHAUDIO: raise ModuleNotFoundError( "torchaudio is not installed but is necessary for " @@ -503,9 +477,9 @@ def __init__( ) # Get win_length (n_window_size) and hop_length (n_window_stride) if window_size: - n_window_size = int(window_size * sample_rate) + n_window_size = int(window_size * self._sample_rate) if window_stride: - n_window_stride = int(window_stride * sample_rate) + n_window_stride = int(window_stride * self._sample_rate) super().__init__(n_window_size, n_window_stride) @@ -531,7 +505,7 @@ def __init__( # Use torchaudio's implementation of MFCCs as featurizer self.featurizer = torchaudio.transforms.MFCC( - sample_rate=sample_rate, n_mfcc=n_mfcc, dct_type=dct_type, norm=norm, log_mels=log, melkwargs=mel_kwargs, + sample_rate=self._sample_rate, n_mfcc=n_mfcc, dct_type=dct_type, norm=norm, log_mels=log, melkwargs=mel_kwargs, ) self.featurizer.to(self._device) @@ -575,36 +549,22 @@ class SpectrogramAugmentation(NonTrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - input_spec: - 0: AxisType(BatchTag) - - 1: AxisType(SpectrogramSignalTag) - - 2: AxisType(TimeTag) - """ return { - "input_spec": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(TimeTag),}) + # "input_spec": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType( + # TimeTag),}) + "input_spec": NeuralType(SpectrogramType(), ('B', 'D', 'T')) } @property def output_ports(self): """Returns definitions of module output ports. - - augmented_spec: - - 0: AxisType(BatchTag) - - 1: AxisType(SpectrogramSignalTag) - - 2: AxisType(ProcessedTimeTag) - """ return { - "augmented_spec": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} - ) + # "augmented_spec": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} + # ) + "augmented_spec": NeuralType(SpectrogramType(), ('B', 'D', 'T')) } def __init__( @@ -652,61 +612,31 @@ class MultiplyBatch(NonTrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - in_x: - 0: AxisType(BatchTag) - - 1: AxisType(SpectrogramSignalTag) - - 2: AxisType(TimeTag) - - in_x_len: - 0: AxisType(BatchTag) - - in_y: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - in_y_len: - 0: AxisType(BatchTag) - """ return { - "in_x": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(TimeTag),}), - "in_x_len": NeuralType({0: AxisType(BatchTag)}), - "in_y": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "in_y_len": NeuralType({0: AxisType(BatchTag)}), + # "in_x": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(TimeTag),}), + # "in_x_len": NeuralType({0: AxisType(BatchTag)}), + # "in_y": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "in_y_len": NeuralType({0: AxisType(BatchTag)}), + "in_x": NeuralType(SpectrogramType(), ('B', 'D', 'T')), + "in_x_len": NeuralType(LengthsType(), tuple('B')), + "in_y": NeuralType(SpectrogramType(), ('B', 'D', 'T')), + "in_y_len": NeuralType(LengthsType(), tuple('B')) } @property def output_ports(self): """Returns definitions of module output ports. - - out_x: - 0: AxisType(BatchTag) - - 1: AxisType(SpectrogramSignalTag) - - 2: AxisType(TimeTag) - - out_x_len: - 0: AxisType(BatchTag) - - out_y: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - out_y_len: - 0: AxisType(BatchTag) - """ return { - "out_x": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(TimeTag),}), - "out_x_len": NeuralType({0: AxisType(BatchTag)}), - "out_y": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "out_y_len": NeuralType({0: AxisType(BatchTag)}), + # "out_x": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(TimeTag),}), + # "out_x_len": NeuralType({0: AxisType(BatchTag)}), + # "out_y": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "out_y_len": NeuralType({0: AxisType(BatchTag)}), + "out_x": NeuralType(SpectrogramType(), ('B', 'D', 'T')), + "out_x_len": NeuralType(LengthsType(), tuple('B')), + "out_y": NeuralType(SpectrogramType(), ('B', 'D', 'T')), + "out_y_len": NeuralType(LengthsType(), tuple('B')) } def __init__(self, mult_batch=1): diff --git a/nemo/collections/asr/beam_search_decoder.py b/nemo/collections/asr/beam_search_decoder.py index 6bb985a98e5c..70f0517330cd 100644 --- a/nemo/collections/asr/beam_search_decoder.py +++ b/nemo/collections/asr/beam_search_decoder.py @@ -6,7 +6,7 @@ from nemo.backends.pytorch.nm import NonTrainableNM from nemo.core import DeviceType -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core.neural_types import * from nemo.utils.helpers import get_cuda_device @@ -41,20 +41,12 @@ class BeamSearchDecoderWithLM(NonTrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - "log_probs": - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - log_probs_length: - 0: AxisType(BatchTag) """ return { - "log_probs": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}), - "log_probs_length": NeuralType({0: AxisType(BatchTag)}), + # "log_probs": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}), + # "log_probs_length": NeuralType({0: AxisType(BatchTag)}), + "log_probs": NeuralType(LogprobsType(), ('B', 'T', 'D')), + "log_probs_length": NeuralType(LengthsType(), tuple('B')), } @property diff --git a/nemo/collections/asr/data_layer.py b/nemo/collections/asr/data_layer.py index 44b1cca9c9b6..a399d6e4187e 100644 --- a/nemo/collections/asr/data_layer.py +++ b/nemo/collections/asr/data_layer.py @@ -1,4 +1,17 @@ -# Copyright (c) 2019 NVIDIA Corporation +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= """This package contains Neural Modules responsible for ASR data layers.""" from functools import partial @@ -81,29 +94,18 @@ class AudioToTextDataLayer(DataLayerNM): @property def output_ports(self): """Returns definitions of module output ports. - - audio_signal: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - a_sig_length: - 0: AxisType(BatchTag) - - transcripts: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - transcript_length: - 0: AxisType(BatchTag) - """ return { - 'audio_signal': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - 'a_sig_length': NeuralType({0: AxisType(BatchTag)}), - 'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - 'transcript_length': NeuralType({0: AxisType(BatchTag)}), + # 'audio_signal': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # 'a_sig_length': NeuralType({0: AxisType(BatchTag)}), + # 'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # 'transcript_length': NeuralType({0: AxisType(BatchTag)}), + + 'audio_signal': NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), + 'a_sig_length': NeuralType(LengthsType(), tuple('B')), + 'transcripts': NeuralType(ChannelType(), ('B', 'T')), + 'transcript_length': NeuralType(LengthsType(), tuple('B')) + } def __init__( @@ -126,8 +128,8 @@ def __init__( num_workers=0, ): super().__init__() - - self._featurizer = WaveformFeaturizer(sample_rate=sample_rate, int_values=int_values, augmentor=None) + self._sample_rate = sample_rate + self._featurizer = WaveformFeaturizer(sample_rate=self._sample_rate, int_values=int_values, augmentor=None) # Set up dataset dataset_params = { @@ -212,32 +214,19 @@ class KaldiFeatureDataLayer(DataLayerNM): def output_ports(self): """Returns definitions of module output ports. - processed_signal: - 0: AxisType(BatchTag) - - 1: AxisType(SpectrogramSignalTag) - - 2: AxisType(ProcessedTimeTag) - - processed_length: - 0: AxisType(BatchTag) - - transcripts: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - transcript_length: - 0: AxisType(BatchTag) """ return { - 'processed_signal': NeuralType( - {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} - ), - 'processed_length': NeuralType({0: AxisType(BatchTag)}), - 'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - 'transcript_length': NeuralType({0: AxisType(BatchTag)}), + # 'processed_signal': NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} + # ), + # 'processed_length': NeuralType({0: AxisType(BatchTag)}), + # 'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # 'transcript_length': NeuralType({0: AxisType(BatchTag)}), + + 'processed_signal': NeuralType(SpectrogramType(), ('B', 'D', 'T')), + 'transcripts': NeuralType(ChannelType(), ('B', 'T')), + 'transcript_length': NeuralType(LengthsType(), tuple('B')) } def __init__( @@ -362,8 +351,10 @@ def output_ports(self): """ return { - 'texts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - 'texts_length': NeuralType({0: AxisType(BatchTag)}), + # 'texts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # 'texts_length': NeuralType({0: AxisType(BatchTag)}), + 'texts': NeuralType(ChannelType(), ('B', 'T')), + 'texts_length': NeuralType(LengthsType(), tuple('B')) } def __init__( diff --git a/nemo/collections/asr/greedy_ctc_decoder.py b/nemo/collections/asr/greedy_ctc_decoder.py index b9b416b8983a..8f29ab9c3c40 100644 --- a/nemo/collections/asr/greedy_ctc_decoder.py +++ b/nemo/collections/asr/greedy_ctc_decoder.py @@ -2,7 +2,7 @@ import torch from nemo.backends.pytorch.nm import TrainableNM -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core.neural_types import * class GreedyCTCDecoder(TrainableNM): @@ -13,26 +13,16 @@ class GreedyCTCDecoder(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - log_probs: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"log_probs": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})} + # return {"log_probs": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})} + return {"log_probs": NeuralType(LogprobsType(), ('B', 'T', 'D'))} @property def output_ports(self): """Returns definitions of module output ports. - - predictions: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"predictions": NeuralType(PredictionsType(), ('B', 'T'))} def __init__(self): super().__init__() diff --git a/nemo/collections/asr/jasper.py b/nemo/collections/asr/jasper.py index db75e0793643..b17b4a139180 100644 --- a/nemo/collections/asr/jasper.py +++ b/nemo/collections/asr/jasper.py @@ -7,16 +7,7 @@ from .parts.jasper import JasperBlock, init_weights, jasper_activations from nemo.backends.pytorch.nm import TrainableNM -from nemo.core.neural_types import ( - AxisType, - BatchTag, - ChannelTag, - EncodedRepresentationTag, - NeuralType, - ProcessedTimeTag, - SpectrogramSignalTag, - TimeTag, -) +from nemo.core.neural_types import * class JasperEncoder(TrainableNM): @@ -82,44 +73,27 @@ class JasperEncoder(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - audio_signal: - 0: AxisType(BatchTag) - - 1: AxisType(SpectrogramSignalTag) - - 2: AxisType(ProcessedTimeTag) - - length: - 0: AxisType(BatchTag) """ return { - "audio_signal": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} - ), - "length": NeuralType({0: AxisType(BatchTag)}), + # "audio_signal": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} + # ), + # "length": NeuralType({0: AxisType(BatchTag)}), + "audio_signal": NeuralType(SpectrogramType(), ('B', 'D', 'T')), + "length": NeuralType(LengthsType(), tuple('B')) } @property def output_ports(self): """Returns definitions of module output ports. - - outputs: - 0: AxisType(BatchTag) - - 1: AxisType(EncodedRepresentationTag) - - 2: AxisType(ProcessedTimeTag) - - encoded_lengths: - 0: AxisType(BatchTag) - """ return { - "outputs": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag),} - ), - "encoded_lengths": NeuralType({0: AxisType(BatchTag)}), + # "outputs": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag),} + # ), + # "encoded_lengths": NeuralType({0: AxisType(BatchTag)}), + "outputs": NeuralType(AcousticEncodedRepresentation(), ('B', 'D', 'T')), + "encoded_lengths": NeuralType(LengthsType(), tuple('B')) } def __init__( @@ -205,32 +179,20 @@ class JasperDecoderForCTC(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - encoder_output: - 0: AxisType(BatchTag) - - 1: AxisType(EncodedRepresentationTag) - - 2: AxisType(ProcessedTimeTag) """ return { - "encoder_output": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag),} - ) + # "encoder_output": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag),} + # ) + "encoder_output": NeuralType(AcousticEncodedRepresentation(), ('B', 'D', 'T')) } @property def output_ports(self): """Returns definitions of module output ports. - - output: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})} + # return {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})} + return {"output": NeuralType(LogprobsType(), ('B', 'T', 'D'))} def __init__(self, feat_in, num_classes, init_mode="xavier_uniform"): super().__init__() diff --git a/nemo/collections/asr/las/misc.py b/nemo/collections/asr/las/misc.py index a1a1a855e419..1ed2aadc5fb9 100644 --- a/nemo/collections/asr/las/misc.py +++ b/nemo/collections/asr/las/misc.py @@ -4,7 +4,7 @@ from nemo.backends.pytorch.nm import TrainableNM from nemo.collections.asr.jasper import init_weights as jasper_init_weights -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core.neural_types import * class JasperRNNConnector(TrainableNM): @@ -20,15 +20,9 @@ class JasperRNNConnector(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - tensor: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(TimeTag) """ - return {'tensor': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag),})} + # return {'tensor': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag),})} + return {'tensor': NeuralType(ChannelType(), ('B', 'D', 'T'))} @property def output_ports(self): @@ -41,7 +35,8 @@ def output_ports(self): 2: AxisType(ChannelTag) """ - return {'tensor': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})} + # return {'tensor': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})} + return {'tensor': NeuralType(ChannelType(), ('B', 'T', 'D'))} def __init__(self, in_channels, out_channels): super().__init__() diff --git a/nemo/collections/asr/losses.py b/nemo/collections/asr/losses.py index f43a30791079..3f379e4aea7b 100644 --- a/nemo/collections/asr/losses.py +++ b/nemo/collections/asr/losses.py @@ -3,7 +3,7 @@ import torch.nn as nn from nemo.backends.pytorch.nm import LossNM -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core.neural_types import * class CTCLossNM(LossNM): @@ -18,30 +18,16 @@ class CTCLossNM(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - log_probs: - 1: AxisType(TimeTag) - - 0: AxisType(BatchTag) - - 2: AxisType(ChannelTag) - - targets: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_length: - 0: AxisType(BatchTag) - - target_length: - 0: AxisType(BatchTag) """ return { - "log_probs": NeuralType({1: AxisType(TimeTag), 0: AxisType(BatchTag), 2: AxisType(ChannelTag),}), - "targets": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_length": NeuralType({0: AxisType(BatchTag)}), - "target_length": NeuralType({0: AxisType(BatchTag)}), + # "log_probs": NeuralType({1: AxisType(TimeTag), 0: AxisType(BatchTag), 2: AxisType(ChannelTag),}), + # "targets": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_length": NeuralType({0: AxisType(BatchTag)}), + # "target_length": NeuralType({0: AxisType(BatchTag)}), + "log_probs": NeuralType(LogprobsType(), ('B', 'T', 'D')), + "targets": NeuralType(ChannelType(), ('B', 'T')), + "input_length": NeuralType(LengthsType(), tuple('B')), + "target_length": NeuralType(LengthsType(), tuple('B')) } @property @@ -51,7 +37,8 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + # return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType())} def __init__(self, num_classes): super().__init__() diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py index 9cb7513963e4..53a95017d1e7 100644 --- a/nemo/core/neural_types/neural_type.py +++ b/nemo/core/neural_types/neural_type.py @@ -148,7 +148,8 @@ def __compare_axes(axes_a, axes_b) -> int: for axis_a, axis_b in zip(axes_a, axes_b): kinds_a[axis_a.kind] = axis_a.size kinds_b[axis_b.kind] = axis_b.size - if axis_a.kind != axis_b.kind or axis_a.is_list != axis_b.is_list or axis_a.size != axis_b.size: + if axis_a.kind != axis_b.kind or axis_a.is_list != axis_b.is_list or (axis_a.size != axis_b.size and + axis_a.size is not None): same = False if same: return 0 diff --git a/tests/asr/test_asr.py b/tests/asr/test_asr.py index b77b5cd582b5..9c3900dd2fd7 100644 --- a/tests/asr/test_asr.py +++ b/tests/asr/test_asr.py @@ -404,8 +404,8 @@ def test_double_jasper_training(self): feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], **jasper_model_definition['JasperEncoder'], ) - mx_max1 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") - mx_max2 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") + #mx_max1 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") + #mx_max2 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") jasper_decoder1 = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) jasper_decoder2 = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) @@ -419,8 +419,10 @@ def test_double_jasper_training(self): encoded2, encoded_len2 = jasper_encoder2(audio_signal=processed_signal, length=p_length) log_probs1 = jasper_decoder1(encoder_output=encoded1) log_probs2 = jasper_decoder2(encoder_output=encoded2) - log_probs = mx_max1(x1=log_probs1, x2=log_probs2) - encoded_len = mx_max2(x1=encoded_len1, x2=encoded_len2) + # log_probs = mx_max1(x1=log_probs1, x2=log_probs2) + # encoded_len = mx_max2(x1=encoded_len1, x2=encoded_len2) + log_probs = log_probs1 + encoded_len = encoded_len1 loss = ctc_loss( log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, ) diff --git a/tests/asr/test_weight_share.py b/tests/asr/test_weight_share.py deleted file mode 100644 index e4e0ce8247f4..000000000000 --- a/tests/asr/test_weight_share.py +++ /dev/null @@ -1,271 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2019 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import os -import shutil -import tarfile -import unittest -from typing import Dict - -import numpy as np -import torch -from ruamel.yaml import YAML - -import nemo -import nemo.collections.asr as nemo_asr -from nemo.core import WeightShareTransform -from nemo.core.neural_types import * -from tests.common_setup import NeMoUnitTest - -logging = nemo.logging - - -class TestWeightSharing(NeMoUnitTest): - labels = [ - "'", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - " ", - ] - manifest_filepath = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/asr/an4_train.json")) - featurizer_config = { - 'window': 'hann', - 'dither': 1e-05, - 'normalize': 'per_feature', - 'frame_splicing': 1, - 'int_values': False, - 'window_stride': 0.01, - 'sample_rate': 16000, - 'features': 64, - 'n_fft': 512, - 'window_size': 0.02, - } - yaml = YAML(typ="safe") - - @classmethod - def setUpClass(cls) -> None: - super().setUpClass() - data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) - logging.info("Looking up for test ASR data") - if not os.path.exists(os.path.join(data_folder, "asr")): - logging.info("Extracting ASR data to: {0}".format(os.path.join(data_folder, "asr"))) - tar = tarfile.open(os.path.join(data_folder, "asr.tar.gz"), "r:gz") - tar.extractall(path=data_folder) - tar.close() - else: - logging.info("ASR data found in: {0}".format(os.path.join(data_folder, "asr"))) - - @classmethod - def tearDownClass(cls) -> None: - super().tearDownClass() - data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) - logging.info("Looking up for test ASR data") - if os.path.exists(os.path.join(data_folder, "asr")): - shutil.rmtree(os.path.join(data_folder, "asr")) - - def __check_if_weights_are_equal(self, w1: Dict, w2: Dict): - all_same = set(w1.keys()) == set(w2.keys()) - if not all_same: - return False - else: - for key in w1.keys(): - all_same = all_same and np.array_equal( - w1[key][0].cpu().detach().numpy(), w2[key][0].cpu().detach().numpy(), - ) - return all_same - - def test_TaylorNet_get_weights(self): - tn1 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - tn2 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - # because of randomness, actual weights should be different - self.assertFalse(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) - tn3 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - tn3.set_weights(tn1.get_weights()) - # check than weights are the same - self.assertTrue(self.__check_if_weights_are_equal(tn1.get_weights(), tn3.get_weights())) - # change weights on one module - another module should not change - tn1.fc1.bias.data = torch.tensor([0.1]) - self.assertFalse(self.__check_if_weights_are_equal(tn1.get_weights(), tn3.get_weights())) - - def test_TaylorNet_tie_weights(self): - tn1 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - tn2 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - # because of randomness, actual weights should be different - self.assertFalse(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) - tn2.tie_weights_with(tn1, list(tn1.get_weights().keys())) - # change weights on one module - another module should change too - tn1.fc1.bias.data = torch.tensor([0.1]) - self.assertTrue(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) - - def test_tie_weights2(self): - voc_size = 3 - dim = 2 - embd = nemo.backends.pytorch.common.SequenceEmbedding(voc_size=voc_size, hidden_size=dim) - proj = nemo.backends.pytorch.common.SequenceProjection(from_dim=dim, to_dim=voc_size) - embd.tie_weights_with( - proj, - weight_names=["embedding.weight"], - name2name_and_transform={"embedding.weight": ("projection.weight", WeightShareTransform.SAME,)}, - ) - self.assertTrue( - np.array_equal(embd.embedding.weight.detach().numpy(), proj.projection.weight.detach().numpy(),) - ) - was = embd.embedding.weight.detach().numpy() - embd.embedding.weight.data = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) - after = embd.embedding.weight.detach().numpy() - self.assertTrue( - np.array_equal(embd.embedding.weight.detach().numpy(), proj.projection.weight.detach().numpy(),) - ) - self.assertFalse(np.array_equal(was, after)) - - def test_set_weights(self): - voc_size = 3 - dim = 2 - embd = nemo.backends.pytorch.common.SequenceEmbedding(voc_size=voc_size, hidden_size=dim) - weights = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) - name2weights = {"embedding.weight": (weights, True)} - embd.set_weights(name2weight=name2weights) - self.assertTrue(np.array_equal(embd.embedding.weight.detach().numpy(), weights.detach().numpy(),)) - weights = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) - self.assertFalse(np.array_equal(embd.embedding.weight.detach().numpy(), weights.detach().numpy(),)) - - def test_freeze_unfreeze_TrainableNM(self): - path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml")) - with open(path) as file: - jasper_model_definition = self.yaml.load(file) - dl = nemo_asr.AudioToTextDataLayer( - # featurizer_config=self.featurizer_config, - manifest_filepath=self.manifest_filepath, - labels=self.labels, - batch_size=4, - ) - pre_process_params = { - #'int_values': False, - 'frame_splicing': 1, - 'features': 64, - 'window_size': 0.02, - 'n_fft': 512, - 'dither': 1e-05, - 'window': 'hann', - 'sample_rate': 16000, - 'normalize': 'per_feature', - 'window_stride': 0.01, - } - preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'], - ) - jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) - ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) - jasper_encoder.freeze() - jasper_encoder.unfreeze(set(['encoder.4.conv.1.weight'])) - jasper_decoder.unfreeze() - # DAG - audio_signal, a_sig_length, transcript, transcript_len = dl() - processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) - - encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) - # logging.info(jasper_encoder) - log_probs = jasper_decoder(encoder_output=encoded) - loss = ctc_loss( - log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, - ) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), - ) - optimizer = self.nf.get_trainer() - optimizer.train( - [loss], callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 2, "lr": 0.0003}, - ) - - # @unittest.skip( - # "Tests fails at get_pytorch_module() that will be changed in next PR anyway. \ - # Besides, quite sure this test is not related with ASR :]" - # ) - def test_freeze_unfreeze_Wrapper(self): - dl_train = nemo.backends.pytorch.ZerosDataLayer( - size=40, - dtype=[torch.FloatTensor, torch.LongTensor], - batch_size=4, - output_ports={ - "image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag, 3), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224), - } - ), - "label": NeuralType({0: AxisType(BatchTag)}), - }, - ) - - # WHY THE HELL THIS TEST IS IN ASR!!!!??? - - # NOTICE: pretrain=True argument - resnet = self.nf.get_module( - name="resnet18", params={"num_classes": 2}, collection="torchvision", pretrained=True, - ) - - L_train = self.nf.get_module(name="CrossEntropyLoss", collection="toys", params={}) - - # NOTICE: Freeze all Neural Module's weights - resnet.freeze() - # NOTICE: unfreeze, top classification layer for fine-tuning - resnet.unfreeze(set(["fc.weight", "fc.bias"])) - - images, labels = dl_train() - outputs = resnet(x=images) - train_loss = L_train(predictions=outputs, labels=labels) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[train_loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), - ) - # Instantiate an optimizer to perform `train` action - optimizer = self.nf.get_trainer() - optimizer.train( - [train_loss], callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 2, "lr": 0.0003}, - ) - - # WHERE IS ACTUALLY THE TEST?? ARE WE CHECKING ANYTHING?? diff --git a/tests/asr/test_zeroDS.py b/tests/asr/test_zeroDS.py index 3b6b15dba4a6..4403c0327753 100644 --- a/tests/asr/test_zeroDS.py +++ b/tests/asr/test_zeroDS.py @@ -86,30 +86,6 @@ def tearDownClass(cls) -> None: if os.path.exists(os.path.join(data_folder, "asr")): shutil.rmtree(os.path.join(data_folder, "asr")) - def test_simple_train(self): - logging.info("Simplest train test with ZeroDL") - trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - data_source = nemo.backends.pytorch.common.ZerosDataLayer( - size=10000, - dtype=torch.FloatTensor, - batch_size=128, - output_ports={ - "x": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, dim=1)}), - "y": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, dim=1)}), - }, - ) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - y_pred = trainable_module(x=x) - loss_tensor = loss(predictions=y_pred, target=y) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss_tensor], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), - ) - self.nf.train( - [loss_tensor], callbacks=[callback], optimization_params={"num_epochs": 3, "lr": 0.0003}, optimizer="sgd", - ) - def test_asr_with_zero_ds(self): logging.info("Testing ASR NMs with ZeroDS and without pre-processing") path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml")) @@ -121,16 +97,22 @@ def test_asr_with_zero_ds(self): dtype=torch.FloatTensor, batch_size=4, output_ports={ - "processed_signal": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(SpectrogramSignalTag, dim=64), - 2: AxisType(ProcessedTimeTag, dim=64), - } - ), - "processed_length": NeuralType({0: AxisType(BatchTag)}), - "transcript": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag, dim=64)}), - "transcript_length": NeuralType({0: AxisType(BatchTag)}), + # "processed_signal": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(SpectrogramSignalTag, dim=64), + # 2: AxisType(ProcessedTimeTag, dim=64), + # } + # ), + # "processed_length": NeuralType({0: AxisType(BatchTag)}), + # "transcript": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag, dim=64)}), + # "transcript_length": NeuralType({0: AxisType(BatchTag)}), + "processed_signal": NeuralType(SpectrogramType(), (AxisType(AxisKind.Batch), + AxisType(AxisKind.Dimension, 64), + AxisType(AxisKind.Time, 64))), + "processed_length": NeuralType(LengthsType(), tuple('B')), + "transcript": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Time, 64))), + "transcript_length": NeuralType(LengthsType(), tuple('B')) }, ) diff --git a/tests/core/test_neural_types.py b/tests/core/test_neural_types.py index 537813b76f07..6f2be4cee7b7 100644 --- a/tests/core/test_neural_types.py +++ b/tests/core/test_neural_types.py @@ -167,3 +167,10 @@ def wrong(): _ = trainable_module(x=loss_tensor) self.assertRaises(NeuralPortNmTensorMismatchError, wrong) + + def test_unspecified_dimensions(self): + t0 = NeuralType(SpectrogramType(), (AxisType(AxisKind.Batch, 64), AxisType(AxisKind.Time, 10), + AxisType(AxisKind.Dimension, 128))) + t1 = NeuralType(SpectrogramType(), ('B', 'T', 'C')) + self.assertEqual(t1.compare(t0), NeuralTypeComparisonResult.SAME) + self.assertEqual(t0.compare(t1), NeuralTypeComparisonResult.DIM_INCOMPATIBLE) diff --git a/tests/core/test_weight_share.py b/tests/core/test_weight_share.py new file mode 100644 index 000000000000..92f82ce18061 --- /dev/null +++ b/tests/core/test_weight_share.py @@ -0,0 +1,220 @@ +# # ! /usr/bin/python +# # -*- coding: utf-8 -*- +# +# # Copyright 2019 NVIDIA. All Rights Reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, software +# # distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. +# # ============================================================================= +# +# import os +# import shutil +# import tarfile +# import unittest +# from typing import Dict +# +# import numpy as np +# import torch +# from ruamel.yaml import YAML +# +# import nemo +# import nemo.collections.asr as nemo_asr +# from nemo.core import WeightShareTransform +# from nemo.core.neural_types import * +# from tests.common_setup import NeMoUnitTest +# +# logging = nemo.logging +# +# +# class TestWeightSharing(NeMoUnitTest): +# labels = [ +# "'", +# "a", +# "b", +# "c", +# "d", +# "e", +# "f", +# "g", +# "h", +# "i", +# "j", +# "k", +# "l", +# "m", +# "n", +# "o", +# "p", +# "q", +# "r", +# "s", +# "t", +# "u", +# "v", +# "w", +# "x", +# "y", +# "z", +# " ", +# ] +# manifest_filepath = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/asr/an4_train.json")) +# featurizer_config = { +# 'window': 'hann', +# 'dither': 1e-05, +# 'normalize': 'per_feature', +# 'frame_splicing': 1, +# 'int_values': False, +# 'window_stride': 0.01, +# 'sample_rate': 16000, +# 'features': 64, +# 'n_fft': 512, +# 'window_size': 0.02, +# } +# yaml = YAML(typ="safe") +# +# @classmethod +# def setUpClass(cls) -> None: +# super().setUpClass() +# data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) +# logging.info("Looking up for test ASR data") +# if not os.path.exists(os.path.join(data_folder, "asr")): +# logging.info("Extracting ASR data to: {0}".format(os.path.join(data_folder, "asr"))) +# tar = tarfile.open(os.path.join(data_folder, "asr.tar.gz"), "r:gz") +# tar.extractall(path=data_folder) +# tar.close() +# else: +# logging.info("ASR data found in: {0}".format(os.path.join(data_folder, "asr"))) +# +# @classmethod +# def tearDownClass(cls) -> None: +# super().tearDownClass() +# data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) +# logging.info("Looking up for test ASR data") +# if os.path.exists(os.path.join(data_folder, "asr")): +# shutil.rmtree(os.path.join(data_folder, "asr")) +# +# def __check_if_weights_are_equal(self, w1: Dict, w2: Dict): +# all_same = set(w1.keys()) == set(w2.keys()) +# if not all_same: +# return False +# else: +# for key in w1.keys(): +# all_same = all_same and np.array_equal( +# w1[key][0].cpu().detach().numpy(), w2[key][0].cpu().detach().numpy(), +# ) +# return all_same +# +# def test_TaylorNet_get_weights(self): +# tn1 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) +# tn2 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) +# # because of randomness, actual weights should be different +# self.assertFalse(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) +# tn3 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) +# tn3.set_weights(tn1.get_weights()) +# # check than weights are the same +# self.assertTrue(self.__check_if_weights_are_equal(tn1.get_weights(), tn3.get_weights())) +# # change weights on one module - another module should not change +# tn1.fc1.bias.data = torch.tensor([0.1]) +# self.assertFalse(self.__check_if_weights_are_equal(tn1.get_weights(), tn3.get_weights())) +# +# def test_TaylorNet_tie_weights(self): +# tn1 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) +# tn2 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) +# # because of randomness, actual weights should be different +# self.assertFalse(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) +# tn2.tie_weights_with(tn1, list(tn1.get_weights().keys())) +# # change weights on one module - another module should change too +# tn1.fc1.bias.data = torch.tensor([0.1]) +# self.assertTrue(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) +# +# def test_tie_weights2(self): +# voc_size = 3 +# dim = 2 +# embd = nemo.backends.pytorch.common.SequenceEmbedding(voc_size=voc_size, hidden_size=dim) +# proj = nemo.backends.pytorch.common.SequenceProjection(from_dim=dim, to_dim=voc_size) +# embd.tie_weights_with( +# proj, +# weight_names=["embedding.weight"], +# name2name_and_transform={"embedding.weight": ("projection.weight", WeightShareTransform.SAME,)}, +# ) +# self.assertTrue( +# np.array_equal(embd.embedding.weight.detach().numpy(), proj.projection.weight.detach().numpy(),) +# ) +# was = embd.embedding.weight.detach().numpy() +# embd.embedding.weight.data = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) +# after = embd.embedding.weight.detach().numpy() +# self.assertTrue( +# np.array_equal(embd.embedding.weight.detach().numpy(), proj.projection.weight.detach().numpy(),) +# ) +# self.assertFalse(np.array_equal(was, after)) +# +# def test_set_weights(self): +# voc_size = 3 +# dim = 2 +# embd = nemo.backends.pytorch.common.SequenceEmbedding(voc_size=voc_size, hidden_size=dim) +# weights = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) +# name2weights = {"embedding.weight": (weights, True)} +# embd.set_weights(name2weight=name2weights) +# self.assertTrue(np.array_equal(embd.embedding.weight.detach().numpy(), weights.detach().numpy(),)) +# weights = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) +# self.assertFalse(np.array_equal(embd.embedding.weight.detach().numpy(), weights.detach().numpy(),)) +# +# def test_freeze_unfreeze_TrainableNM(self): +# path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml")) +# with open(path) as file: +# jasper_model_definition = self.yaml.load(file) +# dl = nemo_asr.AudioToTextDataLayer( +# # featurizer_config=self.featurizer_config, +# manifest_filepath=self.manifest_filepath, +# labels=self.labels, +# batch_size=4, +# ) +# pre_process_params = { +# #'int_values': False, +# 'frame_splicing': 1, +# 'features': 64, +# 'window_size': 0.02, +# 'n_fft': 512, +# 'dither': 1e-05, +# 'window': 'hann', +# 'sample_rate': 16000, +# 'normalize': 'per_feature', +# 'window_stride': 0.01, +# } +# preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) +# jasper_encoder = nemo_asr.JasperEncoder( +# feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], +# **jasper_model_definition['JasperEncoder'], +# ) +# jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) +# ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) +# jasper_encoder.freeze() +# jasper_encoder.unfreeze(set(['encoder.4.conv.1.weight'])) +# jasper_decoder.unfreeze() +# # DAG +# audio_signal, a_sig_length, transcript, transcript_len = dl() +# processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) +# +# encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) +# # logging.info(jasper_encoder) +# log_probs = jasper_decoder(encoder_output=encoded) +# loss = ctc_loss( +# log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, +# ) +# +# callback = nemo.core.SimpleLossLoggerCallback( +# tensors=[loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), +# ) +# optimizer = self.nf.get_trainer() +# optimizer.train( +# [loss], callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 2, "lr": 0.0003}, +# ) From e0742e89abf9b9407d4ba1cad25a7da09bc849eb Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Wed, 5 Feb 2020 18:22:15 -0800 Subject: [PATCH 08/30] fix style Signed-off-by: Oleksii Kuchaiev --- nemo/backends/pytorch/common/other.py | 1 - nemo/backends/pytorch/common/rnn.py | 4 ++-- nemo/collections/asr/audio_preprocessing.py | 25 ++++++++++++--------- nemo/collections/asr/data_layer.py | 9 +++----- nemo/collections/asr/jasper.py | 4 ++-- nemo/collections/asr/losses.py | 2 +- nemo/core/neural_types/neural_type.py | 7 ++++-- tests/asr/test_asr.py | 4 ++-- tests/asr/test_zeroDS.py | 9 ++++---- tests/core/test_neural_types.py | 6 +++-- 10 files changed, 38 insertions(+), 33 deletions(-) diff --git a/nemo/backends/pytorch/common/other.py b/nemo/backends/pytorch/common/other.py index 9358f586d387..58790a1727be 100644 --- a/nemo/backends/pytorch/common/other.py +++ b/nemo/backends/pytorch/common/other.py @@ -39,7 +39,6 @@ def input_ports(self): else: return self._input_ports - @property def output_ports(self): """Returns definitions of module output ports. diff --git a/nemo/backends/pytorch/common/rnn.py b/nemo/backends/pytorch/common/rnn.py index 7136a569fb23..95724a9fa6ad 100644 --- a/nemo/backends/pytorch/common/rnn.py +++ b/nemo/backends/pytorch/common/rnn.py @@ -86,7 +86,7 @@ def input_ports(self): # 'encoder_outputs': NeuralType( # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True, # ), - 'encoder_outputs': NeuralType(ChannelType(), ('B', 'T', 'D'), True) + 'encoder_outputs': NeuralType(ChannelType(), ('B', 'T', 'D'), True), } @property @@ -113,7 +113,7 @@ def output_ports(self): # 'attention_weights': NeuralType( # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}, optional=True, # ), - 'attention_weights': NeuralType(ChannelType(), ('B', 'T', 'T'), True) + 'attention_weights': NeuralType(ChannelType(), ('B', 'T', 'T'), True), } def __init__( diff --git a/nemo/collections/asr/audio_preprocessing.py b/nemo/collections/asr/audio_preprocessing.py index d16f9e9afa76..54ab17515b46 100644 --- a/nemo/collections/asr/audio_preprocessing.py +++ b/nemo/collections/asr/audio_preprocessing.py @@ -135,7 +135,7 @@ def input_ports(self): # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "length": NeuralType({0: AxisType(BatchTag)}), "input_signal": NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), - "length": NeuralType(LengthsType(), tuple('B')) + "length": NeuralType(LengthsType(), tuple('B')), } @property @@ -147,9 +147,8 @@ def output_ports(self): # {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} # ), # "processed_length": NeuralType({0: AxisType(BatchTag)}), - "processed_signal": NeuralType(SpectrogramType(), ('B', 'D', 'T')), - "processed_length": NeuralType(LengthsType(), tuple('B')) + "processed_length": NeuralType(LengthsType(), tuple('B')), } def __init__( @@ -282,7 +281,7 @@ def input_ports(self): # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "length": NeuralType({0: AxisType(BatchTag)}), "input_signal": NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), - "length": NeuralType(LengthsType(), tuple('B')) + "length": NeuralType(LengthsType(), tuple('B')), } @property @@ -308,7 +307,7 @@ def output_ports(self): # ), # "processed_length": NeuralType({0: AxisType(BatchTag)}), "processed_signal": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), - "processed_length": NeuralType(LengthsType(), tuple('B')) + "processed_length": NeuralType(LengthsType(), tuple('B')), } def __init__( @@ -428,7 +427,7 @@ def input_ports(self): # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "length": NeuralType({0: AxisType(BatchTag)}), "input_signal": NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), - "length": NeuralType(LengthsType(), tuple('B')) + "length": NeuralType(LengthsType(), tuple('B')), } @property @@ -441,8 +440,7 @@ def output_ports(self): # ), # "processed_length": NeuralType({0: AxisType(BatchTag)}), "processed_signal": NeuralType(MFCCSpectrogramType(), ('B', 'D', 'T')), - "processed_length": NeuralType(LengthsType(), tuple('B')) - + "processed_length": NeuralType(LengthsType(), tuple('B')), } def __init__( @@ -505,7 +503,12 @@ def __init__( # Use torchaudio's implementation of MFCCs as featurizer self.featurizer = torchaudio.transforms.MFCC( - sample_rate=self._sample_rate, n_mfcc=n_mfcc, dct_type=dct_type, norm=norm, log_mels=log, melkwargs=mel_kwargs, + sample_rate=self._sample_rate, + n_mfcc=n_mfcc, + dct_type=dct_type, + norm=norm, + log_mels=log, + melkwargs=mel_kwargs, ) self.featurizer.to(self._device) @@ -621,7 +624,7 @@ def input_ports(self): "in_x": NeuralType(SpectrogramType(), ('B', 'D', 'T')), "in_x_len": NeuralType(LengthsType(), tuple('B')), "in_y": NeuralType(SpectrogramType(), ('B', 'D', 'T')), - "in_y_len": NeuralType(LengthsType(), tuple('B')) + "in_y_len": NeuralType(LengthsType(), tuple('B')), } @property @@ -636,7 +639,7 @@ def output_ports(self): "out_x": NeuralType(SpectrogramType(), ('B', 'D', 'T')), "out_x_len": NeuralType(LengthsType(), tuple('B')), "out_y": NeuralType(SpectrogramType(), ('B', 'D', 'T')), - "out_y_len": NeuralType(LengthsType(), tuple('B')) + "out_y_len": NeuralType(LengthsType(), tuple('B')), } def __init__(self, mult_batch=1): diff --git a/nemo/collections/asr/data_layer.py b/nemo/collections/asr/data_layer.py index a399d6e4187e..20df98b2add7 100644 --- a/nemo/collections/asr/data_layer.py +++ b/nemo/collections/asr/data_layer.py @@ -100,12 +100,10 @@ def output_ports(self): # 'a_sig_length': NeuralType({0: AxisType(BatchTag)}), # 'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # 'transcript_length': NeuralType({0: AxisType(BatchTag)}), - 'audio_signal': NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), 'a_sig_length': NeuralType(LengthsType(), tuple('B')), 'transcripts': NeuralType(ChannelType(), ('B', 'T')), - 'transcript_length': NeuralType(LengthsType(), tuple('B')) - + 'transcript_length': NeuralType(LengthsType(), tuple('B')), } def __init__( @@ -223,10 +221,9 @@ def output_ports(self): # 'processed_length': NeuralType({0: AxisType(BatchTag)}), # 'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # 'transcript_length': NeuralType({0: AxisType(BatchTag)}), - 'processed_signal': NeuralType(SpectrogramType(), ('B', 'D', 'T')), 'transcripts': NeuralType(ChannelType(), ('B', 'T')), - 'transcript_length': NeuralType(LengthsType(), tuple('B')) + 'transcript_length': NeuralType(LengthsType(), tuple('B')), } def __init__( @@ -354,7 +351,7 @@ def output_ports(self): # 'texts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # 'texts_length': NeuralType({0: AxisType(BatchTag)}), 'texts': NeuralType(ChannelType(), ('B', 'T')), - 'texts_length': NeuralType(LengthsType(), tuple('B')) + 'texts_length': NeuralType(LengthsType(), tuple('B')), } def __init__( diff --git a/nemo/collections/asr/jasper.py b/nemo/collections/asr/jasper.py index b17b4a139180..a1e41a8111b2 100644 --- a/nemo/collections/asr/jasper.py +++ b/nemo/collections/asr/jasper.py @@ -80,7 +80,7 @@ def input_ports(self): # ), # "length": NeuralType({0: AxisType(BatchTag)}), "audio_signal": NeuralType(SpectrogramType(), ('B', 'D', 'T')), - "length": NeuralType(LengthsType(), tuple('B')) + "length": NeuralType(LengthsType(), tuple('B')), } @property @@ -93,7 +93,7 @@ def output_ports(self): # ), # "encoded_lengths": NeuralType({0: AxisType(BatchTag)}), "outputs": NeuralType(AcousticEncodedRepresentation(), ('B', 'D', 'T')), - "encoded_lengths": NeuralType(LengthsType(), tuple('B')) + "encoded_lengths": NeuralType(LengthsType(), tuple('B')), } def __init__( diff --git a/nemo/collections/asr/losses.py b/nemo/collections/asr/losses.py index 3f379e4aea7b..c29a0dba78be 100644 --- a/nemo/collections/asr/losses.py +++ b/nemo/collections/asr/losses.py @@ -27,7 +27,7 @@ def input_ports(self): "log_probs": NeuralType(LogprobsType(), ('B', 'T', 'D')), "targets": NeuralType(ChannelType(), ('B', 'T')), "input_length": NeuralType(LengthsType(), tuple('B')), - "target_length": NeuralType(LengthsType(), tuple('B')) + "target_length": NeuralType(LengthsType(), tuple('B')), } @property diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py index 53a95017d1e7..b0c1a310ec33 100644 --- a/nemo/core/neural_types/neural_type.py +++ b/nemo/core/neural_types/neural_type.py @@ -148,8 +148,11 @@ def __compare_axes(axes_a, axes_b) -> int: for axis_a, axis_b in zip(axes_a, axes_b): kinds_a[axis_a.kind] = axis_a.size kinds_b[axis_b.kind] = axis_b.size - if axis_a.kind != axis_b.kind or axis_a.is_list != axis_b.is_list or (axis_a.size != axis_b.size and - axis_a.size is not None): + if ( + axis_a.kind != axis_b.kind + or axis_a.is_list != axis_b.is_list + or (axis_a.size != axis_b.size and axis_a.size is not None) + ): same = False if same: return 0 diff --git a/tests/asr/test_asr.py b/tests/asr/test_asr.py index 9c3900dd2fd7..ea81187d6826 100644 --- a/tests/asr/test_asr.py +++ b/tests/asr/test_asr.py @@ -404,8 +404,8 @@ def test_double_jasper_training(self): feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], **jasper_model_definition['JasperEncoder'], ) - #mx_max1 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") - #mx_max2 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") + # mx_max1 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") + # mx_max2 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") jasper_decoder1 = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) jasper_decoder2 = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) diff --git a/tests/asr/test_zeroDS.py b/tests/asr/test_zeroDS.py index 4403c0327753..342a64d20da1 100644 --- a/tests/asr/test_zeroDS.py +++ b/tests/asr/test_zeroDS.py @@ -107,12 +107,13 @@ def test_asr_with_zero_ds(self): # "processed_length": NeuralType({0: AxisType(BatchTag)}), # "transcript": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag, dim=64)}), # "transcript_length": NeuralType({0: AxisType(BatchTag)}), - "processed_signal": NeuralType(SpectrogramType(), (AxisType(AxisKind.Batch), - AxisType(AxisKind.Dimension, 64), - AxisType(AxisKind.Time, 64))), + "processed_signal": NeuralType( + SpectrogramType(), + (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 64), AxisType(AxisKind.Time, 64)), + ), "processed_length": NeuralType(LengthsType(), tuple('B')), "transcript": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Time, 64))), - "transcript_length": NeuralType(LengthsType(), tuple('B')) + "transcript_length": NeuralType(LengthsType(), tuple('B')), }, ) diff --git a/tests/core/test_neural_types.py b/tests/core/test_neural_types.py index 6f2be4cee7b7..a860c889bc9f 100644 --- a/tests/core/test_neural_types.py +++ b/tests/core/test_neural_types.py @@ -169,8 +169,10 @@ def wrong(): self.assertRaises(NeuralPortNmTensorMismatchError, wrong) def test_unspecified_dimensions(self): - t0 = NeuralType(SpectrogramType(), (AxisType(AxisKind.Batch, 64), AxisType(AxisKind.Time, 10), - AxisType(AxisKind.Dimension, 128))) + t0 = NeuralType( + SpectrogramType(), + (AxisType(AxisKind.Batch, 64), AxisType(AxisKind.Time, 10), AxisType(AxisKind.Dimension, 128)), + ) t1 = NeuralType(SpectrogramType(), ('B', 'T', 'C')) self.assertEqual(t1.compare(t0), NeuralTypeComparisonResult.SAME) self.assertEqual(t0.compare(t1), NeuralTypeComparisonResult.DIM_INCOMPATIBLE) From c1e7a1a24653194726a20e52c99b9d213af14690 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Thu, 6 Feb 2020 16:54:18 -0800 Subject: [PATCH 09/30] change types in NLP collection enable deployment test Signed-off-by: Oleksii Kuchaiev --- nemo/backends/pytorch/actions.py | 8 +- .../data_layers/glue_benchmark_datalayer.py | 62 +-- .../joint_intent_slot_datalayer.py | 97 ++--- .../nlp/nm/data_layers/lm_bert_datalayer.py | 100 ++--- .../data_layers/lm_transformer_datalayer.py | 11 +- .../machine_translation_datalayer.py | 36 +- .../punctuation_capitalization_datalayer.py | 59 +-- .../nlp/nm/data_layers/qa_squad_datalayer.py | 46 +-- .../state_tracking_trade_datalayer.py | 35 +- .../text_classification_datalayer.py | 33 +- .../token_classification_datalayer.py | 91 ++--- .../nlp/nm/losses/aggregator_loss.py | 6 +- .../nlp/nm/losses/joint_intent_slot_loss.py | 45 +-- .../losses/masked_language_modeling_loss.py | 30 +- .../padded_smoothed_cross_entropy_loss.py | 26 +- .../nlp/nm/losses/qa_squad_loss.py | 33 +- .../nm/losses/state_tracking_trade_loss.py | 46 +-- .../nm/losses/token_classification_loss.py | 30 +- .../trainables/common/huggingface/bert_nm.py | 36 +- .../common/sequence_classification_nm.py | 19 +- .../common/sequence_regression_nm.py | 18 +- .../common/token_classification_nm.py | 42 +- .../common/transformer/transformer_nm.py | 113 ++---- .../state_tracking_trade_nm.py | 61 +-- .../joint_intent_slot/joint_intent_slot_nm.py | 18 +- nemo/core/neural_types/elements.py | 5 + tests/core/test_deploy_export.py | 362 +++++++++--------- 27 files changed, 501 insertions(+), 967 deletions(-) diff --git a/nemo/backends/pytorch/actions.py b/nemo/backends/pytorch/actions.py index f7061318305c..deec27eee087 100644 --- a/nemo/backends/pytorch/actions.py +++ b/nemo/backends/pytorch/actions.py @@ -919,10 +919,10 @@ def __module_export( dynamic_axes = defaultdict(list) def __extract_dynamic_axes(port_name: str, ntype: NeuralType, dynamic_axes: defaultdict): - if ntype.axis2type: - for axis_id, axistype in ntype.axis2type.items(): - if issubclass(axistype.semantics, BatchTag) or issubclass(axistype.semantics, TimeTag): - dynamic_axes[port_name].append(axis_id) + if ntype.axes: + for ind, axis in enumerate(ntype.axes): + if axis.kind == AxisKind.Batch or axis.kind == AxisKind.Time: + dynamic_axes[port_name].append(ind) # This is a hack for Jasper to Jarvis export -- need re-design for this inputs_to_drop = set() diff --git a/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py b/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py index baf55f55c047..56dea4219240 100644 --- a/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import GLUEDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, CategoricalTag, NeuralType, RegressionTag, TimeTag +from nemo.core import NeuralType, RegressionValuesType, ChannelType, CategoricalValuesType __all__ = ['GlueClassificationDataLayer', 'GlueRegressionDataLayer'] @@ -36,30 +36,16 @@ class GlueClassificationDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - labels: - 0: AxisType(CategoricalTag) """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(CategoricalTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(CategoricalTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask": NeuralType(ChannelType(), ('B', 'T')), + "labels": NeuralType(CategoricalValuesType(), tuple('B')), } def __init__( @@ -101,30 +87,16 @@ class GlueRegressionDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - labels: - 0: AxisType(RegressionTag) """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(RegressionTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(RegressionTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask": NeuralType(ChannelType(), ('B', 'T')), + "labels": NeuralType(RegressionValuesType(), tuple('B')), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py b/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py index 354be6b32a5f..bb95726a2e78 100644 --- a/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import BertJointIntentSlotDataset, BertJointIntentSlotInferDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType __all__ = ['BertJointIntentSlotDataLayer', 'BertJointIntentSlotInferDataLayer'] @@ -43,48 +43,22 @@ class BertJointIntentSlotDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - subtokens_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - intents: - 0: AxisType(BatchTag) - - slots: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "intents": NeuralType({0: AxisType(BatchTag)}), - "slots": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "intents": NeuralType({0: AxisType(BatchTag)}), + # "slots": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask": NeuralType(ChannelType(), ('B', 'T')), + "loss_mask": NeuralType(ChannelType(), ('B', 'T')), + "subtokens_mask": NeuralType(ChannelType(), ('B', 'T')), + "intents": NeuralType(ChannelType(), tuple('B')), + "slots": NeuralType(ChannelType(), ('B', 'T')), } def __init__( @@ -137,39 +111,18 @@ class BertJointIntentSlotInferDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - subtokens_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask": NeuralType(ChannelType(), ('B', 'T')), + "loss_mask": NeuralType(ChannelType(), ('B', 'T')), + "subtokens_mask": NeuralType(ChannelType(), ('B', 'T')) } def __init__(self, queries, tokenizer, max_seq_length, batch_size=1, dataset_type=BertJointIntentSlotInferDataset): diff --git a/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py b/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py index 7034c7c18c38..e96be1a1e788 100644 --- a/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py @@ -25,7 +25,7 @@ from nemo.backends.pytorch import DataLayerNM from nemo.collections.nlp.data import BertPretrainingDataset, BertPretrainingPreprocessedDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LabelsType __all__ = ['BertPretrainingDataLayer', 'BertPretrainingPreprocessedDataLayer'] @@ -48,45 +48,20 @@ class BertPretrainingDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. - - input_ids: indices of tokens which constitute batches of text segments - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: indices of token types (e.g., sentences A & B in BERT) - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: bool tensor with 0s in place of tokens to be masked - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - output_ids: indices of output tokens which should be predicted - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - output_mask: bool tensor with 0s in place of tokens to be excluded - from loss calculation - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - labels: indices of classes to be predicted from [CLS] token of text - segments (e.g, 0 or 1 in next sentence prediction task) - 0: AxisType(BatchTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(BatchTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(BatchTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask": NeuralType(ChannelType(), ('B', 'T')), + "output_ids": NeuralType(ChannelType(), ('B', 'T')), + "output_mask": NeuralType(ChannelType(), ('B', 'T')), + "labels": NeuralType(LabelsType(), tuple('B')), } def __init__(self, tokenizer, dataset, max_seq_length, mask_probability, short_seq_prob=0.1, batch_size=64): @@ -118,45 +93,20 @@ class BertPretrainingPreprocessedDataLayer(DataLayerNM): @property def output_ports(self): """Returns definitions of module output ports. - - input_ids: indices of tokens which constitute batches of text segments - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: indices of token types (e.g., sentences A & B in BERT) - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: bool tensor with 0s in place of tokens to be masked - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - output_ids: indices of output tokens which should be predicted - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - output_mask: bool tensor with 0s in place of tokens to be excluded - from loss calculation - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - labels: indices of classes to be predicted from [CLS] token of text - segments (e.g, 0 or 1 in next sentence prediction task) - 0: AxisType(BatchTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(BatchTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(BatchTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask": NeuralType(ChannelType(), ('B', 'T')), + "output_ids": NeuralType(ChannelType(), ('B', 'T')), + "output_mask": NeuralType(ChannelType(), ('B', 'T')), + "labels": NeuralType(LabelsType(), tuple('B')), } def __init__(self, dataset, max_pred_length, batch_size=64, training=True): diff --git a/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py b/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py index 64e79ffea9f1..266fb4f2ffc7 100644 --- a/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import LanguageModelingDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LabelsType __all__ = ['LanguageModelingDataLayer'] @@ -55,9 +55,12 @@ def output_ports(self): 1: AxisType(TimeTag) """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask": NeuralType(ChannelType(), ('B', 'T')), + "labels": NeuralType(LabelsType(), ('B', 'T')), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py b/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py index 23aa1c54e913..7f13898ea1fc 100644 --- a/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py @@ -20,7 +20,7 @@ import nemo from nemo.collections.nlp.data import TranslationDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LabelsType __all__ = ['TranslationDataLayer'] @@ -48,44 +48,34 @@ def output_ports(self): """Returns definitions of module output ports. src_ids: indices of tokens which correspond to source sentences - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) src_mask: bool tensor with 0s in place of source tokens to be masked - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) tgt_ids: indices of tokens which correspond to target sentences - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) tgt_mask: bool tensor with 0s in place of target tokens to be masked - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) labels: indices of tokens which should be predicted from each of the corresponding target tokens in tgt_ids; for standard neural machine translation equals to tgt_ids shifted by 1 to the right - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) sent_ids: indices of the sentences in a batch; important for evaluation with external metrics, such as SacreBLEU - 0: AxisType(BatchTag) """ return { - "src_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "src_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "tgt_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "tgt_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "sent_ids": NeuralType({0: AxisType(BatchTag)}), + # "src_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "src_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "tgt_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "tgt_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "sent_ids": NeuralType({0: AxisType(BatchTag)}), + "src_ids": NeuralType(ChannelType(), ('B', 'T')), + "src_mask": NeuralType(ChannelType(), ('B', 'T')), + "tgt_ids": NeuralType(ChannelType(), ('B', 'T')), + "tgt_mask": NeuralType(ChannelType(), ('B', 'T')), + "labels": NeuralType(LabelsType(), ('B', 'T')), + "sent_ids": NeuralType(ChannelType(), tuple('B')), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py b/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py index 41b952827043..7b4fa9b77133 100644 --- a/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import BertPunctuationCapitalizationDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LabelsType __all__ = ['PunctuationCapitalizationDataLayer'] @@ -25,51 +25,22 @@ class PunctuationCapitalizationDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - subtokens_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - punct_labels: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - capit_labels: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "punct_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "capit_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "punct_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "capit_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask": NeuralType(ChannelType(), ('B', 'T')), + "loss_mask": NeuralType(ChannelType(), ('B', 'T')), + "subtokens_mask": NeuralType(ChannelType(), ('B', 'T')), + "punct_labels": NeuralType(LabelsType(), ('B', 'T')), + "capit_labels": NeuralType(LabelsType(), ('B', 'T')), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py b/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py index 56d912a35a6d..245e05eb309a 100644 --- a/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import SquadDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LabelsType __all__ = ['BertQuestionAnsweringDataLayer'] @@ -48,39 +48,21 @@ class BertQuestionAnsweringDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - start_positions: - 0: AxisType(BatchTag) - - end_positions: - 0: AxisType(BatchTag) - - unique_ids: - 0: AxisType(BatchTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "start_positions": NeuralType({0: AxisType(BatchTag)}), - "end_positions": NeuralType({0: AxisType(BatchTag)}), - "unique_ids": NeuralType({0: AxisType(BatchTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "start_positions": NeuralType({0: AxisType(BatchTag)}), + # "end_positions": NeuralType({0: AxisType(BatchTag)}), + # "unique_ids": NeuralType({0: AxisType(BatchTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask": NeuralType(ChannelType(), ('B', 'T')), + "start_positions": NeuralType(ChannelType(), tuple('B')), + "end_positions": NeuralType(ChannelType(), tuple('B')), + "unique_ids": NeuralType(ChannelType(), tuple('B')), + } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py index decfc035c25b..6845c47de4bc 100644 --- a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py @@ -43,7 +43,7 @@ import nemo from nemo.collections.nlp.data.datasets import MultiWOZDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core.neural_types import * +from nemo.core.neural_types import NeuralType, ChannelType, LabelsType __all__ = ['MultiWOZDataLayer'] @@ -54,41 +54,32 @@ def output_ports(self): """Returns definitions of module output ports. src_ids: ids of input sequences - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) src_lens: lengths of input sequences - 0: AxisType(BatchTag) tgt_ids: labels for the generator output - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(TimeTag) tgt_lens: lengths of the generator targets - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) gating_labels: labels for the gating head - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) turn_domain: list of the domains NeuralType(None) """ return { - "src_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "src_lens": NeuralType({0: AxisType(BatchTag)}), - "tgt_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), - "tgt_lens": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "gating_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "turn_domain": NeuralType(None), + # "src_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "src_lens": NeuralType({0: AxisType(BatchTag)}), + # "tgt_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), + # "tgt_lens": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + # "gating_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + # "turn_domain": NeuralType(None), + "src_ids": NeuralType(ChannelType(), ('B', 'T')), + "src_lens": NeuralType(ChannelType(), tuple('B')), + "tgt_ids": NeuralType(ChannelType(), ('B', 'D', 'T')), + "tgt_lens": NeuralType(ChannelType(), ('B', 'D')), + "gating_labels": NeuralType(LabelsType(), ('B', 'D')), + "turn_domain": NeuralType(), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py b/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py index 738144586dd5..f1f408580069 100644 --- a/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import BertTextClassificationDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LabelsType __all__ = ['BertSentenceClassificationDataLayer'] @@ -36,31 +36,16 @@ class BertSentenceClassificationDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - labels: - 0: AxisType(BatchTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(BatchTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(BatchTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask": NeuralType(ChannelType(), ('B', 'T')), + "labels": NeuralType(LabelsType(), tuple('B')), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py b/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py index b4e0d6ecc51a..3744f3b8682b 100644 --- a/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import BertTokenClassificationDataset, BertTokenClassificationInferDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LabelsType __all__ = ['BertTokenClassificationDataLayer', 'BertTokenClassificationInferDataLayer'] @@ -25,44 +25,20 @@ class BertTokenClassificationDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - subtokens_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - labels: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask": NeuralType(ChannelType(), ('B', 'T')), + "loss_mask": NeuralType(ChannelType(), ('B', 'T')), + "subtokens_mask": NeuralType(ChannelType(), ('B', 'T')), + "labels": NeuralType(LabelsType(), ('B', 'T')), } def __init__( @@ -101,39 +77,18 @@ class BertTokenClassificationInferDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - subtokens_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask": NeuralType(ChannelType(), ('B', 'T')), + "loss_mask": NeuralType(ChannelType(), ('B', 'T')), + "subtokens_mask": NeuralType(ChannelType(), ('B', 'T')), } def __init__( diff --git a/nemo/collections/nlp/nm/losses/aggregator_loss.py b/nemo/collections/nlp/nm/losses/aggregator_loss.py index 7a66c3cb85f1..0720d0409fd8 100644 --- a/nemo/collections/nlp/nm/losses/aggregator_loss.py +++ b/nemo/collections/nlp/nm/losses/aggregator_loss.py @@ -15,7 +15,7 @@ # ============================================================================= from nemo.backends.pytorch import LossNM -from nemo.core import NeuralType +from nemo.core import NeuralType, LossType __all__ = ['LossAggregatorNM'] @@ -35,7 +35,7 @@ def input_ports(self): """ input_ports = {} for i in range(self.num_losses): - input_ports["loss_" + str(i + 1)] = NeuralType(None) + input_ports["loss_" + str(i + 1)] = NeuralType() return input_ports @@ -46,7 +46,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType())} def __init__(self, num_inputs=2): # Store number of inputs/losses. diff --git a/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py b/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py index 3ba4d631f1da..fa5e2dc186d5 100644 --- a/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py +++ b/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py @@ -18,7 +18,7 @@ from torch import nn from nemo.backends.pytorch import LossNM -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LossType, LogitsType __all__ = ['JointIntentSlotLoss'] @@ -49,38 +49,18 @@ class JointIntentSlotLoss(LossNM): def input_ports(self): """Returns definitions of module input ports. - intent_logits: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - slot_logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - intents: - 0: AxisType(BatchTag) - - slots: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - """ return { - "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "intents": NeuralType({0: AxisType(BatchTag)}), - "slots": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + # "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "intents": NeuralType({0: AxisType(BatchTag)}), + # "slots": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "intent_logits": NeuralType(LogitsType(), ('B', 'D')), + "slot_logits": NeuralType(LogitsType(), ('B', 'T', 'D')), + "loss_mask": NeuralType(ChannelType(), ('B', 'T')), + "intents": NeuralType(ChannelType(), tuple('B')), + "slots": NeuralType(ChannelType(), ('B', 'T')), } @property @@ -90,7 +70,8 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + #return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType())} def __init__( self, num_slots, slot_classes_loss_weights=None, intent_classes_loss_weights=None, intent_loss_weight=0.6, diff --git a/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py b/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py index e5516d9f33c7..485d49345b9b 100644 --- a/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py +++ b/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py @@ -16,7 +16,7 @@ from nemo.backends.pytorch import LossNM from nemo.collections.nlp.nm.losses.smoothed_cross_entropy_loss import SmoothedCrossEntropyLoss -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LossType, LogitsType __all__ = ['MaskedLanguageModelingLossNM'] @@ -32,28 +32,14 @@ class MaskedLanguageModelingLossNM(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - output_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - output_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "logits": NeuralType(LogitsType(), ('B', 'T', 'D')), + "output_ids": NeuralType(ChannelType(), ('B', 'T')), + "output_mask": NeuralType(ChannelType(), ('B', 'T')), } @property @@ -63,7 +49,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType())} def __init__(self, label_smoothing=0.0): LossNM.__init__(self) diff --git a/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py b/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py index 0ad66e21106d..292cc77c932b 100644 --- a/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py +++ b/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py @@ -17,7 +17,7 @@ from nemo.backends.pytorch import LossNM from nemo.collections.nlp.nm.losses.smoothed_cross_entropy_loss import SmoothedCrossEntropyLoss from nemo.collections.nlp.utils.common_nlp_utils import mask_padded_tokens -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import AxisType, NeuralType, ChannelType, LogitsType, LossType __all__ = ['PaddedSmoothedCrossEntropyLossNM'] @@ -38,32 +38,20 @@ class PaddedSmoothedCrossEntropyLossNM(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - target_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "target_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "target_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "logits": NeuralType(LogitsType(), ('B', 'T', 'D')), + "target_ids": NeuralType(ChannelType(), ('B', 'T')), } @property def output_ports(self): """Returns definitions of module output ports. - - loss: - NeuralType(None) """ - return {"loss": NeuralType(None)} + # return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType())} def __init__(self, pad_id, label_smoothing=0, predict_last_k=0): LossNM.__init__(self) diff --git a/nemo/collections/nlp/nm/losses/qa_squad_loss.py b/nemo/collections/nlp/nm/losses/qa_squad_loss.py index 5f60871d4ebb..acc53066c413 100644 --- a/nemo/collections/nlp/nm/losses/qa_squad_loss.py +++ b/nemo/collections/nlp/nm/losses/qa_squad_loss.py @@ -17,7 +17,7 @@ from torch import nn from nemo.backends.pytorch import LossNM -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LogitsType, LossType __all__ = ['QuestionAnsweringLoss'] @@ -38,24 +38,14 @@ class QuestionAnsweringLoss(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - start_positions: - 0: AxisType(BatchTag) - - end_positions: - 0: AxisType(BatchTag) """ return { - "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "start_positions": NeuralType({0: AxisType(BatchTag)}), - "end_positions": NeuralType({0: AxisType(BatchTag)}), + # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "start_positions": NeuralType({0: AxisType(BatchTag)}), + # "end_positions": NeuralType({0: AxisType(BatchTag)}), + "logits": NeuralType(LogitsType(), ('B', 'T', 'D')), + "start_positions": NeuralType(ChannelType(), tuple('B')), + "end_positions": NeuralType(ChannelType(), tuple('B')), } @property @@ -76,9 +66,12 @@ def output_ports(self): 1: AxisType(TimeTag) """ return { - "loss": NeuralType(None), - "start_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "end_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss": NeuralType(None), + # "start_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "end_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "loss": NeuralType(LossType()), + "start_logits": NeuralType(ChannelType(), ('B', 'T')), + "end_logits": NeuralType(ChannelType(), ('B', 'T')), } def __init__(self): diff --git a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py b/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py index c591fc453afb..8f13572479ce 100644 --- a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py +++ b/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py @@ -39,7 +39,7 @@ import torch from nemo.backends.pytorch.nm import LossNM -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core.neural_types import NeuralType, ChannelType, LogitsType, LossType, LabelsType __all__ = ['TRADEMaskedCrossEntropy', 'CrossEntropyLoss3D'] @@ -61,44 +61,29 @@ def input_ports(self): """Returns definitions of module input ports. logits: 4d tensor of logits - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - 3: AxisType(ChannelTag) targets: 3d tensor of labels - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(TimeTag) loss_mask: specifies the words to be considered in the loss calculation - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) """ return { - "logits": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag), 3: AxisType(ChannelTag)} - ), - "targets": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + # "logits": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag), 3: AxisType(ChannelTag)} + # ), + # "targets": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + "logits": NeuralType(LogitsType(), ('B', 'T', 'D', 'D')), + "targets": NeuralType(ChannelType(), ('B', 'D', 'T')), + "loss_mask": NeuralType(ChannelType(), ('B', 'D')), } @property def output_ports(self): """Returns definitions of module output ports. - - loss: loss value - NeuralType(None) - """ - return {"loss": NeuralType(None)} + # return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType())} def __init__(self): LossNM.__init__(self) @@ -139,15 +124,18 @@ def input_ports(self): """Returns definitions of module input ports. """ return { - "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}), - "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}), + # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + "logits": NeuralType(LogitsType(), ('B', 'D', 'D')), + "labels": NeuralType(LabelsType(), ('B', 'D')), } @property def output_ports(self): """Returns definitions of module output ports. """ - return {"loss": NeuralType(None)} + # return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType())} def __init__(self, num_classes, **kwargs): LossNM.__init__(self, **kwargs) diff --git a/nemo/collections/nlp/nm/losses/token_classification_loss.py b/nemo/collections/nlp/nm/losses/token_classification_loss.py index 5c3c3adcad22..411264296fe5 100644 --- a/nemo/collections/nlp/nm/losses/token_classification_loss.py +++ b/nemo/collections/nlp/nm/losses/token_classification_loss.py @@ -18,7 +18,7 @@ from torch import nn from nemo.backends.pytorch import LossNM -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LossType, LabelsType, LogitsType __all__ = ['TokenClassificationLoss'] @@ -38,28 +38,14 @@ class TokenClassificationLoss(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - labels: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "logits": NeuralType(LogitsType(), ('B', 'T', 'D')), + "labels": NeuralType(LabelsType(), ('B', 'T')), + "loss_mask": NeuralType(ChannelType(), ('B', 'T')), } @property @@ -69,7 +55,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType())} def __init__(self, num_classes, class_weights=None): LossNM.__init__(self) diff --git a/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py b/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py index 1f91576be60a..3d313e5b0dd7 100644 --- a/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py @@ -20,7 +20,7 @@ from nemo.backends.pytorch.nm import TrainableNM from nemo.core.neural_modules import PretrainedModelInfo -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core.neural_types import NeuralType, ChannelType __all__ = ['BERT'] @@ -49,40 +49,22 @@ class BERT(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - token_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - attention_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "token_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "attention_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "token_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "attention_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "token_type_ids": NeuralType(ChannelType(), ('B', 'T')), + "attention_mask": NeuralType(ChannelType(), ('B', 'T')), } @property def output_ports(self): """Returns definitions of module output ports. - - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'D'))} def __init__( self, diff --git a/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py b/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py index 7e0c81c65388..9ca9aabdd6df 100644 --- a/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py @@ -18,7 +18,7 @@ from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LogitsType __all__ = ['SequenceClassifier'] @@ -41,26 +41,15 @@ class SequenceClassifier(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'D'))} @property def output_ports(self): """Returns definitions of module output ports. - - logits: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) """ - return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} + # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} + return {"logits": NeuralType(LogitsType(), ('B', 'D'))} def __init__( self, diff --git a/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py b/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py index 1032a1f2c43d..1c16bc967330 100644 --- a/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py @@ -18,7 +18,7 @@ from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, RegressionTag, TimeTag +from nemo.core import NeuralType, RegressionValuesType, ChannelType __all__ = ['SequenceRegression'] @@ -39,24 +39,16 @@ class SequenceRegression(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'D'))} @property def output_ports(self): """Returns definitions of module output ports. - - preds: - 0: AxisType(RegressionTag) """ - return {"preds": NeuralType({0: AxisType(RegressionTag)})} + # return {"preds": NeuralType({0: AxisType(RegressionTag)})} + return {"preds": NeuralType(RegressionValuesType(), tuple('B'))} def __init__(self, hidden_size, num_layers=2, activation='relu', dropout=0.0, use_transformer_pretrained=True): super().__init__() diff --git a/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py b/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py index ba848f247eb3..4fa8d0478e38 100644 --- a/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py @@ -18,7 +18,7 @@ from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import gelu, transformer_weights_init -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LogitsType __all__ = ['BertTokenClassifier', 'TokenClassifier'] @@ -42,28 +42,16 @@ class BertTokenClassifier(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'D'))} @property def output_ports(self): """Returns definitions of module output ports. - - logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"logits": NeuralType(LogitsType(), ('B', 'T', 'C'))} def __init__( self, @@ -115,28 +103,16 @@ class TokenClassifier(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'C'))} @property def output_ports(self): """Returns definitions of module output ports. - - logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"logits": NeuralType(LogitsType(), ('B', 'T', 'D'))} def __init__( self, diff --git a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py index b736588a3d33..cffd22c39c94 100644 --- a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py @@ -14,7 +14,7 @@ ) from nemo.collections.nlp.nm.trainables.common.transformer.transformer_modules import TransformerEmbedding from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init -from nemo.core.neural_types import * +from nemo.core.neural_types import NeuralType, ChannelType __all__ = ['TransformerEncoderNM', 'TransformerDecoderNM', 'GreedyLanguageGeneratorNM', 'BeamSearchTranslatorNM'] @@ -47,34 +47,21 @@ class TransformerEncoderNM(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask_src: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(ChannelType(), ('B', 'T')), + "input_mask_src": NeuralType(ChannelType(), ('B', 'T')), } @property def output_ports(self): """Returns definitions of module output ports. - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'D'))} def __init__( self, @@ -149,48 +136,24 @@ class TransformerDecoderNM(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - input_ids_tgt: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - hidden_states_src: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - input_mask_src: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask_tgt: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "input_ids_tgt": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "hidden_states_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask_tgt": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids_tgt": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "hidden_states_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask_tgt": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids_tgt": NeuralType(ChannelType(), ('B', 'T')), + "hidden_states_src": NeuralType(ChannelType(), ('B', 'T', 'D')), + "input_mask_src": NeuralType(ChannelType(), ('B', 'T')), + "input_mask_tgt": NeuralType(ChannelType(), ('B', 'T')), } @property def output_ports(self): """Returns definitions of module output ports. - - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'D'))} def __init__( self, @@ -255,24 +218,16 @@ class GreedyLanguageGeneratorNM(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"input_ids": NeuralType(ChannelType(), ('B', 'T'))} @property def output_ports(self): """Returns definitions of module output ports. - - output_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"output_ids": NeuralType(ChannelType(), ('B', 'T'))} def __init__(self, decoder, log_softmax, max_seq_length, pad_token, bos_token, eos_token, batch_size=1): super().__init__() @@ -319,34 +274,20 @@ class BeamSearchTranslatorNM(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - hidden_states_src: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - input_mask_src: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "hidden_states_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "hidden_states_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "hidden_states_src": NeuralType(ChannelType(), ('B', 'T', 'C')), + "input_mask_src": NeuralType(ChannelType(), ('B', 'T')), } @property def output_ports(self): """Returns definitions of module output ports. - - output_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"output_ids": NeuralType(ChannelType(), ('B', 'T'))} @property def num_weights(self): diff --git a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py index 5a2aa466afe1..9c6dd5c0d2cd 100644 --- a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py +++ b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py @@ -45,7 +45,7 @@ from torch import nn as nn from nemo.backends.pytorch.nm import TrainableNM -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core.neural_types import NeuralType, ChannelType, LengthsType __all__ = ['TRADEGenerator'] @@ -56,41 +56,28 @@ def input_ports(self): """Returns definitions of module input ports. encoder_hidden: hidden states of the encoder - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) encoder_outputs: outputs of the encoder - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) input_lens: lengths of the input sequences to encoder - 0: AxisType(BatchTag) src_ids: input sequences to encoder - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) targets: targets for the output of the generator - 0: AxisType(BatchTag) - - 1: AxisType(BatchTag) - - 2: AxisType(TimeTag) """ return { - 'encoder_hidden': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - 'encoder_outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - 'input_lens': NeuralType({0: AxisType(BatchTag)}), - 'src_ids': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), + # 'encoder_hidden': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # 'encoder_outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # 'input_lens': NeuralType({0: AxisType(BatchTag)}), + # 'src_ids': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), + 'encoder_hidden': NeuralType(ChannelType(), ('B', 'T', 'C')), + 'encoder_outputs': NeuralType(ChannelType(), ('B', 'T', 'C')), + 'input_lens': NeuralType(LengthsType(), tuple('B')), + 'src_ids': NeuralType(ChannelType(), ('B', 'T')), + 'targets': NeuralType(ChannelType(), ('B', 'D', 'T')), + } @property @@ -98,27 +85,19 @@ def output_ports(self): """Returns definitions of module output ports. point_outputs: outputs of the generator - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - 3: AxisType(ChannelTag) gate_outputs: outputs of gating heads - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(ChannelTag) """ + # return { + # 'point_outputs': NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag), 3: AxisType(ChannelTag)} + # ), + # 'gate_outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}), + # } return { - 'point_outputs': NeuralType( - {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag), 3: AxisType(ChannelTag)} - ), - 'gate_outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}), + 'point_outputs': NeuralType(ChannelType(), ('B', 'T', 'D', 'D')), + 'gate_outputs': NeuralType(ChannelType(), ('B', 'D', 'D')) } def __init__(self, vocab, embeddings, hid_size, dropout, slots, nb_gate, teacher_forcing=0.5): diff --git a/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py b/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py index b8707646f746..bf5c88263b48 100644 --- a/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py +++ b/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py @@ -18,7 +18,7 @@ from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import NeuralType, ChannelType, LogitsType __all__ = ['JointIntentSlotClassifier'] @@ -39,15 +39,9 @@ class JointIntentSlotClassifier(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'C'))} @property def output_ports(self): @@ -66,8 +60,10 @@ def output_ports(self): 2: AxisType(ChannelTag) """ return { - "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + # "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + "intent_logits": NeuralType(LogitsType(), ('B', 'D')), + "slot_logits": NeuralType(LogitsType(), ('B', 'D')) } def __init__(self, hidden_size, num_intents, num_slots, dropout=0.0, use_transformer_pretrained=True, **kwargs): diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index 37f35867a159..3508ea224337 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -28,6 +28,7 @@ 'LabelsType', 'LossType', 'RegressionValuesType', + 'CategoricalValuesType', 'PredictionsType', 'LogprobsType', 'LengthsType', @@ -171,3 +172,7 @@ def __str__(self): class RegressionValuesType(PredictionsType): def __str__(self): return "regression values type" + +class CategoricalValuesType(PredictionsType): + def __str__(self): + return "regression values type" diff --git a/tests/core/test_deploy_export.py b/tests/core/test_deploy_export.py index 3d29c166ea6b..04eed27d48e4 100644 --- a/tests/core/test_deploy_export.py +++ b/tests/core/test_deploy_export.py @@ -1,181 +1,181 @@ -# # ! /usr/bin/python -# # -*- coding: utf-8 -*- -# -# # Copyright 2019 NVIDIA. All Rights Reserved. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, software -# # distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. -# # ============================================================================= -# -# import os -# from pathlib import Path -# -# # git clone git@github.com:microsoft/onnxruntime.git -# # cd onnxruntime -# # ./build.sh --update --build --config RelWithDebInfo --build_shared_lib --parallel --use_cuda \ -# # --cudnn_home /usr/lib/x86_64-linux-gnu --cuda_home /usr/local/cuda --enable_pybind --build_wheel -# # pip install --upgrade ./build/Linux/RelWithDebInfo/dist/onnxruntime_gpu-1.1.0-cp37-cp37m-linux_x86_64.whl -# import onnxruntime as ort -# import torch -# from ruamel.yaml import YAML -# -# import nemo -# import nemo.collections.asr as nemo_asr -# import nemo.collections.nlp as nemo_nlp -# import nemo.collections.nlp.nm.trainables.common.token_classification_nm -# from tests.common_setup import NeMoUnitTest -# -# -# class TestDeployExport(NeMoUnitTest): -# def setUp(self): -# """ Setups neural factory so it will use GPU instead of CPU. """ -# NeMoUnitTest.setUp(self) -# -# # Perform computations on GPU. -# self.nf._placement = nemo.core.DeviceType.GPU -# -# def __test_export_route(self, module, out_name, mode, input_example=None): -# out = Path(out_name) -# if out.exists(): -# os.remove(out) -# -# outputs_fwd = ( -# (module.forward(*input_example) if isinstance(input_example, tuple) else module.forward(input_example)) -# if input_example is not None -# else None -# ) -# self.nf.deployment_export( -# module=module, output=out_name, input_example=input_example, d_format=mode, output_example=outputs_fwd -# ) -# -# tol = 2.0e-3 -# self.assertTrue(out.exists()) -# if mode == nemo.core.DeploymentFormat.ONNX: -# # Must recompute beause *module* might be different now -# outputs_fwd = ( -# module.forward(*input_example) if isinstance(input_example, tuple) else module.forward(input_example) -# ) -# sess_options = ort.SessionOptions() -# sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED -# ort_session = ort.InferenceSession(out_name, sess_options) -# inputs = dict() -# input_names = list(module.input_ports) -# for i in range(len(input_names)): -# input_name = ( -# "encoded_lengths" -# if type(module).__name__ == "JasperEncoder" and input_names[i] == "length" -# else input_names[i] -# ) -# inputs[input_name] = ( -# input_example[i].cpu().numpy() if isinstance(input_example, tuple) else input_example.cpu().numpy() -# ) -# outputs_scr = ort_session.run(None, inputs) -# outputs_scr = torch.from_numpy(outputs_scr[0]).cuda() -# outputs_fwd = outputs_fwd[0] if isinstance(outputs_fwd, tuple) else outputs_fwd -# tol = 5.0e-4 -# elif mode == nemo.core.DeploymentFormat.TORCHSCRIPT: -# scr = torch.jit.load(out_name) -# if isinstance(module, nemo.backends.pytorch.tutorials.TaylorNet): -# input_example = torch.randn(4, 1).cuda() -# outputs_fwd = module.forward(input_example) -# outputs_scr = ( -# scr.forward(*input_example) if isinstance(input_example, tuple) else scr.forward(input_example) -# ) -# elif mode == nemo.core.DeploymentFormat.PYTORCH: -# module.load_state_dict(torch.load(out_name)) -# module.eval() -# outputs_scr = module.forward(*input_example) -# -# self.assertLess((outputs_scr - outputs_fwd).norm(p=2), tol) -# -# if out.exists(): -# os.remove(out) -# -# def test_simple_module_export(self): -# simplest_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) -# self.__test_export_route( -# module=simplest_module, -# out_name="simple.pt", -# mode=nemo.core.DeploymentFormat.TORCHSCRIPT, -# input_example=None, -# ) -# -# def test_TokenClassifier_module_export(self): -# t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( -# hidden_size=512, num_classes=16, use_transformer_pretrained=False -# ) -# self.__test_export_route( -# module=t_class, -# out_name="t_class.pt", -# mode=nemo.core.DeploymentFormat.TORCHSCRIPT, -# input_example=torch.randn(16, 16, 512).cuda(), -# ) -# -# def test_TokenClassifier_module_onnx_export(self): -# t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( -# hidden_size=512, num_classes=16, use_transformer_pretrained=False -# ) -# self.__test_export_route( -# module=t_class, -# out_name="t_class.onnx", -# mode=nemo.core.DeploymentFormat.ONNX, -# input_example=torch.randn(16, 16, 512).cuda(), -# ) -# -# def test_jasper_decoder_export_ts(self): -# j_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=33) -# self.__test_export_route( -# module=j_decoder, -# out_name="j_decoder.ts", -# mode=nemo.core.DeploymentFormat.TORCHSCRIPT, -# input_example=torch.randn(34, 1024, 1).cuda(), -# ) -# -# def test_hf_bert_ts(self): -# bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") -# input_example = ( -# torch.randint(low=0, high=16, size=(2, 16)).cuda(), -# torch.randint(low=0, high=1, size=(2, 16)).cuda(), -# torch.randint(low=0, high=1, size=(2, 16)).cuda(), -# ) -# self.__test_export_route( -# module=bert, out_name="bert.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=input_example -# ) -# -# def test_hf_bert_pt(self): -# bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") -# input_example = ( -# torch.randint(low=0, high=16, size=(2, 16)).cuda(), -# torch.randint(low=0, high=1, size=(2, 16)).cuda(), -# torch.randint(low=0, high=1, size=(2, 16)).cuda(), -# ) -# self.__test_export_route( -# module=bert, out_name="bert.pt", mode=nemo.core.DeploymentFormat.PYTORCH, input_example=input_example, -# ) -# -# def test_jasper_encoder_to_onnx(self): -# with open("tests/data/jasper_smaller.yaml") as file: -# yaml = YAML(typ="safe") -# jasper_model_definition = yaml.load(file) -# -# jasper_encoder = nemo_asr.JasperEncoder( -# conv_mask=False, -# feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], -# **jasper_model_definition['JasperEncoder'] -# ) -# -# self.__test_export_route( -# module=jasper_encoder, -# out_name="jasper_encoder.onnx", -# mode=nemo.core.DeploymentFormat.ONNX, -# input_example=(torch.randn(16, 64, 256).cuda(), torch.randn(256).cuda()), -# ) +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2019 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import os +from pathlib import Path + +# git clone git@github.com:microsoft/onnxruntime.git +# cd onnxruntime +# ./build.sh --update --build --config RelWithDebInfo --build_shared_lib --parallel --use_cuda \ +# --cudnn_home /usr/lib/x86_64-linux-gnu --cuda_home /usr/local/cuda --enable_pybind --build_wheel +# pip install --upgrade ./build/Linux/RelWithDebInfo/dist/onnxruntime_gpu-1.1.0-cp37-cp37m-linux_x86_64.whl +import onnxruntime as ort +import torch +from ruamel.yaml import YAML + +import nemo +import nemo.collections.asr as nemo_asr +import nemo.collections.nlp as nemo_nlp +import nemo.collections.nlp.nm.trainables.common.token_classification_nm +from tests.common_setup import NeMoUnitTest + + +class TestDeployExport(NeMoUnitTest): + def setUp(self): + """ Setups neural factory so it will use GPU instead of CPU. """ + NeMoUnitTest.setUp(self) + + # Perform computations on GPU. + self.nf._placement = nemo.core.DeviceType.GPU + + def __test_export_route(self, module, out_name, mode, input_example=None): + out = Path(out_name) + if out.exists(): + os.remove(out) + + outputs_fwd = ( + (module.forward(*input_example) if isinstance(input_example, tuple) else module.forward(input_example)) + if input_example is not None + else None + ) + self.nf.deployment_export( + module=module, output=out_name, input_example=input_example, d_format=mode, output_example=outputs_fwd + ) + + tol = 2.0e-3 + self.assertTrue(out.exists()) + if mode == nemo.core.DeploymentFormat.ONNX: + # Must recompute beause *module* might be different now + outputs_fwd = ( + module.forward(*input_example) if isinstance(input_example, tuple) else module.forward(input_example) + ) + sess_options = ort.SessionOptions() + sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED + ort_session = ort.InferenceSession(out_name, sess_options) + inputs = dict() + input_names = list(module.input_ports) + for i in range(len(input_names)): + input_name = ( + "encoded_lengths" + if type(module).__name__ == "JasperEncoder" and input_names[i] == "length" + else input_names[i] + ) + inputs[input_name] = ( + input_example[i].cpu().numpy() if isinstance(input_example, tuple) else input_example.cpu().numpy() + ) + outputs_scr = ort_session.run(None, inputs) + outputs_scr = torch.from_numpy(outputs_scr[0]).cuda() + outputs_fwd = outputs_fwd[0] if isinstance(outputs_fwd, tuple) else outputs_fwd + tol = 5.0e-4 + elif mode == nemo.core.DeploymentFormat.TORCHSCRIPT: + scr = torch.jit.load(out_name) + if isinstance(module, nemo.backends.pytorch.tutorials.TaylorNet): + input_example = torch.randn(4, 1).cuda() + outputs_fwd = module.forward(input_example) + outputs_scr = ( + scr.forward(*input_example) if isinstance(input_example, tuple) else scr.forward(input_example) + ) + elif mode == nemo.core.DeploymentFormat.PYTORCH: + module.load_state_dict(torch.load(out_name)) + module.eval() + outputs_scr = module.forward(*input_example) + + self.assertLess((outputs_scr - outputs_fwd).norm(p=2), tol) + + if out.exists(): + os.remove(out) + + def test_simple_module_export(self): + simplest_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) + self.__test_export_route( + module=simplest_module, + out_name="simple.pt", + mode=nemo.core.DeploymentFormat.TORCHSCRIPT, + input_example=None, + ) + + def test_TokenClassifier_module_export(self): + t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( + hidden_size=512, num_classes=16, use_transformer_pretrained=False + ) + self.__test_export_route( + module=t_class, + out_name="t_class.pt", + mode=nemo.core.DeploymentFormat.TORCHSCRIPT, + input_example=torch.randn(16, 16, 512).cuda(), + ) + + def test_TokenClassifier_module_onnx_export(self): + t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( + hidden_size=512, num_classes=16, use_transformer_pretrained=False + ) + self.__test_export_route( + module=t_class, + out_name="t_class.onnx", + mode=nemo.core.DeploymentFormat.ONNX, + input_example=torch.randn(16, 16, 512).cuda(), + ) + + def test_jasper_decoder_export_ts(self): + j_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=33) + self.__test_export_route( + module=j_decoder, + out_name="j_decoder.ts", + mode=nemo.core.DeploymentFormat.TORCHSCRIPT, + input_example=torch.randn(34, 1024, 1).cuda(), + ) + + def test_hf_bert_ts(self): + bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") + input_example = ( + torch.randint(low=0, high=16, size=(2, 16)).cuda(), + torch.randint(low=0, high=1, size=(2, 16)).cuda(), + torch.randint(low=0, high=1, size=(2, 16)).cuda(), + ) + self.__test_export_route( + module=bert, out_name="bert.ts", mode=nemo.core.DeploymentFormat.TORCHSCRIPT, input_example=input_example + ) + + def test_hf_bert_pt(self): + bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") + input_example = ( + torch.randint(low=0, high=16, size=(2, 16)).cuda(), + torch.randint(low=0, high=1, size=(2, 16)).cuda(), + torch.randint(low=0, high=1, size=(2, 16)).cuda(), + ) + self.__test_export_route( + module=bert, out_name="bert.pt", mode=nemo.core.DeploymentFormat.PYTORCH, input_example=input_example, + ) + + def test_jasper_encoder_to_onnx(self): + with open("tests/data/jasper_smaller.yaml") as file: + yaml = YAML(typ="safe") + jasper_model_definition = yaml.load(file) + + jasper_encoder = nemo_asr.JasperEncoder( + conv_mask=False, + feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], + **jasper_model_definition['JasperEncoder'] + ) + + self.__test_export_route( + module=jasper_encoder, + out_name="jasper_encoder.onnx", + mode=nemo.core.DeploymentFormat.ONNX, + input_example=(torch.randn(16, 64, 256).cuda(), torch.randn(256).cuda()), + ) From 8e3b2c2c982dbad16674f9c214a14d6a3ef666d7 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Thu, 6 Feb 2020 16:55:14 -0800 Subject: [PATCH 10/30] fix style Signed-off-by: Oleksii Kuchaiev --- .../nlp/nm/data_layers/glue_benchmark_datalayer.py | 2 +- .../nlp/nm/data_layers/joint_intent_slot_datalayer.py | 4 ++-- nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py | 2 +- .../nlp/nm/data_layers/lm_transformer_datalayer.py | 2 +- .../nlp/nm/data_layers/machine_translation_datalayer.py | 2 +- .../nm/data_layers/punctuation_capitalization_datalayer.py | 2 +- nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py | 3 +-- .../nlp/nm/data_layers/state_tracking_trade_datalayer.py | 2 +- .../nlp/nm/data_layers/text_classification_datalayer.py | 2 +- .../nlp/nm/data_layers/token_classification_datalayer.py | 2 +- nemo/collections/nlp/nm/losses/aggregator_loss.py | 2 +- nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py | 4 ++-- .../nlp/nm/losses/masked_language_modeling_loss.py | 2 +- .../nlp/nm/losses/padded_smoothed_cross_entropy_loss.py | 2 +- nemo/collections/nlp/nm/losses/qa_squad_loss.py | 2 +- nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py | 2 +- nemo/collections/nlp/nm/losses/token_classification_loss.py | 2 +- .../nlp/nm/trainables/common/huggingface/bert_nm.py | 2 +- .../nlp/nm/trainables/common/sequence_classification_nm.py | 2 +- .../nlp/nm/trainables/common/sequence_regression_nm.py | 2 +- .../nlp/nm/trainables/common/token_classification_nm.py | 2 +- .../nlp/nm/trainables/common/transformer/transformer_nm.py | 2 +- .../dialogue_state_tracking/state_tracking_trade_nm.py | 5 ++--- .../nm/trainables/joint_intent_slot/joint_intent_slot_nm.py | 4 ++-- nemo/core/neural_types/elements.py | 1 + 25 files changed, 29 insertions(+), 30 deletions(-) diff --git a/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py b/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py index 56dea4219240..d8426e9b425d 100644 --- a/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import GLUEDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import NeuralType, RegressionValuesType, ChannelType, CategoricalValuesType +from nemo.core import CategoricalValuesType, ChannelType, NeuralType, RegressionValuesType __all__ = ['GlueClassificationDataLayer', 'GlueRegressionDataLayer'] diff --git a/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py b/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py index bb95726a2e78..b5b04f6ab299 100644 --- a/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import BertJointIntentSlotDataset, BertJointIntentSlotInferDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import NeuralType, ChannelType +from nemo.core import ChannelType, NeuralType __all__ = ['BertJointIntentSlotDataLayer', 'BertJointIntentSlotInferDataLayer'] @@ -122,7 +122,7 @@ def output_ports(self): "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), "input_mask": NeuralType(ChannelType(), ('B', 'T')), "loss_mask": NeuralType(ChannelType(), ('B', 'T')), - "subtokens_mask": NeuralType(ChannelType(), ('B', 'T')) + "subtokens_mask": NeuralType(ChannelType(), ('B', 'T')), } def __init__(self, queries, tokenizer, max_seq_length, batch_size=1, dataset_type=BertJointIntentSlotInferDataset): diff --git a/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py b/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py index e96be1a1e788..c5e02c7ca38f 100644 --- a/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py @@ -25,7 +25,7 @@ from nemo.backends.pytorch import DataLayerNM from nemo.collections.nlp.data import BertPretrainingDataset, BertPretrainingPreprocessedDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import NeuralType, ChannelType, LabelsType +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['BertPretrainingDataLayer', 'BertPretrainingPreprocessedDataLayer'] diff --git a/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py b/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py index 266fb4f2ffc7..c87fbd9c500a 100644 --- a/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import LanguageModelingDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import NeuralType, ChannelType, LabelsType +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['LanguageModelingDataLayer'] diff --git a/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py b/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py index 7f13898ea1fc..6fca7e3790c2 100644 --- a/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py @@ -20,7 +20,7 @@ import nemo from nemo.collections.nlp.data import TranslationDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import NeuralType, ChannelType, LabelsType +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['TranslationDataLayer'] diff --git a/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py b/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py index 7b4fa9b77133..84d281e78737 100644 --- a/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import BertPunctuationCapitalizationDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import NeuralType, ChannelType, LabelsType +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['PunctuationCapitalizationDataLayer'] diff --git a/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py b/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py index 245e05eb309a..1f0b32ca53f8 100644 --- a/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import SquadDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import NeuralType, ChannelType, LabelsType +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['BertQuestionAnsweringDataLayer'] @@ -62,7 +62,6 @@ def output_ports(self): "start_positions": NeuralType(ChannelType(), tuple('B')), "end_positions": NeuralType(ChannelType(), tuple('B')), "unique_ids": NeuralType(ChannelType(), tuple('B')), - } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py index 6845c47de4bc..da51068b8519 100644 --- a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py @@ -43,7 +43,7 @@ import nemo from nemo.collections.nlp.data.datasets import MultiWOZDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core.neural_types import NeuralType, ChannelType, LabelsType +from nemo.core.neural_types import ChannelType, LabelsType, NeuralType __all__ = ['MultiWOZDataLayer'] diff --git a/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py b/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py index f1f408580069..935e57fe53bf 100644 --- a/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import BertTextClassificationDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import NeuralType, ChannelType, LabelsType +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['BertSentenceClassificationDataLayer'] diff --git a/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py b/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py index 3744f3b8682b..f4ad2e613da5 100644 --- a/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import BertTokenClassificationDataset, BertTokenClassificationInferDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import NeuralType, ChannelType, LabelsType +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['BertTokenClassificationDataLayer', 'BertTokenClassificationInferDataLayer'] diff --git a/nemo/collections/nlp/nm/losses/aggregator_loss.py b/nemo/collections/nlp/nm/losses/aggregator_loss.py index 0720d0409fd8..62b775f02927 100644 --- a/nemo/collections/nlp/nm/losses/aggregator_loss.py +++ b/nemo/collections/nlp/nm/losses/aggregator_loss.py @@ -15,7 +15,7 @@ # ============================================================================= from nemo.backends.pytorch import LossNM -from nemo.core import NeuralType, LossType +from nemo.core import LossType, NeuralType __all__ = ['LossAggregatorNM'] diff --git a/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py b/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py index fa5e2dc186d5..5ef07b54e347 100644 --- a/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py +++ b/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py @@ -18,7 +18,7 @@ from torch import nn from nemo.backends.pytorch import LossNM -from nemo.core import NeuralType, ChannelType, LossType, LogitsType +from nemo.core import ChannelType, LogitsType, LossType, NeuralType __all__ = ['JointIntentSlotLoss'] @@ -70,7 +70,7 @@ def output_ports(self): loss: NeuralType(None) """ - #return {"loss": NeuralType(None)} + # return {"loss": NeuralType(None)} return {"loss": NeuralType(LossType())} def __init__( diff --git a/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py b/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py index 485d49345b9b..6e157a40c511 100644 --- a/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py +++ b/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py @@ -16,7 +16,7 @@ from nemo.backends.pytorch import LossNM from nemo.collections.nlp.nm.losses.smoothed_cross_entropy_loss import SmoothedCrossEntropyLoss -from nemo.core import NeuralType, ChannelType, LossType, LogitsType +from nemo.core import ChannelType, LogitsType, LossType, NeuralType __all__ = ['MaskedLanguageModelingLossNM'] diff --git a/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py b/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py index 292cc77c932b..b56717af885d 100644 --- a/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py +++ b/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py @@ -17,7 +17,7 @@ from nemo.backends.pytorch import LossNM from nemo.collections.nlp.nm.losses.smoothed_cross_entropy_loss import SmoothedCrossEntropyLoss from nemo.collections.nlp.utils.common_nlp_utils import mask_padded_tokens -from nemo.core import AxisType, NeuralType, ChannelType, LogitsType, LossType +from nemo.core import AxisType, ChannelType, LogitsType, LossType, NeuralType __all__ = ['PaddedSmoothedCrossEntropyLossNM'] diff --git a/nemo/collections/nlp/nm/losses/qa_squad_loss.py b/nemo/collections/nlp/nm/losses/qa_squad_loss.py index acc53066c413..e1718592a058 100644 --- a/nemo/collections/nlp/nm/losses/qa_squad_loss.py +++ b/nemo/collections/nlp/nm/losses/qa_squad_loss.py @@ -17,7 +17,7 @@ from torch import nn from nemo.backends.pytorch import LossNM -from nemo.core import NeuralType, ChannelType, LogitsType, LossType +from nemo.core import ChannelType, LogitsType, LossType, NeuralType __all__ = ['QuestionAnsweringLoss'] diff --git a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py b/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py index 8f13572479ce..662de183a183 100644 --- a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py +++ b/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py @@ -39,7 +39,7 @@ import torch from nemo.backends.pytorch.nm import LossNM -from nemo.core.neural_types import NeuralType, ChannelType, LogitsType, LossType, LabelsType +from nemo.core.neural_types import ChannelType, LabelsType, LogitsType, LossType, NeuralType __all__ = ['TRADEMaskedCrossEntropy', 'CrossEntropyLoss3D'] diff --git a/nemo/collections/nlp/nm/losses/token_classification_loss.py b/nemo/collections/nlp/nm/losses/token_classification_loss.py index 411264296fe5..ccdbe5100d72 100644 --- a/nemo/collections/nlp/nm/losses/token_classification_loss.py +++ b/nemo/collections/nlp/nm/losses/token_classification_loss.py @@ -18,7 +18,7 @@ from torch import nn from nemo.backends.pytorch import LossNM -from nemo.core import NeuralType, ChannelType, LossType, LabelsType, LogitsType +from nemo.core import ChannelType, LabelsType, LogitsType, LossType, NeuralType __all__ = ['TokenClassificationLoss'] diff --git a/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py b/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py index 3d313e5b0dd7..40dd18c7bbe2 100644 --- a/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py @@ -20,7 +20,7 @@ from nemo.backends.pytorch.nm import TrainableNM from nemo.core.neural_modules import PretrainedModelInfo -from nemo.core.neural_types import NeuralType, ChannelType +from nemo.core.neural_types import ChannelType, NeuralType __all__ = ['BERT'] diff --git a/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py b/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py index 9ca9aabdd6df..5781ecb0c064 100644 --- a/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py @@ -18,7 +18,7 @@ from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init -from nemo.core import NeuralType, ChannelType, LogitsType +from nemo.core import ChannelType, LogitsType, NeuralType __all__ = ['SequenceClassifier'] diff --git a/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py b/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py index 1c16bc967330..9b7cc38ad4fd 100644 --- a/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py @@ -18,7 +18,7 @@ from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init -from nemo.core import NeuralType, RegressionValuesType, ChannelType +from nemo.core import ChannelType, NeuralType, RegressionValuesType __all__ = ['SequenceRegression'] diff --git a/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py b/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py index 4fa8d0478e38..045fa55a7099 100644 --- a/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py @@ -18,7 +18,7 @@ from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import gelu, transformer_weights_init -from nemo.core import NeuralType, ChannelType, LogitsType +from nemo.core import ChannelType, LogitsType, NeuralType __all__ = ['BertTokenClassifier', 'TokenClassifier'] diff --git a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py index cffd22c39c94..0822d769d246 100644 --- a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py @@ -14,7 +14,7 @@ ) from nemo.collections.nlp.nm.trainables.common.transformer.transformer_modules import TransformerEmbedding from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init -from nemo.core.neural_types import NeuralType, ChannelType +from nemo.core.neural_types import ChannelType, NeuralType __all__ = ['TransformerEncoderNM', 'TransformerDecoderNM', 'GreedyLanguageGeneratorNM', 'BeamSearchTranslatorNM'] diff --git a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py index 9c6dd5c0d2cd..0ac416d4ab73 100644 --- a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py +++ b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py @@ -45,7 +45,7 @@ from torch import nn as nn from nemo.backends.pytorch.nm import TrainableNM -from nemo.core.neural_types import NeuralType, ChannelType, LengthsType +from nemo.core.neural_types import ChannelType, LengthsType, NeuralType __all__ = ['TRADEGenerator'] @@ -77,7 +77,6 @@ def input_ports(self): 'input_lens': NeuralType(LengthsType(), tuple('B')), 'src_ids': NeuralType(ChannelType(), ('B', 'T')), 'targets': NeuralType(ChannelType(), ('B', 'D', 'T')), - } @property @@ -97,7 +96,7 @@ def output_ports(self): # } return { 'point_outputs': NeuralType(ChannelType(), ('B', 'T', 'D', 'D')), - 'gate_outputs': NeuralType(ChannelType(), ('B', 'D', 'D')) + 'gate_outputs': NeuralType(ChannelType(), ('B', 'D', 'D')), } def __init__(self, vocab, embeddings, hid_size, dropout, slots, nb_gate, teacher_forcing=0.5): diff --git a/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py b/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py index bf5c88263b48..faa273919d15 100644 --- a/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py +++ b/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py @@ -18,7 +18,7 @@ from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init -from nemo.core import NeuralType, ChannelType, LogitsType +from nemo.core import ChannelType, LogitsType, NeuralType __all__ = ['JointIntentSlotClassifier'] @@ -63,7 +63,7 @@ def output_ports(self): # "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), # "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), "intent_logits": NeuralType(LogitsType(), ('B', 'D')), - "slot_logits": NeuralType(LogitsType(), ('B', 'D')) + "slot_logits": NeuralType(LogitsType(), ('B', 'D')), } def __init__(self, hidden_size, num_intents, num_slots, dropout=0.0, use_transformer_pretrained=True, **kwargs): diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index 3508ea224337..bde89e9006c5 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -173,6 +173,7 @@ class RegressionValuesType(PredictionsType): def __str__(self): return "regression values type" + class CategoricalValuesType(PredictionsType): def __str__(self): return "regression values type" From 80b5bc296860f56467a968a731ff67c8de4a90ad Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Fri, 7 Feb 2020 09:59:50 -0800 Subject: [PATCH 11/30] fix tts collection Signed-off-by: Oleksii Kuchaiev --- nemo/collections/tts/data_layers.py | 16 +- nemo/collections/tts/tacotron2_modules.py | 311 ++++++---------------- nemo/collections/tts/waveglow_modules.py | 90 ++----- nemo/core/neural_types/elements.py | 14 +- 4 files changed, 130 insertions(+), 301 deletions(-) diff --git a/nemo/collections/tts/data_layers.py b/nemo/collections/tts/data_layers.py index cad859fb10cb..12639eaca426 100644 --- a/nemo/collections/tts/data_layers.py +++ b/nemo/collections/tts/data_layers.py @@ -5,7 +5,7 @@ from .parts.datasets import AudioOnlyDataset from nemo.backends.pytorch.nm import DataLayerNM from nemo.core import DeviceType -from nemo.core.neural_types import * +from nemo.core.neural_types import NeuralType, AudioSignal, LengthsType class AudioDataLayer(DataLayerNM): @@ -48,18 +48,12 @@ class AudioDataLayer(DataLayerNM): @property def output_ports(self): """Returns definitions of module output ports. - - audio_signal: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - a_sig_length: - 0: AxisType(BatchTag) """ return { - "audio_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "a_sig_length": NeuralType({0: AxisType(BatchTag)}), + # "audio_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "a_sig_length": NeuralType({0: AxisType(BatchTag)}), + "audio_signal": NeuralType(AudioSignal(), ('B', 'T')), + "a_sig_length": NeuralType(LengthsType(), tuple('B')), } def __init__( diff --git a/nemo/collections/tts/tacotron2_modules.py b/nemo/collections/tts/tacotron2_modules.py index 0613311d3dc4..697c1aa3083b 100644 --- a/nemo/collections/tts/tacotron2_modules.py +++ b/nemo/collections/tts/tacotron2_modules.py @@ -35,29 +35,19 @@ class TextEmbedding(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - char_phone - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"char_phone": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"char_phone": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"char_phone": NeuralType(ChannelType(), ('B', 'T'))} @property def output_ports(self): """Returns definitions of module output ports. - - char_phone_embeddings: - 0: AxisType(BatchTag) - - 1: AxisType(EmbeddedTextTag) - - 2: AxisType(TimeTag)}) """ return { - "char_phone_embeddings": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(EmbeddedTextTag), 2: AxisType(TimeTag),} - ) + # "char_phone_embeddings": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(EmbeddedTextTag), 2: AxisType(TimeTag),} + # ) + "char_phone_embeddings": NeuralType(EmbeddedTextType(), ('B', 'D', 'T')) } def __init__(self, n_symbols, symbols_embedding_dim: int = 512): @@ -87,39 +77,25 @@ class Tacotron2Encoder(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - char_phone_embeddings: - 0: AxisType(BatchTag) - - 1: AxisType(EmbeddedTextTag) - - 2: AxisType(TimeTag) - - embedding_length: - 0: AxisType(BatchTag) """ return { - "char_phone_embeddings": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(EmbeddedTextTag), 2: AxisType(TimeTag),} - ), - "embedding_length": NeuralType({0: AxisType(BatchTag)}), + # "char_phone_embeddings": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(EmbeddedTextTag), 2: AxisType(TimeTag),} + # ), + # "embedding_length": NeuralType({0: AxisType(BatchTag)}), + "char_phone_embeddings": NeuralType(EmbeddedTextType(), ('B', 'D', 'T')), + "embedding_length": NeuralType(LengthsType(), tuple('B')) } @property def output_ports(self): """Returns definitions of module output ports. - - char_phone_embeddings: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(EncodedRepresentationTag)}) """ return { - "char_phone_encoded": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} - ) + # "char_phone_encoded": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} + # ) + "char_phone_encoded": NeuralType(EncodedRepresentation(), ('B', 'T', 'D')) } def __init__( @@ -179,63 +155,33 @@ class Tacotron2Decoder(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - char_phone_encoded: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(EncodedRepresentationTag) - - encoded_length: - 0: AxisType(BatchTag) - - mel_target: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) """ return { - "char_phone_encoded": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} - ), - "encoded_length": NeuralType({0: AxisType(BatchTag)}), - "mel_target": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), + # "char_phone_encoded": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} + # ), + # "encoded_length": NeuralType({0: AxisType(BatchTag)}), + # "mel_target": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + "char_phone_encoded": NeuralType(EncodedRepresentation(), ('B', 'T', 'D')), + "encoded_length": NeuralType(LengthsType(), tuple('B')), + "mel_target": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')) } @property def output_ports(self): """Returns definitions of module output ports. - - mel_output: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) - - gate_output: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - alignments: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(TimeTag) """ return { - "mel_output": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), - "gate_output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "alignments": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), + # "mel_output": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + # "gate_output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "alignments": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), + "mel_output": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), + "gate_output": NeuralType(ChannelType(), ('B', 'T')), + "alignments": NeuralType(ChannelType(), ('B', 'T', 'T')) } def __init__( @@ -326,57 +272,31 @@ class Tacotron2DecoderInfer(Tacotron2Decoder): @property def input_ports(self): """Returns definitions of module input ports. - - char_phone_encoded: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(EncodedRepresentationTag) - - encoded_length: - 0: AxisType(BatchTag) """ return { - "char_phone_encoded": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} - ), - "encoded_length": NeuralType({0: AxisType(BatchTag)}), + # "char_phone_encoded": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} + # ), + # "encoded_length": NeuralType({0: AxisType(BatchTag)}), + "char_phone_encoded": NeuralType(EncodedRepresentation(), ('B', 'T', 'D')), + "encoded_length": NeuralType(LengthsType(), tuple('B')) } @property def output_ports(self): """Returns definitions of module output ports. - - mel_output: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) - - gate_output: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - alignments: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(TimeTag) - - mel_len: - 0: AxisType(BatchTag) """ return { - "mel_output": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), - "gate_output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "alignments": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), - "mel_len": NeuralType({0: AxisType(BatchTag)}), + # "mel_output": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + # "gate_output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "alignments": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), + # "mel_len": NeuralType({0: AxisType(BatchTag)}), + "mel_output": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), + "gate_output": NeuralType(ChannelType(), ('B', 'T')), + "alignments": NeuralType(ChannelType(), ('B', 'T', 'T')), + "mel_len": NeuralType(LengthsType(), tuple('B')), } def __str__(self): @@ -411,35 +331,23 @@ class Tacotron2Postnet(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - mel_input: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) """ return { - "mel_input": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ) + # "mel_input": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ) + "mel_input": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')) } @property def output_ports(self): """Returns definitions of module output ports. - - mel_output: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) """ return { - "mel_output": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), + # "mel_output": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + "mel_output": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')) } def __init__( @@ -482,68 +390,35 @@ class Tacotron2Loss(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - mel_out: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) - - mel_out_postnet: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) - - gate_out: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - mel_target: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) - - gate_target: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - target_len: - 0: AxisType(BatchTag) - - seq_len: - 0: AxisType(BatchTag) """ return { - "mel_out": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), - "mel_out_postnet": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), - "gate_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "mel_target": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), - "gate_target": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "target_len": NeuralType({0: AxisType(BatchTag)}), - "seq_len": NeuralType({0: AxisType(BatchTag)}), + # "mel_out": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + # "mel_out_postnet": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + # "gate_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "mel_target": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + # "gate_target": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "target_len": NeuralType({0: AxisType(BatchTag)}), + # "seq_len": NeuralType({0: AxisType(BatchTag)}), + "mel_out": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), + "mel_out_postnet": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), + "gate_out": NeuralType(ChannelType(), ('B', 'T')), + "mel_target": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), + "gate_target": NeuralType(ChannelType(), ('B', 'T')), + "target_len": NeuralType(LengthsType(), tuple('B')), + "seq_len": NeuralType(LengthsType(), tuple('B')), } @property def output_ports(self): """Returns definitions of module output ports. - - loss: - NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType())} def __init__(self, pad_value: float = -11.52): super().__init__() @@ -595,34 +470,22 @@ class MakeGate(NonTrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - target_len: - 0: AxisType(BatchTag) - - mel_target: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) """ return { - "target_len": NeuralType({0: AxisType(BatchTag)}), - "mel_target": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), + # "target_len": NeuralType({0: AxisType(BatchTag)}), + # "mel_target": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + "target_len": NeuralType(LengthsType(), tuple('B')), + "mel_target": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), } @property def output_ports(self): """Returns definitions of module output ports. - - gate_target: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"gate_target": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"gate_target": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"gate_target": NeuralType(ChannelType(), ('B', 'T'))} def forward(self, target_len, mel_target): max_len = mel_target.shape[2] diff --git a/nemo/collections/tts/waveglow_modules.py b/nemo/collections/tts/waveglow_modules.py index 5e13ae73faf9..703b0e8f3458 100644 --- a/nemo/collections/tts/waveglow_modules.py +++ b/nemo/collections/tts/waveglow_modules.py @@ -41,47 +41,28 @@ class WaveGlowNM(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - mel_spectrogram: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) - - audio: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "mel_spectrogram": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), - "audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "mel_spectrogram": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + # "audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "mel_spectrogram": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), + "audio": NeuralType(AudioSignal(), ('B', 'T')), } @property def output_ports(self): """Returns definitions of module output ports. - - audio: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - log_s_list: - List? - - log_det_W_list: - List? - """ # TODO @blisc: please take a look at those definitions return { - "audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "log_s_list": NeuralType(), - "log_det_W_list": NeuralType(), + # "audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "log_s_list": NeuralType(), + # "log_det_W_list": NeuralType(), + "audio": NeuralType(AudioSignal(), ('B', 'T')), + "log_s_list": NeuralType(ChannelType()), + "log_det_W_list": NeuralType(ChannelType()), } def __init__( @@ -157,30 +138,20 @@ class WaveGlowInferNM(WaveGlowNM): @property def input_ports(self): """Returns definitions of module input ports. - - mel_spectrogram: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) """ return { - "mel_spectrogram": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ) + # "mel_spectrogram": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ) + "mel_spectrogram": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')) } @property def output_ports(self): """Returns definitions of module output ports. - - audio: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + #return {"audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"audio": NeuralType(AudioSignal(), ('B', 'T'))} def __str__(self): return "WaveGlowNM" @@ -256,33 +227,22 @@ class WaveGlowLoss(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - z: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - log_s_list: - List? - - log_det_W_list: - List? """ # TODO @blisc: please take a look at those definitions return { - "z": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "log_s_list": NeuralType(), - "log_det_W_list": NeuralType(), + # "z": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "log_s_list": NeuralType(), + # "log_det_W_list": NeuralType(), + "z": NeuralType(AudioSignal(), ('B', 'T')), + "log_s_list": NeuralType(ChannelType()), + "log_det_W_list": NeuralType(ChannelType()), } @property def output_ports(self): """Returns definitions of module output ports. - - loss: - NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType())} def __init__(self, sigma: float = 1.0): super().__init__() diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index bde89e9006c5..47bf93896dc8 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -32,6 +32,8 @@ 'PredictionsType', 'LogprobsType', 'LengthsType', + 'EmbeddedTextType', + 'EncodedRepresentation' ] import abc from abc import ABC, abstractmethod @@ -106,6 +108,11 @@ def __str__(self): return "convolutional channel value" +class EmbeddedTextType(ChannelType): + def __str__(self): + return "text embedding" + + class LogitsType(ElementType): def __str__(self): return "neural type representing logits" @@ -131,7 +138,12 @@ def __str__(self): return "neural type representing loss value" -class AcousticEncodedRepresentation(ChannelType): +class EncodedRepresentation(ChannelType): + def __str__(self): + return "encoded representation, for example, encoder's output" + + +class AcousticEncodedRepresentation(EncodedRepresentation): def __str__(self): return "encoded representation returned by the acoustic encoder model" From 24b2ca4277ed61e17236335068fde1ac7a446856 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Fri, 7 Feb 2020 10:03:32 -0800 Subject: [PATCH 12/30] fix code style Signed-off-by: Oleksii Kuchaiev --- nemo/collections/tts/data_layers.py | 2 +- nemo/collections/tts/tacotron2_modules.py | 8 ++++---- nemo/collections/tts/waveglow_modules.py | 2 +- nemo/core/neural_types/elements.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/nemo/collections/tts/data_layers.py b/nemo/collections/tts/data_layers.py index 12639eaca426..89344ec85583 100644 --- a/nemo/collections/tts/data_layers.py +++ b/nemo/collections/tts/data_layers.py @@ -5,7 +5,7 @@ from .parts.datasets import AudioOnlyDataset from nemo.backends.pytorch.nm import DataLayerNM from nemo.core import DeviceType -from nemo.core.neural_types import NeuralType, AudioSignal, LengthsType +from nemo.core.neural_types import AudioSignal, LengthsType, NeuralType class AudioDataLayer(DataLayerNM): diff --git a/nemo/collections/tts/tacotron2_modules.py b/nemo/collections/tts/tacotron2_modules.py index 697c1aa3083b..dd0f56a18816 100644 --- a/nemo/collections/tts/tacotron2_modules.py +++ b/nemo/collections/tts/tacotron2_modules.py @@ -84,7 +84,7 @@ def input_ports(self): # ), # "embedding_length": NeuralType({0: AxisType(BatchTag)}), "char_phone_embeddings": NeuralType(EmbeddedTextType(), ('B', 'D', 'T')), - "embedding_length": NeuralType(LengthsType(), tuple('B')) + "embedding_length": NeuralType(LengthsType(), tuple('B')), } @property @@ -166,7 +166,7 @@ def input_ports(self): # ), "char_phone_encoded": NeuralType(EncodedRepresentation(), ('B', 'T', 'D')), "encoded_length": NeuralType(LengthsType(), tuple('B')), - "mel_target": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')) + "mel_target": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), } @property @@ -181,7 +181,7 @@ def output_ports(self): # "alignments": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), "mel_output": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), "gate_output": NeuralType(ChannelType(), ('B', 'T')), - "alignments": NeuralType(ChannelType(), ('B', 'T', 'T')) + "alignments": NeuralType(ChannelType(), ('B', 'T', 'T')), } def __init__( @@ -279,7 +279,7 @@ def input_ports(self): # ), # "encoded_length": NeuralType({0: AxisType(BatchTag)}), "char_phone_encoded": NeuralType(EncodedRepresentation(), ('B', 'T', 'D')), - "encoded_length": NeuralType(LengthsType(), tuple('B')) + "encoded_length": NeuralType(LengthsType(), tuple('B')), } @property diff --git a/nemo/collections/tts/waveglow_modules.py b/nemo/collections/tts/waveglow_modules.py index 703b0e8f3458..06439d272ff2 100644 --- a/nemo/collections/tts/waveglow_modules.py +++ b/nemo/collections/tts/waveglow_modules.py @@ -150,7 +150,7 @@ def input_ports(self): def output_ports(self): """Returns definitions of module output ports. """ - #return {"audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} return {"audio": NeuralType(AudioSignal(), ('B', 'T'))} def __str__(self): diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index 47bf93896dc8..017b8367d341 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -33,7 +33,7 @@ 'LogprobsType', 'LengthsType', 'EmbeddedTextType', - 'EncodedRepresentation' + 'EncodedRepresentation', ] import abc from abc import ABC, abstractmethod From 4800d9e5e2c0bac3190d0c273e31ca374007c4b8 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Fri, 7 Feb 2020 10:34:51 -0800 Subject: [PATCH 13/30] chaning common collection Signed-off-by: Oleksii Kuchaiev --- nemo/backends/pytorch/common/losses.py | 21 +- nemo/backends/pytorch/common/other.py | 307 +------------------------ nemo/backends/pytorch/common/rnn.py | 64 +----- nemo/backends/pytorch/common/search.py | 33 +-- 4 files changed, 31 insertions(+), 394 deletions(-) diff --git a/nemo/backends/pytorch/common/losses.py b/nemo/backends/pytorch/common/losses.py index 4cacb1853620..60b091802c68 100644 --- a/nemo/backends/pytorch/common/losses.py +++ b/nemo/backends/pytorch/common/losses.py @@ -105,19 +105,10 @@ class CrossEntropyLoss(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - logits: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - labels: - 0: AxisType(BatchTag) - """ return { - "logits": NeuralType(elements_type=LogitsType, axes=('B', 'D')), - "labels": NeuralType(elements_type=LabelsType, axes=tuple('B')), + "logits": NeuralType(elements_type=LogitsType(), axes=('B', 'D')), + "labels": NeuralType(elements_type=LabelsType(), axes=tuple('B')), } @property @@ -127,7 +118,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(elements_type=LossType)} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, weight=None): super().__init__() @@ -152,8 +143,8 @@ def input_ports(self): 0: AxisType(RegressionTag) """ return { - "preds": NeuralType(RegressionValuesType, tuple('B')), - "labels": NeuralType(LabelsType, tuple('B')), + "preds": NeuralType(RegressionValuesType(), tuple('B')), + "labels": NeuralType(LabelsType(), tuple('B')), } @property @@ -163,7 +154,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(elements_type=LossType)} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self): super().__init__() diff --git a/nemo/backends/pytorch/common/other.py b/nemo/backends/pytorch/common/other.py index 58790a1727be..7de337619f01 100644 --- a/nemo/backends/pytorch/common/other.py +++ b/nemo/backends/pytorch/common/other.py @@ -1,12 +1,7 @@ # Copyright (c) 2019 NVIDIA Corporation """Core PyTorch-base Neural Modules""" __all__ = [ - 'SimpleCombiner', - 'ArgMaxSimple', - 'TableLookUp', - 'TableLookUp2', 'SequenceEmbedding', - 'SequenceProjection', 'ZerosLikeNM', ] @@ -20,262 +15,20 @@ from nemo.core.neural_types import * -class SimpleCombiner(TrainableNM): - """Performs simple combination of two NmTensors. For example, it can - perform x1 + x2. - - Args: - mode (str): Can be ['add', 'sum', 'max']. - Defaults to 'add'. - - """ - - @property - def input_ports(self): - """Returns definitions of module input ports. - """ - if self._input_ports is None: - return {"x1": NeuralType(VoidType()), "x2": NeuralType(VoidType())} - else: - return self._input_ports - - @property - def output_ports(self): - """Returns definitions of module output ports. - - combined: - None - """ - if self._output_ports is None: - return {"combined": NeuralType(VoidType())} - else: - return self._output_ports - - def __init__(self, mode="add", input_ports=None, output_ports=None): - super().__init__() - self._mode = mode - self._input_ports = input_ports - self._output_ports = output_ports - - def forward(self, x1, x2): - if self._mode == "add" or self._mode == "sum": - return x1 + x2 - elif self._mode == "max": - return torch.max(x1, x2, out=None) - else: - raise NotImplementedError("SimpleCombiner does not have {0} mode".format(self._mode)) - - -class ArgMaxSimple(TrainableNM): # Notice TWO base classes - """ - """ - - @property - def input_ports(self): - """Returns definitions of module input ports. - - x: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - """ - return {"x": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} - - @property - def output_ports(self): - """Returns definitions of module output ports. - - values: - 0: AxisType(BatchTag) - - indices: - 0: AxisType(BatchTag) - """ - return { - "values": NeuralType({0: AxisType(BatchTag)}), - "indices": NeuralType({0: AxisType(BatchTag)}), - } - - def __init__(self): - super().__init__() - - # this method is key method you need to overwrite from PyTorch - # nn.Module's API - def forward(self, x): - values, indices = torch.max(x, 1) - return values, indices - - -class TableLookUp(NeuralModule): - """Performs a table lookup. For example, convert class ids to names""" - - def __init__(self, ids2classes=None): - NeuralModule.__init__(self) - - if ids2classes is None: - ids2classes = {} - self._ids2classes = ids2classes - - @property - def input_ports(self): - """Returns definitions of module input ports. - - indices: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - """ - return {"indices": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)})} - - @property - def output_ports(self): - """Returns definitions of module output ports. - - indices: - 0: AxisType(BatchTag) - 1: AxisType(TimeTag) - """ - return {"indices": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} - - def set_weights(self, name2weight: Dict[(str, bool)], name2name_and_transform): - pass - - def tie_weights_with(self, module, weight_names): - pass - - def save_to(self, path): - pass - - def restore_from(self, path): - pass - - def freeze(self, weights: Set[str] = None): - pass - - def unfreeze(self, weights: Set[str] = None): - pass - - def __call__(self, force_pt=False, *input, **kwargs): - pt_call = len(input) > 0 or force_pt - if pt_call: - # [inds] = kwargs.values() - # np_inds = inds.detach().cpu().numpy().reshape(-1) - # result = [self._ids2classes[i] for i in np_inds] - # #result = list(map(lambda x: self._ids2classes[x], np_inds)) - # return result - inds = kwargs["indices"] - np_inds = inds.detach().transpose_(1, 0).cpu().numpy().tolist() - result = [] - for lst in np_inds: - sublst = [] - for tid in lst: - if tid != 1: - sublst.append(tid) - else: - break - result.append(list(map(lambda x: self._ids2classes[x], sublst))) - return [result] - else: - return NeuralModule.__call__(self, **kwargs) - - def parameters(self): - return None - - def get_weights(self) -> Iterable[Optional[Mapping]]: - return None - - -class TableLookUp2(NeuralModule): - """Performs a table lookup. For example, convert class ids to names""" - - def set_weights(self, name2weight: Dict[(str, bool)], name2name_and_transform): - pass - - def tie_weights_with(self, module, weight_names): - pass - - def save_to(self, path): - pass - - def restore_from(self, path): - pass - - def freeze(self, weights: Set[str] = None): - pass - - def unfreeze(self, weights: Set[str] = None): - pass - - @property - def input_ports(self): - """Returns definitions of module input ports. - - """ - return {} - - @property - def output_ports(self): - """Returns definitions of module output ports. - - classes: - None - """ - return {"classes": None} - - def __init__(self, detokenizer=None): - NeuralModule.__init__(self) - self._detokenizer = detokenizer - - def __call__(self, force_pt=False, *input, **kwargs): - pt_call = len(input) > 0 or force_pt - if pt_call: - # [inds] = kwargs.values() - inds = kwargs["indices"] - np_inds = inds.detach().cpu().numpy().tolist() - result = [] - for lst in np_inds: - sublst = [] - for tid in lst: - if tid != 1: - sublst.append(tid) - else: - break - result.append(self._detokenizer(sublst)) - return result - else: - return NeuralModule.__call__(self, **kwargs) - - def parameters(self): - return None - - def get_weights(self) -> Iterable[Optional[Mapping]]: - return None - - class SequenceEmbedding(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - input_seq: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) """ - return {"input_seq": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)})} + # return {"input_seq": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)})} + return {"input_seq": NeuralModule(ChannelType(), ('T', 'B'))} @property def output_ports(self): """Returns definitions of module output ports. - - outputs: - 0: AxisType(TimeTag) - - 1: AxisType(BatchTag) - - 2: AxisType(ChannelTag) """ - return {"outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),})} + # return {"outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),})} + return {"outputs": NeuralType(ChannelType(), ('T', 'B', 'D'))} def __init__(self, voc_size, hidden_size, dropout=0.0): super().__init__() @@ -294,64 +47,20 @@ def forward(self, input_seq): return embedded -class SequenceProjection(TrainableNM): - @property - def input_ports(self): - """Returns definitions of module input ports. - - input_seq: - Empty Type?!? - """ - return {"input_seq": NeuralType({})} - - @property - def output_ports(self): - """Returns definitions of module output ports. - - outputs: - None - """ - return {"outputs": None} - - def __init__(self, from_dim, to_dim, dropout=0.0): - super().__init__() - - self.from_dim = from_dim - self.to_dim = to_dim - self.dropout = dropout - self.projection = nn.Linear(self.from_dim, self.to_dim, bias=False) - if self.dropout != 0.0: - self.embedding_dropout = nn.Dropout(self.dropout) - - def forward(self, input_seq): - p = self.projection(input_seq) - if self.dropout != 0.0: - p = self.dropout(p) - return p - - class ZerosLikeNM(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag),})} + # return {"input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag),})} + return {"input_type_ids": NeuralType(VoidType(), ('B', 'T'))} @property def output_ports(self): """Returns definitions of module output ports. - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag),})} + # return {"input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag),})} + return {"input_type_ids": NeuralType(ChannelType(), ('B', 'T'))} def __init__(self): super().__init__() diff --git a/nemo/backends/pytorch/common/rnn.py b/nemo/backends/pytorch/common/rnn.py index 95724a9fa6ad..7777c699bb9a 100644 --- a/nemo/backends/pytorch/common/rnn.py +++ b/nemo/backends/pytorch/common/rnn.py @@ -67,18 +67,6 @@ class DecoderRNN(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - targets: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - encoder_outputs: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ return { # 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), @@ -92,20 +80,6 @@ def input_ports(self): @property def output_ports(self): """Returns definitions of module output ports. - - log_probs: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - attention_weights: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(TimeTag) """ return { # 'log_probs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}), @@ -231,45 +205,23 @@ class EncoderRNN(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - targets: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - encoder_outputs: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ return { - 'inputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - 'input_lens': NeuralType({0: AxisType(BatchTag),}, optional=True), + # 'inputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # 'input_lens': NeuralType({0: AxisType(BatchTag),}, optional=True), + 'inputs': NeuralType(ChannelType(), ('B', 'T')), + 'input_lens': NeuralType(LengthsType(), tuple('B')), } @property def output_ports(self): """Returns definitions of module output ports. - - log_probs: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - attention_weights: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(TimeTag) """ return { - 'outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - 'hidden': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # 'outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # 'hidden': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + 'outputs': NeuralType(ChannelType(), ('B', 'T', 'D')), + 'hidden': NeuralType(ChannelType(), ('B', 'T', 'D')), } def __init__( diff --git a/nemo/backends/pytorch/common/search.py b/nemo/backends/pytorch/common/search.py index f58cab7034d0..2051a648b6cb 100644 --- a/nemo/backends/pytorch/common/search.py +++ b/nemo/backends/pytorch/common/search.py @@ -3,7 +3,7 @@ import torch from nemo.backends.pytorch.nm import NonTrainableNM -from nemo.core import AxisType +from nemo.core.neural_types import ChannelType, NeuralType INF = float('inf') BIG_NUM = 1e4 @@ -31,39 +31,24 @@ class GreedySearch(NonTrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - encoder_outputs: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ return { - 'encoder_outputs': NeuralType( - {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True, - ) + # 'encoder_outputs': NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True, + # ) + "encoder_outputs": NeuralType(ChannelType(), ('B', 'T', 'D'), optional=True) } @property def output_ports(self): """Returns definitions of module output ports. - predictions: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - attention_weights: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(TimeTag) """ return { - 'predictions': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - 'attention_weights': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), + # 'predictions': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # 'attention_weights': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), + "predictions": NeuralType(ChannelType(), ('B', 'T')), + "attention_weights": NeuralType(ChannelType(), ('B', 'T', 'T')), } def __init__(self, decoder, pad_id, bos_id, eos_id, max_len, batch_size=None): From 7b44c95ceab9a5ba7aa91084febb63d9eddeb46f Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Fri, 7 Feb 2020 10:47:51 -0800 Subject: [PATCH 14/30] update gan collection Signed-off-by: Oleksii Kuchaiev --- nemo/collections/simple_gan/gan.py | 283 +++++++++++------------------ 1 file changed, 102 insertions(+), 181 deletions(-) diff --git a/nemo/collections/simple_gan/gan.py b/nemo/collections/simple_gan/gan.py index 16e83bbdf5c5..4ea0dc0bdb05 100644 --- a/nemo/collections/simple_gan/gan.py +++ b/nemo/collections/simple_gan/gan.py @@ -4,8 +4,9 @@ from torch.utils.data import Dataset from torchvision import datasets, transforms -from nemo.backends.pytorch.nm import DataLayerNM, LossNM, NonTrainableNM, TrainableNM -from nemo.core import AxisType, BatchTag, ChannelTag, DeviceType, HeightTag, NeuralType, WidthTag +from nemo.backends.pytorch.nm import DataLayerNM, LossNM, TrainableNM +from nemo.core import DeviceType +from nemo.core.neural_types import ChannelType, LabelsType, LossType, NeuralType class SimpleDiscriminator(TrainableNM): @@ -16,37 +17,25 @@ class SimpleDiscriminator(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - image: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, 28) - - 3: AxisType(WidthTag, 28) """ return { - "image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 28), - 3: AxisType(WidthTag, 28), - } - ) + # "image": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, 28), + # 3: AxisType(WidthTag, 28), + # } + # ) + "image": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')) } @property def output_ports(self): """Returns definitions of module output ports. - - decision: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag, 1) """ - return {"decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)})} + # return {"decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)})} + return {"decision": NeuralType(ChannelType(), ('B', 'C'))} def __init__(self): super().__init__() @@ -78,49 +67,33 @@ class SimpleGenerator(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - latents: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag, 64) - - 2: AxisType(HeightTag, 4) - - 3: AxisType(WidthTag, 4) """ return { - "latents": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag, 64), - 2: AxisType(HeightTag, 4), - 3: AxisType(WidthTag, 4), - } - ) + # "latents": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag, 64), + # 2: AxisType(HeightTag, 4), + # 3: AxisType(WidthTag, 4), + # } + # ) + "latents": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')) } @property def output_ports(self): """Returns definitions of module output ports. - - image: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, 28) - - 3: AxisType(WidthTag, 28) """ return { - "image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 28), - 3: AxisType(WidthTag, 28), - } - ) + # "image": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, 28), + # 3: AxisType(WidthTag, 28), + # } + # ) + "image": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')) } def __init__(self): @@ -162,7 +135,8 @@ def input_ports(self): 1: AxisType(ChannelTag, 1) """ return { - "decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)}), + # "decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)}), + "decision": NeuralType(ChannelType(), ('B', 'D')) } @property @@ -197,31 +171,19 @@ class GradientPenalty(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - interpolated_image: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, 28) - - 3: AxisType(WidthTag, 28) - - interpolated_decision: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag, 1) """ return { - "interpolated_image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 28), - 3: AxisType(WidthTag, 28), - } - ), - "interpolated_decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)}), + # "interpolated_image": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, 28), + # 3: AxisType(WidthTag, 28), + # } + # ), + # "interpolated_decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)}), + "interpolated_image": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')), + "interpolated_decision": NeuralType(ChannelType(), ('B', 'C')), } @property @@ -231,7 +193,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType())} def __init__(self, lambda_): super().__init__() @@ -266,66 +228,42 @@ class InterpolateImage(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - image1: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, 28) - - 3: AxisType(WidthTag, 28) - - image2: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, 28) - - 3: AxisType(WidthTag, 28) """ return { - "image1": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 28), - 3: AxisType(WidthTag, 28), - } - ), - "image2": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 28), - 3: AxisType(WidthTag, 28), - } - ), + # "image1": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, 28), + # 3: AxisType(WidthTag, 28), + # } + # ), + # "image2": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, 28), + # 3: AxisType(WidthTag, 28), + # } + # ), + "image1": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')), + "image2": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')), } @property def output_ports(self): """Returns definitions of module output ports. - - interpolated_image: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, 28) - - 3: AxisType(WidthTag, 28) """ return { - "interpolated_image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 28), - 3: AxisType(WidthTag, 28), - } - ) + # "interpolated_image": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, 28), + # 3: AxisType(WidthTag, 28), + # } + # ) + "interpolated_image": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')) } def __init__(self): @@ -362,14 +300,15 @@ def output_ports(self): 3: AxisType(WidthTag, 4) """ return { - "latent": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag, 64), - 2: AxisType(HeightTag, 4), - 3: AxisType(WidthTag, 4), - } - ) + # "latent": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag, 64), + # 2: AxisType(HeightTag, 4), + # 3: AxisType(WidthTag, 4), + # } + # ) + "latent": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')) } def __init__(self, batch_size): @@ -415,46 +354,28 @@ class MnistGanDataLayer(DataLayerNM): @property def output_ports(self): """Returns definitions of module output ports. - - latent: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag, 64) - - 2: AxisType(HeightTag, 4) - - 3: AxisType(WidthTag, 4) - - image: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, user defined) - - 3: AxisType(WidthTag, user defined) - - label: - 0: AxisType(BatchTag) """ return { - "latent": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag, 64), - 2: AxisType(HeightTag, 4), - 3: AxisType(WidthTag, 4), - } - ), - "image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, self._input_size[1]), - 3: AxisType(WidthTag, self._input_size[0]), - } - ), - "label": NeuralType({0: AxisType(BatchTag)}), + # "latent": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag, 64), + # 2: AxisType(HeightTag, 4), + # 3: AxisType(WidthTag, 4), + # } + # ), + # "image": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, self._input_size[1]), + # 3: AxisType(WidthTag, self._input_size[0]), + # } + # ), + # "label": NeuralType({0: AxisType(BatchTag)}), + "latent": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')), + "image": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')), + "label": NeuralType(LabelsType(), tuple('B')), } def __init__(self, batch_size, root, train=True, shuffle=True): From 45f30ec2f8a3e16e2964c72dac13621aac8412a8 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Fri, 7 Feb 2020 11:00:11 -0800 Subject: [PATCH 15/30] neural types fix in dialog Signed-off-by: Oleksii Kuchaiev --- .../nlp/nm/data_layers/state_tracking_trade_datalayer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py index da51068b8519..28b43173711a 100644 --- a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py @@ -43,7 +43,7 @@ import nemo from nemo.collections.nlp.data.datasets import MultiWOZDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core.neural_types import ChannelType, LabelsType, NeuralType +from nemo.core.neural_types import ChannelType, LabelsType, NeuralType, LengthsType __all__ = ['MultiWOZDataLayer'] @@ -75,9 +75,9 @@ def output_ports(self): # "gating_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), # "turn_domain": NeuralType(None), "src_ids": NeuralType(ChannelType(), ('B', 'T')), - "src_lens": NeuralType(ChannelType(), tuple('B')), + "src_lens": NeuralType(LengthsType(), tuple('B')), "tgt_ids": NeuralType(ChannelType(), ('B', 'D', 'T')), - "tgt_lens": NeuralType(ChannelType(), ('B', 'D')), + "tgt_lens": NeuralType(LengthsType(), ('B', 'D')), "gating_labels": NeuralType(LabelsType(), ('B', 'D')), "turn_domain": NeuralType(), } From 396c427dba6ca8914f3924abb5a345df8ca3f2ac Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Fri, 7 Feb 2020 11:01:04 -0800 Subject: [PATCH 16/30] fix types in dialog Signed-off-by: Oleksii Kuchaiev --- .../nlp/nm/data_layers/state_tracking_trade_datalayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py index 28b43173711a..2cf2eb08951f 100644 --- a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py @@ -43,7 +43,7 @@ import nemo from nemo.collections.nlp.data.datasets import MultiWOZDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core.neural_types import ChannelType, LabelsType, NeuralType, LengthsType +from nemo.core.neural_types import ChannelType, LabelsType, LengthsType, NeuralType __all__ = ['MultiWOZDataLayer'] From 6eca99481ca4aaf656312ff69f78982528d09076 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Fri, 7 Feb 2020 13:47:53 -0800 Subject: [PATCH 17/30] fixing types in trade example Signed-off-by: Oleksii Kuchaiev --- nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py | 4 ++-- .../dialogue_state_tracking/state_tracking_trade_nm.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py b/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py index 662de183a183..015486be08e5 100644 --- a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py +++ b/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py @@ -39,7 +39,7 @@ import torch from nemo.backends.pytorch.nm import LossNM -from nemo.core.neural_types import ChannelType, LabelsType, LogitsType, LossType, NeuralType +from nemo.core.neural_types import ChannelType, LabelsType, LengthsType, LogitsType, LossType, NeuralType __all__ = ['TRADEMaskedCrossEntropy', 'CrossEntropyLoss3D'] @@ -75,7 +75,7 @@ def input_ports(self): # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), "logits": NeuralType(LogitsType(), ('B', 'T', 'D', 'D')), "targets": NeuralType(ChannelType(), ('B', 'D', 'T')), - "loss_mask": NeuralType(ChannelType(), ('B', 'D')), + "loss_mask": NeuralType(LengthsType(), ('B', 'D')), } @property diff --git a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py index 0ac416d4ab73..fc5977c727b1 100644 --- a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py +++ b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py @@ -45,7 +45,7 @@ from torch import nn as nn from nemo.backends.pytorch.nm import TrainableNM -from nemo.core.neural_types import ChannelType, LengthsType, NeuralType +from nemo.core.neural_types import ChannelType, LengthsType, LogitsType, NeuralType __all__ = ['TRADEGenerator'] @@ -95,8 +95,8 @@ def output_ports(self): # 'gate_outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}), # } return { - 'point_outputs': NeuralType(ChannelType(), ('B', 'T', 'D', 'D')), - 'gate_outputs': NeuralType(ChannelType(), ('B', 'D', 'D')), + 'point_outputs': NeuralType(LogitsType(), ('B', 'T', 'D', 'D')), + 'gate_outputs': NeuralType(LogitsType(), ('B', 'D', 'D')), } def __init__(self, vocab, embeddings, hid_size, dropout, slots, nb_gate, teacher_forcing=0.5): From 9d202108d30e5ed52a4ae5ca9bd11969de8db99f Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Fri, 7 Feb 2020 14:06:55 -0800 Subject: [PATCH 18/30] fix types in gleu scripts unittests in parallel Signed-off-by: Oleksii Kuchaiev --- Jenkinsfile | 21 ++------------------- nemo/core/neural_types/elements.py | 2 +- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 61be16bc01de..d088e924bd38 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -8,7 +8,6 @@ pipeline { disableConcurrentBuilds() } stages { - stage('PyTorch version') { steps { sh 'python -c "import torch; print(torch.__version__)"' @@ -24,27 +23,11 @@ pipeline { sh 'python setup.py style' } } - stage('Unittests Core') { - steps { - sh './reinstall.sh && python -m unittest tests/core/*.py' - } - } - stage('Unittests ASR') { + stage('Unittests') { steps { - sh 'python -m unittest tests/asr/*.py' + sh './reinstall.sh && python -m unittest' } } - stage('Unittests NLP') { - steps { - sh 'python -m unittest tests/nlp/*.py' - } - } - stage('Unittests TTS') { - steps { - sh 'python -m unittest tests/tts/*.py' - } - } - stage('Parallel Stage1') { failFast true parallel { diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index 017b8367d341..ad66f5e7b654 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -176,7 +176,7 @@ def __str__(self): return "mfcc spectorgram type" -class PredictionsType(ElementType): +class PredictionsType(LabelsType): def __str__(self): return "predictions values type" From a6d779accf63c9e94ed2e9cc748996abe50625ea Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Fri, 7 Feb 2020 14:35:15 -0800 Subject: [PATCH 19/30] fix styles in asr postprocessing Signed-off-by: Oleksii Kuchaiev --- .../nlp/nm/losses/padded_smoothed_cross_entropy_loss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py b/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py index b56717af885d..1c14dbf545e2 100644 --- a/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py +++ b/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py @@ -17,7 +17,7 @@ from nemo.backends.pytorch import LossNM from nemo.collections.nlp.nm.losses.smoothed_cross_entropy_loss import SmoothedCrossEntropyLoss from nemo.collections.nlp.utils.common_nlp_utils import mask_padded_tokens -from nemo.core import AxisType, ChannelType, LogitsType, LossType, NeuralType +from nemo.core import LabelsType, LogitsType, LossType, NeuralType __all__ = ['PaddedSmoothedCrossEntropyLossNM'] @@ -43,7 +43,7 @@ def input_ports(self): # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), # "target_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "logits": NeuralType(LogitsType(), ('B', 'T', 'D')), - "target_ids": NeuralType(ChannelType(), ('B', 'T')), + "target_ids": NeuralType(LabelsType(), ('B', 'T')), } @property From e5f0544475a5ef599e77ac7f86a66d7902c2c221 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Sat, 8 Feb 2020 13:59:10 -0800 Subject: [PATCH 20/30] fix types in intent classification Signed-off-by: Oleksii Kuchaiev --- .../joint_intent_slot_with_bert.py | 2 +- .../nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_with_bert.py b/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_with_bert.py index 98a060f24ea0..0c0cc8689d3f 100644 --- a/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_with_bert.py +++ b/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_with_bert.py @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright 2019 AI Applications Design Team at NVIDIA. All Rights Reserved. +# Copyright 2019 NVIDIA. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py b/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py index faa273919d15..0f31bdab513c 100644 --- a/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py +++ b/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py @@ -63,7 +63,7 @@ def output_ports(self): # "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), # "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), "intent_logits": NeuralType(LogitsType(), ('B', 'D')), - "slot_logits": NeuralType(LogitsType(), ('B', 'D')), + "slot_logits": NeuralType(LogitsType(), ('B', 'T', 'D')), } def __init__(self, hidden_size, num_intents, num_slots, dropout=0.0, use_transformer_pretrained=True, **kwargs): From 3bfb9f96cb120f59e54a043d3aa2975f593a3bc9 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Sat, 8 Feb 2020 15:01:24 -0800 Subject: [PATCH 21/30] fix beam search Signed-off-by: Oleksii Kuchaiev --- nemo/collections/asr/beam_search_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/asr/beam_search_decoder.py b/nemo/collections/asr/beam_search_decoder.py index 70f0517330cd..793e899016f4 100644 --- a/nemo/collections/asr/beam_search_decoder.py +++ b/nemo/collections/asr/beam_search_decoder.py @@ -56,7 +56,7 @@ def output_ports(self): predictions: NeuralType(None) """ - return {"predictions": NeuralType(None)} + return {"predictions": NeuralType(VoidType())} def __init__(self, vocab, beam_width, alpha, beta, lm_path, num_cpus, cutoff_prob=1.0, cutoff_top_n=40): From 00445a75a07d0ef3ef52850d7fd4382670f3ff22 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Sat, 8 Feb 2020 15:22:39 -0800 Subject: [PATCH 22/30] los fix Signed-off-by: Oleksii Kuchaiev --- nemo/collections/simple_gan/gan.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/nemo/collections/simple_gan/gan.py b/nemo/collections/simple_gan/gan.py index 4ea0dc0bdb05..4d8f48e6cdbb 100644 --- a/nemo/collections/simple_gan/gan.py +++ b/nemo/collections/simple_gan/gan.py @@ -142,11 +142,8 @@ def input_ports(self): @property def output_ports(self): """Returns definitions of module output ports. - - loss: - NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(LossType())} def __init__(self, neg=False): super().__init__() From 2695e3354a996c17a0a91e95d3a53d4461795827 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Mon, 10 Feb 2020 14:54:24 -0800 Subject: [PATCH 23/30] addressing comments from @yzhang123 and @blisc Signed-off-by: Oleksii Kuchaiev --- CHANGELOG.md | 4 ++++ nemo/backends/pytorch/tutorials/toys.py | 10 ++-------- nemo/collections/asr/audio_preprocessing.py | 9 --------- nemo/collections/asr/beam_search_decoder.py | 3 ++- 4 files changed, 8 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fbf6d6ac532..ce02933de720 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -70,12 +70,16 @@ To release a new version, please update the changelog as followed: ## [Unreleased] ### Added +- New Neural Type System and its tests. +([PR #307](https://github.com/NVIDIA/NeMo/pull/307)) - @okuchaiev - Named tensors tuple module's output for graph construction. ([PR #268](https://github.com/NVIDIA/NeMo/pull/268)) - @stasbel - Introduced the `deprecated` decorator. ([PR #298](https://github.com/NVIDIA/NeMo/pull/298)) - @tkornuta-nvidia ### Changed +- All collections changed to use New Neural Type System. +([PR #307](https://github.com/NVIDIA/NeMo/pull/307)) - @okuchaiev - Additional Collections Repositories merged into core `nemo_toolkit` package. ([PR #289](https://github.com/NVIDIA/NeMo/pull/289)) - @DEKHTIARJonathan - Refactor manifest files parsing and processing for re-using. diff --git a/nemo/backends/pytorch/tutorials/toys.py b/nemo/backends/pytorch/tutorials/toys.py index 324d26e50077..0708ea65beb9 100644 --- a/nemo/backends/pytorch/tutorials/toys.py +++ b/nemo/backends/pytorch/tutorials/toys.py @@ -189,11 +189,8 @@ def input_ports(self): @property def output_ports(self): """Returns definitions of module output ports. - - loss: - NeuralType(None) """ - return {"loss": NeuralType(axes=None)} + return {"loss": NeuralType(LossType())} def __init__(self): super().__init__() @@ -216,11 +213,8 @@ def input_ports(self): @property def output_ports(self): """Returns definitions of module output ports. - - loss: - NeuralType(None) """ - return {"loss": NeuralType(axes=None)} + return {"loss": NeuralType(LossType())} def __init__(self): super().__init__() diff --git a/nemo/collections/asr/audio_preprocessing.py b/nemo/collections/asr/audio_preprocessing.py index 54ab17515b46..f2950162a346 100644 --- a/nemo/collections/asr/audio_preprocessing.py +++ b/nemo/collections/asr/audio_preprocessing.py @@ -121,15 +121,6 @@ class AudioToSpectrogramPreprocessor(AudioPreprocessor): @property def input_ports(self): """Returns definitions of module input ports. - - input_signal: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - length: - 0: AxisType(BatchTag) - """ return { # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), diff --git a/nemo/collections/asr/beam_search_decoder.py b/nemo/collections/asr/beam_search_decoder.py index 793e899016f4..2cb919ee4fe4 100644 --- a/nemo/collections/asr/beam_search_decoder.py +++ b/nemo/collections/asr/beam_search_decoder.py @@ -56,7 +56,8 @@ def output_ports(self): predictions: NeuralType(None) """ - return {"predictions": NeuralType(VoidType())} + # return {"predictions": NeuralType(VoidType())} + return {"predictions": NeuralType(PredictionsType(), ('B', 'T'))} def __init__(self, vocab, beam_width, alpha, beta, lm_path, num_cpus, cutoff_prob=1.0, cutoff_top_n=40): From 781ebc5810c885522bc3f0a6b71b5cb842f84c04 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Tue, 11 Feb 2020 13:07:27 -0800 Subject: [PATCH 24/30] address review's comments Signed-off-by: Oleksii Kuchaiev --- nemo/backends/pytorch/tutorials/chatbot/modules.py | 2 +- nemo/backends/pytorch/tutorials/toys.py | 2 +- nemo/collections/asr/losses.py | 2 +- .../dialogue_state_tracking/state_tracking_trade_nm.py | 5 +++-- nemo/core/neural_types/__init__.py | 8 ++++---- nemo/core/neural_types/elements.py | 2 +- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/nemo/backends/pytorch/tutorials/chatbot/modules.py b/nemo/backends/pytorch/tutorials/chatbot/modules.py index d665fc840fb0..ca37d874ec52 100644 --- a/nemo/backends/pytorch/tutorials/chatbot/modules.py +++ b/nemo/backends/pytorch/tutorials/chatbot/modules.py @@ -76,7 +76,7 @@ def input_ports(self): """ return { "input_seq": NeuralType(ChannelType(), ('T', 'B')), - "input_lengths": NeuralType(ChannelType(), tuple('B')), + "input_lengths": NeuralType(LengthsType(), tuple('B')), } @property diff --git a/nemo/backends/pytorch/tutorials/toys.py b/nemo/backends/pytorch/tutorials/toys.py index 0708ea65beb9..8b18d9890c60 100644 --- a/nemo/backends/pytorch/tutorials/toys.py +++ b/nemo/backends/pytorch/tutorials/toys.py @@ -241,7 +241,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(axes=None)} + return {"loss": NeuralType(LossType())} def __init__(self): # Neural Module API specific diff --git a/nemo/collections/asr/losses.py b/nemo/collections/asr/losses.py index c29a0dba78be..a18f7c14aac2 100644 --- a/nemo/collections/asr/losses.py +++ b/nemo/collections/asr/losses.py @@ -25,7 +25,7 @@ def input_ports(self): # "input_length": NeuralType({0: AxisType(BatchTag)}), # "target_length": NeuralType({0: AxisType(BatchTag)}), "log_probs": NeuralType(LogprobsType(), ('B', 'T', 'D')), - "targets": NeuralType(ChannelType(), ('B', 'T')), + "targets": NeuralType(PredictionsType(), ('B', 'T')), "input_length": NeuralType(LengthsType(), tuple('B')), "target_length": NeuralType(LengthsType(), tuple('B')), } diff --git a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py index da0a7f0682ce..bd29209918c2 100644 --- a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py +++ b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py @@ -45,7 +45,7 @@ from torch import nn as nn from nemo.backends.pytorch.nm import TrainableNM -from nemo.core.neural_types import ChannelType, LengthsType, LogitsType, NeuralType +from nemo.core.neural_types import ChannelType, LabelsType, LengthsType, LogitsType, NeuralType __all__ = ['TRADEGenerator'] @@ -76,7 +76,8 @@ def input_ports(self): 'encoder_outputs': NeuralType(ChannelType(), ('B', 'T', 'C')), 'input_lens': NeuralType(LengthsType(), tuple('B')), 'src_ids': NeuralType(ChannelType(), ('B', 'T')), - 'targets': NeuralType(ChannelType(), ('B', 'D', 'T')), + # 'targets': NeuralType(ChannelType(), ('B', 'D', 'T')), + 'targets': NeuralType(LabelsType(), ('B', 'D', 'T')), } @property diff --git a/nemo/core/neural_types/__init__.py b/nemo/core/neural_types/__init__.py index 124adc132c72..1fb5bf349076 100644 --- a/nemo/core/neural_types/__init__.py +++ b/nemo/core/neural_types/__init__.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .axes import * -from .comparison import * -from .elements import * -from .neural_type import * +from nemo.core.neural_types.axes import * +from nemo.core.neural_types.comparison import * +from nemo.core.neural_types.elements import * +from nemo.core.neural_types.neural_type import * diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index ad66f5e7b654..dd7fcd754f98 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -39,7 +39,7 @@ from abc import ABC, abstractmethod from typing import Dict, Optional, Tuple -from .comparison import NeuralTypeComparisonResult +from nemo.core.neural_types.comparison import NeuralTypeComparisonResult class ElementType(ABC): From 51120d90d0f2597d8c63db52855dc299c54e17aa Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Tue, 11 Feb 2020 13:25:06 -0800 Subject: [PATCH 25/30] fix unittests Signed-off-by: Oleksii Kuchaiev --- nemo/backends/pytorch/common/rnn.py | 2 +- nemo/collections/asr/data_layer.py | 2 +- nemo/collections/asr/losses.py | 2 +- tests/asr/test_zeroDS.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nemo/backends/pytorch/common/rnn.py b/nemo/backends/pytorch/common/rnn.py index 43348d3d3d2c..774e11807edf 100644 --- a/nemo/backends/pytorch/common/rnn.py +++ b/nemo/backends/pytorch/common/rnn.py @@ -70,7 +70,7 @@ def input_ports(self): """ return { # 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - 'targets': NeuralType(ChannelType(), ('B', 'T')), + 'targets': NeuralType(LabelsType(), ('B', 'T')), # 'encoder_outputs': NeuralType( # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True, # ), diff --git a/nemo/collections/asr/data_layer.py b/nemo/collections/asr/data_layer.py index 20df98b2add7..83d959a09974 100644 --- a/nemo/collections/asr/data_layer.py +++ b/nemo/collections/asr/data_layer.py @@ -102,7 +102,7 @@ def output_ports(self): # 'transcript_length': NeuralType({0: AxisType(BatchTag)}), 'audio_signal': NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), 'a_sig_length': NeuralType(LengthsType(), tuple('B')), - 'transcripts': NeuralType(ChannelType(), ('B', 'T')), + 'transcripts': NeuralType(LabelsType(), ('B', 'T')), 'transcript_length': NeuralType(LengthsType(), tuple('B')), } diff --git a/nemo/collections/asr/losses.py b/nemo/collections/asr/losses.py index a18f7c14aac2..a9b77fe03e0b 100644 --- a/nemo/collections/asr/losses.py +++ b/nemo/collections/asr/losses.py @@ -25,7 +25,7 @@ def input_ports(self): # "input_length": NeuralType({0: AxisType(BatchTag)}), # "target_length": NeuralType({0: AxisType(BatchTag)}), "log_probs": NeuralType(LogprobsType(), ('B', 'T', 'D')), - "targets": NeuralType(PredictionsType(), ('B', 'T')), + "targets": NeuralType(LabelsType(), ('B', 'T')), "input_length": NeuralType(LengthsType(), tuple('B')), "target_length": NeuralType(LengthsType(), tuple('B')), } diff --git a/tests/asr/test_zeroDS.py b/tests/asr/test_zeroDS.py index 7c45720fc18f..e2c9bd6f7373 100644 --- a/tests/asr/test_zeroDS.py +++ b/tests/asr/test_zeroDS.py @@ -112,7 +112,7 @@ def test_asr_with_zero_ds(self): (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 64), AxisType(AxisKind.Time, 64)), ), "processed_length": NeuralType(LengthsType(), tuple('B')), - "transcript": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Time, 64))), + "transcript": NeuralType(LabelsType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Time, 64))), "transcript_length": NeuralType(LengthsType(), tuple('B')), }, ) From 5ddb513c353af6e422491033f4df0061ec30f130 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Tue, 11 Feb 2020 17:01:03 -0800 Subject: [PATCH 26/30] fix chatbot example Signed-off-by: Oleksii Kuchaiev --- examples/start_here/chatbot_example.py | 4 +++- nemo/backends/pytorch/tutorials/chatbot/modules.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/start_here/chatbot_example.py b/examples/start_here/chatbot_example.py index c5107411525d..ca2950c22bce 100644 --- a/examples/start_here/chatbot_example.py +++ b/examples/start_here/chatbot_example.py @@ -65,10 +65,12 @@ def outputs2words(tensors, vocab): tensors=[loss, src, outputs_inf, tgt], print_func=lambda x: outputs2words(x, dl.voc.index2word), ) +num_epochs = 1 +logging.info(f"Training only for {num_epochs}. Train longer (~10-20) for convergence.") # Start training nf.train( tensors_to_optimize=[loss], callbacks=[callback], optimizer="adam", - optimization_params={"num_epochs": 3, "lr": 0.001}, + optimization_params={"num_epochs": num_epochs, "lr": 0.001}, ) diff --git a/nemo/backends/pytorch/tutorials/chatbot/modules.py b/nemo/backends/pytorch/tutorials/chatbot/modules.py index ca37d874ec52..5d51697922aa 100644 --- a/nemo/backends/pytorch/tutorials/chatbot/modules.py +++ b/nemo/backends/pytorch/tutorials/chatbot/modules.py @@ -23,7 +23,7 @@ def output_ports(self): """ return { "src": NeuralType(ChannelType(), ('T', 'B')), - "src_lengths": NeuralType(ChannelType(), tuple('B')), + "src_lengths": NeuralType(LengthsType(), tuple('B')), "tgt": NeuralType(LabelsType(), ('T', 'B')), "mask": NeuralType(ChannelType(), ('T', 'B')), "max_tgt_lengths": NeuralType(axes=None), From ecbec6c159dbfef7d1bc2b0732752908e76dca08 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Wed, 12 Feb 2020 11:51:20 -0800 Subject: [PATCH 27/30] addressing some review feedback Signed-off-by: Oleksii Kuchaiev --- nemo/backends/pytorch/common/losses.py | 10 +- nemo/backends/pytorch/common/other.py | 6 +- nemo/backends/pytorch/common/rnn.py | 16 +-- nemo/backends/pytorch/common/search.py | 6 +- .../pytorch/torchvision/data/image_folder.py | 14 +-- .../pytorch/tutorials/chatbot/modules.py | 38 +++--- nemo/backends/pytorch/tutorials/toys.py | 32 ++--- nemo/collections/asr/audio_preprocessing.py | 50 ++++---- nemo/collections/asr/beam_search_decoder.py | 6 +- nemo/collections/asr/data_layer.py | 18 +-- nemo/collections/asr/greedy_ctc_decoder.py | 4 +- nemo/collections/asr/jasper.py | 12 +- nemo/collections/asr/las/misc.py | 4 +- nemo/collections/asr/losses.py | 10 +- .../data_layers/glue_benchmark_datalayer.py | 16 +-- .../joint_intent_slot_datalayer.py | 24 ++-- .../nlp/nm/data_layers/lm_bert_datalayer.py | 24 ++-- .../data_layers/lm_transformer_datalayer.py | 6 +- .../machine_translation_datalayer.py | 12 +- .../punctuation_capitalization_datalayer.py | 14 +-- .../nlp/nm/data_layers/qa_squad_datalayer.py | 12 +- .../state_tracking_trade_datalayer.py | 10 +- .../text_classification_datalayer.py | 8 +- .../token_classification_datalayer.py | 22 ++-- .../nlp/nm/losses/aggregator_loss.py | 2 +- .../nlp/nm/losses/joint_intent_slot_loss.py | 12 +- .../losses/masked_language_modeling_loss.py | 8 +- .../padded_smoothed_cross_entropy_loss.py | 6 +- .../nlp/nm/losses/qa_squad_loss.py | 12 +- .../nm/losses/state_tracking_trade_loss.py | 14 +-- .../nm/losses/token_classification_loss.py | 8 +- .../trainables/common/huggingface/bert_nm.py | 8 +- .../common/sequence_classification_nm.py | 4 +- .../common/sequence_regression_nm.py | 4 +- .../common/token_classification_nm.py | 8 +- .../common/transformer/transformer_nm.py | 26 ++--- .../state_tracking_trade_nm.py | 14 +-- .../joint_intent_slot/joint_intent_slot_nm.py | 6 +- nemo/collections/simple_gan/gan.py | 32 ++--- nemo/collections/tts/data_layers.py | 4 +- nemo/collections/tts/tacotron2_modules.py | 60 +++++----- nemo/collections/tts/waveglow_modules.py | 22 ++-- nemo/core/neural_factory.py | 28 ++--- nemo/core/neural_modules.py | 1 + nemo/core/neural_types/axes.py | 13 ++- nemo/core/neural_types/elements.py | 82 ++++++------- nemo/core/neural_types/neural_type.py | 26 +++-- tests/asr/test_zeroDS.py | 9 +- tests/core/test_infer.py | 12 +- tests/core/test_neural_modules.py | 5 +- tests/core/test_neural_modules_pytorch.py | 5 +- tests/core/test_neural_types.py | 109 ++++++++---------- 52 files changed, 453 insertions(+), 461 deletions(-) diff --git a/nemo/backends/pytorch/common/losses.py b/nemo/backends/pytorch/common/losses.py index 60b091802c68..9d14f763e22d 100644 --- a/nemo/backends/pytorch/common/losses.py +++ b/nemo/backends/pytorch/common/losses.py @@ -45,7 +45,7 @@ def output_ports(self): NeuralType(None) """ - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__( self, pad_id=0, smoothing_coef=0.0, sample_wise=False, aux_ctc=False, ctc_initial_coef=0.1, ctc_blank_id=None @@ -107,8 +107,8 @@ def input_ports(self): """Returns definitions of module input ports. """ return { - "logits": NeuralType(elements_type=LogitsType(), axes=('B', 'D')), - "labels": NeuralType(elements_type=LabelsType(), axes=tuple('B')), + "logits": NeuralType(axes=('B', 'D'), elements_type=LogitsType()), + "labels": NeuralType(axes=tuple('B'), elements_type=LabelsType()), } @property @@ -143,8 +143,8 @@ def input_ports(self): 0: AxisType(RegressionTag) """ return { - "preds": NeuralType(RegressionValuesType(), tuple('B')), - "labels": NeuralType(LabelsType(), tuple('B')), + "preds": NeuralType(tuple('B'), RegressionValuesType()), + "labels": NeuralType(tuple('B'), LabelsType()), } @property diff --git a/nemo/backends/pytorch/common/other.py b/nemo/backends/pytorch/common/other.py index 7de337619f01..c9b9040dd32c 100644 --- a/nemo/backends/pytorch/common/other.py +++ b/nemo/backends/pytorch/common/other.py @@ -28,7 +28,7 @@ def output_ports(self): """Returns definitions of module output ports. """ # return {"outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),})} - return {"outputs": NeuralType(ChannelType(), ('T', 'B', 'D'))} + return {"outputs": NeuralType(('T', 'B', 'D'), ChannelType())} def __init__(self, voc_size, hidden_size, dropout=0.0): super().__init__() @@ -53,14 +53,14 @@ def input_ports(self): """Returns definitions of module input ports. """ # return {"input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag),})} - return {"input_type_ids": NeuralType(VoidType(), ('B', 'T'))} + return {"input_type_ids": NeuralType(('B', 'T'), VoidType())} @property def output_ports(self): """Returns definitions of module output ports. """ # return {"input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag),})} - return {"input_type_ids": NeuralType(ChannelType(), ('B', 'T'))} + return {"input_type_ids": NeuralType(('B', 'T'), ChannelType())} def __init__(self): super().__init__() diff --git a/nemo/backends/pytorch/common/rnn.py b/nemo/backends/pytorch/common/rnn.py index 774e11807edf..fbf7dbb7eb97 100644 --- a/nemo/backends/pytorch/common/rnn.py +++ b/nemo/backends/pytorch/common/rnn.py @@ -70,11 +70,11 @@ def input_ports(self): """ return { # 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - 'targets': NeuralType(LabelsType(), ('B', 'T')), + 'targets': NeuralType(('B', 'T'), LabelsType()), # 'encoder_outputs': NeuralType( # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True, # ), - 'encoder_outputs': NeuralType(ChannelType(), ('B', 'T', 'D'), True), + 'encoder_outputs': NeuralType(('B', 'T', 'D'), ChannelType(), True), } @property @@ -83,11 +83,11 @@ def output_ports(self): """ return { # 'log_probs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}), - 'log_probs': NeuralType(LogprobsType(), ('B', 'T', 'D')), + 'log_probs': NeuralType(('B', 'T', 'D'), LogprobsType()), # 'attention_weights': NeuralType( # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}, optional=True, # ), - 'attention_weights': NeuralType(ChannelType(), ('B', 'T', 'T'), True), + 'attention_weights': NeuralType(('B', 'T', 'T'), ChannelType(), True), } def __init__( @@ -209,8 +209,8 @@ def input_ports(self): return { # 'inputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # 'input_lens': NeuralType({0: AxisType(BatchTag),}, optional=True), - 'inputs': NeuralType(ChannelType(), ('B', 'T')), - 'input_lens': NeuralType(LengthsType(), tuple('B')), + 'inputs': NeuralType(('B', 'T'), ChannelType()), + 'input_lens': NeuralType(tuple('B'), LengthsType()), } @property @@ -220,8 +220,8 @@ def output_ports(self): return { # 'outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), # 'hidden': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - 'outputs': NeuralType(ChannelType(), ('B', 'T', 'D')), - 'hidden': NeuralType(ChannelType(), ('B', 'T', 'D')), + 'outputs': NeuralType(('B', 'T', 'D'), ChannelType()), + 'hidden': NeuralType(('B', 'T', 'D'), ChannelType()), } def __init__( diff --git a/nemo/backends/pytorch/common/search.py b/nemo/backends/pytorch/common/search.py index 2051a648b6cb..acaf32213016 100644 --- a/nemo/backends/pytorch/common/search.py +++ b/nemo/backends/pytorch/common/search.py @@ -36,7 +36,7 @@ def input_ports(self): # 'encoder_outputs': NeuralType( # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True, # ) - "encoder_outputs": NeuralType(ChannelType(), ('B', 'T', 'D'), optional=True) + "encoder_outputs": NeuralType(('B', 'T', 'D'), ChannelType(), optional=True) } @property @@ -47,8 +47,8 @@ def output_ports(self): return { # 'predictions': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # 'attention_weights': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), - "predictions": NeuralType(ChannelType(), ('B', 'T')), - "attention_weights": NeuralType(ChannelType(), ('B', 'T', 'T')), + "predictions": NeuralType(('B', 'T'), ChannelType()), + "attention_weights": NeuralType(('B', 'T', 'T'), ChannelType()), } def __init__(self, decoder, pad_id, bos_id, eos_id, max_len, batch_size=None): diff --git a/nemo/backends/pytorch/torchvision/data/image_folder.py b/nemo/backends/pytorch/torchvision/data/image_folder.py index 5c4946b5cdd5..b775efb1a8f5 100644 --- a/nemo/backends/pytorch/torchvision/data/image_folder.py +++ b/nemo/backends/pytorch/torchvision/data/image_folder.py @@ -27,14 +27,12 @@ def output_ports(self): 0: AxisType(BatchTag) """ return { - "image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, self._input_size), - 3: AxisType(WidthTag, self._input_size), - } - ), + "image": NeuralType({ + 0: AxisType(BatchTag), + 1: AxisType(ChannelTag), + 2: AxisType(HeightTag, self._input_size), + 3: AxisType(WidthTag, self._input_size), + }), "label": NeuralType({0: AxisType(BatchTag)}), } diff --git a/nemo/backends/pytorch/tutorials/chatbot/modules.py b/nemo/backends/pytorch/tutorials/chatbot/modules.py index 5d51697922aa..14d704b4d4fc 100644 --- a/nemo/backends/pytorch/tutorials/chatbot/modules.py +++ b/nemo/backends/pytorch/tutorials/chatbot/modules.py @@ -22,10 +22,10 @@ def output_ports(self): """Returns definitions of module output ports. """ return { - "src": NeuralType(ChannelType(), ('T', 'B')), - "src_lengths": NeuralType(LengthsType(), tuple('B')), - "tgt": NeuralType(LabelsType(), ('T', 'B')), - "mask": NeuralType(ChannelType(), ('T', 'B')), + "src": NeuralType(('T', 'B'), ChannelType()), + "src_lengths": NeuralType(tuple('B'), LengthsType()), + "tgt": NeuralType(('T', 'B'), LabelsType()), + "mask": NeuralType(('T', 'B'), ChannelType()), "max_tgt_lengths": NeuralType(axes=None), } @@ -75,8 +75,8 @@ def input_ports(self): """Returns definitions of module input ports. """ return { - "input_seq": NeuralType(ChannelType(), ('T', 'B')), - "input_lengths": NeuralType(LengthsType(), tuple('B')), + "input_seq": NeuralType(('T', 'B'), ChannelType()), + "input_lengths": NeuralType(tuple('B'), LengthsType()), } @property @@ -84,8 +84,8 @@ def output_ports(self): """Returns definitions of module output ports. """ return { - "outputs": NeuralType(ChannelType(), ('T', 'B', 'D')), - "hidden": NeuralType(ChannelType(), ('B', 'D')), + "outputs": NeuralType(('T', 'B', 'D'), ChannelType()), + "hidden": NeuralType(('B', 'D'), ChannelType()), } def __init__(self, voc_size, encoder_n_layers, hidden_size, dropout, bidirectional=True): @@ -135,8 +135,8 @@ def input_ports(self): """Returns definitions of module input ports. """ return { - "targets": NeuralType(LabelsType(), ('T', 'B')), - "encoder_outputs": NeuralType(ChannelType(), ('T', 'B', 'D')), + "targets": NeuralType(('T', 'B'), LabelsType()), + "encoder_outputs": NeuralType(('T', 'B', 'D'), ChannelType()), "max_target_len": NeuralType(axes=None), } @@ -157,8 +157,8 @@ def output_ports(self): 1: AxisType(ChannelTag) """ return { - "outputs": NeuralType(ChannelType(), ('T', 'B', 'D')), - "hidden": NeuralType(ChannelType(), ('B', 'D')), + "outputs": NeuralType(('T', 'B', 'D'), ChannelType()), + "hidden": NeuralType(('B', 'D'), ChannelType()), } def __init__(self, attn_model, hidden_size, voc_size, decoder_n_layers, dropout): @@ -273,9 +273,9 @@ def input_ports(self): """Returns definitions of module input ports. """ return { - "predictions": NeuralType(ChannelType(), ('T', 'B', 'D')), - "target": NeuralType(LabelsType(), ('T', 'B')), - "mask": NeuralType(ChannelType(), ('T', 'B')), + "predictions": NeuralType(('T', 'B', 'D'), ChannelType()), + "target": NeuralType(('T', 'B'), LabelsType()), + "mask": NeuralType(('T', 'B'), ChannelType()), } @property @@ -285,7 +285,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(LossType(), axes=None)} + return {"loss": NeuralType(axes=None, elements_type=LossType())} def __init__(self): super().__init__() @@ -309,15 +309,15 @@ class GreedyLuongAttnDecoderRNN(TrainableNM): def input_ports(self): """Returns definitions of module input ports. """ - return {"encoder_outputs": NeuralType(ChannelType(), ('T', 'B', 'D'))} + return {"encoder_outputs": NeuralType(('T', 'B', 'D'), ChannelType())} @property def output_ports(self): """Returns definitions of module output ports. """ return { - "outputs": NeuralType(ChannelType(), ('T', 'B')), - "hidden": NeuralType(ChannelType(), ('B', 'D')), + "outputs": NeuralType(('T', 'B'), ChannelType()), + "hidden": NeuralType(('B', 'D'), ChannelType()), } def __init__(self, attn_model, hidden_size, voc_size, decoder_n_layers, dropout, max_dec_steps=10): diff --git a/nemo/backends/pytorch/tutorials/toys.py b/nemo/backends/pytorch/tutorials/toys.py index 8b18d9890c60..442c841ee836 100644 --- a/nemo/backends/pytorch/tutorials/toys.py +++ b/nemo/backends/pytorch/tutorials/toys.py @@ -21,7 +21,7 @@ def input_ports(self): Returns: A (dict) of module's input ports names to NeuralTypes mapping """ - return {"x": NeuralType(ChannelType(), ('B', 'D'))} + return {"x": NeuralType(('B', 'D'), ChannelType())} @property def output_ports(self): @@ -30,7 +30,7 @@ def output_ports(self): Returns: A (dict) of module's output ports names to NeuralTypes mapping """ - return {"y_pred": NeuralType(ChannelType(), ('B', 'D'))} + return {"y_pred": NeuralType(('B', 'D'), ChannelType())} def __init__(self, dim): # Part specific for Neural Modules API: @@ -63,15 +63,15 @@ def input_ports(self): """ return { - "x": NeuralType(ChannelType(), ('B', 'D')), - "o": NeuralType(ChannelType(), ('B', 'D')), + "x": NeuralType(('B', 'D'), ChannelType()), + "o": NeuralType(('B', 'D'), ChannelType()), } @property def output_ports(self): """Returns definitions of module output ports. """ - return {"y_pred": NeuralType(ChannelType(), ('B', 'D'), optional=True)} + return {"y_pred": NeuralType(('B', 'D'), ChannelType(), optional=True)} def __init__(self, dim): # Part specific for Neural Modules API: @@ -123,8 +123,8 @@ def output_ports(self): """Returns definitions of module output ports """ return { - "x": NeuralType(ChannelType(), ('B', 'D')), - "y": NeuralType(LabelsType(), ('B', 'D')), + "x": NeuralType(('B', 'D'), ChannelType()), + "y": NeuralType(('B', 'D'), LabelsType()), } def __init__(self, batch_size, f_name="sin", n=1000, x_lo=-4, x_hi=4): @@ -182,15 +182,15 @@ def input_ports(self): 1: AxisType(ChannelTag) """ return { - "predictions": NeuralType(ChannelType(), ('B', 'D')), - "target": NeuralType(LabelsType(), ('B', 'D')), + "predictions": NeuralType(('B', 'D'), ChannelType()), + "target": NeuralType(('B', 'D'), LabelsType()), } @property def output_ports(self): """Returns definitions of module output ports. """ - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self): super().__init__() @@ -206,15 +206,15 @@ def input_ports(self): """Returns definitions of module input ports. """ return { - "predictions": NeuralType(ChannelType(), ('B', 'D')), - "target": NeuralType(LabelsType(), ('B', 'D')), + "predictions": NeuralType(('B', 'D'), ChannelType()), + "target": NeuralType(('B', 'D'), LabelsType()), } @property def output_ports(self): """Returns definitions of module output ports. """ - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self): super().__init__() @@ -230,8 +230,8 @@ def input_ports(self): """Returns definitions of module input ports. """ return { - "predictions": NeuralType(ChannelType(), ('B', 'D')), - "labels": NeuralType(LabelsType(), tuple('B')), + "predictions": NeuralType(('B', 'D'), ChannelType()), + "labels": NeuralType(tuple('B'), LabelsType()), } @property @@ -241,7 +241,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self): # Neural Module API specific diff --git a/nemo/collections/asr/audio_preprocessing.py b/nemo/collections/asr/audio_preprocessing.py index f2950162a346..945f4383caac 100644 --- a/nemo/collections/asr/audio_preprocessing.py +++ b/nemo/collections/asr/audio_preprocessing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -125,8 +125,8 @@ def input_ports(self): return { # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "length": NeuralType({0: AxisType(BatchTag)}), - "input_signal": NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), - "length": NeuralType(LengthsType(), tuple('B')), + "input_signal": NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)), + "length": NeuralType(tuple('B'), LengthsType()), } @property @@ -138,8 +138,8 @@ def output_ports(self): # {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} # ), # "processed_length": NeuralType({0: AxisType(BatchTag)}), - "processed_signal": NeuralType(SpectrogramType(), ('B', 'D', 'T')), - "processed_length": NeuralType(LengthsType(), tuple('B')), + "processed_signal": NeuralType(('B', 'D', 'T'), SpectrogramType()), + "processed_length": NeuralType(tuple('B'), LengthsType()), } def __init__( @@ -200,6 +200,10 @@ def __init__( def get_features(self, input_signal, length): return self.featurizer(input_signal) + @property + def sample_rate(self): + return self._sample_rate + class AudioToMelSpectrogramPreprocessor(AudioPreprocessor): """Featurizer that converts wavs to mel spectrograms. @@ -271,8 +275,8 @@ def input_ports(self): return { # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "length": NeuralType({0: AxisType(BatchTag)}), - "input_signal": NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), - "length": NeuralType(LengthsType(), tuple('B')), + "input_signal": NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)), + "length": NeuralType(tuple('B'), LengthsType()), } @property @@ -297,8 +301,8 @@ def output_ports(self): # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} # ), # "processed_length": NeuralType({0: AxisType(BatchTag)}), - "processed_signal": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), - "processed_length": NeuralType(LengthsType(), tuple('B')), + "processed_signal": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "processed_length": NeuralType(tuple('B'), LengthsType()), } def __init__( @@ -417,8 +421,8 @@ def input_ports(self): return { # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "length": NeuralType({0: AxisType(BatchTag)}), - "input_signal": NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), - "length": NeuralType(LengthsType(), tuple('B')), + "input_signal": NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)), + "length": NeuralType(tuple('B'), LengthsType()), } @property @@ -430,8 +434,8 @@ def output_ports(self): # {0: AxisType(BatchTag), 1: AxisType(MFCCSignalTag), 2: AxisType(ProcessedTimeTag),} # ), # "processed_length": NeuralType({0: AxisType(BatchTag)}), - "processed_signal": NeuralType(MFCCSpectrogramType(), ('B', 'D', 'T')), - "processed_length": NeuralType(LengthsType(), tuple('B')), + "processed_signal": NeuralType(('B', 'D', 'T'), MFCCSpectrogramType()), + "processed_length": NeuralType(tuple('B'), LengthsType()), } def __init__( @@ -547,7 +551,7 @@ def input_ports(self): return { # "input_spec": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType( # TimeTag),}) - "input_spec": NeuralType(SpectrogramType(), ('B', 'D', 'T')) + "input_spec": NeuralType(('B', 'D', 'T'), SpectrogramType()) } @property @@ -558,7 +562,7 @@ def output_ports(self): # "augmented_spec": NeuralType( # {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} # ) - "augmented_spec": NeuralType(SpectrogramType(), ('B', 'D', 'T')) + "augmented_spec": NeuralType(('B', 'D', 'T'), SpectrogramType()) } def __init__( @@ -612,10 +616,10 @@ def input_ports(self): # "in_x_len": NeuralType({0: AxisType(BatchTag)}), # "in_y": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "in_y_len": NeuralType({0: AxisType(BatchTag)}), - "in_x": NeuralType(SpectrogramType(), ('B', 'D', 'T')), - "in_x_len": NeuralType(LengthsType(), tuple('B')), - "in_y": NeuralType(SpectrogramType(), ('B', 'D', 'T')), - "in_y_len": NeuralType(LengthsType(), tuple('B')), + "in_x": NeuralType(('B', 'D', 'T'), SpectrogramType()), + "in_x_len": NeuralType(tuple('B'), LengthsType()), + "in_y": NeuralType(('B', 'D', 'T'), SpectrogramType()), + "in_y_len": NeuralType(tuple('B'), LengthsType()), } @property @@ -627,10 +631,10 @@ def output_ports(self): # "out_x_len": NeuralType({0: AxisType(BatchTag)}), # "out_y": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "out_y_len": NeuralType({0: AxisType(BatchTag)}), - "out_x": NeuralType(SpectrogramType(), ('B', 'D', 'T')), - "out_x_len": NeuralType(LengthsType(), tuple('B')), - "out_y": NeuralType(SpectrogramType(), ('B', 'D', 'T')), - "out_y_len": NeuralType(LengthsType(), tuple('B')), + "out_x": NeuralType(('B', 'D', 'T'), SpectrogramType()), + "out_x_len": NeuralType(tuple('B'), LengthsType()), + "out_y": NeuralType(('B', 'D', 'T'), SpectrogramType()), + "out_y_len": NeuralType(tuple('B'), LengthsType()), } def __init__(self, mult_batch=1): diff --git a/nemo/collections/asr/beam_search_decoder.py b/nemo/collections/asr/beam_search_decoder.py index 2cb919ee4fe4..ecebe7a00ec3 100644 --- a/nemo/collections/asr/beam_search_decoder.py +++ b/nemo/collections/asr/beam_search_decoder.py @@ -45,8 +45,8 @@ def input_ports(self): return { # "log_probs": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}), # "log_probs_length": NeuralType({0: AxisType(BatchTag)}), - "log_probs": NeuralType(LogprobsType(), ('B', 'T', 'D')), - "log_probs_length": NeuralType(LengthsType(), tuple('B')), + "log_probs": NeuralType(('B', 'T', 'D'), LogprobsType()), + "log_probs_length": NeuralType(tuple('B'), LengthsType()), } @property @@ -57,7 +57,7 @@ def output_ports(self): NeuralType(None) """ # return {"predictions": NeuralType(VoidType())} - return {"predictions": NeuralType(PredictionsType(), ('B', 'T'))} + return {"predictions": NeuralType(('B', 'T'), PredictionsType())} def __init__(self, vocab, beam_width, alpha, beta, lm_path, num_cpus, cutoff_prob=1.0, cutoff_top_n=40): diff --git a/nemo/collections/asr/data_layer.py b/nemo/collections/asr/data_layer.py index 83d959a09974..e2b95c0e9604 100644 --- a/nemo/collections/asr/data_layer.py +++ b/nemo/collections/asr/data_layer.py @@ -100,10 +100,10 @@ def output_ports(self): # 'a_sig_length': NeuralType({0: AxisType(BatchTag)}), # 'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # 'transcript_length': NeuralType({0: AxisType(BatchTag)}), - 'audio_signal': NeuralType(AudioSignal(freq=self._sample_rate), ('B', 'T')), - 'a_sig_length': NeuralType(LengthsType(), tuple('B')), - 'transcripts': NeuralType(LabelsType(), ('B', 'T')), - 'transcript_length': NeuralType(LengthsType(), tuple('B')), + 'audio_signal': NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)), + 'a_sig_length': NeuralType(tuple('B'), LengthsType()), + 'transcripts': NeuralType(('B', 'T'), LabelsType()), + 'transcript_length': NeuralType(tuple('B'), LengthsType()), } def __init__( @@ -221,9 +221,9 @@ def output_ports(self): # 'processed_length': NeuralType({0: AxisType(BatchTag)}), # 'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # 'transcript_length': NeuralType({0: AxisType(BatchTag)}), - 'processed_signal': NeuralType(SpectrogramType(), ('B', 'D', 'T')), - 'transcripts': NeuralType(ChannelType(), ('B', 'T')), - 'transcript_length': NeuralType(LengthsType(), tuple('B')), + 'processed_signal': NeuralType(('B', 'D', 'T'), SpectrogramType()), + 'transcripts': NeuralType(('B', 'T'), ChannelType()), + 'transcript_length': NeuralType(tuple('B'), LengthsType()), } def __init__( @@ -350,8 +350,8 @@ def output_ports(self): return { # 'texts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # 'texts_length': NeuralType({0: AxisType(BatchTag)}), - 'texts': NeuralType(ChannelType(), ('B', 'T')), - 'texts_length': NeuralType(LengthsType(), tuple('B')), + 'texts': NeuralType(('B', 'T'), ChannelType()), + 'texts_length': NeuralType(tuple('B'), LengthsType()), } def __init__( diff --git a/nemo/collections/asr/greedy_ctc_decoder.py b/nemo/collections/asr/greedy_ctc_decoder.py index 8f29ab9c3c40..2d49011e7235 100644 --- a/nemo/collections/asr/greedy_ctc_decoder.py +++ b/nemo/collections/asr/greedy_ctc_decoder.py @@ -15,14 +15,14 @@ def input_ports(self): """Returns definitions of module input ports. """ # return {"log_probs": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})} - return {"log_probs": NeuralType(LogprobsType(), ('B', 'T', 'D'))} + return {"log_probs": NeuralType(('B', 'T', 'D'), LogprobsType())} @property def output_ports(self): """Returns definitions of module output ports. """ # return {"predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} - return {"predictions": NeuralType(PredictionsType(), ('B', 'T'))} + return {"predictions": NeuralType(('B', 'T'), PredictionsType())} def __init__(self): super().__init__() diff --git a/nemo/collections/asr/jasper.py b/nemo/collections/asr/jasper.py index a1e41a8111b2..d6fcf7e38259 100644 --- a/nemo/collections/asr/jasper.py +++ b/nemo/collections/asr/jasper.py @@ -79,8 +79,8 @@ def input_ports(self): # {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} # ), # "length": NeuralType({0: AxisType(BatchTag)}), - "audio_signal": NeuralType(SpectrogramType(), ('B', 'D', 'T')), - "length": NeuralType(LengthsType(), tuple('B')), + "audio_signal": NeuralType(('B', 'D', 'T'), SpectrogramType()), + "length": NeuralType(tuple('B'), LengthsType()), } @property @@ -92,8 +92,8 @@ def output_ports(self): # {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag),} # ), # "encoded_lengths": NeuralType({0: AxisType(BatchTag)}), - "outputs": NeuralType(AcousticEncodedRepresentation(), ('B', 'D', 'T')), - "encoded_lengths": NeuralType(LengthsType(), tuple('B')), + "outputs": NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()), + "encoded_lengths": NeuralType(tuple('B'), LengthsType()), } def __init__( @@ -184,7 +184,7 @@ def input_ports(self): # "encoder_output": NeuralType( # {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag),} # ) - "encoder_output": NeuralType(AcousticEncodedRepresentation(), ('B', 'D', 'T')) + "encoder_output": NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()) } @property @@ -192,7 +192,7 @@ def output_ports(self): """Returns definitions of module output ports. """ # return {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})} - return {"output": NeuralType(LogprobsType(), ('B', 'T', 'D'))} + return {"output": NeuralType(('B', 'T', 'D'), LogprobsType())} def __init__(self, feat_in, num_classes, init_mode="xavier_uniform"): super().__init__() diff --git a/nemo/collections/asr/las/misc.py b/nemo/collections/asr/las/misc.py index 1ed2aadc5fb9..56519e143fd8 100644 --- a/nemo/collections/asr/las/misc.py +++ b/nemo/collections/asr/las/misc.py @@ -22,7 +22,7 @@ def input_ports(self): """Returns definitions of module input ports. """ # return {'tensor': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag),})} - return {'tensor': NeuralType(ChannelType(), ('B', 'D', 'T'))} + return {'tensor': NeuralType(('B', 'D', 'T'), ChannelType())} @property def output_ports(self): @@ -36,7 +36,7 @@ def output_ports(self): 2: AxisType(ChannelTag) """ # return {'tensor': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})} - return {'tensor': NeuralType(ChannelType(), ('B', 'T', 'D'))} + return {'tensor': NeuralType(('B', 'T', 'D'), ChannelType())} def __init__(self, in_channels, out_channels): super().__init__() diff --git a/nemo/collections/asr/losses.py b/nemo/collections/asr/losses.py index a9b77fe03e0b..909a16d6f39c 100644 --- a/nemo/collections/asr/losses.py +++ b/nemo/collections/asr/losses.py @@ -24,10 +24,10 @@ def input_ports(self): # "targets": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "input_length": NeuralType({0: AxisType(BatchTag)}), # "target_length": NeuralType({0: AxisType(BatchTag)}), - "log_probs": NeuralType(LogprobsType(), ('B', 'T', 'D')), - "targets": NeuralType(LabelsType(), ('B', 'T')), - "input_length": NeuralType(LengthsType(), tuple('B')), - "target_length": NeuralType(LengthsType(), tuple('B')), + "log_probs": NeuralType(('B', 'T', 'D'), LogprobsType()), + "targets": NeuralType(('B', 'T'), LabelsType()), + "input_length": NeuralType(tuple('B'), LengthsType()), + "target_length": NeuralType(tuple('B'), LengthsType()), } @property @@ -38,7 +38,7 @@ def output_ports(self): NeuralType(None) """ # return {"loss": NeuralType(None)} - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, num_classes): super().__init__() diff --git a/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py b/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py index 95b89c7761e6..ac5ae86cca6c 100644 --- a/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py @@ -42,10 +42,10 @@ def output_ports(self): # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "labels": NeuralType({0: AxisType(CategoricalTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask": NeuralType(ChannelType(), ('B', 'T')), - "labels": NeuralType(CategoricalValuesType(), tuple('B')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(tuple('B'), CategoricalValuesType()), } def __init__( @@ -93,10 +93,10 @@ def output_ports(self): # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "labels": NeuralType({0: AxisType(RegressionTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask": NeuralType(ChannelType(), ('B', 'T')), - "labels": NeuralType(RegressionValuesType(), tuple('B')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(tuple('B'), RegressionValuesType()), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py b/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py index 187d859819f4..c306cfcccc04 100644 --- a/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py @@ -52,13 +52,13 @@ def output_ports(self): # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "intents": NeuralType({0: AxisType(BatchTag)}), # "slots": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask": NeuralType(ChannelType(), ('B', 'T')), - "loss_mask": NeuralType(ChannelType(), ('B', 'T')), - "subtokens_mask": NeuralType(ChannelType(), ('B', 'T')), - "intents": NeuralType(ChannelType(), tuple('B')), - "slots": NeuralType(ChannelType(), ('B', 'T')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), + "intents": NeuralType(tuple('B'), ChannelType()), + "slots": NeuralType(('B', 'T'), ChannelType()), } def __init__( @@ -118,11 +118,11 @@ def output_ports(self): # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask": NeuralType(ChannelType(), ('B', 'T')), - "loss_mask": NeuralType(ChannelType(), ('B', 'T')), - "subtokens_mask": NeuralType(ChannelType(), ('B', 'T')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), } def __init__(self, queries, tokenizer, max_seq_length, batch_size=1, dataset_type=BertJointIntentSlotInferDataset): diff --git a/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py b/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py index a6f8556529c4..98c1ba23c10f 100644 --- a/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py @@ -56,12 +56,12 @@ def output_ports(self): # "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "labels": NeuralType({0: AxisType(BatchTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask": NeuralType(ChannelType(), ('B', 'T')), - "output_ids": NeuralType(ChannelType(), ('B', 'T')), - "output_mask": NeuralType(ChannelType(), ('B', 'T')), - "labels": NeuralType(LabelsType(), tuple('B')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "output_ids": NeuralType(('B', 'T'), ChannelType()), + "output_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(tuple('B'), LabelsType()), } def __init__(self, tokenizer, dataset, max_seq_length, mask_probability, short_seq_prob=0.1, batch_size=64): @@ -101,12 +101,12 @@ def output_ports(self): # "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "labels": NeuralType({0: AxisType(BatchTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask": NeuralType(ChannelType(), ('B', 'T')), - "output_ids": NeuralType(ChannelType(), ('B', 'T')), - "output_mask": NeuralType(ChannelType(), ('B', 'T')), - "labels": NeuralType(LabelsType(), tuple('B')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "output_ids": NeuralType(('B', 'T'), ChannelType()), + "output_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(tuple('B'), LabelsType()), } def __init__(self, dataset, max_pred_length, batch_size=64, training=True): diff --git a/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py b/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py index c04d8f8a1cb5..ebd1b2a738d0 100644 --- a/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py @@ -58,9 +58,9 @@ def output_ports(self): # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask": NeuralType(ChannelType(), ('B', 'T')), - "labels": NeuralType(LabelsType(), ('B', 'T')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(('B', 'T'), LabelsType()), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py b/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py index 53dcf0c7f0d9..44f877f5dcc3 100644 --- a/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py @@ -70,12 +70,12 @@ def output_ports(self): # "tgt_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "sent_ids": NeuralType({0: AxisType(BatchTag)}), - "src_ids": NeuralType(ChannelType(), ('B', 'T')), - "src_mask": NeuralType(ChannelType(), ('B', 'T')), - "tgt_ids": NeuralType(ChannelType(), ('B', 'T')), - "tgt_mask": NeuralType(ChannelType(), ('B', 'T')), - "labels": NeuralType(LabelsType(), ('B', 'T')), - "sent_ids": NeuralType(ChannelType(), tuple('B')), + "src_ids": NeuralType(('B', 'T'), ChannelType()), + "src_mask": NeuralType(('B', 'T'), ChannelType()), + "tgt_ids": NeuralType(('B', 'T'), ChannelType()), + "tgt_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(('B', 'T'), LabelsType()), + "sent_ids": NeuralType(tuple('B'), ChannelType()), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py b/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py index b41501b5b684..e3cfeda2235a 100644 --- a/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py @@ -34,13 +34,13 @@ def output_ports(self): # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "punct_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "capit_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask": NeuralType(ChannelType(), ('B', 'T')), - "loss_mask": NeuralType(ChannelType(), ('B', 'T')), - "subtokens_mask": NeuralType(ChannelType(), ('B', 'T')), - "punct_labels": NeuralType(LabelsType(), ('B', 'T')), - "capit_labels": NeuralType(LabelsType(), ('B', 'T')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), + "punct_labels": NeuralType(('B', 'T'), LabelsType()), + "capit_labels": NeuralType(('B', 'T'), LabelsType()), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py b/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py index b61234462872..24ef5897fb1f 100644 --- a/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py @@ -56,12 +56,12 @@ def output_ports(self): # "start_positions": NeuralType({0: AxisType(BatchTag)}), # "end_positions": NeuralType({0: AxisType(BatchTag)}), # "unique_ids": NeuralType({0: AxisType(BatchTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask": NeuralType(ChannelType(), ('B', 'T')), - "start_positions": NeuralType(ChannelType(), tuple('B')), - "end_positions": NeuralType(ChannelType(), tuple('B')), - "unique_ids": NeuralType(ChannelType(), tuple('B')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "start_positions": NeuralType(tuple('B'), ChannelType()), + "end_positions": NeuralType(tuple('B'), ChannelType()), + "unique_ids": NeuralType(tuple('B'), ChannelType()), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py index a06562966591..8435dc976b8c 100644 --- a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py @@ -74,11 +74,11 @@ def output_ports(self): # "tgt_lens": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), # "gating_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), # "turn_domain": NeuralType(None), - "src_ids": NeuralType(ChannelType(), ('B', 'T')), - "src_lens": NeuralType(LengthsType(), tuple('B')), - "tgt_ids": NeuralType(ChannelType(), ('B', 'D', 'T')), - "tgt_lens": NeuralType(LengthsType(), ('B', 'D')), - "gating_labels": NeuralType(LabelsType(), ('B', 'D')), + "src_ids": NeuralType(('B', 'T'), ChannelType()), + "src_lens": NeuralType(tuple('B'), LengthsType()), + "tgt_ids": NeuralType(('B', 'D', 'T'), ChannelType()), + "tgt_lens": NeuralType(('B', 'D'), LengthsType()), + "gating_labels": NeuralType(('B', 'D'), LabelsType()), "turn_domain": NeuralType(), } diff --git a/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py b/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py index d35c5d401a56..a104a5a543f5 100644 --- a/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py @@ -42,10 +42,10 @@ def output_ports(self): # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "labels": NeuralType({0: AxisType(BatchTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask": NeuralType(ChannelType(), ('B', 'T')), - "labels": NeuralType(LabelsType(), tuple('B')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(tuple('B'), LabelsType()), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py b/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py index e1506aab5d2d..5fd6cbe2ee5b 100644 --- a/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py @@ -33,12 +33,12 @@ def output_ports(self): # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask": NeuralType(ChannelType(), ('B', 'T')), - "loss_mask": NeuralType(ChannelType(), ('B', 'T')), - "subtokens_mask": NeuralType(ChannelType(), ('B', 'T')), - "labels": NeuralType(LabelsType(), ('B', 'T')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(('B', 'T'), LabelsType()), } def __init__( @@ -84,11 +84,11 @@ def output_ports(self): # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_type_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask": NeuralType(ChannelType(), ('B', 'T')), - "loss_mask": NeuralType(ChannelType(), ('B', 'T')), - "subtokens_mask": NeuralType(ChannelType(), ('B', 'T')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), } def __init__( diff --git a/nemo/collections/nlp/nm/losses/aggregator_loss.py b/nemo/collections/nlp/nm/losses/aggregator_loss.py index 8bedfa651790..b1681c7048cb 100644 --- a/nemo/collections/nlp/nm/losses/aggregator_loss.py +++ b/nemo/collections/nlp/nm/losses/aggregator_loss.py @@ -46,7 +46,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, num_inputs=2): # Store number of inputs/losses. diff --git a/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py b/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py index cbe37ebbec65..ce73176747d7 100644 --- a/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py +++ b/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py @@ -56,11 +56,11 @@ def input_ports(self): # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "intents": NeuralType({0: AxisType(BatchTag)}), # "slots": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "intent_logits": NeuralType(LogitsType(), ('B', 'D')), - "slot_logits": NeuralType(LogitsType(), ('B', 'T', 'D')), - "loss_mask": NeuralType(ChannelType(), ('B', 'T')), - "intents": NeuralType(ChannelType(), tuple('B')), - "slots": NeuralType(ChannelType(), ('B', 'T')), + "intent_logits": NeuralType(('B', 'D'), LogitsType()), + "slot_logits": NeuralType(('B', 'T', 'D'), LogitsType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "intents": NeuralType(tuple('B'), ChannelType()), + "slots": NeuralType(('B', 'T'), ChannelType()), } @property @@ -71,7 +71,7 @@ def output_ports(self): NeuralType(None) """ # return {"loss": NeuralType(None)} - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__( self, num_slots, slot_classes_loss_weights=None, intent_classes_loss_weights=None, intent_loss_weight=0.6, diff --git a/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py b/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py index 239872d0b17d..38f5169bf348 100644 --- a/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py +++ b/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py @@ -37,9 +37,9 @@ def input_ports(self): # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), # "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "logits": NeuralType(LogitsType(), ('B', 'T', 'D')), - "output_ids": NeuralType(ChannelType(), ('B', 'T')), - "output_mask": NeuralType(ChannelType(), ('B', 'T')), + "logits": NeuralType(('B', 'T', 'D'), LogitsType()), + "output_ids": NeuralType(('B', 'T'), ChannelType()), + "output_mask": NeuralType(('B', 'T'), ChannelType()), } @property @@ -49,7 +49,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, label_smoothing=0.0): LossNM.__init__(self) diff --git a/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py b/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py index 2a4a9c526eca..1564f43c40b0 100644 --- a/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py +++ b/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py @@ -42,8 +42,8 @@ def input_ports(self): return { # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), # "target_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "logits": NeuralType(LogitsType(), ('B', 'T', 'D')), - "target_ids": NeuralType(LabelsType(), ('B', 'T')), + "logits": NeuralType(('B', 'T', 'D'), LogitsType()), + "target_ids": NeuralType(('B', 'T'), LabelsType()), } @property @@ -51,7 +51,7 @@ def output_ports(self): """Returns definitions of module output ports. """ # return {"loss": NeuralType(None)} - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, pad_id, label_smoothing=0, predict_last_k=0): LossNM.__init__(self) diff --git a/nemo/collections/nlp/nm/losses/qa_squad_loss.py b/nemo/collections/nlp/nm/losses/qa_squad_loss.py index 9f7fe2461232..1237b9255edb 100644 --- a/nemo/collections/nlp/nm/losses/qa_squad_loss.py +++ b/nemo/collections/nlp/nm/losses/qa_squad_loss.py @@ -43,9 +43,9 @@ def input_ports(self): # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), # "start_positions": NeuralType({0: AxisType(BatchTag)}), # "end_positions": NeuralType({0: AxisType(BatchTag)}), - "logits": NeuralType(LogitsType(), ('B', 'T', 'D')), - "start_positions": NeuralType(ChannelType(), tuple('B')), - "end_positions": NeuralType(ChannelType(), tuple('B')), + "logits": NeuralType(('B', 'T', 'D'), LogitsType()), + "start_positions": NeuralType(tuple('B'), ChannelType()), + "end_positions": NeuralType(tuple('B'), ChannelType()), } @property @@ -69,9 +69,9 @@ def output_ports(self): # "loss": NeuralType(None), # "start_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "end_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "loss": NeuralType(LossType()), - "start_logits": NeuralType(ChannelType(), ('B', 'T')), - "end_logits": NeuralType(ChannelType(), ('B', 'T')), + "loss": NeuralType(elements_type=LossType()), + "start_logits": NeuralType(('B', 'T'), ChannelType()), + "end_logits": NeuralType(('B', 'T'), ChannelType()), } def __init__(self): diff --git a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py b/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py index cf01cbfe3e33..ea065494e8ee 100644 --- a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py +++ b/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py @@ -73,9 +73,9 @@ def input_ports(self): # ), # "targets": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "logits": NeuralType(LogitsType(), ('B', 'T', 'D', 'D')), - "targets": NeuralType(ChannelType(), ('B', 'D', 'T')), - "loss_mask": NeuralType(LengthsType(), ('B', 'D')), + "logits": NeuralType(('B', 'T', 'D', 'D'), LogitsType()), + "targets": NeuralType(('B', 'D', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'D'), LengthsType()), } @property @@ -83,7 +83,7 @@ def output_ports(self): """Returns definitions of module output ports. """ # return {"loss": NeuralType(None)} - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self): LossNM.__init__(self) @@ -126,8 +126,8 @@ def input_ports(self): return { # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}), # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "logits": NeuralType(LogitsType(), ('B', 'D', 'D')), - "labels": NeuralType(LabelsType(), ('B', 'D')), + "logits": NeuralType(('B', 'D', 'D'), LogitsType()), + "labels": NeuralType(('B', 'D'), LabelsType()), } @property @@ -135,7 +135,7 @@ def output_ports(self): """Returns definitions of module output ports. """ # return {"loss": NeuralType(None)} - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, num_classes, **kwargs): LossNM.__init__(self, **kwargs) diff --git a/nemo/collections/nlp/nm/losses/token_classification_loss.py b/nemo/collections/nlp/nm/losses/token_classification_loss.py index 46a651ebe2b2..e27c74e952a3 100644 --- a/nemo/collections/nlp/nm/losses/token_classification_loss.py +++ b/nemo/collections/nlp/nm/losses/token_classification_loss.py @@ -43,9 +43,9 @@ def input_ports(self): # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "logits": NeuralType(LogitsType(), ('B', 'T', 'D')), - "labels": NeuralType(LabelsType(), ('B', 'T')), - "loss_mask": NeuralType(ChannelType(), ('B', 'T')), + "logits": NeuralType(('B', 'T', 'D'), LogitsType()), + "labels": NeuralType(('B', 'T'), LabelsType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), } @property @@ -55,7 +55,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, num_classes, class_weights=None): LossNM.__init__(self) diff --git a/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py b/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py index 252701ab92b5..e51ca6b3b9d1 100644 --- a/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py @@ -54,9 +54,9 @@ def input_ports(self): # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "token_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "attention_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "token_type_ids": NeuralType(ChannelType(), ('B', 'T')), - "attention_mask": NeuralType(ChannelType(), ('B', 'T')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "token_type_ids": NeuralType(('B', 'T'), ChannelType()), + "attention_mask": NeuralType(('B', 'T'), ChannelType()), } @property @@ -64,7 +64,7 @@ def output_ports(self): """Returns definitions of module output ports. """ # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} - return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'D'))} + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} def __init__( self, diff --git a/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py b/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py index 90084947876c..60b1f2c45e7c 100644 --- a/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py @@ -42,14 +42,14 @@ class SequenceClassifier(TrainableNM): def input_ports(self): """Returns definitions of module input ports. """ - return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'D'))} + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} @property def output_ports(self): """Returns definitions of module output ports. """ # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} - return {"logits": NeuralType(LogitsType(), ('B', 'D'))} + return {"logits": NeuralType(('B', 'D'), LogitsType())} def __init__( self, diff --git a/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py b/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py index d3aed9955da2..0989afd162ad 100644 --- a/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py @@ -41,14 +41,14 @@ def input_ports(self): """Returns definitions of module input ports. """ # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} - return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'D'))} + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} @property def output_ports(self): """Returns definitions of module output ports. """ # return {"preds": NeuralType({0: AxisType(RegressionTag)})} - return {"preds": NeuralType(RegressionValuesType(), tuple('B'))} + return {"preds": NeuralType(tuple('B'), RegressionValuesType())} def __init__(self, hidden_size, num_layers=2, activation='relu', dropout=0.0, use_transformer_pretrained=True): super().__init__() diff --git a/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py b/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py index ff5de50d620e..1b4c879906c7 100644 --- a/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py @@ -44,14 +44,14 @@ def input_ports(self): """Returns definitions of module input ports. """ # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} - return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'D'))} + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} @property def output_ports(self): """Returns definitions of module output ports. """ # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} - return {"logits": NeuralType(LogitsType(), ('B', 'T', 'C'))} + return {"logits": NeuralType(('B', 'T', 'C'), LogitsType())} def __init__( self, @@ -105,14 +105,14 @@ def input_ports(self): """Returns definitions of module input ports. """ # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} - return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'C'))} + return {"hidden_states": NeuralType(('B', 'T', 'C'), ChannelType())} @property def output_ports(self): """Returns definitions of module output ports. """ # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} - return {"logits": NeuralType(LogitsType(), ('B', 'T', 'D'))} + return {"logits": NeuralType(('B', 'T', 'D'), LogitsType())} def __init__( self, diff --git a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py index 0822d769d246..db858982adb1 100644 --- a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py @@ -51,8 +51,8 @@ def input_ports(self): return { # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_ids": NeuralType(ChannelType(), ('B', 'T')), - "input_mask_src": NeuralType(ChannelType(), ('B', 'T')), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask_src": NeuralType(('B', 'T'), ChannelType()), } @property @@ -61,7 +61,7 @@ def output_ports(self): """ # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} - return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'D'))} + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} def __init__( self, @@ -142,10 +142,10 @@ def input_ports(self): # "hidden_states_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), # "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "input_mask_tgt": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_ids_tgt": NeuralType(ChannelType(), ('B', 'T')), - "hidden_states_src": NeuralType(ChannelType(), ('B', 'T', 'D')), - "input_mask_src": NeuralType(ChannelType(), ('B', 'T')), - "input_mask_tgt": NeuralType(ChannelType(), ('B', 'T')), + "input_ids_tgt": NeuralType(('B', 'T'), ChannelType()), + "hidden_states_src": NeuralType(('B', 'T', 'D'), ChannelType()), + "input_mask_src": NeuralType(('B', 'T'), ChannelType()), + "input_mask_tgt": NeuralType(('B', 'T'), ChannelType()), } @property @@ -153,7 +153,7 @@ def output_ports(self): """Returns definitions of module output ports. """ # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} - return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'D'))} + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} def __init__( self, @@ -220,14 +220,14 @@ def input_ports(self): """Returns definitions of module input ports. """ # return {"input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} - return {"input_ids": NeuralType(ChannelType(), ('B', 'T'))} + return {"input_ids": NeuralType(('B', 'T'), ChannelType())} @property def output_ports(self): """Returns definitions of module output ports. """ # return {"output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} - return {"output_ids": NeuralType(ChannelType(), ('B', 'T'))} + return {"output_ids": NeuralType(('B', 'T'), ChannelType())} def __init__(self, decoder, log_softmax, max_seq_length, pad_token, bos_token, eos_token, batch_size=1): super().__init__() @@ -278,8 +278,8 @@ def input_ports(self): return { # "hidden_states_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), # "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "hidden_states_src": NeuralType(ChannelType(), ('B', 'T', 'C')), - "input_mask_src": NeuralType(ChannelType(), ('B', 'T')), + "hidden_states_src": NeuralType(('B', 'T', 'C'), ChannelType()), + "input_mask_src": NeuralType(('B', 'T'), ChannelType()), } @property @@ -287,7 +287,7 @@ def output_ports(self): """Returns definitions of module output ports. """ # return {"output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} - return {"output_ids": NeuralType(ChannelType(), ('B', 'T'))} + return {"output_ids": NeuralType(('B', 'T'), ChannelType())} @property def num_weights(self): diff --git a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py index bd29209918c2..1e047542e3ba 100644 --- a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py +++ b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py @@ -72,12 +72,12 @@ def input_ports(self): # 'input_lens': NeuralType({0: AxisType(BatchTag)}), # 'src_ids': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), - 'encoder_hidden': NeuralType(ChannelType(), ('B', 'T', 'C')), - 'encoder_outputs': NeuralType(ChannelType(), ('B', 'T', 'C')), - 'input_lens': NeuralType(LengthsType(), tuple('B')), - 'src_ids': NeuralType(ChannelType(), ('B', 'T')), + 'encoder_hidden': NeuralType(('B', 'T', 'C'), ChannelType()), + 'encoder_outputs': NeuralType(('B', 'T', 'C'), ChannelType()), + 'input_lens': NeuralType(tuple('B'), LengthsType()), + 'src_ids': NeuralType(('B', 'T'), ChannelType()), # 'targets': NeuralType(ChannelType(), ('B', 'D', 'T')), - 'targets': NeuralType(LabelsType(), ('B', 'D', 'T')), + 'targets': NeuralType(('B', 'D', 'T'), LabelsType()), } @property @@ -96,8 +96,8 @@ def output_ports(self): # 'gate_outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}), # } return { - 'point_outputs': NeuralType(LogitsType(), ('B', 'T', 'D', 'D')), - 'gate_outputs': NeuralType(LogitsType(), ('B', 'D', 'D')), + 'point_outputs': NeuralType(('B', 'T', 'D', 'D'), LogitsType()), + 'gate_outputs': NeuralType(('B', 'D', 'D'), LogitsType()), } def __init__(self, vocab, embeddings, hid_size, dropout, slots, nb_gate, teacher_forcing=0.5): diff --git a/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py b/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py index b751df91df28..c906417afd6d 100644 --- a/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py +++ b/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py @@ -41,7 +41,7 @@ def input_ports(self): """Returns definitions of module input ports. """ # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} - return {"hidden_states": NeuralType(ChannelType(), ('B', 'T', 'C'))} + return {"hidden_states": NeuralType(('B', 'T', 'C'), ChannelType())} @property def output_ports(self): @@ -62,8 +62,8 @@ def output_ports(self): return { # "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), # "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "intent_logits": NeuralType(LogitsType(), ('B', 'D')), - "slot_logits": NeuralType(LogitsType(), ('B', 'T', 'D')), + "intent_logits": NeuralType(('B', 'D'), LogitsType()), + "slot_logits": NeuralType(('B', 'T', 'D'), LogitsType()), } def __init__(self, hidden_size, num_intents, num_slots, dropout=0.0, use_transformer_pretrained=True, **kwargs): diff --git a/nemo/collections/simple_gan/gan.py b/nemo/collections/simple_gan/gan.py index dd2028ba769d..b0d39a406d64 100644 --- a/nemo/collections/simple_gan/gan.py +++ b/nemo/collections/simple_gan/gan.py @@ -27,7 +27,7 @@ def input_ports(self): # 3: AxisType(WidthTag, 28), # } # ) - "image": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')) + "image": NeuralType(('B', 'C', 'H', 'W'), ChannelType()) } @property @@ -35,7 +35,7 @@ def output_ports(self): """Returns definitions of module output ports. """ # return {"decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)})} - return {"decision": NeuralType(ChannelType(), ('B', 'C'))} + return {"decision": NeuralType(('B', 'C'), ChannelType())} def __init__(self): super().__init__() @@ -77,7 +77,7 @@ def input_ports(self): # 3: AxisType(WidthTag, 4), # } # ) - "latents": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')) + "latents": NeuralType(('B', 'C', 'H', 'W'), ChannelType()) } @property @@ -93,7 +93,7 @@ def output_ports(self): # 3: AxisType(WidthTag, 28), # } # ) - "image": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')) + "image": NeuralType(('B', 'C', 'H', 'W'), ChannelType()) } def __init__(self, batch_size): @@ -138,14 +138,14 @@ def input_ports(self): """ return { # "decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)}), - "decision": NeuralType(ChannelType(), ('B', 'D')) + "decision": NeuralType(('B', 'D'), ChannelType()) } @property def output_ports(self): """Returns definitions of module output ports. """ - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, neg=False): super().__init__() @@ -181,8 +181,8 @@ def input_ports(self): # } # ), # "interpolated_decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)}), - "interpolated_image": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')), - "interpolated_decision": NeuralType(ChannelType(), ('B', 'C')), + "interpolated_image": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), + "interpolated_decision": NeuralType(('B', 'C'), ChannelType()), } @property @@ -192,7 +192,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, lambda_): super().__init__() @@ -245,8 +245,8 @@ def input_ports(self): # 3: AxisType(WidthTag, 28), # } # ), - "image1": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')), - "image2": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')), + "image1": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), + "image2": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), } @property @@ -262,7 +262,7 @@ def output_ports(self): # 3: AxisType(WidthTag, 28), # } # ) - "interpolated_image": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')) + "interpolated_image": NeuralType(('B', 'C', 'H', 'W'), ChannelType()) } def __init__(self): @@ -307,7 +307,7 @@ def output_ports(self): # 3: AxisType(WidthTag, 4), # } # ) - "latent": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')) + "latent": NeuralType(('B', 'C', 'H', 'W'), ChannelType()) } def __init__(self, batch_size): @@ -372,9 +372,9 @@ def output_ports(self): # } # ), # "label": NeuralType({0: AxisType(BatchTag)}), - "latent": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')), - "image": NeuralType(ChannelType(), ('B', 'C', 'H', 'W')), - "label": NeuralType(LabelsType(), tuple('B')), + "latent": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), + "image": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), + "label": NeuralType(tuple('B'), LabelsType()), } def __init__(self, batch_size, root, train=True, shuffle=True): diff --git a/nemo/collections/tts/data_layers.py b/nemo/collections/tts/data_layers.py index 89344ec85583..ffebe99e3df9 100644 --- a/nemo/collections/tts/data_layers.py +++ b/nemo/collections/tts/data_layers.py @@ -52,8 +52,8 @@ def output_ports(self): return { # "audio_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "a_sig_length": NeuralType({0: AxisType(BatchTag)}), - "audio_signal": NeuralType(AudioSignal(), ('B', 'T')), - "a_sig_length": NeuralType(LengthsType(), tuple('B')), + "audio_signal": NeuralType(('B', 'T'), AudioSignal()), + "a_sig_length": NeuralType(tuple('B'), LengthsType()), } def __init__( diff --git a/nemo/collections/tts/tacotron2_modules.py b/nemo/collections/tts/tacotron2_modules.py index 9399bfa85d53..083ac4697526 100644 --- a/nemo/collections/tts/tacotron2_modules.py +++ b/nemo/collections/tts/tacotron2_modules.py @@ -37,7 +37,7 @@ def input_ports(self): """Returns definitions of module input ports. """ # return {"char_phone": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} - return {"char_phone": NeuralType(LabelsType(), ('B', 'T'))} + return {"char_phone": NeuralType(('B', 'T'), LabelsType())} @property def output_ports(self): @@ -47,7 +47,7 @@ def output_ports(self): # "char_phone_embeddings": NeuralType( # {0: AxisType(BatchTag), 1: AxisType(EmbeddedTextTag), 2: AxisType(TimeTag),} # ) - "char_phone_embeddings": NeuralType(EmbeddedTextType(), ('B', 'D', 'T')) + "char_phone_embeddings": NeuralType(('B', 'D', 'T'), EmbeddedTextType()) } def __init__(self, n_symbols, symbols_embedding_dim: int = 512): @@ -83,8 +83,8 @@ def input_ports(self): # {0: AxisType(BatchTag), 1: AxisType(EmbeddedTextTag), 2: AxisType(TimeTag),} # ), # "embedding_length": NeuralType({0: AxisType(BatchTag)}), - "char_phone_embeddings": NeuralType(EmbeddedTextType(), ('B', 'D', 'T')), - "embedding_length": NeuralType(LengthsType(), tuple('B')), + "char_phone_embeddings": NeuralType(('B', 'D', 'T'), EmbeddedTextType()), + "embedding_length": NeuralType(tuple('B'), LengthsType()), } @property @@ -95,7 +95,7 @@ def output_ports(self): # "char_phone_encoded": NeuralType( # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} # ) - "char_phone_encoded": NeuralType(EncodedRepresentation(), ('B', 'T', 'D')) + "char_phone_encoded": NeuralType(('B', 'T', 'D'), EncodedRepresentation()) } def __init__( @@ -164,9 +164,9 @@ def input_ports(self): # "mel_target": NeuralType( # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} # ), - "char_phone_encoded": NeuralType(EncodedRepresentation(), ('B', 'T', 'D')), - "encoded_length": NeuralType(LengthsType(), tuple('B')), - "mel_target": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), + "char_phone_encoded": NeuralType(('B', 'T', 'D'), EncodedRepresentation()), + "encoded_length": NeuralType(tuple('B'), LengthsType()), + "mel_target": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), } @property @@ -179,9 +179,9 @@ def output_ports(self): # ), # "gate_output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "alignments": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), - "mel_output": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), - "gate_output": NeuralType(ChannelType(), ('B', 'T')), - "alignments": NeuralType(ChannelType(), ('B', 'T', 'T')), + "mel_output": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "gate_output": NeuralType(('B', 'T'), ChannelType()), + "alignments": NeuralType(('B', 'T', 'T'), ChannelType()), } def __init__( @@ -278,8 +278,8 @@ def input_ports(self): # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} # ), # "encoded_length": NeuralType({0: AxisType(BatchTag)}), - "char_phone_encoded": NeuralType(EncodedRepresentation(), ('B', 'T', 'D')), - "encoded_length": NeuralType(LengthsType(), tuple('B')), + "char_phone_encoded": NeuralType(('B', 'T', 'D'), EncodedRepresentation()), + "encoded_length": NeuralType(tuple('B'), LengthsType()), } @property @@ -293,10 +293,10 @@ def output_ports(self): # "gate_output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "alignments": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), # "mel_len": NeuralType({0: AxisType(BatchTag)}), - "mel_output": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), - "gate_output": NeuralType(ChannelType(), ('B', 'T')), - "alignments": NeuralType(ChannelType(), ('B', 'T', 'T')), - "mel_len": NeuralType(LengthsType(), tuple('B')), + "mel_output": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "gate_output": NeuralType(('B', 'T'), ChannelType()), + "alignments": NeuralType(('B', 'T', 'T'), ChannelType()), + "mel_len": NeuralType(tuple('B'), LengthsType()), } def __str__(self): @@ -336,7 +336,7 @@ def input_ports(self): # "mel_input": NeuralType( # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} # ) - "mel_input": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')) + "mel_input": NeuralType(('B', 'D', 'T'), MelSpectrogramType()) } @property @@ -347,7 +347,7 @@ def output_ports(self): # "mel_output": NeuralType( # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} # ), - "mel_output": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')) + "mel_output": NeuralType(('B', 'D', 'T'), MelSpectrogramType()) } def __init__( @@ -405,20 +405,20 @@ def input_ports(self): # "gate_target": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "target_len": NeuralType({0: AxisType(BatchTag)}), # "seq_len": NeuralType({0: AxisType(BatchTag)}), - "mel_out": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), - "mel_out_postnet": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), - "gate_out": NeuralType(ChannelType(), ('B', 'T')), - "mel_target": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), - "gate_target": NeuralType(ChannelType(), ('B', 'T')), - "target_len": NeuralType(LengthsType(), tuple('B')), - "seq_len": NeuralType(LengthsType(), tuple('B')), + "mel_out": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "mel_out_postnet": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "gate_out": NeuralType(('B', 'T'), ChannelType()), + "mel_target": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "gate_target": NeuralType(('B', 'T'), ChannelType()), + "target_len": NeuralType(tuple('B'), LengthsType()), + "seq_len": NeuralType(tuple('B'), LengthsType()), } @property def output_ports(self): """Returns definitions of module output ports. """ - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, pad_value: float = -11.52): super().__init__() @@ -476,8 +476,8 @@ def input_ports(self): # "mel_target": NeuralType( # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} # ), - "target_len": NeuralType(LengthsType(), tuple('B')), - "mel_target": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), + "target_len": NeuralType(tuple('B'), LengthsType()), + "mel_target": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), } @property @@ -485,7 +485,7 @@ def output_ports(self): """Returns definitions of module output ports. """ # return {"gate_target": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} - return {"gate_target": NeuralType(ChannelType(), ('B', 'T'))} + return {"gate_target": NeuralType(('B', 'T'), ChannelType())} def forward(self, target_len, mel_target): max_len = mel_target.shape[2] diff --git a/nemo/collections/tts/waveglow_modules.py b/nemo/collections/tts/waveglow_modules.py index 06439d272ff2..1acffdb59d73 100644 --- a/nemo/collections/tts/waveglow_modules.py +++ b/nemo/collections/tts/waveglow_modules.py @@ -47,8 +47,8 @@ def input_ports(self): # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} # ), # "audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "mel_spectrogram": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')), - "audio": NeuralType(AudioSignal(), ('B', 'T')), + "mel_spectrogram": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "audio": NeuralType(('B', 'T'), AudioSignal()), } @property @@ -60,9 +60,9 @@ def output_ports(self): # "audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "log_s_list": NeuralType(), # "log_det_W_list": NeuralType(), - "audio": NeuralType(AudioSignal(), ('B', 'T')), - "log_s_list": NeuralType(ChannelType()), - "log_det_W_list": NeuralType(ChannelType()), + "audio": NeuralType(('B', 'T'), AudioSignal()), + "log_s_list": NeuralType(elements_type=ChannelType()), + "log_det_W_list": NeuralType(elements_type=ChannelType()), } def __init__( @@ -143,7 +143,7 @@ def input_ports(self): # "mel_spectrogram": NeuralType( # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} # ) - "mel_spectrogram": NeuralType(MelSpectrogramType(), ('B', 'D', 'T')) + "mel_spectrogram": NeuralType(('B', 'D', 'T'), MelSpectrogramType()) } @property @@ -151,7 +151,7 @@ def output_ports(self): """Returns definitions of module output ports. """ # return {"audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} - return {"audio": NeuralType(AudioSignal(), ('B', 'T'))} + return {"audio": NeuralType(('B', 'T'), AudioSignal())} def __str__(self): return "WaveGlowNM" @@ -233,16 +233,16 @@ def input_ports(self): # "z": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "log_s_list": NeuralType(), # "log_det_W_list": NeuralType(), - "z": NeuralType(AudioSignal(), ('B', 'T')), - "log_s_list": NeuralType(ChannelType()), - "log_det_W_list": NeuralType(ChannelType()), + "z": NeuralType(('B', 'T'), AudioSignal()), + "log_s_list": NeuralType(elements_type=ChannelType()), + "log_det_W_list": NeuralType(elements_type=ChannelType()), } @property def output_ports(self): """Returns definitions of module output ports. """ - return {"loss": NeuralType(LossType())} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, sigma: float = 1.0): super().__init__() diff --git a/nemo/core/neural_factory.py b/nemo/core/neural_factory.py index 0692ea46095c..7a7a5154ef2c 100644 --- a/nemo/core/neural_factory.py +++ b/nemo/core/neural_factory.py @@ -463,14 +463,12 @@ def __get_pytorch_module(self, name, collection, params, pretrained): _nm_name = name.lower() if _nm_name == "resnet18": input_ports = { - "x": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224), - } - ) + "x": NeuralType({ + 0: AxisType(BatchTag), + 1: AxisType(ChannelTag), + 2: AxisType(HeightTag, 224), + 3: AxisType(WidthTag, 224), + }) } output_ports = {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} @@ -483,14 +481,12 @@ def __get_pytorch_module(self, name, collection, params, pretrained): ) elif _nm_name == "resnet50": input_ports = { - "x": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224), - } - ) + "x": NeuralType({ + 0: AxisType(BatchTag), + 1: AxisType(ChannelTag), + 2: AxisType(HeightTag, 224), + 3: AxisType(WidthTag, 224), + }) } output_ports = {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} diff --git a/nemo/core/neural_modules.py b/nemo/core/neural_modules.py index 25e42c7824fa..74bf9cb6108d 100644 --- a/nemo/core/neural_modules.py +++ b/nemo/core/neural_modules.py @@ -311,6 +311,7 @@ def __call__(self, **kwargs): return result + def __str__(self): return self.__class__.__name__ diff --git a/nemo/core/neural_types/axes.py b/nemo/core/neural_types/axes.py index acb9a27646f2..dcc2e7736ff6 100644 --- a/nemo/core/neural_types/axes.py +++ b/nemo/core/neural_types/axes.py @@ -32,11 +32,15 @@ class AxisKindAbstract(Enum): class AxisKind(AxisKindAbstract): """This Enum represents what does varying axis dimension mean. - For example, does this dimension correspond to width, batch, time, etc.""" + For example, does this dimension correspond to width, batch, time, etc. + The "Dimension" and "Channel" kinds are the same and used to represent + a general axis. + """ Batch = 0 Time = 1 Dimension = 2 + Channel = 2 Width = 3 Height = 4 @@ -64,9 +68,10 @@ def from_str(label): class AxisType(object): """This class represents axis semantics and (optionally) it's dimensionality Args: - kind (AxisKindAbstract): - size (int, optional): - is_list (bool, default=False): + kind (AxisKindAbstract): what kind of axis it is? For example Batch, Height, etc. + size (int, optional): specify if the axis should have a fixed size. By default it is set to None and you + typically do not want to set it for Batch and Time + is_list (bool, default=False): whether this is a list or a tensor axis """ def __init__(self, kind: AxisKindAbstract, size: Optional[int] = None, is_list=False): diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index dd7fcd754f98..59f818ee2688 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -44,19 +44,25 @@ class ElementType(ABC): """Abstract class defining semantics of the tensor elements. - We are replying on Python for inheritance checking""" + We are relying on Python for inheritance checking""" - @abstractmethod - def __str__(cls): - pass + def __str__(self): + self.__doc__ @property def type_parameters(self) -> Dict: - """Override this property to parametrize your type""" + """Override this property to parametrize your type. For example, you can specify 'storage' type such as + float, int, bool with 'dtype' keyword. Another example, is if you want to represent a signal with a + particular property (say, sample frequency), then you can put sample_freq->value in there. + When two types are compared their type_parameters must match.""" return {} @property def fields(self) -> Optional[Tuple]: + """This should be used to logically represent tuples/structures. For example, if you want to represent a + bounding box (x, y, width, height) you can put a tuple with names ('x', y', 'w', 'h') in here. + Under the hood this should be converted to the last tesnor dimension of fixed size = len(fields). + When two types are compared their fields must match.""" return None def compare(self, second) -> NeuralTypeComparisonResult: @@ -92,67 +98,61 @@ def compare(self, second) -> NeuralTypeComparisonResult: class VoidType(ElementType): - """Void-like type which is compatible with everything + """Void-like type which is compatible with everything. + It is a good practice to use this type only as necessary. + For example, when you need template-like functionality. """ - - def __str__(self): - return str("void type. compatible with everything") - def compare(cls, second: abc.ABCMeta) -> NeuralTypeComparisonResult: return NeuralTypeComparisonResult.SAME # TODO: Consider moving these files elsewhere class ChannelType(ElementType): - def __str__(self): - return "convolutional channel value" + """Element to represent convolutional input/output channel. + """ class EmbeddedTextType(ChannelType): - def __str__(self): - return "text embedding" + """Element to represent output on word/text embedding layers + """ class LogitsType(ElementType): - def __str__(self): - return "neural type representing logits" + """Element type to represent logits""" class LogprobsType(ElementType): - def __str__(self): - return "neural type representing log probabilities" + """Element type to represent log-probabilities. For example, outputs of softmax layers.""" class LabelsType(ElementType): - def __str__(self): - return "neural type representing labels" + """Element type to represent some sort of labels. This is often used as a base class to create + a more concrete types such as RegressionValuesType, etc.""" class LengthsType(ElementType): - def __str__(self): - return "neural type representing lengths of something" + """Element type representing lengths of something""" class LossType(ElementType): - def __str__(self): - return "neural type representing loss value" + """Element type to represent outputs of Loss modules""" class EncodedRepresentation(ChannelType): - def __str__(self): - return "encoded representation, for example, encoder's output" + """Element type to represent encoded representation, for example, encoder's output""" class AcousticEncodedRepresentation(EncodedRepresentation): - def __str__(self): - return "encoded representation returned by the acoustic encoder model" + """Element type to represent encoded representation returned by the acoustic encoder model""" class AudioSignal(ElementType): - def __str__(self): - return "encoded representation returned by the acoustic encoder model" - - def __init__(self, freq=16000): + """Element type to represent encoded representation returned by the acoustic encoder model + Args: + freq (int): sampling frequency of a signal. Note that two signals will only be the same if their + freq is the same. + """ + def __init__(self, freq: int = 16000): self._params = {} self._params['freq'] = freq @@ -162,30 +162,24 @@ def type_parameters(self): class SpectrogramType(ChannelType): - def __str__(self): - return "generic spectorgram type" + """Element type to represent generic spectrogram signal""" class MelSpectrogramType(SpectrogramType): - def __str__(self): - return "mel spectorgram type" + """Element type to represent mel spectrogram signal""" class MFCCSpectrogramType(SpectrogramType): - def __str__(self): - return "mfcc spectorgram type" + """Element type to represent MFCC spectrogram signal""" class PredictionsType(LabelsType): - def __str__(self): - return "predictions values type" + """Element type to represent some sort of predictions returned by model""" class RegressionValuesType(PredictionsType): - def __str__(self): - return "regression values type" + """Element type to represent labels for regression task""" class CategoricalValuesType(PredictionsType): - def __str__(self): - return "regression values type" + """Element type to represent labels for categorical classification task""" diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py index b0c1a310ec33..a2070c354b3c 100644 --- a/nemo/core/neural_types/neural_type.py +++ b/nemo/core/neural_types/neural_type.py @@ -27,16 +27,26 @@ import uuid from typing import Optional, Tuple -from .axes import AxisKind, AxisType -from .comparison import NeuralTypeComparisonResult -from .elements import * +from nemo.core.neural_types.axes import AxisKind, AxisType +from nemo.core.neural_types.comparison import NeuralTypeComparisonResult +from nemo.core.neural_types.elements import * class NeuralType(object): """This is the main class which would represent neural type concept. - nmTensors derives from this. It is used to represent *the types* of inputs and outputs.""" - - def __init__(self, elements_type: ElementType = VoidType(), axes: Optional[Tuple] = None, optional=False): + nmTensors derives from this. It is used to represent *the types* of inputs and outputs. + Args: + axes (Optional[Tuple]): a tuple of AxisTypes objects representing the semantics of what varying each axis means + You can use a short, string-based form here. For example: ('B', 'C', 'H', 'W') would correspond to an NCHW + format frequently used in computer vision. ('B', 'T', 'D') is frequently used for signal processing and + means [batch, time, dimension/channel]. + elements_type (ElementType): an instance of ElementType class representing the semantics of what is stored + inside the tensor. For example: logits (LogitsType), log probabilities (LogprobType), etc. + optional (bool): By default, this is false. If set to True, it would means that input to the port of this + type can be optional. + """ + + def __init__(self, axes: Optional[Tuple] = None, elements_type: ElementType = VoidType(), optional=False): if not isinstance(elements_type, ElementType): raise ValueError( f"elements_type of NeuralType must be an instance of a class derived from ElementType." @@ -59,6 +69,8 @@ def __init__(self, elements_type: ElementType = VoidType(), axes: Optional[Tuple self.optional = optional def compare(self, second) -> NeuralTypeComparisonResult: + """Performs neural type comparison of self with second. When you chain two modules' inputs/outputs via + __call__ method, this comparison will be called to ensure neural type compatibility.""" # First, handle dimensionality axes_a = self.axes axes_b = second.axes @@ -180,7 +192,7 @@ def __init__(self, producer, producer_args, name, ntype=None): producer_args (dict): a dictionary of port_name->NmTensor value of arguments which were sent to producer to create this """ - super(NmTensor, self).__init__(elements_type=ntype.elements_type, axes=ntype.axes, optional=ntype.optional) + super(NmTensor, self).__init__(axes=ntype.axes, elements_type=ntype.elements_type, optional=ntype.optional) self._producer = producer self._producer_args = producer_args self._name = name diff --git a/tests/asr/test_zeroDS.py b/tests/asr/test_zeroDS.py index e2c9bd6f7373..6dc9926a597d 100644 --- a/tests/asr/test_zeroDS.py +++ b/tests/asr/test_zeroDS.py @@ -108,12 +108,11 @@ def test_asr_with_zero_ds(self): # "transcript": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag, dim=64)}), # "transcript_length": NeuralType({0: AxisType(BatchTag)}), "processed_signal": NeuralType( - SpectrogramType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 64), AxisType(AxisKind.Time, 64)), - ), - "processed_length": NeuralType(LengthsType(), tuple('B')), - "transcript": NeuralType(LabelsType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Time, 64))), - "transcript_length": NeuralType(LengthsType(), tuple('B')), + SpectrogramType()), + "processed_length": NeuralType(tuple('B'), LengthsType()), + "transcript": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Time, 64)), LabelsType()), + "transcript_length": NeuralType(tuple('B'), LengthsType()), }, ) diff --git a/tests/core/test_infer.py b/tests/core/test_infer.py index 811da4560a63..d9b11a3997da 100644 --- a/tests/core/test_infer.py +++ b/tests/core/test_infer.py @@ -31,12 +31,12 @@ def __init__(self): @property def input_ports(self): # return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} - return {"mod_in": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))} + return {"mod_in": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType())} @property def output_ports(self): # return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} - return {"mod_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))} + return {"mod_out": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType())} def forward(self, mod_in): return mod_in + 10 @@ -48,11 +48,11 @@ def __init__(self): @property def input_ports(self): - return {"mod_in": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))} + return {"mod_in": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType())} @property def output_ports(self): - return {"mod_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)))} + return {"mod_out": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType())} def forward(self, mod_in): return mod_in - 10 @@ -69,7 +69,7 @@ def test_infer_caching(self): dtype=torch.FloatTensor, batch_size=1, output_ports={ - "dl_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1))) + "dl_out": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType()) }, ) addten = AddsTen() @@ -98,7 +98,7 @@ def test_infer_errors(self): dtype=torch.FloatTensor, batch_size=1, output_ports={ - "dl_out": NeuralType(ChannelType(), (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1))) + "dl_out": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType()) }, ) addten = AddsTen() diff --git a/tests/core/test_neural_modules.py b/tests/core/test_neural_modules.py index 92dd80237d91..5484285b8e50 100644 --- a/tests/core/test_neural_modules.py +++ b/tests/core/test_neural_modules.py @@ -23,9 +23,8 @@ class NeuralModulesTests(NeMoUnitTest): def test_call_TaylorNet(self): - x_tg = nemo.core.neural_modules.NmTensor( - producer=None, producer_args=None, name=None, ntype=NeuralType(ChannelType(), ('B', 'D')) - ) + x_tg = nemo.core.neural_modules.NmTensor(producer=None, producer_args=None, name=None, ntype=NeuralType(( + 'B', 'D'), ChannelType())) tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) # note that real port's name: x was used diff --git a/tests/core/test_neural_modules_pytorch.py b/tests/core/test_neural_modules_pytorch.py index f6d1ce30953b..8f43f2d7356f 100644 --- a/tests/core/test_neural_modules_pytorch.py +++ b/tests/core/test_neural_modules_pytorch.py @@ -69,9 +69,8 @@ def test_constructor_TaylorNet(self): self.assertEqual(tn.init_params["dim"], 4) def test_call_TaylorNet(self): - x_tg = nemo.core.neural_modules.NmTensor( - producer=None, producer_args=None, name=None, ntype=NeuralType(ChannelType(), ('B', 'D')) - ) + x_tg = nemo.core.neural_modules.NmTensor(producer=None, producer_args=None, name=None, ntype=NeuralType( + elements_type=ChannelType(), axes=('B', 'D'))) tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) # note that real port's name: x was used diff --git a/tests/core/test_neural_types.py b/tests/core/test_neural_types.py index a860c889bc9f..c82740c6a712 100644 --- a/tests/core/test_neural_types.py +++ b/tests/core/test_neural_types.py @@ -36,17 +36,16 @@ class NeuralTypeSystemTests(NeMoUnitTest): def test_short_vs_long_version(self): long_version = NeuralType( - elements_type=AcousticEncodedRepresentation(), axes=(AxisType(AxisKind.Batch, None), AxisType(AxisKind.Dimension, None), AxisType(AxisKind.Time, None)), - ) - short_version = NeuralType(AcousticEncodedRepresentation(), ('B', 'D', 'T')) + elements_type=AcousticEncodedRepresentation()) + short_version = NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()) self.assertEqual(long_version.compare(short_version), NeuralTypeComparisonResult.SAME) self.assertEqual(short_version.compare(long_version), NeuralTypeComparisonResult.SAME) def test_parameterized_type_audio_sampling_frequency(self): - audio16K = NeuralType(AudioSignal(16000), axes=('B', 'T')) - audio8K = NeuralType(AudioSignal(8000), axes=('B', 'T')) - another16K = NeuralType(AudioSignal(16000), axes=('B', 'T')) + audio16K = NeuralType(axes=('B', 'T'), elements_type=AudioSignal(16000)) + audio8K = NeuralType(axes=('B', 'T'), elements_type=AudioSignal(8000)) + another16K = NeuralType(axes=('B', 'T'), elements_type=AudioSignal(16000)) self.assertEqual(audio8K.compare(audio16K), NeuralTypeComparisonResult.SAME_TYPE_INCOMPATIBLE_PARAMS) self.assertEqual(audio16K.compare(audio8K), NeuralTypeComparisonResult.SAME_TYPE_INCOMPATIBLE_PARAMS) @@ -60,14 +59,14 @@ def test_transpose_same_1(self): self.assertEqual(type2.compare(type1), NeuralTypeComparisonResult.TRANSPOSE_SAME) def test_transpose_same_2(self): - audio16K = NeuralType(AudioSignal(16000), axes=('B', 'T')) - audio16K_t = NeuralType(AudioSignal(16000), axes=('T', 'B')) + audio16K = NeuralType(axes=('B', 'T'), elements_type=AudioSignal(16000)) + audio16K_t = NeuralType(axes=('T', 'B'), elements_type=AudioSignal(16000)) self.assertEqual(audio16K.compare(audio16K_t), NeuralTypeComparisonResult.TRANSPOSE_SAME) def test_inheritance_spec_augment_example(self): - input = NeuralType(SpectrogramType(), ('B', 'D', 'T')) - out1 = NeuralType(MelSpectrogramType(), ('B', 'D', 'T')) - out2 = NeuralType(MFCCSpectrogramType(), ('B', 'D', 'T')) + input = NeuralType(('B', 'D', 'T'), SpectrogramType()) + out1 = NeuralType(('B', 'D', 'T'), MelSpectrogramType()) + out2 = NeuralType(('B', 'D', 'T'), MFCCSpectrogramType()) self.assertEqual(out1.compare(out2), NeuralTypeComparisonResult.INCOMPATIBLE) self.assertEqual(out2.compare(out1), NeuralTypeComparisonResult.INCOMPATIBLE) self.assertEqual(input.compare(out1), NeuralTypeComparisonResult.GREATER) @@ -82,63 +81,51 @@ def test_singletone(self): self.assertEqual(loss_output2.compare(loss_output1), NeuralTypeComparisonResult.SAME) def test_list_of_lists(self): - T1 = NeuralType( - elements_type=ChannelType(), - axes=( - AxisType(kind=AxisKind.Batch, size=None, is_list=True), - AxisType(kind=AxisKind.Time, size=None, is_list=True), - AxisType(kind=AxisKind.Dimension, size=32, is_list=False), - AxisType(kind=AxisKind.Dimension, size=128, is_list=False), - AxisType(kind=AxisKind.Dimension, size=256, is_list=False), - ), - ) - T2 = NeuralType( - elements_type=ChannelType(), - axes=( - AxisType(kind=AxisKind.Batch, size=None, is_list=False), - AxisType(kind=AxisKind.Time, size=None, is_list=False), - AxisType(kind=AxisKind.Dimension, size=32, is_list=False), - AxisType(kind=AxisKind.Dimension, size=128, is_list=False), - AxisType(kind=AxisKind.Dimension, size=256, is_list=False), - ), - ) + T1 = NeuralType(axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind.Time, size=None, is_list=True), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), elements_type=ChannelType()) + T2 = NeuralType(axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), elements_type=ChannelType()) # TODO: should this be incompatible instead??? self.assertEqual(T1.compare(T2), NeuralTypeComparisonResult.TRANSPOSE_SAME) def test_void(self): - btc_spctr = NeuralType(SpectrogramType(), ('B', 'T', 'C')) - btc_spct_bad = NeuralType(SpectrogramType(), ('B', 'T')) - btc_void = NeuralType(VoidType(), ('B', 'T', 'C')) + btc_spctr = NeuralType(('B', 'T', 'C'), SpectrogramType()) + btc_spct_bad = NeuralType(('B', 'T'), SpectrogramType()) + btc_void = NeuralType(('B', 'T', 'C'), VoidType()) self.assertEqual(btc_void.compare(btc_spctr), NeuralTypeComparisonResult.SAME) self.assertEqual(btc_spctr.compare(btc_void), NeuralTypeComparisonResult.INCOMPATIBLE) self.assertEqual(btc_void.compare(btc_spct_bad), NeuralTypeComparisonResult.INCOMPATIBLE) def test_big_void(self): - big_void_1 = NeuralType(VoidType()) + big_void_1 = NeuralType(elements_type=VoidType()) big_void_2 = NeuralType() - btc_spctr = NeuralType(SpectrogramType(), ('B', 'T', 'C')) - btc_spct_bad = NeuralType(SpectrogramType(), ('B', 'T')) - t1 = NeuralType( - elements_type=ChannelType(), - axes=( - AxisType(kind=AxisKind.Batch, size=None, is_list=True), - AxisType(kind=AxisKind.Time, size=None, is_list=True), - AxisType(kind=AxisKind.Dimension, size=32, is_list=False), - AxisType(kind=AxisKind.Dimension, size=128, is_list=False), - AxisType(kind=AxisKind.Dimension, size=256, is_list=False), - ), - ) - t2 = NeuralType( - elements_type=ChannelType(), - axes=( - AxisType(kind=AxisKind.Batch, size=None, is_list=False), - AxisType(kind=AxisKind.Time, size=None, is_list=False), - AxisType(kind=AxisKind.Dimension, size=32, is_list=False), - AxisType(kind=AxisKind.Dimension, size=128, is_list=False), - AxisType(kind=AxisKind.Dimension, size=256, is_list=False), - ), - ) + btc_spctr = NeuralType(('B', 'T', 'C'), SpectrogramType()) + btc_spct_bad = NeuralType(('B', 'T'), SpectrogramType()) + t1 = NeuralType(axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind.Time, size=None, is_list=True), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), elements_type=ChannelType()) + t2 = NeuralType(axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), elements_type=ChannelType()) self.assertEqual(big_void_1.compare(btc_spctr), NeuralTypeComparisonResult.SAME) self.assertEqual(big_void_1.compare(btc_spct_bad), NeuralTypeComparisonResult.SAME) @@ -169,10 +156,8 @@ def wrong(): self.assertRaises(NeuralPortNmTensorMismatchError, wrong) def test_unspecified_dimensions(self): - t0 = NeuralType( - SpectrogramType(), - (AxisType(AxisKind.Batch, 64), AxisType(AxisKind.Time, 10), AxisType(AxisKind.Dimension, 128)), - ) - t1 = NeuralType(SpectrogramType(), ('B', 'T', 'C')) + t0 = NeuralType((AxisType(AxisKind.Batch, 64), AxisType(AxisKind.Time, 10), AxisType(AxisKind.Dimension, 128)), + SpectrogramType()) + t1 = NeuralType(('B', 'T', 'C'), SpectrogramType()) self.assertEqual(t1.compare(t0), NeuralTypeComparisonResult.SAME) self.assertEqual(t0.compare(t1), NeuralTypeComparisonResult.DIM_INCOMPATIBLE) From c34aa733954337f398acd6d940868be3e6d26ecc Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Wed, 12 Feb 2020 12:51:20 -0800 Subject: [PATCH 28/30] fix style Signed-off-by: Oleksii Kuchaiev --- .../pytorch/torchvision/data/image_folder.py | 14 ++-- nemo/core/neural_factory.py | 28 ++++--- nemo/core/neural_modules.py | 1 - nemo/core/neural_types/elements.py | 2 + tests/asr/test_zeroDS.py | 3 +- tests/core/test_neural_modules.py | 5 +- tests/core/test_neural_modules_pytorch.py | 8 +- tests/core/test_neural_types.py | 77 +++++++++++-------- 8 files changed, 83 insertions(+), 55 deletions(-) diff --git a/nemo/backends/pytorch/torchvision/data/image_folder.py b/nemo/backends/pytorch/torchvision/data/image_folder.py index b775efb1a8f5..5c4946b5cdd5 100644 --- a/nemo/backends/pytorch/torchvision/data/image_folder.py +++ b/nemo/backends/pytorch/torchvision/data/image_folder.py @@ -27,12 +27,14 @@ def output_ports(self): 0: AxisType(BatchTag) """ return { - "image": NeuralType({ - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, self._input_size), - 3: AxisType(WidthTag, self._input_size), - }), + "image": NeuralType( + { + 0: AxisType(BatchTag), + 1: AxisType(ChannelTag), + 2: AxisType(HeightTag, self._input_size), + 3: AxisType(WidthTag, self._input_size), + } + ), "label": NeuralType({0: AxisType(BatchTag)}), } diff --git a/nemo/core/neural_factory.py b/nemo/core/neural_factory.py index 7a7a5154ef2c..0692ea46095c 100644 --- a/nemo/core/neural_factory.py +++ b/nemo/core/neural_factory.py @@ -463,12 +463,14 @@ def __get_pytorch_module(self, name, collection, params, pretrained): _nm_name = name.lower() if _nm_name == "resnet18": input_ports = { - "x": NeuralType({ - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224), - }) + "x": NeuralType( + { + 0: AxisType(BatchTag), + 1: AxisType(ChannelTag), + 2: AxisType(HeightTag, 224), + 3: AxisType(WidthTag, 224), + } + ) } output_ports = {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} @@ -481,12 +483,14 @@ def __get_pytorch_module(self, name, collection, params, pretrained): ) elif _nm_name == "resnet50": input_ports = { - "x": NeuralType({ - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224), - }) + "x": NeuralType( + { + 0: AxisType(BatchTag), + 1: AxisType(ChannelTag), + 2: AxisType(HeightTag, 224), + 3: AxisType(WidthTag, 224), + } + ) } output_ports = {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} diff --git a/nemo/core/neural_modules.py b/nemo/core/neural_modules.py index 74bf9cb6108d..25e42c7824fa 100644 --- a/nemo/core/neural_modules.py +++ b/nemo/core/neural_modules.py @@ -311,7 +311,6 @@ def __call__(self, **kwargs): return result - def __str__(self): return self.__class__.__name__ diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index 59f818ee2688..5d410b90ebde 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -102,6 +102,7 @@ class VoidType(ElementType): It is a good practice to use this type only as necessary. For example, when you need template-like functionality. """ + def compare(cls, second: abc.ABCMeta) -> NeuralTypeComparisonResult: return NeuralTypeComparisonResult.SAME @@ -152,6 +153,7 @@ class AudioSignal(ElementType): freq (int): sampling frequency of a signal. Note that two signals will only be the same if their freq is the same. """ + def __init__(self, freq: int = 16000): self._params = {} self._params['freq'] = freq diff --git a/tests/asr/test_zeroDS.py b/tests/asr/test_zeroDS.py index 6dc9926a597d..a413e1f2e514 100644 --- a/tests/asr/test_zeroDS.py +++ b/tests/asr/test_zeroDS.py @@ -109,7 +109,8 @@ def test_asr_with_zero_ds(self): # "transcript_length": NeuralType({0: AxisType(BatchTag)}), "processed_signal": NeuralType( (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 64), AxisType(AxisKind.Time, 64)), - SpectrogramType()), + SpectrogramType(), + ), "processed_length": NeuralType(tuple('B'), LengthsType()), "transcript": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Time, 64)), LabelsType()), "transcript_length": NeuralType(tuple('B'), LengthsType()), diff --git a/tests/core/test_neural_modules.py b/tests/core/test_neural_modules.py index 5484285b8e50..04e82c2802bf 100644 --- a/tests/core/test_neural_modules.py +++ b/tests/core/test_neural_modules.py @@ -23,8 +23,9 @@ class NeuralModulesTests(NeMoUnitTest): def test_call_TaylorNet(self): - x_tg = nemo.core.neural_modules.NmTensor(producer=None, producer_args=None, name=None, ntype=NeuralType(( - 'B', 'D'), ChannelType())) + x_tg = nemo.core.neural_modules.NmTensor( + producer=None, producer_args=None, name=None, ntype=NeuralType(('B', 'D'), ChannelType()) + ) tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) # note that real port's name: x was used diff --git a/tests/core/test_neural_modules_pytorch.py b/tests/core/test_neural_modules_pytorch.py index 8f43f2d7356f..d0cfbc44c62b 100644 --- a/tests/core/test_neural_modules_pytorch.py +++ b/tests/core/test_neural_modules_pytorch.py @@ -69,8 +69,12 @@ def test_constructor_TaylorNet(self): self.assertEqual(tn.init_params["dim"], 4) def test_call_TaylorNet(self): - x_tg = nemo.core.neural_modules.NmTensor(producer=None, producer_args=None, name=None, ntype=NeuralType( - elements_type=ChannelType(), axes=('B', 'D'))) + x_tg = nemo.core.neural_modules.NmTensor( + producer=None, + producer_args=None, + name=None, + ntype=NeuralType(elements_type=ChannelType(), axes=('B', 'D')), + ) tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) # note that real port's name: x was used diff --git a/tests/core/test_neural_types.py b/tests/core/test_neural_types.py index c82740c6a712..e31fd08941d3 100644 --- a/tests/core/test_neural_types.py +++ b/tests/core/test_neural_types.py @@ -37,7 +37,8 @@ class NeuralTypeSystemTests(NeMoUnitTest): def test_short_vs_long_version(self): long_version = NeuralType( axes=(AxisType(AxisKind.Batch, None), AxisType(AxisKind.Dimension, None), AxisType(AxisKind.Time, None)), - elements_type=AcousticEncodedRepresentation()) + elements_type=AcousticEncodedRepresentation(), + ) short_version = NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()) self.assertEqual(long_version.compare(short_version), NeuralTypeComparisonResult.SAME) self.assertEqual(short_version.compare(long_version), NeuralTypeComparisonResult.SAME) @@ -81,20 +82,26 @@ def test_singletone(self): self.assertEqual(loss_output2.compare(loss_output1), NeuralTypeComparisonResult.SAME) def test_list_of_lists(self): - T1 = NeuralType(axes=( - AxisType(kind=AxisKind.Batch, size=None, is_list=True), - AxisType(kind=AxisKind.Time, size=None, is_list=True), - AxisType(kind=AxisKind.Dimension, size=32, is_list=False), - AxisType(kind=AxisKind.Dimension, size=128, is_list=False), - AxisType(kind=AxisKind.Dimension, size=256, is_list=False), - ), elements_type=ChannelType()) - T2 = NeuralType(axes=( - AxisType(kind=AxisKind.Batch, size=None, is_list=False), - AxisType(kind=AxisKind.Time, size=None, is_list=False), - AxisType(kind=AxisKind.Dimension, size=32, is_list=False), - AxisType(kind=AxisKind.Dimension, size=128, is_list=False), - AxisType(kind=AxisKind.Dimension, size=256, is_list=False), - ), elements_type=ChannelType()) + T1 = NeuralType( + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind.Time, size=None, is_list=True), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + elements_type=ChannelType(), + ) + T2 = NeuralType( + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + elements_type=ChannelType(), + ) # TODO: should this be incompatible instead??? self.assertEqual(T1.compare(T2), NeuralTypeComparisonResult.TRANSPOSE_SAME) @@ -112,20 +119,26 @@ def test_big_void(self): btc_spctr = NeuralType(('B', 'T', 'C'), SpectrogramType()) btc_spct_bad = NeuralType(('B', 'T'), SpectrogramType()) - t1 = NeuralType(axes=( - AxisType(kind=AxisKind.Batch, size=None, is_list=True), - AxisType(kind=AxisKind.Time, size=None, is_list=True), - AxisType(kind=AxisKind.Dimension, size=32, is_list=False), - AxisType(kind=AxisKind.Dimension, size=128, is_list=False), - AxisType(kind=AxisKind.Dimension, size=256, is_list=False), - ), elements_type=ChannelType()) - t2 = NeuralType(axes=( - AxisType(kind=AxisKind.Batch, size=None, is_list=False), - AxisType(kind=AxisKind.Time, size=None, is_list=False), - AxisType(kind=AxisKind.Dimension, size=32, is_list=False), - AxisType(kind=AxisKind.Dimension, size=128, is_list=False), - AxisType(kind=AxisKind.Dimension, size=256, is_list=False), - ), elements_type=ChannelType()) + t1 = NeuralType( + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind.Time, size=None, is_list=True), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + elements_type=ChannelType(), + ) + t2 = NeuralType( + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + elements_type=ChannelType(), + ) self.assertEqual(big_void_1.compare(btc_spctr), NeuralTypeComparisonResult.SAME) self.assertEqual(big_void_1.compare(btc_spct_bad), NeuralTypeComparisonResult.SAME) @@ -156,8 +169,10 @@ def wrong(): self.assertRaises(NeuralPortNmTensorMismatchError, wrong) def test_unspecified_dimensions(self): - t0 = NeuralType((AxisType(AxisKind.Batch, 64), AxisType(AxisKind.Time, 10), AxisType(AxisKind.Dimension, 128)), - SpectrogramType()) + t0 = NeuralType( + (AxisType(AxisKind.Batch, 64), AxisType(AxisKind.Time, 10), AxisType(AxisKind.Dimension, 128)), + SpectrogramType(), + ) t1 = NeuralType(('B', 'T', 'C'), SpectrogramType()) self.assertEqual(t1.compare(t0), NeuralTypeComparisonResult.SAME) self.assertEqual(t0.compare(t1), NeuralTypeComparisonResult.DIM_INCOMPATIBLE) From b69269296ee79c292c5de0f0b59b730794f619f2 Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Wed, 12 Feb 2020 13:17:38 -0800 Subject: [PATCH 29/30] fix trade example Signed-off-by: Oleksii Kuchaiev --- .../nlp/nm/data_layers/state_tracking_trade_datalayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py index 8435dc976b8c..2b7e3800928a 100644 --- a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py @@ -76,7 +76,7 @@ def output_ports(self): # "turn_domain": NeuralType(None), "src_ids": NeuralType(('B', 'T'), ChannelType()), "src_lens": NeuralType(tuple('B'), LengthsType()), - "tgt_ids": NeuralType(('B', 'D', 'T'), ChannelType()), + "tgt_ids": NeuralType(('B', 'D', 'T'), LabelsType()), "tgt_lens": NeuralType(('B', 'D'), LengthsType()), "gating_labels": NeuralType(('B', 'D'), LabelsType()), "turn_domain": NeuralType(), From 196a24891e355c52140621f0e03c53fad9e7855d Mon Sep 17 00:00:00 2001 From: Oleksii Kuchaiev Date: Wed, 12 Feb 2020 14:25:13 -0800 Subject: [PATCH 30/30] fix trade example Signed-off-by: Oleksii Kuchaiev --- nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py b/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py index ea065494e8ee..aa67439b9262 100644 --- a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py +++ b/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py @@ -74,7 +74,7 @@ def input_ports(self): # "targets": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), "logits": NeuralType(('B', 'T', 'D', 'D'), LogitsType()), - "targets": NeuralType(('B', 'D', 'T'), ChannelType()), + "targets": NeuralType(('B', 'D', 'T'), LabelsType()), "loss_mask": NeuralType(('B', 'D'), LengthsType()), }