diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2d9b3cf64eab..2eac5e58c3af 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -70,12 +70,16 @@ To release a new version, please update the changelog as followed:
 ## [Unreleased]
 
 ### Added
+- New Neural Type System and its tests.
+([PR #307](https://github.com/NVIDIA/NeMo/pull/307)) - @okuchaiev
 - Named tensors tuple module's output for graph construction.
 ([PR #268](https://github.com/NVIDIA/NeMo/pull/268)) - @stasbel
 - Introduced the `deprecated` decorator.
 ([PR #298](https://github.com/NVIDIA/NeMo/pull/298)) - @tkornuta-nvidia
 
 ### Changed
+- All collections changed to use New Neural Type System.
+([PR #307](https://github.com/NVIDIA/NeMo/pull/307)) - @okuchaiev
 - Additional Collections Repositories merged into core `nemo_toolkit` package.
 ([PR #289](https://github.com/NVIDIA/NeMo/pull/289)) - @DEKHTIARJonathan
 - Refactor manifest files parsing and processing for re-using.
diff --git a/examples/start_here/chatbot_example.py b/examples/start_here/chatbot_example.py
index c5107411525d..ca2950c22bce 100644
--- a/examples/start_here/chatbot_example.py
+++ b/examples/start_here/chatbot_example.py
@@ -65,10 +65,12 @@ def outputs2words(tensors, vocab):
     tensors=[loss, src, outputs_inf, tgt],
     print_func=lambda x: outputs2words(x, dl.voc.index2word),
 )
+num_epochs = 1
+logging.info(f"Training only for {num_epochs} epoch(s). Train longer (~10-20 epochs) for convergence.")
 # Start training
 nf.train(
     tensors_to_optimize=[loss],
     callbacks=[callback],
     optimizer="adam",
-    optimization_params={"num_epochs": 3, "lr": 0.001},
+    optimization_params={"num_epochs": num_epochs, "lr": 0.001},
 )
diff --git a/nemo/backends/pytorch/actions.py b/nemo/backends/pytorch/actions.py
index f7061318305c..deec27eee087 100644
--- a/nemo/backends/pytorch/actions.py
+++ b/nemo/backends/pytorch/actions.py
@@ -919,10 +919,10 @@ def __module_export(
         dynamic_axes = defaultdict(list)
 
         def __extract_dynamic_axes(port_name: str, ntype: NeuralType, dynamic_axes: defaultdict):
-            if ntype.axis2type:
-                for axis_id, axistype in ntype.axis2type.items():
-                    if issubclass(axistype.semantics, BatchTag) or issubclass(axistype.semantics, TimeTag):
-                        dynamic_axes[port_name].append(axis_id)
+            if ntype.axes:
+                for ind, axis in enumerate(ntype.axes):
+                    if axis.kind == AxisKind.Batch or axis.kind == AxisKind.Time:
+                        dynamic_axes[port_name].append(ind)
 
         # This is a hack for Jasper to Jarvis export -- need re-design for this
         inputs_to_drop = set()
diff --git a/nemo/backends/pytorch/common/losses.py b/nemo/backends/pytorch/common/losses.py
index f79917720bec..9d14f763e22d 100644
--- a/nemo/backends/pytorch/common/losses.py
+++ b/nemo/backends/pytorch/common/losses.py
@@ -2,7 +2,7 @@
 from torch import nn
 
 from nemo.backends.pytorch.nm import LossNM
-from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, RegressionTag, TimeTag
+from nemo.core.neural_types import LabelsType, LogitsType, LossType, NeuralType, RegressionValuesType
 
 __all__ = ['SequenceLoss', 'CrossEntropyLoss', 'MSELoss']
 
@@ -34,24 +34,8 @@ class SequenceLoss(LossNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        log_probs:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(ChannelTag)
-
-        targets:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
         """
-        return {
-            'log_probs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}),
-            'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-        }
+        return {'log_probs': NeuralType(axes=('B', 'T', 'D')), 'targets': NeuralType(axes=('B', 'T'))}
 
     @property
     def output_ports(self):
@@ -61,7 +45,7 @@ def output_ports(self):
         loss:
             NeuralType(None)
         """
-        return {"loss": NeuralType(None)}
+        return {"loss": NeuralType(elements_type=LossType())}
 
     def __init__(
         self, pad_id=0, smoothing_coef=0.0, sample_wise=False, aux_ctc=False, ctc_initial_coef=0.1, ctc_blank_id=None
@@ -121,19 +105,10 @@ class CrossEntropyLoss(LossNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        logits:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
-
-        labels:
-            0: AxisType(BatchTag)
-
         """
         return {
-            "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
-            "labels": NeuralType({0: AxisType(BatchTag),}),
+            "logits": NeuralType(axes=('B', 'D'), elements_type=LogitsType()),
+            "labels": NeuralType(axes=tuple('B'), elements_type=LabelsType()),
         }
 
     @property
@@ -143,7 +118,7 @@ def output_ports(self):
         loss:
             NeuralType(None)
         """
-        return {"loss": NeuralType(None)}
+        return {"loss": NeuralType(elements_type=LossType())}
 
     def __init__(self, weight=None):
         super().__init__()
@@ -168,8 +143,8 @@ def input_ports(self):
             0: AxisType(RegressionTag)
         """
         return {
-            "preds": NeuralType({0: AxisType(RegressionTag)}),
-            "labels": NeuralType({0: AxisType(RegressionTag)}),
+            "preds": NeuralType(tuple('B'), RegressionValuesType()),
+            "labels": NeuralType(tuple('B'), LabelsType()),
         }
 
     @property
@@ -179,7 +154,7 @@ def output_ports(self):
         loss:
             NeuralType(None)
         """
-        return {"loss": NeuralType(None)}
+        return {"loss": NeuralType(elements_type=LossType())}
 
     def __init__(self):
         super().__init__()
diff --git a/nemo/backends/pytorch/common/other.py b/nemo/backends/pytorch/common/other.py
index 80d43dadae15..c9b9040dd32c 100644
--- a/nemo/backends/pytorch/common/other.py
+++ b/nemo/backends/pytorch/common/other.py
@@ -1,12 +1,7 @@
 # Copyright (c) 2019 NVIDIA Corporation
 """Core PyTorch-base Neural Modules"""
 __all__ = [
-    'SimpleCombiner',
-    'ArgMaxSimple',
-    'TableLookUp',
-    'TableLookUp2',
     'SequenceEmbedding',
-    'SequenceProjection',
     'ZerosLikeNM',
 ]
 
@@ -20,260 +15,20 @@
 from nemo.core.neural_types import *
 
 
-class SimpleCombiner(TrainableNM):
-    """Performs simple combination of two NmTensors. For example, it can
-    perform x1 + x2.
-
-    Args:
-        mode (str): Can be ['add', 'sum', 'max'].
-            Defaults to 'add'.
-
-    """
-
-    @property
-    def input_ports(self):
-        """Returns definitions of module input ports.
-
-        x1:
-            Empty?!?
-
-        x2:
-            Empty?!?
-        """
-        return {"x1": NeuralType({}), "x2": NeuralType({})}
-
-    @property
-    def output_ports(self):
-        """Returns definitions of module output ports.
-
-        combined:
-            None
-        """
-        return {"combined": None}
-
-    def __init__(self, mode="add"):
-        super().__init__()
-        self._mode = mode
-
-    def forward(self, x1, x2):
-        if self._mode == "add" or self._mode == "sum":
-            return x1 + x2
-        elif self._mode == "max":
-            return torch.max(x1, x2, out=None)
-        else:
-            raise NotImplementedError("SimpleCombiner does not have {0} mode".format(self._mode))
-
-
-class ArgMaxSimple(TrainableNM):  # Notice TWO base classes
-    """
-    """
-
-    @property
-    def input_ports(self):
-        """Returns definitions of module input ports.
-
-        x:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
-        """
-        return {"x": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})}
-
-    @property
-    def output_ports(self):
-        """Returns definitions of module output ports.
-
-        values:
-            0: AxisType(BatchTag)
-
-        indices:
-            0: AxisType(BatchTag)
-        """
-        return {
-            "values": NeuralType({0: AxisType(BatchTag)}),
-            "indices": NeuralType({0: AxisType(BatchTag)}),
-        }
-
-    def __init__(self):
-        super().__init__()
-
-    # this method is key method you need to overwrite from PyTorch
-    # nn.Module's API
-    def forward(self, x):
-        values, indices = torch.max(x, 1)
-        return values, indices
-
-
-class TableLookUp(NeuralModule):
-    """Performs a table lookup. For example, convert class ids to names"""
-
-    def __init__(self, ids2classes=None):
-        NeuralModule.__init__(self)
-
-        if ids2classes is None:
-            ids2classes = {}
-        self._ids2classes = ids2classes
-
-    @property
-    def input_ports(self):
-        """Returns definitions of module input ports.
-
-        indices:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-        """
-        return {"indices": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)})}
-
-    @property
-    def output_ports(self):
-        """Returns definitions of module output ports.
-
-        indices:
-            0: AxisType(BatchTag)
-            1: AxisType(TimeTag)
-        """
-        return {"indices": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})}
-
-    def set_weights(self, name2weight: Dict[(str, bool)], name2name_and_transform):
-        pass
-
-    def tie_weights_with(self, module, weight_names):
-        pass
-
-    def save_to(self, path):
-        pass
-
-    def restore_from(self, path):
-        pass
-
-    def freeze(self, weights: Set[str] = None):
-        pass
-
-    def unfreeze(self, weights: Set[str] = None):
-        pass
-
-    def __call__(self, force_pt=False, *input, **kwargs):
-        pt_call = len(input) > 0 or force_pt
-        if pt_call:
-            # [inds] = kwargs.values()
-            # np_inds = inds.detach().cpu().numpy().reshape(-1)
-            # result = [self._ids2classes[i] for i in np_inds]
-            # #result = list(map(lambda x: self._ids2classes[x], np_inds))
-            # return result
-            inds = kwargs["indices"]
-            np_inds = inds.detach().transpose_(1, 0).cpu().numpy().tolist()
-            result = []
-            for lst in np_inds:
-                sublst = []
-                for tid in lst:
-                    if tid != 1:
-                        sublst.append(tid)
-                    else:
-                        break
-                result.append(list(map(lambda x: self._ids2classes[x], sublst)))
-            return [result]
-        else:
-            return NeuralModule.__call__(self, **kwargs)
-
-    def parameters(self):
-        return None
-
-    def get_weights(self) -> Iterable[Optional[Mapping]]:
-        return None
-
-
-class TableLookUp2(NeuralModule):
-    """Performs a table lookup. For example, convert class ids to names"""
-
-    def set_weights(self, name2weight: Dict[(str, bool)], name2name_and_transform):
-        pass
-
-    def tie_weights_with(self, module, weight_names):
-        pass
-
-    def save_to(self, path):
-        pass
-
-    def restore_from(self, path):
-        pass
-
-    def freeze(self, weights: Set[str] = None):
-        pass
-
-    def unfreeze(self, weights: Set[str] = None):
-        pass
-
-    @property
-    def input_ports(self):
-        """Returns definitions of module input ports.
-
-        """
-        return {}
-
-    @property
-    def output_ports(self):
-        """Returns definitions of module output ports.
-
-        classes:
-            None
-        """
-        return {"classes": None}
-
-    def __init__(self, detokenizer=None):
-        NeuralModule.__init__(self)
-        self._detokenizer = detokenizer
-
-    def __call__(self, force_pt=False, *input, **kwargs):
-        pt_call = len(input) > 0 or force_pt
-        if pt_call:
-            # [inds] = kwargs.values()
-            inds = kwargs["indices"]
-            np_inds = inds.detach().cpu().numpy().tolist()
-            result = []
-            for lst in np_inds:
-                sublst = []
-                for tid in lst:
-                    if tid != 1:
-                        sublst.append(tid)
-                    else:
-                        break
-                result.append(self._detokenizer(sublst))
-            return result
-        else:
-            return NeuralModule.__call__(self, **kwargs)
-
-    def parameters(self):
-        return None
-
-    def get_weights(self) -> Iterable[Optional[Mapping]]:
-        return None
-
-
 class SequenceEmbedding(TrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        input_seq:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
         """
-        return {"input_seq": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)})}
+        # return {"input_seq": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)})}
+        return {"input_seq": NeuralType(('T', 'B'), ChannelType())}
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        outputs:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-
-            2: AxisType(ChannelTag)
         """
-        return {"outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),})}
+        # return {"outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),})}
+        return {"outputs": NeuralType(('T', 'B', 'D'), ChannelType())}
 
     def __init__(self, voc_size, hidden_size, dropout=0.0):
         super().__init__()
@@ -292,64 +47,20 @@ def forward(self, input_seq):
         return embedded
 
 
-class SequenceProjection(TrainableNM):
-    @property
-    def input_ports(self):
-        """Returns definitions of module input ports.
-
-        input_seq:
-            Empty Type?!?
-        """
-        return {"input_seq": NeuralType({})}
-
-    @property
-    def output_ports(self):
-        """Returns definitions of module output ports.
-
-        outputs:
-            None
-        """
-        return {"outputs": None}
-
-    def __init__(self, from_dim, to_dim, dropout=0.0):
-        super().__init__()
-
-        self.from_dim = from_dim
-        self.to_dim = to_dim
-        self.dropout = dropout
-        self.projection = nn.Linear(self.from_dim, self.to_dim, bias=False)
-        if self.dropout != 0.0:
-            self.embedding_dropout = nn.Dropout(self.dropout)
-
-    def forward(self, input_seq):
-        p = self.projection(input_seq)
-        if self.dropout != 0.0:
-            p = self.dropout(p)
-        return p
-
-
 class ZerosLikeNM(TrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        input_type_ids:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
         """
-        return {"input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag),})}
+        # return {"input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag),})}
+        return {"input_type_ids": NeuralType(('B', 'T'), VoidType())}
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        input_type_ids:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
         """
-        return {"input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag),})}
+        # return {"input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag),})}
+        return {"input_type_ids": NeuralType(('B', 'T'), ChannelType())}
 
     def __init__(self):
         super().__init__()
diff --git a/nemo/backends/pytorch/common/rnn.py b/nemo/backends/pytorch/common/rnn.py
index be541177c8d9..fbf7dbb7eb97 100644
--- a/nemo/backends/pytorch/common/rnn.py
+++ b/nemo/backends/pytorch/common/rnn.py
@@ -22,7 +22,7 @@
 from nemo.backends.pytorch.common.parts import Attention
 from nemo.backends.pytorch.nm import TrainableNM
-from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag
+from nemo.core import *
 from nemo.utils.misc import pad_to
 
 __all__ = ['DecoderRNN', 'EncoderRNN']
 
@@ -67,49 +67,27 @@ class DecoderRNN(TrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        targets:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        encoder_outputs:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(ChannelTag)
         """
         return {
-            'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            'encoder_outputs': NeuralType(
-                {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True,
-            ),
+            # 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            'targets': NeuralType(('B', 'T'), LabelsType()),
+            # 'encoder_outputs': NeuralType(
+            #     {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True,
+            # ),
+            'encoder_outputs': NeuralType(('B', 'T', 'D'), ChannelType(), True),
         }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        log_probs:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(ChannelTag)
-
-        attention_weights:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(TimeTag)
         """
         return {
-            'log_probs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}),
-            'attention_weights': NeuralType(
-                {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}, optional=True,
-            ),
+            # 'log_probs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}),
+            'log_probs': NeuralType(('B', 'T', 'D'), LogprobsType()),
+            # 'attention_weights': NeuralType(
+            #     {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}, optional=True,
+            # ),
+            'attention_weights': NeuralType(('B', 'T', 'T'), ChannelType(), True),
         }
 
     def __init__(
@@ -227,45 +205,23 @@ class EncoderRNN(TrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        targets:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        encoder_outputs:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(ChannelTag)
         """
         return {
-            'inputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            'input_lens': NeuralType({0: AxisType(BatchTag),}, optional=True),
+            # 'inputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # 'input_lens': NeuralType({0: AxisType(BatchTag),}, optional=True),
+            'inputs': NeuralType(('B', 'T'), ChannelType()),
+            'input_lens': NeuralType(tuple('B'), LengthsType(), optional=True),
         }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        log_probs:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(ChannelTag)
-
-        attention_weights:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(TimeTag)
         """
         return {
-            'outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}),
-            'hidden': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}),
+            # 'outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}),
+            # 'hidden': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}),
+            'outputs': NeuralType(('B', 'T', 'D'), ChannelType()),
+            'hidden': NeuralType(('B', 'T', 'D'), ChannelType()),
         }
 
     def __init__(
diff --git a/nemo/backends/pytorch/common/search.py b/nemo/backends/pytorch/common/search.py
index 350fdb3dff5c..acaf32213016 100644
--- a/nemo/backends/pytorch/common/search.py
+++ b/nemo/backends/pytorch/common/search.py
@@ -3,7 +3,7 @@
 import torch
 
 from nemo.backends.pytorch.nm import NonTrainableNM
-from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag
+from nemo.core.neural_types import ChannelType, NeuralType
 
 INF = float('inf')
 BIG_NUM = 1e4
 
@@ -31,39 +31,24 @@ class GreedySearch(NonTrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        encoder_outputs:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(ChannelTag)
         """
         return {
-            'encoder_outputs': NeuralType(
-                {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True,
-            )
+            # 'encoder_outputs': NeuralType(
+            #     {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True,
+            # )
+            "encoder_outputs": NeuralType(('B', 'T', 'D'), ChannelType(), optional=True)
         }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        predictions:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        attention_weights:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(TimeTag)
         """
         return {
-            'predictions': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            'attention_weights': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}),
+            # 'predictions': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # 'attention_weights': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}),
+            "predictions": NeuralType(('B', 'T'), ChannelType()),
+            "attention_weights": NeuralType(('B', 'T', 'T'), ChannelType()),
         }
 
     def __init__(self, decoder, pad_id, bos_id, eos_id, max_len, batch_size=None):
diff --git a/nemo/backends/pytorch/common/zero_data.py b/nemo/backends/pytorch/common/zero_data.py
index 0c7b14fe1a11..18f366c46140 100644
--- a/nemo/backends/pytorch/common/zero_data.py
+++ b/nemo/backends/pytorch/common/zero_data.py
@@ -18,11 +18,11 @@ def neuralType2TensorShape(neural_type: NeuralType, default_dim=32, skip_batch_a
         torch.Size
     """
     dims = []
-    for axis_ind, axis_type in neural_type.axis2type.items():
-        if axis_type._semantics == BatchTag and skip_batch_axis:
+    for axis in neural_type.axes:
+        if axis.kind == AxisKind.Batch and skip_batch_axis:
             continue
-        if axis_type.dim is not None:
-            dims.append(axis_type.dim)
+        if axis.size is not None:
+            dims.append(axis.size)
         else:
             dims.append(default_dim)
     return torch.Size(dims)
diff --git a/nemo/backends/pytorch/tutorials/chatbot/data.py b/nemo/backends/pytorch/tutorials/chatbot/data.py
index 0f0b2609fb32..e0f46289fbfc 100644
--- a/nemo/backends/pytorch/tutorials/chatbot/data.py
+++ b/nemo/backends/pytorch/tutorials/chatbot/data.py
@@ -215,7 +215,7 @@ def outputVar(l, voc):
     max_target_len = max([len(indexes) for indexes in indexes_batch])
     padList = zeroPadding(indexes_batch)
     mask = binaryMatrix(padList)
-    mask = t.ByteTensor(mask)
+    mask = t.ByteTensor(mask).to(t.bool)
     padVar = t.LongTensor(padList)
     return padVar, mask, max_target_len
 
diff --git a/nemo/backends/pytorch/tutorials/chatbot/modules.py b/nemo/backends/pytorch/tutorials/chatbot/modules.py
index 33fec674ba02..14d704b4d4fc 100644
--- a/nemo/backends/pytorch/tutorials/chatbot/modules.py
+++ b/nemo/backends/pytorch/tutorials/chatbot/modules.py
@@ -20,34 +20,13 @@ class DialogDataLayer(DataLayerNM):
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        src:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-
-        src_lengths:
-            0: AxisType(BatchTag)
-
-        tgt:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-
-        mask:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-
-        max_tgt_lengths:
-            None
         """
         return {
-            "src": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}),
-            "src_lengths": NeuralType({0: AxisType(BatchTag)}),
-            "tgt": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}),
-            "mask": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}),
-            "max_tgt_lengths": NeuralType(None),
+            "src": NeuralType(('T', 'B'), ChannelType()),
+            "src_lengths": NeuralType(tuple('B'), LengthsType()),
+            "tgt": NeuralType(('T', 'B'), LabelsType()),
+            "mask": NeuralType(('T', 'B'), ChannelType()),
+            "max_tgt_lengths": NeuralType(axes=None),
         }
 
     def __init__(self, batch_size, corpus_name, datafile, min_count=3):
@@ -94,39 +73,19 @@ class EncoderRNN(TrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        input_seq:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-
-        input_lengths:
-            0: AxisType(BatchTag)
         """
         return {
-            "input_seq": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}),
-            "input_lengths": NeuralType({0: AxisType(BatchTag)}),
+            "input_seq": NeuralType(('T', 'B'), ChannelType()),
+            "input_lengths": NeuralType(tuple('B'), LengthsType()),
        }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        outputs:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-
-            2: AxisType(ChannelTag)
-
-        hidden:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
         """
         return {
-            "outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),}),
-            "hidden": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
+            "outputs": NeuralType(('T', 'B', 'D'), ChannelType()),
+            "hidden": NeuralType(('B', 'D'), ChannelType()),
         }
 
     def __init__(self, voc_size, encoder_n_layers, hidden_size, dropout, bidirectional=True):
@@ -174,26 +133,11 @@ class LuongAttnDecoderRNN(TrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        targets:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-
-        encoder_outputs:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-
-            2: AxisType(ChannelTag)
-
-        max_target_len:
-            None
         """
         return {
-            "targets": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}),
-            "encoder_outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),}),
-            "max_target_len": NeuralType(None),
+            "targets": NeuralType(('T', 'B'), LabelsType()),
+            "encoder_outputs": NeuralType(('T', 'B', 'D'), ChannelType()),
+            "max_target_len": NeuralType(axes=None),
         }
 
     @property
@@ -213,8 +157,8 @@ def output_ports(self):
 
             1: AxisType(ChannelTag)
         """
         return {
-            "outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),}),
-            "hidden": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
+            "outputs": NeuralType(('T', 'B', 'D'), ChannelType()),
+            "hidden": NeuralType(('B', 'D'), ChannelType()),
         }
 
     def __init__(self, attn_model, hidden_size, voc_size, decoder_n_layers, dropout):
@@ -327,28 +271,11 @@ class MaskedXEntropyLoss(LossNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        predictions
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-
-            2: AxisType(ChannelTag)}
-
-        target:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-
-        mask:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
         """
         return {
-            "predictions": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),}),
-            "target": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}),
-            "mask": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)}),
+            "predictions": NeuralType(('T', 'B', 'D'), ChannelType()),
+            "target": NeuralType(('T', 'B'), LabelsType()),
+            "mask": NeuralType(('T', 'B'), ChannelType()),
         }
 
     @property
@@ -358,7 +285,7 @@ def output_ports(self):
         loss:
             NeuralType(None)
         """
-        return {"loss": NeuralType(None)}
+        return {"loss": NeuralType(axes=None, elements_type=LossType())}
 
     def __init__(self):
         super().__init__()
@@ -381,39 +308,16 @@ class GreedyLuongAttnDecoderRNN(TrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        encoder_outputs:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-
-            2: AxisType(ChannelTag)
         """
-        return {"encoder_outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),})}
+        return {"encoder_outputs": NeuralType(('T', 'B', 'D'), ChannelType())}
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        outputs:
-            0: AxisType(TimeTag)
-
-            1: AxisType(BatchTag)
-
-        hidden:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
         """
         return {
-            "outputs": NeuralType(
-                {
-                    0: AxisType(TimeTag),
-                    1: AxisType(BatchTag),
-                    # 2: AxisType(ChannelTag)
-                }
-            ),
-            "hidden": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
+            "outputs": NeuralType(('T', 'B'), ChannelType()),
+            "hidden": NeuralType(('B', 'D'), ChannelType()),
         }
 
     def __init__(self, attn_model, hidden_size, voc_size, decoder_n_layers, dropout, max_dec_steps=10):
diff --git a/nemo/backends/pytorch/tutorials/toys.py b/nemo/backends/pytorch/tutorials/toys.py
index cf43c475543e..442c841ee836 100644
--- a/nemo/backends/pytorch/tutorials/toys.py
+++ b/nemo/backends/pytorch/tutorials/toys.py
@@ -21,7 +21,7 @@ def input_ports(self):
         Returns:
           A (dict) of module's input ports names to NeuralTypes mapping
         """
-        return {"x": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})}
+        return {"x": NeuralType(('B', 'D'), ChannelType())}
 
     @property
     def output_ports(self):
@@ -30,7 +30,7 @@ def output_ports(self):
         Returns:
           A (dict) of module's output ports names to NeuralTypes mapping
         """
-        return {"y_pred": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})}
+        return {"y_pred": NeuralType(('B', 'D'), ChannelType())}
 
     def __init__(self, dim):
         # Part specific for Neural Modules API:
@@ -61,31 +61,17 @@ class TaylorNetO(TrainableNM):  # Note inheritance from TrainableNM
     def input_ports(self):
         """Returns definitions of module input ports.
 
-        x:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
-
-        o:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
         """
         return {
-            "x": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
-            "o": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
+            "x": NeuralType(('B', 'D'), ChannelType()),
+            "o": NeuralType(('B', 'D'), ChannelType()),
         }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        y_pred:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
         """
-        return {"y_pred": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}, optional=True)}
+        return {"y_pred": NeuralType(('B', 'D'), ChannelType(), optional=True)}
 
     def __init__(self, dim):
         # Part specific for Neural Modules API:
@@ -135,20 +121,10 @@ def __len__(self):
     @property
     def output_ports(self):
         """Returns definitions of module output ports
-
-        x:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
-
-        y:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
         """
         return {
-            "x": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
-            "y": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
+            "x": NeuralType(('B', 'D'), ChannelType()),
+            "y": NeuralType(('B', 'D'), LabelsType()),
         }
 
     def __init__(self, batch_size, f_name="sin", n=1000, x_lo=-4, x_hi=4):
@@ -206,18 +182,15 @@ def input_ports(self):
 
             1: AxisType(ChannelTag)
         """
         return {
-            "predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
-            "target": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
+            "predictions": NeuralType(('B', 'D'), ChannelType()),
+            "target": NeuralType(('B', 'D'), LabelsType()),
         }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        loss:
-            NeuralType(None)
         """
-        return {"loss": NeuralType(None)}
+        return {"loss": NeuralType(elements_type=LossType())}
 
     def __init__(self):
         super().__init__()
@@ -231,30 +204,17 @@ class L1Loss(LossNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        predictions:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
-
-        target:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
         """
         return {
-            "predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
-            "target": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
+            "predictions": NeuralType(('B', 'D'), ChannelType()),
+            "target": NeuralType(('B', 'D'), LabelsType()),
         }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        loss:
-            NeuralType(None)
         """
-        return {"loss": NeuralType(None)}
+        return {"loss": NeuralType(elements_type=LossType())}
 
     def __init__(self):
         super().__init__()
@@ -268,18 +228,10 @@ class CrossEntropyLoss(LossNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        predictions:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
-
-        labels:
-            0: AxisType(BatchTag)
         """
         return {
-            "predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
-            "labels": NeuralType({0: AxisType(BatchTag)}),
+            "predictions": NeuralType(('B', 'D'), ChannelType()),
+            "labels": NeuralType(tuple('B'), LabelsType()),
         }
 
     @property
@@ -289,7 +241,7 @@ def output_ports(self):
         loss:
             NeuralType(None)
         """
-        return {"loss": NeuralType(None)}
+        return {"loss": NeuralType(elements_type=LossType())}
 
     def __init__(self):
         # Neural Module API specific
@@ -300,67 +252,3 @@ def __init__(self):
 
     # You need to implement this function
     def _loss_function(self, **kwargs):
         return self._criterion(*(kwargs.values()))
-
-
-class DopeDualLoss(LossNM):
-    """
-    The dual loss function that DOPE uses
-    """
-
-    @property
-    def input_ports(self):
-        """Returns definitions of module input ports.
-
-        belief_predictions:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
-
-        belief_labels:
-            0: AxisType(BatchTag)
-
-        affinity_predictions:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
-
-        affinity_labels:
-            0: AxisType(BatchTag)
-        """
-        return {
-            "belief_predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
-            "belief_labels": NeuralType({0: AxisType(BatchTag)}),
-            "affinity_predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
-            "affinity_labels": NeuralType({0: AxisType(BatchTag)}),
-        }
-
-    @property
-    def output_ports(self):
-        """Returns definitions of module output ports.
-
-        loss:
-            NeuralType(None)
-        """
-        return {"loss": NeuralType(None)}
-
-    def __init__(self):
-        # Neural Module API specific
-        NeuralModule.__init__(self)
-
-    # You need to implement this function
-    def _loss_function(self, **kwargs):
-        loss = 0.0
-
-        # Belief maps loss
-        # output, each belief map layers.
-        for l in kwargs["belief_predictions"]:
-            loss_tmp = ((l - kwargs["belief_labels"]) * (l - kwargs["belief_labels"])).mean()
-            loss += loss_tmp
-
-        # Affinities loss
-        # output, each belief map layers.
-        for l in kwargs["affinity_predictions"]:
-            loss_tmp = ((l - kwargs["affinity_labels"]) * (l - kwargs["affinity_labels"])).mean()
-            loss += loss_tmp
-
-        return loss
diff --git a/nemo/collections/asr/audio_preprocessing.py b/nemo/collections/asr/audio_preprocessing.py
index 94476839a1f3..945f4383caac 100644
--- a/nemo/collections/asr/audio_preprocessing.py
+++ b/nemo/collections/asr/audio_preprocessing.py
@@ -1,16 +1,17 @@
-# Copyright (C) NVIDIA CORPORATION. All Rights Reserved.
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-# http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
-# limitations under the License.****
+# limitations under the License.
+# =============================================================================
 """
 This file contains neural modules responsible for preprocessing audio data.
 """
@@ -120,43 +121,25 @@ class AudioToSpectrogramPreprocessor(AudioPreprocessor):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        input_signal:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        length:
-            0: AxisType(BatchTag)
-
         """
         return {
-            "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "length": NeuralType({0: AxisType(BatchTag)}),
+            # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "length": NeuralType({0: AxisType(BatchTag)}),
+            "input_signal": NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)),
+            "length": NeuralType(tuple('B'), LengthsType()),
         }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        processed_signal:
-
-            0: AxisType(BatchTag)
-
-            1: AxisType(SpectrogramSignalTag)
-
-            2: AxisType(ProcessedTimeTag)
-
-        processed_length:
-
-            0: AxisType(BatchTag)
-
         """
         return {
-            "processed_signal": NeuralType(
-                {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),}
-            ),
-            "processed_length": NeuralType({0: AxisType(BatchTag)}),
+            # "processed_signal": NeuralType(
+            #     {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),}
+            # ),
+            # "processed_length": NeuralType({0: AxisType(BatchTag)}),
+            "processed_signal": NeuralType(('B', 'D', 'T'), SpectrogramType()),
+            "processed_length": NeuralType(tuple('B'), LengthsType()),
         }
 
     def __init__(
@@ -170,6 +153,7 @@ def __init__(
         window="hann",
         normalized=True,
     ):
+        self._sample_rate = sample_rate
         if not HAVE_TORCHAUDIO:
             raise ModuleNotFoundError(
                 "torchaudio is not installed but is necessary for "
@@ -183,9 +167,9 @@ def __init__(
                 f"{self} received both window_stride and " f"n_window_stride. Only one should be specified."
             )
         if window_size:
-            n_window_size = int(window_size * sample_rate)
+            n_window_size = int(window_size * self._sample_rate)
         if window_stride:
-            n_window_stride = int(window_stride * sample_rate)
+            n_window_stride = int(window_stride * self._sample_rate)
 
         super().__init__(n_window_size, n_window_stride)
 
@@ -216,6 +200,10 @@ def __init__(
     def get_features(self, input_signal, length):
         return self.featurizer(input_signal)
 
+    @property
+    def sample_rate(self):
+        return self._sample_rate
+
 
 class AudioToMelSpectrogramPreprocessor(AudioPreprocessor):
     """Featurizer that converts wavs to mel spectrograms.
@@ -283,19 +271,12 @@ class AudioToMelSpectrogramPreprocessor(AudioPreprocessor):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        input_signal:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        length:
-            0: AxisType(BatchTag)
-
         """
         return {
-            "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "length": NeuralType({0: AxisType(BatchTag)}),
+            # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "length": NeuralType({0: AxisType(BatchTag)}),
+            "input_signal": NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)),
+            "length": NeuralType(tuple('B'), LengthsType()),
         }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
 
         """
         return {
-            "processed_signal": NeuralType(
-                {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),}
-            ),
-            "processed_length": NeuralType({0: AxisType(BatchTag)}),
+            # "processed_signal": NeuralType(
+            #     {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),}
+            # ),
+            # "processed_length": NeuralType({0: AxisType(BatchTag)}),
+            "processed_signal": NeuralType(('B', 'D', 'T'), MelSpectrogramType()),
+            "processed_length": NeuralType(tuple('B'), LengthsType()),
         }
 
     def __init__(
@@ -346,6 +329,7 @@ def __init__(
         pad_value=0,
         mag_power=2.0,
     ):
+        self._sample_rate = sample_rate
         if window_size and n_window_size:
             raise ValueError(f"{self} received both window_size and " f"n_window_size. Only one should be specified.")
         if window_stride and n_window_stride:
             raise ValueError(
                 f"{self} received both window_stride and " f"n_window_stride. Only one should be specified."
             )
         if window_size:
-            n_window_size = int(window_size * sample_rate)
+            n_window_size = int(window_size * self._sample_rate)
         if window_stride:
-            n_window_stride = int(window_stride * sample_rate)
+            n_window_stride = int(window_stride * self._sample_rate)
 
         super().__init__(n_window_size, n_window_stride)
 
         self.featurizer = FilterbankFeatures(
-            sample_rate=sample_rate,
+            sample_rate=self._sample_rate,
             n_window_size=n_window_size,
             n_window_stride=n_window_stride,
             window=window,
@@ -433,43 +417,25 @@ class AudioToMFCCPreprocessor(AudioPreprocessor):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        input_signal:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        length:
-            0: AxisType(BatchTag)
-
         """
         return {
-            "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "length": NeuralType({0: AxisType(BatchTag)}),
+            # "input_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "length": NeuralType({0: AxisType(BatchTag)}),
+            "input_signal": NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)),
+            "length": NeuralType(tuple('B'), LengthsType()),
        }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        processed_signal:
-
-            0: AxisType(BatchTag)
-
-            1: AxisType(MFCCSignalTag)
-
-            2: AxisType(ProcessedTimeTag)
-
-        processed_length:
-
-            0: AxisType(BatchTag)
-
         """
         return {
-            "processed_signal": NeuralType(
-                {0: AxisType(BatchTag), 1: AxisType(MFCCSignalTag), 2: AxisType(ProcessedTimeTag),}
-            ),
-            "processed_length": NeuralType({0: AxisType(BatchTag)}),
+            # "processed_signal": NeuralType(
+            #     {0: AxisType(BatchTag), 1: AxisType(MFCCSignalTag), 2: AxisType(ProcessedTimeTag),}
+            # ),
+            # "processed_length": NeuralType({0: AxisType(BatchTag)}),
+            "processed_signal": NeuralType(('B', 'D', 'T'), MFCCSpectrogramType()),
+            "processed_length": NeuralType(tuple('B'), LengthsType()),
         }
 
     def __init__(
@@ -489,6 +455,7 @@ def __init__(
         norm='ortho',
         log=True,
     ):
+        self._sample_rate = sample_rate
         if not HAVE_TORCHAUDIO:
             raise ModuleNotFoundError(
                 "torchaudio is not installed but is necessary for "
@@ -503,9 +470,9 @@ def __init__(
             )
         # Get win_length (n_window_size) and hop_length (n_window_stride)
         if window_size:
-            n_window_size = int(window_size * sample_rate)
+            n_window_size = int(window_size * self._sample_rate)
         if window_stride:
-            n_window_stride = int(window_stride * sample_rate)
+            n_window_stride = int(window_stride * self._sample_rate)
 
         super().__init__(n_window_size, n_window_stride)
 
@@ -531,7 +498,12 @@ def __init__(
         # Use torchaudio's implementation of MFCCs as featurizer
         self.featurizer = torchaudio.transforms.MFCC(
-            sample_rate=sample_rate, n_mfcc=n_mfcc, dct_type=dct_type, norm=norm, log_mels=log, melkwargs=mel_kwargs,
+            sample_rate=self._sample_rate,
+            n_mfcc=n_mfcc,
+            dct_type=dct_type,
+            norm=norm,
+            log_mels=log,
+            melkwargs=mel_kwargs,
         )
         self.featurizer.to(self._device)
 
@@ -575,36 +547,22 @@ class SpectrogramAugmentation(NonTrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        input_spec:
-            0: AxisType(BatchTag)
-
-            1: AxisType(SpectrogramSignalTag)
-
-            2: AxisType(TimeTag)
-
         """
         return {
-            "input_spec": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(TimeTag),})
+            # "input_spec": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(
+            #     TimeTag),})
+            "input_spec": NeuralType(('B', 'D', 'T'), SpectrogramType())
         }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        augmented_spec:
-
-            0: AxisType(BatchTag)
-
-            1: AxisType(SpectrogramSignalTag)
-
-            2: AxisType(ProcessedTimeTag)
-
         """
         return {
-            "augmented_spec": NeuralType(
-                {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),}
-            )
+            # "augmented_spec": NeuralType(
+            #     {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),}
+            # )
+            "augmented_spec": NeuralType(('B', 'D', 'T'), SpectrogramType())
         }
 
     def __init__(
@@ -652,61 +610,31 @@ class MultiplyBatch(NonTrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        in_x:
-            0: AxisType(BatchTag)
-
-            1: AxisType(SpectrogramSignalTag)
-
-            2: AxisType(TimeTag)
-
-        in_x_len:
-            0: AxisType(BatchTag)
-
-        in_y:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        in_y_len:
-            0: AxisType(BatchTag)
-
         """
         return {
-            "in_x": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(TimeTag),}),
-            "in_x_len": NeuralType({0: AxisType(BatchTag)}),
-            "in_y": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "in_y_len": NeuralType({0: AxisType(BatchTag)}),
+            # "in_x": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(TimeTag),}),
+            # "in_x_len": NeuralType({0: AxisType(BatchTag)}),
+            # "in_y": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "in_y_len": NeuralType({0: AxisType(BatchTag)}),
+            "in_x": NeuralType(('B', 'D', 'T'), SpectrogramType()),
+            "in_x_len": NeuralType(tuple('B'), LengthsType()),
+            "in_y": NeuralType(('B', 'D', 'T'), SpectrogramType()),
+            "in_y_len": NeuralType(tuple('B'), LengthsType()),
         }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        out_x:
-            0: AxisType(BatchTag)
-
-            1: AxisType(SpectrogramSignalTag)
-
-            2: AxisType(TimeTag)
-
-        out_x_len:
-            0: AxisType(BatchTag)
-
-        out_y:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        out_y_len:
-            0: AxisType(BatchTag)
-
         """
         return {
-            "out_x": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(TimeTag),}),
-            "out_x_len": NeuralType({0: AxisType(BatchTag)}),
-            "out_y": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "out_y_len": NeuralType({0: AxisType(BatchTag)}),
+            # "out_x": NeuralType({0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(TimeTag),}),
+            # "out_x_len": NeuralType({0: AxisType(BatchTag)}),
+            # "out_y": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "out_y_len": NeuralType({0: AxisType(BatchTag)}),
+            "out_x": NeuralType(('B', 'D', 'T'), SpectrogramType()),
+            "out_x_len": NeuralType(tuple('B'), LengthsType()),
+            "out_y": NeuralType(('B', 'D', 'T'), SpectrogramType()),
+            "out_y_len": NeuralType(tuple('B'), LengthsType()),
         }
 
     def __init__(self, mult_batch=1):
diff --git a/nemo/collections/asr/beam_search_decoder.py b/nemo/collections/asr/beam_search_decoder.py
index 6bb985a98e5c..ecebe7a00ec3 100644
--- a/nemo/collections/asr/beam_search_decoder.py
+++ b/nemo/collections/asr/beam_search_decoder.py
@@ -6,7 +6,7 @@
 from nemo.backends.pytorch.nm import NonTrainableNM
 from nemo.core import DeviceType
-from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag
+from nemo.core.neural_types import *
 from nemo.utils.helpers import get_cuda_device
 
 
@@ -41,20 +41,12 @@ class BeamSearchDecoderWithLM(NonTrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        "log_probs":
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(ChannelTag)
-
-        log_probs_length:
-            0: AxisType(BatchTag)
         """
         return {
-            "log_probs": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}),
-            "log_probs_length": NeuralType({0: AxisType(BatchTag)}),
+            # "log_probs": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}),
+            # "log_probs_length": NeuralType({0: AxisType(BatchTag)}),
+            "log_probs": NeuralType(('B', 'T', 'D'), LogprobsType()),
+            "log_probs_length": NeuralType(tuple('B'), LengthsType()),
         }
 
     @property
@@ -64,7 +56,8 @@ def output_ports(self):
         predictions:
             NeuralType(None)
         """
-        return {"predictions": NeuralType(None)}
+        # return {"predictions": NeuralType(VoidType())}
+        return {"predictions": NeuralType(('B', 'T'), PredictionsType())}
 
     def __init__(self, vocab, beam_width, alpha, beta, lm_path, num_cpus, cutoff_prob=1.0, cutoff_top_n=40):
diff --git a/nemo/collections/asr/data_layer.py b/nemo/collections/asr/data_layer.py
index 44b1cca9c9b6..e2b95c0e9604 100644
--- a/nemo/collections/asr/data_layer.py
+++ b/nemo/collections/asr/data_layer.py
@@ -1,4 +1,17 @@
-# Copyright (c) 2019 NVIDIA Corporation
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
 """This package contains Neural Modules responsible for ASR data layers."""
 from functools import partial
@@ -81,29 +94,16 @@ class AudioToTextDataLayer(DataLayerNM):
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        audio_signal:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        a_sig_length:
-            0: AxisType(BatchTag)
-
-        transcripts:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        transcript_length:
-            0: AxisType(BatchTag)
-
         """
         return {
-            'audio_signal': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            'a_sig_length': NeuralType({0: AxisType(BatchTag)}),
-            'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            'transcript_length': NeuralType({0: AxisType(BatchTag)}),
+            # 'audio_signal': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # 'a_sig_length': NeuralType({0: AxisType(BatchTag)}),
+            # 'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # 'transcript_length': NeuralType({0: AxisType(BatchTag)}),
+            'audio_signal': NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)),
+            'a_sig_length': NeuralType(tuple('B'), LengthsType()),
+            'transcripts': NeuralType(('B', 'T'), LabelsType()),
+            'transcript_length': NeuralType(tuple('B'), LengthsType()),
         }
 
     def __init__(
@@ -126,8 +126,8 @@ def __init__(
         num_workers=0,
     ):
         super().__init__()
-
-        self._featurizer = WaveformFeaturizer(sample_rate=sample_rate, int_values=int_values, augmentor=None)
+        self._sample_rate = sample_rate
+        self._featurizer = WaveformFeaturizer(sample_rate=self._sample_rate, int_values=int_values, augmentor=None)
 
         # Set up dataset
         dataset_params = {
@@ -212,32 +212,19 @@ class KaldiFeatureDataLayer(DataLayerNM):
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        processed_signal:
-            0: AxisType(BatchTag)
-
-            1: AxisType(SpectrogramSignalTag)
-
-            2: AxisType(ProcessedTimeTag)
-
-        processed_length:
-            0: AxisType(BatchTag)
-
-        transcripts:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        transcript_length:
-            0: AxisType(BatchTag)
         """
         return {
-            'processed_signal': NeuralType(
-                {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),}
-            ),
-            'processed_length': NeuralType({0: AxisType(BatchTag)}),
-            'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            'transcript_length': NeuralType({0: AxisType(BatchTag)}),
+            # 'processed_signal': NeuralType(
+            #     {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),}
+            # ),
+            # 'processed_length': NeuralType({0: AxisType(BatchTag)}),
+            # 'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # 'transcript_length': NeuralType({0: AxisType(BatchTag)}),
+            'processed_signal': NeuralType(('B', 'D', 'T'), SpectrogramType()),
+            'processed_length': NeuralType(tuple('B'), LengthsType()),
+            'transcripts': NeuralType(('B', 'T'), ChannelType()),
+            'transcript_length': NeuralType(tuple('B'), LengthsType()),
         }
 
     def __init__(
@@ -362,8 +348,10 @@ def output_ports(self):
 
         """
         return {
-            'texts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            'texts_length': NeuralType({0: AxisType(BatchTag)}),
+            # 'texts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # 'texts_length': NeuralType({0: AxisType(BatchTag)}),
+            'texts': NeuralType(('B', 'T'), ChannelType()),
+            'texts_length': NeuralType(tuple('B'), LengthsType()),
         }
 
     def __init__(
diff --git a/nemo/collections/asr/greedy_ctc_decoder.py b/nemo/collections/asr/greedy_ctc_decoder.py
index b9b416b8983a..2d49011e7235 100644
--- a/nemo/collections/asr/greedy_ctc_decoder.py
+++ b/nemo/collections/asr/greedy_ctc_decoder.py
@@ -2,7 +2,7 @@
 import torch
 
 from nemo.backends.pytorch.nm import TrainableNM
-from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag
+from nemo.core.neural_types import *
 
 
 class GreedyCTCDecoder(TrainableNM):
@@ -13,26 +13,16 @@ class GreedyCTCDecoder(TrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        log_probs:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(ChannelTag)
         """
-        return {"log_probs": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})}
+        # return {"log_probs": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})}
+        return {"log_probs": NeuralType(('B', 'T', 'D'), LogprobsType())}
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        predictions:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
         """
-        return {"predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})}
+        # return {"predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})}
+        return {"predictions": NeuralType(('B', 'T'), PredictionsType())}
 
     def __init__(self):
         super().__init__()
diff --git a/nemo/collections/asr/jasper.py b/nemo/collections/asr/jasper.py
index db75e0793643..d6fcf7e38259 100644
--- a/nemo/collections/asr/jasper.py
+++ b/nemo/collections/asr/jasper.py
@@ -7,16 +7,7 @@
 from .parts.jasper import JasperBlock, init_weights, jasper_activations
 from nemo.backends.pytorch.nm import TrainableNM
-from nemo.core.neural_types import (
-    AxisType,
-    BatchTag,
-    ChannelTag,
-    EncodedRepresentationTag,
-    NeuralType,
-    ProcessedTimeTag,
-    SpectrogramSignalTag,
-    TimeTag,
-)
+from nemo.core.neural_types import *
 
 
 class JasperEncoder(TrainableNM):
@@ -82,44 +73,27 @@ class JasperEncoder(TrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        audio_signal:
-            0: AxisType(BatchTag)
-
-            1: AxisType(SpectrogramSignalTag)
-
-            2: AxisType(ProcessedTimeTag)
-
-        length:
-            0: AxisType(BatchTag)
         """
         return {
-            "audio_signal": NeuralType(
-                {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),}
-            ),
-            "length": NeuralType({0: AxisType(BatchTag)}),
+            # "audio_signal": NeuralType(
+            #     {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),}
+            # ),
+            # "length": NeuralType({0: AxisType(BatchTag)}),
+            "audio_signal": NeuralType(('B', 'D', 'T'), SpectrogramType()),
+            "length": NeuralType(tuple('B'), LengthsType()),
         }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        outputs:
-            0: AxisType(BatchTag)
-
-            1: AxisType(EncodedRepresentationTag)
-
-            2: AxisType(ProcessedTimeTag)
-
-        encoded_lengths:
-            0: AxisType(BatchTag)
-
         """
         return {
-            "outputs": NeuralType(
-                {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag),}
-            ),
-            "encoded_lengths": NeuralType({0: AxisType(BatchTag)}),
+            # "outputs": NeuralType(
+            #     {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag),}
+            # ),
+            # "encoded_lengths": NeuralType({0: AxisType(BatchTag)}),
+            "outputs": NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()),
+            "encoded_lengths": NeuralType(tuple('B'), LengthsType()),
         }
 
     def __init__(
@@ -205,32 +179,20 @@ class JasperDecoderForCTC(TrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        encoder_output:
-            0: AxisType(BatchTag)
-
-            1: AxisType(EncodedRepresentationTag)
-
-            2: AxisType(ProcessedTimeTag)
         """
         return {
-            "encoder_output": NeuralType(
-                {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag),}
-            )
+            # "encoder_output": NeuralType(
+            #     {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag),}
+            # )
+            "encoder_output": NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation())
         }
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        output:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(ChannelTag)
         """
-        return {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})}
+        # return {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})}
+        return {"output": NeuralType(('B', 'T', 'D'), LogprobsType())}
 
     def __init__(self, feat_in, num_classes, init_mode="xavier_uniform"):
         super().__init__()
diff --git a/nemo/collections/asr/las/misc.py b/nemo/collections/asr/las/misc.py
index a1a1a855e419..56519e143fd8 100644
--- a/nemo/collections/asr/las/misc.py
+++ b/nemo/collections/asr/las/misc.py
@@ -4,7 +4,7 @@
 from nemo.backends.pytorch.nm import TrainableNM
 from nemo.collections.asr.jasper import init_weights as jasper_init_weights
-from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag
+from nemo.core.neural_types import *
 
 
 class JasperRNNConnector(TrainableNM):
@@ -20,15 +20,9 @@ class JasperRNNConnector(TrainableNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        tensor:
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
-
-            2: AxisType(TimeTag)
         """
-        return {'tensor': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag),})}
+        # return {'tensor': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag),})}
+        return {'tensor': NeuralType(('B', 'D', 'T'), ChannelType())}
 
     @property
     def output_ports(self):
@@ -41,7 +35,8 @@ def output_ports(self):
 
             2: AxisType(ChannelTag)
         """
-        return {'tensor': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})}
+        # return {'tensor': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})}
+        return {'tensor': NeuralType(('B', 'T', 'D'), ChannelType())}
 
     def __init__(self, in_channels, out_channels):
         super().__init__()
diff --git a/nemo/collections/asr/losses.py b/nemo/collections/asr/losses.py
index f43a30791079..909a16d6f39c 100644
--- a/nemo/collections/asr/losses.py
+++ b/nemo/collections/asr/losses.py
@@ -3,7 +3,7 @@
 import torch.nn as nn
 
 from nemo.backends.pytorch.nm import LossNM
-from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag
+from nemo.core.neural_types import *
 
 
 class CTCLossNM(LossNM):
@@ -18,30 +18,16 @@ class CTCLossNM(LossNM):
     @property
     def input_ports(self):
         """Returns definitions of module input ports.
-
-        log_probs:
-            1: AxisType(TimeTag)
-
-            0: AxisType(BatchTag)
-
-            2: AxisType(ChannelTag)
-
-        targets:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        input_length:
-            0: AxisType(BatchTag)
-
-        target_length:
-            0: AxisType(BatchTag)
         """
         return {
-            "log_probs": NeuralType({1: AxisType(TimeTag), 0: AxisType(BatchTag), 2: AxisType(ChannelTag),}),
-            "targets": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "input_length": NeuralType({0: AxisType(BatchTag)}),
-            "target_length": NeuralType({0: AxisType(BatchTag)}),
+            # "log_probs": NeuralType({1: AxisType(TimeTag), 0: AxisType(BatchTag), 2: AxisType(ChannelTag),}),
+            # "targets": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "input_length": NeuralType({0: AxisType(BatchTag)}),
+            # "target_length": NeuralType({0: AxisType(BatchTag)}),
+            "log_probs": NeuralType(('B', 'T', 'D'), LogprobsType()),
+            "targets": NeuralType(('B', 'T'), LabelsType()),
+            "input_length": NeuralType(tuple('B'), LengthsType()),
+            "target_length": NeuralType(tuple('B'), LengthsType()),
         }
 
     @property
@@ -51,7 +37,8 @@ def output_ports(self):
         loss:
             NeuralType(None)
         """
-        return {"loss": NeuralType(None)}
+        # return {"loss": NeuralType(None)}
+        return {"loss": NeuralType(elements_type=LossType())}
 
     def __init__(self, num_classes):
         super().__init__()
diff --git a/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py b/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py
index 0a054a275d69..ac5ae86cca6c 100644
--- a/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py
+++ b/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py
@@ -16,7 +16,7 @@
 from nemo.collections.nlp.data import GLUEDataset
 from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer
-from nemo.core import AxisType, BatchTag, CategoricalTag, NeuralType, RegressionTag, TimeTag
+from nemo.core import CategoricalValuesType, ChannelType, NeuralType, RegressionValuesType
 
 __all__ = ['GlueClassificationDataLayer', 'GlueRegressionDataLayer']
 
@@ -36,30 +36,16 @@ class GlueClassificationDataLayer(TextDataLayer):
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        input_ids:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        input_type_ids:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        input_mask:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        labels:
-            0: AxisType(CategoricalTag)
         """
         return {
-            "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "labels": NeuralType({0: AxisType(CategoricalTag)}),
+            # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "labels": NeuralType({0: AxisType(CategoricalTag)}),
+            "input_ids": NeuralType(('B', 'T'), ChannelType()),
+            "input_type_ids": NeuralType(('B', 'T'), ChannelType()),
+            "input_mask": NeuralType(('B', 'T'), ChannelType()),
+            "labels": NeuralType(tuple('B'), CategoricalValuesType()),
         }
 
     def __init__(
@@ -101,30 +87,16 @@ class GlueRegressionDataLayer(TextDataLayer):
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        input_ids:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        input_type_ids:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        input_mask:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        labels:
-            0: AxisType(RegressionTag)
         """
         return {
-            "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "labels": NeuralType({0: AxisType(RegressionTag)}),
+            # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "labels": NeuralType({0: AxisType(RegressionTag)}),
+            "input_ids": NeuralType(('B', 'T'), ChannelType()),
+            "input_type_ids": NeuralType(('B', 'T'), ChannelType()),
+            "input_mask": NeuralType(('B', 'T'), ChannelType()),
+            "labels": NeuralType(tuple('B'), RegressionValuesType()),
         }
 
     def __init__(
diff --git a/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py b/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py
index e1cbf3c147ab..c306cfcccc04 100644
--- a/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py
+++ b/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py
@@ -16,7 +16,7 @@
 from nemo.collections.nlp.data import BertJointIntentSlotDataset, BertJointIntentSlotInferDataset
 from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer
-from nemo.core import AxisType, BatchTag, NeuralType, TimeTag
+from nemo.core import ChannelType, NeuralType
 
 __all__ = ['BertJointIntentSlotDataLayer', 'BertJointIntentSlotInferDataLayer']
 
@@ -43,48 +43,22 @@ class BertJointIntentSlotDataLayer(TextDataLayer):
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
-
-        input_ids:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        input_type_ids:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        input_mask:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        loss_mask:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        subtokens_mask:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-        intents:
-            0: AxisType(BatchTag)
-
-        slots:
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
         """
         return {
-            "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            "intents": NeuralType({0: AxisType(BatchTag)}),
-            "slots": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # "intents": NeuralType({0: AxisType(BatchTag)}),
+            # "slots": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            "input_ids": NeuralType(('B', 'T'), ChannelType()),
+            "input_type_ids": NeuralType(('B', 'T'), ChannelType()),
+            "input_mask": NeuralType(('B', 'T'), ChannelType()),
+            "loss_mask": NeuralType(('B', 'T'), ChannelType()),
+            "subtokens_mask": NeuralType(('B', 'T'), ChannelType()),
+            "intents": NeuralType(tuple('B'), ChannelType()),
+            "slots": NeuralType(('B', 'T'), ChannelType()),
         }
 
     def __init__(
@@ -137,39 +111,18 @@ class BertJointIntentSlotInferDataLayer(TextDataLayer):
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
- - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - subtokens_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), } def __init__(self, queries, tokenizer, max_seq_length, batch_size=1, dataset_type=BertJointIntentSlotInferDataset): diff --git a/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py b/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py index c0a6b7f775e8..98c1ba23c10f 100644 --- a/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py @@ -25,7 +25,7 @@ from nemo.backends.pytorch import DataLayerNM from nemo.collections.nlp.data import BertPretrainingDataset, BertPretrainingPreprocessedDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['BertPretrainingDataLayer', 'BertPretrainingPreprocessedDataLayer'] @@ -48,45 +48,20 @@ class BertPretrainingDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. 
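The payoff of attaching element types is that connected ports can be checked semantically instead of by axis tags alone. A sketch of such a check; the compare() method and the NeuralTypeComparisonResult enum are assumed here to be part of the new type system rather than shown in this diff:

    from nemo.core.neural_types import ChannelType, NeuralType, NeuralTypeComparisonResult

    produced = NeuralType(('B', 'T'), ChannelType())  # e.g. a data layer's input_ids output
    expected = NeuralType(('B', 'T'), ChannelType())  # e.g. a downstream module's input port

    # SAME means the connection type-checks; a mismatched axis tuple or element
    # type yields a different comparison result instead of failing silently.
    assert produced.compare(expected) == NeuralTypeComparisonResult.SAME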
- - input_ids: indices of tokens which constitute batches of text segments - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: indices of token types (e.g., sentences A & B in BERT) - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: bool tensor with 0s in place of tokens to be masked - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - output_ids: indices of output tokens which should be predicted - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - output_mask: bool tensor with 0s in place of tokens to be excluded - from loss calculation - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - labels: indices of classes to be predicted from [CLS] token of text - segments (e.g, 0 or 1 in next sentence prediction task) - 0: AxisType(BatchTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(BatchTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(BatchTag)}), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "output_ids": NeuralType(('B', 'T'), ChannelType()), + "output_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(tuple('B'), LabelsType()), } def __init__(self, tokenizer, dataset, max_seq_length, mask_probability, short_seq_prob=0.1, batch_size=64): @@ -118,45 +93,20 @@ class BertPretrainingPreprocessedDataLayer(DataLayerNM): @property def output_ports(self): """Returns definitions of module output ports. 
- - input_ids: indices of tokens which constitute batches of text segments - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: indices of token types (e.g., sentences A & B in BERT) - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: bool tensor with 0s in place of tokens to be masked - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - output_ids: indices of output tokens which should be predicted - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - output_mask: bool tensor with 0s in place of tokens to be excluded - from loss calculation - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - labels: indices of classes to be predicted from [CLS] token of text - segments (e.g, 0 or 1 in next sentence prediction task) - 0: AxisType(BatchTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(BatchTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(BatchTag)}), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "output_ids": NeuralType(('B', 'T'), ChannelType()), + "output_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(tuple('B'), LabelsType()), } def __init__(self, dataset, max_pred_length, batch_size=64, training=True): diff --git a/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py b/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py index 62d1cc5e857c..ebd1b2a738d0 100644 --- a/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import LanguageModelingDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['LanguageModelingDataLayer'] @@ -55,9 +55,12 @@ def output_ports(self): 1: AxisType(TimeTag) """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(('B', 'T'), LabelsType()), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py 
b/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py index 8a6608ff12ed..44f877f5dcc3 100644 --- a/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py @@ -20,7 +20,7 @@ import nemo from nemo.collections.nlp.data import TranslationDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['TranslationDataLayer'] @@ -48,44 +48,34 @@ def output_ports(self): """Returns definitions of module output ports. src_ids: indices of tokens which correspond to source sentences - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) src_mask: bool tensor with 0s in place of source tokens to be masked - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) tgt_ids: indices of tokens which correspond to target sentences - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) tgt_mask: bool tensor with 0s in place of target tokens to be masked - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) labels: indices of tokens which should be predicted from each of the corresponding target tokens in tgt_ids; for standard neural machine translation equals to tgt_ids shifted by 1 to the right - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) sent_ids: indices of the sentences in a batch; important for evaluation with external metrics, such as SacreBLEU - 0: AxisType(BatchTag) """ return { - "src_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "src_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "tgt_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "tgt_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "sent_ids": NeuralType({0: AxisType(BatchTag)}), + # "src_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "src_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "tgt_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "tgt_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "sent_ids": NeuralType({0: AxisType(BatchTag)}), + "src_ids": NeuralType(('B', 'T'), ChannelType()), + "src_mask": NeuralType(('B', 'T'), ChannelType()), + "tgt_ids": NeuralType(('B', 'T'), ChannelType()), + "tgt_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(('B', 'T'), LabelsType()), + "sent_ids": NeuralType(tuple('B'), ChannelType()), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py b/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py index 3bb1fe93b0b6..e3cfeda2235a 100644 --- a/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import BertPunctuationCapitalizationDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['PunctuationCapitalizationDataLayer'] @@ -25,51 +25,22 @@ class PunctuationCapitalizationDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output 
ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - subtokens_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - punct_labels: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - capit_labels: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "punct_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "capit_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "punct_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "capit_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), + "punct_labels": NeuralType(('B', 'T'), LabelsType()), + "capit_labels": NeuralType(('B', 'T'), LabelsType()), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py b/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py index 544d3da0f0ca..24ef5897fb1f 100644 --- a/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import SquadDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['BertQuestionAnsweringDataLayer'] @@ -48,39 +48,20 @@ class BertQuestionAnsweringDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. 
- - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - start_positions: - 0: AxisType(BatchTag) - - end_positions: - 0: AxisType(BatchTag) - - unique_ids: - 0: AxisType(BatchTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "start_positions": NeuralType({0: AxisType(BatchTag)}), - "end_positions": NeuralType({0: AxisType(BatchTag)}), - "unique_ids": NeuralType({0: AxisType(BatchTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "start_positions": NeuralType({0: AxisType(BatchTag)}), + # "end_positions": NeuralType({0: AxisType(BatchTag)}), + # "unique_ids": NeuralType({0: AxisType(BatchTag)}), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "start_positions": NeuralType(tuple('B'), ChannelType()), + "end_positions": NeuralType(tuple('B'), ChannelType()), + "unique_ids": NeuralType(tuple('B'), ChannelType()), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py index 0535eb917397..2b7e3800928a 100644 --- a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py @@ -43,7 +43,7 @@ import nemo from nemo.collections.nlp.data.datasets import MultiWOZDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core.neural_types import * +from nemo.core.neural_types import ChannelType, LabelsType, LengthsType, NeuralType __all__ = ['MultiWOZDataLayer'] @@ -54,41 +54,32 @@ def output_ports(self): """Returns definitions of module output ports. 
src_ids: ids of input sequences - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) src_lens: lengths of input sequences - 0: AxisType(BatchTag) tgt_ids: labels for the generator output - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(TimeTag) tgt_lens: lengths of the generator targets - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) gating_labels: labels for the gating head - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) turn_domain: list of the domains NeuralType(None) """ return { - "src_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "src_lens": NeuralType({0: AxisType(BatchTag)}), - "tgt_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), - "tgt_lens": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "gating_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "turn_domain": NeuralType(None), + # "src_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "src_lens": NeuralType({0: AxisType(BatchTag)}), + # "tgt_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), + # "tgt_lens": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + # "gating_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + # "turn_domain": NeuralType(None), + "src_ids": NeuralType(('B', 'T'), ChannelType()), + "src_lens": NeuralType(tuple('B'), LengthsType()), + "tgt_ids": NeuralType(('B', 'D', 'T'), LabelsType()), + "tgt_lens": NeuralType(('B', 'D'), LengthsType()), + "gating_labels": NeuralType(('B', 'D'), LabelsType()), + "turn_domain": NeuralType(), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py b/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py index cba881fffad3..a104a5a543f5 100644 --- a/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import BertTextClassificationDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['BertSentenceClassificationDataLayer'] @@ -36,31 +36,16 @@ class BertSentenceClassificationDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. 
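Note the turn_domain port above: the old NeuralType(None) becomes a bare NeuralType(). Assuming the constructor's defaults leave both the axes and the element type unconstrained (a void/wildcard element type), this declares a port that the type checker will not restrict, which suits a plain Python list of domain names:

    from nemo.core.neural_types import NeuralType

    # No axes, no elements_type: an unconstrained port.
    turn_domain = NeuralType()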
- - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - labels: - 0: AxisType(BatchTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(BatchTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(BatchTag)}), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(tuple('B'), LabelsType()), } def __init__( diff --git a/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py b/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py index bb573cf8e08c..5fd6cbe2ee5b 100644 --- a/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py @@ -16,7 +16,7 @@ from nemo.collections.nlp.data import BertTokenClassificationDataset, BertTokenClassificationInferDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import AxisType, BatchTag, NeuralType, TimeTag +from nemo.core import ChannelType, LabelsType, NeuralType __all__ = ['BertTokenClassificationDataLayer', 'BertTokenClassificationInferDataLayer'] @@ -25,44 +25,20 @@ class BertTokenClassificationDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. 
- - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - subtokens_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - labels: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), + "labels": NeuralType(('B', 'T'), LabelsType()), } def __init__( @@ -101,39 +77,18 @@ class BertTokenClassificationInferDataLayer(TextDataLayer): @property def output_ports(self): """Returns definitions of module output ports. 
- - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - subtokens_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), } def __init__( diff --git a/nemo/collections/nlp/nm/losses/aggregator_loss.py b/nemo/collections/nlp/nm/losses/aggregator_loss.py index 9a390387531c..b1681c7048cb 100644 --- a/nemo/collections/nlp/nm/losses/aggregator_loss.py +++ b/nemo/collections/nlp/nm/losses/aggregator_loss.py @@ -15,7 +15,7 @@ # ============================================================================= from nemo.backends.pytorch import LossNM -from nemo.core import NeuralType +from nemo.core import LossType, NeuralType __all__ = ['LossAggregatorNM'] @@ -35,7 +35,7 @@ def input_ports(self): """ input_ports = {} for i in range(self.num_losses): - input_ports["loss_" + str(i + 1)] = NeuralType(None) + input_ports["loss_" + str(i + 1)] = NeuralType() return input_ports @@ -46,7 +46,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, num_inputs=2): # Store number of inputs/losses. diff --git a/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py b/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py index 9727ea25b57d..ce73176747d7 100644 --- a/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py +++ b/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py @@ -18,7 +18,7 @@ from torch import nn from nemo.backends.pytorch import LossNM -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import ChannelType, LogitsType, LossType, NeuralType __all__ = ['JointIntentSlotLoss'] @@ -49,38 +49,18 @@ class JointIntentSlotLoss(LossNM): def input_ports(self): """Returns definitions of module input ports. 
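The aggregator hunk above builds its input ports dynamically, one unconstrained NeuralType() per loss. Spelled out for the default num_inputs=2, the loop produces:

    from nemo.core import NeuralType

    # Generated names loss_1..loss_N, each accepting any incoming loss tensor.
    input_ports = {}
    for i in range(2):
        input_ports["loss_" + str(i + 1)] = NeuralType()
    assert set(input_ports) == {"loss_1", "loss_2"}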
- intent_logits: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - slot_logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - intents: - 0: AxisType(BatchTag) - - slots: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - """ return { - "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "intents": NeuralType({0: AxisType(BatchTag)}), - "slots": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + # "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "intents": NeuralType({0: AxisType(BatchTag)}), + # "slots": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "intent_logits": NeuralType(('B', 'D'), LogitsType()), + "slot_logits": NeuralType(('B', 'T', 'D'), LogitsType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "intents": NeuralType(tuple('B'), ChannelType()), + "slots": NeuralType(('B', 'T'), ChannelType()), } @property @@ -90,7 +70,8 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + # return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType())} def __init__( self, num_slots, slot_classes_loss_weights=None, intent_classes_loss_weights=None, intent_loss_weight=0.6, diff --git a/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py b/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py index 45bb5d950972..38f5169bf348 100644 --- a/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py +++ b/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py @@ -16,7 +16,7 @@ from nemo.backends.pytorch import LossNM from nemo.collections.nlp.nm.losses.smoothed_cross_entropy_loss import SmoothedCrossEntropyLoss -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import ChannelType, LogitsType, LossType, NeuralType __all__ = ['MaskedLanguageModelingLossNM'] @@ -32,28 +32,14 @@ class MaskedLanguageModelingLossNM(LossNM): @property def input_ports(self): """Returns definitions of module input ports. 
- - logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - output_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - output_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "logits": NeuralType(('B', 'T', 'D'), LogitsType()), + "output_ids": NeuralType(('B', 'T'), ChannelType()), + "output_mask": NeuralType(('B', 'T'), ChannelType()), } @property @@ -63,7 +49,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, label_smoothing=0.0): LossNM.__init__(self) diff --git a/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py b/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py index a8611cc7b3ec..1564f43c40b0 100644 --- a/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py +++ b/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py @@ -17,7 +17,7 @@ from nemo.backends.pytorch import LossNM from nemo.collections.nlp.nm.losses.smoothed_cross_entropy_loss import SmoothedCrossEntropyLoss from nemo.collections.nlp.utils.common_nlp_utils import mask_padded_tokens -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import LabelsType, LogitsType, LossType, NeuralType __all__ = ['PaddedSmoothedCrossEntropyLossNM'] @@ -38,32 +38,20 @@ class PaddedSmoothedCrossEntropyLossNM(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - target_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "target_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "target_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "logits": NeuralType(('B', 'T', 'D'), LogitsType()), + "target_ids": NeuralType(('B', 'T'), LabelsType()), } @property def output_ports(self): """Returns definitions of module output ports. 
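Every loss module in these hunks converges on the same output declaration: an axis-less port whose element type is LossType(), i.e. a scalar loss. A compact sketch of the recurring shape of these modules; the _loss_function hook name follows the LossNM convention used by the modules in this PR, but the class itself is hypothetical:

    from nemo.backends.pytorch import LossNM
    from nemo.core import LabelsType, LogitsType, LossType, NeuralType

    class ToyLoss(LossNM):  # hypothetical sketch, not a module from this PR
        @property
        def input_ports(self):
            return {
                "logits": NeuralType(('B', 'D'), LogitsType()),
                "labels": NeuralType(tuple('B'), LabelsType()),
            }

        @property
        def output_ports(self):
            # axes=None: the loss is a scalar, so only its element type matters.
            return {"loss": NeuralType(elements_type=LossType())}

        def _loss_function(self, logits, labels):  # assumed LossNM hook name
            ...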
- - loss: - NeuralType(None) """ - return {"loss": NeuralType(None)} + # return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, pad_id, label_smoothing=0, predict_last_k=0): LossNM.__init__(self) diff --git a/nemo/collections/nlp/nm/losses/qa_squad_loss.py b/nemo/collections/nlp/nm/losses/qa_squad_loss.py index 0919d7362e90..1237b9255edb 100644 --- a/nemo/collections/nlp/nm/losses/qa_squad_loss.py +++ b/nemo/collections/nlp/nm/losses/qa_squad_loss.py @@ -17,7 +17,7 @@ from torch import nn from nemo.backends.pytorch import LossNM -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import ChannelType, LogitsType, LossType, NeuralType __all__ = ['QuestionAnsweringLoss'] @@ -38,24 +38,14 @@ class QuestionAnsweringLoss(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - start_positions: - 0: AxisType(BatchTag) - - end_positions: - 0: AxisType(BatchTag) """ return { - "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "start_positions": NeuralType({0: AxisType(BatchTag)}), - "end_positions": NeuralType({0: AxisType(BatchTag)}), + # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "start_positions": NeuralType({0: AxisType(BatchTag)}), + # "end_positions": NeuralType({0: AxisType(BatchTag)}), + "logits": NeuralType(('B', 'T', 'D'), LogitsType()), + "start_positions": NeuralType(tuple('B'), ChannelType()), + "end_positions": NeuralType(tuple('B'), ChannelType()), } @property @@ -76,9 +66,12 @@ def output_ports(self): 1: AxisType(TimeTag) """ return { - "loss": NeuralType(None), - "start_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "end_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss": NeuralType(None), + # "start_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "end_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "loss": NeuralType(elements_type=LossType()), + "start_logits": NeuralType(('B', 'T'), ChannelType()), + "end_logits": NeuralType(('B', 'T'), ChannelType()), } def __init__(self): diff --git a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py b/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py index 79c94f389028..aa67439b9262 100644 --- a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py +++ b/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py @@ -39,7 +39,7 @@ import torch from nemo.backends.pytorch.nm import LossNM -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core.neural_types import ChannelType, LabelsType, LengthsType, LogitsType, LossType, NeuralType __all__ = ['TRADEMaskedCrossEntropy', 'CrossEntropyLoss3D'] @@ -61,44 +61,29 @@ def input_ports(self): """Returns definitions of module input ports. 
        logits: 4d tensor of logits
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(ChannelTag)
-
-            3: AxisType(ChannelTag)

        targets: 3d tensor of labels
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
-
-            2: AxisType(TimeTag)

        loss_mask: specifies the words to be considered in the loss calculation
-            0: AxisType(BatchTag)
-
-            1: AxisType(ChannelTag)
        """
        return {
-            "logits": NeuralType(
-                {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag), 3: AxisType(ChannelTag)}
-            ),
-            "targets": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}),
-            "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
+            # "logits": NeuralType(
+            #     {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag), 3: AxisType(ChannelTag)}
+            # ),
+            # "targets": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}),
+            # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
+            "logits": NeuralType(('B', 'T', 'D', 'D'), LogitsType()),
+            "targets": NeuralType(('B', 'D', 'T'), LabelsType()),
+            "loss_mask": NeuralType(('B', 'D'), LengthsType()),
        }

    @property
    def output_ports(self):
        """Returns definitions of module output ports.
-
-        loss: loss value
-            NeuralType(None)
-
        """
-        return {"loss": NeuralType(None)}
+        # return {"loss": NeuralType(None)}
+        return {"loss": NeuralType(elements_type=LossType())}

    def __init__(self):
        LossNM.__init__(self)
@@ -139,15 +124,18 @@
    def input_ports(self):
        """Returns definitions of module input ports.
        """
        return {
-            "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}),
-            "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
+            # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}),
+            # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}),
+            "logits": NeuralType(('B', 'D', 'D'), LogitsType()),
+            "labels": NeuralType(('B', 'D'), LabelsType()),
        }

    @property
    def output_ports(self):
        """Returns definitions of module output ports.
        """
-        return {"loss": NeuralType(None)}
+        # return {"loss": NeuralType(None)}
+        return {"loss": NeuralType(elements_type=LossType())}

    def __init__(self, num_classes, **kwargs):
        LossNM.__init__(self, **kwargs)
diff --git a/nemo/collections/nlp/nm/losses/token_classification_loss.py b/nemo/collections/nlp/nm/losses/token_classification_loss.py
index edc3a136ac68..e27c74e952a3 100644
--- a/nemo/collections/nlp/nm/losses/token_classification_loss.py
+++ b/nemo/collections/nlp/nm/losses/token_classification_loss.py
@@ -18,7 +18,7 @@
 from torch import nn

 from nemo.backends.pytorch import LossNM
-from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag
+from nemo.core import ChannelType, LabelsType, LogitsType, LossType, NeuralType

 __all__ = ['TokenClassificationLoss']

@@ -38,28 +38,14 @@ class TokenClassificationLoss(LossNM):
    @property
    def input_ports(self):
        """Returns definitions of module input ports.
- - logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - labels: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - loss_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "logits": NeuralType(('B', 'T', 'D'), LogitsType()), + "labels": NeuralType(('B', 'T'), LabelsType()), + "loss_mask": NeuralType(('B', 'T'), ChannelType()), } @property @@ -69,7 +55,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, num_classes, class_weights=None): LossNM.__init__(self) diff --git a/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py b/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py index ba41297e13b1..e51ca6b3b9d1 100644 --- a/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py @@ -20,7 +20,7 @@ from nemo.backends.pytorch.nm import TrainableNM from nemo.core.neural_modules import PretrainedModelInfo -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core.neural_types import ChannelType, NeuralType __all__ = ['BERT'] @@ -49,40 +49,22 @@ class BERT(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - token_type_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - attention_mask: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "token_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "attention_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "token_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "attention_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "token_type_ids": NeuralType(('B', 'T'), ChannelType()), + "attention_mask": NeuralType(('B', 'T'), ChannelType()), } @property def output_ports(self): """Returns definitions of module output ports. 
- - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} def __init__( self, diff --git a/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py b/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py index 73d487c20fc4..60b1f2c45e7c 100644 --- a/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py @@ -18,7 +18,7 @@ from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import ChannelType, LogitsType, NeuralType __all__ = ['SequenceClassifier'] @@ -41,26 +41,15 @@ class SequenceClassifier(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} @property def output_ports(self): """Returns definitions of module output ports. - - logits: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) """ - return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} + # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} + return {"logits": NeuralType(('B', 'D'), LogitsType())} def __init__( self, diff --git a/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py b/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py index b3b1de0da00a..0989afd162ad 100644 --- a/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py @@ -18,7 +18,7 @@ from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, RegressionTag, TimeTag +from nemo.core import ChannelType, NeuralType, RegressionValuesType __all__ = ['SequenceRegression'] @@ -39,24 +39,16 @@ class SequenceRegression(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} @property def output_ports(self): """Returns definitions of module output ports. 
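Taken together, the BERT and classifier-head hunks document an entire pipeline in types: ('B', 'T') ids flow into BERT, ('B', 'T', 'D') hidden states flow into the heads, and per-example predictions come out. Restating the contract declared by the ports in these hunks:

    from nemo.core import ChannelType, LogitsType, NeuralType, RegressionValuesType

    input_ids = NeuralType(('B', 'T'), ChannelType())           # data layer output == BERT input
    hidden_states = NeuralType(('B', 'T', 'D'), ChannelType())  # BERT output == head input
    logits = NeuralType(('B', 'D'), LogitsType())               # SequenceClassifier output
    preds = NeuralType(tuple('B'), RegressionValuesType())      # SequenceRegression output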
- - preds: - 0: AxisType(RegressionTag) """ - return {"preds": NeuralType({0: AxisType(RegressionTag)})} + # return {"preds": NeuralType({0: AxisType(RegressionTag)})} + return {"preds": NeuralType(tuple('B'), RegressionValuesType())} def __init__(self, hidden_size, num_layers=2, activation='relu', dropout=0.0, use_transformer_pretrained=True): super().__init__() diff --git a/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py b/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py index e95903d764e3..1b4c879906c7 100644 --- a/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py @@ -18,7 +18,7 @@ from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import gelu, transformer_weights_init -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import ChannelType, LogitsType, NeuralType __all__ = ['BertTokenClassifier', 'TokenClassifier'] @@ -42,28 +42,16 @@ class BertTokenClassifier(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} @property def output_ports(self): """Returns definitions of module output ports. - - logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"logits": NeuralType(('B', 'T', 'C'), LogitsType())} def __init__( self, @@ -115,28 +103,16 @@ class TokenClassifier(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(('B', 'T', 'C'), ChannelType())} @property def output_ports(self): """Returns definitions of module output ports. 
- - logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"logits": NeuralType(('B', 'T', 'D'), LogitsType())} def __init__( self, diff --git a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py index b736588a3d33..db858982adb1 100644 --- a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py @@ -14,7 +14,7 @@ ) from nemo.collections.nlp.nm.trainables.common.transformer.transformer_modules import TransformerEmbedding from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init -from nemo.core.neural_types import * +from nemo.core.neural_types import ChannelType, NeuralType __all__ = ['TransformerEncoderNM', 'TransformerDecoderNM', 'GreedyLanguageGeneratorNM', 'BeamSearchTranslatorNM'] @@ -47,34 +47,21 @@ class TransformerEncoderNM(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask_src: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask_src": NeuralType(('B', 'T'), ChannelType()), } @property def output_ports(self): """Returns definitions of module output ports. - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} def __init__( self, @@ -149,48 +136,24 @@ class TransformerDecoderNM(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. 
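One inconsistency worth flagging in these trainable-module hunks: most three-axis ports use ('B', 'T', 'D'), but BertTokenClassifier.output_ports, TokenClassifier.input_ports, and the beam-search ports below use ('B', 'T', 'C'). If 'C' (channel) and 'D' (dimension) map to distinct axis kinds in the new system, wiring BERT's ('B', 'T', 'D') hidden states into a ('B', 'T', 'C') input would be reported as a mismatch, so the two spellings likely deserve unification:

    from nemo.core.neural_types import ChannelType, NeuralType

    bert_out = NeuralType(('B', 'T', 'D'), ChannelType())  # BERT.output_ports above
    clf_in = NeuralType(('B', 'T', 'C'), ChannelType())    # TokenClassifier.input_ports above
    # Whether these two compare as compatible depends on how AxisKind treats 'C' vs 'D'.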
- - input_ids_tgt: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - hidden_states_src: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - input_mask_src: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - input_mask_tgt: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "input_ids_tgt": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "hidden_states_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "input_mask_tgt": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_ids_tgt": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "hidden_states_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "input_mask_tgt": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "input_ids_tgt": NeuralType(('B', 'T'), ChannelType()), + "hidden_states_src": NeuralType(('B', 'T', 'D'), ChannelType()), + "input_mask_src": NeuralType(('B', 'T'), ChannelType()), + "input_mask_tgt": NeuralType(('B', 'T'), ChannelType()), } @property def output_ports(self): """Returns definitions of module output ports. - - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} def __init__( self, @@ -255,24 +218,16 @@ class GreedyLanguageGeneratorNM(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - input_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"input_ids": NeuralType(('B', 'T'), ChannelType())} @property def output_ports(self): """Returns definitions of module output ports. - - output_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"output_ids": NeuralType(('B', 'T'), ChannelType())} def __init__(self, decoder, log_softmax, max_seq_length, pad_token, bos_token, eos_token, batch_size=1): super().__init__() @@ -319,34 +274,20 @@ class BeamSearchTranslatorNM(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. 
- - hidden_states_src: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - input_mask_src: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "hidden_states_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "hidden_states_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "hidden_states_src": NeuralType(('B', 'T', 'C'), ChannelType()), + "input_mask_src": NeuralType(('B', 'T'), ChannelType()), } @property def output_ports(self): """Returns definitions of module output ports. - - output_ids: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"output_ids": NeuralType(('B', 'T'), ChannelType())} @property def num_weights(self): diff --git a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py index d58860f8b5ff..1e047542e3ba 100644 --- a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py +++ b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py @@ -45,7 +45,7 @@ from torch import nn as nn from nemo.backends.pytorch.nm import TrainableNM -from nemo.core.neural_types import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core.neural_types import ChannelType, LabelsType, LengthsType, LogitsType, NeuralType __all__ = ['TRADEGenerator'] @@ -56,41 +56,28 @@ def input_ports(self): """Returns definitions of module input ports. 
         encoder_hidden: hidden states of the encoder
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(ChannelTag)

         encoder_outputs: outputs of the encoder
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)
-
-            2: AxisType(ChannelTag)

         input_lens: lengths of the input sequences to encoder
-            0: AxisType(BatchTag)

         src_ids: input sequences to encoder
-            0: AxisType(BatchTag)
-
-            1: AxisType(TimeTag)

         targets: targets for the output of the generator
-            0: AxisType(BatchTag)
-
-            1: AxisType(BatchTag)
-
-            2: AxisType(TimeTag)
         """
         return {
-            'encoder_hidden': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}),
-            'encoder_outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}),
-            'input_lens': NeuralType({0: AxisType(BatchTag)}),
-            'src_ids': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
-            'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}),
+            # 'encoder_hidden': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}),
+            # 'encoder_outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}),
+            # 'input_lens': NeuralType({0: AxisType(BatchTag)}),
+            # 'src_ids': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}),
+            # 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}),
+            'encoder_hidden': NeuralType(('B', 'T', 'C'), ChannelType()),
+            'encoder_outputs': NeuralType(('B', 'T', 'C'), ChannelType()),
+            'input_lens': NeuralType(tuple('B'), LengthsType()),
+            'src_ids': NeuralType(('B', 'T'), ChannelType()),
+            'targets': NeuralType(('B', 'D', 'T'), LabelsType()),
         }

     @property
@@ -98,27 +85,19 @@ def output_ports(self):
         """Returns definitions of module output ports. 
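One idiom worth flagging in these rewrites: rank-1, batch-only ports are declared with `tuple('B')`, which is nothing more than the one-element tuple `('B',)`. A short sketch:

.. code-block:: python

    from nemo.core.neural_types import LengthsType, NeuralType

    assert tuple('B') == ('B',)  # plain Python, no NeMo machinery involved

    # So these two declarations are interchangeable:
    input_lens = NeuralType(tuple('B'), LengthsType())
    input_lens_alt = NeuralType(('B',), LengthsType())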
point_outputs: outputs of the generator - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) - - 3: AxisType(ChannelTag) gate_outputs: outputs of gating heads - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(ChannelTag) """ + # return { + # 'point_outputs': NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag), 3: AxisType(ChannelTag)} + # ), + # 'gate_outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}), + # } return { - 'point_outputs': NeuralType( - {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag), 3: AxisType(ChannelTag)} - ), - 'gate_outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}), + 'point_outputs': NeuralType(('B', 'T', 'D', 'D'), LogitsType()), + 'gate_outputs': NeuralType(('B', 'D', 'D'), LogitsType()), } def __init__(self, vocab, embeddings, hid_size, dropout, slots, nb_gate, teacher_forcing=0.5): diff --git a/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py b/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py index a5fc02b0abcd..c906417afd6d 100644 --- a/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py +++ b/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py @@ -18,7 +18,7 @@ from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init -from nemo.core import AxisType, BatchTag, ChannelTag, NeuralType, TimeTag +from nemo.core import ChannelType, LogitsType, NeuralType __all__ = ['JointIntentSlotClassifier'] @@ -39,15 +39,9 @@ class JointIntentSlotClassifier(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. 
- - hidden_states: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) """ - return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} + return {"hidden_states": NeuralType(('B', 'T', 'C'), ChannelType())} @property def output_ports(self): @@ -66,8 +60,10 @@ def output_ports(self): 2: AxisType(ChannelTag) """ return { - "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + # "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), + # "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), + "intent_logits": NeuralType(('B', 'D'), LogitsType()), + "slot_logits": NeuralType(('B', 'T', 'D'), LogitsType()), } def __init__(self, hidden_size, num_intents, num_slots, dropout=0.0, use_transformer_pretrained=True, **kwargs): diff --git a/nemo/collections/simple_gan/gan.py b/nemo/collections/simple_gan/gan.py index cc28d7dad4d9..b0d39a406d64 100644 --- a/nemo/collections/simple_gan/gan.py +++ b/nemo/collections/simple_gan/gan.py @@ -4,8 +4,9 @@ from torch.utils.data import Dataset from torchvision import datasets, transforms -from nemo.backends.pytorch.nm import DataLayerNM, LossNM, NonTrainableNM, TrainableNM -from nemo.core import AxisType, BatchTag, ChannelTag, DeviceType, HeightTag, NeuralType, WidthTag +from nemo.backends.pytorch.nm import DataLayerNM, LossNM, TrainableNM +from nemo.core import DeviceType +from nemo.core.neural_types import ChannelType, LabelsType, LossType, NeuralType class SimpleDiscriminator(TrainableNM): @@ -16,37 +17,25 @@ class SimpleDiscriminator(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - image: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, 28) - - 3: AxisType(WidthTag, 28) """ return { - "image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 28), - 3: AxisType(WidthTag, 28), - } - ) + # "image": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, 28), + # 3: AxisType(WidthTag, 28), + # } + # ) + "image": NeuralType(('B', 'C', 'H', 'W'), ChannelType()) } @property def output_ports(self): """Returns definitions of module output ports. - - decision: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag, 1) """ - return {"decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)})} + # return {"decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)})} + return {"decision": NeuralType(('B', 'C'), ChannelType())} def __init__(self): super().__init__() @@ -78,49 +67,33 @@ class SimpleGenerator(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. 
- - latents: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag, 64) - - 2: AxisType(HeightTag, 4) - - 3: AxisType(WidthTag, 4) """ return { - "latents": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag, 64), - 2: AxisType(HeightTag, 4), - 3: AxisType(WidthTag, 4), - } - ) + # "latents": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag, 64), + # 2: AxisType(HeightTag, 4), + # 3: AxisType(WidthTag, 4), + # } + # ) + "latents": NeuralType(('B', 'C', 'H', 'W'), ChannelType()) } @property def output_ports(self): """Returns definitions of module output ports. - - image: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, 28) - - 3: AxisType(WidthTag, 28) """ return { - "image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 28), - 3: AxisType(WidthTag, 28), - } - ) + # "image": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, 28), + # 3: AxisType(WidthTag, 28), + # } + # ) + "image": NeuralType(('B', 'C', 'H', 'W'), ChannelType()) } def __init__(self, batch_size): @@ -164,17 +137,15 @@ def input_ports(self): 1: AxisType(ChannelTag, 1) """ return { - "decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)}), + # "decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)}), + "decision": NeuralType(('B', 'D'), ChannelType()) } @property def output_ports(self): """Returns definitions of module output ports. - - loss: - NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, neg=False): super().__init__() @@ -199,31 +170,19 @@ class GradientPenalty(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - interpolated_image: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, 28) - - 3: AxisType(WidthTag, 28) - - interpolated_decision: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag, 1) """ return { - "interpolated_image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 28), - 3: AxisType(WidthTag, 28), - } - ), - "interpolated_decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)}), + # "interpolated_image": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, 28), + # 3: AxisType(WidthTag, 28), + # } + # ), + # "interpolated_decision": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, 1)}), + "interpolated_image": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), + "interpolated_decision": NeuralType(('B', 'C'), ChannelType()), } @property @@ -233,7 +192,7 @@ def output_ports(self): loss: NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, lambda_): super().__init__() @@ -268,66 +227,42 @@ class InterpolateImage(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. 
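Note that these rewritten GAN ports drop the fixed sizes the old declarations carried (28x28 images, 64x4x4 latents): the tuple shorthand leaves every axis size unspecified. If pinning the sizes is still desired, the new `AxisType` accepts an explicit size; a sketch of that option (not what this changeset does):

.. code-block:: python

    from nemo.core.neural_types import AxisKind, AxisType, ChannelType, NeuralType

    # Equivalent of the old 28x28 image declaration, with sizes kept.
    image_28x28 = NeuralType(
        (
            AxisType(AxisKind.Batch),
            AxisType(AxisKind.Channel),
            AxisType(AxisKind.Height, 28),
            AxisType(AxisKind.Width, 28),
        ),
        ChannelType(),
    )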
- - image1: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, 28) - - 3: AxisType(WidthTag, 28) - - image2: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, 28) - - 3: AxisType(WidthTag, 28) """ return { - "image1": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 28), - 3: AxisType(WidthTag, 28), - } - ), - "image2": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 28), - 3: AxisType(WidthTag, 28), - } - ), + # "image1": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, 28), + # 3: AxisType(WidthTag, 28), + # } + # ), + # "image2": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, 28), + # 3: AxisType(WidthTag, 28), + # } + # ), + "image1": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), + "image2": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), } @property def output_ports(self): """Returns definitions of module output ports. - - interpolated_image: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, 28) - - 3: AxisType(WidthTag, 28) """ return { - "interpolated_image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 28), - 3: AxisType(WidthTag, 28), - } - ) + # "interpolated_image": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, 28), + # 3: AxisType(WidthTag, 28), + # } + # ) + "interpolated_image": NeuralType(('B', 'C', 'H', 'W'), ChannelType()) } def __init__(self): @@ -364,14 +299,15 @@ def output_ports(self): 3: AxisType(WidthTag, 4) """ return { - "latent": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag, 64), - 2: AxisType(HeightTag, 4), - 3: AxisType(WidthTag, 4), - } - ) + # "latent": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag, 64), + # 2: AxisType(HeightTag, 4), + # 3: AxisType(WidthTag, 4), + # } + # ) + "latent": NeuralType(('B', 'C', 'H', 'W'), ChannelType()) } def __init__(self, batch_size): @@ -417,46 +353,28 @@ class MnistGanDataLayer(DataLayerNM): @property def output_ports(self): """Returns definitions of module output ports. 
- - latent: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag, 64) - - 2: AxisType(HeightTag, 4) - - 3: AxisType(WidthTag, 4) - - image: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, user defined) - - 3: AxisType(WidthTag, user defined) - - label: - 0: AxisType(BatchTag) """ return { - "latent": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag, 64), - 2: AxisType(HeightTag, 4), - 3: AxisType(WidthTag, 4), - } - ), - "image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, self._input_size[1]), - 3: AxisType(WidthTag, self._input_size[0]), - } - ), - "label": NeuralType({0: AxisType(BatchTag)}), + # "latent": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag, 64), + # 2: AxisType(HeightTag, 4), + # 3: AxisType(WidthTag, 4), + # } + # ), + # "image": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, self._input_size[1]), + # 3: AxisType(WidthTag, self._input_size[0]), + # } + # ), + # "label": NeuralType({0: AxisType(BatchTag)}), + "latent": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), + "image": NeuralType(('B', 'C', 'H', 'W'), ChannelType()), + "label": NeuralType(tuple('B'), LabelsType()), } def __init__(self, batch_size, root, train=True, shuffle=True): diff --git a/nemo/collections/tts/data_layers.py b/nemo/collections/tts/data_layers.py index cad859fb10cb..ffebe99e3df9 100644 --- a/nemo/collections/tts/data_layers.py +++ b/nemo/collections/tts/data_layers.py @@ -5,7 +5,7 @@ from .parts.datasets import AudioOnlyDataset from nemo.backends.pytorch.nm import DataLayerNM from nemo.core import DeviceType -from nemo.core.neural_types import * +from nemo.core.neural_types import AudioSignal, LengthsType, NeuralType class AudioDataLayer(DataLayerNM): @@ -48,18 +48,12 @@ class AudioDataLayer(DataLayerNM): @property def output_ports(self): """Returns definitions of module output ports. - - audio_signal: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - a_sig_length: - 0: AxisType(BatchTag) """ return { - "audio_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "a_sig_length": NeuralType({0: AxisType(BatchTag)}), + # "audio_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "a_sig_length": NeuralType({0: AxisType(BatchTag)}), + "audio_signal": NeuralType(('B', 'T'), AudioSignal()), + "a_sig_length": NeuralType(tuple('B'), LengthsType()), } def __init__( diff --git a/nemo/collections/tts/tacotron2_modules.py b/nemo/collections/tts/tacotron2_modules.py index 0613311d3dc4..083ac4697526 100644 --- a/nemo/collections/tts/tacotron2_modules.py +++ b/nemo/collections/tts/tacotron2_modules.py @@ -35,29 +35,19 @@ class TextEmbedding(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - char_phone - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"char_phone": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"char_phone": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"char_phone": NeuralType(('B', 'T'), LabelsType())} @property def output_ports(self): """Returns definitions of module output ports. 
- - char_phone_embeddings: - 0: AxisType(BatchTag) - - 1: AxisType(EmbeddedTextTag) - - 2: AxisType(TimeTag)}) """ return { - "char_phone_embeddings": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(EmbeddedTextTag), 2: AxisType(TimeTag),} - ) + # "char_phone_embeddings": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(EmbeddedTextTag), 2: AxisType(TimeTag),} + # ) + "char_phone_embeddings": NeuralType(('B', 'D', 'T'), EmbeddedTextType()) } def __init__(self, n_symbols, symbols_embedding_dim: int = 512): @@ -87,39 +77,25 @@ class Tacotron2Encoder(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - char_phone_embeddings: - 0: AxisType(BatchTag) - - 1: AxisType(EmbeddedTextTag) - - 2: AxisType(TimeTag) - - embedding_length: - 0: AxisType(BatchTag) """ return { - "char_phone_embeddings": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(EmbeddedTextTag), 2: AxisType(TimeTag),} - ), - "embedding_length": NeuralType({0: AxisType(BatchTag)}), + # "char_phone_embeddings": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(EmbeddedTextTag), 2: AxisType(TimeTag),} + # ), + # "embedding_length": NeuralType({0: AxisType(BatchTag)}), + "char_phone_embeddings": NeuralType(('B', 'D', 'T'), EmbeddedTextType()), + "embedding_length": NeuralType(tuple('B'), LengthsType()), } @property def output_ports(self): """Returns definitions of module output ports. - - char_phone_embeddings: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(EncodedRepresentationTag)}) """ return { - "char_phone_encoded": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} - ) + # "char_phone_encoded": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} + # ) + "char_phone_encoded": NeuralType(('B', 'T', 'D'), EncodedRepresentation()) } def __init__( @@ -179,63 +155,33 @@ class Tacotron2Decoder(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - char_phone_encoded: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(EncodedRepresentationTag) - - encoded_length: - 0: AxisType(BatchTag) - - mel_target: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) """ return { - "char_phone_encoded": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} - ), - "encoded_length": NeuralType({0: AxisType(BatchTag)}), - "mel_target": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), + # "char_phone_encoded": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} + # ), + # "encoded_length": NeuralType({0: AxisType(BatchTag)}), + # "mel_target": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + "char_phone_encoded": NeuralType(('B', 'T', 'D'), EncodedRepresentation()), + "encoded_length": NeuralType(tuple('B'), LengthsType()), + "mel_target": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), } @property def output_ports(self): """Returns definitions of module output ports. 
- - mel_output: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) - - gate_output: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - alignments: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(TimeTag) """ return { - "mel_output": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), - "gate_output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "alignments": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), + # "mel_output": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + # "gate_output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "alignments": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), + "mel_output": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "gate_output": NeuralType(('B', 'T'), ChannelType()), + "alignments": NeuralType(('B', 'T', 'T'), ChannelType()), } def __init__( @@ -326,57 +272,31 @@ class Tacotron2DecoderInfer(Tacotron2Decoder): @property def input_ports(self): """Returns definitions of module input ports. - - char_phone_encoded: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(EncodedRepresentationTag) - - encoded_length: - 0: AxisType(BatchTag) """ return { - "char_phone_encoded": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} - ), - "encoded_length": NeuralType({0: AxisType(BatchTag)}), + # "char_phone_encoded": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(EncodedRepresentationTag),} + # ), + # "encoded_length": NeuralType({0: AxisType(BatchTag)}), + "char_phone_encoded": NeuralType(('B', 'T', 'D'), EncodedRepresentation()), + "encoded_length": NeuralType(tuple('B'), LengthsType()), } @property def output_ports(self): """Returns definitions of module output ports. - - mel_output: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) - - gate_output: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - alignments: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(TimeTag) - - mel_len: - 0: AxisType(BatchTag) """ return { - "mel_output": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), - "gate_output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "alignments": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), - "mel_len": NeuralType({0: AxisType(BatchTag)}), + # "mel_output": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + # "gate_output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "alignments": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}), + # "mel_len": NeuralType({0: AxisType(BatchTag)}), + "mel_output": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "gate_output": NeuralType(('B', 'T'), ChannelType()), + "alignments": NeuralType(('B', 'T', 'T'), ChannelType()), + "mel_len": NeuralType(tuple('B'), LengthsType()), } def __str__(self): @@ -411,35 +331,23 @@ class Tacotron2Postnet(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. 
- - mel_input: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) """ return { - "mel_input": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ) + # "mel_input": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ) + "mel_input": NeuralType(('B', 'D', 'T'), MelSpectrogramType()) } @property def output_ports(self): """Returns definitions of module output ports. - - mel_output: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) """ return { - "mel_output": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), + # "mel_output": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + "mel_output": NeuralType(('B', 'D', 'T'), MelSpectrogramType()) } def __init__( @@ -482,68 +390,35 @@ class Tacotron2Loss(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - mel_out: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) - - mel_out_postnet: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) - - gate_out: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - mel_target: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) - - gate_target: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - target_len: - 0: AxisType(BatchTag) - - seq_len: - 0: AxisType(BatchTag) """ return { - "mel_out": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), - "mel_out_postnet": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), - "gate_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "mel_target": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), - "gate_target": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "target_len": NeuralType({0: AxisType(BatchTag)}), - "seq_len": NeuralType({0: AxisType(BatchTag)}), + # "mel_out": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + # "mel_out_postnet": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + # "gate_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "mel_target": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + # "gate_target": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "target_len": NeuralType({0: AxisType(BatchTag)}), + # "seq_len": NeuralType({0: AxisType(BatchTag)}), + "mel_out": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "mel_out_postnet": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "gate_out": NeuralType(('B', 'T'), ChannelType()), + "mel_target": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "gate_target": NeuralType(('B', 'T'), ChannelType()), + "target_len": NeuralType(tuple('B'), LengthsType()), + "seq_len": NeuralType(tuple('B'), LengthsType()), } @property def output_ports(self): """Returns definitions of module output ports. 
- - loss: - NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, pad_value: float = -11.52): super().__init__() @@ -595,34 +470,22 @@ class MakeGate(NonTrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - target_len: - 0: AxisType(BatchTag) - - mel_target: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) """ return { - "target_len": NeuralType({0: AxisType(BatchTag)}), - "mel_target": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), + # "target_len": NeuralType({0: AxisType(BatchTag)}), + # "mel_target": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + "target_len": NeuralType(tuple('B'), LengthsType()), + "mel_target": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), } @property def output_ports(self): """Returns definitions of module output ports. - - gate_target: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"gate_target": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"gate_target": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"gate_target": NeuralType(('B', 'T'), ChannelType())} def forward(self, target_len, mel_target): max_len = mel_target.shape[2] diff --git a/nemo/collections/tts/waveglow_modules.py b/nemo/collections/tts/waveglow_modules.py index 5e13ae73faf9..1acffdb59d73 100644 --- a/nemo/collections/tts/waveglow_modules.py +++ b/nemo/collections/tts/waveglow_modules.py @@ -41,47 +41,28 @@ class WaveGlowNM(TrainableNM): @property def input_ports(self): """Returns definitions of module input ports. - - mel_spectrogram: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) - - audio: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - "mel_spectrogram": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ), - "audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "mel_spectrogram": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ), + # "audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + "mel_spectrogram": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + "audio": NeuralType(('B', 'T'), AudioSignal()), } @property def output_ports(self): """Returns definitions of module output ports. - - audio: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - log_s_list: - List? - - log_det_W_list: - List? - """ # TODO @blisc: please take a look at those definitions return { - "audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "log_s_list": NeuralType(), - "log_det_W_list": NeuralType(), + # "audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "log_s_list": NeuralType(), + # "log_det_W_list": NeuralType(), + "audio": NeuralType(('B', 'T'), AudioSignal()), + "log_s_list": NeuralType(elements_type=ChannelType()), + "log_det_W_list": NeuralType(elements_type=ChannelType()), } def __init__( @@ -157,30 +138,20 @@ class WaveGlowInferNM(WaveGlowNM): @property def input_ports(self): """Returns definitions of module input ports. 
- - mel_spectrogram: - 0: AxisType(BatchTag) - - 1: AxisType(MelSpectrogramSignalTag) - - 2: AxisType(TimeTag) """ return { - "mel_spectrogram": NeuralType( - {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} - ) + # "mel_spectrogram": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(MelSpectrogramSignalTag), 2: AxisType(TimeTag),} + # ) + "mel_spectrogram": NeuralType(('B', 'D', 'T'), MelSpectrogramType()) } @property def output_ports(self): """Returns definitions of module output ports. - - audio: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ - return {"audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + # return {"audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} + return {"audio": NeuralType(('B', 'T'), AudioSignal())} def __str__(self): return "WaveGlowNM" @@ -256,33 +227,22 @@ class WaveGlowLoss(LossNM): @property def input_ports(self): """Returns definitions of module input ports. - - z: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - log_s_list: - List? - - log_det_W_list: - List? """ # TODO @blisc: please take a look at those definitions return { - "z": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "log_s_list": NeuralType(), - "log_det_W_list": NeuralType(), + # "z": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), + # "log_s_list": NeuralType(), + # "log_det_W_list": NeuralType(), + "z": NeuralType(('B', 'T'), AudioSignal()), + "log_s_list": NeuralType(elements_type=ChannelType()), + "log_det_W_list": NeuralType(elements_type=ChannelType()), } @property def output_ports(self): """Returns definitions of module output ports. - - loss: - NeuralType(None) """ - return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType())} def __init__(self, sigma: float = 1.0): super().__init__() diff --git a/nemo/core/__init__.py b/nemo/core/__init__.py index 7b13691e476a..e48567b139a6 100644 --- a/nemo/core/__init__.py +++ b/nemo/core/__init__.py @@ -1,4 +1,20 @@ -# Copyright (c) 2019 NVIDIA Corporation +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .callbacks import * from .neural_factory import * from .neural_modules import * diff --git a/nemo/core/callbacks.py b/nemo/core/callbacks.py index 4f6c94ba01dc..1ebf3675e270 100644 --- a/nemo/core/callbacks.py +++ b/nemo/core/callbacks.py @@ -1,4 +1,20 @@ -# Copyright (c) 2019 NVIDIA Corporation +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import glob import os import sys diff --git a/nemo/core/neural_factory.py b/nemo/core/neural_factory.py index ede5195b3909..0692ea46095c 100644 --- a/nemo/core/neural_factory.py +++ b/nemo/core/neural_factory.py @@ -1,4 +1,20 @@ -# Copyright (c) 2019 NVIDIA Corporation +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + __all__ = [ 'Backend', 'ModelMode', diff --git a/nemo/core/neural_modules.py b/nemo/core/neural_modules.py index 609d3c567a30..25e42c7824fa 100644 --- a/nemo/core/neural_modules.py +++ b/nemo/core/neural_modules.py @@ -1,4 +1,20 @@ -# Copyright (c) 2019 NVIDIA Corporation +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """This file contains NeuralModule and NmTensor classes.""" __all__ = ['WeightShareTransform', 'NeuralModule'] @@ -134,7 +150,7 @@ def _validate_params(self, params): def __is_of_allowed_type(self, var): """ - A recursive function that checks if a given variable is allowed (in) + A recursive function that checks if a given variable is allowed (in) Args: pretrained_model_name (str): name of pretrained model to use in order. @@ -211,10 +227,6 @@ def __call__(self, **kwargs): Returns: NmTensor object or tuple of NmTensor objects """ - # if self._assigned_top_order is not None: - # raise ValueError("We currently do not support calling same NM" - # "more than once") - # Get input and output ports definitions. 
         input_port_defs = self.input_ports
         output_port_defs = self.output_ports
@@ -222,23 +234,15 @@
         first_input_nmtensor_type = None
         input_nmtensors_are_of_same_type = True
         for port_name, tgv in kwargs.items():
+            # make sure that passed arguments correspond to input port names
             if port_name not in input_port_defs.keys():
                 raise NeuralPortNameMismatchError("Wrong input port name: {0}".format(port_name))
-            type_comatibility = input_port_defs[port_name].compare(tgv)
-
-            if first_input_nmtensor_type is None:
-                first_input_nmtensor_type = NeuralType(tgv._axis2type)
-            else:
-                if first_input_nmtensor_type._axis2type is None:
-                    input_nmtensors_are_of_same_type = True
-                else:
-                    input_nmtensors_are_of_same_type = first_input_nmtensor_type.compare(
-                        tgv
-                    ) == NeuralTypeComparisonResult.SAME and len(first_input_nmtensor_type._axis2type)
-            if not (
-                type_comatibility == NeuralTypeComparisonResult.SAME
-                or type_comatibility == NeuralTypeComparisonResult.GREATER
+            input_port = input_port_defs[port_name]
+            type_comatibility = input_port.compare(tgv)
+            if (
+                type_comatibility != NeuralTypeComparisonResult.SAME
+                and type_comatibility != NeuralTypeComparisonResult.GREATER
             ):
                 raise NeuralPortNmTensorMismatchError(
                     "\n\nIn {0}. \n"
@@ -248,8 +252,6 @@
                         self.__class__.__name__, port_name, input_port_defs[port_name], tgv, type_comatibility,
                     )
                 )
-            if type_comatibility == NeuralTypeComparisonResult.LESS:
-                logging.info('Types were raised')

         if len(output_port_defs) == 1:
             out_name = list(output_port_defs)[0]
diff --git a/nemo/core/neural_types.py b/nemo/core/neural_types.py
deleted file mode 100644
index 38b606fc5b9e..000000000000
--- a/nemo/core/neural_types.py
+++ /dev/null
@@ -1,410 +0,0 @@
-# Copyright (c) 2019 NVIDIA Corporation
-"""This module contains Tags, AxisTypes, NeuralTypes and NmTensors.
-Every NmTensor is of a particular Neural Type.
-Neural Modules' input and output ports are also of Neural Type.
-
-An exception will be raised when a NmTensor and input port where it goes are
-of incompatible types. 
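The rewritten check accepts a connection only when the comparison returns `SAME` or `GREATER`. A minimal sketch of that gating, assuming `NeuralType.compare` delegates to the element comparison defined in the new `neural_types` package below (plain `NeuralType` objects stand in for `NmTensor` here):

.. code-block:: python

    from nemo.core.neural_types import (
        ChannelType,
        EncodedRepresentation,
        NeuralType,
        NeuralTypeComparisonResult,
    )

    port = NeuralType(('B', 'T', 'D'), ChannelType())  # declared input port
    fed = NeuralType(('B', 'T', 'D'), EncodedRepresentation())  # type of the tensor being fed

    # EncodedRepresentation subclasses ChannelType, so this yields GREATER;
    # anything other than SAME or GREATER makes __call__ raise
    # NeuralPortNmTensorMismatchError for the connection.
    result = port.compare(fed)
    accepted = result in (NeuralTypeComparisonResult.SAME, NeuralTypeComparisonResult.GREATER)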
-""" -__all__ = [ - 'BaseTag', - 'BatchTag', - 'TimeTag', - 'ProcessedTimeTag', - 'ChannelTag', - 'EmbeddedTextTag', - 'SpectrogramSignalTag', - 'MelSpectrogramSignalTag', - 'MFCCSignalTag', - 'EncodedRepresentationTag', - 'ClassTag', - 'WidthTag', - 'HeightTag', - 'CategoricalTag', - 'RegressionTag', - 'NeuralTypeComparisonResult', - 'AxisType', - 'NeuralType', - 'NmTensor', - 'NeuralTypeError', - 'NeuralPortNameMismatchError', - 'NeuralPortNmTensorMismatchError', - 'CanNotInferResultNeuralType', -] - -import uuid -from enum import Enum - - -class BaseTag(object): - """Base Neural Tag. All Tags should inherit from this.""" - - def __str__(self): - return "base" - - -class BatchTag(BaseTag): - """Tag for batch dimension.""" - - def __str__(self): - return "batch" - - -class TimeTag(BaseTag): - """Tag for time dimension.""" - - def __str__(self): - return "time" - - -class ProcessedTimeTag(TimeTag): - """Tag for processed time dimension. - For example: after pre-processing, or augmentation.""" - - def __str__(self): - return "processed_time" - - -class ChannelTag(BaseTag): - """Tag for channel dimension.""" - - def __str__(self): - return "channel" - - -class EmbeddedTextTag(ChannelTag): - """Tag for any dimensions that contains text that goes through an - enbedding layer.""" - - def __str__(self): - return "embedded_text" - - -class SpectrogramSignalTag(ChannelTag): - """Tag for spectrogram signal dimension.""" - - def __str__(self): - return "spectrogram_signal" - - -class MelSpectrogramSignalTag(SpectrogramSignalTag): - """Tag for mel spectrogram signal dimension.""" - - def __str__(self): - return "mel_spectrogram_signal" - - -class MFCCSignalTag(SpectrogramSignalTag): - """Tag for MFCC signal dimension.""" - - def __str__(self): - return "mfcc_signal" - - -class EncodedRepresentationTag(ChannelTag): - """Tag for encoded representation. This should be used to - denote encoders' outputs.""" - - def __str__(self): - return "encoded_representation" - - -class ClassTag(BaseTag): - """Tag for class dimension. - For example, number of classes in classification problem, - vocabuary size or num of characters for ASR.""" - - def __str__(self): - return "channel" - - -class WidthTag(BaseTag): - """Tag for width dimension.""" - - def __str__(self): - return "width" - - -class HeightTag(BaseTag): - """Tag for width dimension.""" - - def __str__(self): - return "height" - - -class CategoricalTag(BatchTag): - """Tag for labels for classification tasks.""" - - def __str__(self): - return "category" - - -class RegressionTag(BatchTag): - """Tag for labels for regression tasks. - For example, this should be used in STS-B task, where labels - represent semantic semilarity score (float)""" - - def __str__(self): - return "regression" - - -class NeuralTypeComparisonResult(Enum): - """The result of comparing two neural type objects for compatibility. - When comparing A.compare_to(B):""" - - SAME = 0 - LESS = 1 # A is B - GREATER = 2 # B is A - DIM_INCOMPATIBLE = 3 # Resize connector might fix incompatibility - TRANSPOSE_SAME = 4 # A transpose will make them same - INCOMPATIBLE = 5 # A and B are incompatible. Can't fix incompatibility automatically - - -class AxisType(object): - """Every tensor's axis has semantics, dimension and descriptor. 
- It's semantics is a Neural Tag (inherited from BaseTag) - dimension (dim) is (optional) int and descriptor is (optional) string""" - - def __init__(self, semantics, dim: int = None, descriptor: str = None): - self._semantics = semantics - self._dim = dim - self._descriptor = descriptor - - def __eq__(self, other): - return self.semantics == other.semantics and self.dim == other.dim and self.descriptor == other.descriptor - - def __str__(self): - return "{0}:{1}:{2}".format(self.semantics, self.dim, self.descriptor) - - def __hash__(self): - return hash(self.__str__()) - - def compare_to(self, other): - """ - Compares current AxisType object to other - - Args: - other (AxisType): other AxisType object to compare with - - Returns: - Results of a comparison (NeuralTypeComparisonResult) - """ - if (self.dim is None or self.dim == other.dim) and self.descriptor == other.descriptor: - if self.semantics == other.semantics: - return NeuralTypeComparisonResult.SAME - elif issubclass(self.semantics, other.semantics): - return NeuralTypeComparisonResult.LESS - elif issubclass(other.semantics, self.semantics): - return NeuralTypeComparisonResult.GREATER - else: - return NeuralTypeComparisonResult.INCOMPATIBLE - elif self.descriptor == other.descriptor and self.semantics == other.semantics: - return NeuralTypeComparisonResult.DIM_INCOMPATIBLE - else: - return NeuralTypeComparisonResult.INCOMPATIBLE - - @property - def semantics(self): - return self._semantics - - @property - def dim(self): - return self._dim - - @property - def descriptor(self): - return self._descriptor - - -class NeuralType(object): - """Neural Type: a type for NmTensor. - - Note: This type mechanism is represented by Python inheritance. That is, - NmTensor - class inherits from NeuralType class. - - A Neural Type is a mapping from Tensor's axis number to it's type ( - AxisType). - - To instantiate a NeuralType you should pass it a dictionary (axis2type) - which - will map axis to it's AxisType. You can also pass optional argument when - describing input ports. - - For example, a ResNet18 input can be described as: - - .. code-block:: python - - NeuralType({0: AxisType(BatchTag, None, None), - 1: AxisType(ChannelTag, None, None), - 2: AxisType(HeightTag, 224, None), - 3: AxisType(WidthTag, 224, None)}) - - Special cases: - - non-tensor objects should be denoted as NeuralType(None) - - root type is denoted by NeuralType({}). A port of NeuralType({}) must - - accept NmTensors of any NeuralType. More specifically: - root_type = NeuralType({}) - root_type.compare(any_other_neural_type) == - NeuralTypeComparisonResult.SAME - - - See "nemo/tests/test_neural_types.py" for more examples. - - """ - - # def __init__(self, axis2type=None): - def __init__(self, axis2type={}, optional=False): - """ - Constructor - Args: - axis2type: mapping axises to it's AxisType - optional: (default: False). 
If this port is optional - """ - self._axis2type = axis2type - self._optional = optional - - def __str__(self): - if self._axis2type is None: - return "(Optional) " if self._optional else "" + "non-tensor " "object" - elif len(self._axis2type) == 0: - return "(Optional) " if self._optional else "" + "Root NeuralType" - return ( - "(Optional)" - if self._optional - else "" + "\n".join(["{0}->{1}".format(axis, tag) for axis, tag in self._axis2type.items()]) - ) - - def compare(self, n_type2) -> NeuralTypeComparisonResult: - """Compares if current object's NeuralType semantics is compatible - with n_type2 - - Args: - n_type2 (NeuralType): a type to compare with - - Returns: - Results of a comparison (NeuralTypeComparisonResult) - """ - # self is a root type - if self.axis2type is not None and len(self.axis2type) == 0: - return NeuralTypeComparisonResult.SAME - # n_type2 is root type but self is not - elif n_type2.axis2type is not None and len(n_type2.axis2type) == 0: - return NeuralTypeComparisonResult.INCOMPATIBLE - # one is None while other is not - elif self._axis2type is None and n_type2._axis2type is not None: - return NeuralTypeComparisonResult.INCOMPATIBLE - elif self._axis2type is not None and n_type2._axis2type is None: - return NeuralTypeComparisonResult.INCOMPATIBLE - # same neural type - elif self._axis2type == n_type2._axis2type: - return NeuralTypeComparisonResult.SAME - # same set of keys and set of values => TRANSPOSE_SAME - elif set(self._axis2type.keys()) == set(n_type2._axis2type.keys()) and set(self._axis2type.values()) == set( - n_type2._axis2type.values() - ): - return NeuralTypeComparisonResult.TRANSPOSE_SAME - - elif set(self._axis2type.keys()) == set(n_type2._axis2type.keys()): - # comparison_result = 1 - comparison_result = 0 - for key in self._axis2type.keys(): - comparison_result = max( - self._axis2type[key].compare_to(n_type2._axis2type[key]).value, comparison_result, - ) - return NeuralTypeComparisonResult(comparison_result) - else: - return NeuralTypeComparisonResult.INCOMPATIBLE - - @property - def axis2type(self): - return self._axis2type - - -class NmTensor(NeuralType): - """Class representing data which flows between NeuralModules' ports. - It also has a type of NeuralType represented by inheriting from NeuralType - object.""" - - def __init__(self, producer, producer_args, name, ntype=None): - """NmTensor constructor. - - Args: - producer (NeuralModule): object which produced this - producer_args (dict): a dictionary of port_name->NmTensor value - of arguments which were sent to producer to create this - """ - super(NmTensor, self).__init__(axis2type=ntype._axis2type) - self._producer = producer - self._producer_args = producer_args - self._name = name - self._uuid = str(uuid.uuid4()) - - @property - def producer(self): - """ - Returns: - NeuralModule object which produced this NmTensor. - """ - return self._producer - - @property - def producer_args(self): - """ - Returns: - a dictionary of port_name->NmTensor value - of arguments which were sent to producer to create this object - """ - return self._producer_args - - @property - def name(self): - """ - Returns: - A NmTensor's name which should be equal to - the NeuralModule's output port's name which created it - """ - return self._name - - @property - def unique_name(self): - """Unique NMTensor name. - It is composed of non-unique name (self.name) and uuid of NeuralModule - which created this tensor. 
-
-        Returns:
-            str: unique name
-        """
-        if self._producer is None:
-            raise ValueError("This NmTensor does not have a unique name")
-        return f"{self._name}~~~{self.producer}~~~{self._uuid}"
-
-
-class NeuralTypeError(Exception):
-    """Base class for neural type related exceptions."""
-
-    pass
-
-
-class NeuralPortNameMismatchError(NeuralTypeError):
-    """Exception raised when neural module is called with incorrect port
-    names."""
-
-    def __init__(self, message):
-        self.message = message
-
-
-class NeuralPortNmTensorMismatchError(NeuralTypeError):
-    """Exception raised when a port is fed with a NmTensor of incompatible
-    type."""
-
-    def __init__(self, message):
-        self.message = message
-
-
-class CanNotInferResultNeuralType(NeuralTypeError):
-    """Exception raised when NeuralType of output can not be inferred."""
-
-    def __init__(self, message):
-        self.message = message
diff --git a/nemo/core/neural_types/__init__.py b/nemo/core/neural_types/__init__.py
new file mode 100644
index 000000000000..1fb5bf349076
--- /dev/null
+++ b/nemo/core/neural_types/__init__.py
@@ -0,0 +1,21 @@
+# ! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from nemo.core.neural_types.axes import *
+from nemo.core.neural_types.comparison import *
+from nemo.core.neural_types.elements import *
+from nemo.core.neural_types.neural_type import *
diff --git a/nemo/core/neural_types/axes.py b/nemo/core/neural_types/axes.py
new file mode 100644
index 000000000000..dcc2e7736ff6
--- /dev/null
+++ b/nemo/core/neural_types/axes.py
@@ -0,0 +1,82 @@
+# ! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ['AxisKindAbstract', 'AxisKind', 'AxisType']
+
+from enum import Enum
+from typing import Optional
+
+
+class AxisKindAbstract(Enum):
+    """This is an abstract Enum to represent what varying an axis dimension means.
+    In practice, you will almost always use the AxisKind Enum. If AxisKind does not
+    cover your needs, inherit from this Enum in your OWN Enum; your Enum can then be
+    used instead of AxisKind."""
+
+    pass
+
+
+class AxisKind(AxisKindAbstract):
+    """This Enum represents what varying an axis dimension means.
+    For example, does this dimension correspond to width, batch, time, etc.
+    The "Dimension" and "Channel" kinds are the same and used to represent
+    a general axis. 
+    """
+
+    Batch = 0
+    Time = 1
+    Dimension = 2
+    Channel = 2
+    Width = 3
+    Height = 4
+
+    def __str__(self):
+        return str(self.name).lower()
+
+    @staticmethod
+    def from_str(label):
+        """Returns AxisKind instance based on short string representation"""
+        _label = label.lower().strip()
+        if _label == "b" or _label == "n" or _label == "batch":
+            return AxisKind.Batch
+        elif _label == "t" or _label == "time":
+            return AxisKind.Time
+        elif _label == "d" or _label == "c" or _label == "channel":
+            return AxisKind.Dimension
+        elif _label == "w" or _label == "width":
+            return AxisKind.Width
+        elif _label == "h" or _label == "height":
+            return AxisKind.Height
+        else:
+            raise ValueError(f"Can't create AxisKind from {label}")
+
+
+class AxisType(object):
+    """This class represents axis semantics and (optionally) its dimensionality.
+    Args:
+        kind (AxisKindAbstract): the kind of axis, for example Batch, Height, etc.
+        size (int, optional): specify if the axis should have a fixed size. By default it is set to None and you
+            typically do not want to set it for Batch and Time
+        is_list (bool, default=False): whether this is a list or a tensor axis
+    """
+
+    def __init__(self, kind: AxisKindAbstract, size: Optional[int] = None, is_list=False):
+        if size is not None and is_list:
+            raise ValueError("The axis can't be a list and have a fixed size")
+        self.kind = kind
+        self.size = size
+        self.is_list = is_list
diff --git a/nemo/core/neural_types/comparison.py b/nemo/core/neural_types/comparison.py
new file mode 100644
index 000000000000..6cbb9661a0e2
--- /dev/null
+++ b/nemo/core/neural_types/comparison.py
@@ -0,0 +1,34 @@
+# ! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ['NeuralTypeComparisonResult']
+
+from enum import Enum
+
+
+class NeuralTypeComparisonResult(Enum):
+    """The result of comparing two neural type objects for compatibility.
+    When comparing A.compare(B):"""
+
+    SAME = 0
+    LESS = 1  # A is B
+    GREATER = 2  # B is A
+    DIM_INCOMPATIBLE = 3  # Resize connector might fix incompatibility
+    TRANSPOSE_SAME = 4  # A transpose and/or converting between lists and tensors will make them the same
+    CONTAINER_SIZE_MISMATCH = 5  # A and B contain different numbers of elements
+    INCOMPATIBLE = 6  # A and B are incompatible
+    SAME_TYPE_INCOMPATIBLE_PARAMS = 7  # A and B are of the same type but parametrized differently
diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py
new file mode 100644
index 000000000000..5d410b90ebde
--- /dev/null
+++ b/nemo/core/neural_types/elements.py
@@ -0,0 +1,187 @@
+# ! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = [
+    'ElementType',
+    'VoidType',
+    'ChannelType',
+    'AcousticEncodedRepresentation',
+    'AudioSignal',
+    'SpectrogramType',
+    'MelSpectrogramType',
+    'MFCCSpectrogramType',
+    'LogitsType',
+    'LabelsType',
+    'LossType',
+    'RegressionValuesType',
+    'CategoricalValuesType',
+    'PredictionsType',
+    'LogprobsType',
+    'LengthsType',
+    'EmbeddedTextType',
+    'EncodedRepresentation',
+]
+import abc
+from abc import ABC, abstractmethod
+from typing import Dict, Optional, Tuple
+
+from nemo.core.neural_types.comparison import NeuralTypeComparisonResult
+
+
+class ElementType(ABC):
+    """Abstract class defining semantics of the tensor elements.
+    We are relying on Python for inheritance checking."""
+
+    def __str__(self):
+        return self.__doc__
+
+    @property
+    def type_parameters(self) -> Dict:
+        """Override this property to parametrize your type. For example, you can specify 'storage' type such as
+        float, int, bool with a 'dtype' keyword. Another example: if you want to represent a signal with a
+        particular property (say, sample frequency), then you can put sample_freq->value in there.
+        When two types are compared, their type_parameters must match."""
+        return {}
+
+    @property
+    def fields(self) -> Optional[Tuple]:
+        """This should be used to logically represent tuples/structures. For example, if you want to represent a
+        bounding box (x, y, width, height) you can put a tuple with names ('x', 'y', 'w', 'h') in here.
+        Under the hood this should be converted to the last tensor dimension of fixed size = len(fields).
+        When two types are compared, their fields must match."""
+        return None
+
+    def compare(self, second) -> NeuralTypeComparisonResult:
+        # First, check general compatibility
+        first_t = type(self)
+        second_t = type(second)
+
+        if first_t == second_t:
+            result = NeuralTypeComparisonResult.SAME
+        elif issubclass(first_t, second_t):
+            result = NeuralTypeComparisonResult.LESS
+        elif issubclass(second_t, first_t):
+            result = NeuralTypeComparisonResult.GREATER
+        else:
+            result = NeuralTypeComparisonResult.INCOMPATIBLE
+
+        if result != NeuralTypeComparisonResult.SAME:
+            return result
+        else:
+            # now check that all parameters match
+            check_params = set(self.type_parameters.keys()) == set(second.type_parameters.keys())
+            if check_params is False:
+                return NeuralTypeComparisonResult.SAME_TYPE_INCOMPATIBLE_PARAMS
+            else:
+                for k1, v1 in self.type_parameters.items():
+                    if v1 != second.type_parameters[k1]:
+                        return NeuralTypeComparisonResult.SAME_TYPE_INCOMPATIBLE_PARAMS
+            # check that all fields match
+            if self.fields == second.fields:
+                return NeuralTypeComparisonResult.SAME
+            else:
+                return NeuralTypeComparisonResult.INCOMPATIBLE
+
+
+class VoidType(ElementType):
+    """Void-like type which is compatible with everything.
+    It is a good practice to use this type only as necessary.
+    For example, when you need template-like functionality. 
+ """ + + def compare(cls, second: abc.ABCMeta) -> NeuralTypeComparisonResult: + return NeuralTypeComparisonResult.SAME + + +# TODO: Consider moving these files elsewhere +class ChannelType(ElementType): + """Element to represent convolutional input/output channel. + """ + + +class EmbeddedTextType(ChannelType): + """Element to represent output on word/text embedding layers + """ + + +class LogitsType(ElementType): + """Element type to represent logits""" + + +class LogprobsType(ElementType): + """Element type to represent log-probabilities. For example, outputs of softmax layers.""" + + +class LabelsType(ElementType): + """Element type to represent some sort of labels. This is often used as a base class to create + a more concrete types such as RegressionValuesType, etc.""" + + +class LengthsType(ElementType): + """Element type representing lengths of something""" + + +class LossType(ElementType): + """Element type to represent outputs of Loss modules""" + + +class EncodedRepresentation(ChannelType): + """Element type to represent encoded representation, for example, encoder's output""" + + +class AcousticEncodedRepresentation(EncodedRepresentation): + """Element type to represent encoded representation returned by the acoustic encoder model""" + + +class AudioSignal(ElementType): + """Element type to represent encoded representation returned by the acoustic encoder model + Args: + freq (int): sampling frequency of a signal. Note that two signals will only be the same if their + freq is the same. + """ + + def __init__(self, freq: int = 16000): + self._params = {} + self._params['freq'] = freq + + @property + def type_parameters(self): + return self._params + + +class SpectrogramType(ChannelType): + """Element type to represent generic spectrogram signal""" + + +class MelSpectrogramType(SpectrogramType): + """Element type to represent mel spectrogram signal""" + + +class MFCCSpectrogramType(SpectrogramType): + """Element type to represent MFCC spectrogram signal""" + + +class PredictionsType(LabelsType): + """Element type to represent some sort of predictions returned by model""" + + +class RegressionValuesType(PredictionsType): + """Element type to represent labels for regression task""" + + +class CategoricalValuesType(PredictionsType): + """Element type to represent labels for categorical classification task""" diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py new file mode 100644 index 000000000000..a2070c354b3c --- /dev/null +++ b/nemo/core/neural_types/neural_type.py @@ -0,0 +1,267 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py
new file mode 100644
index 000000000000..a2070c354b3c
--- /dev/null
+++ b/nemo/core/neural_types/neural_type.py
@@ -0,0 +1,267 @@
+# ! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = [
+    'NeuralType',
+    'NmTensor',
+    'NeuralTypeError',
+    'NeuralPortNameMismatchError',
+    'NeuralPortNmTensorMismatchError',
+    'CanNotInferResultNeuralType',
+]
+import uuid
+from typing import Optional, Tuple
+
+from nemo.core.neural_types.axes import AxisKind, AxisType
+from nemo.core.neural_types.comparison import NeuralTypeComparisonResult
+from nemo.core.neural_types.elements import *
+
+
+class NeuralType(object):
+    """This is the main class which represents the neural type concept.
+    NmTensor derives from this. It is used to represent *the types* of inputs and outputs.
+    Args:
+        axes (Optional[Tuple]): a tuple of AxisType objects representing the semantics of what varying each axis means.
+            You can use a short, string-based form here. For example: ('B', 'C', 'H', 'W') would correspond to the NCHW
+            format frequently used in computer vision. ('B', 'T', 'D') is frequently used for signal processing and
+            means [batch, time, dimension/channel].
+        elements_type (ElementType): an instance of a class derived from ElementType representing the semantics of what
+            is stored inside the tensor. For example: logits (LogitsType), log probabilities (LogprobsType), etc.
+        optional (bool): By default, this is False. If set to True, it means that the input to the port of this
+            type can be optional.
+    """
+
+    def __init__(self, axes: Optional[Tuple] = None, elements_type: ElementType = VoidType(), optional=False):
+        if not isinstance(elements_type, ElementType):
+            raise ValueError(
+                "elements_type of NeuralType must be an instance of a class derived from ElementType. "
+                "Did you pass a class instead of an instance?"
+            )
+        self.elements_type = elements_type
+        if axes is not None:
+            NeuralType.__check_sanity(axes)
+            axes_list = []
+            for axis in axes:
+                if isinstance(axis, str):
+                    axes_list.append(AxisType(AxisKind.from_str(axis), None))
+                elif isinstance(axis, AxisType):
+                    axes_list.append(axis)
+                else:
+                    raise ValueError("axis type must be either str or AxisType instance")
+            self.axes = tuple(axes_list)
+        else:
+            self.axes = None
+        self.optional = optional
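To make the `axes` argument concrete: the short string form and the explicit `AxisType` form below construct the same type (this mirrors `test_short_vs_long_version` added later in this diff):

```python
from nemo.core.neural_types import (
    AcousticEncodedRepresentation,
    AxisKind,
    AxisType,
    NeuralType,
    NeuralTypeComparisonResult,
)

short_form = NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation())
long_form = NeuralType(
    axes=(AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension), AxisType(AxisKind.Time)),
    elements_type=AcousticEncodedRepresentation(),
)
# Both spellings describe the same [batch, dimension, time] tensor.
assert short_form.compare(long_form) == NeuralTypeComparisonResult.SAME
```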
+
+    def compare(self, second) -> NeuralTypeComparisonResult:
+        """Performs neural type comparison of self with second. When you chain two modules' inputs/outputs via
+        the __call__ method, this comparison will be called to ensure neural type compatibility."""
+        # First, handle dimensionality
+        axes_a = self.axes
+        axes_b = second.axes
+
+        # "Big void" type
+        if isinstance(self.elements_type, VoidType) and self.axes is None:
+            return NeuralTypeComparisonResult.SAME
+
+        if self.axes is None:
+            if second.axes is None:
+                return self.elements_type.compare(second.elements_type)
+            else:
+                return NeuralTypeComparisonResult.INCOMPATIBLE
+
+        dimensions_pass = NeuralType.__compare_axes(axes_a, axes_b)
+        element_comparison_result = self.elements_type.compare(second.elements_type)
+
+        # SAME DIMS
+        if dimensions_pass == 0:
+            return element_comparison_result
+        # TRANSPOSE_SAME DIMS
+        elif dimensions_pass == 1:
+            if element_comparison_result == NeuralTypeComparisonResult.SAME:
+                return NeuralTypeComparisonResult.TRANSPOSE_SAME
+            else:
+                return NeuralTypeComparisonResult.INCOMPATIBLE
+        # DIM_INCOMPATIBLE DIMS
+        elif dimensions_pass == 2:
+            if element_comparison_result == NeuralTypeComparisonResult.SAME:
+                return NeuralTypeComparisonResult.DIM_INCOMPATIBLE
+            else:
+                return NeuralTypeComparisonResult.INCOMPATIBLE
+        else:
+            return NeuralTypeComparisonResult.INCOMPATIBLE
+
+    @staticmethod
+    def __check_sanity(axes):
+        # check that list dimensions come before any tensor dimensions
+        are_strings = True
+        for axis in axes:
+            if not isinstance(axis, str):
+                are_strings = False
+            if isinstance(axis, str) and not are_strings:
+                raise ValueError("Either use AxisType instances for all axes or strings for all axes")
+        if are_strings:
+            return
+        checks_passed = True
+        saw_tensor_dim = False
+        for axis in axes:
+            if not axis.is_list:
+                saw_tensor_dim = True
+            else:  # current axis is a list
+                if saw_tensor_dim:  # which is preceded by a tensor dim
+                    checks_passed = False
+        if not checks_passed:
+            raise ValueError(
+                "You have a list dimension after a Tensor dimension. All list dimensions must precede Tensor dimensions"
+            )
+
+    @staticmethod
+    def __compare_axes(axes_a, axes_b) -> int:
+        """
+        Compares axes_a and axes_b
+        Args:
+            axes_a: first axes tuple
+            axes_b: second axes tuple
+
+        Returns:
+            0 - if they are exactly the same
+            1 - if they are "TRANSPOSE_SAME"
+            2 - if they are "DIM_INCOMPATIBLE"
+            3 - if they are different
+        """
+        if axes_a is None and axes_b is None:
+            return 0
+        elif axes_a is None and axes_b is not None:
+            return 3
+        elif axes_a is not None and axes_b is None:
+            return 3
+        elif len(axes_a) != len(axes_b):
+            return 3
+        # After these ifs we know that len(axes_a) == len(axes_b)
+
+        same = True
+        kinds_a = dict()
+        kinds_b = dict()
+        for axis_a, axis_b in zip(axes_a, axes_b):
+            kinds_a[axis_a.kind] = axis_a.size
+            kinds_b[axis_b.kind] = axis_b.size
+            if (
+                axis_a.kind != axis_b.kind
+                or axis_a.is_list != axis_b.is_list
+                or (axis_a.size != axis_b.size and axis_a.size is not None)
+            ):
+                same = False
+        if same:
+            return 0
+        else:
+            # can be TRANSPOSE_SAME or DIM_INCOMPATIBLE
+            if kinds_a.keys() == kinds_b.keys():
+                for key, value in kinds_a.items():
+                    if kinds_b[key] != value:
+                        return 2
+                return 1
+            else:
+                return 3
+
+
+class NmTensor(NeuralType):
+    """Class representing data which flows between NeuralModules' ports.
+    It also carries a neural type, which it represents by inheriting from the
+    NeuralType object."""
+
+    def __init__(self, producer, producer_args, name, ntype=None):
+        """NmTensor constructor.
+ + Args: + producer (NeuralModule): object which produced this + producer_args (dict): a dictionary of port_name->NmTensor value + of arguments which were sent to producer to create this + """ + super(NmTensor, self).__init__(axes=ntype.axes, elements_type=ntype.elements_type, optional=ntype.optional) + self._producer = producer + self._producer_args = producer_args + self._name = name + self._uuid = str(uuid.uuid4()) + + @property + def producer(self): + """ + Returns: + NeuralModule object which produced this NmTensor. + """ + return self._producer + + @property + def producer_args(self): + """ + Returns: + a dictionary of port_name->NmTensor value + of arguments which were sent to producer to create this object + """ + return self._producer_args + + @property + def name(self): + """ + Returns: + A NmTensor's name which should be equal to + the NeuralModule's output port's name which created it + """ + return self._name + + @property + def unique_name(self): + """Unique NMTensor name. + It is composed of non-unique name (self.name) and uuid of NeuralModule + which created this tensor. + + Returns: + str: unique name + """ + if self._producer is None: + raise ValueError("This NmTensor does not have a unique name") + return f"{self._name}~~~{self.producer}~~~{self._uuid}" + + +class NeuralTypeError(Exception): + """Base class for neural type related exceptions.""" + + pass + + +class NeuralPortNameMismatchError(NeuralTypeError): + """Exception raised when neural module is called with incorrect port + names.""" + + def __init__(self, message): + self.message = message + + +class NeuralPortNmTensorMismatchError(NeuralTypeError): + """Exception raised when a port is fed with a NmTensor of incompatible + type.""" + + def __init__(self, message): + self.message = message + + +class CanNotInferResultNeuralType(NeuralTypeError): + """Exception raised when NeuralType of output can not be inferred.""" + + def __init__(self, message): + self.message = message diff --git a/requirements/requirements_test.txt b/requirements/requirements_test.txt index 321780254517..ddd891eac3fe 100644 --- a/requirements/requirements_test.txt +++ b/requirements/requirements_test.txt @@ -5,3 +5,4 @@ black isort[requirements] wrapt wget +onnxruntime diff --git a/tests/asr/test_asr.py b/tests/asr/test_asr.py index 91fd571b4a67..86bec0de6d63 100644 --- a/tests/asr/test_asr.py +++ b/tests/asr/test_asr.py @@ -404,8 +404,8 @@ def test_double_jasper_training(self): feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], **jasper_model_definition['JasperEncoder'], ) - mx_max1 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") - mx_max2 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") + # mx_max1 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") + # mx_max2 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") jasper_decoder1 = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) jasper_decoder2 = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) @@ -419,8 +419,10 @@ def test_double_jasper_training(self): encoded2, encoded_len2 = jasper_encoder2(audio_signal=processed_signal, length=p_length) log_probs1 = jasper_decoder1(encoder_output=encoded1) log_probs2 = jasper_decoder2(encoder_output=encoded2) - log_probs = mx_max1(x1=log_probs1, x2=log_probs2) - encoded_len = mx_max2(x1=encoded_len1, x2=encoded_len2) + # log_probs = mx_max1(x1=log_probs1, x2=log_probs2) + # encoded_len = mx_max2(x1=encoded_len1, x2=encoded_len2) + log_probs = 
log_probs1 + encoded_len = encoded_len1 loss = ctc_loss( log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, ) diff --git a/tests/asr/test_weight_share.py b/tests/asr/test_weight_share.py deleted file mode 100644 index a6f56253b749..000000000000 --- a/tests/asr/test_weight_share.py +++ /dev/null @@ -1,271 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import os -import shutil -import tarfile -import unittest -from typing import Dict - -import numpy as np -import torch -from ruamel.yaml import YAML - -import nemo -import nemo.collections.asr as nemo_asr -from nemo.core import WeightShareTransform -from nemo.core.neural_types import * -from tests.common_setup import NeMoUnitTest - -logging = nemo.logging - - -class TestWeightSharing(NeMoUnitTest): - labels = [ - "'", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - " ", - ] - manifest_filepath = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/asr/an4_train.json")) - featurizer_config = { - 'window': 'hann', - 'dither': 1e-05, - 'normalize': 'per_feature', - 'frame_splicing': 1, - 'int_values': False, - 'window_stride': 0.01, - 'sample_rate': 16000, - 'features': 64, - 'n_fft': 512, - 'window_size': 0.02, - } - yaml = YAML(typ="safe") - - @classmethod - def setUpClass(cls) -> None: - super().setUpClass() - data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) - logging.info("Looking up for test ASR data") - if not os.path.exists(os.path.join(data_folder, "asr")): - logging.info("Extracting ASR data to: {0}".format(os.path.join(data_folder, "asr"))) - tar = tarfile.open(os.path.join(data_folder, "asr.tar.gz"), "r:gz") - tar.extractall(path=data_folder) - tar.close() - else: - logging.info("ASR data found in: {0}".format(os.path.join(data_folder, "asr"))) - - @classmethod - def tearDownClass(cls) -> None: - super().tearDownClass() - data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) - logging.info("Looking up for test ASR data") - if os.path.exists(os.path.join(data_folder, "asr")): - shutil.rmtree(os.path.join(data_folder, "asr")) - - def __check_if_weights_are_equal(self, w1: Dict, w2: Dict): - all_same = set(w1.keys()) == set(w2.keys()) - if not all_same: - return False - else: - for key in w1.keys(): - all_same = all_same and np.array_equal( - w1[key][0].cpu().detach().numpy(), w2[key][0].cpu().detach().numpy(), - ) - return all_same - - def test_TaylorNet_get_weights(self): - tn1 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - tn2 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - # because of randomness, actual weights should be different - 
self.assertFalse(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) - tn3 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - tn3.set_weights(tn1.get_weights()) - # check than weights are the same - self.assertTrue(self.__check_if_weights_are_equal(tn1.get_weights(), tn3.get_weights())) - # change weights on one module - another module should not change - tn1.fc1.bias.data = torch.tensor([0.1]) - self.assertFalse(self.__check_if_weights_are_equal(tn1.get_weights(), tn3.get_weights())) - - def test_TaylorNet_tie_weights(self): - tn1 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - tn2 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - # because of randomness, actual weights should be different - self.assertFalse(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) - tn2.tie_weights_with(tn1, list(tn1.get_weights().keys())) - # change weights on one module - another module should change too - tn1.fc1.bias.data = torch.tensor([0.1]) - self.assertTrue(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) - - def test_tie_weights2(self): - voc_size = 3 - dim = 2 - embd = nemo.backends.pytorch.common.SequenceEmbedding(voc_size=voc_size, hidden_size=dim) - proj = nemo.backends.pytorch.common.SequenceProjection(from_dim=dim, to_dim=voc_size) - embd.tie_weights_with( - proj, - weight_names=["embedding.weight"], - name2name_and_transform={"embedding.weight": ("projection.weight", WeightShareTransform.SAME,)}, - ) - self.assertTrue( - np.array_equal(embd.embedding.weight.detach().numpy(), proj.projection.weight.detach().numpy(),) - ) - was = embd.embedding.weight.detach().numpy() - embd.embedding.weight.data = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) - after = embd.embedding.weight.detach().numpy() - self.assertTrue( - np.array_equal(embd.embedding.weight.detach().numpy(), proj.projection.weight.detach().numpy(),) - ) - self.assertFalse(np.array_equal(was, after)) - - def test_set_weights(self): - voc_size = 3 - dim = 2 - embd = nemo.backends.pytorch.common.SequenceEmbedding(voc_size=voc_size, hidden_size=dim) - weights = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) - name2weights = {"embedding.weight": (weights, True)} - embd.set_weights(name2weight=name2weights) - self.assertTrue(np.array_equal(embd.embedding.weight.detach().numpy(), weights.detach().numpy(),)) - weights = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) - self.assertFalse(np.array_equal(embd.embedding.weight.detach().numpy(), weights.detach().numpy(),)) - - def test_freeze_unfreeze_TrainableNM(self): - path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml")) - with open(path) as file: - jasper_model_definition = self.yaml.load(file) - dl = nemo_asr.AudioToTextDataLayer( - # featurizer_config=self.featurizer_config, - manifest_filepath=self.manifest_filepath, - labels=self.labels, - batch_size=4, - ) - pre_process_params = { - #'int_values': False, - 'frame_splicing': 1, - 'features': 64, - 'window_size': 0.02, - 'n_fft': 512, - 'dither': 1e-05, - 'window': 'hann', - 'sample_rate': 16000, - 'normalize': 'per_feature', - 'window_stride': 0.01, - } - preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'], - ) - jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) - ctc_loss = 
nemo_asr.CTCLossNM(num_classes=len(self.labels)) - jasper_encoder.freeze() - jasper_encoder.unfreeze(set(['encoder.4.conv.1.weight'])) - jasper_decoder.unfreeze() - # DAG - audio_signal, a_sig_length, transcript, transcript_len = dl() - processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) - - encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) - # logging.info(jasper_encoder) - log_probs = jasper_decoder(encoder_output=encoded) - loss = ctc_loss( - log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, - ) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), - ) - optimizer = self.nf.get_trainer() - optimizer.train( - [loss], callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 2, "lr": 0.0003}, - ) - - # @unittest.skip( - # "Tests fails at get_pytorch_module() that will be changed in next PR anyway. \ - # Besides, quite sure this test is not related with ASR :]" - # ) - def test_freeze_unfreeze_Wrapper(self): - dl_train = nemo.backends.pytorch.ZerosDataLayer( - size=40, - dtype=[torch.FloatTensor, torch.LongTensor], - batch_size=4, - output_ports={ - "image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag, 3), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224), - } - ), - "label": NeuralType({0: AxisType(BatchTag)}), - }, - ) - - # WHY THE HELL THIS TEST IS IN ASR!!!!??? - - # NOTICE: pretrain=True argument - resnet = self.nf.get_module( - name="resnet18", params={"num_classes": 2}, collection="torchvision", pretrained=True, - ) - - L_train = self.nf.get_module(name="CrossEntropyLoss", collection="toys", params={}) - - # NOTICE: Freeze all Neural Module's weights - resnet.freeze() - # NOTICE: unfreeze, top classification layer for fine-tuning - resnet.unfreeze(set(["fc.weight", "fc.bias"])) - - images, labels = dl_train() - outputs = resnet(x=images) - train_loss = L_train(predictions=outputs, labels=labels) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[train_loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), - ) - # Instantiate an optimizer to perform `train` action - optimizer = self.nf.get_trainer() - optimizer.train( - [train_loss], callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 2, "lr": 0.0003}, - ) - - # WHERE IS ACTUALLY THE TEST?? ARE WE CHECKING ANYTHING?? 
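The test changes above and below all apply the same migration: dictionary-of-`AxisType` port declarations become tuple-based `NeuralType` declarations. A condensed sketch of the new style, using the same ports as the `test_zeroDS.py` hunk that follows (the dict shape here is illustrative):

```python
from nemo.core.neural_types import AxisKind, AxisType, LengthsType, NeuralType, SpectrogramType

output_ports = {
    # [batch, dimension, time] spectrogram with fixed dimension/time sizes of 64
    "processed_signal": NeuralType(
        (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 64), AxisType(AxisKind.Time, 64)),
        SpectrogramType(),
    ),
    # one length per example: tuple('B') is the one-axis shorthand for ('B',)
    "processed_length": NeuralType(tuple('B'), LengthsType()),
}
```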
diff --git a/tests/asr/test_zeroDS.py b/tests/asr/test_zeroDS.py index 6dd55dca014c..a413e1f2e514 100644 --- a/tests/asr/test_zeroDS.py +++ b/tests/asr/test_zeroDS.py @@ -86,30 +86,6 @@ def tearDownClass(cls) -> None: if os.path.exists(os.path.join(data_folder, "asr")): shutil.rmtree(os.path.join(data_folder, "asr")) - def test_simple_train(self): - logging.info("Simplest train test with ZeroDL") - trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - data_source = nemo.backends.pytorch.common.ZerosDataLayer( - size=10000, - dtype=torch.FloatTensor, - batch_size=128, - output_ports={ - "x": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, dim=1)}), - "y": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag, dim=1)}), - }, - ) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - y_pred = trainable_module(x=x) - loss_tensor = loss(predictions=y_pred, target=y) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss_tensor], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), - ) - self.nf.train( - [loss_tensor], callbacks=[callback], optimization_params={"num_epochs": 3, "lr": 0.0003}, optimizer="sgd", - ) - def test_asr_with_zero_ds(self): logging.info("Testing ASR NMs with ZeroDS and without pre-processing") path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml")) @@ -121,16 +97,23 @@ def test_asr_with_zero_ds(self): dtype=torch.FloatTensor, batch_size=4, output_ports={ + # "processed_signal": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(SpectrogramSignalTag, dim=64), + # 2: AxisType(ProcessedTimeTag, dim=64), + # } + # ), + # "processed_length": NeuralType({0: AxisType(BatchTag)}), + # "transcript": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag, dim=64)}), + # "transcript_length": NeuralType({0: AxisType(BatchTag)}), "processed_signal": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(SpectrogramSignalTag, dim=64), - 2: AxisType(ProcessedTimeTag, dim=64), - } + (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 64), AxisType(AxisKind.Time, 64)), + SpectrogramType(), ), - "processed_length": NeuralType({0: AxisType(BatchTag)}), - "transcript": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag, dim=64)}), - "transcript_length": NeuralType({0: AxisType(BatchTag)}), + "processed_length": NeuralType(tuple('B'), LengthsType()), + "transcript": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Time, 64)), LabelsType()), + "transcript_length": NeuralType(tuple('B'), LengthsType()), }, ) diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/test_actions_api.py b/tests/core/test_actions_api.py similarity index 100% rename from tests/test_actions_api.py rename to tests/core/test_actions_api.py diff --git a/tests/test_deploy_export.py b/tests/core/test_deploy_export.py similarity index 100% rename from tests/test_deploy_export.py rename to tests/core/test_deploy_export.py diff --git a/tests/test_deprecated.py b/tests/core/test_deprecated.py similarity index 100% rename from tests/test_deprecated.py rename to tests/core/test_deprecated.py diff --git a/tests/test_infer.py b/tests/core/test_infer.py similarity index 81% rename from tests/test_infer.py rename to tests/core/test_infer.py index 20dc566fca66..d9b11a3997da 100644 --- a/tests/test_infer.py +++ b/tests/core/test_infer.py @@ -30,11 +30,13 @@ def __init__(self): @property def input_ports(self): - 
return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + # return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + return {"mod_in": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType())} @property def output_ports(self): - return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + # return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + return {"mod_out": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType())} def forward(self, mod_in): return mod_in + 10 @@ -46,11 +48,11 @@ def __init__(self): @property def input_ports(self): - return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + return {"mod_in": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType())} @property def output_ports(self): - return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} + return {"mod_out": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType())} def forward(self, mod_in): return mod_in - 10 @@ -66,7 +68,9 @@ def test_infer_caching(self): size=1, dtype=torch.FloatTensor, batch_size=1, - output_ports={"dl_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})}, + output_ports={ + "dl_out": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType()) + }, ) addten = AddsTen() minusten = SubtractsTen() @@ -93,7 +97,9 @@ def test_infer_errors(self): size=1, dtype=torch.FloatTensor, batch_size=1, - output_ports={"dl_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})}, + output_ports={ + "dl_out": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType()) + }, ) addten = AddsTen() minusten = SubtractsTen() diff --git a/tests/test_neural_factory.py b/tests/core/test_neural_factory.py similarity index 100% rename from tests/test_neural_factory.py rename to tests/core/test_neural_factory.py diff --git a/tests/core/test_neural_modules.py b/tests/core/test_neural_modules.py new file mode 100644 index 000000000000..04e82c2802bf --- /dev/null +++ b/tests/core/test_neural_modules.py @@ -0,0 +1,52 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2019 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =============================================================================
+
+import nemo
+from nemo.core.neural_types import ChannelType, NeuralType
+from tests.common_setup import NeMoUnitTest
+
+
+class NeuralModulesTests(NeMoUnitTest):
+    def test_call_TaylorNet(self):
+        x_tg = nemo.core.neural_modules.NmTensor(
+            producer=None, producer_args=None, name=None, ntype=NeuralType(('B', 'D'), ChannelType())
+        )
+
+        tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4)
+        # note that the real port name `x` was used
+        y_pred = tn(x=x_tg)
+        self.assertEqual(y_pred.producer, tn)
+        self.assertEqual(y_pred.producer_args.get("x"), x_tg)
+
+    def test_simplest_example_chain(self):
+        data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=1)
+        trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4)
+        loss = nemo.backends.pytorch.tutorials.MSELoss()
+        x, y = data_source()
+        y_pred = trainable_module(x=x)
+        loss_tensor = loss(predictions=y_pred, target=y)
+
+        # check producers' bookkeeping
+        self.assertEqual(loss_tensor.producer, loss)
+        self.assertEqual(loss_tensor.producer_args, {"predictions": y_pred, "target": y})
+        self.assertEqual(y_pred.producer, trainable_module)
+        self.assertEqual(y_pred.producer_args, {"x": x})
+        self.assertEqual(y.producer, data_source)
+        self.assertEqual(y.producer_args, {})
+        self.assertEqual(x.producer, data_source)
+        self.assertEqual(x.producer_args, {})
diff --git a/tests/test_neural_modules_initialization.py b/tests/core/test_neural_modules_initialization.py
similarity index 100%
rename from tests/test_neural_modules_initialization.py
rename to tests/core/test_neural_modules_initialization.py
diff --git a/tests/test_neural_modules_pytorch.py b/tests/core/test_neural_modules_pytorch.py
similarity index 92%
rename from tests/test_neural_modules_pytorch.py
rename to tests/core/test_neural_modules_pytorch.py
index c74eac7afc36..d0cfbc44c62b 100644
--- a/tests/test_neural_modules_pytorch.py
+++ b/tests/core/test_neural_modules_pytorch.py
@@ -17,10 +17,13 @@
 # limitations under the License.
 # =============================================================================
 
+# TODO: These tests look bad/useless - redo
+
 import unittest
 
 import nemo
 from nemo.backends.pytorch.nm import TrainableNM
+from nemo.core.neural_types import ChannelType, NeuralType
 from tests.common_setup import NeMoUnitTest
 
@@ -70,12 +73,7 @@ def test_call_TaylorNet(self):
             producer=None,
             producer_args=None,
             name=None,
-            ntype=nemo.core.neural_types.NeuralType(
-                {
-                    0: nemo.core.neural_types.AxisType(nemo.core.neural_types.BatchTag),
-                    1: nemo.core.neural_types.AxisType(nemo.core.neural_types.ChannelTag),
-                }
-            ),
+            ntype=NeuralType(elements_type=ChannelType(), axes=('B', 'D')),
         )
 
         tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4)
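One behavior worth calling out before the new test file: size constraints are checked asymmetrically, so a type with unspecified sizes accepts a fully sized one, but not vice versa. This is exactly `test_unspecified_dimensions` below:

```python
from nemo.core.neural_types import AxisKind, AxisType, NeuralType, NeuralTypeComparisonResult, SpectrogramType

fixed = NeuralType(
    (AxisType(AxisKind.Batch, 64), AxisType(AxisKind.Time, 10), AxisType(AxisKind.Dimension, 128)),
    SpectrogramType(),
)
free = NeuralType(('B', 'T', 'C'), SpectrogramType())

# Unspecified sizes accept any fully specified type of the same layout...
assert free.compare(fixed) == NeuralTypeComparisonResult.SAME
# ...but the fixed sizes are a hard constraint in the other direction.
assert fixed.compare(free) == NeuralTypeComparisonResult.DIM_INCOMPATIBLE
```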
diff --git a/tests/core/test_neural_types.py b/tests/core/test_neural_types.py
new file mode 100644
index 000000000000..e31fd08941d3
--- /dev/null
+++ b/tests/core/test_neural_types.py
@@ -0,0 +1,178 @@
+# ! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 NVIDIA. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+import nemo
+from nemo.core.neural_types import (
+    AcousticEncodedRepresentation,
+    AudioSignal,
+    AxisKind,
+    AxisType,
+    ChannelType,
+    MelSpectrogramType,
+    MFCCSpectrogramType,
+    NeuralPortNmTensorMismatchError,
+    NeuralType,
+    NeuralTypeComparisonResult,
+    SpectrogramType,
+    VoidType,
+)
+from tests.common_setup import NeMoUnitTest
+
+
+class NeuralTypeSystemTests(NeMoUnitTest):
+    def test_short_vs_long_version(self):
+        long_version = NeuralType(
+            axes=(AxisType(AxisKind.Batch, None), AxisType(AxisKind.Dimension, None), AxisType(AxisKind.Time, None)),
+            elements_type=AcousticEncodedRepresentation(),
+        )
+        short_version = NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation())
+        self.assertEqual(long_version.compare(short_version), NeuralTypeComparisonResult.SAME)
+        self.assertEqual(short_version.compare(long_version), NeuralTypeComparisonResult.SAME)
+
+    def test_parameterized_type_audio_sampling_frequency(self):
+        audio16K = NeuralType(axes=('B', 'T'), elements_type=AudioSignal(16000))
+        audio8K = NeuralType(axes=('B', 'T'), elements_type=AudioSignal(8000))
+        another16K = NeuralType(axes=('B', 'T'), elements_type=AudioSignal(16000))
+
+        self.assertEqual(audio8K.compare(audio16K), NeuralTypeComparisonResult.SAME_TYPE_INCOMPATIBLE_PARAMS)
+        self.assertEqual(audio16K.compare(audio8K), NeuralTypeComparisonResult.SAME_TYPE_INCOMPATIBLE_PARAMS)
+        self.assertEqual(another16K.compare(audio16K), NeuralTypeComparisonResult.SAME)
+        self.assertEqual(audio16K.compare(another16K), NeuralTypeComparisonResult.SAME)
+
+    def test_transpose_same_1(self):
+        type1 = NeuralType(axes=('B', 'T', 'C'))
+        type2 = NeuralType(axes=('T', 'B', 'C'))
+        self.assertEqual(type1.compare(type2), NeuralTypeComparisonResult.TRANSPOSE_SAME)
+        self.assertEqual(type2.compare(type1), NeuralTypeComparisonResult.TRANSPOSE_SAME)
+
+    def test_transpose_same_2(self):
+        audio16K = NeuralType(axes=('B', 'T'), elements_type=AudioSignal(16000))
+        audio16K_t = NeuralType(axes=('T', 'B'), elements_type=AudioSignal(16000))
+        self.assertEqual(audio16K.compare(audio16K_t), NeuralTypeComparisonResult.TRANSPOSE_SAME)
+
+    def test_inheritance_spec_augment_example(self):
+        input = NeuralType(('B', 'D', 'T'), SpectrogramType())
+        out1 = NeuralType(('B', 'D', 'T'), MelSpectrogramType())
+        out2 = NeuralType(('B', 'D', 'T'), MFCCSpectrogramType())
+        self.assertEqual(out1.compare(out2), NeuralTypeComparisonResult.INCOMPATIBLE)
+        self.assertEqual(out2.compare(out1), NeuralTypeComparisonResult.INCOMPATIBLE)
+        self.assertEqual(input.compare(out1), NeuralTypeComparisonResult.GREATER)
+        self.assertEqual(input.compare(out2), NeuralTypeComparisonResult.GREATER)
+        self.assertEqual(out1.compare(input), NeuralTypeComparisonResult.LESS)
+        self.assertEqual(out2.compare(input), NeuralTypeComparisonResult.LESS)
+
+    def test_singleton(self):
+        loss_output1 = NeuralType(axes=None)
+        loss_output2 = NeuralType(axes=None)
+        self.assertEqual(loss_output1.compare(loss_output2), NeuralTypeComparisonResult.SAME)
+        self.assertEqual(loss_output2.compare(loss_output1), 
NeuralTypeComparisonResult.SAME) + + def test_list_of_lists(self): + T1 = NeuralType( + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind.Time, size=None, is_list=True), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + elements_type=ChannelType(), + ) + T2 = NeuralType( + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + elements_type=ChannelType(), + ) + # TODO: should this be incompatible instead??? + self.assertEqual(T1.compare(T2), NeuralTypeComparisonResult.TRANSPOSE_SAME) + + def test_void(self): + btc_spctr = NeuralType(('B', 'T', 'C'), SpectrogramType()) + btc_spct_bad = NeuralType(('B', 'T'), SpectrogramType()) + btc_void = NeuralType(('B', 'T', 'C'), VoidType()) + self.assertEqual(btc_void.compare(btc_spctr), NeuralTypeComparisonResult.SAME) + self.assertEqual(btc_spctr.compare(btc_void), NeuralTypeComparisonResult.INCOMPATIBLE) + self.assertEqual(btc_void.compare(btc_spct_bad), NeuralTypeComparisonResult.INCOMPATIBLE) + + def test_big_void(self): + big_void_1 = NeuralType(elements_type=VoidType()) + big_void_2 = NeuralType() + + btc_spctr = NeuralType(('B', 'T', 'C'), SpectrogramType()) + btc_spct_bad = NeuralType(('B', 'T'), SpectrogramType()) + t1 = NeuralType( + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind.Time, size=None, is_list=True), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + elements_type=ChannelType(), + ) + t2 = NeuralType( + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + elements_type=ChannelType(), + ) + + self.assertEqual(big_void_1.compare(btc_spctr), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_1.compare(btc_spct_bad), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_1.compare(t1), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_1.compare(t2), NeuralTypeComparisonResult.SAME) + + self.assertEqual(big_void_2.compare(btc_spctr), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_2.compare(btc_spct_bad), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_2.compare(t1), NeuralTypeComparisonResult.SAME) + self.assertEqual(big_void_2.compare(t2), NeuralTypeComparisonResult.SAME) + + def test_dag(self): + data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=128) + trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) + loss = nemo.backends.pytorch.tutorials.MSELoss() + x, y = data_source() + y_pred = trainable_module(x=x) + _ = loss(predictions=y_pred, target=y) + + def wrong(): + data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=128) + trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) + loss = 
nemo.backends.pytorch.tutorials.MSELoss() + x, y = data_source() + loss_tensor = loss(predictions=x, target=x) + _ = trainable_module(x=loss_tensor) + + self.assertRaises(NeuralPortNmTensorMismatchError, wrong) + + def test_unspecified_dimensions(self): + t0 = NeuralType( + (AxisType(AxisKind.Batch, 64), AxisType(AxisKind.Time, 10), AxisType(AxisKind.Dimension, 128)), + SpectrogramType(), + ) + t1 = NeuralType(('B', 'T', 'C'), SpectrogramType()) + self.assertEqual(t1.compare(t0), NeuralTypeComparisonResult.SAME) + self.assertEqual(t0.compare(t1), NeuralTypeComparisonResult.DIM_INCOMPATIBLE) diff --git a/tests/test_policies.py b/tests/core/test_policies.py similarity index 100% rename from tests/test_policies.py rename to tests/core/test_policies.py diff --git a/tests/test_pytorch_trainers.py b/tests/core/test_pytorch_trainers.py similarity index 100% rename from tests/test_pytorch_trainers.py rename to tests/core/test_pytorch_trainers.py diff --git a/tests/core/test_weight_share.py b/tests/core/test_weight_share.py new file mode 100644 index 000000000000..92f82ce18061 --- /dev/null +++ b/tests/core/test_weight_share.py @@ -0,0 +1,220 @@ +# # ! /usr/bin/python +# # -*- coding: utf-8 -*- +# +# # Copyright 2019 NVIDIA. All Rights Reserved. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, software +# # distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. 
+# # ============================================================================= +# +# import os +# import shutil +# import tarfile +# import unittest +# from typing import Dict +# +# import numpy as np +# import torch +# from ruamel.yaml import YAML +# +# import nemo +# import nemo.collections.asr as nemo_asr +# from nemo.core import WeightShareTransform +# from nemo.core.neural_types import * +# from tests.common_setup import NeMoUnitTest +# +# logging = nemo.logging +# +# +# class TestWeightSharing(NeMoUnitTest): +# labels = [ +# "'", +# "a", +# "b", +# "c", +# "d", +# "e", +# "f", +# "g", +# "h", +# "i", +# "j", +# "k", +# "l", +# "m", +# "n", +# "o", +# "p", +# "q", +# "r", +# "s", +# "t", +# "u", +# "v", +# "w", +# "x", +# "y", +# "z", +# " ", +# ] +# manifest_filepath = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/asr/an4_train.json")) +# featurizer_config = { +# 'window': 'hann', +# 'dither': 1e-05, +# 'normalize': 'per_feature', +# 'frame_splicing': 1, +# 'int_values': False, +# 'window_stride': 0.01, +# 'sample_rate': 16000, +# 'features': 64, +# 'n_fft': 512, +# 'window_size': 0.02, +# } +# yaml = YAML(typ="safe") +# +# @classmethod +# def setUpClass(cls) -> None: +# super().setUpClass() +# data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) +# logging.info("Looking up for test ASR data") +# if not os.path.exists(os.path.join(data_folder, "asr")): +# logging.info("Extracting ASR data to: {0}".format(os.path.join(data_folder, "asr"))) +# tar = tarfile.open(os.path.join(data_folder, "asr.tar.gz"), "r:gz") +# tar.extractall(path=data_folder) +# tar.close() +# else: +# logging.info("ASR data found in: {0}".format(os.path.join(data_folder, "asr"))) +# +# @classmethod +# def tearDownClass(cls) -> None: +# super().tearDownClass() +# data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) +# logging.info("Looking up for test ASR data") +# if os.path.exists(os.path.join(data_folder, "asr")): +# shutil.rmtree(os.path.join(data_folder, "asr")) +# +# def __check_if_weights_are_equal(self, w1: Dict, w2: Dict): +# all_same = set(w1.keys()) == set(w2.keys()) +# if not all_same: +# return False +# else: +# for key in w1.keys(): +# all_same = all_same and np.array_equal( +# w1[key][0].cpu().detach().numpy(), w2[key][0].cpu().detach().numpy(), +# ) +# return all_same +# +# def test_TaylorNet_get_weights(self): +# tn1 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) +# tn2 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) +# # because of randomness, actual weights should be different +# self.assertFalse(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) +# tn3 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) +# tn3.set_weights(tn1.get_weights()) +# # check than weights are the same +# self.assertTrue(self.__check_if_weights_are_equal(tn1.get_weights(), tn3.get_weights())) +# # change weights on one module - another module should not change +# tn1.fc1.bias.data = torch.tensor([0.1]) +# self.assertFalse(self.__check_if_weights_are_equal(tn1.get_weights(), tn3.get_weights())) +# +# def test_TaylorNet_tie_weights(self): +# tn1 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) +# tn2 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) +# # because of randomness, actual weights should be different +# self.assertFalse(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) +# tn2.tie_weights_with(tn1, list(tn1.get_weights().keys())) +# # change weights on one module - another module 
should change too +# tn1.fc1.bias.data = torch.tensor([0.1]) +# self.assertTrue(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) +# +# def test_tie_weights2(self): +# voc_size = 3 +# dim = 2 +# embd = nemo.backends.pytorch.common.SequenceEmbedding(voc_size=voc_size, hidden_size=dim) +# proj = nemo.backends.pytorch.common.SequenceProjection(from_dim=dim, to_dim=voc_size) +# embd.tie_weights_with( +# proj, +# weight_names=["embedding.weight"], +# name2name_and_transform={"embedding.weight": ("projection.weight", WeightShareTransform.SAME,)}, +# ) +# self.assertTrue( +# np.array_equal(embd.embedding.weight.detach().numpy(), proj.projection.weight.detach().numpy(),) +# ) +# was = embd.embedding.weight.detach().numpy() +# embd.embedding.weight.data = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) +# after = embd.embedding.weight.detach().numpy() +# self.assertTrue( +# np.array_equal(embd.embedding.weight.detach().numpy(), proj.projection.weight.detach().numpy(),) +# ) +# self.assertFalse(np.array_equal(was, after)) +# +# def test_set_weights(self): +# voc_size = 3 +# dim = 2 +# embd = nemo.backends.pytorch.common.SequenceEmbedding(voc_size=voc_size, hidden_size=dim) +# weights = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) +# name2weights = {"embedding.weight": (weights, True)} +# embd.set_weights(name2weight=name2weights) +# self.assertTrue(np.array_equal(embd.embedding.weight.detach().numpy(), weights.detach().numpy(),)) +# weights = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) +# self.assertFalse(np.array_equal(embd.embedding.weight.detach().numpy(), weights.detach().numpy(),)) +# +# def test_freeze_unfreeze_TrainableNM(self): +# path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml")) +# with open(path) as file: +# jasper_model_definition = self.yaml.load(file) +# dl = nemo_asr.AudioToTextDataLayer( +# # featurizer_config=self.featurizer_config, +# manifest_filepath=self.manifest_filepath, +# labels=self.labels, +# batch_size=4, +# ) +# pre_process_params = { +# #'int_values': False, +# 'frame_splicing': 1, +# 'features': 64, +# 'window_size': 0.02, +# 'n_fft': 512, +# 'dither': 1e-05, +# 'window': 'hann', +# 'sample_rate': 16000, +# 'normalize': 'per_feature', +# 'window_stride': 0.01, +# } +# preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) +# jasper_encoder = nemo_asr.JasperEncoder( +# feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], +# **jasper_model_definition['JasperEncoder'], +# ) +# jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) +# ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) +# jasper_encoder.freeze() +# jasper_encoder.unfreeze(set(['encoder.4.conv.1.weight'])) +# jasper_decoder.unfreeze() +# # DAG +# audio_signal, a_sig_length, transcript, transcript_len = dl() +# processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) +# +# encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) +# # logging.info(jasper_encoder) +# log_probs = jasper_decoder(encoder_output=encoded) +# loss = ctc_loss( +# log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, +# ) +# +# callback = nemo.core.SimpleLossLoggerCallback( +# tensors=[loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), +# ) +# optimizer = self.nf.get_trainer() +# optimizer.train( +# [loss], 
callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 2, "lr": 0.0003}, +# ) diff --git a/tests/test_tutorials_pytorch.py b/tests/nlp/test_tutorials_pytorch.py similarity index 100% rename from tests/test_tutorials_pytorch.py rename to tests/nlp/test_tutorials_pytorch.py diff --git a/tests/test_neural_types.py b/tests/test_neural_types.py deleted file mode 100644 index 1e4e9c436a24..000000000000 --- a/tests/test_neural_types.py +++ /dev/null @@ -1,258 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import tarfile - -from ruamel.yaml import YAML - -import nemo.collections.asr as nemo_asr -from nemo import logging -from nemo.core import * -from tests.common_setup import NeMoUnitTest - - -class TestNeuralTypes(NeMoUnitTest): - manifest_filepath = "tests/data/asr/an4_train.json" - yaml = YAML(typ="safe") - - def setUp(self) -> None: - super().setUp() - data_folder = "tests/data/" - logging.info("Looking up for test ASR data") - if not os.path.exists(data_folder + "asr"): - logging.info("Extracting ASR data to: {0}".format(data_folder + "asr")) - tar = tarfile.open("tests/data/asr.tar.gz", "r:gz") - tar.extractall(path=data_folder) - tar.close() - else: - logging.info("ASR data found in: {0}".format(data_folder + "asr")) - - def test_same(self): - btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - btc2 = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - self.assertEqual(btc2.compare(btc), NeuralTypeComparisonResult.SAME) - - def test_transpose_same(self): - btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - tbc = NeuralType(axis2type={1: AxisType(BatchTag), 0: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - - self.assertEqual(btc.compare(tbc), NeuralTypeComparisonResult.TRANSPOSE_SAME) - self.assertEqual(tbc.compare(btc), NeuralTypeComparisonResult.TRANSPOSE_SAME) - - def test_dim_incompatible(self): - nchw1 = NeuralType( - axis2type={ - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224), - } - ) - nchw2 = NeuralType( - axis2type={ - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 256), - 3: AxisType(WidthTag, 256), - } - ) - self.assertEqual(nchw1.compare(nchw2), NeuralTypeComparisonResult.DIM_INCOMPATIBLE) - - def test_rank_incompatible(self): - btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - nchw = NeuralType( - axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(HeightTag), 3: AxisType(WidthTag)} - ) - self.assertEqual(nchw.compare(btc), NeuralTypeComparisonResult.INCOMPATIBLE) - - def test_axis_type(self): - ax1 = AxisType(BatchTag) - ax2 = AxisType(TimeTag) - ax3 = AxisType(ProcessedTimeTag) - 
self.assertEqual(ax1.compare_to(ax2), NeuralTypeComparisonResult.INCOMPATIBLE) - self.assertEqual(ax3.compare_to(ax2), NeuralTypeComparisonResult.LESS) - self.assertEqual(ax2.compare_to(ax3), NeuralTypeComparisonResult.GREATER) - self.assertEqual(ax2.compare_to(AxisType(TimeTag)), NeuralTypeComparisonResult.SAME) - - def test_semantic_incompatible(self): - nchw = NeuralType( - axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(HeightTag), 3: AxisType(WidthTag)} - ) - badd = NeuralType( - axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag), 3: AxisType(WidthTag)} - ) - self.assertEqual(nchw.compare(badd), NeuralTypeComparisonResult.INCOMPATIBLE) - self.assertEqual(badd.compare(nchw), NeuralTypeComparisonResult.INCOMPATIBLE) - - def test_root(self): - root = NeuralType({}) - non_tensor = NeuralType(None) - btc = NeuralType(axis2type={0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}) - nchw = NeuralType( - axis2type={0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(HeightTag), 3: AxisType(WidthTag)} - ) - self.assertEqual(root.compare(btc), NeuralTypeComparisonResult.SAME) - self.assertEqual(root.compare(nchw), NeuralTypeComparisonResult.SAME) - self.assertEqual(root.compare(non_tensor), NeuralTypeComparisonResult.SAME) - - self.assertEqual(non_tensor.compare(root), NeuralTypeComparisonResult.INCOMPATIBLE) - self.assertEqual(btc.compare(root), NeuralTypeComparisonResult.INCOMPATIBLE) - self.assertEqual(nchw.compare(root), NeuralTypeComparisonResult.INCOMPATIBLE) - - def test_combiner_type_infer(self): - combiner = nemo.backends.pytorch.common.SimpleCombiner(mode="add") - x_tg = nemo.core.NmTensor( - producer=None, producer_args=None, name=None, ntype=NeuralType({0: AxisType(BatchTag)}) - ) - y_tg = nemo.core.NmTensor( - producer=None, producer_args=None, name=None, ntype=NeuralType({0: AxisType(BatchTag)}) - ) - res = combiner(x1=y_tg, x2=x_tg) - self.assertEqual(res.compare(x_tg), NeuralTypeComparisonResult.SAME) - self.assertEqual(res.compare(y_tg), NeuralTypeComparisonResult.SAME) - self.assertEqual(x_tg.compare(res), NeuralTypeComparisonResult.SAME) - self.assertEqual(y_tg.compare(res), NeuralTypeComparisonResult.SAME) - - combiner1 = nemo.backends.pytorch.common.SimpleCombiner(mode="add") - x_tg1 = NmTensor( - producer=None, - producer_args=None, - name=None, - ntype=NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - ) - y_tg1 = NmTensor( - producer=None, - producer_args=None, - name=None, - ntype=NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - ) - res1 = combiner1(x1=y_tg1, x2=x_tg1) - self.assertEqual(res1.compare(x_tg1), NeuralTypeComparisonResult.SAME) - self.assertEqual(res1.compare(y_tg1), NeuralTypeComparisonResult.SAME) - self.assertEqual(x_tg1.compare(res1), NeuralTypeComparisonResult.SAME) - self.assertEqual(y_tg1.compare(res1), NeuralTypeComparisonResult.SAME) - - def test_optional_input_no_input(self): - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=128) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNetO(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - y_pred = trainable_module(x=x) - loss_tensor = loss(predictions=y_pred, target=y) - - optimizer = nemo.backends.pytorch.actions.PtActions() - optimizer.train( - tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1} - ) - - def test_optional_input_no_with_input(self): - data_source = 
nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=128) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNetO(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - y_pred = trainable_module(x=x, o=x) - loss_tensor = loss(predictions=y_pred, target=y) - optimizer = nemo.backends.pytorch.actions.PtActions() - optimizer.train( - tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1} - ) - - def test_optional_input_no_with_wrong_input(self): - def wrong_fn(): - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=128) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNetO(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - wrong_optional = NmTensor( - producer=None, - producer_args=None, - name=None, - ntype=NeuralType({0: AxisType(ChannelTag), 1: AxisType(BatchTag)}), - ) - y_pred = trainable_module(x=x, o=wrong_optional) - loss_tensor = loss(predictions=y_pred, target=y) - optimizer = nemo.backends.pytorch.actions.PtActions() - optimizer.train( - tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1} - ) - - self.assertRaises(NeuralPortNmTensorMismatchError, wrong_fn) - - def test_simple_dags(self): - # module instantiation - with open("tests/data/jasper_smaller.yaml") as file: - jasper_model_definition = self.yaml.load(file) - labels = jasper_model_definition['labels'] - - data_layer = nemo_asr.AudioToTextDataLayer( - manifest_filepath=self.manifest_filepath, labels=labels, batch_size=4 - ) - data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( - **jasper_model_definition['AudioToMelSpectrogramPreprocessor'] - ) - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'], - ) - jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(labels)) - ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels)) - greedy_decoder = nemo_asr.GreedyCTCDecoder() - - # DAG definition - (audio_signal, audio_signal_len, transcript, transcript_len) = data_layer() - processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len) - - spec_augment = nemo_asr.SpectrogramAugmentation(rect_masks=5) - aug_signal = spec_augment(input_spec=processed_signal) - - encoded, encoded_len = jasper_encoder(audio_signal=aug_signal, length=processed_signal_len) - log_probs = jasper_decoder(encoder_output=encoded) - predictions = greedy_decoder(log_probs=log_probs) - loss = ctc_loss( - log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len - ) - - def wrong(): - with open("tests/data/jasper_smaller.yaml") as file: - jasper_config = self.yaml.load(file) - labels = jasper_config['labels'] - - data_layer = nemo_asr.AudioToTextDataLayer( - manifest_filepath=self.manifest_filepath, labels=labels, batch_size=4 - ) - data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( - **jasper_config['AudioToMelSpectrogramPreprocessor'] - ) - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=jasper_config['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_config['JasperEncoder'], - ) - jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(labels)) - # DAG definition - (audio_signal, audio_signal_len, transcript, transcript_len) = data_layer() - 
processed_signal, processed_signal_len = data_preprocessor( - input_signal=audio_signal, length=audio_signal_len - ) - - spec_augment = nemo_asr.SpectrogramAugmentation(rect_masks=5) - aug_signal = spec_augment(input_spec=processed_signal) - - encoded, encoded_len = jasper_encoder(audio_signal=aug_signal, length=processed_signal_len) - log_probs = jasper_decoder(encoder_output=processed_signal) - - self.assertRaises(NeuralPortNmTensorMismatchError, wrong)
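Taken together, the new system reduces port compatibility to a single `compare` call. Below is a sketch of the kind of check a module's `__call__` wiring can perform; the accepted-result policy is an assumption for illustration, not the actual NeMo wiring code (which this diff does not show):

```python
from nemo.core.neural_types import (
    NeuralPortNmTensorMismatchError,
    NeuralType,
    NeuralTypeComparisonResult,
)


def check_port(port_name: str, expected: NeuralType, provided: NeuralType) -> None:
    """Raise if `provided` cannot be fed into a port declared as `expected`."""
    result = expected.compare(provided)
    # Accepting SAME and GREATER means a port also takes subtypes of its
    # declared element type; which results to accept is a policy choice.
    if result not in (NeuralTypeComparisonResult.SAME, NeuralTypeComparisonResult.GREATER):
        raise NeuralPortNmTensorMismatchError(
            f"port '{port_name}': expected {expected.elements_type}, "
            f"got {provided.elements_type} ({result})"
        )
```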