Commit

Merge pull request #1 from tensorflow/master
Updated
ayushmankumar7 committed Mar 4, 2020
2 parents 8b47aa3 + 75d1304 commit 43178d7
Showing 80 changed files with 422 additions and 6,788 deletions.
2 changes: 1 addition & 1 deletion official/benchmark/bert_squad_benchmark.py
@@ -172,7 +172,7 @@ def _run_and_report_benchmark(self,
run_eagerly=False,
ds_type='mirrored'):
"""Runs the benchmark and reports various metrics."""
-    if FLAGS.train_batch_size <= 4:
+    if FLAGS.train_batch_size <= 4 or run_eagerly:
FLAGS.input_meta_data_path = SQUAD_MEDIUM_INPUT_META_DATA_PATH
else:
FLAGS.input_meta_data_path = SQUAD_LONG_INPUT_META_DATA_PATH
70 changes: 68 additions & 2 deletions official/modeling/hyperparams/base_config.py
@@ -143,12 +143,10 @@ def _get_subconfig_type(cls, k) -> Type[params_dict.ParamsDict]:
return subconfig_type

def __post_init__(self, default_params, restrictions, *args, **kwargs):
-    logging.error('DEBUG before init %r', type(self))
super().__init__(default_params=default_params,
restrictions=restrictions,
*args,
**kwargs)
-    logging.error('DEBUG after init %r', type(self))

def _set(self, k, v):
"""Overrides same method in ParamsDict.
@@ -246,3 +244,71 @@ def from_args(cls, *args, **kwargs):
default_params = {a: p for a, p in zip(attributes, args)}
default_params.update(kwargs)
return cls(default_params)


@dataclasses.dataclass
class RuntimeConfig(Config):
"""High-level configurations for Runtime.
These include parameters that are not directly related to the experiment,
e.g. directories, accelerator type, etc.
Attributes:
distribution_strategy: e.g. 'mirrored', 'tpu', etc.
enable_eager: Whether or not to enable eager mode.
enable_xla: Whether or not to enable XLA.
per_gpu_thread_count: thread count per GPU.
gpu_threads_enabled: Whether or not GPU threads are enabled.
gpu_thread_mode: Whether and how the GPU device uses its own threadpool.
dataset_num_private_threads: Number of threads for a private threadpool
created for all datasets computation.
tpu: The address of the TPU to use, if any.
num_gpus: The number of GPUs to use, if any.
worker_hosts: comma-separated list of worker ip:port pairs for running
multi-worker models with DistributionStrategy.
task_index: If multi-worker training, the task index of this worker.
all_reduce_alg: Defines the algorithm for performing all-reduce.
"""
distribution_strategy: str = 'mirrored'
enable_eager: bool = False
enable_xla: bool = False
gpu_threads_enabled: bool = False
gpu_thread_mode: Optional[str] = None
dataset_num_private_threads: Optional[int] = None
per_gpu_thread_count: int = 0
tpu: Optional[str] = None
num_gpus: int = 0
worker_hosts: Optional[str] = None
task_index: int = -1
all_reduce_alg: Optional[str] = None


@dataclasses.dataclass
class TensorboardConfig(Config):
"""Configuration for Tensorboard.
Attributes:
track_lr: Whether or not to track the learning rate in Tensorboard. Defaults
to True.
write_model_weights: Whether or not to write the model weights as
images in Tensorboard. Defaults to False.
"""
track_lr: bool = True
write_model_weights: bool = False


@dataclasses.dataclass
class CallbacksConfig(Config):
"""Configuration for Callbacks.
Attributes:
enable_checkpoint_and_export: Whether or not to enable checkpoints as a
Callback. Defaults to True.
enable_tensorboard: Whether or not to enable Tensorboard as a Callback.
Defaults to True.
"""
enable_checkpoint_and_export: bool = True
enable_tensorboard: bool = True
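
The dataclasses added above follow one pattern: typed fields with defaults, layered on the Config/ParamsDict machinery. A self-contained sketch of that pattern, using a hypothetical DemoRuntimeConfig rather than this module and folding the from_args mapping from the earlier hunk into the class itself (the real Config consumes a default_params dict rather than direct field keywords):

import dataclasses
from typing import Optional


@dataclasses.dataclass
class DemoRuntimeConfig:
  """Hypothetical stand-in for the RuntimeConfig dataclass above."""
  distribution_strategy: str = 'mirrored'
  enable_xla: bool = False
  num_gpus: int = 0
  tpu: Optional[str] = None

  @classmethod
  def from_args(cls, *args, **kwargs):
    # Same mapping as Config.from_args above: positional values are zipped
    # onto the declared fields in order, then keyword values override.
    attributes = [f.name for f in dataclasses.fields(cls)]
    params = {a: p for a, p in zip(attributes, args)}
    params.update(kwargs)
    return cls(**params)


cfg = DemoRuntimeConfig.from_args('tpu', tpu='grpc://10.0.0.1:8470')
print(cfg.distribution_strategy, cfg.num_gpus, cfg.tpu)  # tpu 0 grpc://10.0.0.1:8470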

6 changes: 3 additions & 3 deletions official/nlp/modeling/layers/attention.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Keras-based attention layer."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -45,7 +45,7 @@ class Attention(tf.keras.layers.Layer):
interpolated by these probabilities, then concatenated back to a single
tensor and returned.
-  Attributes:
+  Arguments:
num_heads: Number of attention heads.
head_size: Size of each attention head.
dropout: Dropout probability.
@@ -186,7 +186,7 @@ def call(self, inputs):
class CachedAttention(Attention):
"""Attention layer with cache used for auto-regressive decoding.
-  Attributes:
+  Arguments:
num_heads: Number of attention heads.
head_size: Size of each attention head.
**kwargs: Other keyword arguments inherit from `Attention` class.
4 changes: 2 additions & 2 deletions official/nlp/modeling/layers/dense_einsum.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Keras-based einsum layer."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -30,7 +30,7 @@ class DenseEinsum(tf.keras.layers.Layer):
This layer can perform einsum calculations of arbitrary dimensionality.
-  Attributes:
+  Arguments:
output_shape: Positive integer or tuple, dimensionality of the output space.
num_summed_dimensions: The number of dimensions to sum over. Standard 2D
matmul should use 1, 3D matmul should use 2, and so forth.
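
As a rough illustration of the num_summed_dimensions idea in the DenseEinsum docstring above — a sketch in plain tf.einsum, not the layer's actual implementation:

import tensorflow as tf

x2 = tf.random.normal([8, 16])          # [batch, in]
w2 = tf.random.normal([16, 4])          # [in, out]
y2 = tf.einsum('bi,io->bo', x2, w2)     # standard 2-D matmul: 1 summed dimension

x3 = tf.random.normal([8, 10, 16])      # [batch, seq, in]
w3 = tf.random.normal([10, 16, 4])      # [seq, in, out]
y3 = tf.einsum('bsi,sio->bo', x3, w3)   # contracts two dimensions at once

print(y2.shape, y3.shape)               # (8, 4) (8, 4)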
4 changes: 2 additions & 2 deletions official/nlp/modeling/layers/masked_softmax.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Keras-based softmax layer with optional masking."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -26,7 +26,7 @@
class MaskedSoftmax(tf.keras.layers.Layer):
"""Performs a softmax with optional masking on a tensor.
-  Attributes:
+  Arguments:
mask_expansion_axes: Any axes that should be padded on the mask tensor.
"""

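
For orientation, the usual additive-mask trick that a masked softmax applies — an illustrative sketch, not necessarily this layer's exact code path:

import tensorflow as tf

scores = tf.constant([[2.0, 1.0, 0.5]])
mask = tf.constant([[1.0, 1.0, 0.0]])          # 0 marks a position to hide
adder = (1.0 - mask) * -1e9                    # large negative bias on masked slots
probs = tf.nn.softmax(scores + adder, axis=-1)
print(probs.numpy())                           # masked position gets ~0 probability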
4 changes: 2 additions & 2 deletions official/nlp/modeling/layers/on_device_embedding.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Keras-based one-hot embedding layer."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -31,7 +31,7 @@ class OnDeviceEmbedding(tf.keras.layers.Layer):
This layer uses either tf.gather or tf.one_hot to translate integer indices to
float embeddings.
-  Attributes:
+  Arguments:
vocab_size: Number of elements in the vocabulary.
embedding_width: Output size of the embedding layer.
initializer: The initializer to use for the embedding weights. Defaults to
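
The docstring above mentions two lookup strategies; a small sketch of both, using only core TF ops rather than the layer's own API:

import tensorflow as tf

vocab_size, embedding_width = 6, 4
table = tf.random.normal([vocab_size, embedding_width])
ids = tf.constant([[0, 3, 5]])                           # [batch, seq]

by_gather = tf.gather(table, ids)                        # [1, 3, 4]
one_hot = tf.one_hot(ids, depth=vocab_size)              # [1, 3, 6]
by_one_hot = tf.einsum('bsv,vd->bsd', one_hot, table)    # [1, 3, 4]

tf.debugging.assert_near(by_gather, by_one_hot)          # same embeddings either way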
4 changes: 2 additions & 2 deletions official/nlp/modeling/layers/position_embedding.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Keras-based positional embedding layer."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -37,7 +37,7 @@ class PositionEmbedding(tf.keras.layers.Layer):
can have a dynamic 1st dimension, while if `use_dynamic_slicing` is False the
input size must be fixed.
-  Attributes:
+  Arguments:
use_dynamic_slicing: Whether to use the dynamic slicing path.
max_sequence_length: The maximum size of the dynamic sequence. Only
applicable if `use_dynamic_slicing` is True.
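
A rough sketch of what the dynamic-slicing path amounts to, assuming a learned [max_sequence_length, width] table (illustrative only, not the layer's implementation):

import tensorflow as tf

max_sequence_length, width = 512, 8
position_table = tf.random.normal([max_sequence_length, width])

inputs = tf.random.normal([2, 10, width])                # runtime sequence length 10
seq_len = tf.shape(inputs)[1]
position_embeddings = position_table[:seq_len, :]        # slice to the actual length
print(position_embeddings.shape)                         # (10, 8) when run eagerly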
4 changes: 2 additions & 2 deletions official/nlp/modeling/layers/transformer.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Keras-based transformer block layer."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -32,7 +32,7 @@ class Transformer(tf.keras.layers.Layer):
This layer implements the Transformer from "Attention Is All You Need".
(https://arxiv.org/abs/1706.03762).
-  Attributes:
+  Arguments:
num_attention_heads: Number of attention heads.
intermediate_size: Size of the intermediate layer.
intermediate_activation: Activation for the intermediate layer.
4 changes: 2 additions & 2 deletions official/nlp/modeling/layers/transformer_scaffold.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Keras-based transformer scaffold layer."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -35,7 +35,7 @@ class TransformerScaffold(tf.keras.layers.Layer):
`attention_cfg`, in which case the scaffold will instantiate the class with
the config, or pass a class instance to `attention_cls`.
-  Attributes:
+  Arguments:
num_attention_heads: Number of attention heads.
intermediate_size: Size of the intermediate layer.
intermediate_activation: Activation for the intermediate layer.
4 changes: 2 additions & 2 deletions official/nlp/modeling/networks/albert_transformer_encoder.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""ALBERT (https://arxiv.org/abs/1810.04805) text encoder network."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -41,7 +41,7 @@ class AlbertTransformerEncoder(network.Network):
The default values for this object are taken from the ALBERT-Base
implementation described in the paper.
-  Attributes:
+  Arguments:
vocab_size: The size of the token vocabulary.
embedding_width: The width of the word embeddings. If the embedding width
is not equal to hidden size, embedding parameters will be factorized into
4 changes: 2 additions & 2 deletions official/nlp/modeling/networks/bert_classifier.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Trainer network for BERT-style models."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -36,7 +36,7 @@ class BertClassifier(tf.keras.Model):
instantiates a classification network based on the passed `num_classes`
argument.
-  Attributes:
+  Arguments:
network: A transformer network. This network should output a sequence output
and a classification output. Furthermore, it should expose its embedding
table via a "get_embedding_table" method.
4 changes: 2 additions & 2 deletions official/nlp/modeling/networks/bert_pretrainer.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Trainer network for BERT-style models."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -37,7 +37,7 @@ class BertPretrainer(tf.keras.Model):
instantiates the masked language model and classification networks that are
used to create the training objectives.
-  Attributes:
+  Arguments:
network: A transformer network. This network should output a sequence output
and a classification output. Furthermore, it should expose its embedding
table via a "get_embedding_table" method.
4 changes: 2 additions & 2 deletions official/nlp/modeling/networks/bert_span_labeler.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Trainer network for BERT-style models."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -35,7 +35,7 @@ class BertSpanLabeler(tf.keras.Model):
The BertSpanLabeler allows a user to pass in a transformer stack, and
instantiates a span labeling network based on a single dense layer.
-  Attributes:
+  Arguments:
network: A transformer network. This network should output a sequence output
and a classification output. Furthermore, it should expose its embedding
table via a "get_embedding_table" method.
4 changes: 2 additions & 2 deletions official/nlp/modeling/networks/classification.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Classification network."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -31,7 +31,7 @@ class Classification(network.Network):
This network implements a simple classifier head based on a dense layer.
-  Attributes:
+  Arguments:
input_width: The innermost dimension of the input tensor to this network.
num_classes: The number of classes that this network should classify to.
activation: The activation, if any, for the dense layer in this network.
4 changes: 2 additions & 2 deletions official/nlp/modeling/networks/encoder_scaffold.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Transformer-based text encoder network."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -46,7 +46,7 @@ class instance. If a class is passed, that class will be instantiated using
If the hidden_cls is not overridden, a default transformer layer will be
instantiated.
-  Attributes:
+  Arguments:
num_output_classes: The output size of the classification layer.
classification_layer_initializer: The initializer for the classification
layer.
4 changes: 2 additions & 2 deletions official/nlp/modeling/networks/masked_lm.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Masked language model network."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -32,7 +32,7 @@ class MaskedLM(network.Network):
This network implements a masked language model based on the provided network.
It assumes that the network being passed has a "get_embedding_table()" method.
-  Attributes:
+  Arguments:
input_width: The innermost dimension of the input tensor to this network.
num_predictions: The number of predictions to make per sequence.
source_network: The network with the embedding layer to use for the
4 changes: 2 additions & 2 deletions official/nlp/modeling/networks/span_labeling.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Span labeling network."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -31,7 +31,7 @@ class SpanLabeling(network.Network):
This network implements a simple single-span labeler based on a dense layer.
-  Attributes:
+  Arguments:
input_width: The innermost dimension of the input tensor to this network.
activation: The activation, if any, for the dense layer in this network.
initializer: The initializer for the dense layer in this network. Defaults to
4 changes: 2 additions & 2 deletions official/nlp/modeling/networks/transformer_encoder.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================
"""Transformer-based text encoder network."""
-
+# pylint: disable=g-classes-have-attributes
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
@@ -40,7 +40,7 @@ class TransformerEncoder(network.Network):
in "BERT: Pre-training of Deep Bidirectional Transformers for Language
Understanding".
-  Attributes:
+  Arguments:
vocab_size: The size of the token vocabulary.
hidden_size: The size of the transformer hidden layers.
num_layers: The number of transformer layers.
