Getting DNNModel to work with the new feature columns.
PiperOrigin-RevId: 213561495
rohan100jain authored and tensorflower-gardener committed Sep 19, 2018
1 parent 9ee75bb commit 9fe1778
Showing 8 changed files with 477 additions and 127 deletions.
@@ -76,6 +76,7 @@ def input_layer_with_layer_annotations(features,
weight_collections=None,
trainable=True,
cols_to_vars=None,
scope=None,
cols_to_output_tensors=None):
"""Returns a dense `Tensor` as input layer based on given `feature_columns`.
@@ -112,6 +113,7 @@ def input_layer_with_layer_annotations(features,
'some_variable:0' shape=(5, 10), <tf.Variable 'some_variable:1'
shape=(5, 10)]} If a column creates no variables, its value will be an
empty list.
scope: A name or variable scope to use.
cols_to_output_tensors: If not `None`, must be a dictionary that will be
filled with a mapping from '_FeatureColumn' to the associated output
`Tensor`s.
@@ -132,6 +134,7 @@ def input_layer_with_layer_annotations(features,
weight_collections=weight_collections,
trainable=trainable,
cols_to_vars=cols_to_vars,
scope=scope,
cols_to_output_tensors=local_cols_to_output_tensors)

if cols_to_output_tensors is not None:
@@ -301,9 +304,9 @@ def DNNClassifierWithLayerAnnotations(  # pylint: disable=invalid-name

def _model_fn(features, labels, mode, config):
with _monkey_patch(
feature_column_lib, 'input_layer',
make_input_layer_with_layer_annotations(feature_column_lib.input_layer,
mode)):
feature_column_lib, '_internal_input_layer',
make_input_layer_with_layer_annotations(
feature_column_lib._internal_input_layer, mode)): # pylint: disable=protected-access
return original.model_fn(features, labels, mode, config)

return estimator.Estimator(
@@ -422,9 +425,9 @@ def DNNRegressorWithLayerAnnotations(  # pylint: disable=invalid-name

def _model_fn(features, labels, mode, config):
with _monkey_patch(
feature_column_lib, 'input_layer',
make_input_layer_with_layer_annotations(feature_column_lib.input_layer,
mode)):
feature_column_lib, '_internal_input_layer',
make_input_layer_with_layer_annotations(
feature_column_lib._internal_input_layer, mode)): # pylint: disable=protected-access
return original.model_fn(features, labels, mode, config)

return estimator.Estimator(
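Both annotated-estimator wrappers above patch feature_column_lib._internal_input_layer for the duration of original.model_fn, so the annotation-collecting input layer is used wherever the canned DNN builds its inputs. The _monkey_patch helper itself is not part of this diff; a minimal sketch of such a context manager, assuming it simply saves and restores the module attribute, might look like this:

import contextlib


@contextlib.contextmanager
def _monkey_patch(module, name, patched):
  """Temporarily replaces `getattr(module, name)` with `patched`."""
  original = getattr(module, name)
  setattr(module, name, patched)
  try:
    yield
  finally:
    # Restore the original attribute even if the wrapped model_fn raises.
    setattr(module, name, original)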
2 changes: 2 additions & 0 deletions tensorflow/python/estimator/BUILD
@@ -251,6 +251,7 @@ py_library(
":prediction_keys",
"//tensorflow:tensorflow_py_no_contrib",
"//third_party/py/numpy",
"@absl_py//absl/testing:parameterized",
"@six_archive//:six",
],
)
@@ -273,6 +274,7 @@ py_test(
":pandas_io",
":prediction_keys",
"//tensorflow:tensorflow_py_no_contrib",
"@absl_py//absl/testing:parameterized",
"@six_archive//:six",
],
)
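The new @absl_py//absl/testing:parameterized dependency in both targets lets the estimator tests run a single test body under several configurations, for example against the v1 and v2 feature-column implementations. A small illustrative sketch (the test class and parameter names are invented for this example, not taken from the commit):

from absl.testing import parameterized
import tensorflow as tf


class DNNFeatureColumnTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      ('WithV1Columns', False),
      ('WithV2Columns', True),
  )
  def test_builds_model(self, use_v2_columns):
    # A real test would construct the DNN with either feature_column or
    # feature_column_v2 columns depending on the flag.
    self.assertIsInstance(use_v2_columns, bool)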
181 changes: 133 additions & 48 deletions tensorflow/python/estimator/canned/dnn.py
@@ -24,7 +24,9 @@
from tensorflow.python.estimator import model_fn
from tensorflow.python.estimator.canned import head as head_lib
from tensorflow.python.estimator.canned import optimizers
from tensorflow.python.feature_column import feature_column as feature_column_lib
from tensorflow.python.feature_column import feature_column
from tensorflow.python.feature_column import feature_column_v2
from tensorflow.python.keras.engine import training
from tensorflow.python.layers import core as core_layers
from tensorflow.python.layers import normalization
from tensorflow.python.ops import init_ops
@@ -45,8 +47,14 @@ def _add_hidden_layer_summary(value, tag):
summary.histogram('%s/activation' % tag, value)


def _dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
dropout, input_layer_partitioner, batch_norm):
def _dnn_logit_fn_builder(units,
hidden_units,
feature_columns,
activation_fn,
dropout,
input_layer_partitioner,
batch_norm,
shared_state_manager=None):
"""Function builder for a dnn logit_fn.
Args:
@@ -60,6 +68,8 @@ def _dnn_logit_fn_builder(units, hidden_units, feature_columns, activation_fn,
coordinate.
input_layer_partitioner: Partitioner for input layer.
batch_norm: Whether to use batch normalization after each hidden layer.
shared_state_manager: A SharedEmbeddingStateManager object to hold the
shared state for SharedEmbeddingColumns.
Returns:
A logit_fn (see below).
@@ -85,50 +95,110 @@ def dnn_logit_fn(features, mode):
A `Tensor` representing the logits, or a list of `Tensor`'s representing
multiple logits in the MultiHead case.
"""
is_training = mode == model_fn.ModeKeys.TRAIN
with variable_scope.variable_scope(
'input_from_feature_columns',
values=tuple(six.itervalues(features)),
partitioner=input_layer_partitioner):
net = feature_column_lib.input_layer(
features=features, feature_columns=feature_columns)
dnn_model = _DNNModel(
units,
hidden_units,
feature_columns,
activation_fn,
dropout,
input_layer_partitioner,
batch_norm,
shared_state_manager,
name='dnn')
return dnn_model(features, mode)

return dnn_logit_fn


class _DNNModel(training.Model):
"""A DNN Model."""

def __init__(self,
units,
hidden_units,
feature_columns,
activation_fn,
dropout,
input_layer_partitioner,
batch_norm,
shared_state_manager,
name=None,
**kwargs):
super(_DNNModel, self).__init__(name=name, **kwargs)

if feature_column_v2.is_feature_column_v2(feature_columns):
input_layer = feature_column_v2.FeatureLayer(
feature_columns=feature_columns,
name='input_layer',
shared_state_manager=shared_state_manager)
else:
with variable_scope.variable_scope('input_from_feature_columns'):
input_layer = feature_column.InputLayer(
feature_columns=feature_columns, name='input_layer')

self._input_layer = self._add_layers([input_layer])[0]

self._dropout = dropout
self._batch_norm = batch_norm

hidden_layers = []
dropout_layers = []
batch_norm_layers = []
for layer_id, num_hidden_units in enumerate(hidden_units):
with variable_scope.variable_scope(
'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
net = core_layers.dense(
net,
units=num_hidden_units,
activation=activation_fn,
kernel_initializer=init_ops.glorot_uniform_initializer(),
name=hidden_layer_scope)
if dropout is not None and is_training:
net = core_layers.dropout(net, rate=dropout, training=True)
if batch_norm:
# TODO(hjm): In future, if this becomes popular, we can enable
# customization of the batch normalization params by accepting a
# list of `BatchNormalization` instances as `batch_norm`.
net = normalization.batch_normalization(
net,
# The default momentum 0.99 actually crashes on certain
# problems, so here we use 0.999, which is the default of
# tf.contrib.layers.batch_norm.
momentum=0.999,
training=is_training,
name='batchnorm_%d' % layer_id)
_add_hidden_layer_summary(net, hidden_layer_scope.name)

with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
logits = core_layers.dense(
net,
units=units,
activation=None,
hidden_layer = core_layers.Dense(
units=num_hidden_units,
activation=activation_fn,
kernel_initializer=init_ops.glorot_uniform_initializer(),
name=logits_scope)
_add_hidden_layer_summary(logits, logits_scope.name)

name='hiddenlayer_%d' % layer_id)
hidden_layers.append(hidden_layer)
if self._dropout is not None:
dropout_layer = core_layers.Dropout(rate=dropout)
dropout_layers.append(dropout_layer)
if self._batch_norm:
batch_norm_layer = normalization.BatchNormalization(
# The default momentum 0.99 actually crashes on certain
# problems, so here we use 0.999, which is the default of
# tf.contrib.layers.batch_norm.
momentum=0.999,
trainable=True,
name='hiddenlayer_%d/batchnorm_%d' % (layer_id, layer_id))
batch_norm_layers.append(batch_norm_layer)

self._hidden_layers = self._add_layers(hidden_layers)
if self._dropout is not None:
self._dropout_layers = self._add_layers(dropout_layers)
if self._batch_norm:
self._batch_norm_layers = self._add_layers(batch_norm_layers)

self._logits_layer = core_layers.Dense(
units=units,
activation=None,
kernel_initializer=init_ops.glorot_uniform_initializer(),
name='logits')

def call(self, features, mode):
is_training = mode == model_fn.ModeKeys.TRAIN
with variable_scope.variable_scope('input_from_feature_columns'):
net = self._input_layer(features)
for i in range(len(self._hidden_layers)):
net = self._hidden_layers[i](net)
if self._dropout is not None and is_training:
net = self._dropout_layers[i](net)
if self._batch_norm:
net = self._batch_norm_layers[i](net, training=is_training)
_add_hidden_layer_summary(net, self._hidden_layers[i].name)

logits = self._logits_layer(net)
_add_hidden_layer_summary(logits, self._logits_layer.name)
return logits

return dnn_logit_fn
def _add_layers(self, layers):
# "Magic" required for keras.Model classes to track all the variables in
# a list of layers.Layer objects.
# TODO(ashankar): Figure out API so user code doesn't have to do this.
for layer in layers:
setattr(self, layer.name, layer)
return layers


def _dnn_model_fn(features,
@@ -143,7 +213,8 @@ def _dnn_model_fn(features,
input_layer_partitioner=None,
config=None,
use_tpu=False,
batch_norm=False):
batch_norm=False,
shared_state_manager=None):
"""Deep Neural Net model_fn.
Args:
@@ -167,6 +238,8 @@ def _dnn_model_fn(features,
use_tpu: Whether to make a DNN model able to run on TPU. Will make function
return a `_TPUEstimatorSpec` instance and disable variable partitioning.
batch_norm: Whether to use batch normalization after each hidden layer.
shared_state_manager: A SharedEmbeddingStateManager object to hold the
shared state for SharedEmbeddingColumns.
Returns:
An `EstimatorSpec` instance.
@@ -202,7 +275,8 @@ def _dnn_model_fn(features,
activation_fn=activation_fn,
dropout=dropout,
input_layer_partitioner=input_layer_partitioner,
batch_norm=batch_norm)
batch_norm=batch_norm,
shared_state_manager=shared_state_manager)
logits = logit_fn(features=features, mode=mode)

if use_tpu:
@@ -370,6 +444,10 @@ def __init__(
"""
head = head_lib._binary_logistic_or_multi_class_head( # pylint: disable=protected-access
n_classes, weight_column, label_vocabulary, loss_reduction)

shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
feature_columns)

def _model_fn(features, labels, mode, config):
"""Call the defined shared _dnn_model_fn."""
return _dnn_model_fn(
@@ -384,7 +462,8 @@ def _model_fn(features, labels, mode, config):
dropout=dropout,
input_layer_partitioner=input_layer_partitioner,
config=config,
batch_norm=batch_norm)
batch_norm=batch_norm,
shared_state_manager=shared_state_manager)

super(DNNClassifier, self).__init__(
model_fn=_model_fn, model_dir=model_dir, config=config,
@@ -532,14 +611,19 @@ def __init__(
batch_norm: Whether to use batch normalization after each hidden layer.
"""

shared_state_manager = None
if feature_column_v2.is_feature_column_v2(feature_columns):
shared_state_manager = feature_column_v2.SharedEmbeddingStateManager()

def _model_fn(features, labels, mode, config):
"""Call the defined shared _dnn_model_fn."""
return _dnn_model_fn(
features=features,
labels=labels,
mode=mode,
head=head_lib._regression_head( # pylint: disable=protected-access
label_dimension=label_dimension, weight_column=weight_column,
label_dimension=label_dimension,
weight_column=weight_column,
loss_reduction=loss_reduction),
hidden_units=hidden_units,
feature_columns=tuple(feature_columns or []),
@@ -548,7 +632,8 @@ def _model_fn(features, labels, mode, config):
dropout=dropout,
input_layer_partitioner=input_layer_partitioner,
config=config,
batch_norm=batch_norm)
batch_norm=batch_norm,
shared_state_manager=shared_state_manager)

super(DNNRegressor, self).__init__(
model_fn=_model_fn, model_dir=model_dir, config=config,
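The new _DNNModel follows the Keras subclassed-model pattern: every layer is constructed once in __init__ and applied to the inputs in call, and _add_layers attaches each layer as an attribute because, at the time, a plain Python list of layers was not automatically tracked by the parent Model. A self-contained sketch of the same pattern using the public Keras API (the layer sizes and names are illustrative):

import tensorflow as tf


class StackedDense(tf.keras.Model):
  """Builds layers in __init__, applies them in call, tracks them via setattr."""

  def __init__(self, hidden_units, output_units, name=None):
    super(StackedDense, self).__init__(name=name)
    hidden_layers = [
        tf.keras.layers.Dense(n, activation='relu', name='hiddenlayer_%d' % i)
        for i, n in enumerate(hidden_units)
    ]
    # Mirror _DNNModel._add_layers: attach each layer as an attribute so the
    # parent Model tracks its variables.
    for layer in hidden_layers:
      setattr(self, layer.name, layer)
    self._hidden_layers = hidden_layers
    self._logits_layer = tf.keras.layers.Dense(output_units, name='logits')

  def call(self, inputs):
    net = inputs
    for layer in self._hidden_layers:
      net = layer(net)
    return self._logits_layer(net)

Calling StackedDense([64, 32], 3) on a batch of features then exposes every hidden-layer and logits variable through the model's variables property.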
7 changes: 6 additions & 1 deletion tensorflow/python/estimator/canned/dnn_linear_combined.py
@@ -27,6 +27,7 @@
from tensorflow.python.estimator.canned import head as head_lib
from tensorflow.python.estimator.canned import linear
from tensorflow.python.estimator.canned import optimizers
from tensorflow.python.feature_column import feature_column_v2
from tensorflow.python.framework import ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import nn
@@ -142,6 +143,9 @@ def _dnn_linear_combined_model_fn(features,
max_partitions=num_ps_replicas,
min_slice_size=64 << 20))

shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
list(linear_feature_columns) + list(dnn_feature_columns))

# Build DNN Logits.
dnn_parent_scope = 'dnn'

@@ -170,7 +174,8 @@ def _dnn_linear_combined_model_fn(features,
activation_fn=dnn_activation_fn,
dropout=dnn_dropout,
input_layer_partitioner=input_layer_partitioner,
batch_norm=batch_norm)
batch_norm=batch_norm,
shared_state_manager=shared_state_manager)
dnn_logits = dnn_logit_fn(features=features, mode=mode)

linear_parent_scope = 'linear'
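The shared_state_manager plumbed through these model functions only comes into play when the feature columns include shared embedding columns; judging by the parallel code in DNNRegressor, maybe_create_shared_state_manager returns a SharedEmbeddingStateManager only for v2 feature columns and None otherwise. As a rough illustration of the kind of caller that reaches this path, here is a standard shared-embedding setup with the public API (feature names and sizes are invented for the example, and whether the v2 path is taken depends on which feature-column implementation the installed TensorFlow produces):

import tensorflow as tf

# Two categorical features that share a single 16-dimensional embedding table.
watched = tf.feature_column.categorical_column_with_identity(
    'watched_video_id', num_buckets=100000)
impressed = tf.feature_column.categorical_column_with_identity(
    'impressed_video_id', num_buckets=100000)
shared_columns = tf.feature_column.shared_embedding_columns(
    [watched, impressed], dimension=16)

classifier = tf.estimator.DNNClassifier(
    hidden_units=[64, 32],
    feature_columns=shared_columns,
    n_classes=2)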
(Diffs for the remaining changed files are not shown here.)
