Merge BVLC#2217 (ReshapeLayer) and fix conflict
weiliu89 committed Apr 14, 2015
2 parents 0710648 + d09a66d commit c1d560e
Showing 5 changed files with 519 additions and 1 deletion.
39 changes: 39 additions & 0 deletions docs/tutorial/layers.md
@@ -419,6 +419,45 @@ The `SPLIT` layer is a utility layer that splits an input blob to multiple outpu

The `FLATTEN` layer is a utility layer that flattens an input of shape `n * c * h * w` to a simple vector output of shape `n * (c*h*w) * 1 * 1`.

#### Reshape

* Layer type: `Reshape`
* Implementation: `./src/caffe/layers/reshape_layer.cpp`
* Parameters (`ReshapeParameter reshape_param`)
    - Optional: (also see detailed description below)
        - `shape`

* Input
    - a single blob with arbitrary dimensions
* Output
    - the same blob, with modified dimensions, as specified by `reshape_param`

* Sample

        layer {
          name: "reshape"
          type: "Reshape"
          bottom: "input"
          top: "output"
          reshape_param {
            shape {
              dim: 0  # copy the dimension from below
              dim: 2
              dim: 3
              dim: -1 # infer it from the other dimensions
            }
          }
        }

The `Reshape` layer can be used to change the dimensions of its input without changing its data. Just like the `Flatten` layer, only the dimensions are changed; no data is copied in the process.

Output dimensions are specified by the `ReshapeParameter` proto. Positive numbers are used directly, setting the corresponding dimension of the output blob. In addition, two special values are accepted for any of the target dimension values:

* **0** means "copy the respective dimension of the bottom layer". That is, if the bottom has 2 as its 1st dimension, the top will have 2 as its 1st dimension as well, given `dim: 0` as the 1st target dimension.
* **-1** stands for "infer this from the other dimensions". This behavior is similar to that of -1 in *numpy*'s reshape or `[]` in *MATLAB*'s reshape: this dimension is calculated to keep the overall element count the same as in the bottom layer. At most one -1 can be used in a reshape operation.
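
For example, given a bottom blob of shape `2 x 3 x 4` (24 elements in total), `reshape_param { shape { dim: 0 dim: -1 dim: 2 } }` produces a top blob of shape `2 x 6 x 2`: the first dimension is copied from the bottom, the last is fixed at 2, and the middle is inferred as 24 / (2 * 2) = 6.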

As another example, specifying `reshape_param { shape { dim: 0 dim: -1 } }` makes the layer behave in exactly the same way as the `Flatten` layer.
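
The `ReshapeParameter` message (see `caffe.proto` below) also defines `axis` and `num_axes` fields that restrict the reshape to a sub-range of the bottom blob's axes. As a minimal sketch, assuming an `n * c * h * w` bottom blob named "input" (the layer and blob names here are illustrative), the following flattens everything after the batch axis while leaving that axis untouched:

        layer {
          name: "flatten_tail"
          type: "Reshape"
          bottom: "input"
          top: "output"
          reshape_param {
            shape { dim: -1 }  # collapse axes 1..3 into one inferred dimension
            axis: 1            # keep axis 0 (the batch axis) out of the reshape
          }
        }

This yields an output of shape `n * (c*h*w)`, a two-axis analogue of the `FLATTEN` output described above.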

#### Concatenation

* LayerType: `CONCAT`
38 changes: 38 additions & 0 deletions include/caffe/common_layers.hpp
@@ -306,6 +306,44 @@ class MVNLayer : public Layer<Dtype> {
  Blob<Dtype> sum_multiplier_;
};

/**
 * @brief Reshapes the input Blob into an arbitrary-sized output Blob.
 *
 * Note: similarly to FlattenLayer, this layer does not change the input values
 * (see FlattenLayer, Blob::ShareData and Blob::ShareDiff).
 */
template <typename Dtype>
class ReshapeLayer : public Layer<Dtype> {
 public:
  explicit ReshapeLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Reshape"; }
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
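  // Forward and Backward are intentionally no-ops: Reshape() makes the top
  // blob share the bottom blob's data and diff, so there is nothing to compute.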
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}

  /// @brief vector of axes indices whose dimensions we'll copy from the bottom
  vector<int> copy_axes_;
  /// @brief the index of the axis whose dimension we infer, or -1 if none
  int inferred_axis_;
  /// @brief the product of the "constant" output dimensions
  int64_t constant_count_;
};

/**
* @brief Normalizes input.
 * https://github.com/kuprel/caffe
95 changes: 95 additions & 0 deletions src/caffe/layers/reshape_layer.cpp
@@ -0,0 +1,95 @@
#include <vector>

#include "caffe/common_layers.hpp"
#include "caffe/layer.hpp"

namespace caffe {

template <typename Dtype>
void ReshapeLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  inferred_axis_ = -1;
  copy_axes_.clear();
  const BlobShape& top_blob_shape = this->layer_param_.reshape_param().shape();
  const int top_num_axes = top_blob_shape.dim_size();
  constant_count_ = 1;
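  // Scan the requested shape once: record which axes copy their extent from
  // the bottom (dim == 0), which single axis is inferred (dim == -1), and
  // accumulate the product of the explicitly specified dimensions.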
  for (int i = 0; i < top_num_axes; ++i) {
    const int top_dim = top_blob_shape.dim(i);
    if (top_dim == 0) {
      copy_axes_.push_back(i);
    } else if (top_dim == -1) {
      CHECK_EQ(inferred_axis_, -1) << "new shape contains multiple "
          << "-1 dims; at most a single (1) value of -1 may be specified";
      inferred_axis_ = i;
    } else {
      constant_count_ *= top_dim;
    }
  }
}

template <typename Dtype>
void ReshapeLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
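  // Resolve the start axis; a negative axis counts back from the end of the
  // bottom shape (e.g. -1 starts the reshape after the last axis).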
  const int input_start_axis = this->layer_param_.reshape_param().axis();
  const int start_axis = (input_start_axis >= 0) ? input_start_axis :
      bottom[0]->num_axes() + input_start_axis + 1;
  CHECK_GE(start_axis, 0) << "axis " << input_start_axis << " out of range";
  CHECK_LE(start_axis, bottom[0]->num_axes()) << "axis " << input_start_axis
      << " out of range for " << bottom[0]->num_axes() << "-D input blob";
  const int num_axes = this->layer_param_.reshape_param().num_axes();
  CHECK_GE(num_axes, -1) << "num_axes must be >= 0, or -1 for all";
  const int end_axis =
      (num_axes == -1) ? bottom[0]->num_axes() : (start_axis + num_axes);
  CHECK_LE(end_axis, bottom[0]->num_axes())
      << "end_axis = axis + num_axes is out of range";
  const int num_axes_replaced = end_axis - start_axis;
  const int num_axes_retained = bottom[0]->num_axes() - num_axes_replaced;
  const BlobShape& top_blob_shape = this->layer_param_.reshape_param().shape();
  const int num_new_axes = top_blob_shape.dim_size();
  vector<int> top_shape(num_axes_retained + num_new_axes);
  int top_shape_index = 0;
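  // Assemble the output shape: retained leading bottom axes, then the new
  // axes from reshape_param, then retained trailing bottom axes.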
  for (int i = 0; i < start_axis; ++i) {
    top_shape[top_shape_index++] = bottom[0]->shape(i);
  }
  for (int i = 0; i < num_new_axes; ++i) {
    top_shape[top_shape_index++] = top_blob_shape.dim(i);
  }
  for (int i = end_axis; i < bottom[0]->num_axes(); ++i) {
    top_shape[top_shape_index++] = bottom[0]->shape(i);
  }
  CHECK_EQ(top_shape_index, top_shape.size());
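  // Dimensions requested as 0 copy the extent of the corresponding bottom
  // axis (offset by start_axis).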
  for (int i = 0; i < copy_axes_.size(); ++i) {
    const int copy_axis_index = copy_axes_[i];
    CHECK_GT(bottom[0]->num_axes(), start_axis + copy_axis_index)
        << "new shape contains a 0, but there was no corresponding bottom axis "
        << "to copy";
    top_shape[start_axis + copy_axis_index] =
        bottom[0]->shape(start_axis + copy_axis_index);
  }
  if (inferred_axis_ >= 0) {
    // A -1 dim was specified; infer the correct dimension by computing the
    // product of the other dimensions.
    int explicit_count = constant_count_;
    explicit_count *= bottom[0]->count(0, start_axis);
    explicit_count *= bottom[0]->count(end_axis);
    for (int i = 0; i < copy_axes_.size(); ++i) {
      const int copy_axis_index = copy_axes_[i];
      explicit_count *= top_shape[start_axis + copy_axis_index];
    }
    CHECK_EQ(0, bottom[0]->count() % explicit_count) << "bottom count ("
        << bottom[0]->count() << ") must be divisible by the product of "
        << "the specified dimensions (" << explicit_count << ")";
    const int inferred_dim = bottom[0]->count() / explicit_count;
    top_shape[start_axis + inferred_axis_] = inferred_dim;
  }
  top[0]->Reshape(top_shape);
  CHECK_EQ(top[0]->count(), bottom[0]->count())
      << "output count must match input count";
  top[0]->ShareData(*bottom[0]);
  top[0]->ShareDiff(*bottom[0]);
}

INSTANTIATE_CLASS(ReshapeLayer);
REGISTER_LAYER_CLASS(Reshape);

} // namespace caffe
68 changes: 67 additions & 1 deletion src/caffe/proto/caffe.proto
@@ -270,7 +270,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 136 (last added: tile_param)
// LayerParameter next available layer-specific ID: 137 (last added: reshape_param)
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
@@ -343,6 +343,7 @@ message LayerParameter {
  optional PReLUParameter prelu_param = 131;
  optional PythonParameter python_param = 130;
  optional ReLUParameter relu_param = 123;
  optional ReshapeParameter reshape_param = 136;
  optional SigmoidParameter sigmoid_param = 124;
  optional SoftmaxParameter softmax_param = 125;
  optional SliceParameter slice_param = 126;
@@ -716,6 +717,71 @@ message ReLUParameter {
  optional Engine engine = 2 [default = DEFAULT];
}

// Message that stores parameters used by ReshapeLayer
message ReshapeParameter {
  // Specify the output dimensions. If some of the dimensions are set to 0,
  // the corresponding dimension from the bottom layer is used (unchanged).
  // Exactly one dimension may be set to -1, in which case its value is
  // inferred from the count of the bottom blob and the remaining dimensions.
  // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8:
  //
  //   layer {
  //     type: "Reshape" bottom: "input" top: "output"
  //     reshape_param { ... }
  //   }
  //
  // If "input" is 2D with shape 2 x 8, then the following reshape_param
  // specifications are all equivalent, producing a 3D blob "output" with shape
  // 2 x 2 x 4:
  //
  //   reshape_param { shape { dim: 2 dim: 2 dim: 4 } }
  //   reshape_param { shape { dim: 0 dim: 2 dim: 4 } }
  //   reshape_param { shape { dim: 0 dim: 2 dim: -1 } }
  //   reshape_param { shape { dim: 0 dim: -1 dim: 4 } }
  //
  optional BlobShape shape = 1;

  // axis and num_axes control the portion of the bottom blob's shape that is
  // replaced by (included in) the reshape. By default (axis == 0 and
  // num_axes == -1), the entire bottom blob shape is included in the reshape,
  // and hence the shape field must specify the entire output shape.
  //
  // axis may be non-zero to retain some portion of the beginning of the input
  // shape (and may be negative to index from the end; e.g., -1 to begin the
  // reshape after the last axis, including nothing in the reshape,
  // -2 to include only the last axis, etc.).
  //
  // For example, suppose "input" is a 2D blob with shape 2 x 8.
  // Then the following ReshapeLayer specifications are all equivalent,
  // producing a blob "output" with shape 2 x 2 x 4:
  //
  //   reshape_param { shape { dim: 2 dim: 2 dim: 4 } }
  //   reshape_param { shape { dim: 2 dim: 4 } axis: 1 }
  //   reshape_param { shape { dim: 2 dim: 4 } axis: -2 }
  //
  // num_axes specifies the extent of the reshape.
  // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
  // input axes in the range [axis, axis + num_axes).
  // num_axes may also be -1, the default, to include all remaining axes
  // (starting from axis).
  //
  // For example, suppose "input" is a 2D blob with shape 2 x 8.
  // Then the following ReshapeLayer specifications are equivalent,
  // producing a blob "output" with shape 1 x 2 x 8:
  //
  //   reshape_param { shape { dim: 1 dim: 2 dim: 8 } }
  //   reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 }
  //   reshape_param { shape { dim: 1 } num_axes: 0 }
  //
  // On the other hand, these would produce output blob shape 2 x 1 x 8:
  //
  //   reshape_param { shape { dim: 2 dim: 1 dim: 8 } }
  //   reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 }
  //
  optional int32 axis = 2 [default = 0];
  optional int32 num_axes = 3 [default = -1];
}

// Message that stores parameters used by SigmoidLayer
message SigmoidParameter {
  enum Engine {