From 4fb3c9e6a5ac80804c910639d14651c2ecdcb5f3 Mon Sep 17 00:00:00 2001 From: Simon Safar Date: Wed, 15 Oct 2014 20:15:14 -0700 Subject: [PATCH 1/4] Added a Reshape layer for copying-free modification of blob dimensions. --- docs/tutorial/layers.md | 42 +++++++++ include/caffe/common_layers.hpp | 35 ++++++++ src/caffe/layers/reshape_layer.cpp | 113 ++++++++++++++++++++++++ src/caffe/layers/reshape_layer.cu | 23 +++++ src/caffe/proto/caffe.proto | 16 +++- src/caffe/test/test_reshape_layer.cpp | 120 ++++++++++++++++++++++++++ 6 files changed, 348 insertions(+), 1 deletion(-) create mode 100644 src/caffe/layers/reshape_layer.cpp create mode 100644 src/caffe/layers/reshape_layer.cu create mode 100644 src/caffe/test/test_reshape_layer.cpp diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md index 839939f5ad6..422ee01f201 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -419,6 +419,48 @@ The `SPLIT` layer is a utility layer that splits an input blob to multiple outpu The `FLATTEN` layer is a utility layer that flattens an input of shape `n * c * h * w` to a simple vector output of shape `n * (c*h*w) * 1 * 1`. +#### Reshape + +* LayerType: `RESHAPE` +* CPU implementation: `./src/caffe/layers/reshape_layer.cpp` +* CUDA GPU implementation: `./src/caffe/layers/reshape_layer.cu` +* Parameters (`ReshapeParameter reshape_param`) + - Optional: (also see detailed description below) + - `num` [default 0] + - `channels` [default 0] + - `width` [default 0] + - `height` [default 0] + +* Input + - a single blob with arbitrary dimensions +* Output + - the same blob, with modified dimensions, as specified by `reshape_param` + +* Sample + + layers { + name: "reshape" + type: RESHAPE + bottom: "input" + top: "output" + + reshape_param { + num: 0 # copy the dimension from below + channels: 2 + width: 3 + height: -1 # infer it from the other dimensions + } + } + +The `RESHAPE` layer can be used to change the dimensions of its input, without changing its data. Just like the `FLATTEN` layer, only the dimensions are changed, no data is copied in the process. + +Output dimensions are specified by the `ReshapeParam` proto. Positive numbers are used directly, setting the corresponding dimension of the output blob. In addition, two special values are accepted for any of the target dimension values: + +* **0** means "copy the respective dimension of the bottom layer". That is, if the bottom layer has 2 channels, the top one will have 2 channels too, given `channels: 0` as target dimension. Since the default value of all the target dimensions is 0, omitting any of the target dimensions will also cause it to be copied. +* **-1** stands for "infer this from the other dimensions". This behavior is similar to that of -1 in *numpy*'s or `[]` for *MATLAB*'s reshape: this dimension is calculated to keep the overall element count the same as in the bottom layer. If this is not possible, an error is raised. Also, at most one -1 can be used in a reshape operation. + +As another example, giving `num: 0, channels: -1, height: 1, width: 1` as parameters makes the layer behave in exactly the same way as the `FLATTEN` layer. + #### Concatenation * LayerType: `CONCAT` diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index cae1c3e4ee6..945c0cef1b6 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -297,6 +297,41 @@ class MVNLayer : public Layer { Blob sum_multiplier_; }; +/* + * @brief Reshapes the input Blob into an arbitrary-sized output Blob. 
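+ * For example, a 2 x 3 x 6 x 5 bottom Blob (count 180) can be reshaped into
+ * a 2 x 90 x 1 x 1 top Blob, since the total element count stays unchanged.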
+ * + * Note: similarly to FlattenLayer, this layer does not change the input values + * (see FlattenLayer, Blob::ShareData and Blob::ShareDiff). + */ +template +class ReshapeLayer : public Layer { + public: + explicit ReshapeLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& bottom, + const vector& propagate_down, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + void FillInSingleUnspecifiedDimension(int bottom_count); + + int num_out; + int channels_out; + int height_out; + int width_out; +}; + /** * @brief Ignores bottom blobs while producing no top blobs. (This is useful * to suppress outputs during testing.) diff --git a/src/caffe/layers/reshape_layer.cpp b/src/caffe/layers/reshape_layer.cpp new file mode 100644 index 00000000000..7e8704e058a --- /dev/null +++ b/src/caffe/layers/reshape_layer.cpp @@ -0,0 +1,113 @@ +#include + +#include "caffe/common_layers.hpp" +#include "caffe/layer.hpp" + +namespace caffe { + +template +void ReshapeLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + CHECK_EQ(bottom.size(), 1) << "Reshape Layer takes a single blob as input."; + CHECK_EQ(top.size(), 1) << "Reshape Layer takes a single blob as output."; + + num_out = this->layer_param_.reshape_param().num(); + // Dimensions set to 0 (either by default or explicitly) will be copied from + // the bottom layer. + if (num_out == 0) { + num_out = bottom[0]->num(); + } + + channels_out = this->layer_param_.reshape_param().channels(); + if (channels_out == 0) { + channels_out = bottom[0]->channels(); + } + + width_out = this->layer_param_.reshape_param().width(); + if (width_out == 0) { + width_out = bottom[0]->width(); + } + + height_out = this->layer_param_.reshape_param().height(); + if (height_out == 0) { + height_out = bottom[0]->height(); + } + + FillInSingleUnspecifiedDimension(bottom[0]->count()); +} + +template +void ReshapeLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + top[0]->Reshape(num_out, channels_out, height_out, width_out); + + const size_t out_count = num_out * channels_out * height_out * width_out; + CHECK_EQ(out_count, bottom[0]->count()) << + "Bottom layer count isn't equal to predicted; output layer size is " << + num_out << "x" << channels_out << "x" << height_out << "x" << width_out; +} + +template +void ReshapeLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + top[0]->ShareData(*bottom[0]); +} + +template +void ReshapeLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + bottom[0]->ShareDiff(*top[0]); +} + +/** + * @brief Fill in a single dimension left unspecified. + * + * If a dimension is set to -1, it will be filled in with a value inferred from + * the count of the bottom layer (if the product of the nonzero dimensions is a + * divisor of the count). + * + * @param bottom_count Count of the bottom layer. + */ +template +void ReshapeLayer::FillInSingleUnspecifiedDimension(int bottom_count) { + int* const dimensions[] = {&num_out, &channels_out, &width_out, &height_out}; + const size_t N_DIMENSIONS = 4; + + // How many -1 dimensions do we have. 
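+  // At most one -1 may appear; this is enforced by a CHECK_EQ further down.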
+ int n_unspecified = 0; + // Product of the remaining dimensions. + int product_without_unspecified_dim = 1; + + for (size_t i = 0; i < N_DIMENSIONS; i++) { + if (*(dimensions[i]) == -1) { + n_unspecified++; + } else { + product_without_unspecified_dim *= *(dimensions[i]); + } + } + + if (n_unspecified == 0) { + // Everything is filled out, nothing to do. + return; + } + + CHECK_EQ(n_unspecified, 1) << "Only one dimension can be set -1."; + CHECK_EQ(bottom_count % product_without_unspecified_dim, 0) << + "Bottom layer count " << bottom_count << " not divisible by product " << + product_without_unspecified_dim; + + // Fill up the one remaining dimension. + for (size_t i = 0; i < N_DIMENSIONS; i++) { + if (*(dimensions[i]) == -1) { + *(dimensions[i]) = bottom_count / product_without_unspecified_dim; + } + } +} + +#ifdef CPU_ONLY +STUB_GPU(ReshapeLayer); +#endif + +INSTANTIATE_CLASS(ReshapeLayer); +REGISTER_LAYER_CLASS(RESHAPE, ReshapeLayer); +} // namespace caffe diff --git a/src/caffe/layers/reshape_layer.cu b/src/caffe/layers/reshape_layer.cu new file mode 100644 index 00000000000..3023ce3ae88 --- /dev/null +++ b/src/caffe/layers/reshape_layer.cu @@ -0,0 +1,23 @@ +#include + +#include "caffe/common_layers.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void ReshapeLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + top[0]->ShareData(*bottom[0]); +} + +template +void ReshapeLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + bottom[0]->ShareDiff(*top[0]); +} + +INSTANTIATE_LAYER_GPU_FUNCS(ReshapeLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 4516106428c..e8bf240c1b3 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -259,7 +259,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 133 (last added: spp_param) +// LayerParameter next available layer-specific ID: 134 (last added: reshape_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -326,6 +326,7 @@ message LayerParameter { optional PReLUParameter prelu_param = 131; optional PythonParameter python_param = 130; optional ReLUParameter relu_param = 123; + optional ReshapeParameter reshape_param = 133; optional SigmoidParameter sigmoid_param = 124; optional SoftmaxParameter softmax_param = 125; optional SPPParameter spp_param = 132; @@ -690,6 +691,19 @@ message ReLUParameter { optional Engine engine = 2 [default = DEFAULT]; } +// Message that stores parameters used by ReshapeLayer +message ReshapeParameter { + // Specify the output dimensions. If some of the following parameters are + // omitted or set to 0 explicitly, the corresponding dimension from the bottom + // layer is used (unchanged). Also, if exactly one of them is set to -1, its + // value is calculated from the count of the bottom layer and the remaining + // dimensions, if possible. 
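+  //
+  // For example, with a 2 x 3 x 6 x 5 bottom blob (count 180), the settings
+  // { num: 0, channels: -1, height: 1, width: 1 } produce a 2 x 90 x 1 x 1
+  // top blob: num is copied from the bottom, and channels is inferred as
+  // 180 / (2 * 1 * 1) = 90.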
+ optional int32 num = 1 [default = 0]; + optional int32 channels = 2 [default = 0]; + optional int32 height = 3 [default = 0]; + optional int32 width = 4 [default = 0]; +} + // Message that stores parameters used by SigmoidLayer message SigmoidParameter { enum Engine { diff --git a/src/caffe/test/test_reshape_layer.cpp b/src/caffe/test/test_reshape_layer.cpp new file mode 100644 index 00000000000..878d40bb4d5 --- /dev/null +++ b/src/caffe/test/test_reshape_layer.cpp @@ -0,0 +1,120 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/common_layers.hpp" +#include "caffe/filler.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class ReshapeLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + protected: + ReshapeLayerTest() + : blob_bottom_(new Blob(2, 3, 6, 5)), + blob_top_(new Blob()) { + // fill the values + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + + virtual ~ReshapeLayerTest() { delete blob_bottom_; delete blob_top_; } + + Blob* const blob_bottom_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(ReshapeLayerTest, TestDtypesAndDevices); + +TYPED_TEST(ReshapeLayerTest, TestFlattenOutputSizes) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ReshapeParameter* reshape_param = + layer_param.mutable_reshape_param(); + reshape_param->set_channels(-1); + reshape_param->set_height(1); + reshape_param->set_width(1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3 * 6 * 5); + EXPECT_EQ(this->blob_top_->height(), 1); + EXPECT_EQ(this->blob_top_->width(), 1); +} + +TYPED_TEST(ReshapeLayerTest, TestFlattenValues) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ReshapeParameter* reshape_param = + layer_param.mutable_reshape_param(); + reshape_param->set_channels(-1); + reshape_param->set_height(1); + reshape_param->set_width(1); + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int c = 0; c < 3 * 6 * 5; ++c) { + EXPECT_EQ(this->blob_top_->data_at(0, c, 0, 0), + this->blob_bottom_->data_at(0, c / (6 * 5), (c / 5) % 6, c % 5)); + EXPECT_EQ(this->blob_top_->data_at(1, c, 0, 0), + this->blob_bottom_->data_at(1, c / (6 * 5), (c / 5) % 6, c % 5)); + } +} + +// Test whether setting output dimensions to 0 either explicitly or implicitly +// copies the respective dimension of the input layer. +TYPED_TEST(ReshapeLayerTest, TestCopyDimensions) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ReshapeParameter* reshape_param = + layer_param.mutable_reshape_param(); + // Omitting num to test implicit zeroes. 
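+  // (num defaults to 0 in ReshapeParameter, so it is copied from the bottom.)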
+ reshape_param->set_channels(0); + reshape_param->set_height(0); + reshape_param->set_width(0); + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3); + EXPECT_EQ(this->blob_top_->height(), 6); + EXPECT_EQ(this->blob_top_->width(), 5); +} + +// When a dimension is set to -1, we should infer its value from the other +// dimensions (including those that get copied from below). +TYPED_TEST(ReshapeLayerTest, TestInferenceOfUnspecified) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ReshapeParameter* reshape_param = + layer_param.mutable_reshape_param(); + // Since omitted, num is implicitly set to 0 (thus, copies 2). + reshape_param->set_channels(3); + reshape_param->set_height(10); + reshape_param->set_width(-1); + + // Count is 180, thus height should be 180 / (2*3*10) = 3. + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3); + EXPECT_EQ(this->blob_top_->height(), 10); + EXPECT_EQ(this->blob_top_->width(), 3); +} + +} // namespace caffe From fa6169ee799f97f80d33d6b4525c7fd4b891774a Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 25 Mar 2015 17:44:37 -0700 Subject: [PATCH 2/4] ReshapeLayer fixups for ND blobs --- docs/tutorial/layers.md | 33 +++---- include/caffe/common_layers.hpp | 31 +++--- src/caffe/layers/reshape_layer.cpp | 135 ++++++++------------------ src/caffe/layers/reshape_layer.cu | 23 ----- src/caffe/proto/caffe.proto | 14 +-- src/caffe/test/test_reshape_layer.cpp | 42 ++++---- 6 files changed, 101 insertions(+), 177 deletions(-) delete mode 100644 src/caffe/layers/reshape_layer.cu diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md index 422ee01f201..c4529e6afc0 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -421,15 +421,11 @@ The `FLATTEN` layer is a utility layer that flattens an input of shape `n * c * #### Reshape -* LayerType: `RESHAPE` -* CPU implementation: `./src/caffe/layers/reshape_layer.cpp` -* CUDA GPU implementation: `./src/caffe/layers/reshape_layer.cu` +* Layer type: `Reshape` +* Implementation: `./src/caffe/layers/reshape_layer.cpp` * Parameters (`ReshapeParameter reshape_param`) - Optional: (also see detailed description below) - - `num` [default 0] - - `channels` [default 0] - - `width` [default 0] - - `height` [default 0] + - `shape` * Input - a single blob with arbitrary dimensions @@ -438,28 +434,29 @@ The `FLATTEN` layer is a utility layer that flattens an input of shape `n * c * * Sample - layers { + layer { name: "reshape" - type: RESHAPE + type: "Reshape" bottom: "input" top: "output" - reshape_param { - num: 0 # copy the dimension from below - channels: 2 - width: 3 - height: -1 # infer it from the other dimensions + shape { + dim: 0 # copy the dimension from below + dim: 2 + dim: 3 + dim: -1 # infer it from the other dimensions + } } } -The `RESHAPE` layer can be used to change the dimensions of its input, without changing its data. Just like the `FLATTEN` layer, only the dimensions are changed, no data is copied in the process. +The `Reshape` layer can be used to change the dimensions of its input, without changing its data. Just like the `Flatten` layer, only the dimensions are changed; no data is copied in the process. Output dimensions are specified by the `ReshapeParam` proto. 
Positive numbers are used directly, setting the corresponding dimension of the output blob. In addition, two special values are accepted for any of the target dimension values: -* **0** means "copy the respective dimension of the bottom layer". That is, if the bottom layer has 2 channels, the top one will have 2 channels too, given `channels: 0` as target dimension. Since the default value of all the target dimensions is 0, omitting any of the target dimensions will also cause it to be copied. -* **-1** stands for "infer this from the other dimensions". This behavior is similar to that of -1 in *numpy*'s or `[]` for *MATLAB*'s reshape: this dimension is calculated to keep the overall element count the same as in the bottom layer. If this is not possible, an error is raised. Also, at most one -1 can be used in a reshape operation. +* **0** means "copy the respective dimension of the bottom layer". That is, if the bottom has 2 as its 1st dimension, the top will have 2 as its 1st dimension as well, given `dim: 0` as the 1st target dimension. +* **-1** stands for "infer this from the other dimensions". This behavior is similar to that of -1 in *numpy*'s or `[]` for *MATLAB*'s reshape: this dimension is calculated to keep the overall element count the same as in the bottom layer. At most one -1 can be used in a reshape operation. -As another example, giving `num: 0, channels: -1, height: 1, width: 1` as parameters makes the layer behave in exactly the same way as the `FLATTEN` layer. +As another example, specifying `reshape_param { shape { dim: 0 dim: -1 } }` makes the layer behave in exactly the same way as the `Flatten` layer. #### Concatenation diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 945c0cef1b6..ccdfd62d5be 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -313,23 +313,28 @@ class ReshapeLayer : public Layer { virtual void Reshape(const vector*>& bottom, const vector*>& top); + virtual inline const char* type() const { return "Reshape"; } + virtual inline int ExactNumBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + protected: virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& bottom, - const vector& propagate_down, - const vector*>& top); + const vector*>& top) {} + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) {} virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); + const vector*>& top) {} virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - void FillInSingleUnspecifiedDimension(int bottom_count); - - int num_out; - int channels_out; - int height_out; - int width_out; + const vector& propagate_down, const vector*>& bottom) {} + + /// @brief the current output shape + vector top_shape_; + /// @brief vector of axes indices whose dimensions we'll copy from the bottom + vector copy_axes_; + /// @brief the index of the axis whose dimension we infer, or -1 if none + int inferred_axis_; + /// @brief the product of the "constant" output dimensions + int constant_count_; }; /** diff --git a/src/caffe/layers/reshape_layer.cpp b/src/caffe/layers/reshape_layer.cpp index 7e8704e058a..618edf31824 100644 --- a/src/caffe/layers/reshape_layer.cpp +++ b/src/caffe/layers/reshape_layer.cpp @@ -7,107 +7,58 @@ namespace caffe { template void ReshapeLayer::LayerSetUp(const vector*>& bottom, 
- const vector*>& top) { - CHECK_EQ(bottom.size(), 1) << "Reshape Layer takes a single blob as input."; - CHECK_EQ(top.size(), 1) << "Reshape Layer takes a single blob as output."; - - num_out = this->layer_param_.reshape_param().num(); - // Dimensions set to 0 (either by default or explicitly) will be copied from - // the bottom layer. - if (num_out == 0) { - num_out = bottom[0]->num(); - } - - channels_out = this->layer_param_.reshape_param().channels(); - if (channels_out == 0) { - channels_out = bottom[0]->channels(); - } - - width_out = this->layer_param_.reshape_param().width(); - if (width_out == 0) { - width_out = bottom[0]->width(); - } - - height_out = this->layer_param_.reshape_param().height(); - if (height_out == 0) { - height_out = bottom[0]->height(); + const vector*>& top) { + inferred_axis_ = -1; + copy_axes_.clear(); + const BlobShape& top_blob_shape = this->layer_param_.reshape_param().shape(); + const int top_num_axes = top_blob_shape.dim_size(); + top_shape_.resize(top_num_axes); + constant_count_ = 1; + for (int i = 0; i < top_num_axes; ++i) { + top_shape_[i] = top_blob_shape.dim(i); + if (top_shape_[i] == 0) { + copy_axes_.push_back(i); + } else if (top_shape_[i] == -1) { + CHECK_EQ(inferred_axis_, -1) << "new shape contains multiple " + << "-1 dims; at most a single (1) value of -1 may be specified"; + inferred_axis_ = i; + } else { + constant_count_ *= top_shape_[i]; + } } - - FillInSingleUnspecifiedDimension(bottom[0]->count()); } template void ReshapeLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - top[0]->Reshape(num_out, channels_out, height_out, width_out); - - const size_t out_count = num_out * channels_out * height_out * width_out; - CHECK_EQ(out_count, bottom[0]->count()) << - "Bottom layer count isn't equal to predicted; output layer size is " << - num_out << "x" << channels_out << "x" << height_out << "x" << width_out; -} - -template -void ReshapeLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - top[0]->ShareData(*bottom[0]); -} - -template -void ReshapeLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - bottom[0]->ShareDiff(*top[0]); -} - -/** - * @brief Fill in a single dimension left unspecified. - * - * If a dimension is set to -1, it will be filled in with a value inferred from - * the count of the bottom layer (if the product of the nonzero dimensions is a - * divisor of the count). - * - * @param bottom_count Count of the bottom layer. - */ -template -void ReshapeLayer::FillInSingleUnspecifiedDimension(int bottom_count) { - int* const dimensions[] = {&num_out, &channels_out, &width_out, &height_out}; - const size_t N_DIMENSIONS = 4; - - // How many -1 dimensions do we have. - int n_unspecified = 0; - // Product of the remaining dimensions. - int product_without_unspecified_dim = 1; - - for (size_t i = 0; i < N_DIMENSIONS; i++) { - if (*(dimensions[i]) == -1) { - n_unspecified++; - } else { - product_without_unspecified_dim *= *(dimensions[i]); - } - } - - if (n_unspecified == 0) { - // Everything is filled out, nothing to do. 
- return; + const vector*>& top) { + for (int i = 0; i < copy_axes_.size(); ++i) { + const int copy_axis_index = copy_axes_[i]; + CHECK_GT(bottom[0]->num_axes(), copy_axis_index) << "new shape contains " + << "a 0, but there is no corresponding bottom axis to copy"; + top_shape_[copy_axis_index] = bottom[0]->shape(copy_axis_index); } - - CHECK_EQ(n_unspecified, 1) << "Only one dimension can be set -1."; - CHECK_EQ(bottom_count % product_without_unspecified_dim, 0) << - "Bottom layer count " << bottom_count << " not divisible by product " << - product_without_unspecified_dim; - - // Fill up the one remaining dimension. - for (size_t i = 0; i < N_DIMENSIONS; i++) { - if (*(dimensions[i]) == -1) { - *(dimensions[i]) = bottom_count / product_without_unspecified_dim; + if (inferred_axis_ >= 0) { + // A -1 dim was specified; infer the correct dimension by computing the + // product of the other dimensions. + int explicit_count = constant_count_; + for (int i = 0; i < copy_axes_.size(); ++i) { + const int copy_axis_index = copy_axes_[i]; + explicit_count *= top_shape_[copy_axis_index]; } + CHECK_EQ(0, bottom[0]->count() % explicit_count) << "bottom count (" + << bottom[0]->count() << ") must be divisible by the product of " + << "the specified dimensions (" << explicit_count << ")"; + const int inferred_dim = bottom[0]->count() / explicit_count; + top_shape_[inferred_axis_] = inferred_dim; } + top[0]->Reshape(top_shape_); + CHECK_EQ(top[0]->count(), bottom[0]->count()) + << "output count must match input count"; + top[0]->ShareData(*bottom[0]); + top[0]->ShareDiff(*bottom[0]); } -#ifdef CPU_ONLY -STUB_GPU(ReshapeLayer); -#endif - INSTANTIATE_CLASS(ReshapeLayer); -REGISTER_LAYER_CLASS(RESHAPE, ReshapeLayer); +REGISTER_LAYER_CLASS(Reshape); + } // namespace caffe diff --git a/src/caffe/layers/reshape_layer.cu b/src/caffe/layers/reshape_layer.cu deleted file mode 100644 index 3023ce3ae88..00000000000 --- a/src/caffe/layers/reshape_layer.cu +++ /dev/null @@ -1,23 +0,0 @@ -#include - -#include "caffe/common_layers.hpp" -#include "caffe/layer.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void ReshapeLayer::Forward_gpu(const vector*>& bottom, - const vector*>& top) { - top[0]->ShareData(*bottom[0]); -} - -template -void ReshapeLayer::Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - bottom[0]->ShareDiff(*top[0]); -} - -INSTANTIATE_LAYER_GPU_FUNCS(ReshapeLayer); - -} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index e8bf240c1b3..d36f1d511df 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -693,15 +693,11 @@ message ReLUParameter { // Message that stores parameters used by ReshapeLayer message ReshapeParameter { - // Specify the output dimensions. If some of the following parameters are - // omitted or set to 0 explicitly, the corresponding dimension from the bottom - // layer is used (unchanged). Also, if exactly one of them is set to -1, its - // value is calculated from the count of the bottom layer and the remaining - // dimensions, if possible. - optional int32 num = 1 [default = 0]; - optional int32 channels = 2 [default = 0]; - optional int32 height = 3 [default = 0]; - optional int32 width = 4 [default = 0]; + // Specify the output dimensions. If some of the dimensions are set to 0, + // the corresponding dimension from the bottom layer is used (unchanged). 
+  // Exactly one dimension may be set to -1, in which case its value is
+  // inferred from the count of the bottom layer and the remaining dimensions.
+  optional BlobShape shape = 1;
 }
 
 // Message that stores parameters used by SigmoidLayer
diff --git a/src/caffe/test/test_reshape_layer.cpp b/src/caffe/test/test_reshape_layer.cpp
index 878d40bb4d5..0c8e2427aa7 100644
--- a/src/caffe/test/test_reshape_layer.cpp
+++ b/src/caffe/test/test_reshape_layer.cpp
@@ -41,11 +41,11 @@ TYPED_TEST_CASE(ReshapeLayerTest, TestDtypesAndDevices);
 TYPED_TEST(ReshapeLayerTest, TestFlattenOutputSizes) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  ReshapeParameter* reshape_param =
-      layer_param.mutable_reshape_param();
-  reshape_param->set_channels(-1);
-  reshape_param->set_height(1);
-  reshape_param->set_width(1);
+  BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape();
+  blob_shape->add_dim(0);
+  blob_shape->add_dim(-1);
+  blob_shape->add_dim(1);
+  blob_shape->add_dim(1);
 
   ReshapeLayer<Dtype> layer(layer_param);
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
@@ -58,11 +58,11 @@ TYPED_TEST(ReshapeLayerTest, TestFlattenOutputSizes) {
 TYPED_TEST(ReshapeLayerTest, TestFlattenValues) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  ReshapeParameter* reshape_param =
-      layer_param.mutable_reshape_param();
-  reshape_param->set_channels(-1);
-  reshape_param->set_height(1);
-  reshape_param->set_width(1);
+  BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape();
+  blob_shape->add_dim(0);
+  blob_shape->add_dim(-1);
+  blob_shape->add_dim(1);
+  blob_shape->add_dim(1);
   ReshapeLayer<Dtype> layer(layer_param);
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
   layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
@@ -79,12 +79,11 @@ TYPED_TEST(ReshapeLayerTest, TestFlattenValues) {
 TYPED_TEST(ReshapeLayerTest, TestCopyDimensions) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  ReshapeParameter* reshape_param =
-      layer_param.mutable_reshape_param();
-  // Omitting num to test implicit zeroes.
-  reshape_param->set_channels(0);
-  reshape_param->set_height(0);
-  reshape_param->set_width(0);
+  BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape();
+  blob_shape->add_dim(0);
+  blob_shape->add_dim(0);
+  blob_shape->add_dim(0);
+  blob_shape->add_dim(0);
   ReshapeLayer<Dtype> layer(layer_param);
   layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
 
@@ -99,12 +98,11 @@ TYPED_TEST(ReshapeLayerTest, TestCopyDimensions) {
 TYPED_TEST(ReshapeLayerTest, TestInferenceOfUnspecified) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
-  ReshapeParameter* reshape_param =
-      layer_param.mutable_reshape_param();
-  // Since omitted, num is implicitly set to 0 (thus, copies 2).
-  reshape_param->set_channels(3);
-  reshape_param->set_height(10);
-  reshape_param->set_width(-1);
+  BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape();
+  blob_shape->add_dim(0);
+  blob_shape->add_dim(3);
+  blob_shape->add_dim(10);
+  blob_shape->add_dim(-1);
 
   // Count is 180, thus the inferred width should be 180 / (2*3*10) = 3.
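
With the ND fixups above, the `Flatten`-equivalent configuration described in the tutorial is written with the `BlobShape`-based syntax. A minimal prototxt sketch (the layer and blob names here are illustrative, not part of the patch):

    layer {
      name: "flatten_equivalent"
      type: "Reshape"
      bottom: "input"
      top: "output"
      reshape_param { shape { dim: 0 dim: -1 } }
    }

The leading `dim: 0` copies the batch dimension from the bottom, and `dim: -1` absorbs the product of all remaining axes, so an `n * c * h * w` bottom becomes an `n * (c*h*w)` top without any data being copied.
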
From 6b64f121c272b3d1464004554b9e6a9c4033a8f5 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Thu, 26 Mar 2015 02:25:48 -0700 Subject: [PATCH 3/4] basic tests (Forward, Gradient) for ReshapeLayer --- src/caffe/test/test_reshape_layer.cpp | 57 +++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/caffe/test/test_reshape_layer.cpp b/src/caffe/test/test_reshape_layer.cpp index 0c8e2427aa7..8635792a66e 100644 --- a/src/caffe/test/test_reshape_layer.cpp +++ b/src/caffe/test/test_reshape_layer.cpp @@ -115,4 +115,61 @@ TYPED_TEST(ReshapeLayerTest, TestInferenceOfUnspecified) { EXPECT_EQ(this->blob_top_->width(), 3); } +TYPED_TEST(ReshapeLayerTest, TestForward) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BlobShape* shape = layer_param.mutable_reshape_param()->mutable_shape(); + shape->add_dim(6); + shape->add_dim(2); + shape->add_dim(3); + shape->add_dim(5); + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_EQ(this->blob_top_->cpu_data()[i], + this->blob_bottom_->cpu_data()[i]); + } +} + +TYPED_TEST(ReshapeLayerTest, TestForwardAfterReshape) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BlobShape* shape = layer_param.mutable_reshape_param()->mutable_shape(); + shape->add_dim(6); + shape->add_dim(2); + shape->add_dim(3); + shape->add_dim(5); + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // We know the above produced the correct result from TestForward. + // Reshape the bottom and call layer.Reshape, then try again. 
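+  // (The new bottom shape is a single axis holding all 2*3*6*5 = 180 values.)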
+ vector new_bottom_shape(1, 2 * 3 * 6 * 5); + this->blob_bottom_->Reshape(new_bottom_shape); + layer.Reshape(this->blob_bottom_vec_, this->blob_top_vec_); + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_EQ(this->blob_top_->cpu_data()[i], + this->blob_bottom_->cpu_data()[i]); + } +} + +TYPED_TEST(ReshapeLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BlobShape* shape = layer_param.mutable_reshape_param()->mutable_shape(); + shape->add_dim(6); + shape->add_dim(2); + shape->add_dim(3); + shape->add_dim(5); + ReshapeLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + } // namespace caffe From 21032b2b0911cd4d907df46c114b8e96e55c2313 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Thu, 26 Mar 2015 01:13:18 -0700 Subject: [PATCH 4/4] Add ReshapeParameter axis and num_axes to reshape only a particular span of the input shape --- include/caffe/common_layers.hpp | 2 - src/caffe/layers/reshape_layer.cpp | 53 ++++++++++--- src/caffe/proto/caffe.proto | 58 +++++++++++++- src/caffe/test/test_reshape_layer.cpp | 105 ++++++++++++++++++++++++++ 4 files changed, 204 insertions(+), 14 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index ccdfd62d5be..8da6d68096b 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -327,8 +327,6 @@ class ReshapeLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) {} - /// @brief the current output shape - vector top_shape_; /// @brief vector of axes indices whose dimensions we'll copy from the bottom vector copy_axes_; /// @brief the index of the axis whose dimension we infer, or -1 if none diff --git a/src/caffe/layers/reshape_layer.cpp b/src/caffe/layers/reshape_layer.cpp index 618edf31824..ffe970f2689 100644 --- a/src/caffe/layers/reshape_layer.cpp +++ b/src/caffe/layers/reshape_layer.cpp @@ -12,18 +12,17 @@ void ReshapeLayer::LayerSetUp(const vector*>& bottom, copy_axes_.clear(); const BlobShape& top_blob_shape = this->layer_param_.reshape_param().shape(); const int top_num_axes = top_blob_shape.dim_size(); - top_shape_.resize(top_num_axes); constant_count_ = 1; for (int i = 0; i < top_num_axes; ++i) { - top_shape_[i] = top_blob_shape.dim(i); - if (top_shape_[i] == 0) { + const int top_dim = top_blob_shape.dim(i); + if (top_dim == 0) { copy_axes_.push_back(i); - } else if (top_shape_[i] == -1) { + } else if (top_dim == -1) { CHECK_EQ(inferred_axis_, -1) << "new shape contains multiple " << "-1 dims; at most a single (1) value of -1 may be specified"; inferred_axis_ = i; } else { - constant_count_ *= top_shape_[i]; + constant_count_ *= top_dim; } } } @@ -31,27 +30,59 @@ void ReshapeLayer::LayerSetUp(const vector*>& bottom, template void ReshapeLayer::Reshape(const vector*>& bottom, const vector*>& top) { + const int input_start_axis = this->layer_param_.reshape_param().axis(); + const int start_axis = (input_start_axis >= 0) ? 
input_start_axis : + bottom[0]->num_axes() + input_start_axis + 1; + CHECK_GE(start_axis, 0) << "axis " << input_start_axis << " out of range"; + CHECK_LE(start_axis, bottom[0]->num_axes()) << "axis " << input_start_axis + << " out of range for " << bottom[0]->num_axes() << "-D input blob"; + const int num_axes = this->layer_param_.reshape_param().num_axes(); + CHECK_GE(num_axes, -1) << "num_axes must be >= 0, or -1 for all"; + const int end_axis = + (num_axes == -1) ? bottom[0]->num_axes() : (start_axis + num_axes); + CHECK_LE(end_axis, bottom[0]->num_axes()) + << "end_axis = axis + num_axes is out of range"; + const int num_axes_replaced = end_axis - start_axis; + const int num_axes_retained = bottom[0]->num_axes() - num_axes_replaced; + const BlobShape& top_blob_shape = this->layer_param_.reshape_param().shape(); + const int num_new_axes = top_blob_shape.dim_size(); + vector top_shape(num_axes_retained + num_new_axes); + int top_shape_index = 0; + for (int i = 0; i < start_axis; ++i) { + top_shape[top_shape_index++] = bottom[0]->shape(i); + } + for (int i = 0; i < num_new_axes; ++i) { + top_shape[top_shape_index++] = top_blob_shape.dim(i); + } + for (int i = end_axis; i < bottom[0]->num_axes(); ++i) { + top_shape[top_shape_index++] = bottom[0]->shape(i); + } + CHECK_EQ(top_shape_index, top_shape.size()); for (int i = 0; i < copy_axes_.size(); ++i) { const int copy_axis_index = copy_axes_[i]; - CHECK_GT(bottom[0]->num_axes(), copy_axis_index) << "new shape contains " - << "a 0, but there is no corresponding bottom axis to copy"; - top_shape_[copy_axis_index] = bottom[0]->shape(copy_axis_index); + CHECK_GT(bottom[0]->num_axes(), start_axis + copy_axis_index) + << "new shape contains a 0, but there was no corresponding bottom axis " + << "to copy"; + top_shape[start_axis + copy_axis_index] = + bottom[0]->shape(start_axis + copy_axis_index); } if (inferred_axis_ >= 0) { // A -1 dim was specified; infer the correct dimension by computing the // product of the other dimensions. int explicit_count = constant_count_; + explicit_count *= bottom[0]->count(0, start_axis); + explicit_count *= bottom[0]->count(end_axis); for (int i = 0; i < copy_axes_.size(); ++i) { const int copy_axis_index = copy_axes_[i]; - explicit_count *= top_shape_[copy_axis_index]; + explicit_count *= top_shape[start_axis + copy_axis_index]; } CHECK_EQ(0, bottom[0]->count() % explicit_count) << "bottom count (" << bottom[0]->count() << ") must be divisible by the product of " << "the specified dimensions (" << explicit_count << ")"; const int inferred_dim = bottom[0]->count() / explicit_count; - top_shape_[inferred_axis_] = inferred_dim; + top_shape[start_axis + inferred_axis_] = inferred_dim; } - top[0]->Reshape(top_shape_); + top[0]->Reshape(top_shape); CHECK_EQ(top[0]->count(), bottom[0]->count()) << "output count must match input count"; top[0]->ShareData(*bottom[0]); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index d36f1d511df..d43e560a1fa 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -696,8 +696,64 @@ message ReshapeParameter { // Specify the output dimensions. If some of the dimensions are set to 0, // the corresponding dimension from the bottom layer is used (unchanged). // Exactly one dimension may be set to -1, in which case its value is - // inferred from the count of the bottom layer and the remaining dimensions. + // inferred from the count of the bottom blob and the remaining dimensions. 
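+  // A reshape never changes the blob's total element count, and the top blob
+  // shares its data and diff with the bottom blob rather than copying them.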
+ // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: + // + // layer { + // type: "Reshape" bottom: "input" top: "output" + // reshape_param { ... } + // } + // + // If "input" is 2D with shape 2 x 8, then the following reshape_param + // specifications are all equivalent, producing a 3D blob "output" with shape + // 2 x 2 x 4: + // + // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } + // reshape_param { shape { dim: -1 dim: 0 dim: 2 } } + // optional BlobShape shape = 1; + + // axis and num_axes control the portion of the bottom blob's shape that are + // replaced by (included in) the reshape. By default (axis == 0 and + // num_axes == -1), the entire bottom blob shape is included in the reshape, + // and hence the shape field must specify the entire output shape. + // + // axis may be non-zero to retain some portion of the beginning of the input + // shape (and may be negative to index from the end; e.g., -1 to begin the + // reshape after the last axis, including nothing in the reshape, + // -2 to include only the last axis, etc.). + // + // For example, suppose "input" is a 2D blob with shape 2 x 8. + // Then the following ReshapeLayer specifications are all equivalent, + // producing a blob "output" with shape 2 x 2 x 4: + // + // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } + // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } + // + // num_axes specifies the extent of the reshape. + // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on + // input axes in the range [axis, axis+num_axes]. + // num_axes may also be -1, the default, to include all remaining axes + // (starting from axis). + // + // For example, suppose "input" is a 2D blob with shape 2 x 8. + // Then the following ReshapeLayer specifications are equivalent, + // producing a blob "output" with shape 1 x 2 x 8. 
+ // + // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } + // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } + // reshape_param { shape { dim: 1 } num_axes: 0 } + // + // On the other hand, these would produce output blob shape 2 x 1 x 8: + // + // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } + // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } + // + optional int32 axis = 2 [default = 0]; + optional int32 num_axes = 3 [default = -1]; } // Message that stores parameters used by SigmoidLayer diff --git a/src/caffe/test/test_reshape_layer.cpp b/src/caffe/test/test_reshape_layer.cpp index 8635792a66e..9d08ec60d4e 100644 --- a/src/caffe/test/test_reshape_layer.cpp +++ b/src/caffe/test/test_reshape_layer.cpp @@ -115,6 +115,111 @@ TYPED_TEST(ReshapeLayerTest, TestInferenceOfUnspecified) { EXPECT_EQ(this->blob_top_->width(), 3); } +TYPED_TEST(ReshapeLayerTest, TestInferenceOfUnspecifiedWithStartAxis) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_reshape_param()->set_axis(1); + BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(3); + blob_shape->add_dim(10); + blob_shape->add_dim(-1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + ASSERT_EQ(this->blob_top_->num_axes(), 4); + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3); + EXPECT_EQ(this->blob_top_->height(), 10); + EXPECT_EQ(this->blob_top_->width(), 3); +} + +TYPED_TEST(ReshapeLayerTest, TestInsertSingletonAxesStart) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_reshape_param()->set_axis(0); + layer_param.mutable_reshape_param()->set_num_axes(0); + BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(1); + blob_shape->add_dim(1); + blob_shape->add_dim(1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + ASSERT_EQ(this->blob_top_->num_axes(), 7); + EXPECT_EQ(this->blob_top_->shape(0), 1); + EXPECT_EQ(this->blob_top_->shape(1), 1); + EXPECT_EQ(this->blob_top_->shape(2), 1); + EXPECT_EQ(this->blob_top_->shape(3), 2); + EXPECT_EQ(this->blob_top_->shape(4), 3); + EXPECT_EQ(this->blob_top_->shape(5), 6); + EXPECT_EQ(this->blob_top_->shape(6), 5); +} + +TYPED_TEST(ReshapeLayerTest, TestInsertSingletonAxesMiddle) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_reshape_param()->set_axis(2); + layer_param.mutable_reshape_param()->set_num_axes(0); + BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(1); + blob_shape->add_dim(1); + blob_shape->add_dim(1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + ASSERT_EQ(this->blob_top_->num_axes(), 7); + EXPECT_EQ(this->blob_top_->shape(0), 2); + EXPECT_EQ(this->blob_top_->shape(1), 3); + EXPECT_EQ(this->blob_top_->shape(2), 1); + EXPECT_EQ(this->blob_top_->shape(3), 1); + EXPECT_EQ(this->blob_top_->shape(4), 1); + EXPECT_EQ(this->blob_top_->shape(5), 6); + EXPECT_EQ(this->blob_top_->shape(6), 5); +} + +TYPED_TEST(ReshapeLayerTest, TestInsertSingletonAxesEnd) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_reshape_param()->set_axis(-1); + layer_param.mutable_reshape_param()->set_num_axes(0); + BlobShape* blob_shape = 
layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(1); + blob_shape->add_dim(1); + blob_shape->add_dim(1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + ASSERT_EQ(this->blob_top_->num_axes(), 7); + EXPECT_EQ(this->blob_top_->shape(0), 2); + EXPECT_EQ(this->blob_top_->shape(1), 3); + EXPECT_EQ(this->blob_top_->shape(2), 6); + EXPECT_EQ(this->blob_top_->shape(3), 5); + EXPECT_EQ(this->blob_top_->shape(4), 1); + EXPECT_EQ(this->blob_top_->shape(5), 1); + EXPECT_EQ(this->blob_top_->shape(6), 1); +} + +TYPED_TEST(ReshapeLayerTest, TestFlattenMiddle) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_reshape_param()->set_axis(1); + layer_param.mutable_reshape_param()->set_num_axes(2); + BlobShape* blob_shape = layer_param.mutable_reshape_param()->mutable_shape(); + blob_shape->add_dim(-1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + ASSERT_EQ(this->blob_top_->num_axes(), 3); + EXPECT_EQ(this->blob_top_->shape(0), 2); + EXPECT_EQ(this->blob_top_->shape(1), 3 * 6); + EXPECT_EQ(this->blob_top_->shape(2), 5); +} + TYPED_TEST(ReshapeLayerTest, TestForward) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param;
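
As a closing illustration of the new `axis`/`num_axes` parameters, flattening only the middle axes of an `n * c * h * w` blob (mirroring the `TestFlattenMiddle` case above) can be sketched in prototxt as follows; the layer and blob names are illustrative:

    layer {
      name: "flatten_middle"
      type: "Reshape"
      bottom: "input"
      top: "output"
      reshape_param { shape { dim: -1 } axis: 1 num_axes: 2 }
    }

For a 2 x 3 x 6 x 5 bottom, axes 1 and 2 (3 x 6) are replaced by a single inferred axis of 18, producing a 2 x 18 x 5 top, while axes 0 and 3 are retained unchanged.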