Elementwise max layer #688

Closed · wants to merge 5 commits
1 change: 1 addition & 0 deletions include/caffe/vision_layers.hpp
@@ -91,6 +91,7 @@ class EltwiseLayer : public Layer<Dtype> {

EltwiseParameter_EltwiseOp op_;
vector<Dtype> coeffs_;
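// For the MAX operation: per-element index of the winning bottom blob,
// used to route gradients in Backward.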
shared_ptr<Blob<int> > max_idx_;
};

/* Im2colLayer
66 changes: 66 additions & 0 deletions src/caffe/layers/eltwise_layer.cpp
@@ -1,6 +1,7 @@
// Copyright 2014 BVLC and contributors.

#include <vector>
#include <cfloat>

#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
@@ -38,11 +39,20 @@ void EltwiseLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
coeffs_[i] = this->layer_param().eltwise_param().coeff(i);
}
}
// If the operation is MAX, allocate the blob that records the argmax indices.
if (this->layer_param_.eltwise_param().operation() ==
EltwiseParameter_EltwiseOp_MAX && top->size() == 1) {
max_idx_.reset(new Blob<int>(bottom[0]->num(), channels,
height, width));
}
}

template <typename Dtype>
Dtype EltwiseLayer<Dtype>::Forward_cpu(
const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
int* mask = NULL;
const Dtype* bottom_data_a = NULL;
const Dtype* bottom_data_b = NULL;
const int count = (*top)[0]->count();
Dtype* top_data = (*top)[0]->mutable_cpu_data();
switch (op_) {
@@ -59,6 +69,50 @@ Dtype EltwiseLayer<Dtype>::Forward_cpu(
caffe_axpy(count, coeffs_[i], bottom[i]->cpu_data(), top_data);
}
break;
case EltwiseParameter_EltwiseOp_MAX:
// Initialize the mask to -1 and the output to -FLT_MAX
mask = max_idx_->mutable_cpu_data();
caffe_set(count, -1, mask);
caffe_set(count, Dtype(-FLT_MAX), top_data);
// bottom 0 & 1
bottom_data_a = bottom[0]->cpu_data();
bottom_data_b = bottom[1]->cpu_data();
for (int idx = 0; idx < count; ++idx) {
if (bottom_data_a[idx] > bottom_data_b[idx]) {
top_data[idx] = bottom_data_a[idx]; // maxval
mask[idx] = 0; // maxid
} else {
top_data[idx] = bottom_data_b[idx]; // maxval
mask[idx] = 1; // maxid
}
}
// Blobs 2 and beyond: compare against the running maximum held in top_data
bottom_data_a = top_data;
for (int blob_idx = 2; blob_idx < bottom.size(); ++blob_idx) {
bottom_data_b = bottom[blob_idx]->cpu_data();
for (int idx = 0; idx < count; ++idx) {
if (bottom_data_a[idx] < bottom_data_b[idx]) {
top_data[idx] = bottom_data_b[idx]; // maxval
mask[idx] = blob_idx; // maxid
}
}
}
break;
default:
LOG(FATAL) << "Unknown elementwise operation.";
}
@@ -68,6 +122,7 @@ Dtype EltwiseLayer<Dtype>::Forward_cpu(
template <typename Dtype>
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
const int* mask = NULL;
const int count = top[0]->count();
const Dtype* top_data = top[0]->cpu_data();
const Dtype* top_diff = top[0]->cpu_diff();
@@ -87,6 +142,17 @@ void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
}
break;
case EltwiseParameter_EltwiseOp_MAX:
mask = max_idx_->cpu_data();
for (int idx = 0; idx < count; ++idx) {
bottom_diff[idx] = (mask[idx] == i) ? top_diff[idx] : Dtype(0);
}
break;
default:
LOG(FATAL) << "Unknown elementwise operation.";
}
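As an aside for reviewers, below is a minimal standalone sketch (plain C++ with hypothetical function names, not the Caffe API) of the contract the MAX branch above implements: the forward pass records, for every element, which bottom blob supplied the maximum, and the backward pass hands each element's gradient only to that winner. Tie-breaking details may differ slightly from the layer code.

#include <cstddef>
#include <vector>

// Forward: top[i] = max over b of bottoms[b][i]; mask[i] = argmax index b.
void eltwise_max_forward(const std::vector<std::vector<float> >& bottoms,
                         std::vector<float>* top, std::vector<int>* mask) {
  const std::size_t count = bottoms[0].size();
  top->resize(count);
  mask->resize(count);
  for (std::size_t i = 0; i < count; ++i) {
    float maxval = bottoms[0][i];
    int maxidx = 0;
    for (std::size_t b = 1; b < bottoms.size(); ++b) {
      if (bottoms[b][i] > maxval) {
        maxval = bottoms[b][i];
        maxidx = static_cast<int>(b);
      }
    }
    (*top)[i] = maxval;
    (*mask)[i] = maxidx;
  }
}

// Backward: the gradient for bottom blob_idx is top_diff where mask == blob_idx, else zero.
void eltwise_max_backward(const std::vector<float>& top_diff,
                          const std::vector<int>& mask, int blob_idx,
                          std::vector<float>* bottom_diff) {
  bottom_diff->resize(top_diff.size());
  for (std::size_t i = 0; i < top_diff.size(); ++i) {
    (*bottom_diff)[i] = (mask[i] == blob_idx) ? top_diff[i] : 0.0f;
  }
}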
159 changes: 108 additions & 51 deletions src/caffe/layers/eltwise_layer.cu
@@ -1,69 +1,126 @@
// Copyright 2014 BVLC and contributors.

#include <vector>
#include <cfloat>

#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
Dtype EltwiseLayer<Dtype>::Forward_gpu(
const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
const int count = (*top)[0]->count();
Dtype* top_data = (*top)[0]->mutable_gpu_data();
switch (op_) {
case EltwiseParameter_EltwiseOp_PROD:
caffe_gpu_mul(count, bottom[0]->gpu_data(),
bottom[1]->gpu_data(), top_data);
for (int i = 2; i < bottom.size(); ++i) {
caffe_gpu_mul(count, top_data, bottom[i]->gpu_data(), top_data);
}
break;
case EltwiseParameter_EltwiseOp_SUM:
caffe_gpu_set(count, Dtype(0.), top_data);
// TODO(shelhamer) does cuBLAS optimize to sum for coeff = 1?
for (int i = 0; i < bottom.size(); ++i) {
caffe_gpu_axpy(count, coeffs_[i], bottom[i]->gpu_data(), top_data);
}
break;
default:
LOG(FATAL) << "Unknown elementwise operation.";
}
return Dtype(0.);
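// MaxForward keeps a running element-wise maximum across bottom blobs: on the
// first call (blob_idx == 0) bottoms 0 and 1 are compared directly; on later
// calls bottom_data_a aliases top_data, so the output and mask are rewritten
// only when the new bottom wins the comparison.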
template<typename Dtype>
__global__ void MaxForward(const int nthreads, const Dtype* bottom_data_a,
const Dtype* bottom_data_b, const int blob_idx, Dtype* top_data,
int* mask) {
CUDA_KERNEL_LOOP(index, nthreads)
{
Dtype maxval = -FLT_MAX;
int maxidx = -1;
if (bottom_data_a[index] > bottom_data_b[index]) {
// only update for very first bottom_data blob (blob_idx == 0)
if (blob_idx == 0) {
maxval = bottom_data_a[index];
top_data[index] = maxval;
maxidx = blob_idx;
mask[index] = maxidx;
}
} else {
maxval = bottom_data_b[index];
top_data[index] = maxval;
maxidx = blob_idx + 1;
mask[index] = maxidx;
}
}
}

template <typename Dtype>
template<typename Dtype>
Dtype EltwiseLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
int* mask = NULL;
const int count = (*top)[0]->count();
Dtype* top_data = (*top)[0]->mutable_gpu_data();
switch (op_) {
case EltwiseParameter_EltwiseOp_PROD:
caffe_gpu_mul(count, bottom[0]->gpu_data(), bottom[1]->gpu_data(),
top_data);
for (int i = 2; i < bottom.size(); ++i) {
caffe_gpu_mul(count, top_data, bottom[i]->gpu_data(), top_data);
}
break;
case EltwiseParameter_EltwiseOp_SUM:
caffe_gpu_set(count, Dtype(0.), top_data);
// TODO(shelhamer) does cuBLAS optimize to sum for coeff = 1?
for (int i = 0; i < bottom.size(); ++i) {
caffe_gpu_axpy(count, coeffs_[i], bottom[i]->gpu_data(), top_data);
}
break;
case EltwiseParameter_EltwiseOp_MAX:
mask = max_idx_->mutable_gpu_data();
// NOLINT_NEXT_LINE(whitespace/operators)
MaxForward<Dtype> <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 0, top_data, mask);
for (int i = 2; i < bottom.size(); ++i) {
// NOLINT_NEXT_LINE(whitespace/operators)
MaxForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, top_data, bottom[i]->gpu_data(), i-1, top_data, mask);
}
break;
default:
LOG(FATAL) << "Unknown elementwise operation.";
}
return Dtype(0.);
}

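// MaxBackward routes the gradient: each element of top_diff is copied to the
// bottom blob whose index was recorded in the mask during the forward pass;
// every other bottom receives zero at that position.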
template<typename Dtype>
__global__ void MaxBackward(const int nthreads, const Dtype* top_diff,
const int blob_idx, const int* mask, Dtype* bottom_diff) {
CUDA_KERNEL_LOOP(index, nthreads)
{
Dtype gradient = 0;
if (mask[index] == blob_idx) {
gradient += top_diff[index];
}
bottom_diff[index] = gradient;
}
}

template<typename Dtype>
void EltwiseLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
const int count = top[0]->count();
const Dtype* top_data = top[0]->gpu_data();
const Dtype* top_diff = top[0]->gpu_diff();
for (int i = 0; i < bottom->size(); ++i) {
if (propagate_down[i]) {
const Dtype* bottom_data = (*bottom)[i]->gpu_data();
Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
switch (op_) {
case EltwiseParameter_EltwiseOp_PROD:
caffe_gpu_div(count, top_data, bottom_data, bottom_diff);
caffe_gpu_mul(count, bottom_diff, top_diff, bottom_diff);
break;
case EltwiseParameter_EltwiseOp_SUM:
if (coeffs_[i] == Dtype(1.)) {
caffe_copy(count, top_diff, bottom_diff);
} else {
caffe_gpu_scale(count, coeffs_[i], top_diff, bottom_diff);
}
break;
default:
LOG(FATAL) << "Unknown elementwise operation.";
}
}
}
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
const int* mask = NULL;
const int count = top[0]->count();
const Dtype* top_data = top[0]->gpu_data();
const Dtype* top_diff = top[0]->gpu_diff();
for (int i = 0; i < bottom->size(); ++i) {
if (propagate_down[i]) {
const Dtype* bottom_data = (*bottom)[i]->gpu_data();
Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
switch (op_) {
case EltwiseParameter_EltwiseOp_PROD:
caffe_gpu_div(count, top_data, bottom_data, bottom_diff);
caffe_gpu_mul(count, bottom_diff, top_diff, bottom_diff);
break;
case EltwiseParameter_EltwiseOp_SUM:
if (coeffs_[i] == Dtype(1.)) {
caffe_copy(count, top_diff, bottom_diff);
} else {
caffe_gpu_scale(count, coeffs_[i], top_diff, bottom_diff);
}
break;
case EltwiseParameter_EltwiseOp_MAX:
mask = max_idx_->gpu_data();
// NOLINT_NEXT_LINE(whitespace/operators)
MaxBackward<Dtype> <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, i, mask, bottom_diff);
break;
default:
LOG(FATAL) << "Unknown elementwise operation.";
}
}
}
}

INSTANTIATE_CLASS(EltwiseLayer);


} // namespace caffe
1 change: 1 addition & 0 deletions src/caffe/proto/caffe.proto
@@ -304,6 +304,7 @@ message EltwiseParameter {
enum EltwiseOp {
PROD = 0;
SUM = 1;
MAX = 2;
}
optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
repeated float coeff = 2; // blob-wise coefficient for SUM operation
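Usage note (an assumption about typical configuration, mirroring the tests below): with the new enum value, a prototxt layer definition would select the operation with operation: MAX inside eltwise_param; programmatically this corresponds to:

LayerParameter layer_param;
layer_param.mutable_eltwise_param()->set_operation(EltwiseParameter_EltwiseOp_MAX);
EltwiseLayer<float> layer(layer_param);  // top = element-wise max over the bottom blobs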
32 changes: 32 additions & 0 deletions src/caffe/test/test_eltwise_layer.cpp
@@ -128,6 +128,27 @@ TYPED_TEST(EltwiseLayerTest, TestSumCoeff) {
}
}

TYPED_TEST(EltwiseLayerTest, TestMax) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
eltwise_param->set_operation(EltwiseParameter_EltwiseOp_MAX);
shared_ptr<EltwiseLayer<Dtype> > layer(
new EltwiseLayer<Dtype>(layer_param));
layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
const Dtype* data = this->blob_top_->cpu_data();
const int count = this->blob_top_->count();
const Dtype* in_data_a = this->blob_bottom_a_->cpu_data();
const Dtype* in_data_b = this->blob_bottom_b_->cpu_data();
const Dtype* in_data_c = this->blob_bottom_c_->cpu_data();
for (int i = 0; i < count; ++i) {
EXPECT_GE(data[i], in_data_a[i]);
EXPECT_GE(data[i], in_data_b[i]);
EXPECT_GE(data[i], in_data_c[i]);
}
}

TYPED_TEST(EltwiseLayerTest, TestProdGradient) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
@@ -164,4 +185,15 @@ TYPED_TEST(EltwiseLayerTest, TestSumCoeffGradient) {
&(this->blob_top_vec_));
}

TYPED_TEST(EltwiseLayerTest, TestMaxGradient) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
eltwise_param->set_operation(EltwiseParameter_EltwiseOp_MAX);
EltwiseLayer<Dtype> layer(layer_param);
GradientChecker<Dtype> checker(1e-2, 1e-3);
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

} // namespace caffe