From 0a6870339afce63d0ca838f312248bf049791dc6 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Thu, 24 Apr 2014 21:44:48 -0700
Subject: [PATCH 1/2] move analytic gradient computation outside loop and
 store -- saves a lot of time

---
 include/caffe/blob.hpp                      |  1 +
 src/caffe/blob.cpp                          |  5 +++
 src/caffe/test/test_gradient_check_util.hpp | 43 +++++++++++++--------
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp
index 712fc0521ea..2f6b8f80e68 100644
--- a/include/caffe/blob.hpp
+++ b/include/caffe/blob.hpp
@@ -19,6 +19,7 @@ class Blob {
       const int width);
   void Reshape(const int num, const int channels, const int height,
       const int width);
+  void ReshapeLike(const Blob& other);
   inline int num() const { return num_; }
   inline int channels() const { return channels_; }
   inline int height() const { return height_; }
diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp
index 54b699222ca..f1fe98df4a6 100644
--- a/src/caffe/blob.cpp
+++ b/src/caffe/blob.cpp
@@ -31,6 +31,11 @@ void Blob<Dtype>::Reshape(const int num, const int channels, const int height,
   }
 }
 
+template <typename Dtype>
+void Blob<Dtype>::ReshapeLike(const Blob<Dtype>& other) {
+  Reshape(other.num(), other.channels(), other.height(), other.width());
+}
+
 template <typename Dtype>
 Blob<Dtype>::Blob(const int num, const int channels, const int height,
     const int width) {
diff --git a/src/caffe/test/test_gradient_check_util.hpp b/src/caffe/test/test_gradient_check_util.hpp
index 19758868232..a1c66a1e8d8 100644
--- a/src/caffe/test/test_gradient_check_util.hpp
+++ b/src/caffe/test/test_gradient_check_util.hpp
@@ -62,9 +62,6 @@ class GradientChecker {
 };
 
 
-// Detailed implementations are as follows.
-
-
 template <typename Dtype>
 void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
     vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top,
@@ -82,36 +79,50 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
     CHECK(check_bottom < bottom->size());
     blobs_to_check.push_back((*bottom)[check_bottom]);
   }
-  // go through the bottom and parameter blobs
+  // Compute the gradient analytically using Backward
+  Caffe::set_random_seed(seed_);
+  // Get any loss from the layer
+  Dtype computed_objective = layer->Forward(*bottom, top);
+  // Get additional loss from the objective
+  computed_objective += GetObjAndGradient(top, top_id, top_data_id);
+  layer->Backward(*top, true, bottom);
+  // Store computed gradients for all checked blobs
+  vector<shared_ptr<Blob<Dtype> > > computed_gradient_blobs(blobs_to_check.size());
+  for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
+    Blob<Dtype>* current_blob = blobs_to_check[blob_id];
+    computed_gradient_blobs[blob_id].reset(new Blob<Dtype>());
+    computed_gradient_blobs[blob_id]->ReshapeLike(*current_blob);
+    const int count = blobs_to_check[blob_id]->count();
+    const Dtype* diff = blobs_to_check[blob_id]->cpu_diff();
+    Dtype* computed_gradients =
+        computed_gradient_blobs[blob_id]->mutable_cpu_data();
+    caffe_copy(count, diff, computed_gradients);
+  }
+  // Compute derivative of top w.r.t. each bottom and parameter input using
+  // finite differencing.
// LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs."; for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) { Blob* current_blob = blobs_to_check[blob_id]; + const Dtype* computed_gradients = + computed_gradient_blobs[blob_id]->cpu_data(); // LOG(ERROR) << "Blob " << blob_id << ": checking " // << current_blob->count() << " parameters."; - // go through the values for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) { - // First, obtain the original data - Caffe::set_random_seed(seed_); - // Get any loss from the layer - Dtype computed_objective = layer->Forward(*bottom, top); - // Get additional loss from the objective - computed_objective += GetObjAndGradient(top, top_id, top_data_id); - layer->Backward(*top, true, bottom); - Dtype computed_gradient = current_blob->cpu_diff()[feat_id]; - // compute score by adding stepsize + // Compute loss with stepsize_ added to input. current_blob->mutable_cpu_data()[feat_id] += stepsize_; Caffe::set_random_seed(seed_); Dtype positive_objective = layer->Forward(*bottom, top); positive_objective += GetObjAndGradient(top, top_id, top_data_id); - // compute score by subtracting stepsize + // Compute loss with stepsize_ subtracted from input. current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2; Caffe::set_random_seed(seed_); Dtype negative_objective = layer->Forward(*bottom, top); negative_objective += GetObjAndGradient(top, top_id, top_data_id); - // Recover stepsize + // Recover original input value. current_blob->mutable_cpu_data()[feat_id] += stepsize_; Dtype estimated_gradient = (positive_objective - negative_objective) / stepsize_ / 2.; + Dtype computed_gradient = computed_gradients[feat_id]; Dtype feature = current_blob->cpu_data()[feat_id]; // LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " " // << current_blob->cpu_diff()[feat_id]; From 2c5f9dd7e2c8d7b2b130ec001a3f066ead8682f4 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Thu, 24 Apr 2014 20:24:19 -0700 Subject: [PATCH 2/2] eltwise gradient checker --- src/caffe/test/test_eltwise_product_layer.cpp | 4 +- src/caffe/test/test_flatten_layer.cpp | 4 +- src/caffe/test/test_gradient_check_util.hpp | 73 ++++++++++++++----- src/caffe/test/test_neuron_layer.cpp | 14 ++-- src/caffe/test/test_power_layer.cpp | 2 +- src/caffe/test/test_split_layer.cpp | 8 +- src/caffe/test/test_tanh_layer.cpp | 4 +- 7 files changed, 74 insertions(+), 35 deletions(-) diff --git a/src/caffe/test/test_eltwise_product_layer.cpp b/src/caffe/test/test_eltwise_product_layer.cpp index 8255a579b15..86d6fdc5334 100644 --- a/src/caffe/test/test_eltwise_product_layer.cpp +++ b/src/caffe/test/test_eltwise_product_layer.cpp @@ -102,7 +102,7 @@ TYPED_TEST(EltwiseProductLayerTest, TestCPUGradient) { LayerParameter layer_param; EltwiseProductLayer layer(layer_param); GradientChecker checker(1e-2, 1e-3); - checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_), + checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_), &(this->blob_top_vec_)); } @@ -111,7 +111,7 @@ TYPED_TEST(EltwiseProductLayerTest, TestGPUGradient) { LayerParameter layer_param; EltwiseProductLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); - checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_), + checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_), &(this->blob_top_vec_)); } diff --git a/src/caffe/test/test_flatten_layer.cpp b/src/caffe/test/test_flatten_layer.cpp index 139488b54a6..52c567b0295 100644 --- a/src/caffe/test/test_flatten_layer.cpp +++ 
@@ -84,7 +84,7 @@ TYPED_TEST(FlattenLayerTest, TestCPUGradient) {
   Caffe::set_mode(Caffe::CPU);
   FlattenLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }
 
@@ -93,7 +93,7 @@ TYPED_TEST(FlattenLayerTest, TestGPUGradient) {
   Caffe::set_mode(Caffe::GPU);
   FlattenLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
diff --git a/src/caffe/test/test_gradient_check_util.hpp b/src/caffe/test/test_gradient_check_util.hpp
index a1c66a1e8d8..da54a966803 100644
--- a/src/caffe/test/test_gradient_check_util.hpp
+++ b/src/caffe/test/test_gradient_check_util.hpp
@@ -40,9 +40,15 @@ class GradientChecker {
       vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top,
       int check_bottom = -1);
 
+  // CheckGradientEltwise can be used to test layers that perform element-wise
+  // computation only (e.g., neuron layers) -- where (d y_i) / (d x_j) = 0 when
+  // i != j.
+  void CheckGradientEltwise(Layer<Dtype>* layer,
+      vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top);
+
   void CheckGradientSingle(Layer<Dtype>* layer, vector<Blob<Dtype>*>* bottom,
       vector<Blob<Dtype>*>* top, int check_bottom, int top_id,
-      int top_data_id);
+      int top_data_id, bool element_wise = false);
 
   // Checks the gradient of a network. This network should not have any data
   // layers or loss layers, since the function does not explicitly deal with
@@ -65,7 +71,16 @@
 template <typename Dtype>
 void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
     vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top,
-    int check_bottom, int top_id, int top_data_id) {
+    int check_bottom, int top_id, int top_data_id, bool element_wise) {
+  if (element_wise) {
+    CHECK_EQ(0, layer->blobs().size());
+    CHECK_LE(0, top_id);
+    CHECK_LE(0, top_data_id);
+    const int top_count = (*top)[top_id]->count();
+    for (int blob_id = 0; blob_id < bottom->size(); ++blob_id) {
+      CHECK_EQ(top_count, (*bottom)[blob_id]->count());
+    }
+  }
   // First, figure out what blobs we need to check against.
   vector<Blob<Dtype>*> blobs_to_check;
   for (int i = 0; i < layer->blobs().size(); ++i) {
@@ -87,7 +102,8 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
   computed_objective += GetObjAndGradient(top, top_id, top_data_id);
   layer->Backward(*top, true, bottom);
   // Store computed gradients for all checked blobs
-  vector<shared_ptr<Blob<Dtype> > > computed_gradient_blobs(blobs_to_check.size());
+  vector<shared_ptr<Blob<Dtype> > >
+      computed_gradient_blobs(blobs_to_check.size());
   for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
     Blob<Dtype>* current_blob = blobs_to_check[blob_id];
     computed_gradient_blobs[blob_id].reset(new Blob<Dtype>());
@@ -108,20 +124,29 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
     // LOG(ERROR) << "Blob " << blob_id << ": checking "
     //     << current_blob->count() << " parameters.";
     for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
-      // Compute loss with stepsize_ added to input.
-      current_blob->mutable_cpu_data()[feat_id] += stepsize_;
-      Caffe::set_random_seed(seed_);
-      Dtype positive_objective = layer->Forward(*bottom, top);
-      positive_objective += GetObjAndGradient(top, top_id, top_data_id);
-      // Compute loss with stepsize_ subtracted from input.
-      current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
-      Caffe::set_random_seed(seed_);
-      Dtype negative_objective = layer->Forward(*bottom, top);
-      negative_objective += GetObjAndGradient(top, top_id, top_data_id);
-      // Recover original input value.
-      current_blob->mutable_cpu_data()[feat_id] += stepsize_;
-      Dtype estimated_gradient = (positive_objective - negative_objective) /
-          stepsize_ / 2.;
+      // For an element-wise layer, we only need to do finite differencing to
+      // compute the derivative of (*top)[top_id][top_data_id] w.r.t.
+      // (*bottom)[blob_id][i] only for i == top_data_id. For any other
+      // i != top_data_id, we know the derivative is 0 by definition, and simply
+      // check that that's true.
+      Dtype estimated_gradient = 0;
+      if (!element_wise || (feat_id == top_data_id)) {
+        // Do finite differencing.
+        // Compute loss with stepsize_ added to input.
+        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
+        Caffe::set_random_seed(seed_);
+        Dtype positive_objective = layer->Forward(*bottom, top);
+        positive_objective += GetObjAndGradient(top, top_id, top_data_id);
+        // Compute loss with stepsize_ subtracted from input.
+        current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
+        Caffe::set_random_seed(seed_);
+        Dtype negative_objective = layer->Forward(*bottom, top);
+        negative_objective += GetObjAndGradient(top, top_id, top_data_id);
+        // Recover original input value.
+        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
+        estimated_gradient = (positive_objective - negative_objective) /
+            stepsize_ / 2.;
+      }
       Dtype computed_gradient = computed_gradients[feat_id];
       Dtype feature = current_blob->cpu_data()[feat_id];
       // LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " "
       //     << current_blob->cpu_diff()[feat_id];
@@ -157,6 +182,20 @@ void GradientChecker<Dtype>::CheckGradientExhaustive(Layer<Dtype>* layer,
   }
 }
 
+template <typename Dtype>
+void GradientChecker<Dtype>::CheckGradientEltwise(Layer<Dtype>* layer,
+    vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top) {
+  layer->SetUp(*bottom, top);
+  CHECK_GT(top->size(), 0) << "Eltwise mode requires at least one top blob.";
+  const int check_bottom = -1;
+  const bool element_wise = true;
+  for (int i = 0; i < top->size(); ++i) {
+    for (int j = 0; j < (*top)[i]->count(); ++j) {
+      CheckGradientSingle(layer, bottom, top, check_bottom, i, j, element_wise);
+    }
+  }
+}
+
 template <typename Dtype>
 void GradientChecker<Dtype>::CheckGradientNet(
     const Net<Dtype>& net, const vector<Blob<Dtype>*>& input) {
diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp
index cd733751a70..9c852a1c697 100644
--- a/src/caffe/test/test_neuron_layer.cpp
+++ b/src/caffe/test/test_neuron_layer.cpp
@@ -61,7 +61,7 @@ TYPED_TEST(NeuronLayerTest, TestReLUGradientCPU) {
   Caffe::set_mode(Caffe::CPU);
   ReLULayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
@@ -87,7 +87,7 @@ TYPED_TEST(NeuronLayerTest, TestReLUGradientGPU) {
   Caffe::set_mode(Caffe::GPU);
   ReLULayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
@@ -115,7 +115,7 @@ TYPED_TEST(NeuronLayerTest, TestSigmoidGradientCPU) {
   Caffe::set_mode(Caffe::CPU);
   SigmoidLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
@@ -142,7 +142,7 @@ TYPED_TEST(NeuronLayerTest, TestSigmoidGradientGPU) {
   Caffe::set_mode(Caffe::GPU);
   SigmoidLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
@@ -172,7 +172,7 @@ TYPED_TEST(NeuronLayerTest, TestDropoutGradientCPU) {
   Caffe::set_mode(Caffe::CPU);
   DropoutLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
@@ -271,7 +271,7 @@ TYPED_TEST(NeuronLayerTest, TestBNLLGradientCPU) {
   Caffe::set_mode(Caffe::CPU);
   BNLLLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
@@ -297,7 +297,7 @@ TYPED_TEST(NeuronLayerTest, TestBNLLGradientGPU) {
   Caffe::set_mode(Caffe::GPU);
   BNLLLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
diff --git a/src/caffe/test/test_power_layer.cpp b/src/caffe/test/test_power_layer.cpp
index 2101a4194d7..4fab8af3c25 100644
--- a/src/caffe/test/test_power_layer.cpp
+++ b/src/caffe/test/test_power_layer.cpp
@@ -79,7 +79,7 @@ class PowerLayerTest : public ::testing::Test {
       }
     }
     GradientChecker<Dtype> checker(1e-2, 1e-2, 1701, 0., 0.01);
-    checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+    checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
        &(this->blob_top_vec_));
   }
 
diff --git a/src/caffe/test/test_split_layer.cpp b/src/caffe/test/test_split_layer.cpp
index 06f0f3fc101..327bcf937ac 100644
--- a/src/caffe/test/test_split_layer.cpp
+++ b/src/caffe/test/test_split_layer.cpp
@@ -121,7 +121,7 @@ TYPED_TEST(SplitLayerTest, TestCPUGradient) {
   Caffe::set_mode(Caffe::CPU);
   SplitLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
@@ -130,7 +130,7 @@ TYPED_TEST(SplitLayerTest, TestGPUGradient) {
   Caffe::set_mode(Caffe::GPU);
   SplitLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
@@ -140,7 +140,7 @@ TYPED_TEST(SplitLayerTest, TestCPUGradientInPlace) {
   SplitLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
   this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
@@ -150,7 +150,7 @@ TYPED_TEST(SplitLayerTest, TestGPUGradientInPlace) {
   SplitLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-2);
   this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
diff --git a/src/caffe/test/test_tanh_layer.cpp b/src/caffe/test/test_tanh_layer.cpp
index 82cb96c174a..9c9f8a74ae2 100644
--- a/src/caffe/test/test_tanh_layer.cpp
+++ b/src/caffe/test/test_tanh_layer.cpp
@@ -70,7 +70,7 @@ TYPED_TEST(TanHLayerTest, TestGradientCPU) {
   Caffe::set_mode(Caffe::CPU);
   TanHLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
 
@@ -102,7 +102,7 @@ TYPED_TEST(TanHLayerTest, TestGradientGPU) {
   Caffe::set_mode(Caffe::GPU);
   TanHLayer<TypeParam> layer(layer_param);
   GradientChecker<TypeParam> checker(1e-2, 1e-3);
-  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
      &(this->blob_top_vec_));
 }
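
Note: both patches revolve around the same numerical check. CheckGradientSingle estimates each derivative with a centered difference, (f(x + h) - f(x - h)) / (2h) with h = stepsize_, and compares it against the gradient that Backward produced. Patch 1 runs Forward/Backward once per (top_id, top_data_id) and caches the result instead of recomputing it inside the per-feature loop, and patch 2 skips the two extra Forward passes for the off-diagonal entries of element-wise layers, where dy_i/dx_j is zero whenever i != j. The standalone sketch below mirrors that comparison on a scalar function; f, analytic_gradient, and the tolerances are illustrative stand-ins, not code from the patches.

// Minimal sketch of the centered-difference check used by GradientChecker.
// f() stands in for layer->Forward(); analytic_gradient() stands in for the
// diff that layer->Backward() would write into the checked blob.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>

static double f(double x) { return x * x * x; }                  // "Forward"
static double analytic_gradient(double x) { return 3 * x * x; }  // "Backward"

int main() {
  const double stepsize = 1e-2;   // plays the role of stepsize_
  const double threshold = 1e-3;  // plays the role of threshold_
  for (double x = -2.0; x <= 2.0; x += 0.5) {
    // Centered difference: (f(x + h) - f(x - h)) / (2 * h).
    const double estimated =
        (f(x + stepsize) - f(x - stepsize)) / (2. * stepsize);
    const double computed = analytic_gradient(x);
    // Scale the tolerance by the gradient magnitude, similar in spirit to the
    // checker's scale = max(|computed|, |estimated|, 1).
    const double scale = std::max(
        std::max(std::fabs(computed), std::fabs(estimated)), 1.0);
    assert(std::fabs(computed - estimated) <= threshold * scale);
    std::printf("x=%+.2f analytic=%+.4f numeric=%+.4f\n", x, computed,
                estimated);
  }
  return 0;
}

For an element-wise layer the Jacobian is diagonal, which is why CheckGradientEltwise can call CheckGradientSingle once per top element and only evaluate the finite difference above at the matching bottom element, checking that the analytic gradient is zero everywhere else.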