Merge pull request #363 from jeffdonahue/speedup-gradient-check
Gradient check speedups
shelhamer committed Apr 25, 2014
2 parents a32e930 + 2c5f9dd commit c6f1011
Showing 9 changed files with 103 additions and 47 deletions.
1 change: 1 addition & 0 deletions include/caffe/blob.hpp
@@ -19,6 +19,7 @@ class Blob {
const int width);
void Reshape(const int num, const int channels, const int height,
const int width);
void ReshapeLike(const Blob& other);
inline int num() const { return num_; }
inline int channels() const { return channels_; }
inline int height() const { return height_; }
5 changes: 5 additions & 0 deletions src/caffe/blob.cpp
@@ -31,6 +31,11 @@ void Blob<Dtype>::Reshape(const int num, const int channels, const int height,
}
}

template <typename Dtype>
void Blob<Dtype>::ReshapeLike(const Blob<Dtype>& other) {
Reshape(other.num(), other.channels(), other.height(), other.width());
}

template <typename Dtype>
Blob<Dtype>::Blob(const int num, const int channels, const int height,
const int width) {
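The ReshapeLike helper added above exists so the gradient checker can allocate a scratch blob with exactly the shape of a blob under test and snapshot its analytic gradient into it. A minimal sketch of that pattern, assuming the usual Caffe headers; the function name SnapshotDiff is illustrative and not part of the commit:

#include "caffe/blob.hpp"
#include "caffe/util/math_functions.hpp"

// Copy a blob's diff (the analytic gradient written by Backward) into a
// scratch blob of identical shape, as the updated checker does below.
template <typename Dtype>
void SnapshotDiff(const caffe::Blob<Dtype>& src, caffe::Blob<Dtype>* snapshot) {
  snapshot->ReshapeLike(src);  // same num, channels, height, width as src
  caffe::caffe_copy(src.count(), src.cpu_diff(), snapshot->mutable_cpu_data());
}
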
4 changes: 2 additions & 2 deletions src/caffe/test/test_eltwise_product_layer.cpp
@@ -102,7 +102,7 @@ TYPED_TEST(EltwiseProductLayerTest, TestCPUGradient) {
LayerParameter layer_param;
EltwiseProductLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

@@ -111,7 +111,7 @@ TYPED_TEST(EltwiseProductLayerTest, TestGPUGradient) {
LayerParameter layer_param;
EltwiseProductLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

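The same one-line substitution recurs throughout the test files below. For orientation, a condensed before/after sketch of the change (not an exact copy of any single test); the two constructor arguments are the checker's finite-difference step size and comparison threshold:

// Before: for each top element, every bottom element is perturbed.
GradientChecker<TypeParam> checker(1e-2, 1e-3);  // stepsize, threshold
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
    &(this->blob_top_vec_));

// After: for each top element, only the same-indexed bottom element is
// perturbed; the remaining partial derivatives are known to be zero for
// these element-wise layers and are only verified to be (numerically) zero.
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
    &(this->blob_top_vec_));
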
4 changes: 2 additions & 2 deletions src/caffe/test/test_flatten_layer.cpp
@@ -84,7 +84,7 @@ TYPED_TEST(FlattenLayerTest, TestCPUGradient) {
Caffe::set_mode(Caffe::CPU);
FlattenLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

@@ -93,7 +93,7 @@ TYPED_TEST(FlattenLayerTest, TestGPUGradient) {
Caffe::set_mode(Caffe::GPU);
FlattenLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

108 changes: 79 additions & 29 deletions src/caffe/test/test_gradient_check_util.hpp
@@ -40,9 +40,15 @@ class GradientChecker {
vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top,
int check_bottom = -1);

// CheckGradientEltwise can be used to test layers that perform element-wise
// computation only (e.g., neuron layers) -- where (d y_i) / (d x_j) = 0 when
// i != j.
void CheckGradientEltwise(Layer<Dtype>* layer,
vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top);
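(A concrete instance of the property described in the comment above, added here for illustration rather than taken from the header: a sigmoid layer computes y_i = 1 / (1 + exp(-x_i)), so d y_i / d x_i = y_i * (1 - y_i) while d y_i / d x_j = 0 for every j != i. CheckGradientEltwise therefore finite-differences only the diagonal entries and verifies that the analytic gradient is zero everywhere else.)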

void CheckGradientSingle(Layer<Dtype>* layer, vector<Blob<Dtype>*>* bottom,
vector<Blob<Dtype>*>* top, int check_bottom, int top_id,
int top_data_id);
int top_data_id, bool element_wise = false);

// Checks the gradient of a network. This network should not have any data
// layers or loss layers, since the function does not explicitly deal with
@@ -62,13 +68,19 @@ class GradientChecker {
};


// Detailed implementations are as follows.


template <typename Dtype>
void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top,
int check_bottom, int top_id, int top_data_id) {
int check_bottom, int top_id, int top_data_id, bool element_wise) {
if (element_wise) {
CHECK_EQ(0, layer->blobs().size());
CHECK_LE(0, top_id);
CHECK_LE(0, top_data_id);
const int top_count = (*top)[top_id]->count();
for (int blob_id = 0; blob_id < bottom->size(); ++blob_id) {
CHECK_EQ(top_count, (*bottom)[blob_id]->count());
}
}
// First, figure out what blobs we need to check against.
vector<Blob<Dtype>*> blobs_to_check;
for (int i = 0; i < layer->blobs().size(); ++i) {
@@ -82,36 +94,60 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
CHECK(check_bottom < bottom->size());
blobs_to_check.push_back((*bottom)[check_bottom]);
}
// go through the bottom and parameter blobs
// Compute the gradient analytically using Backward
Caffe::set_random_seed(seed_);
// Get any loss from the layer
Dtype computed_objective = layer->Forward(*bottom, top);
// Get additional loss from the objective
computed_objective += GetObjAndGradient(top, top_id, top_data_id);
layer->Backward(*top, true, bottom);
// Store computed gradients for all checked blobs
vector<shared_ptr<Blob<Dtype> > >
computed_gradient_blobs(blobs_to_check.size());
for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
Blob<Dtype>* current_blob = blobs_to_check[blob_id];
computed_gradient_blobs[blob_id].reset(new Blob<Dtype>());
computed_gradient_blobs[blob_id]->ReshapeLike(*current_blob);
const int count = blobs_to_check[blob_id]->count();
const Dtype* diff = blobs_to_check[blob_id]->cpu_diff();
Dtype* computed_gradients =
computed_gradient_blobs[blob_id]->mutable_cpu_data();
caffe_copy(count, diff, computed_gradients);
}
// Compute derivative of top w.r.t. each bottom and parameter input using
// finite differencing.
// LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) {
Blob<Dtype>* current_blob = blobs_to_check[blob_id];
const Dtype* computed_gradients =
computed_gradient_blobs[blob_id]->cpu_data();
// LOG(ERROR) << "Blob " << blob_id << ": checking "
// << current_blob->count() << " parameters.";
// go through the values
for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
// First, obtain the original data
Caffe::set_random_seed(seed_);
// Get any loss from the layer
Dtype computed_objective = layer->Forward(*bottom, top);
// Get additional loss from the objective
computed_objective += GetObjAndGradient(top, top_id, top_data_id);
layer->Backward(*top, true, bottom);
Dtype computed_gradient = current_blob->cpu_diff()[feat_id];
// compute score by adding stepsize
current_blob->mutable_cpu_data()[feat_id] += stepsize_;
Caffe::set_random_seed(seed_);
Dtype positive_objective = layer->Forward(*bottom, top);
positive_objective += GetObjAndGradient(top, top_id, top_data_id);
// compute score by subtracting stepsize
current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
Caffe::set_random_seed(seed_);
Dtype negative_objective = layer->Forward(*bottom, top);
negative_objective += GetObjAndGradient(top, top_id, top_data_id);
// Recover stepsize
current_blob->mutable_cpu_data()[feat_id] += stepsize_;
Dtype estimated_gradient = (positive_objective - negative_objective) /
stepsize_ / 2.;
// For an element-wise layer, we only need to do finite differencing to
// compute the derivative of (*top)[top_id][top_data_id] w.r.t.
// (*bottom)[blob_id][i] only for i == top_data_id. For any other
// i != top_data_id, we know the derivative is 0 by definition, and simply
// check that that's true.
Dtype estimated_gradient = 0;
if (!element_wise || (feat_id == top_data_id)) {
// Do finite differencing.
// Compute loss with stepsize_ added to input.
current_blob->mutable_cpu_data()[feat_id] += stepsize_;
Caffe::set_random_seed(seed_);
Dtype positive_objective = layer->Forward(*bottom, top);
positive_objective += GetObjAndGradient(top, top_id, top_data_id);
// Compute loss with stepsize_ subtracted from input.
current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
Caffe::set_random_seed(seed_);
Dtype negative_objective = layer->Forward(*bottom, top);
negative_objective += GetObjAndGradient(top, top_id, top_data_id);
// Recover original input value.
current_blob->mutable_cpu_data()[feat_id] += stepsize_;
estimated_gradient = (positive_objective - negative_objective) /
stepsize_ / 2.;
}
Dtype computed_gradient = computed_gradients[feat_id];
Dtype feature = current_blob->cpu_data()[feat_id];
// LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " "
// << current_blob->cpu_diff()[feat_id];
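For reference, the perturbation code above implements a standard symmetric (central) difference. A standalone sketch of the estimator, with the scalar objective abstracted as a callable; the names CentralDifference and Objective are illustrative only:

// Same formula as (positive_objective - negative_objective) / stepsize_ / 2.
// used above; "objective" stands in for Forward plus GetObjAndGradient.
template <typename Dtype, typename Objective>
Dtype CentralDifference(Objective objective, Dtype* x, Dtype step) {
  *x += step;
  const Dtype positive = objective();  // loss evaluated at x + step
  *x -= 2 * step;
  const Dtype negative = objective();  // loss evaluated at x - step
  *x += step;                          // restore the original value of x
  return (positive - negative) / (2 * step);
}
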
@@ -146,6 +182,20 @@ void GradientChecker<Dtype>::CheckGradientExhaustive(Layer<Dtype>* layer,
}
}

template <typename Dtype>
void GradientChecker<Dtype>::CheckGradientEltwise(Layer<Dtype>* layer,
vector<Blob<Dtype>*>* bottom, vector<Blob<Dtype>*>* top) {
layer->SetUp(*bottom, top);
CHECK_GT(top->size(), 0) << "Eltwise mode requires at least one top blob.";
const int check_bottom = -1;
const bool element_wise = true;
for (int i = 0; i < top->size(); ++i) {
for (int j = 0; j < (*top)[i]->count(); ++j) {
CheckGradientSingle(layer, bottom, top, check_bottom, i, j, element_wise);
}
}
}

template <typename Dtype>
void GradientChecker<Dtype>::CheckGradientNet(
const Net<Dtype>& net, const vector<Blob<Dtype>*>& input) {
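The restructuring above is where the speedup comes from: the analytic gradient is now computed with a single Forward/Backward per objective and cached in computed_gradient_blobs, and in element-wise mode only the matching input element is finite-differenced. A rough cost sketch, assuming a single bottom blob with N elements, an element-wise layer, and counting only the extra Forward passes used for finite differencing:

CheckGradientExhaustive: N top elements x N bottom elements x 2 Forward passes, roughly 2*N^2
CheckGradientEltwise:    N top elements x 1 bottom element  x 2 Forward passes, roughly 2*N

For a hypothetical 120-element blob that is on the order of 28,800 versus 240 finite-difference Forward passes.
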
14 changes: 7 additions & 7 deletions src/caffe/test/test_neuron_layer.cpp
@@ -61,7 +61,7 @@ TYPED_TEST(NeuronLayerTest, TestReLUGradientCPU) {
Caffe::set_mode(Caffe::CPU);
ReLULayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

@@ -87,7 +87,7 @@ TYPED_TEST(NeuronLayerTest, TestReLUGradientGPU) {
Caffe::set_mode(Caffe::GPU);
ReLULayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

@@ -115,7 +115,7 @@ TYPED_TEST(NeuronLayerTest, TestSigmoidGradientCPU) {
Caffe::set_mode(Caffe::CPU);
SigmoidLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

@@ -142,7 +142,7 @@ TYPED_TEST(NeuronLayerTest, TestSigmoidGradientGPU) {
Caffe::set_mode(Caffe::GPU);
SigmoidLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3, 1701, 0., 0.01);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

@@ -172,7 +172,7 @@ TYPED_TEST(NeuronLayerTest, TestDropoutGradientCPU) {
Caffe::set_mode(Caffe::CPU);
DropoutLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

@@ -271,7 +271,7 @@ TYPED_TEST(NeuronLayerTest, TestBNLLGradientCPU) {
Caffe::set_mode(Caffe::CPU);
BNLLLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

@@ -297,7 +297,7 @@ TYPED_TEST(NeuronLayerTest, TestBNLLGradientGPU) {
Caffe::set_mode(Caffe::GPU);
BNLLLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

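A note on the five-argument checker used in the ReLU and sigmoid tests above: the arguments are, in order, step size, threshold, RNG seed, kink, and kink_range (the constructor is declared in the portion of test_gradient_check_util.hpp not shown in these hunks). The kink settings matter for ReLU, whose derivative is discontinuous at zero: feature values within kink_range of the kink are skipped, since a symmetric finite difference straddling the kink approximates neither one-sided derivative. A condensed restatement of that skip logic, not an excerpt from the file:

#include <cmath>

// Compare analytic and estimated gradients only when |feature| lies outside
// the band [kink - kink_range, kink + kink_range].
inline bool ShouldCompareGradient(float feature, float kink, float kink_range) {
  const float magnitude = std::fabs(feature);
  return magnitude < kink - kink_range || magnitude > kink + kink_range;
}
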
2 changes: 1 addition & 1 deletion src/caffe/test/test_power_layer.cpp
@@ -79,7 +79,7 @@ class PowerLayerTest : public ::testing::Test {
}
}
GradientChecker<Dtype> checker(1e-2, 1e-2, 1701, 0., 0.01);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

8 changes: 4 additions & 4 deletions src/caffe/test/test_split_layer.cpp
@@ -121,7 +121,7 @@ TYPED_TEST(SplitLayerTest, TestCPUGradient) {
Caffe::set_mode(Caffe::CPU);
SplitLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

@@ -130,7 +130,7 @@ TYPED_TEST(SplitLayerTest, TestGPUGradient) {
Caffe::set_mode(Caffe::GPU);
SplitLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

@@ -140,7 +140,7 @@ TYPED_TEST(SplitLayerTest, TestCPUGradientInPlace) {
SplitLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

@@ -150,7 +150,7 @@ TYPED_TEST(SplitLayerTest, TestGPUGradientInPlace) {
SplitLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
this->blob_top_vec_[0] = this->blob_bottom_vec_[0];
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

4 changes: 2 additions & 2 deletions src/caffe/test/test_tanh_layer.cpp
@@ -70,7 +70,7 @@ TYPED_TEST(TanHLayerTest, TestGradientCPU) {
Caffe::set_mode(Caffe::CPU);
TanHLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

@@ -102,7 +102,7 @@ TYPED_TEST(TanHLayerTest, TestGradientGPU) {
Caffe::set_mode(Caffe::GPU);
TanHLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-3);
checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}
