From 03c8a619e7fbfa98b8c477b42c1faf128aec750b Mon Sep 17 00:00:00 2001 From: Kai Li Date: Tue, 25 Feb 2014 18:23:04 +0800 Subject: [PATCH 1/8] Add __builtin_popcount* based fast Hamming distance math function --- include/caffe/util/math_functions.hpp | 4 ++ src/caffe/test/test_math_functions.cpp | 77 ++++++++++++++++++++++++++ src/caffe/util/math_functions.cpp | 23 ++++++++ 3 files changed, 104 insertions(+) create mode 100644 src/caffe/test/test_math_functions.cpp diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index e9e2db8f274..26abb2d02c2 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -1,4 +1,5 @@ // Copyright 2013 Yangqing Jia +// Copyright 2014 kloudkl@github #ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_ #define CAFFE_UTIL_MATH_FUNCTIONS_H_ @@ -100,6 +101,9 @@ Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y); template void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out); +template +int caffe_hamming_distance(const int n, const Dtype* x, const Dtype* y); + } // namespace caffe diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp new file mode 100644 index 00000000000..0e313eefb4f --- /dev/null +++ b/src/caffe/test/test_math_functions.cpp @@ -0,0 +1,77 @@ +// Copyright 2014 kloudkl@github + +#include // for uint32_t & uint64_t + +#include "gtest/gtest.h" +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/util/math_functions.hpp" + +#include "caffe/test/test_caffe_main.hpp" + +namespace caffe { + +template +class MathFunctionsTest : public ::testing::Test { + protected: + MathFunctionsTest() + : blob_bottom_(new Blob()), + blob_top_(new Blob()) { + } + + virtual void SetUp() { + Caffe::set_random_seed(1701); + this->blob_bottom_->Reshape(100, 70, 50, 30); + this->blob_top_->Reshape(100, 70, 50, 30); + // fill the values + FillerParameter filler_param; 
+ GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + filler.Fill(this->blob_top_); + } + + virtual ~MathFunctionsTest() { + delete blob_bottom_; + delete blob_top_; + } + // http://en.wikipedia.org/wiki/Hamming_distance + int ReferenceHammingDistance(const int n, const Dtype* x, const Dtype* y); + + Blob* const blob_bottom_; + Blob* const blob_top_; +}; + +#define REF_HAMMING_DIST(float_type, int_type) \ +template<> \ +int MathFunctionsTest::ReferenceHammingDistance(const int n, \ + const float_type* x, \ + const float_type* y) { \ + int dist = 0; \ + int_type val; \ + for (int i = 0; i < n; ++i) { \ + val = static_cast(x[i]) ^ static_cast(y[i]); \ + /* Count the number of set bits */ \ + while (val) { \ + ++dist; \ + val &= val - 1; \ + } \ + } \ + return dist; \ +} + +REF_HAMMING_DIST(float, uint32_t); +REF_HAMMING_DIST(double, uint64_t); + +typedef ::testing::Types Dtypes; +TYPED_TEST_CASE(MathFunctionsTest, Dtypes); + +TYPED_TEST(MathFunctionsTest, TestHammingDistance){ + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + const TypeParam* y = this->blob_top_->cpu_data(); + CHECK_EQ(this->ReferenceHammingDistance(n, x, y), + caffe_hamming_distance(n, x, y)); +} + +} diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index 60656b87093..790f00eaf0e 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -1,4 +1,5 @@ // Copyright 2013 Yangqing Jia +// Copyright 2014 kloudkl@github #include #include @@ -293,4 +294,26 @@ void caffe_gpu_dot(const int n, const double* x, const double* y, CUBLAS_CHECK(cublasDdot(Caffe::cublas_handle(), n, x, 1, y, 1, out)); } +template <> +int caffe_hamming_distance(const int n, const float* x, + const float* y) { + int dist = 0; + for (int i = 0; i < n; ++i) { + dist += __builtin_popcount(static_cast(x[i]) ^ + static_cast(y[i])); + } + return dist; +} + +template <> +int caffe_hamming_distance(const int n, 
const double* x, + const double* y) { + int dist = 0; + for (int i = 0; i < n; ++i) { + dist += __builtin_popcountl(static_cast(x[i]) ^ + static_cast(y[i])); + } + return dist; +} + } // namespace caffe From 0c30c067c1162f5b8780a8b1c6fd4d974b3d46e8 Mon Sep 17 00:00:00 2001 From: Kai Li Date: Tue, 25 Feb 2014 18:41:45 +0800 Subject: [PATCH 2/8] Add and test sum of absolute values math functions for CPU and GPU --- include/caffe/util/math_functions.hpp | 7 +++++++ src/caffe/test/test_math_functions.cpp | 24 ++++++++++++++++++++++++ src/caffe/util/math_functions.cpp | 20 ++++++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 26abb2d02c2..8271c768a66 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -104,6 +104,13 @@ void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out); template int caffe_hamming_distance(const int n, const Dtype* x, const Dtype* y); +// Returns the sum of the absolute values of the elements of vector x +template +Dtype caffe_cpu_asum(const int n, const Dtype* x); + +template +void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y); + } // namespace caffe diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp index 0e313eefb4f..c7ba4803a5f 100644 --- a/src/caffe/test/test_math_functions.cpp +++ b/src/caffe/test/test_math_functions.cpp @@ -1,6 +1,7 @@ // Copyright 2014 kloudkl@github #include // for uint32_t & uint64_t +#include // for std::fabs #include "gtest/gtest.h" #include "caffe/blob.hpp" @@ -74,4 +75,27 @@ TYPED_TEST(MathFunctionsTest, TestHammingDistance){ caffe_hamming_distance(n, x, y)); } +TYPED_TEST(MathFunctionsTest, TestAsumCPU){ + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + TypeParam std_asum = 0; + for (int i = 0; i < n; ++i) { + std_asum += std::fabs(x[i]); + } + TypeParam cpu_asum 
= caffe_cpu_asum(n, x); + CHECK_LT((cpu_asum - std_asum) / std_asum, 1e-2); +} + +TYPED_TEST(MathFunctionsTest, TestAsumGPU){ + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + TypeParam std_asum = 0; + for (int i = 0; i < n; ++i) { + std_asum += std::fabs(x[i]); + } + TypeParam gpu_asum; + caffe_gpu_asum(n, this->blob_bottom_->gpu_data(), &gpu_asum); + CHECK_LT((gpu_asum - std_asum) / std_asum, 1e-2); +} + } diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index 790f00eaf0e..5ff4e806481 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -316,4 +316,24 @@ int caffe_hamming_distance(const int n, const double* x, return dist; } +template <> +float caffe_cpu_asum(const int n, const float* x) { + return cblas_sasum(n, x, 1); +} + +template <> +double caffe_cpu_asum(const int n, const double* x) { + return cblas_dasum(n, x, 1); +} + +template <> +void caffe_gpu_asum(const int n, const float* x, float* y) { + CUBLAS_CHECK(cublasSasum(Caffe::cublas_handle(), n, x, 1, y)); +} + +template <> +void caffe_gpu_asum(const int n, const double* x, double* y) { + CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), n, x, 1, y)); +} + } // namespace caffe From 109f10e5d484c89dae804276522d348a9804bd33 Mon Sep 17 00:00:00 2001 From: Kai Li Date: Tue, 25 Feb 2014 19:16:44 +0800 Subject: [PATCH 3/8] Add and test element wise sign math functions for CPU and GPU --- include/caffe/util/math_functions.hpp | 17 +++++++++++++++++ src/caffe/test/test_math_functions.cpp | 21 +++++++++++++++++++++ src/caffe/util/math_functions.cu | 20 ++++++++++++++++++++ 3 files changed, 58 insertions(+) diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 8271c768a66..c828d69b9e3 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -111,6 +111,23 @@ Dtype caffe_cpu_asum(const int n, const Dtype* x); template 
void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y); +// the branchless, type-safe version from +// http://stackoverflow.com/questions/1903954/is-there-a-standard-sign-function-signum-sgn-in-c-c +template +inline char caffe_sign(Dtype val) { + return (Dtype(0) < val) - (val < Dtype(0)); +} + +template +void caffe_cpu_sign(const int n, const Dtype* x, Dtype* y) { + for (int i = 0; i < n; ++i) { + y[i] = caffe_sign(x[i]); + } +} + +template +void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y); + } // namespace caffe diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp index c7ba4803a5f..09b4aa67719 100644 --- a/src/caffe/test/test_math_functions.cpp +++ b/src/caffe/test/test_math_functions.cpp @@ -98,4 +98,25 @@ TYPED_TEST(MathFunctionsTest, TestAsumGPU){ CHECK_LT((gpu_asum - std_asum) / std_asum, 1e-2); } +TYPED_TEST(MathFunctionsTest, TestSignCPU){ + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + caffe_cpu_sign(n, x, this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* signs = this->blob_bottom_->cpu_diff(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signs[i], x[i] > 0 ? 1 : (x[i] < 0 ? -1 : 0)); + } +} + +TYPED_TEST(MathFunctionsTest, TestSignGPU){ + int n = this->blob_bottom_->count(); + caffe_gpu_sign(n, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* signs = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signs[i], x[i] > 0 ? 1 : (x[i] < 0 ? 
-1 : 0)); + } +} + } diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu index b29a58abe7f..e6d50baa2e7 100644 --- a/src/caffe/util/math_functions.cu +++ b/src/caffe/util/math_functions.cu @@ -1,4 +1,5 @@ // Copyright 2013 Yangqing Jia +// Copyright 2014 kloudkl@github #include #include @@ -34,5 +35,24 @@ void caffe_gpu_mul(const int N, const double* a, N, a, b, y); } +template +__global__ void sign_kernel(const int n, const Dtype* x, Dtype* y) { + int index = threadIdx.x + blockIdx.x * blockDim.x; + if (index < n) { + y[index] = (Dtype(0) < x[index]) - (x[index] < Dtype(0)); + } +} + +template <> +void caffe_gpu_sign(const int n, const float* x, float* y) { + sign_kernel<<>>( + n, x, y); +} + +template <> +void caffe_gpu_sign(const int n, const double* x, double* y) { + sign_kernel<<>>( + n, x, y); +} } // namespace caffe From 573dc2cde2fb718e5ba8d2cefe31a62ff7577167 Mon Sep 17 00:00:00 2001 From: Kai Li Date: Tue, 25 Feb 2014 19:21:18 +0800 Subject: [PATCH 4/8] Instantiate caffe_cpu_sign for float and double --- src/caffe/util/math_functions.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index 5ff4e806481..11b31d78f80 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -336,4 +336,10 @@ void caffe_gpu_asum(const int n, const double* x, double* y) { CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), n, x, 1, y)); } +template <> +void caffe_cpu_sign(const int n, const float* x, float* y); + +template <> +void caffe_cpu_sign(const int n, const double* x, double* y); + } // namespace caffe From 2f09616d8e6b9bd745a5b12d16957994c18cbb17 Mon Sep 17 00:00:00 2001 From: Kai Li Date: Tue, 25 Feb 2014 19:33:10 +0800 Subject: [PATCH 5/8] Add and test element wise abs math functions for CPU and GPU --- include/caffe/util/math_functions.hpp | 11 +++++++++++ src/caffe/test/test_math_functions.cpp | 21 +++++++++++++++++++++ 
src/caffe/util/math_functions.cpp | 6 ++++++ src/caffe/util/math_functions.cu | 21 +++++++++++++++++++++ 4 files changed, 59 insertions(+) diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index c828d69b9e3..3f8beec593d 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -4,6 +4,7 @@ #ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_ #define CAFFE_UTIL_MATH_FUNCTIONS_H_ +#include // for std::fabs #include #include @@ -128,6 +129,16 @@ void caffe_cpu_sign(const int n, const Dtype* x, Dtype* y) { template void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y); +template +void caffe_cpu_fabs(const int n, const Dtype* x, Dtype* y) { + for (int i = 0; i < n; ++i) { + y[i] = std::fabs(x[i]); + } +} + +template +void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y); + } // namespace caffe diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp index 09b4aa67719..0f46273d257 100644 --- a/src/caffe/test/test_math_functions.cpp +++ b/src/caffe/test/test_math_functions.cpp @@ -119,4 +119,25 @@ TYPED_TEST(MathFunctionsTest, TestSignGPU){ } } +TYPED_TEST(MathFunctionsTest, TestFabsCPU){ + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + caffe_cpu_fabs(n, x, this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* abs_val = this->blob_bottom_->cpu_diff(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(abs_val[i], x[i] > 0 ? x[i] : -x[i]); + } +} + +TYPED_TEST(MathFunctionsTest, TestFabsGPU){ + int n = this->blob_bottom_->count(); + caffe_gpu_fabs(n, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* abs_val = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(abs_val[i], x[i] > 0 ? 
x[i] : -x[i]); + } +} + } diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index 11b31d78f80..ecee2f2e2ee 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -342,4 +342,10 @@ void caffe_cpu_sign(const int n, const float* x, float* y); template <> void caffe_cpu_sign(const int n, const double* x, double* y); +template <> +void caffe_cpu_fabs(const int n, const float* x, float* y); + +template <> +void caffe_cpu_fabs(const int n, const double* x, double* y); + } // namespace caffe diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu index e6d50baa2e7..8ecd72c49d0 100644 --- a/src/caffe/util/math_functions.cu +++ b/src/caffe/util/math_functions.cu @@ -4,6 +4,7 @@ #include #include #include +#include // CUDA's, not caffe's, for fabs #include "caffe/common.hpp" #include "caffe/util/math_functions.hpp" @@ -55,4 +56,24 @@ void caffe_gpu_sign(const int n, const double* x, double* y) { n, x, y); } +template +__global__ void fabs_kernel(const int n, const Dtype* x, Dtype* y) { + int index = threadIdx.x + blockIdx.x * blockDim.x; + if (index < n) { + y[index] = fabs(x[index]); + } +} + +template <> +void caffe_gpu_fabs(const int n, const float* x, float* y) { + fabs_kernel<<>>( + n, x, y); +} + +template <> +void caffe_gpu_fabs(const int n, const double* x, double* y) { + fabs_kernel<<>>( + n, x, y); +} + } // namespace caffe From f21ff16f7c2708508b9cf678efc9599bb3ab1594 Mon Sep 17 00:00:00 2001 From: Kai Li Date: Tue, 25 Feb 2014 20:06:05 +0800 Subject: [PATCH 6/8] Use macro to simplify element wise cpu math functions --- include/caffe/util/math_functions.hpp | 33 ++++++++++++++++++--------- src/caffe/util/math_functions.cpp | 13 ++--------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 3f8beec593d..895b5752c2c 100644 --- a/include/caffe/util/math_functions.hpp +++ 
b/include/caffe/util/math_functions.hpp @@ -119,22 +119,33 @@ inline char caffe_sign(Dtype val) { return (Dtype(0) < val) - (val < Dtype(0)); } -template -void caffe_cpu_sign(const int n, const Dtype* x, Dtype* y) { - for (int i = 0; i < n; ++i) { - y[i] = caffe_sign(x[i]); +// The following two macros are modifications of DEFINE_VSL_UNARY_FUNC +// in include/caffe/util/mkl_alternate.hpp authored by @Rowland Depp. +// Please refer to commit 7e8ef25c7 of the boost-eigen branch. +// Git cherry picking that commit caused a conflict hard to resolve and +// copying that file in convenient for code reviewing. +// So they have to be pasted here temporarily. +#define DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) \ + template \ + void caffe_cpu_##name(const int n, const Dtype* x, Dtype* y) { \ + CHECK_GT(n, 0); CHECK(x); CHECK(y); \ + for (int i = 0; i < n; ++i) { \ + operation; \ + } \ } -} + +#define INSTANTIATE_CAFFE_CPU_UNARY_FUNC(name) \ + template <> \ + void caffe_cpu_##name(const int n, const float* x, float* y); \ + template <> \ + void caffe_cpu_##name(const int n, const double* x, double* y) + +DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign(x[i])); template void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y); -template -void caffe_cpu_fabs(const int n, const Dtype* x, Dtype* y) { - for (int i = 0; i < n; ++i) { - y[i] = std::fabs(x[i]); - } -} +DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i])); template void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y); diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index ecee2f2e2ee..50e5662e207 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -336,16 +336,7 @@ void caffe_gpu_asum(const int n, const double* x, double* y) { CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), n, x, 1, y)); } -template <> -void caffe_cpu_sign(const int n, const float* x, float* y); - -template <> -void caffe_cpu_sign(const int n, const double* x, 
double* y); - -template <> -void caffe_cpu_fabs(const int n, const float* x, float* y); - -template <> -void caffe_cpu_fabs(const int n, const double* x, double* y); +INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sign); +INSTANTIATE_CAFFE_CPU_UNARY_FUNC(fabs); } // namespace caffe From a5333f7804762383671c85427337a19f3bbc52bf Mon Sep 17 00:00:00 2001 From: Kai Li Date: Tue, 25 Feb 2014 20:26:55 +0800 Subject: [PATCH 7/8] Add and test non-in-place scale math functions for CPU and GPU --- include/caffe/util/math_functions.hpp | 6 ++++++ src/caffe/test/test_math_functions.cpp | 26 ++++++++++++++++++++++++ src/caffe/util/math_functions.cpp | 28 ++++++++++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 895b5752c2c..222e29a0134 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -150,6 +150,12 @@ DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i])); template void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y); +template +void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y); + +template +void caffe_gpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y); + } // namespace caffe diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp index 0f46273d257..00f28badb86 100644 --- a/src/caffe/test/test_math_functions.cpp +++ b/src/caffe/test/test_math_functions.cpp @@ -140,4 +140,30 @@ TYPED_TEST(MathFunctionsTest, TestFabsGPU){ } } +TYPED_TEST(MathFunctionsTest, TestScaleCPU){ + int n = this->blob_bottom_->count(); + TypeParam alpha = this->blob_bottom_->cpu_diff()[rand() % + this->blob_bottom_->count()]; + caffe_cpu_scale(n, alpha, this->blob_bottom_->cpu_data(), + this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* scaled = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + 
CHECK_EQ(scaled[i], x[i] * alpha); + } +} + +TYPED_TEST(MathFunctionsTest, TestScaleGPU){ + int n = this->blob_bottom_->count(); + TypeParam alpha = this->blob_bottom_->cpu_diff()[rand() % + this->blob_bottom_->count()]; + caffe_gpu_scale(n, alpha, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* scaled = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(scaled[i], x[i] * alpha); + } +} + } diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index 50e5662e207..b822e076937 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -339,4 +339,32 @@ void caffe_gpu_asum(const int n, const double* x, double* y) { INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sign); INSTANTIATE_CAFFE_CPU_UNARY_FUNC(fabs); +template <> +void caffe_cpu_scale(const int n, const float alpha, const float *x, + float* y) { + cblas_scopy(n, x, 1, y, 1); + cblas_sscal(n, alpha, y, 1); +} + +template <> +void caffe_cpu_scale(const int n, const double alpha, const double *x, + double* y) { + cblas_dcopy(n, x, 1, y, 1); + cblas_dscal(n, alpha, y, 1); +} + +template <> +void caffe_gpu_scale(const int n, const float alpha, const float *x, + float* y) { + CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(), n, x, 1, y, 1)); + CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), n, &alpha, y, 1)); +} + +template <> +void caffe_gpu_scale(const int n, const double alpha, const double *x, + double* y) { + CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(), n, x, 1, y, 1)); + CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), n, &alpha, y, 1)); +} + } // namespace caffe From c9d905618cc435b23e06c1cf667726ccc6a68a00 Mon Sep 17 00:00:00 2001 From: Kai Li Date: Wed, 26 Feb 2014 11:23:20 +0800 Subject: [PATCH 8/8] Add signbit math func, simplify GPU defs & instantiations with a macro --- include/caffe/util/math_functions.hpp | 31 ++++++++++++++++-- 
src/caffe/test/test_math_functions.cpp | 21 ++++++++++++ src/caffe/util/math_functions.cpp | 1 + src/caffe/util/math_functions.cu | 44 +++----------------------- 4 files changed, 54 insertions(+), 43 deletions(-) diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 222e29a0134..003d07cd4d3 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -5,6 +5,7 @@ #define CAFFE_UTIL_MATH_FUNCTIONS_H_ #include // for std::fabs +#include // for signbit #include #include @@ -122,9 +123,6 @@ inline char caffe_sign(Dtype val) { // The following two macros are modifications of DEFINE_VSL_UNARY_FUNC // in include/caffe/util/mkl_alternate.hpp authored by @Rowland Depp. // Please refer to commit 7e8ef25c7 of the boost-eigen branch. -// Git cherry picking that commit caused a conflict hard to resolve and -// copying that file in convenient for code reviewing. -// So they have to be pasted here temporarily. #define DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) \ template \ void caffe_cpu_##name(const int n, const Dtype* x, Dtype* y) { \ @@ -140,11 +138,38 @@ inline char caffe_sign(Dtype val) { template <> \ void caffe_cpu_##name(const int n, const double* x, double* y) + +#define DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation) \ +template \ +__global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) { \ + int index = threadIdx.x + blockIdx.x * blockDim.x; \ + if (index < n) { \ + operation; \ + } \ +} \ +template <> \ +void caffe_gpu_##name(const int n, const float* x, float* y) { \ + name##_kernel<<>>( \ + n, x, y); \ +} \ +template <> \ +void caffe_gpu_##name(const int n, const double* x, double* y) { \ + name##_kernel<<>>( \ + n, x, y); \ +} + +// output is 1 for the positives, 0 for zero, and -1 for the negatives DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign(x[i])); template void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y); +// returns a nonzero value if the 
input has its sign bit set. +DEFINE_CAFFE_CPU_UNARY_FUNC(signbit, y[i] = std::signbit(x[i])); + +template +void caffe_gpu_signbit(const int n, const Dtype* x, Dtype* y); + DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i])); template diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp index 00f28badb86..d314d73b45c 100644 --- a/src/caffe/test/test_math_functions.cpp +++ b/src/caffe/test/test_math_functions.cpp @@ -119,6 +119,27 @@ TYPED_TEST(MathFunctionsTest, TestSignGPU){ } } +TYPED_TEST(MathFunctionsTest, TestSignbitCPU){ + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + caffe_cpu_signbit(n, x, this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* signbits = this->blob_bottom_->cpu_diff(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signbits[i], x[i] < 0 ? 1 : 0); + } +} + +TYPED_TEST(MathFunctionsTest, TestSignbitGPU){ + int n = this->blob_bottom_->count(); + caffe_gpu_signbit(n, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* signbits = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signbits[i], x[i] < 0 ? 
1 : 0); + } +} + TYPED_TEST(MathFunctionsTest, TestFabsCPU){ int n = this->blob_bottom_->count(); const TypeParam* x = this->blob_bottom_->cpu_data(); diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index b822e076937..8a2f25e0c34 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -337,6 +337,7 @@ void caffe_gpu_asum(const int n, const double* x, double* y) { } INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sign); +INSTANTIATE_CAFFE_CPU_UNARY_FUNC(signbit); INSTANTIATE_CAFFE_CPU_UNARY_FUNC(fabs); template <> diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu index 8ecd72c49d0..3ca5fea9a64 100644 --- a/src/caffe/util/math_functions.cu +++ b/src/caffe/util/math_functions.cu @@ -4,7 +4,7 @@ #include #include #include -#include // CUDA's, not caffe's, for fabs +#include // CUDA's, not caffe's, for fabs, signbit #include "caffe/common.hpp" #include "caffe/util/math_functions.hpp" @@ -36,44 +36,8 @@ void caffe_gpu_mul(const int N, const double* a, N, a, b, y); } -template -__global__ void sign_kernel(const int n, const Dtype* x, Dtype* y) { - int index = threadIdx.x + blockIdx.x * blockDim.x; - if (index < n) { - y[index] = (Dtype(0) < x[index]) - (x[index] < Dtype(0)); - } -} - -template <> -void caffe_gpu_sign(const int n, const float* x, float* y) { - sign_kernel<<>>( - n, x, y); -} - -template <> -void caffe_gpu_sign(const int n, const double* x, double* y) { - sign_kernel<<>>( - n, x, y); -} - -template -__global__ void fabs_kernel(const int n, const Dtype* x, Dtype* y) { - int index = threadIdx.x + blockIdx.x * blockDim.x; - if (index < n) { - y[index] = fabs(x[index]); - } -} - -template <> -void caffe_gpu_fabs(const int n, const float* x, float* y) { - fabs_kernel<<>>( - n, x, y); -} - -template <> -void caffe_gpu_fabs(const int n, const double* x, double* y) { - fabs_kernel<<>>( - n, x, y); -} +DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index]) - 
(x[index] < Dtype(0))); +DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(signbit, y[index] = signbit(x[index])); +DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(fabs, y[index] = fabs(x[index])); } // namespace caffe