Add support for grouped convolutions #2485

Draft: wants to merge 8 commits into master
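
For reference, here is a minimal sketch of how the extended con_ template could be used once this is merged. The depthwise_con alias, the network layout, and all of the sizes are illustrative assumptions, not part of this patch; note that the stock con alias does not expose the new _groups parameter, so the layer is instantiated through add_layer directly.

    #include <dlib/dnn.h>

    using namespace dlib;

    // Hypothetical alias: 32 filters, 3x3 kernels, stride 1, padding 1, and 32 groups,
    // i.e. one filter per input channel (a depthwise convolution) when the layer
    // below produces 32 channels.
    template <typename SUBNET>
    using depthwise_con = add_layer<con_<32, 3, 3, 1, 1, 1, 1, 32>, SUBNET>;

    // A toy network that stacks a regular 3x3 convolution (32 outputs) and the
    // grouped convolution defined above.
    using net_type = loss_multiclass_log<fc<10,
                     relu<depthwise_con<
                     relu<con<32, 3, 3, 1, 1,
                     input<matrix<rgb_pixel>>>>>>>>;

With _groups left at its default of 1 the layer behaves exactly like the current con_, which keeps existing networks unchanged.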
9 changes: 6 additions & 3 deletions dlib/cuda/cpu_dlib.h
@@ -571,17 +571,19 @@ namespace dlib
int stride_y,
int stride_x,
int padding_y,
int padding_x
)
int padding_x,
long groups
)
{
(void)data; /* silence compiler */
DLIB_CASSERT(stride_y > 0 && stride_x > 0);
DLIB_CASSERT(0 <= padding_y && padding_y < filters.nr());
DLIB_CASSERT(0 <= padding_x && padding_x < filters.nc());
DLIB_CASSERT(groups >= 0);
last_stride_y = stride_y;
last_stride_x = stride_x;
last_padding_y = padding_y;
last_padding_x = padding_x;
this->groups = groups;
}

void operator() (
@@ -634,6 +636,7 @@ namespace dlib
long last_stride_x = 0;
long last_padding_y = 0;
long last_padding_x = 0;
long groups = 1;
};

// -----------------------------------------------------------------------------------
36 changes: 30 additions & 6 deletions dlib/cuda/cudnn_dlibapi.cpp
@@ -975,10 +975,26 @@ namespace dlib
int stride_y_,
int stride_x_,
int padding_y_,
int padding_x_
)
int padding_x_,
long groups_
)
{
DLIB_CASSERT(data.k() == filters.k());
DLIB_CASSERT(groups_ >= 0);
if (groups_ == 0)
{
groups_ = data.k();
}
else if (groups_ == 1)
{
DLIB_CASSERT(data.k() == filters.k());
}
else
{
DLIB_CASSERT(data.k() % filters.num_samples() == 0,
"The number of input channels (" << data.k()
<< ") must be a muliple of the number of filters ("
<< filters.num_samples() << ")");
}

// if the last call to setup gave the same exact settings then don't do
// anything.
@@ -991,7 +1007,8 @@
padding_y_ == padding_y &&
padding_x_ == padding_x &&
filters_num_samples == filters.num_samples() &&
filters_k == filters.k() &&
filters_k == filters.k() * groups_ &&
groups_ == groups &&
filters_nr == filters.nr() &&
filters_nc == filters.nc()
)
@@ -1014,6 +1031,7 @@
filters_k = filters.k();
filters_nr = filters.nr();
filters_nc = filters.nc();
groups = groups_;

CHECK_CUDNN(cudnnCreateFilterDescriptor((cudnnFilterDescriptor_t*)&filter_handle));
CHECK_CUDNN(cudnnSetFilter4dDescriptor((cudnnFilterDescriptor_t)filter_handle,
@@ -1044,6 +1062,12 @@
CUDNN_CROSS_CORRELATION)); // could also be CUDNN_CONVOLUTION
#endif

#if CUDNN_MAJOR >= 7
CHECK_CUDNN(cudnnSetConvolutionGroupCount((cudnnConvolutionDescriptor_t)conv_handle, groups));
#else
DLIB_CASSERT(groups == 1, "Grouped convolutions are not supported in cuDNN version " << CUDNN_MAJOR);
#endif

CHECK_CUDNN(cudnnGetConvolution2dForwardOutputDim(
(const cudnnConvolutionDescriptor_t)conv_handle,
descriptor(data),
@@ -1105,7 +1129,7 @@
{
DLIB_CASSERT(is_same_object(output,data) == false);
DLIB_CASSERT(is_same_object(output,filters) == false);
DLIB_CASSERT(filters.k() == data.k());
DLIB_CASSERT(filters.k() == data.k() / groups);
DLIB_CASSERT(stride_y > 0 && stride_x > 0, "You must call setup() before calling this function");
DLIB_CASSERT(filters.nc() <= data.nc() + 2*padding_x,
"Filter windows must be small enough to fit into the padded image."
@@ -1179,7 +1203,7 @@
{
DLIB_CASSERT(is_same_object(output,data) == false);
DLIB_CASSERT(is_same_object(output,filters) == false);
DLIB_CASSERT(filters.k() == data.k());
DLIB_CASSERT(filters.k() == data.k() / groups);
DLIB_CASSERT(stride_y > 0 && stride_x > 0, "You must call setup() before calling this function");
DLIB_CASSERT(filters.nc() <= data.nc() + 2*padding_x,
"Filter windows must be small enough to fit into the padded image."
4 changes: 3 additions & 1 deletion dlib/cuda/cudnn_dlibapi.h
@@ -221,7 +221,8 @@ namespace dlib
int stride_y,
int stride_x,
int padding_y,
int padding_x
int padding_x,
long groups
);

void setup(
@@ -243,6 +244,7 @@
int padding_x;
long data_num_samples, data_k, data_nr, data_nc;
long filters_num_samples, filters_k, filters_nr, filters_nc;
long groups;


void* filter_handle;
11 changes: 8 additions & 3 deletions dlib/cuda/tensor_tools.h
@@ -1177,11 +1177,16 @@ namespace dlib { namespace tt
int stride_y,
int stride_x,
int padding_y,
int padding_x
) {impl.setup(data,filters,stride_y,stride_x,padding_y,padding_x); }
int padding_x,
long groups
) {impl.setup(data,filters,stride_y,stride_x,padding_y,padding_x,groups); }
/*!
requires
- filters.k() == data.k()
- groups >= 1
- if groups == 1
- filters.k() == data.k()
- if groups > 1
- data.k() % filters.num_samples() == 0
- stride_y > 0
- stride_x > 0
- 0 <= padding_y < filters.nr()
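
As a concrete illustration of the requires clause above (the tensor shapes below are made up; only the extended setup signature comes from this patch), a depthwise-style call could look like:

    #include <dlib/dnn.h>

    void grouped_setup_example()
    {
        // 64 input channels split into 64 groups: each of the 64 filters then
        // spans data.k()/groups = 1 channel, so filters.k() == 1 and
        // data.k() % filters.num_samples() == 0 holds as required.
        dlib::resizable_tensor data(1, 64, 56, 56);   // samples, k, nr, nc
        dlib::resizable_tensor filters(64, 1, 3, 3);  // num_samples, k, nr, nc
        dlib::tt::tensor_conv conv;
        conv.setup(data, filters, 1, 1, 1, 1, 64);    // stride 1x1, padding 1x1, 64 groups
    }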
41 changes: 32 additions & 9 deletions dlib/dnn/layers.h
@@ -34,7 +34,8 @@ namespace dlib
int _stride_y,
int _stride_x,
int _padding_y = _stride_y!=1? 0 : _nr/2,
int _padding_x = _stride_x!=1? 0 : _nc/2
int _padding_x = _stride_x!=1? 0 : _nc/2,
long _groups = 1
>
class con_
{
@@ -49,6 +50,7 @@
static_assert(_nc==0 || (0 <= _padding_x && _padding_x < _nc), "The padding must be smaller than the filter size.");
static_assert(_nr!=0 || 0 == _padding_y, "If _nr==0 then the padding must be set to 0 as well.");
static_assert(_nc!=0 || 0 == _padding_x, "If _nc==0 then the padding must be set to 0 as well.");
static_assert(_groups >= 0, "The number of groups must be >= 0");

con_(
num_con_outputs o
@@ -58,16 +60,19 @@
bias_learning_rate_multiplier(1),
bias_weight_decay_multiplier(0),
num_filters_(o.num_outputs),
groups_(_groups),
padding_y_(_padding_y),
padding_x_(_padding_x),
use_bias(true)
{
DLIB_CASSERT(num_filters_ > 0);
DLIB_CASSERT(groups_ >= 0);
}

con_() : con_(num_con_outputs(_num_filters)) {}

long num_filters() const { return num_filters_; }
long groups() const { return groups_; }
long nr() const
{
if (_nr==0)
@@ -98,6 +103,17 @@
}
}

void set_groups(long num)
{
DLIB_CASSERT(num > 0);
if (num != groups_)
{
DLIB_CASSERT(get_layer_params().size() == 0,
"You can't change the number of groups in con_ if the parameter tensor has already been allocated.");
groups_ = num;
}
}

double get_learning_rate_multiplier () const { return learning_rate_multiplier; }
double get_weight_decay_multiplier () const { return weight_decay_multiplier; }
void set_learning_rate_multiplier(double val) { learning_rate_multiplier = val; }
@@ -169,6 +185,7 @@
bias_learning_rate_multiplier(item.bias_learning_rate_multiplier),
bias_weight_decay_multiplier(item.bias_weight_decay_multiplier),
num_filters_(item.num_filters_),
groups_(item.groups_),
padding_y_(item.padding_y_),
padding_x_(item.padding_x_),
use_bias(item.use_bias)
@@ -196,6 +213,7 @@
bias_learning_rate_multiplier = item.bias_learning_rate_multiplier;
bias_weight_decay_multiplier = item.bias_weight_decay_multiplier;
num_filters_ = item.num_filters_;
groups_ = item.groups_;
use_bias = item.use_bias;
return *this;
}
@@ -205,16 +223,17 @@
{
const long filt_nr = _nr!=0 ? _nr : sub.get_output().nr();
const long filt_nc = _nc!=0 ? _nc : sub.get_output().nc();
groups_ = _groups!=0 ? _groups : sub.get_output().k();

long num_inputs = filt_nr*filt_nc*sub.get_output().k();
long num_outputs = num_filters_;
const long num_inputs = filt_nr*filt_nc*sub.get_output().k();
const long num_outputs = num_filters_;
// allocate params for the filters and also for the filter bias values.
params.set_size(num_inputs*num_filters_ + static_cast<int>(use_bias) * num_filters_);
params.set_size(num_inputs*num_filters_/groups_ + static_cast<int>(use_bias) * num_filters_);

dlib::rand rnd(std::rand());
randomize_parameters(params, num_inputs+num_outputs, rnd);

filters = alias_tensor(num_filters_, sub.get_output().k(), filt_nr, filt_nc);
filters = alias_tensor(num_filters_, sub.get_output().k()/groups_, filt_nr, filt_nc);
if (use_bias)
{
biases = alias_tensor(1,num_filters_);
@@ -231,7 +250,8 @@
_stride_y,
_stride_x,
padding_y_,
padding_x_);
padding_x_,
groups_);
if (use_bias)
{
conv(false, output,
@@ -338,8 +358,10 @@
<< ", stride_y="<<_stride_y
<< ", stride_x="<<_stride_x
<< ", padding_y="<<item.padding_y_
<< ", padding_x="<<item.padding_x_
<< ")";
<< ", padding_x="<<item.padding_x_;
if (item.groups_ != 1)
out << ", groups=" << item.groups_;
out << ")";
out << " learning_rate_mult="<<item.learning_rate_multiplier;
out << " weight_decay_mult="<<item.weight_decay_multiplier;
if (item.use_bias)
@@ -384,6 +406,7 @@
double bias_learning_rate_multiplier;
double bias_weight_decay_multiplier;
long num_filters_;
long groups_;

// These are here only because older versions of con (which you might encounter
// serialized to disk) used different padding settings.
@@ -562,7 +585,7 @@
unsigned int gnsamps = sub.get_output().num_samples();
unsigned int gk = filt.k();
output.set_size(gnsamps,gk,gnr,gnc);
conv.setup(output,filt,_stride_y,_stride_x,padding_y_,padding_x_);
conv.setup(output,filt,_stride_y,_stride_x,padding_y_,padding_x_,1);
conv.get_gradient_for_data(false, sub.get_output(),filt,output);
if (use_bias)
{