Python multi-GPU
cypof committed Aug 9, 2016
1 parent 801cc2e commit 7b78d8f
Showing 13 changed files with 530 additions and 29 deletions.
1 change: 1 addition & 0 deletions include/caffe/blob.hpp
@@ -220,6 +220,7 @@ class Blob {
void set_cpu_data(Dtype* data);
const int* gpu_shape() const;
const Dtype* gpu_data() const;
+  void set_gpu_data(Dtype* data);
const Dtype* cpu_diff() const;
const Dtype* gpu_diff() const;
Dtype* mutable_cpu_data();
2 changes: 1 addition & 1 deletion include/caffe/data_transformer.hpp
@@ -23,7 +23,7 @@ class DataTransformer {
* @brief Initialize the Random number generations if needed by the
* transformation.
*/
-  void InitRand();
+  void InitRand(unsigned int seed);

/**
* @brief Applies the transformation defined in the data layer's
8 changes: 4 additions & 4 deletions include/caffe/layers/python_layer.hpp
@@ -20,10 +20,10 @@ class PythonLayer : public Layer<Dtype> {
const vector<Blob<Dtype>*>& top) {
// Disallow PythonLayer in MultiGPU training stage, due to GIL issues
// Details: https://github.com/BVLC/caffe/issues/2936
-    if (this->phase_ == TRAIN && Caffe::solver_count() > 1
-        && !ShareInParallel()) {
-      LOG(FATAL) << "PythonLayer is not implemented in Multi-GPU training";
-    }
+    // if (this->phase_ == TRAIN && Caffe::solver_count() > 1
+    //     && !ShareInParallel()) {
+    //   LOG(FATAL) << "PythonLayer is not implemented in Multi-GPU training";
+    // }
self_.attr("param_str") = bp::str(
this->layer_param_.python_param().param_str());
self_.attr("phase") = static_cast<int>(this->phase_);
5 changes: 3 additions & 2 deletions python/caffe/__init__.py
@@ -1,8 +1,9 @@
-from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver
-from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed
+from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver, DataTransformer, Blob, NCCL, Timer
+from ._caffe import init_log, log, set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed, get_random, solver_count, set_solver_count, solver_rank, set_solver_rank, Layer, get_solver, layer_type_list
from ._caffe import __version__
from .proto.caffe_pb2 import TRAIN, TEST
from .classifier import Classifier
from .detector import Detector
from . import io
from .net_spec import layers, params, NetSpec, to_proto
+from .train import train
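
Taken together, these exports are the Python face of the multi-GPU work: one process per GPU, an NCCL communicator attached to each solver, and gradients all-reduced through solver callbacks. A minimal sketch of a driver built on them; this is hypothetical, the actual helper lives in the new python/caffe/train.py, whose diff is not expanded on this page:

    import caffe
    from multiprocessing import Process

    def solve(proto, gpus, uid, rank):
        caffe.set_mode_gpu()
        caffe.set_device(gpus[rank])
        caffe.set_solver_count(len(gpus))    # new binding
        caffe.set_solver_rank(rank)          # new binding
        solver = caffe.SGDSolver(proto)
        nccl = caffe.NCCL(solver, uid)       # one communicator per solver/GPU
        nccl.bcast()                         # sync initial weights across ranks
        solver.add_callback(nccl)            # all-reduce gradients each iteration
        solver.step(solver.param.max_iter)   # solver.param is new in this commit

    if __name__ == '__main__':
        gpus = [0, 1]
        uid = caffe.NCCL.new_uid()           # shared id so communicators can rendezvous
        procs = [Process(target=solve, args=('solver.prototxt', gpus, uid, r))
                 for r in range(len(gpus))]
        for p in procs:
            p.start()
        for p in procs:
            p.join()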
215 changes: 206 additions & 9 deletions python/caffe/_caffe.cpp
@@ -53,6 +53,16 @@ void set_mode_gpu() { Caffe::set_mode(Caffe::GPU); }

void set_random_seed(unsigned int seed) { Caffe::set_random_seed(seed); }

+void InitLog(int level) {
+  FLAGS_logtostderr = 1;
+  FLAGS_minloglevel = level;
+  ::google::InitGoogleLogging("");
+  ::google::InstallFailureSignalHandler();
+}
+void Log(const string& s) {
+  LOG(INFO) << s;
+}
+
// For convenience, check that input files can be opened, and raise an
// exception that boost will send to Python if not (caffe could still crash
// later if the input files are disturbed before they are actually used, but
@@ -254,12 +264,12 @@ bp::object BlobVec_add_blob(bp::tuple args, bp::dict kwargs) {
}

template<typename Dtype>
-class PythonCallback: public Solver<Dtype>::Callback {
+class SolverCallback: public Solver<Dtype>::Callback {
protected:
bp::object on_start_, on_gradients_ready_;

public:
-  PythonCallback(bp::object on_start, bp::object on_gradients_ready)
+  SolverCallback(bp::object on_start, bp::object on_gradients_ready)
: on_start_(on_start), on_gradients_ready_(on_gradients_ready) { }
virtual void on_gradients_ready() {
on_gradients_ready_();
@@ -271,7 +281,121 @@ class PythonCallback: public Solver<Dtype>::Callback {
template<typename Dtype>
void Solver_add_callback(Solver<Dtype> * solver, bp::object on_start,
bp::object on_gradients_ready) {
-  solver->add_callback(new PythonCallback<Dtype>(on_start, on_gradients_ready));
+  solver->add_callback(new SolverCallback<Dtype>(on_start, on_gradients_ready));
}
+// Seems boost cannot call the base method directly
+void Solver_add_nccl(SGDSolver<Dtype>* solver, NCCL<Dtype>* nccl) {
+  solver->add_callback(nccl);
+}
+template<typename Dtype>
+class NetCallback: public Net<Dtype>::Callback {
+ public:
+  explicit NetCallback(bp::object run) : run_(run) {}
+
+ protected:
+  virtual void run(int layer) {
+    run_(layer);
+  }
+  bp::object run_;
+};
+void Net_before_forward(Net<Dtype>* net, bp::object run) {
+  net->add_before_forward(new NetCallback<Dtype>(run));
+}
+void Net_after_forward(Net<Dtype>* net, bp::object run) {
+  net->add_after_forward(new NetCallback<Dtype>(run));
+}
+void Net_before_backward(Net<Dtype>* net, bp::object run) {
+  net->add_before_backward(new NetCallback<Dtype>(run));
+}
+void Net_after_backward(Net<Dtype>* net, bp::object run) {
+  net->add_after_backward(new NetCallback<Dtype>(run));
+}
+void Net_add_nccl(Net<Dtype>* net, NCCL<Dtype>* nccl) {
+  net->add_after_backward(nccl);
+}
+
+// Transformer constructor for passing phase as int
+shared_ptr<DataTransformer<Dtype> > Transformer_Init(
+    const TransformationParameter& param, int phase) {
+  shared_ptr<DataTransformer<Dtype> > t(
+      new DataTransformer<Dtype>(param,
+                                 static_cast<Phase>(phase)));
+  return t;
+}
+
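// (Note: the helper below decodes one serialized Datum and writes it into
// row `index` of a preallocated batch: `tmp` wraps that row's memory without
// copying, the transformer fills it in place, and the label is written into
// the label blob at the same index.)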
+void Transform(DataTransformer<Dtype>* trans,
+               const string& str,
+               Blob<Dtype>* data,
+               Blob<Dtype>* label,
+               int index) {
+  Datum datum;
+  datum.ParseFromString(str);
+  vector<int> shape(data->shape());
+  shape[0] = 1;
+  Blob<Dtype> tmp(shape);
+  tmp.set_cpu_data(data->mutable_cpu_data() + data->offset(index));
+  trans->Transform(datum, &tmp);
+  label->mutable_cpu_data()[label->offset(index)] = datum.label();
+}
+
+template<class T>
+struct proto_pickle : bp::pickle_suite {
+  static bp::tuple getstate(const T& proto) {
+    return bp::make_tuple(proto.SerializeAsString());
+  }
+
+  static void setstate(T& proto,  // NOLINT(runtime/references)
+                       bp::tuple state) {
+    string s = bp::extract<string>(state[0])();
+    proto.ParseFromString(s);
+  }
+};
+
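// (Note: pickling a Blob ships its shape plus a CUDA IPC handle rather than
// the data itself; setstate in the receiving process maps the same device
// allocation via cudaIpcOpenMemHandle, so batches can cross Python process
// boundaries, e.g. over a multiprocessing queue, without copying GPU memory.)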
+struct blob_pickle : bp::pickle_suite {
+  // TODO also transfer cpu side through regular IPC
+  static bp::tuple getstate(const Blob<Dtype>& blob) {
+    string s1(sizeof(int) * blob.shape().size(), 0);
+    memcpy(&s1[0], &blob.shape()[0], s1.size());  // NOLINT(caffe/alt_fn)
+
+    cudaPointerAttributes attributes;
+    CUDA_CHECK(cudaPointerGetAttributes(&attributes, blob.gpu_data()));
+    CUDA_CHECK(cudaSetDevice(attributes.device));
+
+    cudaIpcMemHandle_t handle;
+    CUDA_CHECK(cudaIpcGetMemHandle(&handle,
+        reinterpret_cast<void*>(const_cast<Dtype*>(blob.gpu_data()))));
+    string s2(CUDA_IPC_HANDLE_SIZE, 0);
+    memcpy(&s2[0], &handle, CUDA_IPC_HANDLE_SIZE);  // NOLINT(caffe/alt_fn)
+
+    return bp::make_tuple(s1, s2);
+  }
+
+  static void setstate(Blob<Dtype>& blob,  // NOLINT(runtime/references)
+                       bp::tuple state) {
+    string s1 = bp::extract<string>(state[0])();
+    string s2 = bp::extract<string>(state[1])();
+
+    vector<int> shape(s1.size() / sizeof(int));
+    memcpy(&shape[0], &s1[0], s1.size());  // NOLINT(caffe/alt_fn)
+    blob.Reshape(shape);
+
+    cudaIpcMemHandle_t handle;
+    memcpy(&handle, &s2[0], CUDA_IPC_HANDLE_SIZE);  // NOLINT(caffe/alt_fn)
+    Dtype* data;
+    CUDA_CHECK(cudaIpcOpenMemHandle(reinterpret_cast<void**>(&data), handle,
+        cudaIpcMemLazyEnablePeerAccess));
+    blob.set_gpu_data(data);
+  }
+};
+
+int phase_as_int(LayerParameter* param) {
+  return static_cast<int>(param->phase());
+}
+void prefetch_to_gpu(Blob<Dtype>* blob) {
+  blob->gpu_data();
+}
+void set_gpu_data(Blob<Dtype>* blob, Blob<Dtype>* source) {
+  blob->set_gpu_data(source->mutable_gpu_data());
+}

BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1);
@@ -283,10 +407,17 @@ BOOST_PYTHON_MODULE(_caffe) {
bp::scope().attr("__version__") = AS_STRING(CAFFE_VERSION);

// Caffe utility functions
bp::def("init_log", &InitLog);
bp::def("log", &Log);
bp::def("set_mode_cpu", &set_mode_cpu);
bp::def("set_mode_gpu", &set_mode_gpu);
bp::def("set_random_seed", &set_random_seed);
bp::def("get_random", &caffe_rng_rand);
bp::def("set_device", &Caffe::SetDevice);
bp::def("solver_count", &Caffe::solver_count);
bp::def("set_solver_count", &Caffe::set_solver_count);
bp::def("solver_rank", &Caffe::solver_rank);
bp::def("set_solver_rank", &Caffe::set_solver_rank);

bp::def("layer_type_list", &LayerRegistry<Dtype>::LayerTypeList);

@@ -317,6 +448,7 @@ BOOST_PYTHON_MODULE(_caffe) {
bp::return_internal_reference<>()))
.add_property("layers", bp::make_function(&Net<Dtype>::layers,
bp::return_internal_reference<>()))
.def("layer", bp::make_function(&Net<Dtype>::layer_by_name))
.add_property("_blob_names", bp::make_function(&Net<Dtype>::blob_names,
bp::return_value_policy<bp::copy_const_reference>()))
.add_property("_layer_names", bp::make_function(&Net<Dtype>::layer_names,
@@ -330,11 +462,16 @@ BOOST_PYTHON_MODULE(_caffe) {
bp::with_custodian_and_ward<1, 2, bp::with_custodian_and_ward<1, 3> >())
.def("save", &Net_Save)
.def("save_hdf5", &Net_SaveHDF5)
.def("load_hdf5", &Net_LoadHDF5);
.def("load_hdf5", &Net_LoadHDF5)
.def("before_forward", &Net_before_forward)
.def("after_forward", &Net_after_forward)
.def("before_backward", &Net_before_backward)
.def("after_backward", &Net_after_backward)
.def("after_backward", &Net_add_nccl);
BP_REGISTER_SHARED_PTR_TO_PYTHON(Net<Dtype>);

bp::class_<Blob<Dtype>, shared_ptr<Blob<Dtype> >, boost::noncopyable>(
"Blob", bp::no_init)
"Blob", bp::init<>())
.add_property("shape",
bp::make_function(
static_cast<const vector<int>& (Blob<Dtype>::*)() const>(
@@ -350,7 +487,10 @@ BOOST_PYTHON_MODULE(_caffe) {
.add_property("data", bp::make_function(&Blob<Dtype>::mutable_cpu_data,
NdarrayCallPolicies()))
.add_property("diff", bp::make_function(&Blob<Dtype>::mutable_cpu_diff,
-          NdarrayCallPolicies()));
+          NdarrayCallPolicies()))
+    .def_pickle(blob_pickle())
+    .def("prefetch_to_gpu", &prefetch_to_gpu)
+    .def("set_gpu_data", &set_gpu_data);
BP_REGISTER_SHARED_PTR_TO_PYTHON(Blob<Dtype>);

bp::class_<Layer<Dtype>, shared_ptr<PythonLayer<Dtype> >,
@@ -359,10 +499,43 @@ BOOST_PYTHON_MODULE(_caffe) {
bp::return_internal_reference<>()))
.def("setup", &Layer<Dtype>::LayerSetUp)
.def("reshape", &Layer<Dtype>::Reshape)
.add_property("type", bp::make_function(&Layer<Dtype>::type));
.add_property("type", bp::make_function(&Layer<Dtype>::type))
.add_property("layer_param", bp::make_function(&Layer<Dtype>::layer_param,
bp::return_value_policy<bp::copy_const_reference>()));
BP_REGISTER_SHARED_PTR_TO_PYTHON(Layer<Dtype>);

bp::class_<LayerParameter>("LayerParameter", bp::no_init);
bp::class_<SolverParameter>("SolverParameter", bp::init<>())
.add_property("max_iter", &SolverParameter::max_iter)
.add_property("display", &SolverParameter::display)
.def_pickle(proto_pickle<SolverParameter>());
bp::class_<TransformationParameter>("TransformationParameter", bp::init<>())
.add_property("crop_size", &TransformationParameter::crop_size);
bp::class_<DataParameter>("DataParameter", bp::init<>())
.add_property("batch_size", &DataParameter::batch_size)
.add_property("source", bp::make_function(&DataParameter::source,
bp::return_value_policy<bp::copy_const_reference>()))
.add_property("backend", &DataParameter::backend)
.def_pickle(proto_pickle<DataParameter>());
bp::class_<MemoryDataParameter>("MemoryDataParameter", bp::init<>())
.add_property("batch_size", &MemoryDataParameter::batch_size)
.add_property("channels", &MemoryDataParameter::channels)
.add_property("height", &MemoryDataParameter::height)
.add_property("width", &MemoryDataParameter::width)
.def_pickle(proto_pickle<MemoryDataParameter>());
bp::class_<LayerParameter>("LayerParameter", bp::init<>())
.add_property("name", bp::make_function(&LayerParameter::name,
bp::return_value_policy<bp::copy_const_reference>()))
.add_property("phase", &phase_as_int)
.add_property("top_size", &LayerParameter::top_size)
.add_property("transform_param",
bp::make_function(&LayerParameter::transform_param,
bp::return_value_policy<bp::copy_const_reference>()))
.add_property("data_param", bp::make_function(&LayerParameter::data_param,
bp::return_value_policy<bp::copy_const_reference>()))
.add_property("memory_data_param",
bp::make_function(&LayerParameter::memory_data_param,
bp::return_value_policy<bp::copy_const_reference>()))
.def_pickle(proto_pickle<LayerParameter>());

bp::class_<Solver<Dtype>, shared_ptr<Solver<Dtype> >, boost::noncopyable>(
"Solver", bp::no_init)
@@ -371,11 +544,14 @@ BOOST_PYTHON_MODULE(_caffe) {
bp::return_internal_reference<>()))
.add_property("iter", &Solver<Dtype>::iter)
.def("add_callback", &Solver_add_callback<Dtype>)
.def("add_callback", &Solver_add_nccl)
.def("solve", static_cast<void (Solver<Dtype>::*)(const char*)>(
&Solver<Dtype>::Solve), SolveOverloads())
.def("step", &Solver<Dtype>::Step)
.def("restore", &Solver<Dtype>::Restore)
.def("snapshot", &Solver<Dtype>::Snapshot);
.def("snapshot", &Solver<Dtype>::Snapshot)
.add_property("param", bp::make_function(&Solver<Dtype>::param,
bp::return_value_policy<bp::copy_const_reference>()));
BP_REGISTER_SHARED_PTR_TO_PYTHON(Solver<Dtype>);

bp::class_<SGDSolver<Dtype>, bp::bases<Solver<Dtype> >,
@@ -419,6 +595,27 @@ BOOST_PYTHON_MODULE(_caffe) {
bp::class_<vector<bool> >("BoolVec")
.def(bp::vector_indexing_suite<vector<bool> >());

+  bp::class_<DataTransformer<Dtype>, shared_ptr<DataTransformer<Dtype> >,
+      boost::noncopyable>("DataTransformer", bp::no_init)
+    .def("__init__", bp::make_constructor(&Transformer_Init))
+    .def("init_rand", &DataTransformer<Dtype>::InitRand)
+    .def("transform", &Transform);
+  BP_REGISTER_SHARED_PTR_TO_PYTHON(DataTransformer<Dtype>);
+
+  bp::class_<NCCL<Dtype>, shared_ptr<NCCL<Dtype> >,
+      boost::noncopyable>("NCCL",
+      bp::init<shared_ptr<Solver<Dtype> >, const string&>())
+    .def("new_uid", &NCCL<Dtype>::new_uid).staticmethod("new_uid")
+    .def("bcast", &NCCL<Dtype>::bcast);
+  BP_REGISTER_SHARED_PTR_TO_PYTHON(NCCL<Dtype>);
+
+  bp::class_<Timer, shared_ptr<Timer>, boost::noncopyable>(
+      "Timer", bp::init<>())
+    .def("start", &Timer::Start)
+    .def("stop", &Timer::Stop)
+    .add_property("ms", &Timer::MilliSeconds);
+  BP_REGISTER_SHARED_PTR_TO_PYTHON(Timer);
+
// boost python expects a void (missing) return value, while import_array
// returns NULL for python3. import_array1() forces a void return value.
import_array1();
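The DataTransformer, constructible Blob, and set_gpu_data bindings above add up to a Python-side data path: a loader process decodes and transforms datums into a batch blob, stages it on the GPU, and the net adopts that memory directly. A hypothetical sketch, assuming an existing net, 3-channel input, and a read_datum_strings() helper that yields serialized Datum protos (for example from an LMDB cursor):

    import caffe

    lp = net.layer('data').layer_param       # new layer() and layer_param accessors
    crop = lp.transform_param.crop_size
    tr = caffe.DataTransformer(lp.transform_param, lp.phase)
    tr.init_rand(1234)                       # per-process seed for crop/mirror draws

    n = lp.data_param.batch_size
    data, label = caffe.Blob(), caffe.Blob() # Blob() is now constructible from Python
    data.reshape(n, 3, crop, crop)
    label.reshape(n)
    for i, s in zip(range(n), read_datum_strings()):
        tr.transform(s, data, label, i)      # fills row i of the batch in place
    data.prefetch_to_gpu()                   # stage the whole batch on the device
    net.blobs['data'].set_gpu_data(data)     # share GPU memory instead of copying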
3 changes: 2 additions & 1 deletion python/caffe/pycaffe.py
@@ -11,7 +11,8 @@
import numpy as np

from ._caffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, \
-    RMSPropSolver, AdaDeltaSolver, AdamSolver
+    RMSPropSolver, AdaDeltaSolver, AdamSolver, DataTransformer, \
+    Blob, NCCL, Timer
import caffe.io

import six
[Diffs for the remaining 7 changed files did not load on this page.]
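
Finally, the new per-layer hooks accept any Python callable and pass it the index of the layer being run, which pairs naturally with the newly exported Timer. A small sketch, assuming an existing solver:

    import caffe

    caffe.init_log(0)
    timer = caffe.Timer()
    names = list(solver.net._layer_names)

    def start(layer_id):
        timer.start()

    def stop(layer_id):
        timer.stop()
        caffe.log('%s: %.2f ms' % (names[layer_id], timer.ms))

    solver.net.before_forward(start)
    solver.net.after_forward(stop)
    solver.net.before_backward(start)
    solver.net.after_backward(stop)   # a callable hits the NetCallback overload
    solver.step(1)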
