Improve sparse embedding index out of bound error message; (apache#11940)
eric-haibin-lin authored and aaronmarkham committed Aug 6, 2018
1 parent 83c4330 commit 4c9c7be
Showing 2 changed files with 70 additions and 14 deletions.
38 changes: 33 additions & 5 deletions src/operator/tensor/indexing_op.cc
@@ -28,6 +28,27 @@
 namespace mxnet {
 namespace op {
 
+/*
+ * \brief Returns true if all indices are within the range [min, max]
+ * \param data_ptr the indices to check
+ * \param data_size the number of indices to examine
+ * \param min the smallest allowed index value
+ * \param max the largest allowed index value
+ */
+template<typename DType>
+bool CheckIndexOutOfBound(const DType* data_ptr, size_t data_size,
+                          const DType min, const DType max) {
+  bool is_valid = true;
+  for (size_t i = 0; i < data_size; i++) {
+    if (data_ptr[i] > max || data_ptr[i] < min) {
+      is_valid = false;
+      break;
+    }
+  }
+  return is_valid;
+}
+
+
 template<>
 void SparseEmbeddingOpForwardRspImpl<cpu>(const OpContext& ctx,
                                           const TBlob& data,
@@ -48,18 +69,16 @@ void SparseEmbeddingOpForwardRspImpl<cpu>(const OpContext& ctx,
     return;
   }
   // check out-of-bound indices
-  bool is_valid = true;
   MSHADOW_TYPE_SWITCH(data.type_flag_, DType, {
     DType min = 0;
     DType max = static_cast<DType>(weight.shape()[0] - 1);
     // check with single thread is faster since data is small
     DType* data_ptr = data.dptr<DType>();
     size_t data_size = data.shape_.Size();
-    for (size_t i = 0; i < data_size; i++) {
-      if (data_ptr[i] > max || data_ptr[i] < min) is_valid = false;
-    }
+    bool is_valid = CheckIndexOutOfBound(data_ptr, data_size,
+                                         min, max);
+    CHECK(is_valid) << "SparseEmbedding input contains data out of bound";
   })
-  CHECK(is_valid) << "SparseEmbedding input contains data out of bound";
   // the weight is actually dense
   if (weight.aux_shape(kIdx)[0] == weight.shape()[0]) {
     EmbeddingOpForwardDnsImpl<cpu>(s, data, weight.data(), req, output);
@@ -101,6 +120,15 @@ inline void SparseEmbeddingOpBackwardRspImpl<cpu>(const bool deterministic,
   MSHADOW_TYPE_SWITCH(data.type_flag_, IType, {
     MSHADOW_SGL_DBL_TYPE_SWITCH(ograd.type_flag_, DType, {
       MSHADOW_IDX_TYPE_SWITCH(output.aux_type(kIdx), RType, {
+        // check out of bound indices
+        {
+          IType min = 0;
+          IType max = static_cast<IType>(output.shape()[0] - 1);
+          // check with single thread is faster since data is small
+          IType* data_ptr = data.dptr<IType>();
+          bool is_valid = CheckIndexOutOfBound(data_ptr, data.shape_.Size(), min, max);
+          CHECK(is_valid) << "Embedding input contains data out of bound";
+        }
         // mark row flags
         Fill<false>(s, TBlob(row_flg, Shape1(num_rows), cpu::kDevMask), kWriteTo, 0);
         Kernel<MarkRowFlgKernel, cpu>::Launch(s, data_size, row_flg, data.dptr<IType>());
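For reference, the bounds-check pattern introduced on the CPU path can be exercised on its own. The following is a minimal, self-contained C++ sketch (a hypothetical standalone program, not part of this commit or of MXNet) that mirrors the new CheckIndexOutOfBound helper and prints the same error message when an index falls outside the valid row range of the weight; the 5-row weight and the sample indices are assumptions made for the example.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Same idea as the CPU helper above: scan all indices on a single thread and
// report whether every one of them lies in the inclusive range [min, max].
template <typename DType>
bool CheckIndexOutOfBound(const DType* data_ptr, std::size_t data_size,
                          const DType min, const DType max) {
  for (std::size_t i = 0; i < data_size; ++i) {
    if (data_ptr[i] > max || data_ptr[i] < min) return false;  // stop at the first bad index
  }
  return true;
}

int main() {
  // Pretend the embedding weight has 5 rows, so valid indices are [0, 4].
  std::vector<int64_t> indices = {0, 2, 7, 1};  // 7 is out of bound
  const bool is_valid = CheckIndexOutOfBound(indices.data(), indices.size(),
                                             int64_t{0}, int64_t{4});
  if (!is_valid) {
    std::cout << "SparseEmbedding input contains data out of bound" << std::endl;
  }
  return 0;
}

The real helper keeps the scan single-threaded because, as the in-tree comment notes, the index array is typically small.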
46 changes: 37 additions & 9 deletions src/operator/tensor/indexing_op.cu
@@ -36,7 +36,7 @@ namespace op {
 
 struct is_valid_check {
   template<typename DType>
-  MSHADOW_XINLINE static void Map(int i, int32_t* out, const DType* data,
+  MSHADOW_XINLINE static void Map(int i, char* out, const DType* data,
                                   const DType min, const DType max) {
     if (data[i] < min || data[i] > max) *out = 1;
   }
@@ -116,6 +116,27 @@ struct AddTakeGradRspDeterministicKernel {
   }
 };
 
+/*
+ * \brief Returns true if all indices are within the range [min, max]
+ * \param s the stream
+ * \param data_ptr the indices on the stream
+ * \param data_size the number of indices to examine
+ * \param min the smallest allowed index value
+ * \param max the largest allowed index value
+ * \param is_valid_ptr the temporary workspace
+ */
+template<typename DType>
+bool CheckIndexOutOfBound(mshadow::Stream<gpu> *s, const DType* data_ptr, size_t data_size,
+                          const DType min, const DType max, char* is_valid_ptr) {
+  using namespace mxnet_op;
+  int32_t is_valid = 0;
+  Kernel<set_zero, gpu>::Launch(s, 1, is_valid_ptr);
+  Kernel<is_valid_check, gpu>::Launch(s, data_size, is_valid_ptr, data_ptr, min, max);
+  CUDA_CALL(cudaMemcpy(&is_valid, is_valid_ptr, sizeof(char),
+            cudaMemcpyDeviceToHost));
+  return is_valid == 0;
+}
+
 template<>
 void SparseEmbeddingOpForwardRspImpl<gpu>(const OpContext& ctx,
                                           const TBlob& data,
@@ -136,21 +157,17 @@ void SparseEmbeddingOpForwardRspImpl<gpu>(const OpContext& ctx,
     return;
   }
   // check out-of-bound indices
-  int32_t is_valid = 0;
   MSHADOW_TYPE_SWITCH(data.type_flag_, DType, {
     DType min = 0;
     DType max = static_cast<DType>(weight.shape()[0] - 1);
     DType* data_ptr = data.dptr<DType>();
     size_t data_size = data.shape_.Size();
     Tensor<gpu, 1, char> workspace = ctx.requested[0]
-        .get_space_typed<gpu, 1, char>(Shape1(sizeof(int32_t)), s);
-    int32_t* is_valid_ptr = reinterpret_cast<int32_t*>(workspace.dptr_);
-    Kernel<set_zero, gpu>::Launch(s, 1, is_valid_ptr);
-    Kernel<is_valid_check, gpu>::Launch(s, data_size, is_valid_ptr, data_ptr, min, max);
-    CUDA_CALL(cudaMemcpy(&is_valid, is_valid_ptr, sizeof(int32_t),
-                         cudaMemcpyDeviceToHost));
+        .get_space_typed<gpu, 1, char>(Shape1(1), s);
+    char* is_valid_ptr = reinterpret_cast<char*>(workspace.dptr_);
+    bool is_valid = CheckIndexOutOfBound(s, data_ptr, data_size, min, max, is_valid_ptr);
+    CHECK(is_valid) << "SparseEmbedding input contains data out of bound";
   })
-  CHECK_EQ(is_valid, 0) << "SparseEmbedding input contains data out of bound";
   // the weight is actually dense
   if (weight.aux_shape(kIdx)[0] == weight.shape()[0]) {
     EmbeddingOpForwardDnsImpl<gpu>(s, data, weight.data(), req, output);
@@ -207,6 +224,17 @@ void SparseEmbeddingDeterministicKernelLaunch(const OpContext& ctx,
                                           sorted_data_storage_bytes);
   temp_storage = workspace.dptr_ + total_storage_bytes - temp_workspace_bytes;
 
+  // check out-of-bound indices
+  {
+    IType min = 0;
+    IType max = static_cast<IType>(output.shape()[0] - 1);
+    IType* data_ptr = data.dptr<IType>();
+    size_t data_size = data.shape_.Size();
+    bool is_valid = CheckIndexOutOfBound(s, data_ptr, data_size, min, max,
+                                         reinterpret_cast<char*>(temp_storage));
+    CHECK(is_valid) << "Embedding input contains data out of bound";
+  }
+
   // make a copy of the data, to be sorted
   TBlob sorted_data_blob(sorted_data, Shape1(data_size), gpu::kDevMask);
   auto sorted_data_tensor = sorted_data_blob.FlatTo1D<gpu, dim_t>(s);
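On the GPU the host cannot inspect device memory directly, so the helper above launches the is_valid_check kernel to raise a one-byte flag whenever an out-of-range index is seen and then copies the flag back with cudaMemcpy; this is also why the commit shrinks the flag workspace from sizeof(int32_t) to a single char. Below is a minimal standalone CUDA sketch of that flag pattern, written against the raw CUDA runtime instead of the mshadow stream and mxnet_op::Kernel machinery used in the real code; the kernel name, index type, and sample data are assumptions for the example.

#include <cstdio>
#include <cuda_runtime.h>

// Each thread checks one index; any out-of-range value sets the shared flag to 1.
// Every offending thread writes the same value, so the race is benign.
__global__ void CheckBoundsKernel(const long long* data, int n,
                                  long long min, long long max, char* out_flag) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n && (data[i] < min || data[i] > max)) *out_flag = 1;
}

int main() {
  const int n = 4;
  long long h_data[n] = {0, 2, 7, 1};  // 7 is out of bound for a 5-row weight
  long long* d_data = nullptr;
  char* d_flag = nullptr;
  cudaMalloc(&d_data, n * sizeof(long long));
  cudaMalloc(&d_flag, sizeof(char));
  cudaMemcpy(d_data, h_data, n * sizeof(long long), cudaMemcpyHostToDevice);
  cudaMemset(d_flag, 0, sizeof(char));  // clear the flag, like the set_zero kernel

  CheckBoundsKernel<<<1, 128>>>(d_data, n, 0LL, 4LL, d_flag);

  char h_flag = 0;
  cudaMemcpy(&h_flag, d_flag, sizeof(char), cudaMemcpyDeviceToHost);
  if (h_flag != 0) {
    std::printf("SparseEmbedding input contains data out of bound\n");
  }
  cudaFree(d_data);
  cudaFree(d_flag);
  return 0;
}

The synchronous device-to-host cudaMemcpy doubles as the synchronization point, so the flag is final by the time the host checks it.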
