From e4bf6465cf352507538a29e2aeb05d4772df9690 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Tue, 26 Jun 2018 11:00:47 -0700 Subject: [PATCH] [MXNET-551] Test CreateMKLDNNMem/CommitOutput (#11308) * refactor copyfrom * add boilerplate * rename to MKLDNNCopy * write to temp memory * reorder mkldnn / views * return memory from GetMKLDNNData * add kaddto to unit test * move orig output before creatingnewmem * coerce memory if shape does not fit * use MKLDNNCopy in commit * uncomment addto test * switch order of mkldnnsum params * improving logging * wait to read after copying arr * remove extra white spaces * remove extra white space * remove unused var * reorder output * do not write to views * remove shape check in test * use input pdesc * remove unused var * fix merge * put inplace in separate loop * use two mem * use sum_pd when calling CreateMKLDNNData * reorder sum shapes if needed * comment out getsumpd * use MKLDNNCopy helper to reshape mem * remove getsumpd * use output mem for createmem * remove todo * waittoread output * do not attempt to shape output * use correct arr as input * revert commit change to ps-lite * revert change to tvm * fix lint * add comment to test * reduce calls to get_primitive_desc * skip tests that reorder2default * push_back to inputs * skip if view/mkldnn * add noop test * pass input ptr for write in place * allow empty --- src/ndarray/ndarray.cc | 72 +----------- src/operator/nn/mkldnn/mkldnn_base-inl.h | 1 + src/operator/nn/mkldnn/mkldnn_base.cc | 103 +++++++++++++--- src/operator/nn/mkldnn/mkldnn_sum.cc | 20 +++- tests/cpp/operator/mkldnn.cc | 142 +++++++++++++++++++++-- 5 files changed, 242 insertions(+), 96 deletions(-) diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index 94d3d90413ab..e90fb6319d77 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -482,7 +482,7 @@ const mkldnn::memory *NDArray::GetMKLDNNData( if (mem->get_primitive_desc() == desc || (desc1.data.format == GetDefaultFormat(desc1) && desc2.data.format == GetDefaultFormat(desc2))) { - return GetMKLDNNExact(ptr_->mkl_mem_->GetRaw(), desc); + return GetMKLDNNExact(mem, desc); } else { return nullptr; } @@ -638,7 +638,6 @@ void NDArray::CopyFrom(const mkldnn::memory &mem) { CHECK(mem.get_primitive_desc().get_size() == shape().Size() * GetTypeSize(dtype_)) << "The size of NDArray doesn't match the requested MKLDNN memory desc"; - MKLDNNStream *stream = MKLDNNStream::Get(); // If this array uses MKLDNN layout, we have to make sure it's not a view. // Otherwise, we'll have to change the layout inside the array. @@ -646,74 +645,7 @@ void NDArray::CopyFrom(const mkldnn::memory &mem) { ptr_->Reorder2Default(); const mkldnn::memory *this_mem = GetMKLDNNData(); - mkldnn::memory::primitive_desc from_pd = mem.get_primitive_desc(); - mkldnn::memory::desc from_desc = from_pd.desc(); - mkldnn::memory::primitive_desc this_pd = this_mem->get_primitive_desc(); - mkldnn::memory::desc this_desc = this_pd.desc(); - mkldnn_memory_format_t from_def_format = GetDefaultFormat(from_desc); - mkldnn_memory_format_t this_def_format = GetDefaultFormat(this_desc); - if (IsView()) { - // Sliced array must use the default layout. - CHECK_EQ(GetDefaultFormat(this_desc), this_desc.data.format); - } - // It's possible that the memory and the NDArray don't have the same shape. - if (!same_shape(this_desc, from_desc) - // If the source memory uses the default layout, we can reshape directly. - && from_def_format == from_desc.data.format) { - // In this case, we can simply create a new MKLDNN memory for the required - // shape. - mkldnn::memory::dims dims(this_desc.data.dims, - this_desc.data.dims + this_desc.data.ndims); - auto this_dtype = static_cast(this_desc.data.data_type); - auto this_format = static_cast(GetDefaultFormat(this_desc)); - mkldnn::memory::desc data_md(dims, this_dtype, this_format); - mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine()); - mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle())); - stream->RegisterMem(tmp_mem); - stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem)); - } else if (!same_shape(this_desc, from_desc)) { - // In this case, the source memory stores data in a customized layout. We - // need to reorganize the data in memory before we can reshape. - mkldnn::memory::primitive_desc def_pd = GetPrimitiveDesc(from_pd, from_def_format); - mkldnn::memory *def_mem = TmpMemMgr::Get()->Alloc(def_pd); - stream->RegisterPrim(mkldnn::reorder(mem, *def_mem)); - // Now we can reshape it - mkldnn::memory::dims dims(this_desc.data.dims, - this_desc.data.dims + this_desc.data.ndims); - auto this_dtype = static_cast(this_desc.data.data_type); - auto this_format = static_cast(GetDefaultFormat(this_desc)); - mkldnn::memory::desc data_md(dims, this_dtype, this_format); - mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine()); - mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, def_mem->get_data_handle())); - stream->RegisterMem(tmp_mem); - stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem)); - } else if (from_pd == this_pd) { - // If the layout is the same, we can just copy data. - stream->RegisterPrim(mkldnn::reorder(mem, *this_mem)); - } else { - // If both are not using the default layouts. There isn't much we can do, - // other than reorder data layout directly. - if (this_def_format != this_desc.data.format - && from_def_format != from_desc.data.format) { - stream->RegisterPrim(mkldnn::reorder(mem, *this_mem)); - } else if (this_def_format == this_desc.data.format) { - // If the dest mem uses the default memory layout, we can simply use - // the default format of the source memory to improve perf of reorder. - mkldnn::memory::primitive_desc pd = GetPrimitiveDesc(from_pd, - from_def_format); - mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, this_mem->get_data_handle())); - stream->RegisterMem(tmp_mem); - stream->RegisterPrim(mkldnn::reorder(mem, *tmp_mem)); - } else { - // If the src mem uses the default memory layout, we can use - // the default format of the source memory to improve perf. - mkldnn::memory::primitive_desc pd = GetPrimitiveDesc(this_pd, - this_def_format); - mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle())); - stream->RegisterMem(tmp_mem); - stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem)); - } - } + MKLDNNCopy(mem, this_mem); } mkldnn::memory *NDArray::CreateMKLDNNData(const mkldnn::memory::primitive_desc &desc) { diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index 6a7c58f29912..c6e7f9bdefdc 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -318,6 +318,7 @@ enum OutDataOp { }; typedef std::pair mkldnn_output_t; +void MKLDNNCopy(const mkldnn::memory &mem, const mkldnn::memory* this_mem); /* * These two functions try to create MKLDNN memory in an NDArray based on `req'. diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc b/src/operator/nn/mkldnn/mkldnn_base.cc index b182aa0b68d4..858f8e3261f2 100644 --- a/src/operator/nn/mkldnn/mkldnn_base.cc +++ b/src/operator/nn/mkldnn/mkldnn_base.cc @@ -77,6 +77,75 @@ mkldnn::memory *TmpMemMgr::Alloc(const mkldnn::memory::primitive_desc &pd) { } } +void MKLDNNCopy(const mkldnn::memory &mem, const mkldnn::memory* this_mem) { + MKLDNNStream *stream = MKLDNNStream::Get(); + + mkldnn::memory::primitive_desc from_pd = mem.get_primitive_desc(); + mkldnn::memory::desc from_desc = from_pd.desc(); + mkldnn::memory::primitive_desc this_pd = this_mem->get_primitive_desc(); + mkldnn::memory::desc this_desc = this_pd.desc(); + mkldnn_memory_format_t from_def_format = GetDefaultFormat(from_desc); + mkldnn_memory_format_t this_def_format = GetDefaultFormat(this_desc); + // It's possible that the memory and the NDArray don't have the same shape. + if (!same_shape(this_desc, from_desc) + // If the source memory uses the default layout, we can reshape directly. + && from_def_format == from_desc.data.format) { + // In this case, we can simply create a new MKLDNN memory for the required + // shape. + mkldnn::memory::dims dims(this_desc.data.dims, + this_desc.data.dims + this_desc.data.ndims); + auto this_dtype = static_cast(this_desc.data.data_type); + auto this_format = static_cast(GetDefaultFormat(this_desc)); + mkldnn::memory::desc data_md(dims, this_dtype, this_format); + mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine()); + mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle())); + stream->RegisterMem(tmp_mem); + stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem)); + } else if (!same_shape(this_desc, from_desc)) { + // In this case, the source memory stores data in a customized layout. We + // need to reorganize the data in memory before we can reshape. + mkldnn::memory::primitive_desc def_pd = GetPrimitiveDesc(from_pd, from_def_format); + mkldnn::memory *def_mem = TmpMemMgr::Get()->Alloc(def_pd); + stream->RegisterPrim(mkldnn::reorder(mem, *def_mem)); + // Now we can reshape it + mkldnn::memory::dims dims(this_desc.data.dims, + this_desc.data.dims + this_desc.data.ndims); + auto this_dtype = static_cast(this_desc.data.data_type); + auto this_format = static_cast(GetDefaultFormat(this_desc)); + mkldnn::memory::desc data_md(dims, this_dtype, this_format); + mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine()); + mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, def_mem->get_data_handle())); + stream->RegisterMem(tmp_mem); + stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem)); + } else if (from_pd == this_pd) { + // If the layout is the same, we can just copy data. + stream->RegisterPrim(mkldnn::reorder(mem, *this_mem)); + } else { + // If both are not using the default layouts. There isn't much we can do, + // other than reorder data layout directly. + if (this_def_format != this_desc.data.format + && from_def_format != from_desc.data.format) { + stream->RegisterPrim(mkldnn::reorder(mem, *this_mem)); + } else if (this_def_format == this_desc.data.format) { + // If the dest mem uses the default memory layout, we can simply use + // the default format of the source memory to improve perf of reorder. + mkldnn::memory::primitive_desc pd = GetPrimitiveDesc(from_pd, + from_def_format); + mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, this_mem->get_data_handle())); + stream->RegisterMem(tmp_mem); + stream->RegisterPrim(mkldnn::reorder(mem, *tmp_mem)); + } else { + // If the src mem uses the default memory layout, we can use + // the default format of the source memory to improve perf. + mkldnn::memory::primitive_desc pd = GetPrimitiveDesc(this_pd, + this_def_format); + mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle())); + stream->RegisterMem(tmp_mem); + stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem)); + } + } +} + bool CanWriteTo(const NDArray &out_arr, const NDArray &in_arr, const mkldnn::memory::primitive_desc &desc) { @@ -94,22 +163,25 @@ mkldnn_output_t CreateMKLDNNMem(const NDArray &out_arr, if (kAddTo == req) { auto tmp = TmpMemMgr::Get()->Alloc(desc); return mkldnn_output_t(OutDataOp::AddBack, tmp); - } else if (req == kWriteInplace && in_arr != nullptr && CanWriteTo(out_arr, *in_arr, desc)) { + } else if (kWriteInplace == req && in_arr != nullptr && CanWriteTo(out_arr, *in_arr, desc)) { mkldnn::memory *mem = const_cast(out_arr).CreateMKLDNNData(desc); // mem is nullptr if out_arr is view and desc is MKLDNN format. // need to Reorder2Default before calling CreateMKLDNNMem CHECK(mem != nullptr); return mkldnn_output_t(OutDataOp::Noop, mem); - } else if (req == kWriteInplace) { - auto tmp = TmpMemMgr::Get()->Alloc(desc); - return mkldnn_output_t(OutDataOp::CopyBack, tmp); - } - mkldnn::memory *mem = const_cast(out_arr).CreateMKLDNNData(desc); - if (nullptr == mem) { + } else if (kWriteInplace == req) { auto tmp = TmpMemMgr::Get()->Alloc(desc); return mkldnn_output_t(OutDataOp::CopyBack, tmp); + } else if (kWriteTo == req) { + mkldnn::memory *mem = const_cast(out_arr).CreateMKLDNNData(desc); + if (nullptr == mem) { + auto tmp = TmpMemMgr::Get()->Alloc(desc); + return mkldnn_output_t(OutDataOp::CopyBack, tmp); + } + return mkldnn_output_t(OutDataOp::Noop, mem); } - return mkldnn_output_t(OutDataOp::Noop, mem); + auto tmp = TmpMemMgr::Get()->Alloc(desc); + return mkldnn_output_t(OutDataOp::Noop, tmp); } mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &out_arr, @@ -141,13 +213,16 @@ void CommitOutput(const NDArray &arr, const mkldnn_output_t &res) { if (res.first == CopyBack) { const_cast(arr).CopyFrom(*res.second); } else if (res.first == AddBack) { + auto res_memory = res.second; + auto target_pd = arr.GetMKLDNNData()->get_primitive_desc(); auto mem = arr.GetMKLDNNData(res.second->get_primitive_desc()); - CHECK(mem != nullptr); - // We have to allocate new memory for the sum result. - auto sum_res = TmpMemMgr::Get()->Alloc( - res.second->get_primitive_desc()); - op::MKLDNNSum(*res.second, *mem, *sum_res); - const_cast(arr).CopyFrom(*sum_res); + if (mem == nullptr) { + auto tmp_memory = TmpMemMgr::Get()->Alloc(target_pd); + MKLDNNCopy(*res_memory, tmp_memory); + res_memory = tmp_memory; + mem = arr.GetMKLDNNData(); + } + op::MKLDNNSum(*mem, *res_memory, *mem); } } diff --git a/src/operator/nn/mkldnn/mkldnn_sum.cc b/src/operator/nn/mkldnn/mkldnn_sum.cc index c51e1081d694..dfb0e254c128 100644 --- a/src/operator/nn/mkldnn/mkldnn_sum.cc +++ b/src/operator/nn/mkldnn/mkldnn_sum.cc @@ -38,10 +38,22 @@ void MKLDNNSum(const mkldnn::memory &arr1, const mkldnn::memory &arr2, std::vector inputs; input_pds[0] = arr1.get_primitive_desc(); input_pds[1] = arr2.get_primitive_desc(); - CHECK(input_pds[0] == input_pds[1]); - inputs.push_back(arr1); - inputs.push_back(arr2); - // TODO(zhengda) I need to reorder memory here. + CHECK(input_pds[0] == input_pds[0]); + const mkldnn::memory *in_mem1 = &arr1; + const mkldnn::memory *in_mem2 = &arr2; + auto output_pd = out.get_primitive_desc(); + if (input_pds[0] != output_pd) { + auto tmp_memory1 = TmpMemMgr::Get()->Alloc(output_pd); + auto tmp_memory2 = TmpMemMgr::Get()->Alloc(output_pd); + mxnet::MKLDNNCopy(arr1, tmp_memory1); + mxnet::MKLDNNCopy(arr2, tmp_memory2); + input_pds[0] = tmp_memory1->get_primitive_desc(); + input_pds[1] = tmp_memory2->get_primitive_desc(); + in_mem1 = tmp_memory1; + in_mem2 = tmp_memory2; + } + inputs.push_back(*in_mem1); + inputs.push_back(*in_mem2); mkldnn::sum::primitive_desc sum_pd(scales, input_pds); MKLDNNStream::Get()->RegisterPrim(mkldnn::sum(sum_pd, inputs, out)); } diff --git a/tests/cpp/operator/mkldnn.cc b/tests/cpp/operator/mkldnn.cc index 655435193851..e593d00a0de4 100644 --- a/tests/cpp/operator/mkldnn.cc +++ b/tests/cpp/operator/mkldnn.cc @@ -574,6 +574,7 @@ std::vector GetTestOutputArrays(const TShape &shape, continue; // Type 2, 3. + arr = NDArray(shape, Context()); desc = "MKLDNN NDArray"; if (shape.ndim() != pd.desc().data.ndims) { @@ -688,6 +689,15 @@ void PrintVerifyMsg(const NDArrayAttrs &arr1, const NDArrayAttrs &arr2) { printf(")\n"); } +void VerifyAddRequest(const std::vector &in_arrs, + const std::vector &original_outputs, + const std::vector &new_outputs, + VerifyFunc verify_fn) { + NDArray tmp = new_outputs[0]->Reorder2Default() - original_outputs[0]->Reorder2Default(); + tmp.WaitToRead(); + verify_fn(in_arrs, {&tmp}); +} + TEST(MKLDNN_NDArray, CopyFrom) { TestArrayShapes tas = GetTestArrayShapes(); std::vector pds = tas.pds; @@ -803,25 +813,34 @@ TEST(MKLDNN_BASE, MKLDNNSum) { for (int i = 0; i < in_arrs.size(); i++) { auto in_arr = in_arrs[i]; auto in_arr2 = in_arrs2[i]; - std::vector out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds); - if (!SupportMKLDNN(in_arr.arr) || !in_arr.arr.IsMKLDNNData() || in_arr.arr.IsView()) + if (!SupportMKLDNN(in_arr.arr)) continue; - + if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) { + continue; + } + std::vector out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds); for (auto out_arr : out_arrs) { auto in_mem1 = in_arr.arr.GetMKLDNNData(); auto in_mem2 = in_arr2.arr.GetMKLDNNData(); - auto out_mem = out_arr.arr.GetMKLDNNData(in_mem1->get_primitive_desc()); - - // TODO(alexzai) : remove this noop when by reordering in MKLDNNSum - if (out_mem == nullptr) + if (out_arr.arr.IsView()) continue; + auto out_mem = out_arr.arr.GetMKLDNNData(); PrintVerifyMsg(in_arr, in_arr); op::MKLDNNSum(*in_mem1, *in_mem2, *out_mem); MKLDNNStream::Get()->Submit(); VerifySumResult({&in_arr.arr, &in_arr2.arr}, {&out_arr.arr}); } + } - // in place + // in place + for (int i = 0; i < in_arrs.size(); i++) { + auto in_arr = in_arrs[i]; + auto in_arr2 = in_arrs2[i]; + if (!SupportMKLDNN(in_arr.arr)) + continue; + if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) { + continue; + } auto input_mem = in_arr.arr.GetMKLDNNData(); auto input_mem2 = in_arr2.arr.GetMKLDNNData(); NDArrayAttrs orig_arr(in_arr.arr.Copy(in_arr.arr.ctx()), "In Place Copy"); @@ -834,4 +853,111 @@ TEST(MKLDNN_BASE, MKLDNNSum) { } } +TEST(MKLDNN_BASE, CreateMKLDNNMem) { + std::vector in_arrs = GetTestInputArrays(); + std::vector in_arrs2 = GetTestInputArrays(true); + TestArrayShapes tas = GetTestArrayShapes(); + std::vector pds = tas.pds; + MKLDNNStream *stream = MKLDNNStream::Get(); + + // kWriteTo + for (int i = 0; i < in_arrs.size(); i++) { + auto in_arr = in_arrs[i]; + auto in_arr2 = in_arrs2[i]; + if (!SupportMKLDNN(in_arr.arr)) + continue; + if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) { + continue; + } + std::vector out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds); + for (auto out_arr : out_arrs) { + auto in_mem = in_arr.arr.GetMKLDNNData(); + auto in_mem2 = in_arr2.arr.GetMKLDNNData(); + NDArray orig_output = out_arr.arr.Copy(out_arr.arr.ctx()); + orig_output.WaitToRead(); + PrintVerifyMsg(in_arr, out_arr); + auto out_mem = out_arr.arr.GetMKLDNNData(); + auto output_mem_t = CreateMKLDNNMem(out_arr.arr, out_mem->get_primitive_desc(), kWriteTo); + op::MKLDNNSum(*in_mem, *in_mem2, *output_mem_t.second); + CommitOutput(out_arr.arr, output_mem_t); + stream->Submit(); + VerifySumResult({&in_arr.arr, &in_arr2.arr}, {&out_arr.arr}); + } + } + + // kWriteInPlace + for (int i = 0; i < in_arrs.size(); i++) { + auto in_arr = in_arrs[i]; + auto in_arr2 = in_arrs2[i]; + if (!SupportMKLDNN(in_arr.arr)) + continue; + if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) { + continue; + } + auto input_mem = in_arr.arr.GetMKLDNNData(); + auto input_mem2 = in_arr2.arr.GetMKLDNNData(); + NDArrayAttrs orig_arr(in_arr.arr.Copy(in_arr.arr.ctx()), "In Place Copy"); + orig_arr.arr.WaitToRead(); + PrintVerifyMsg(orig_arr, in_arr); + InitMKLDNNArray(&orig_arr.arr, input_mem->get_primitive_desc()); + orig_arr.arr.CopyFrom(*input_mem); + auto output_mem_t = CreateMKLDNNMem(in_arr.arr, + input_mem->get_primitive_desc(), kWriteInplace, &in_arr.arr); + op::MKLDNNSum(*input_mem, *input_mem2, *output_mem_t.second); + CommitOutput(in_arr.arr, output_mem_t); + stream->Submit(); + VerifySumResult({&orig_arr.arr, &in_arr2.arr}, {&in_arr.arr}); + } + + // kAddTo + for (int i = 0; i < in_arrs.size(); i++) { + auto in_arr = in_arrs[i]; + auto in_arr2 = in_arrs2[i]; + if (!SupportMKLDNN(in_arr.arr)) + continue; + if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) { + continue; + } + std::vector out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds); + for (auto out_arr : out_arrs) { + auto in_mem = in_arr.arr.GetMKLDNNData(); + auto in_mem2 = in_arr2.arr.GetMKLDNNData(); + NDArray orig_output = out_arr.arr.Copy(out_arr.arr.ctx()); + orig_output.WaitToRead(); + PrintVerifyMsg(in_arr, out_arr); + auto out_mem = out_arr.arr.GetMKLDNNData(); + auto output_mem_t = CreateMKLDNNMem(out_arr.arr, out_mem->get_primitive_desc(), kAddTo); + op::MKLDNNSum(*in_mem, *in_mem2, *output_mem_t.second); + CommitOutput(out_arr.arr, output_mem_t); + stream->Submit(); + VerifyAddRequest( + {&in_arr.arr, &in_arr2.arr}, {&orig_output}, {&out_arr.arr}, VerifySumResult); + } + } + + // kNullOp + for (int i = 0; i < in_arrs.size(); i++) { + auto in_arr = in_arrs[i]; + auto in_arr2 = in_arrs2[i]; + if (!SupportMKLDNN(in_arr.arr)) + continue; + if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) { + continue; + } + auto input_mem = in_arr.arr.GetMKLDNNData(); + auto input_mem2 = in_arr2.arr.GetMKLDNNData(); + NDArrayAttrs orig_arr(in_arr.arr.Copy(in_arr.arr.ctx()), "In Place Copy"); + orig_arr.arr.WaitToRead(); + PrintVerifyMsg(orig_arr, in_arr); + InitMKLDNNArray(&orig_arr.arr, input_mem->get_primitive_desc()); + orig_arr.arr.CopyFrom(*input_mem); + auto output_mem_t = CreateMKLDNNMem(in_arr.arr, input_mem->get_primitive_desc(), kNullOp); + op::MKLDNNSum(*input_mem, *input_mem2, *output_mem_t.second); + CommitOutput(in_arr.arr, output_mem_t); + stream->Submit(); + // original and input should be the same since noop + VerifyCopyResult({&orig_arr.arr}, {&in_arr.arr}); + } +} + #endif