From fbb40b8f62f8cf5f7662436250c51997e601537f Mon Sep 17 00:00:00 2001 From: "Li, Tingqian" Date: Tue, 14 Sep 2021 12:01:09 +0800 Subject: [PATCH] [Transformation] SpaceToDepthFusion Transform StridedSlice_chain+concat in yolov5 into SpaceToDepth Signed-off-by: Li, Tingqian --- .../space_to_depth_fusion.hpp | 4 +- .../space_to_depth_fusion.cpp | 254 +++++++-------- .../space_to_depth_fusion_test.cpp | 302 ++++++++++++++---- 3 files changed, 356 insertions(+), 204 deletions(-) diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/space_to_depth_fusion.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/space_to_depth_fusion.hpp index a042a3e778edb1..b3008ee0cb75be 100644 --- a/inference-engine/src/transformations/include/transformations/common_optimizations/space_to_depth_fusion.hpp +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/space_to_depth_fusion.hpp @@ -28,10 +28,8 @@ class TRANSFORMATIONS_API SpaceToDepthFusion; * +---> StridedSlice -> StridedSlice ----+ * +---> StridedSlice -> StridedSlice ----+ * - * to SpaceToDepth + * with SpaceToDepth when applicable. 
* - * Restrictions: - * - input rank must be 4 */ class ngraph::pass::SpaceToDepthFusion: public ngraph::pass::MatcherPass { diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/space_to_depth_fusion.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/space_to_depth_fusion.cpp index 5a789fea03051f..b18444214d1e3a 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/space_to_depth_fusion.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/space_to_depth_fusion.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include @@ -18,25 +18,79 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::SpaceToDepthFusion, "SpaceToDepthFusion", 0 using namespace ngraph; -const auto end_max = std::numeric_limits::max(); +static const auto end_max = std::numeric_limits::max(); -struct SliceSyntax { +struct SliceSemantics { std::vector begin; std::vector end; std::vector stride; + bool b_valid = false; - SliceSyntax() = default; + SliceSemantics() = default; + + SliceSemantics(std::shared_ptr ss) : b_valid(false) { + Shape in_shape_max; + + const auto& new_axis_mask = ss->get_new_axis_mask(); + const auto& shrink_axis_mask = ss->get_shrink_axis_mask(); + const auto& ellipsis_mask = ss->get_ellipsis_mask(); + + // no new, deleted or ellipsis axis is allowed + if (std::find(new_axis_mask.begin(), new_axis_mask.end(), 1) != new_axis_mask.end() || + std::find(shrink_axis_mask.begin(), shrink_axis_mask.end(), 1) != shrink_axis_mask.end() || + std::find(ellipsis_mask.begin(), ellipsis_mask.end(), 1) != ellipsis_mask.end()) + return; + + auto get_masked_input = [&](int input_id, std::vector mask, int64_t masked_value) { + std::vector ret; + auto input = + std::dynamic_pointer_cast(ss->input_value(input_id).get_node_shared_ptr()); + if (!input) + return ret; + + ret = input->cast_vector(); + + for (size_t k = 0; k < mask.size(); k++) { + 
if (mask[k] == 1) + ret[k] = masked_value; + } + return ret; + }; + + begin = get_masked_input(1, ss->get_begin_mask(), 0); + end = get_masked_input(2, ss->get_end_mask(), end_max); + + const auto& pshape = ss->input_value(0).get_partial_shape(); + if (pshape.is_static()) { + // use end_max to indicate the selection of whole range + const auto static_shape = pshape.get_shape(); + for (size_t k = 0; k < static_shape.size() && k < end.size(); k++) { + if (end[k] >= static_cast(static_shape[k])) + end[k] = end_max; + } + } + + stride.resize(begin.size(), 1); + if (ss->get_input_size() >= 4) { + auto input = std::dynamic_pointer_cast(ss->input_value(3).get_node_shared_ptr()); + if (input) + stride = input->cast_vector(); + } + b_valid = true; + } operator bool() const { - return begin.size() > 0 && end.size() > 0 && stride.size() > 0; + return b_valid; } /* - A -> StridedSlice1 -> B -> StridedSlice2 -> C - <=> - A -> StridedSlice3 -> C + Fusion of two consecutive StridedSlices can be done under certain conditions: + + A -> StridedSlice1 -> B -> StridedSlice2 -> C + <=> + A -> StridedSlice3 -> C - for 1 particular dimension + for 1 particular dimension: StridedSlice1 (b1,e1,s1): B[i]=A[i*s1+b1] for i*s1+b1begin[i] = new_begin; this->end[i] = new_end; } - } -}; - -static SliceSyntax get_syntax(std::shared_ptr ss) { - SliceSyntax s; - int rank; - Shape in_shape_max; - - rank = ss->input_value(0).get_partial_shape().rank().get_length(); - - if (ss->input_value(0).get_partial_shape().is_static()) { - in_shape_max = ss->input_value(0).get_shape(); - } else { - in_shape_max = Shape(rank, end_max); - } - - const auto& new_axis_mask = ss->get_new_axis_mask(); - const auto& shrink_axis_mask = ss->get_shrink_axis_mask(); - const auto& ellipsis_mask = ss->get_ellipsis_mask(); - // no new, deleted or ellipsis axis is allowed - for (auto& v : new_axis_mask) { - if (v == 1) - return s; + b_valid = true; } - for (auto& v : shrink_axis_mask) { - if (v == 1) - return s; - } - for (auto& v : 
ellipsis_mask) { - if (v == 1) - return s; - } - - auto get_masked_input = [&](int input_id, std::vector mask, int64_t masked_value) { - std::vector ret; - auto input = - std::dynamic_pointer_cast(ss->input_value(input_id).get_node_shared_ptr()); - if (!input) - return ret; - - ret = input->cast_vector(); - - for (size_t k = 0; k < mask.size(); k++) { - if (mask[k] == 1) - ret[k] = masked_value; - } - return ret; - }; - - s.begin = get_masked_input(1, ss->get_begin_mask(), 0); - s.end = get_masked_input(2, ss->get_end_mask(), end_max); - for (size_t k = 0; k < in_shape_max.size(); k++) { - if (s.end[k] >= static_cast(in_shape_max[k])) - s.end[k] = end_max; - } - - s.stride.resize(s.begin.size(), 1); - if (ss->get_input_size() >= 4) { - auto input = std::dynamic_pointer_cast(ss->input_value(3).get_node_shared_ptr()); - if (input) - s.stride = input->cast_vector(); - } - - return s; -} +}; ngraph::pass::SpaceToDepthFusion::SpaceToDepthFusion() { MATCHER_SCOPE(SpaceToDepthFusion); - const char* env_p = ::getenv("CROSS_CHECK_TOOL"); - const int cross_check_tool = env_p ? 
std::stol(env_p) : -1; - - if (cross_check_tool == 0) { - printf("[%s]: cross_check_tool=%d, skipping.\n", __func__, cross_check_tool); - return; - } else { - printf("[%s]: cross_check_tool=%d, enabled.\n", __func__, cross_check_tool); - } - - auto concat_pattern = pattern::wrap_type({}, [](const Output& value) { - auto concat = std::dynamic_pointer_cast(value.get_node_shared_ptr()); + auto concat_pattern = pattern::wrap_type({}, [](const Output& value) { + auto concat = std::dynamic_pointer_cast(value.get_node_shared_ptr()); if (!concat) return false; return concat->get_axis() == 1; }); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { - const auto& pattern_map = m.get_pattern_value_map(); - auto concat = std::dynamic_pointer_cast(pattern_map.at(concat_pattern).get_node_shared_ptr()); + auto concat = std::dynamic_pointer_cast(m.get_match_root()); if (!concat) return false; @@ -175,22 +156,25 @@ ngraph::pass::SpaceToDepthFusion::SpaceToDepthFusion() { Output common_input; for (int i = 0; i < slice_cnt; i++) { - SliceSyntax slice_syntax; + SliceSemantics slice_semantics; auto input = concat->get_input_source_output(i); - auto ss = std::dynamic_pointer_cast(input.get_node_shared_ptr()); + auto ss = std::dynamic_pointer_cast(input.get_node_shared_ptr()); while (ss) { nodes_to_delete.push_back(ss); - auto syntax = get_syntax(ss); - if (!syntax) + SliceSemantics semantics(ss); + if (!semantics) return false; - slice_syntax.fuse_with(syntax); + slice_semantics.fuse_with(semantics); input = ss->input_value(0); - ss = std::dynamic_pointer_cast(input.get_node_shared_ptr()); + ss = std::dynamic_pointer_cast(input.get_node_shared_ptr()); } + if (!slice_semantics) + return false; + // all path concated must originates from same input if (!common_input.get_node_shared_ptr()) common_input = input; @@ -199,24 +183,28 @@ ngraph::pass::SpaceToDepthFusion::SpaceToDepthFusion() { return false; if (rank == 0) - rank = slice_syntax.stride.size(); + rank = 
slice_semantics.stride.size(); if (rank == 0) return false; - if (static_cast(slice_syntax.stride.size()) != rank) + if (static_cast(slice_semantics.stride.size()) != rank) return false; // [N, C, D1, D2, ...] for (size_t k = 0; k < 2; k++) { - if (slice_syntax.stride[k] != 1 || slice_syntax.begin[k] != 0 || slice_syntax.end[k] < end_max) + if (slice_semantics.stride[k] != 1 || slice_semantics.begin[k] != 0 || slice_semantics.end[k] < end_max) return false; } - // check block size consistency + // do: + // - block size consistency check + // - slice count consistency check + // - begin/stride/end validation + // - slice order calculation for (int k = 2; k < rank; k++) { if (block_size == 0) { - block_size = slice_syntax.stride[k]; + block_size = slice_semantics.stride[k]; if (block_size < 2) return false; @@ -227,79 +215,69 @@ ngraph::pass::SpaceToDepthFusion::SpaceToDepthFusion() { if (slice_expected != slice_cnt) return false; } - if (slice_syntax.stride[k] != block_size) + if (slice_semantics.begin[k] >= block_size) return false; - if (slice_syntax.end[k] < end_max) + if (slice_semantics.stride[k] != block_size) + return false; + if (slice_semantics.end[k] < end_max) return false; - slice_order[i] = slice_order[i] * block_size + slice_syntax.begin[k]; + slice_order[i] = slice_order[i] * block_size + slice_semantics.begin[k]; } if (slice_order[i] != i) is_ordered = false; - if (slice_order[i] >= slice_cnt) { - printf("ERROR slice_order[i]=%d\n", slice_order[i]); - return false; - } slice_from_order[slice_order[i]] = i; } if (is_ordered) { std::shared_ptr new_root = - register_new_node(common_input, - opset7::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, - block_size); + std::make_shared(common_input, + opset8::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, + block_size); new_root->set_friendly_name(concat->get_friendly_name()); copy_runtime_info(nodes_to_delete, new_root); replace_node(m.get_match_root(), new_root); } else { - // if output is connected to a 
Convolution node, channel re-order can be further fused - // into weights - bool b_further_opt = true; + // if output is connected to Convolution nodes only, channel + // re-order can be further fused into weights for (auto input_to : concat->get_default_output().get_target_inputs()) { - auto conv = std::dynamic_pointer_cast(input_to.get_node()->shared_from_this()); - if (!conv) { - b_further_opt = false; - break; - } - auto filters = std::dynamic_pointer_cast(conv->get_input_node_shared_ptr(1)); - if (!filters) { - b_further_opt = false; - break; - } - } + auto conv = std::dynamic_pointer_cast(input_to.get_node()->shared_from_this()); + if (!conv) + return false; - if (!b_further_opt) - return false; + auto filters = std::dynamic_pointer_cast(conv->get_input_node_shared_ptr(1)); + if (!filters) + return false; + } std::shared_ptr new_root = - register_new_node(common_input, - opset7::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, - block_size); + std::make_shared(common_input, + opset8::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, + block_size); new_root->set_friendly_name(concat->get_friendly_name()); copy_runtime_info(nodes_to_delete, new_root); - // add slplit & concat to Convolution's weights, const-folding will eliminate them later + // add split & concat to reorder the channels of Convolution's weights, + // a later constant-folding pass will eliminate them. 
for (auto input_to : concat->get_default_output().get_target_inputs()) { - auto conv = std::dynamic_pointer_cast(input_to.get_node()->shared_from_this()); - auto filters = std::dynamic_pointer_cast(conv->get_input_node_shared_ptr(1)); + auto conv = std::dynamic_pointer_cast(input_to.get_node()->shared_from_this()); + auto filters = std::dynamic_pointer_cast(conv->get_input_node_shared_ptr(1)); - // filters are ordered by slice-order, now re-order them - auto axis = register_new_node(element::i32, Shape{}, std::vector{1}); - auto split = register_new_node(filters, axis, slice_cnt); + auto axis = std::make_shared(element::i32, Shape{}, std::vector{1}); + auto split = std::make_shared(filters, axis, slice_cnt); OutputVector reorder; for (int i = 0; i < slice_cnt; i++) reorder.push_back(split->output(slice_from_order[i])); - auto new_filter = register_new_node(reorder, 1); - replace_node(filters, new_filter); - } + auto new_filter = std::make_shared(reorder, 1); + conv->set_argument(1, new_filter->get_default_output()); + } replace_node(m.get_match_root(), new_root); } - return true; }; diff --git a/inference-engine/tests/functional/inference_engine/transformations/space_to_depth_fusion_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/space_to_depth_fusion_test.cpp index ad670d6d2088a7..f68327f198c150 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/space_to_depth_fusion_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/space_to_depth_fusion_test.cpp @@ -4,14 +4,15 @@ #include +#include #include -#include -#include - #include -#include +#include #include #include +#include +#include +#include #include #include #include @@ -21,66 +22,241 @@ using namespace testing; using namespace ngraph; -std::shared_ptr create_ss(const Output &data_node, - size_t ndims, int stride, - int axis, int begin) { - std::vector begin_c(ndims, 0); - std::vector end_c(ndims, 0); - std::vector 
stride_c(ndims, 1); - begin_c[axis] = begin; - stride_c[axis] = stride; - auto begin_node = opset6::Constant::create(ngraph::element::i64, - ngraph::Shape{ndims}, begin_c); - auto end_node = opset6::Constant::create(ngraph::element::i64, - ngraph::Shape{ndims}, end_c); - auto stride_node = opset6::Constant::create(ngraph::element::i64, - ngraph::Shape{ndims}, stride_c); - std::vector begin_mask(ndims, 0); - std::vector end_mask(ndims, 1); - auto ss = std::make_shared( - data_node, begin_node, end_node, stride_node, begin_mask, end_mask); - return ss; +static const auto end_max = std::numeric_limits::max(); + +static std::shared_ptr create_ss(const Output& data_node, + size_t ndims, + int axis, + int begin, + int stride) { + std::vector begin_c(ndims, 0); + std::vector end_c(ndims, 0); + std::vector stride_c(ndims, 1); + begin_c[axis] = begin; + stride_c[axis] = stride; + auto begin_node = opset8::Constant::create(ngraph::element::i64, ngraph::Shape{ndims}, begin_c); + auto end_node = opset8::Constant::create(ngraph::element::i64, ngraph::Shape{ndims}, end_c); + auto stride_node = opset8::Constant::create(ngraph::element::i64, ngraph::Shape{ndims}, stride_c); + std::vector begin_mask(ndims, 0); + std::vector end_mask(ndims, 1); + auto ss = + std::make_shared(data_node, begin_node, end_node, stride_node, begin_mask, end_mask); + return ss; +} + +struct coordinate : std::vector { + using base = std::vector; + int radix; + coordinate(int ndims, int radix) : base(ndims, 0), radix(radix) {} + coordinate& operator++() { + int ndims = size(); + for (int k = ndims - 1; k >= 0; k--) { + (*this)[k]++; + if ((*this)[k] < radix) + break; + (*this)[k] = 0; + } + return *this; + } +}; + +static std::shared_ptr build_ss_chain(const Output& in, + int block_size, + const std::vector& shuffle = {}) { + auto shape = in.get_shape(); + + OutputVector ss_outputs; + + coordinate begin(shape.size(), block_size); + + do { + std::shared_ptr node = in.get_node_shared_ptr(); + for (int k = 2; 
k < shape.size(); k++) + node = create_ss(node, k + 1, k, begin[k], block_size); + + ss_outputs.push_back(node); + + ++begin; + } while (begin[1] == 0); + + if (shuffle.size()) { + OutputVector after_shuffle; + + for (int i = 0; i < ss_outputs.size(); i++) { + auto id = shuffle[i % shuffle.size()]; + after_shuffle.push_back(ss_outputs[id]); + } + + ss_outputs = after_shuffle; + } + + return std::make_shared(ss_outputs, 1); } TEST(TransformationTests, SpaceToDepthFusionFromStridedSlice2x2) { - std::shared_ptr f(nullptr), f_ref(nullptr); - { - auto in = std::make_shared(element::f32, - Shape{1, 3, 640, 640}); - auto ss_chain = [&](int begin_dim2, int begin_dim3) { - auto s0 = create_ss(in, 4, 2, 2, begin_dim2); - auto s1 = create_ss(s0, 4, 2, 3, begin_dim3); - return s1; - }; - - auto a = ss_chain(0, 0); - auto b = ss_chain(0, 1); - auto c = ss_chain(1, 0); - auto d = ss_chain(1, 1); - - auto out = std::make_shared(OutputVector{a, b, c, d}, 1); - - f = std::make_shared(NodeVector{out}, ParameterVector{in}); - - pass::Manager m; - m.register_pass(); - m.register_pass(); - m.run_passes(f); - ASSERT_NO_THROW(check_rt_info(f)); - } - - { - auto data = - std::make_shared(element::f32, Shape{12, 3, 4, 8}); - auto batch_to_space = std::make_shared( - data, op::Constant::create(element::i64, Shape{4}, {1, 1, 2, 2}), - op::Constant::create(element::i64, Shape{4}, {0, 0, 2, 1}), - op::Constant::create(element::i64, Shape{4}, {1, 2, 1, 14})); - - f_ref = std::make_shared(NodeVector{batch_to_space}, - ParameterVector{data}); - } - - auto res = compare_functions(f, f_ref, true); - ASSERT_TRUE(res.first) << res.second; + std::shared_ptr f(nullptr), f_ref(nullptr); + + auto block_size = 2; + { + auto in = std::make_shared(element::f32, Shape{1, 3, 640, 640}); + auto out = build_ss_chain(in, block_size); + f = std::make_shared(NodeVector{out}, ParameterVector{in}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); 
+ } + + { + auto in = std::make_shared(element::f32, Shape{1, 3, 640, 640}); + auto space_to_depth = + std::make_shared(in, + opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, + block_size); + + f_ref = std::make_shared(NodeVector{space_to_depth}, ParameterVector{in}); + } + + auto res = compare_functions(f, f_ref, true); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, SpaceToDepthFusionFromStridedSlice2x2_Negative) { + std::shared_ptr f(nullptr), f_ref(nullptr); + + auto block_size = 2; + { + auto in = std::make_shared(element::f32, Shape{1, 3, 640, 640}); + auto out = build_ss_chain(in, block_size, {0, 1, 3, 2}); // shuffled order, so should fail + f = std::make_shared(NodeVector{out}, ParameterVector{in}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto in = std::make_shared(element::f32, Shape{1, 3, 640, 640}); + auto out = build_ss_chain(in, block_size, {0, 1, 3, 2}); // shuffled order, so should fail + f_ref = std::make_shared(NodeVector{out}, ParameterVector{in}); + } + + auto res = compare_functions(f, f_ref, true); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, SpaceToDepthFusionFromStridedSlice3x3) { + std::shared_ptr f(nullptr), f_ref(nullptr); + auto block_size = 3; + + { + auto in = std::make_shared(element::f32, Shape{1, 3, 120, 120}); + auto out = build_ss_chain(in, block_size); + f = std::make_shared(NodeVector{out}, ParameterVector{in}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto in = std::make_shared(element::f32, Shape{1, 3, 120, 120}); + auto space_to_depth = + std::make_shared(in, + opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, + block_size); + + f_ref = std::make_shared(NodeVector{space_to_depth}, ParameterVector{in}); + } + + auto res = compare_functions(f, f_ref, true); + ASSERT_TRUE(res.first) << 
res.second; +} + +TEST(TransformationTests, SpaceToDepthFusionFromStridedSlice2x2x2) { + std::shared_ptr f(nullptr), f_ref(nullptr); + + auto block_size = 2; + { + auto in = std::make_shared(element::f32, Shape{1, 3, 640, 640, 640}); + + auto out = build_ss_chain(in, block_size); + + f = std::make_shared(NodeVector{out}, ParameterVector{in}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto in = std::make_shared(element::f32, Shape{1, 3, 640, 640, 640}); + auto space_to_depth = + std::make_shared(in, + opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, + block_size); + + f_ref = std::make_shared(NodeVector{space_to_depth}, ParameterVector{in}); + } + + auto res = compare_functions(f, f_ref, true); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, SpaceToDepthFusionFromStridedSlice2x2WithConv) { + std::shared_ptr f(nullptr), f_ref(nullptr); + + std::vector weights(10 * 12 * 3 * 3, 0); + + auto block_size = 2; + { + auto in = std::make_shared(element::f32, Shape{1, 3, 640, 640}); + auto ssconcat = build_ss_chain(in, block_size, {0, 1, 3, 2}); + + auto filters = op::Constant::create(element::f32, Shape{10, 12, 3, 3}, weights); + auto out = std::make_shared(ssconcat, + filters, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + Strides{1, 1}); + + f = std::make_shared(NodeVector{out}, ParameterVector{in}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto in = std::make_shared(element::f32, Shape{1, 3, 640, 640}); + auto space_to_depth = + std::make_shared(in, + opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, + block_size); + + auto filters = op::Constant::create(element::f32, Shape{10, 12, 3, 3}, weights); + auto out = std::make_shared(space_to_depth, + filters, + Strides{1, 1}, + CoordinateDiff{0, 0}, + CoordinateDiff{0, 0}, + 
Strides{1, 1}); + + f_ref = std::make_shared(NodeVector{out}, ParameterVector{in}); + } + + auto res = compare_functions(f, f_ref, true); + ASSERT_TRUE(res.first) << res.second; }