From fbb40b8f62f8cf5f7662436250c51997e601537f Mon Sep 17 00:00:00 2001
From: "Li, Tingqian" <tingqian.li@intel.com>
Date: Tue, 14 Sep 2021 12:01:09 +0800
Subject: [PATCH] [Transformation] SpaceToDepthFusion

Transform StridedSlice_chain+concat in yolov5 into SpaceToDepth

Signed-off-by: Li, Tingqian <tingqian.li@intel.com>
---
 .../space_to_depth_fusion.hpp                 |   4 +-
 .../space_to_depth_fusion.cpp                 | 254 +++++++--------
 .../space_to_depth_fusion_test.cpp            | 302 ++++++++++++++----
 3 files changed, 356 insertions(+), 204 deletions(-)
diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/space_to_depth_fusion.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/space_to_depth_fusion.hpp
index a042a3e778edb1..b3008ee0cb75be 100644
--- a/inference-engine/src/transformations/include/transformations/common_optimizations/space_to_depth_fusion.hpp
+++ b/inference-engine/src/transformations/include/transformations/common_optimizations/space_to_depth_fusion.hpp
@@ -28,10 +28,8 @@ class TRANSFORMATIONS_API SpaceToDepthFusion;
  *          +---> StridedSlice -> StridedSlice ----+
  *          +---> StridedSlice -> StridedSlice ----+
  * 
- * to SpaceToDepth
+ * with SpaceToDepth when applicable.
  * 
- * Restrictions:
- * - input rank must be 4
  */
 
 class ngraph::pass::SpaceToDepthFusion: public ngraph::pass::MatcherPass {
diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/space_to_depth_fusion.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/space_to_depth_fusion.cpp
index 5a789fea03051f..b18444214d1e3a 100644
--- a/inference-engine/src/transformations/src/transformations/common_optimizations/space_to_depth_fusion.cpp
+++ b/inference-engine/src/transformations/src/transformations/common_optimizations/space_to_depth_fusion.cpp
@@ -6,7 +6,7 @@
 
 #include <limits>
 #include <memory>
-#include <ngraph/opsets/opset7.hpp>
+#include <ngraph/opsets/opset8.hpp>
 #include <ngraph/pattern/op/wrap_type.hpp>
 #include <ngraph/rt_info.hpp>
 #include <numeric>
@@ -18,25 +18,79 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::SpaceToDepthFusion, "SpaceToDepthFusion", 0
 
 using namespace ngraph;
 
-const auto end_max = std::numeric_limits<int64_t>::max();
+static const auto end_max = std::numeric_limits<int64_t>::max();
 
-struct SliceSyntax {
+struct SliceSemantics {
     std::vector<int64_t> begin;
     std::vector<int64_t> end;
     std::vector<int64_t> stride;
+    bool b_valid = false;
 
-    SliceSyntax() = default;
+    SliceSemantics() = default;
+
+    SliceSemantics(std::shared_ptr<ngraph::opset8::StridedSlice> ss) : b_valid(false) {
+        Shape in_shape_max;
+
+        const auto& new_axis_mask = ss->get_new_axis_mask();
+        const auto& shrink_axis_mask = ss->get_shrink_axis_mask();
+        const auto& ellipsis_mask = ss->get_ellipsis_mask();
+
+        // no new, deleted or ellipsis axis is allowed
+        if (std::find(new_axis_mask.begin(), new_axis_mask.end(), 1) != new_axis_mask.end() ||
+            std::find(shrink_axis_mask.begin(), shrink_axis_mask.end(), 1) != shrink_axis_mask.end() ||
+            std::find(ellipsis_mask.begin(), ellipsis_mask.end(), 1) != ellipsis_mask.end())
+            return;
+
+        auto get_masked_input = [&](int input_id, std::vector<int64_t> mask, int64_t masked_value) {
+            std::vector<int64_t> ret;
+            auto input =
+                std::dynamic_pointer_cast<ngraph::opset8::Constant>(ss->input_value(input_id).get_node_shared_ptr());
+            if (!input)
+                return ret;
+
+            ret = input->cast_vector<int64_t>();
+
+            for (size_t k = 0; k < mask.size(); k++) {
+                if (mask[k] == 1)
+                    ret[k] = masked_value;
+            }
+            return ret;
+        };
+
+        begin = get_masked_input(1, ss->get_begin_mask(), 0);
+        end = get_masked_input(2, ss->get_end_mask(), end_max);
+
+        const auto& pshape = ss->input_value(0).get_partial_shape();
+        if (pshape.is_static()) {
+            // use end_max to indicate the selection of whole range
+            const auto static_shape = pshape.get_shape();
+            for (size_t k = 0; k < static_shape.size() && k < end.size(); k++) {
+                if (end[k] >= static_cast<int64_t>(static_shape[k]))
+                    end[k] = end_max;
+            }
+        }
+
+        stride.resize(begin.size(), 1);
+        if (ss->get_input_size() >= 4) {
+            auto input = std::dynamic_pointer_cast<ngraph::opset8::Constant>(ss->input_value(3).get_node_shared_ptr());
+            if (input)
+                stride = input->cast_vector<int64_t>();
+        }
+        b_valid = true;
+    }
 
     operator bool() const {
-        return begin.size() > 0 && end.size() > 0 && stride.size() > 0;
+        return b_valid;
     }
 
     /*
-    A -> StridedSlice1 -> B -> StridedSlice2 -> C
-        <=>
-    A -> StridedSlice3 -> C
+    Fusion of two consecutive StridedSlices is possible under certain conditions:
+
+            A -> StridedSlice1 -> B -> StridedSlice2 -> C
+                <=>
+            A -> StridedSlice3 -> C
 
-    for 1 particular dimension
+    for 1 particular dimension:
 
         StridedSlice1 (b1,e1,s1):   B[i]=A[i*s1+b1] for i*s1+b1<e1
         StridedSlice2 (b2,e2,s2):   C[i]=B[i*s2+b2] for i*s2+b2<e2
@@ -49,7 +103,7 @@ struct SliceSyntax {
             b3 = b1 + b2*s1
             e3 = MIN(e1, e2*s1+b1)
     */
-    void fuse_with(const SliceSyntax& s2) {
+    void fuse_with(const SliceSemantics& s2) {
         auto rank = s2.begin.size();
 
         // expand rank to match s2
@@ -71,96 +125,23 @@ struct SliceSyntax {
             this->begin[i] = new_begin;
             this->end[i] = new_end;
         }
-    }
-};
-
-static SliceSyntax get_syntax(std::shared_ptr<ngraph::opset7::StridedSlice> ss) {
-    SliceSyntax s;
-    int rank;
-    Shape in_shape_max;
-
-    rank = ss->input_value(0).get_partial_shape().rank().get_length();
-
-    if (ss->input_value(0).get_partial_shape().is_static()) {
-        in_shape_max = ss->input_value(0).get_shape();
-    } else {
-        in_shape_max = Shape(rank, end_max);
-    }
-
-    const auto& new_axis_mask = ss->get_new_axis_mask();
-    const auto& shrink_axis_mask = ss->get_shrink_axis_mask();
-    const auto& ellipsis_mask = ss->get_ellipsis_mask();
 
-    // no new, deleted or ellipsis axis is allowed
-    for (auto& v : new_axis_mask) {
-        if (v == 1)
-            return s;
+        b_valid = true;
     }
-    for (auto& v : shrink_axis_mask) {
-        if (v == 1)
-            return s;
-    }
-    for (auto& v : ellipsis_mask) {
-        if (v == 1)
-            return s;
-    }
-
-    auto get_masked_input = [&](int input_id, std::vector<int64_t> mask, int64_t masked_value) {
-        std::vector<int64_t> ret;
-        auto input =
-            std::dynamic_pointer_cast<ngraph::opset7::Constant>(ss->input_value(input_id).get_node_shared_ptr());
-        if (!input)
-            return ret;
-
-        ret = input->cast_vector<int64_t>();
-
-        for (size_t k = 0; k < mask.size(); k++) {
-            if (mask[k] == 1)
-                ret[k] = masked_value;
-        }
-        return ret;
-    };
-
-    s.begin = get_masked_input(1, ss->get_begin_mask(), 0);
-    s.end = get_masked_input(2, ss->get_end_mask(), end_max);
-    for (size_t k = 0; k < in_shape_max.size(); k++) {
-        if (s.end[k] >= static_cast<int64_t>(in_shape_max[k]))
-            s.end[k] = end_max;
-    }
-
-    s.stride.resize(s.begin.size(), 1);
-    if (ss->get_input_size() >= 4) {
-        auto input = std::dynamic_pointer_cast<ngraph::opset7::Constant>(ss->input_value(3).get_node_shared_ptr());
-        if (input)
-            s.stride = input->cast_vector<int64_t>();
-    }
-
-    return s;
-}
+};
 
 ngraph::pass::SpaceToDepthFusion::SpaceToDepthFusion() {
     MATCHER_SCOPE(SpaceToDepthFusion);
 
-    const char* env_p = ::getenv("CROSS_CHECK_TOOL");
-    const int cross_check_tool = env_p ? std::stol(env_p) : -1;
-
-    if (cross_check_tool == 0) {
-        printf("[%s]: cross_check_tool=%d, skipping.\n", __func__, cross_check_tool);
-        return;
-    } else {
-        printf("[%s]: cross_check_tool=%d, enabled.\n", __func__, cross_check_tool);
-    }
-
-    auto concat_pattern = pattern::wrap_type<opset7::Concat>({}, [](const Output<Node>& value) {
-        auto concat = std::dynamic_pointer_cast<opset7::Concat>(value.get_node_shared_ptr());
+    auto concat_pattern = pattern::wrap_type<opset8::Concat>({}, [](const Output<Node>& value) {
+        auto concat = std::dynamic_pointer_cast<opset8::Concat>(value.get_node_shared_ptr());
         if (!concat)
             return false;
         return concat->get_axis() == 1;
     });
 
     ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
-        const auto& pattern_map = m.get_pattern_value_map();
-        auto concat = std::dynamic_pointer_cast<opset7::Concat>(pattern_map.at(concat_pattern).get_node_shared_ptr());
+        auto concat = std::dynamic_pointer_cast<opset8::Concat>(m.get_match_root());
         if (!concat)
             return false;
 
@@ -175,22 +156,25 @@ ngraph::pass::SpaceToDepthFusion::SpaceToDepthFusion() {
         Output<Node> common_input;
 
         for (int i = 0; i < slice_cnt; i++) {
-            SliceSyntax slice_syntax;
+            SliceSemantics slice_semantics;
             auto input = concat->get_input_source_output(i);
-            auto ss = std::dynamic_pointer_cast<opset7::StridedSlice>(input.get_node_shared_ptr());
+            auto ss = std::dynamic_pointer_cast<opset8::StridedSlice>(input.get_node_shared_ptr());
             while (ss) {
                 nodes_to_delete.push_back(ss);
 
-                auto syntax = get_syntax(ss);
-                if (!syntax)
+                SliceSemantics semantics(ss);
+                if (!semantics)
                     return false;
 
-                slice_syntax.fuse_with(syntax);
+                slice_semantics.fuse_with(semantics);
                 input = ss->input_value(0);
 
-                ss = std::dynamic_pointer_cast<opset7::StridedSlice>(input.get_node_shared_ptr());
+                ss = std::dynamic_pointer_cast<opset8::StridedSlice>(input.get_node_shared_ptr());
             }
 
+            if (!slice_semantics)
+                return false;
+
             // all path concated must originates from same input
             if (!common_input.get_node_shared_ptr())
                 common_input = input;
@@ -199,24 +183,28 @@ ngraph::pass::SpaceToDepthFusion::SpaceToDepthFusion() {
                 return false;
 
             if (rank == 0)
-                rank = slice_syntax.stride.size();
+                rank = slice_semantics.stride.size();
 
             if (rank == 0)
                 return false;
 
-            if (static_cast<int>(slice_syntax.stride.size()) != rank)
+            if (static_cast<int>(slice_semantics.stride.size()) != rank)
                 return false;
 
             // [N, C, D1, D2, ...]
             for (size_t k = 0; k < 2; k++) {
-                if (slice_syntax.stride[k] != 1 || slice_syntax.begin[k] != 0 || slice_syntax.end[k] < end_max)
+                if (slice_semantics.stride[k] != 1 || slice_semantics.begin[k] != 0 || slice_semantics.end[k] < end_max)
                     return false;
             }
 
-            // check block size consistency
+            // do:
+            //   - block size consistency check
+            //   - slice count consistency check
+            //   - begin/stride/end validation
+            //   - slice order calculation
             for (int k = 2; k < rank; k++) {
                 if (block_size == 0) {
-                    block_size = slice_syntax.stride[k];
+                    block_size = slice_semantics.stride[k];
                     if (block_size < 2)
                         return false;
 
@@ -227,79 +215,69 @@ ngraph::pass::SpaceToDepthFusion::SpaceToDepthFusion() {
                     if (slice_expected != slice_cnt)
                         return false;
                 }
-                if (slice_syntax.stride[k] != block_size)
+                if (slice_semantics.begin[k] >= block_size)
                     return false;
-                if (slice_syntax.end[k] < end_max)
+                if (slice_semantics.stride[k] != block_size)
+                    return false;
+                if (slice_semantics.end[k] < end_max)
                     return false;
 
-                slice_order[i] = slice_order[i] * block_size + slice_syntax.begin[k];
+                slice_order[i] = slice_order[i] * block_size + slice_semantics.begin[k];
             }
 
             if (slice_order[i] != i)
                 is_ordered = false;
 
-            if (slice_order[i] >= slice_cnt) {
-                printf("ERROR slice_order[i]=%d\n", slice_order[i]);
-                return false;
-            }
             slice_from_order[slice_order[i]] = i;
         }
 
         if (is_ordered) {
             std::shared_ptr<Node> new_root =
-                register_new_node<opset7::SpaceToDepth>(common_input,
-                                                        opset7::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST,
-                                                        block_size);
+                std::make_shared<opset8::SpaceToDepth>(common_input,
+                                                       opset8::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST,
+                                                       block_size);
 
             new_root->set_friendly_name(concat->get_friendly_name());
             copy_runtime_info(nodes_to_delete, new_root);
             replace_node(m.get_match_root(), new_root);
         } else {
-            // if output is connected to a Convolution node, channel re-order can be further fused
-            // into weights
-            bool b_further_opt = true;
+            // if output is connected to Convolution nodes only, channel
+            // re-order can be further fused into weights
             for (auto input_to : concat->get_default_output().get_target_inputs()) {
-                auto conv = std::dynamic_pointer_cast<opset7::Convolution>(input_to.get_node()->shared_from_this());
-                if (!conv) {
-                    b_further_opt = false;
-                    break;
-                }
-                auto filters = std::dynamic_pointer_cast<opset7::Constant>(conv->get_input_node_shared_ptr(1));
-                if (!filters) {
-                    b_further_opt = false;
-                    break;
-                }
-            }
+                auto conv = std::dynamic_pointer_cast<opset8::Convolution>(input_to.get_node()->shared_from_this());
+                if (!conv)
+                    return false;
 
-            if (!b_further_opt)
-                return false;
+                auto filters = std::dynamic_pointer_cast<opset8::Constant>(conv->get_input_node_shared_ptr(1));
+                if (!filters)
+                    return false;
+            }
 
             std::shared_ptr<Node> new_root =
-                register_new_node<opset7::SpaceToDepth>(common_input,
-                                                        opset7::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST,
-                                                        block_size);
+                std::make_shared<opset8::SpaceToDepth>(common_input,
+                                                       opset8::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST,
+                                                       block_size);
 
             new_root->set_friendly_name(concat->get_friendly_name());
             copy_runtime_info(nodes_to_delete, new_root);
 
-            // add slplit & concat to Convolution's weights, const-folding will eliminate them later
+            // add Split & Concat to reorder the channels of the Convolution's weights;
+            // a later constant-folding pass will eliminate them.
             for (auto input_to : concat->get_default_output().get_target_inputs()) {
-                auto conv = std::dynamic_pointer_cast<opset7::Convolution>(input_to.get_node()->shared_from_this());
-                auto filters = std::dynamic_pointer_cast<opset7::Constant>(conv->get_input_node_shared_ptr(1));
+                auto conv = std::dynamic_pointer_cast<opset8::Convolution>(input_to.get_node()->shared_from_this());
+                auto filters = std::dynamic_pointer_cast<opset8::Constant>(conv->get_input_node_shared_ptr(1));
 
-                // filters are ordered by slice-order, now re-order them
-                auto axis = register_new_node<opset7::Constant>(element::i32, Shape{}, std::vector<int32_t>{1});
-                auto split = register_new_node<opset7::Split>(filters, axis, slice_cnt);
+                auto axis = std::make_shared<opset8::Constant>(element::i32, Shape{}, std::vector<int32_t>{1});
+                auto split = std::make_shared<opset8::Split>(filters, axis, slice_cnt);
                 OutputVector reorder;
                 for (int i = 0; i < slice_cnt; i++)
                     reorder.push_back(split->output(slice_from_order[i]));
-                auto new_filter = register_new_node<opset7::Concat>(reorder, 1);
-                replace_node(filters, new_filter);
-            }
+                auto new_filter = std::make_shared<opset8::Concat>(reorder, 1);
 
+                conv->set_argument(1, new_filter->get_default_output());
+            }
             replace_node(m.get_match_root(), new_root);
         }
-
         return true;
     };
 
diff --git a/inference-engine/tests/functional/inference_engine/transformations/space_to_depth_fusion_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/space_to_depth_fusion_test.cpp
index ad670d6d2088a7..f68327f198c150 100644
--- a/inference-engine/tests/functional/inference_engine/transformations/space_to_depth_fusion_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/space_to_depth_fusion_test.cpp
@@ -4,14 +4,15 @@
 
 #include <gtest/gtest.h>
 
+#include <algorithm>
 #include <memory>
-#include <queue>
-#include <string>
-
 #include <ngraph/function.hpp>
-#include <ngraph/opsets/opset6.hpp>
+#include <ngraph/opsets/opset8.hpp>
 #include <ngraph/pass/constant_folding.hpp>
 #include <ngraph/pass/manager.hpp>
+#include <queue>
+#include <random>
+#include <string>
 #include <transformations/common_optimizations/space_to_depth_fusion.hpp>
 #include <transformations/init_node_info.hpp>
 #include <transformations/utils/utils.hpp>
@@ -21,66 +22,241 @@
 using namespace testing;
 using namespace ngraph;
 
-std::shared_ptr<opset6::StridedSlice> create_ss(const Output<Node> &data_node,
-                                                size_t ndims, int stride,
-                                                int axis, int begin) {
-  std::vector<int64_t> begin_c(ndims, 0);
-  std::vector<int64_t> end_c(ndims, 0);
-  std::vector<int64_t> stride_c(ndims, 1);
-  begin_c[axis] = begin;
-  stride_c[axis] = stride;
-  auto begin_node = opset6::Constant::create(ngraph::element::i64,
-                                             ngraph::Shape{ndims}, begin_c);
-  auto end_node = opset6::Constant::create(ngraph::element::i64,
-                                           ngraph::Shape{ndims}, end_c);
-  auto stride_node = opset6::Constant::create(ngraph::element::i64,
-                                              ngraph::Shape{ndims}, stride_c);
-  std::vector<int64_t> begin_mask(ndims, 0);
-  std::vector<int64_t> end_mask(ndims, 1);
-  auto ss = std::make_shared<opset6::StridedSlice>(
-      data_node, begin_node, end_node, stride_node, begin_mask, end_mask);
-  return ss;
+static const auto end_max = std::numeric_limits<int64_t>::max();
+
+static std::shared_ptr<opset8::StridedSlice> create_ss(const Output<Node>& data_node,
+                                                size_t ndims,
+                                                int axis,
+                                                int begin,
+                                                int stride) {
+    std::vector<int64_t> begin_c(ndims, 0);
+    std::vector<int64_t> end_c(ndims, 0);
+    std::vector<int64_t> stride_c(ndims, 1);
+    begin_c[axis] = begin;
+    stride_c[axis] = stride;
+    auto begin_node = opset8::Constant::create(ngraph::element::i64, ngraph::Shape{ndims}, begin_c);
+    auto end_node = opset8::Constant::create(ngraph::element::i64, ngraph::Shape{ndims}, end_c);
+    auto stride_node = opset8::Constant::create(ngraph::element::i64, ngraph::Shape{ndims}, stride_c);
+    std::vector<int64_t> begin_mask(ndims, 0);
+    std::vector<int64_t> end_mask(ndims, 1);
+    auto ss =
+        std::make_shared<opset8::StridedSlice>(data_node, begin_node, end_node, stride_node, begin_mask, end_mask);
+    return ss;
+}
+
+struct coordinate : std::vector<int> {
+    using base = std::vector<int>;
+    int radix;
+    coordinate(int ndims, int radix) : base(ndims, 0), radix(radix) {}
+    coordinate& operator++() {
+        int ndims = size();
+        for (int k = ndims - 1; k >= 0; k--) {
+            (*this)[k]++;
+            if ((*this)[k] < radix)
+                break;
+            (*this)[k] = 0;
+        }
+        return *this;
+    }
+};
+
+static std::shared_ptr<Node> build_ss_chain(const Output<Node>& in,
+                                            int block_size,
+                                            const std::vector<int>& shuffle = {}) {
+    auto shape = in.get_shape();
+
+    OutputVector ss_outputs;
+
+    coordinate begin(shape.size(), block_size);
+
+    do {
+        std::shared_ptr<Node> node = in.get_node_shared_ptr();
+        for (int k = 2; k < shape.size(); k++)
+            node = create_ss(node, k + 1, k, begin[k], block_size);
+
+        ss_outputs.push_back(node);
+
+        ++begin;
+    } while (begin[1] == 0);
+
+    if (shuffle.size()) {
+        OutputVector after_shuffle;
+
+        for (int i = 0; i < ss_outputs.size(); i++) {
+            auto id = shuffle[i % shuffle.size()];
+            after_shuffle.push_back(ss_outputs[id]);
+        }
+
+        ss_outputs = after_shuffle;
+    }
+
+    return std::make_shared<opset8::Concat>(ss_outputs, 1);
 }
 
 TEST(TransformationTests, SpaceToDepthFusionFromStridedSlice2x2) {
-  std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
-  {
-    auto in = std::make_shared<opset6::Parameter>(element::f32,
-                                                  Shape{1, 3, 640, 640});
-    auto ss_chain = [&](int begin_dim2, int begin_dim3) {
-      auto s0 = create_ss(in, 4, 2, 2, begin_dim2);
-      auto s1 = create_ss(s0, 4, 2, 3, begin_dim3);
-      return s1;
-    };
-
-    auto a = ss_chain(0, 0);
-    auto b = ss_chain(0, 1);
-    auto c = ss_chain(1, 0);
-    auto d = ss_chain(1, 1);
-
-    auto out = std::make_shared<opset6::Concat>(OutputVector{a, b, c, d}, 1);
-
-    f = std::make_shared<Function>(NodeVector{out}, ParameterVector{in});
-
-    pass::Manager m;
-    m.register_pass<pass::InitNodeInfo>();
-    m.register_pass<pass::SpaceToDepthFusion>();
-    m.run_passes(f);
-    ASSERT_NO_THROW(check_rt_info(f));
-  }
-
-  {
-    auto data =
-        std::make_shared<opset6::Parameter>(element::f32, Shape{12, 3, 4, 8});
-    auto batch_to_space = std::make_shared<opset6::BatchToSpace>(
-        data, op::Constant::create(element::i64, Shape{4}, {1, 1, 2, 2}),
-        op::Constant::create(element::i64, Shape{4}, {0, 0, 2, 1}),
-        op::Constant::create(element::i64, Shape{4}, {1, 2, 1, 14}));
-
-    f_ref = std::make_shared<Function>(NodeVector{batch_to_space},
-                                       ParameterVector{data});
-  }
-
-  auto res = compare_functions(f, f_ref, true);
-  ASSERT_TRUE(res.first) << res.second;
+    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+
+    auto block_size = 2;
+    {
+        auto in = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 640, 640});
+        auto out = build_ss_chain(in, block_size);
+        f = std::make_shared<Function>(NodeVector{out}, ParameterVector{in});
+
+        pass::Manager m;
+        m.register_pass<pass::InitNodeInfo>();
+        m.register_pass<pass::SpaceToDepthFusion>();
+        m.run_passes(f);
+        ASSERT_NO_THROW(check_rt_info(f));
+    }
+
+    {
+        auto in = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 640, 640});
+        auto space_to_depth =
+            std::make_shared<opset8::SpaceToDepth>(in,
+                                                   opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST,
+                                                   block_size);
+
+        f_ref = std::make_shared<Function>(NodeVector{space_to_depth}, ParameterVector{in});
+    }
+
+    auto res = compare_functions(f, f_ref, true);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+TEST(TransformationTests, SpaceToDepthFusionFromStridedSlice2x2_Negative) {
+    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+
+    auto block_size = 2;
+    {
+        auto in = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 640, 640});
+        auto out = build_ss_chain(in, block_size, {0, 1, 3, 2});  // shuffled order, so fusion must not apply
+        f = std::make_shared<Function>(NodeVector{out}, ParameterVector{in});
+
+        pass::Manager m;
+        m.register_pass<pass::InitNodeInfo>();
+        m.register_pass<pass::SpaceToDepthFusion>();
+        m.run_passes(f);
+        ASSERT_NO_THROW(check_rt_info(f));
+    }
+
+    {
+        auto in = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 640, 640});
+        auto out = build_ss_chain(in, block_size, {0, 1, 3, 2});  // reference: same pattern, left unfused
+        f_ref = std::make_shared<Function>(NodeVector{out}, ParameterVector{in});
+    }
+
+    auto res = compare_functions(f, f_ref, true);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+TEST(TransformationTests, SpaceToDepthFusionFromStridedSlice3x3) {
+    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+    auto block_size = 3;
+
+    {
+        auto in = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 120, 120});
+        auto out = build_ss_chain(in, block_size);
+        f = std::make_shared<Function>(NodeVector{out}, ParameterVector{in});
+
+        pass::Manager m;
+        m.register_pass<pass::InitNodeInfo>();
+        m.register_pass<pass::SpaceToDepthFusion>();
+        m.run_passes(f);
+        ASSERT_NO_THROW(check_rt_info(f));
+    }
+
+    {
+        auto in = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 120, 120});
+        auto space_to_depth =
+            std::make_shared<opset8::SpaceToDepth>(in,
+                                                   opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST,
+                                                   block_size);
+
+        f_ref = std::make_shared<Function>(NodeVector{space_to_depth}, ParameterVector{in});
+    }
+
+    auto res = compare_functions(f, f_ref, true);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+TEST(TransformationTests, SpaceToDepthFusionFromStridedSlice2x2x2) {
+    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+
+    auto block_size = 2;
+    {
+        auto in = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 640, 640, 640});
+
+        auto out = build_ss_chain(in, block_size);
+
+        f = std::make_shared<Function>(NodeVector{out}, ParameterVector{in});
+
+        pass::Manager m;
+        m.register_pass<pass::InitNodeInfo>();
+        m.register_pass<pass::SpaceToDepthFusion>();
+        m.run_passes(f);
+        ASSERT_NO_THROW(check_rt_info(f));
+    }
+
+    {
+        auto in = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 640, 640, 640});
+        auto space_to_depth =
+            std::make_shared<opset8::SpaceToDepth>(in,
+                                                   opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST,
+                                                   block_size);
+
+        f_ref = std::make_shared<Function>(NodeVector{space_to_depth}, ParameterVector{in});
+    }
+
+    auto res = compare_functions(f, f_ref, true);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+TEST(TransformationTests, SpaceToDepthFusionFromStridedSlice2x2WithConv) {
+    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+
+    std::vector<float> weights(10 * 12 * 3 * 3, 0);
+
+    auto block_size = 2;
+    {
+        auto in = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 640, 640});
+        auto ssconcat = build_ss_chain(in, block_size, {0, 1, 3, 2});
+
+        auto filters = op::Constant::create(element::f32, Shape{10, 12, 3, 3}, weights);
+        auto out = std::make_shared<opset8::Convolution>(ssconcat,
+                                                         filters,
+                                                         Strides{1, 1},
+                                                         CoordinateDiff{0, 0},
+                                                         CoordinateDiff{0, 0},
+                                                         Strides{1, 1});
+
+        f = std::make_shared<Function>(NodeVector{out}, ParameterVector{in});
+
+        pass::Manager m;
+        m.register_pass<pass::InitNodeInfo>();
+        m.register_pass<pass::SpaceToDepthFusion>();
+        m.register_pass<pass::ConstantFolding>();
+        m.run_passes(f);
+        ASSERT_NO_THROW(check_rt_info(f));
+    }
+
+    {
+        auto in = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 640, 640});
+        auto space_to_depth =
+            std::make_shared<opset8::SpaceToDepth>(in,
+                                                   opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST,
+                                                   block_size);
+
+        auto filters = op::Constant::create(element::f32, Shape{10, 12, 3, 3}, weights);
+        auto out = std::make_shared<opset8::Convolution>(space_to_depth,
+                                                         filters,
+                                                         Strides{1, 1},
+                                                         CoordinateDiff{0, 0},
+                                                         CoordinateDiff{0, 0},
+                                                         Strides{1, 1});
+
+        f_ref = std::make_shared<Function>(NodeVector{out}, ParameterVector{in});
+    }
+
+    auto res = compare_functions(f, f_ref, true);
+    ASSERT_TRUE(res.first) << res.second;
 }