Add files via upload

vadimbelan · Jun 11, 2024 · c4bd406 · c4bd406
1 parent 4d4d8a9
commit c4bd406
Show file tree

Hide file tree

Showing 4 changed files with 192 additions and 0 deletions.
diff --git a/tasks/stl/belan_vadim_mat_fox_stl/func_tests/main.cpp b/tasks/stl/belan_vadim_mat_fox_stl/func_tests/main.cpp
diff --git a/tasks/stl/belan_vadim_mat_fox_stl/include/ops_stl.hpp b/tasks/stl/belan_vadim_mat_fox_stl/include/ops_stl.hpp
@@ -0,0 +1,46 @@
+// Copyright 2024 Vadim Belan
+#pragma once
+
+#include <memory>
+#include <utility>
+#include <vector>
+#include <thread>
+#include <algorithm>
+
+#include "core/task/include/task.hpp"
+
+namespace BelanSTL {
+
+using Matrix = std::vector<std::vector<double>>;  // Dense matrix kept as a vector of rows, indexed as [row][col].
+
+// Single-threaded blocked matrix multiplication task.
+// Holds both operands and the result as row-wise matrices and exposes the
+// four stage hooks overridden from ppc::core::Task.
+class FoxBlockedSequential : public ppc::core::Task {
+ public:
+  // Takes ownership of the shared task description and hands it to the base.
+  explicit FoxBlockedSequential(std::shared_ptr<ppc::core::TaskData> taskData_)
+      : Task(std::move(taskData_)) {}
+
+  bool validation() override;
+  bool pre_processing() override;
+  bool run() override;
+  bool post_processing() override;
+
+ private:
+  Matrix A;            // left operand, filled in pre_processing()
+  Matrix B;            // right operand, filled in pre_processing()
+  Matrix C;            // product, accumulated in run()
+  int block_size = 0;  // tile edge length, assigned in pre_processing()
+};
+
+// Blocked matrix multiplication task whose run() stage distributes row
+// blocks over std::thread workers.
+// Holds both operands and the result as row-wise matrices and exposes the
+// four stage hooks overridden from ppc::core::Task.
+class FoxBlockedParallel : public ppc::core::Task {
+ public:
+  // Takes ownership of the shared task description and hands it to the base.
+  explicit FoxBlockedParallel(std::shared_ptr<ppc::core::TaskData> taskData_)
+      : Task(std::move(taskData_)) {}
+
+  bool validation() override;
+  bool pre_processing() override;
+  bool run() override;
+  bool post_processing() override;
+
+ private:
+  Matrix A;            // left operand, filled in pre_processing()
+  Matrix B;            // right operand, filled in pre_processing()
+  Matrix C;            // product, accumulated in run()
+  int block_size = 0;  // tile edge length, assigned in pre_processing()
+};
+
+}  // namespace BelanSTL
diff --git a/tasks/stl/belan_vadim_mat_fox_stl/perf_tests/main.cpp b/tasks/stl/belan_vadim_mat_fox_stl/perf_tests/main.cpp
diff --git a/tasks/stl/belan_vadim_mat_fox_stl/src/ops_stl.cpp b/tasks/stl/belan_vadim_mat_fox_stl/src/ops_stl.cpp
@@ -0,0 +1,146 @@
+// Copyright 2024 Vadim Belan
+#include "stl/belan_vadim_mat_fox_stl/include/ops_stl.hpp"
+
+using BelanSTL::FoxBlockedParallel;
+using BelanSTL::FoxBlockedSequential;
+using BelanSTL::Matrix;
+
+// Accepts the task only when the two input counts and the output count are
+// all equal. The output count is inspected only if the input counts match.
+bool FoxBlockedSequential::validation() {
+  internal_order_test();
+
+  const auto& in_counts = taskData->inputs_count;
+  if (in_counts[0] != in_counts[1]) {
+    return false;
+  }
+
+  return in_counts[0] == taskData->outputs_count[0];
+}
+
+// Copies both operands out of the raw task buffers into row-wise matrices and
+// prepares a zero-filled result matrix.
+//
+// inputs_count[0] is used as the row count and inputs_count[1] as the column
+// count; every input buffer is read as rows * cols doubles, row after row.
+// NOTE(review): the buffers are assumed to hold at least rows * cols doubles -
+// confirm against the code that fills taskData.
+//
+// Always returns true.
+bool FoxBlockedSequential::pre_processing() {
+  internal_order_test();
+
+  using Index = std::vector<double>::size_type;
+
+  const auto* matrixA = reinterpret_cast<const double*>(taskData->inputs[0]);
+  const auto* matrixB = reinterpret_cast<const double*>(taskData->inputs[1]);
+
+  // Unsigned sizes keep the offset arithmetic below free of signed overflow.
+  const auto rows = static_cast<Index>(taskData->inputs_count[0]);
+  const auto cols = static_cast<Index>(taskData->inputs_count[1]);
+
+  block_size = 32;
+
+  // assign() rebuilds every row. resize() would keep rows that already exist,
+  // so a second call on the same object would leave stale products in C (which
+  // run() adds to) and stale row widths in all three matrices.
+  A.assign(rows, std::vector<double>(cols));
+  B.assign(rows, std::vector<double>(cols));
+  C.assign(rows, std::vector<double>(cols, 0.0));
+
+  for (Index r = 0; r < rows; ++r) {
+    const double* a_row = matrixA + r * cols;
+    const double* b_row = matrixB + r * cols;
+    std::copy(a_row, a_row + cols, A[r].begin());
+    std::copy(b_row, b_row + cols, B[r].begin());
+  }
+
+  return true;
+}
+
+// Computes C = A * B, walking C in block_size x block_size tiles.
+//
+// Within a tile, the inner dimension k is traversed in ascending order for
+// every element, so each C[r][c] receives its terms in the same order as a
+// plain triple loop would deliver them.
+//
+// Returns true on success (including the empty-matrix case) and false when
+// block_size is not positive, because the tile loops could never advance.
+bool FoxBlockedSequential::run() {
+  internal_order_test();
+
+  using Index = std::vector<double>::size_type;
+
+  // Nothing to multiply; this also keeps the A[0] / B[0] reads below valid.
+  if (A.empty() || B.empty()) {
+    return true;
+  }
+  if (block_size <= 0) {
+    return false;
+  }
+
+  // The kernel accumulates with +=, so C has to start from zero. Without this
+  // a second run() on the same object would yield a multiple of the product.
+  for (auto& row : C) {
+    std::fill(row.begin(), row.end(), 0.0);
+  }
+
+  const auto tile = static_cast<Index>(block_size);
+  const Index n_rows = A.size();
+  const Index n_inner = A[0].size();
+  const Index n_cols = B[0].size();
+
+  for (Index row0 = 0; row0 < n_rows; row0 += tile) {
+    const Index row_end = std::min(row0 + tile, n_rows);
+    for (Index col0 = 0; col0 < n_cols; col0 += tile) {
+      const Index col_end = std::min(col0 + tile, n_cols);
+      for (Index k = 0; k < n_inner; ++k) {
+        for (Index r = row0; r < row_end; ++r) {
+          const double a_rk = A[r][k];
+          for (Index c = col0; c < col_end; ++c) {
+            C[r][c] += a_rk * B[k][c];
+          }
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+// Flattens C into the first output buffer, one row after another, using the
+// width of the first row as the row stride. Always returns true.
+bool FoxBlockedSequential::post_processing() {
+  internal_order_test();
+
+  using Index = std::vector<double>::size_type;
+
+  auto* dst = reinterpret_cast<double*>(taskData->outputs[0]);
+
+  if (C.empty()) {
+    return true;
+  }
+
+  const Index width = C[0].size();
+  for (const auto& row : C) {
+    for (Index col = 0; col < width; ++col) {
+      *dst = row[col];
+      ++dst;
+    }
+  }
+
+  return true;
+}
+
+// Accepts the task only when the two input counts and the output count are
+// all equal. The output count is inspected only if the input counts match.
+bool FoxBlockedParallel::validation() {
+  internal_order_test();
+
+  const auto& in_counts = taskData->inputs_count;
+  const bool inputs_agree = in_counts[0] == in_counts[1];
+
+  return inputs_agree && in_counts[0] == taskData->outputs_count[0];
+}
+
+// Copies both operands out of the raw task buffers into row-wise matrices and
+// prepares a zero-filled result matrix.
+//
+// inputs_count[0] is used as the row count and inputs_count[1] as the column
+// count; every input buffer is read as rows * cols doubles, row after row.
+// NOTE(review): the buffers are assumed to hold at least rows * cols doubles -
+// confirm against the code that fills taskData.
+//
+// Always returns true.
+bool FoxBlockedParallel::pre_processing() {
+  internal_order_test();
+
+  using Index = std::vector<double>::size_type;
+
+  const auto* matrixA = reinterpret_cast<const double*>(taskData->inputs[0]);
+  const auto* matrixB = reinterpret_cast<const double*>(taskData->inputs[1]);
+
+  // Unsigned sizes keep the offset arithmetic below free of signed overflow.
+  const auto rows = static_cast<Index>(taskData->inputs_count[0]);
+  const auto cols = static_cast<Index>(taskData->inputs_count[1]);
+
+  block_size = 32;
+
+  // assign() rebuilds every row. resize() would keep rows that already exist,
+  // so a second call on the same object would leave stale products in C (which
+  // run() adds to) and stale row widths in all three matrices.
+  A.assign(rows, std::vector<double>(cols));
+  B.assign(rows, std::vector<double>(cols));
+  C.assign(rows, std::vector<double>(cols, 0.0));
+
+  for (Index r = 0; r < rows; ++r) {
+    const double* a_row = matrixA + r * cols;
+    const double* b_row = matrixB + r * cols;
+    std::copy(a_row, a_row + cols, A[r].begin());
+    std::copy(b_row, b_row + cols, B[r].begin());
+  }
+
+  return true;
+}
+
+// Computes C = A * B with one std::thread per block of block_size rows.
+//
+// Each worker owns the rows [row0, row_end) of C and writes nothing else, so
+// the workers never touch the same element; A and B are only read. All
+// workers are joined before the function returns.
+//
+// Returns true on success (including the empty-matrix case) and false when
+// block_size is not positive, because the tile loops could never advance.
+bool FoxBlockedParallel::run() {
+  internal_order_test();
+
+  using Index = std::vector<double>::size_type;
+
+  // Nothing to multiply; this also keeps the A[0] / B[0] reads below valid.
+  if (A.empty() || B.empty()) {
+    return true;
+  }
+  if (block_size <= 0) {
+    return false;
+  }
+
+  // The kernel accumulates with +=, so C has to start from zero. Without this
+  // a second run() on the same object would yield a multiple of the product.
+  for (auto& row : C) {
+    std::fill(row.begin(), row.end(), 0.0);
+  }
+
+  const auto tile = static_cast<Index>(block_size);
+  const Index n_rows = A.size();
+  const Index n_inner = A[0].size();
+  const Index n_cols = B[0].size();
+
+  std::vector<std::thread> workers;
+  workers.reserve((n_rows + tile - 1) / tile);
+
+  for (Index row0 = 0; row0 < n_rows; row0 += tile) {
+    const Index row_end = std::min(row0 + tile, n_rows);
+
+    workers.emplace_back([this, row0, row_end, tile, n_inner, n_cols]() {
+      for (Index col0 = 0; col0 < n_cols; col0 += tile) {
+        const Index col_end = std::min(col0 + tile, n_cols);
+        for (Index k = 0; k < n_inner; ++k) {
+          for (Index r = row0; r < row_end; ++r) {
+            const double a_rk = A[r][k];
+            for (Index c = col0; c < col_end; ++c) {
+              C[r][c] += a_rk * B[k][c];
+            }
+          }
+        }
+      }
+    });
+  }
+
+  for (auto& worker : workers) {
+    worker.join();
+  }
+
+  return true;
+}
+
+// Flattens C into the first output buffer, one row after another, using the
+// width of the first row as the row stride. Always returns true.
+bool FoxBlockedParallel::post_processing() {
+  internal_order_test();
+
+  using Index = std::vector<double>::size_type;
+
+  auto* flat = reinterpret_cast<double*>(taskData->outputs[0]);
+
+  const Index row_count = C.size();
+  for (Index row = 0; row < row_count; ++row) {
+    const Index width = C[0].size();
+    double* dst_row = flat + row * width;
+    for (Index col = 0; col < width; ++col) {
+      dst_row[col] = C[row][col];
+    }
+  }
+
+  return true;
+}