diff --git a/tasks/tbb/belan_vadim_mat_fox_tbb/func_tests/main.cpp b/tasks/tbb/belan_vadim_mat_fox_tbb/func_tests/main.cpp
new file mode 100644
index 000000000..6cc3840be
--- /dev/null
+++ b/tasks/tbb/belan_vadim_mat_fox_tbb/func_tests/main.cpp
@@ -0,0 +1,198 @@
+// Copyright 2024 Vadim Belan
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "tbb/belan_vadim_mat_fox_tbb/include/ops_tbb.hpp"
+
+using namespace BelanTBB;
+
+TEST(FoxBlockedParallel, MatrixMultiplication2x2) {
+  // Define input matrices
+  std::vector<double> matrixA = {1, 2, 3, 4};
+  std::vector<double> matrixB = {4, 3, 2, 1};
+  std::vector<double> expectedOutput = {8, 5, 20, 13};
+
+  // Create TaskData
+  std::shared_ptr<ppc::core::TaskData> taskData = std::make_shared<ppc::core::TaskData>();
+  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
+  taskData->inputs_count.emplace_back(2);
+  taskData->inputs_count.emplace_back(2);
+  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
+  taskData->inputs_count.emplace_back(2);
+  taskData->inputs_count.emplace_back(2);
+  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(new double[4]()));
+  taskData->outputs_count.emplace_back(2);
+  taskData->outputs_count.emplace_back(2);
+
+  // Create Task
+  FoxBlockedParallel foxBlockedParallel(taskData);
+  foxBlockedParallel.validation();
+  foxBlockedParallel.pre_processing();
+  auto *output = reinterpret_cast<double *>(taskData->outputs[0]);
+  foxBlockedParallel.run();
+  foxBlockedParallel.post_processing();
+
+  // Check the output
+  for (size_t i = 0; i < 4; ++i) {
+    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
+  }
+
+  // Free memory
+  delete[] output;
+}
+
+TEST(FoxBlockedParallel, MatrixMultiplication) {
+  // Define input matrices
+  std::vector<double> matrixA = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+  std::vector<double> matrixB = {9, 8, 7, 6, 5, 4, 3, 2, 1};
+  std::vector<double> expectedOutput = {30, 24, 18, 84, 69, 54, 138, 114, 90};
+
+  // Create TaskData
+  std::shared_ptr<ppc::core::TaskData> taskData = std::make_shared<ppc::core::TaskData>();
+  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
+  taskData->inputs_count.emplace_back(3);
+  taskData->inputs_count.emplace_back(3);
+  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
+  taskData->inputs_count.emplace_back(3);
+  taskData->inputs_count.emplace_back(3);
+  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(new double[9]()));
+  taskData->outputs_count.emplace_back(3);
+  taskData->outputs_count.emplace_back(3);
+
+  // Create Task
+  FoxBlockedParallel foxBlockedParallel(taskData);
+  foxBlockedParallel.validation();
+  foxBlockedParallel.pre_processing();
+  auto *output = reinterpret_cast<double *>(taskData->outputs[0]);
+  foxBlockedParallel.run();
+  foxBlockedParallel.post_processing();
+
+  // Check the output
+  for (size_t i = 0; i < 9; ++i) {
+    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
+  }
+
+  // Free memory
+  delete[] output;
+}
+
+TEST(FoxBlockedParallel, MatrixMultiplication_SmallMatrices) {
+  // Define input matrices
+  std::vector<double> matrixA(10 * 10);
+  std::vector<double> matrixB(10 * 10);
+  std::vector<double> expectedOutput(10 * 10);
+
+  // Initialize matrices with random values
+  for (size_t i = 0; i < 10 * 10; ++i) {
+    matrixA[i] = rand() % 10;
+    matrixB[i] = rand() % 10;
+    expectedOutput[i] = 0;
+  }
+
+  for (size_t i = 0; i < 10; ++i) {
+    for (size_t j = 0; j < 10; ++j) {
+      for (size_t k = 0; k < 10; ++k) {
+        expectedOutput[i * 10 + j] += matrixA[i * 10 + k] * matrixB[k * 10 + j];
+      }
+    }
+  }
+
+  // Create TaskData
+  std::shared_ptr<ppc::core::TaskData> taskData = std::make_shared<ppc::core::TaskData>();
+  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
+  taskData->inputs_count.emplace_back(10);
+  taskData->inputs_count.emplace_back(10);
+  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
+  taskData->inputs_count.emplace_back(10);
+  taskData->inputs_count.emplace_back(10);
+  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(new double[10 * 10]()));
+  taskData->outputs_count.emplace_back(10);
+  taskData->outputs_count.emplace_back(10);
+
+  // Create Task
+  FoxBlockedParallel foxBlockedParallel(taskData);
+  foxBlockedParallel.validation();
+  foxBlockedParallel.pre_processing();
+  auto *output = reinterpret_cast<double *>(taskData->outputs[0]);
+  foxBlockedParallel.run();
+  foxBlockedParallel.post_processing();
+
+  // Check the output
+  for (size_t i = 0; i < 10 * 10; ++i) {
+    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
+  }
+
+  // Free memory
+  delete[] output;
+}
+
+TEST(FoxBlockedParallel, MatrixMultiplication_VerySmallMatrices) {
+  // Define input matrices
+  std::vector<double> matrixA = {1, 2, 3, 4};
+  std::vector<double> matrixB = {2, 0, 1, 2};
+  std::vector<double> expectedOutput = {4, 4, 10, 8};
+
+  // Create TaskData
+  std::shared_ptr<ppc::core::TaskData> taskData = std::make_shared<ppc::core::TaskData>();
+  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
+  taskData->inputs_count.emplace_back(2);
+  taskData->inputs_count.emplace_back(2);
+  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
+  taskData->inputs_count.emplace_back(2);
+  taskData->inputs_count.emplace_back(2);
+  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(new double[4]()));
+  taskData->outputs_count.emplace_back(2);
+  taskData->outputs_count.emplace_back(2);
+
+  // Create Task
+  FoxBlockedParallel foxBlockedParallel(taskData);
+  foxBlockedParallel.validation();
+  foxBlockedParallel.pre_processing();
+  auto *output = reinterpret_cast<double *>(taskData->outputs[0]);
+  foxBlockedParallel.run();
+  foxBlockedParallel.post_processing();
+
+  // Check the output
+  for (size_t i = 0; i < 4; ++i) {
+    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
+  }
+
+  // Free memory
+  delete[] output;
+}
+
+TEST(FoxBlockedParallel, MatrixMultiplicationWithNegatives) {
+  // Define input matrices with negative values
+  std::vector<double> matrixA = {-1, -2, -3, -4, -5, -6, -7, -8, -9};
+  std::vector<double> matrixB = {-9, -8, -7, -6, -5, -4, -3, -2, -1};
+  std::vector<double> expectedOutput = {30, 24, 18, 84, 69, 54, 138, 114, 90};
+
+  // Create TaskData
+  std::shared_ptr<ppc::core::TaskData> taskData = std::make_shared<ppc::core::TaskData>();
+  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
+  taskData->inputs_count.emplace_back(3);
+  taskData->inputs_count.emplace_back(3);
+  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
+  taskData->inputs_count.emplace_back(3);
+  taskData->inputs_count.emplace_back(3);
+  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(new double[9]()));
+  taskData->outputs_count.emplace_back(3);
+  taskData->outputs_count.emplace_back(3);
+
+  // Create Task
+  FoxBlockedParallel foxBlockedParallel(taskData);
+  foxBlockedParallel.validation();
+  foxBlockedParallel.pre_processing();
+  auto *output = reinterpret_cast<double *>(taskData->outputs[0]);
+  foxBlockedParallel.run();
+  foxBlockedParallel.post_processing();
+
+  // Check the output
+  for (size_t i = 0; i < 9; ++i) {
+    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
+  }
+
+  // Free memory
+  delete[] output;
+}
\ No newline at end of file
diff --git a/tasks/tbb/belan_vadim_mat_fox_tbb/include/ops_tbb.hpp b/tasks/tbb/belan_vadim_mat_fox_tbb/include/ops_tbb.hpp
new file mode 100644
index 000000000..347a587d5
--- /dev/null
+++ b/tasks/tbb/belan_vadim_mat_fox_tbb/include/ops_tbb.hpp
@@ -0,0 +1,45 @@
+// Copyright 2024 Vadim Belan
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "core/task/include/task.hpp"
+#include "tbb/tbb.h"
+
+namespace BelanTBB {
+
+using Matrix = std::vector<std::vector<double>>;
+
+class FoxBlockedSequential : public ppc::core::Task {
+ public:
+  explicit FoxBlockedSequential(std::shared_ptr<ppc::core::TaskData> taskData_) : Task(std::move(taskData_)) {}
+  bool pre_processing() override;
+  bool validation() override;
+  bool run() override;
+  bool post_processing() override;
+
+ private:
+  Matrix A{};
+  Matrix B{};
+  Matrix C{};
+  int block_size{};
+};
+
+class FoxBlockedParallel : public ppc::core::Task {
+ public:
+  explicit FoxBlockedParallel(std::shared_ptr<ppc::core::TaskData> taskData_) : Task(std::move(taskData_)) {}
+  bool pre_processing() override;
+  bool validation() override;
+  bool run() override;
+  bool post_processing() override;
+
+ private:
+  Matrix A{};
+  Matrix B{};
+  Matrix C{};
+  int block_size{};
+};
+
+}  // namespace BelanTBB
\ No newline at end of file
diff --git a/tasks/tbb/belan_vadim_mat_fox_tbb/perf_tests/main.cpp b/tasks/tbb/belan_vadim_mat_fox_tbb/perf_tests/main.cpp
new file mode 100644
index 000000000..d83d5a36d
--- /dev/null
+++ b/tasks/tbb/belan_vadim_mat_fox_tbb/perf_tests/main.cpp
@@ -0,0 +1,128 @@
+// Copyright 2024 Vadim Belan
+#include <gtest/gtest.h>
+#include <vector>
+
+#include "core/perf/include/perf.hpp"
+#include "tbb/belan_vadim_mat_fox_tbb/include/ops_tbb.hpp"
+
+using namespace BelanTBB;
+
+TEST(fox_blocked_tbb, test_pipeline_run) {
+  // Create data
+  const int rows = 512;
+  const int cols = 512;
+  std::vector<double> matrix_a(rows * cols);
+  std::vector<double> matrix_b(rows * cols);
+  std::vector<double> matrix_c(rows * cols);
+
+  for (int i = 0; i < rows; ++i) {
+    for (int j = 0; j < cols; ++j) {
+      matrix_a[i * cols + j] = i + j;
+      matrix_b[i * cols + j] = i - j;
+      matrix_c[i * cols + j] = 0;
+    }
+  }
+
+  // Create TaskData
+  std::shared_ptr<ppc::core::TaskData> taskDataSeq = std::make_shared<ppc::core::TaskData>();
+  taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrix_a.data()));
+  taskDataSeq->inputs_count.emplace_back(rows);
+  taskDataSeq->inputs_count.emplace_back(cols);
+  taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrix_b.data()));
+  taskDataSeq->inputs_count.emplace_back(rows);
+  taskDataSeq->inputs_count.emplace_back(cols);
+  taskDataSeq->outputs.emplace_back(reinterpret_cast<uint8_t *>(matrix_c.data()));
+  taskDataSeq->outputs_count.emplace_back(rows);
+  taskDataSeq->outputs_count.emplace_back(cols);
+
+  // Create Task
+  auto testTaskTBB = std::make_shared<FoxBlockedParallel>(taskDataSeq);
+
+  // Create Perf attributes
+  auto perfAttr = std::make_shared<ppc::core::PerfAttr>();
+  perfAttr->num_running = 10;
+  const auto t0 = std::chrono::high_resolution_clock::now();
+  perfAttr->current_timer = [&] {
+    auto current_time_point = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(current_time_point - t0).count();
+    return static_cast<double>(duration) * 1e-9;
+  };
+
+  // Create and init perf results
+  auto perfResults = std::make_shared<ppc::core::PerfResults>();
+  // Create Perf analyzer
+  auto perfAnalyzer = std::make_shared<ppc::core::Perf>(testTaskTBB);
+  perfAnalyzer->pipeline_run(perfAttr, perfResults);
+  ppc::core::Perf::print_perf_statistic(perfResults);
+
+  // Compare results
+  for (int i = 0; i < rows; ++i) {
+    for (int j = 0; j < cols; ++j) {
+      double expected = 0;
+      for (int k = 0; k < cols; ++k) {
+        expected += matrix_a[i * cols + k] * matrix_b[k * cols + j];
+      }
+      ASSERT_EQ(matrix_c[i * cols + j], expected);
+    }
+  }
+}
+
+TEST(fox_blocked_tbb, test_task_run) {
+  // Create data
+  const int rows = 512;
+  const int cols = 512;
+  std::vector<double> matrix_a(rows * cols);
+  std::vector<double> matrix_b(rows * cols);
+  std::vector<double> matrix_c(rows * cols);
+
+  for (int i = 0; i < rows; ++i) {
+    for (int j = 0; j < cols; ++j) {
+      matrix_a[i * cols + j] = i + j;
+      matrix_b[i * cols + j] = i - j;
+      matrix_c[i * cols + j] = 0;
+    }
+  }
+
+  // Create TaskData
+  std::shared_ptr<ppc::core::TaskData> taskDataSeq = std::make_shared<ppc::core::TaskData>();
+  taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrix_a.data()));
+  taskDataSeq->inputs_count.emplace_back(rows);
+  taskDataSeq->inputs_count.emplace_back(cols);
+  taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrix_b.data()));
+  taskDataSeq->inputs_count.emplace_back(rows);
+  taskDataSeq->inputs_count.emplace_back(cols);
+  taskDataSeq->outputs.emplace_back(reinterpret_cast<uint8_t *>(matrix_c.data()));
+  taskDataSeq->outputs_count.emplace_back(rows);
+  taskDataSeq->outputs_count.emplace_back(cols);
+
+  // Create Task
+  auto testTaskTBB = std::make_shared<FoxBlockedParallel>(taskDataSeq);
+
+  // Create Perf attributes
+  auto perfAttr = std::make_shared<ppc::core::PerfAttr>();
+  perfAttr->num_running = 10;
+  const auto t0 = std::chrono::high_resolution_clock::now();
+  perfAttr->current_timer = [&] {
+    auto current_time_point = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(current_time_point - t0).count();
+    return static_cast<double>(duration) * 1e-9;
+  };
+
+  // Create and init perf results
+  auto perfResults = std::make_shared<ppc::core::PerfResults>();
+  // Create Perf analyzer
+  auto perfAnalyzer = std::make_shared<ppc::core::Perf>(testTaskTBB);
+  perfAnalyzer->task_run(perfAttr, perfResults);
+  ppc::core::Perf::print_perf_statistic(perfResults);
+
+  // Compare results
+  for (int i = 0; i < rows; ++i) {
+    for (int j = 0; j < cols; ++j) {
+      double expected = 0;
+      for (int k = 0; k < cols; ++k) {
+        expected += matrix_a[i * cols + k] * matrix_b[k * cols + j];
+      }
+      ASSERT_EQ(matrix_c[i * cols + j], expected);
+    }
+  }
+}
diff --git a/tasks/tbb/belan_vadim_mat_fox_tbb/src/ops_tbb.cpp b/tasks/tbb/belan_vadim_mat_fox_tbb/src/ops_tbb.cpp
new file mode 100644
index 000000000..f4812058f
--- /dev/null
+++ b/tasks/tbb/belan_vadim_mat_fox_tbb/src/ops_tbb.cpp
@@ -0,0 +1,139 @@
+
+// Copyright 2024 Vadim Belan
+#include "tbb/belan_vadim_mat_fox_tbb/include/ops_tbb.hpp"
+
+using BelanTBB::FoxBlockedParallel;
+using BelanTBB::FoxBlockedSequential;
+using BelanTBB::Matrix;
+
+bool FoxBlockedSequential::validation() {
+  internal_order_test();
+
+  return taskData->inputs_count[0] == taskData->inputs_count[1] &&
+         taskData->inputs_count[0] == taskData->outputs_count[0];
+}
+
+bool FoxBlockedSequential::pre_processing() {
+  internal_order_test();
+
+  auto* matrixA = reinterpret_cast<double*>(taskData->inputs[0]);
+  auto* matrixB = reinterpret_cast<double*>(taskData->inputs[1]);
+
+  int rows = taskData->inputs_count[0];
+  int cols = taskData->inputs_count[1];
+
+  block_size = 32;  // tile size for the blocked multiplication
+
+  A.resize(rows, std::vector<double>(cols));
+  B.resize(rows, std::vector<double>(cols));
+  C.resize(rows, std::vector<double>(cols));
+
+  for (int i = 0; i < rows; ++i) {
+    for (int j = 0; j < cols; ++j) {
+      A[i][j] = matrixA[i * cols + j];
+      B[i][j] = matrixB[i * cols + j];
+    }
+  }
+
+  return true;
+}
+
+bool FoxBlockedSequential::run() {
+  internal_order_test();
+
+  for (std::vector<double>::size_type i = 0; i < A.size(); i += block_size) {
+    for (std::vector<double>::size_type j = 0; j < B[0].size(); j += block_size) {
+      for (std::vector<double>::size_type k = 0; k < A[0].size(); ++k) {
+        for (std::vector<double>::size_type ii = i;
+             ii < std::min(i + static_cast<std::vector<double>::size_type>(block_size), A.size()); ++ii) {
+          for (std::vector<double>::size_type jj = j;
+               jj < std::min(j + static_cast<std::vector<double>::size_type>(block_size), B[0].size()); ++jj) {
+            C[ii][jj] += A[ii][k] * B[k][jj];
+          }
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+bool FoxBlockedSequential::post_processing() {
+  internal_order_test();
+
+  auto* out_ptr = reinterpret_cast<double*>(taskData->outputs[0]);
+
+  for (std::vector<double>::size_type i = 0; i < C.size(); ++i) {
+    for (std::vector<double>::size_type j = 0; j < C[0].size(); ++j) {
+      out_ptr[i * C[0].size() + j] = C[i][j];
+    }
+  }
+
+  return true;
+}
+
+bool FoxBlockedParallel::validation() {
+  internal_order_test();
+
+  return taskData->inputs_count[0] == taskData->inputs_count[1] &&
+         taskData->inputs_count[0] == taskData->outputs_count[0];
+}
+
+bool FoxBlockedParallel::pre_processing() {
+  internal_order_test();
+
+  auto* matrixA = reinterpret_cast<double*>(taskData->inputs[0]);
+  auto* matrixB = reinterpret_cast<double*>(taskData->inputs[1]);
+
+  int rows = taskData->inputs_count[0];
+  int cols = taskData->inputs_count[1];
+
+  block_size = 32;  // tile size for the blocked multiplication
+
+  A.resize(rows, std::vector<double>(cols));
+  B.resize(rows, std::vector<double>(cols));
+  C.resize(rows, std::vector<double>(cols));
+
+  for (int i = 0; i < rows; ++i) {
+    for (int j = 0; j < cols; ++j) {
+      A[i][j] = matrixA[i * cols + j];
+      B[i][j] = matrixB[i * cols + j];
+    }
+  }
+
+  return true;
+}
+
+bool FoxBlockedParallel::run() {
+  internal_order_test();
+
+  // Parallelize over row-block starts: step by block_size so each row block is processed exactly once.
+  tbb::parallel_for(0u, static_cast<unsigned int>(A.size()), static_cast<unsigned int>(block_size), [&](unsigned int ii) {
+    for (std::vector<double>::size_type jj = 0; jj < B[0].size();
+         jj += static_cast<std::vector<double>::size_type>(block_size)) {
+      for (std::vector<double>::size_type k = 0; k < A[0].size(); ++k) {
+        for (unsigned int i = ii; i < std::min(ii + static_cast<size_t>(block_size), A.size()); ++i) {
+          for (std::vector<double>::size_type j = jj;
+               j < std::min(jj + static_cast<std::vector<double>::size_type>(block_size), B[0].size()); ++j) {
+            C[i][j] += A[i][k] * B[k][j];
+          }
+        }
+      }
+    }
+  });
+
+  return true;
+}
+
+bool FoxBlockedParallel::post_processing() {
+  internal_order_test();
+
+  auto* out_ptr = reinterpret_cast<double*>(taskData->outputs[0]);
+
+  for (std::vector<double>::size_type i = 0; i < C.size(); ++i) {
+    for (std::vector<double>::size_type j = 0; j < C[0].size(); ++j) {
+      out_ptr[i * C[0].size() + j] = C[i][j];
+    }
+  }
+
+  return true;
+}
\ No newline at end of file