Skip to content
This repository has been archived by the owner on Jun 30, 2024. It is now read-only.

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
vadimbelan authored Jun 4, 2024
1 parent dd5d142 commit a8e7977
Show file tree
Hide file tree
Showing 4 changed files with 510 additions and 0 deletions.
198 changes: 198 additions & 0 deletions tasks/tbb/belan_vadim_mat_fox_tbb/func_tests/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
// Copyright 2024 Vadim Belan
#include <gtest/gtest.h>

#include <vector>

#include "tbb/belan_vadim_mat_fox_tbb/include/ops_tbb.hpp"

using namespace BelanTBB;

// Multiplies two fixed 2x2 matrices (row-major) with FoxBlockedParallel and
// compares every element of the result with a hand-computed product.
TEST(FoxBlockedParallel, MatrixMultiplication2x2) {
  // Define input matrices
  std::vector<double> matrixA = {1, 2, 3, 4};
  std::vector<double> matrixB = {4, 3, 2, 1};
  const std::vector<double> expectedOutput = {8, 5, 20, 13};

  // The result buffer is owned by a vector: a failing ASSERT_* returns from
  // the test body immediately, which would skip a trailing delete[].
  std::vector<double> output(expectedOutput.size(), 0.0);

  // Create TaskData: each matrix contributes a pointer plus two dimension entries
  auto taskData = std::make_shared<ppc::core::TaskData>();
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
  taskData->inputs_count.emplace_back(2);
  taskData->inputs_count.emplace_back(2);
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
  taskData->inputs_count.emplace_back(2);
  taskData->inputs_count.emplace_back(2);
  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(output.data()));
  taskData->outputs_count.emplace_back(2);
  taskData->outputs_count.emplace_back(2);

  // Create Task and run every stage; a stage reporting failure aborts the test
  FoxBlockedParallel foxBlockedParallel(taskData);
  ASSERT_TRUE(foxBlockedParallel.validation());
  ASSERT_TRUE(foxBlockedParallel.pre_processing());
  ASSERT_TRUE(foxBlockedParallel.run());
  ASSERT_TRUE(foxBlockedParallel.post_processing());

  // Check the output
  for (size_t i = 0; i < expectedOutput.size(); ++i) {
    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
  }
}

// Multiplies two fixed 3x3 matrices (row-major) with FoxBlockedParallel and
// compares every element of the result with a hand-computed product.
TEST(FoxBlockedParallel, MatrixMultiplication) {
  // Define input matrices
  std::vector<double> matrixA = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  std::vector<double> matrixB = {9, 8, 7, 6, 5, 4, 3, 2, 1};
  const std::vector<double> expectedOutput = {30, 24, 18, 84, 69, 54, 138, 114, 90};

  // The result buffer is owned by a vector: a failing ASSERT_* returns from
  // the test body immediately, which would skip a trailing delete[].
  std::vector<double> output(expectedOutput.size(), 0.0);

  // Create TaskData: each matrix contributes a pointer plus two dimension entries
  auto taskData = std::make_shared<ppc::core::TaskData>();
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
  taskData->inputs_count.emplace_back(3);
  taskData->inputs_count.emplace_back(3);
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
  taskData->inputs_count.emplace_back(3);
  taskData->inputs_count.emplace_back(3);
  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(output.data()));
  taskData->outputs_count.emplace_back(3);
  taskData->outputs_count.emplace_back(3);

  // Create Task and run every stage; a stage reporting failure aborts the test
  FoxBlockedParallel foxBlockedParallel(taskData);
  ASSERT_TRUE(foxBlockedParallel.validation());
  ASSERT_TRUE(foxBlockedParallel.pre_processing());
  ASSERT_TRUE(foxBlockedParallel.run());
  ASSERT_TRUE(foxBlockedParallel.post_processing());

  // Check the output
  for (size_t i = 0; i < expectedOutput.size(); ++i) {
    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
  }
}

// Multiplies two 10x10 matrices filled with rand() digits using
// FoxBlockedParallel and compares the result with a naive triple-loop product.
TEST(FoxBlockedParallel, MatrixMultiplication_SmallMatrices) {
  // Side length of the square matrices; single source of truth for all sizes below
  constexpr size_t kSize = 10;

  // Define input matrices
  std::vector<double> matrixA(kSize * kSize);
  std::vector<double> matrixB(kSize * kSize);
  std::vector<double> expectedOutput(kSize * kSize, 0.0);

  // Initialize matrices with random values in [0, 9]
  // (rand() is not seeded here, so the sequence is the implementation's default one)
  for (size_t i = 0; i < kSize * kSize; ++i) {
    matrixA[i] = rand() % 10;
    matrixB[i] = rand() % 10;
  }

  // Reference result: row-major naive multiplication
  for (size_t i = 0; i < kSize; ++i) {
    for (size_t j = 0; j < kSize; ++j) {
      for (size_t k = 0; k < kSize; ++k) {
        expectedOutput[i * kSize + j] += matrixA[i * kSize + k] * matrixB[k * kSize + j];
      }
    }
  }

  // The result buffer is owned by a vector: a failing ASSERT_* returns from
  // the test body immediately, which would skip a trailing delete[].
  std::vector<double> output(kSize * kSize, 0.0);

  // Create TaskData: each matrix contributes a pointer plus two dimension entries
  auto taskData = std::make_shared<ppc::core::TaskData>();
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
  taskData->inputs_count.emplace_back(kSize);
  taskData->inputs_count.emplace_back(kSize);
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
  taskData->inputs_count.emplace_back(kSize);
  taskData->inputs_count.emplace_back(kSize);
  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(output.data()));
  taskData->outputs_count.emplace_back(kSize);
  taskData->outputs_count.emplace_back(kSize);

  // Create Task and run every stage; a stage reporting failure aborts the test
  FoxBlockedParallel foxBlockedParallel(taskData);
  ASSERT_TRUE(foxBlockedParallel.validation());
  ASSERT_TRUE(foxBlockedParallel.pre_processing());
  ASSERT_TRUE(foxBlockedParallel.run());
  ASSERT_TRUE(foxBlockedParallel.post_processing());

  // Check the output
  for (size_t i = 0; i < kSize * kSize; ++i) {
    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
  }
}

// Multiplies a 2x2 matrix by a 2x2 matrix containing a zero entry (row-major)
// with FoxBlockedParallel and compares the result with a hand-computed product.
TEST(FoxBlockedParallel, MatrixMultiplication_VerySmallMatrices) {
  // Define input matrices
  std::vector<double> matrixA = {1, 2, 3, 4};
  std::vector<double> matrixB = {2, 0, 1, 2};
  const std::vector<double> expectedOutput = {4, 4, 10, 8};

  // The result buffer is owned by a vector: a failing ASSERT_* returns from
  // the test body immediately, which would skip a trailing delete[].
  std::vector<double> output(expectedOutput.size(), 0.0);

  // Create TaskData: each matrix contributes a pointer plus two dimension entries
  auto taskData = std::make_shared<ppc::core::TaskData>();
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
  taskData->inputs_count.emplace_back(2);
  taskData->inputs_count.emplace_back(2);
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
  taskData->inputs_count.emplace_back(2);
  taskData->inputs_count.emplace_back(2);
  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(output.data()));
  taskData->outputs_count.emplace_back(2);
  taskData->outputs_count.emplace_back(2);

  // Create Task and run every stage; a stage reporting failure aborts the test
  FoxBlockedParallel foxBlockedParallel(taskData);
  ASSERT_TRUE(foxBlockedParallel.validation());
  ASSERT_TRUE(foxBlockedParallel.pre_processing());
  ASSERT_TRUE(foxBlockedParallel.run());
  ASSERT_TRUE(foxBlockedParallel.post_processing());

  // Check the output
  for (size_t i = 0; i < expectedOutput.size(); ++i) {
    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
  }
}

// Multiplies two 3x3 matrices whose entries are all negative (row-major) with
// FoxBlockedParallel; the signs cancel, so the expected product is positive.
TEST(FoxBlockedParallel, MatrixMultiplicationWithNegatives) {
  // Define input matrices with negative values
  std::vector<double> matrixA = {-1, -2, -3, -4, -5, -6, -7, -8, -9};
  std::vector<double> matrixB = {-9, -8, -7, -6, -5, -4, -3, -2, -1};
  const std::vector<double> expectedOutput = {30, 24, 18, 84, 69, 54, 138, 114, 90};

  // The result buffer is owned by a vector: a failing ASSERT_* returns from
  // the test body immediately, which would skip a trailing delete[].
  std::vector<double> output(expectedOutput.size(), 0.0);

  // Create TaskData: each matrix contributes a pointer plus two dimension entries
  auto taskData = std::make_shared<ppc::core::TaskData>();
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
  taskData->inputs_count.emplace_back(3);
  taskData->inputs_count.emplace_back(3);
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
  taskData->inputs_count.emplace_back(3);
  taskData->inputs_count.emplace_back(3);
  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(output.data()));
  taskData->outputs_count.emplace_back(3);
  taskData->outputs_count.emplace_back(3);

  // Create Task and run every stage; a stage reporting failure aborts the test
  FoxBlockedParallel foxBlockedParallel(taskData);
  ASSERT_TRUE(foxBlockedParallel.validation());
  ASSERT_TRUE(foxBlockedParallel.pre_processing());
  ASSERT_TRUE(foxBlockedParallel.run());
  ASSERT_TRUE(foxBlockedParallel.post_processing());

  // Check the output
  for (size_t i = 0; i < expectedOutput.size(); ++i) {
    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
  }
}
45 changes: 45 additions & 0 deletions tasks/tbb/belan_vadim_mat_fox_tbb/include/ops_tbb.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright 2024 Vadim Belan
#pragma once

#include <memory>
#include <utility>
#include <vector>

#include "core/task/include/task.hpp"
#include "tbb/tbb.h"

namespace BelanTBB {

// Matrix of doubles represented as nested vectors.
using Matrix = std::vector<std::vector<double>>;

// Task built on ppc::core::Task; the four stage overrides are only declared
// here and defined elsewhere.
// NOTE(review): going by the name this is presumably the sequential Fox
// block-multiplication variant - confirm against the implementation file.
class FoxBlockedSequential : public ppc::core::Task {
 public:
  // Hands the shared TaskData over to the ppc::core::Task base.
  explicit FoxBlockedSequential(std::shared_ptr<ppc::core::TaskData> taskData_)
      : Task(std::move(taskData_)) {}

  bool validation() override;
  bool pre_processing() override;
  bool run() override;
  bool post_processing() override;

 private:
  // Working matrices, empty until filled by the stage implementations.
  // NOTE(review): presumably A and B are the operands and C the product - confirm.
  Matrix A;
  Matrix B;
  Matrix C;
  // Starts at zero; the value actually used is set outside this header.
  int block_size = 0;
};

// Task built on ppc::core::Task; the four stage overrides are only declared
// here and defined elsewhere.
// NOTE(review): going by the name and the tbb include this is presumably the
// TBB-parallel Fox block-multiplication variant - confirm against the implementation file.
class FoxBlockedParallel : public ppc::core::Task {
 public:
  // Hands the shared TaskData over to the ppc::core::Task base.
  explicit FoxBlockedParallel(std::shared_ptr<ppc::core::TaskData> taskData_)
      : Task(std::move(taskData_)) {}

  bool validation() override;
  bool pre_processing() override;
  bool run() override;
  bool post_processing() override;

 private:
  // Working matrices, empty until filled by the stage implementations.
  // NOTE(review): presumably A and B are the operands and C the product - confirm.
  Matrix A;
  Matrix B;
  Matrix C;
  // Starts at zero; the value actually used is set outside this header.
  int block_size = 0;
};

}  // namespace BelanTBB
128 changes: 128 additions & 0 deletions tasks/tbb/belan_vadim_mat_fox_tbb/perf_tests/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// Copyright 2024 Vadim Belan
#include <gtest/gtest.h>
#include <oneapi/tbb.h>

#include "core/perf/include/perf.hpp"
#include "tbb/belan_vadim_mat_fox_tbb/include/ops_tbb.hpp"

using namespace BelanTBB;

// Times FoxBlockedParallel on 512x512 inputs through Perf::pipeline_run and then
// verifies the output buffer against a naive row-major product.
TEST(fox_blocked_tbb, test_pipeline_run) {
  // Create data
  const int rows = 512;
  const int cols = 512;
  std::vector<double> matrix_a(rows * cols);
  std::vector<double> matrix_b(rows * cols);
  std::vector<double> matrix_c(rows * cols);

  // Integer-valued fill: a[i][j] = i + j, b[i][j] = i - j, c zeroed
  for (int i = 0; i < rows; ++i) {
    for (int j = 0; j < cols; ++j) {
      matrix_a[i * cols + j] = i + j;
      matrix_b[i * cols + j] = i - j;
      matrix_c[i * cols + j] = 0;
    }
  }

  // Create TaskData: each matrix contributes a pointer plus two dimension entries
  auto taskData = std::make_shared<ppc::core::TaskData>();
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrix_a.data()));
  taskData->inputs_count.emplace_back(rows);
  taskData->inputs_count.emplace_back(cols);
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrix_b.data()));
  taskData->inputs_count.emplace_back(rows);
  taskData->inputs_count.emplace_back(cols);
  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(matrix_c.data()));
  taskData->outputs_count.emplace_back(rows);
  taskData->outputs_count.emplace_back(cols);

  // Create Task
  auto testTaskTBB = std::make_shared<FoxBlockedParallel>(taskData);

  // Create Perf attributes
  auto perfAttr = std::make_shared<ppc::core::PerfAttr>();
  perfAttr->num_running = 10;
  // Timer reports seconds elapsed since t0; t0 outlives every call made below
  const auto t0 = std::chrono::high_resolution_clock::now();
  perfAttr->current_timer = [&] {
    auto current_time_point = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(current_time_point - t0).count();
    return static_cast<double>(duration) * 1e-9;
  };

  // Create and init perf results
  auto perfResults = std::make_shared<ppc::core::PerfResults>();
  // Create Perf analyzer
  auto perfAnalyzer = std::make_shared<ppc::core::Perf>(testTaskTBB);
  perfAnalyzer->pipeline_run(perfAttr, perfResults);
  ppc::core::Perf::print_perf_statistic(perfResults);

  // Compare results; doubles are compared with the ULP-based gtest assertion
  // rather than exact equality
  for (int i = 0; i < rows; ++i) {
    for (int j = 0; j < cols; ++j) {
      double expected = 0;
      for (int k = 0; k < cols; ++k) {
        expected += matrix_a[i * cols + k] * matrix_b[k * cols + j];
      }
      ASSERT_DOUBLE_EQ(matrix_c[i * cols + j], expected);
    }
  }
}

// Times FoxBlockedParallel on 512x512 inputs through Perf::task_run and then
// verifies the output buffer against a naive row-major product.
TEST(fox_blocked_tbb, test_task_run) {
  // Create data
  const int rows = 512;
  const int cols = 512;
  std::vector<double> matrix_a(rows * cols);
  std::vector<double> matrix_b(rows * cols);
  std::vector<double> matrix_c(rows * cols);

  // Integer-valued fill: a[i][j] = i + j, b[i][j] = i - j, c zeroed
  for (int i = 0; i < rows; ++i) {
    for (int j = 0; j < cols; ++j) {
      matrix_a[i * cols + j] = i + j;
      matrix_b[i * cols + j] = i - j;
      matrix_c[i * cols + j] = 0;
    }
  }

  // Create TaskData: each matrix contributes a pointer plus two dimension entries
  auto taskData = std::make_shared<ppc::core::TaskData>();
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrix_a.data()));
  taskData->inputs_count.emplace_back(rows);
  taskData->inputs_count.emplace_back(cols);
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrix_b.data()));
  taskData->inputs_count.emplace_back(rows);
  taskData->inputs_count.emplace_back(cols);
  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(matrix_c.data()));
  taskData->outputs_count.emplace_back(rows);
  taskData->outputs_count.emplace_back(cols);

  // Create Task
  auto testTaskTBB = std::make_shared<FoxBlockedParallel>(taskData);

  // Create Perf attributes
  auto perfAttr = std::make_shared<ppc::core::PerfAttr>();
  perfAttr->num_running = 10;
  // Timer reports seconds elapsed since t0; t0 outlives every call made below
  const auto t0 = std::chrono::high_resolution_clock::now();
  perfAttr->current_timer = [&] {
    auto current_time_point = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(current_time_point - t0).count();
    return static_cast<double>(duration) * 1e-9;
  };

  // Create and init perf results
  auto perfResults = std::make_shared<ppc::core::PerfResults>();
  // Create Perf analyzer
  auto perfAnalyzer = std::make_shared<ppc::core::Perf>(testTaskTBB);
  perfAnalyzer->task_run(perfAttr, perfResults);
  ppc::core::Perf::print_perf_statistic(perfResults);

  // Compare results; doubles are compared with the ULP-based gtest assertion
  // rather than exact equality
  for (int i = 0; i < rows; ++i) {
    for (int j = 0; j < cols; ++j) {
      double expected = 0;
      for (int k = 0; k < cols; ++k) {
        expected += matrix_a[i * cols + k] * matrix_b[k * cols + j];
      }
      ASSERT_DOUBLE_EQ(matrix_c[i * cols + j], expected);
    }
  }
}
Loading

0 comments on commit a8e7977

Please sign in to comment.