This repository has been archived by the owner on Jun 30, 2024. It is now read-only.
forked from mahbhlddnhakkh/ppc-2024-threads
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4d4d8a9
commit dbb7121
Showing
4 changed files
with
522 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
#include <gtest/gtest.h>

#include <cstdint>
#include <cstdlib>
#include <memory>
#include <vector>

#include "tbb/belan_vadim_mat_fox_tbb/include/ops_tbb.hpp"
|
||
using namespace BelanTBB; | ||
|
||
TEST(FoxBlockedParallel, MatrixMultiplication2x2) {
  // 2x2 case with a hand-computed expected product A * B.
  std::vector<double> matrixA = {1, 2, 3, 4};
  std::vector<double> matrixB = {4, 3, 2, 1};
  std::vector<double> expectedOutput = {8, 5, 20, 13};

  // Output buffer owned by a vector instead of raw new/delete: the original
  // leaked the buffer whenever an ASSERT aborted the test body early.
  std::vector<double> output(4, 0.0);

  // Wire inputs/outputs into TaskData; counts are (rows, cols) per matrix.
  std::shared_ptr<ppc::core::TaskData> taskData = std::make_shared<ppc::core::TaskData>();
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
  taskData->inputs_count.emplace_back(2);
  taskData->inputs_count.emplace_back(2);
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
  taskData->inputs_count.emplace_back(2);
  taskData->inputs_count.emplace_back(2);
  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(output.data()));
  taskData->outputs_count.emplace_back(2);
  taskData->outputs_count.emplace_back(2);

  // Run the full task pipeline; every stage must report success
  // (the original ignored these return values).
  FoxBlockedParallel foxBlockedParallel(taskData);
  ASSERT_TRUE(foxBlockedParallel.validation());
  ASSERT_TRUE(foxBlockedParallel.pre_processing());
  ASSERT_TRUE(foxBlockedParallel.run());
  ASSERT_TRUE(foxBlockedParallel.post_processing());

  // Element-wise comparison against the expected product.
  for (size_t i = 0; i < output.size(); ++i) {
    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
  }
}
|
||
TEST(FoxBlockedParallel, MatrixMultiplication) {
  // 3x3 case with a hand-computed expected product A * B.
  std::vector<double> matrixA = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  std::vector<double> matrixB = {9, 8, 7, 6, 5, 4, 3, 2, 1};
  std::vector<double> expectedOutput = {30, 24, 18, 84, 69, 54, 138, 114, 90};

  // Output buffer owned by a vector instead of raw new/delete: the original
  // leaked the buffer whenever an ASSERT aborted the test body early.
  std::vector<double> output(9, 0.0);

  // Wire inputs/outputs into TaskData; counts are (rows, cols) per matrix.
  std::shared_ptr<ppc::core::TaskData> taskData = std::make_shared<ppc::core::TaskData>();
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
  taskData->inputs_count.emplace_back(3);
  taskData->inputs_count.emplace_back(3);
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
  taskData->inputs_count.emplace_back(3);
  taskData->inputs_count.emplace_back(3);
  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(output.data()));
  taskData->outputs_count.emplace_back(3);
  taskData->outputs_count.emplace_back(3);

  // Run the full task pipeline; every stage must report success
  // (the original ignored these return values).
  FoxBlockedParallel foxBlockedParallel(taskData);
  ASSERT_TRUE(foxBlockedParallel.validation());
  ASSERT_TRUE(foxBlockedParallel.pre_processing());
  ASSERT_TRUE(foxBlockedParallel.run());
  ASSERT_TRUE(foxBlockedParallel.post_processing());

  // Element-wise comparison against the expected product.
  for (size_t i = 0; i < output.size(); ++i) {
    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
  }
}
|
||
TEST(FoxBlockedParallel, MatrixMultiplication_VerySmallMatrices) {
  constexpr size_t kN = 10;

  // Pseudo-random but deterministic inputs: rand() is never seeded, so it
  // produces the same sequence on every run (default seed 1).
  std::vector<double> matrixA(kN * kN);
  std::vector<double> matrixB(kN * kN);
  std::vector<double> expectedOutput(kN * kN, 0.0);
  for (size_t i = 0; i < kN * kN; ++i) {
    matrixA[i] = rand() % 10;
    matrixB[i] = rand() % 10;
  }

  // Naive triple-loop reference multiplication for the expected result.
  for (size_t i = 0; i < kN; ++i) {
    for (size_t j = 0; j < kN; ++j) {
      for (size_t k = 0; k < kN; ++k) {
        expectedOutput[i * kN + j] += matrixA[i * kN + k] * matrixB[k * kN + j];
      }
    }
  }

  // Output buffer owned by a vector instead of raw new/delete: the original
  // leaked the buffer whenever an ASSERT aborted the test body early.
  std::vector<double> output(kN * kN, 0.0);

  // Wire inputs/outputs into TaskData; counts are (rows, cols) per matrix.
  std::shared_ptr<ppc::core::TaskData> taskData = std::make_shared<ppc::core::TaskData>();
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
  taskData->inputs_count.emplace_back(kN);
  taskData->inputs_count.emplace_back(kN);
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
  taskData->inputs_count.emplace_back(kN);
  taskData->inputs_count.emplace_back(kN);
  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(output.data()));
  taskData->outputs_count.emplace_back(kN);
  taskData->outputs_count.emplace_back(kN);

  // Run the full task pipeline; every stage must report success
  // (the original ignored these return values).
  FoxBlockedParallel foxBlockedParallel(taskData);
  ASSERT_TRUE(foxBlockedParallel.validation());
  ASSERT_TRUE(foxBlockedParallel.pre_processing());
  ASSERT_TRUE(foxBlockedParallel.run());
  ASSERT_TRUE(foxBlockedParallel.post_processing());

  // Element-wise comparison against the reference product.
  for (size_t i = 0; i < output.size(); ++i) {
    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
  }
}
|
||
TEST(FoxBlockedParallel, MatrixMultiplication_SmallMatrices) {
  constexpr size_t kN = 100;

  // Pseudo-random but deterministic inputs: rand() is never seeded, so it
  // produces the same sequence on every run (default seed 1).
  std::vector<double> matrixA(kN * kN);
  std::vector<double> matrixB(kN * kN);
  std::vector<double> expectedOutput(kN * kN, 0.0);
  for (size_t i = 0; i < kN * kN; ++i) {
    matrixA[i] = rand() % 10;
    matrixB[i] = rand() % 10;
  }

  // Naive triple-loop reference multiplication for the expected result.
  for (size_t i = 0; i < kN; ++i) {
    for (size_t j = 0; j < kN; ++j) {
      for (size_t k = 0; k < kN; ++k) {
        expectedOutput[i * kN + j] += matrixA[i * kN + k] * matrixB[k * kN + j];
      }
    }
  }

  // Output buffer owned by a vector instead of raw new/delete: the original
  // leaked the buffer whenever an ASSERT aborted the test body early.
  std::vector<double> output(kN * kN, 0.0);

  // Wire inputs/outputs into TaskData; counts are (rows, cols) per matrix.
  std::shared_ptr<ppc::core::TaskData> taskData = std::make_shared<ppc::core::TaskData>();
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
  taskData->inputs_count.emplace_back(kN);
  taskData->inputs_count.emplace_back(kN);
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
  taskData->inputs_count.emplace_back(kN);
  taskData->inputs_count.emplace_back(kN);
  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(output.data()));
  taskData->outputs_count.emplace_back(kN);
  taskData->outputs_count.emplace_back(kN);

  // Run the full task pipeline; every stage must report success
  // (the original ignored these return values).
  FoxBlockedParallel foxBlockedParallel(taskData);
  ASSERT_TRUE(foxBlockedParallel.validation());
  ASSERT_TRUE(foxBlockedParallel.pre_processing());
  ASSERT_TRUE(foxBlockedParallel.run());
  ASSERT_TRUE(foxBlockedParallel.post_processing());

  // Element-wise comparison against the reference product.
  for (size_t i = 0; i < output.size(); ++i) {
    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
  }
}
|
||
TEST(FoxBlockedParallel, MatrixMultiplicationWithNegatives) {
  // All-negative operands: the product (-A) * (-B) equals A * B, so the
  // expected values match the positive 3x3 test.
  std::vector<double> matrixA = {-1, -2, -3, -4, -5, -6, -7, -8, -9};
  std::vector<double> matrixB = {-9, -8, -7, -6, -5, -4, -3, -2, -1};
  std::vector<double> expectedOutput = {30, 24, 18, 84, 69, 54, 138, 114, 90};

  // Output buffer owned by a vector instead of raw new/delete: the original
  // leaked the buffer whenever an ASSERT aborted the test body early.
  std::vector<double> output(9, 0.0);

  // Wire inputs/outputs into TaskData; counts are (rows, cols) per matrix.
  std::shared_ptr<ppc::core::TaskData> taskData = std::make_shared<ppc::core::TaskData>();
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixA.data()));
  taskData->inputs_count.emplace_back(3);
  taskData->inputs_count.emplace_back(3);
  taskData->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrixB.data()));
  taskData->inputs_count.emplace_back(3);
  taskData->inputs_count.emplace_back(3);
  taskData->outputs.emplace_back(reinterpret_cast<uint8_t *>(output.data()));
  taskData->outputs_count.emplace_back(3);
  taskData->outputs_count.emplace_back(3);

  // Run the full task pipeline; every stage must report success
  // (the original ignored these return values).
  FoxBlockedParallel foxBlockedParallel(taskData);
  ASSERT_TRUE(foxBlockedParallel.validation());
  ASSERT_TRUE(foxBlockedParallel.pre_processing());
  ASSERT_TRUE(foxBlockedParallel.run());
  ASSERT_TRUE(foxBlockedParallel.post_processing());

  // Element-wise comparison against the expected product.
  for (size_t i = 0; i < output.size(); ++i) {
    ASSERT_DOUBLE_EQ(output[i], expectedOutput[i]);
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// Copyright 2024 Vadim Belan | ||
#pragma once | ||
|
||
#include <memory> | ||
#include <utility> | ||
#include <vector> | ||
|
||
#include "core/task/include/task.hpp" | ||
|
||
namespace BelanTBB { | ||
|
||
using Matrix = std::vector<std::vector<double>>; | ||
|
||
// Fox blocked matrix multiplication — sequential reference implementation.
// Stage bodies live in the corresponding .cpp; from this header we only know
// the ppc::core::Task contract: validation -> pre_processing -> run ->
// post_processing, each returning true on success.
class FoxBlockedSequential : public ppc::core::Task {
 public:
  explicit FoxBlockedSequential(std::shared_ptr<ppc::core::TaskData> taskData_) : Task(std::move(taskData_)) {}
  bool pre_processing() override;
  bool validation() override;
  bool run() override;
  bool post_processing() override;

 private:
  Matrix A{};        // left operand — presumably unpacked from TaskData inputs; confirm in .cpp
  Matrix B{};        // right operand
  Matrix C{};        // result buffer, presumably C = A * B
  int block_size{};  // tile edge length for the Fox algorithm — semantics defined in .cpp
};
|
||
// Fox blocked matrix multiplication — parallel variant (file lives under a
// TBB module, so presumably oneTBB-based; confirm in .cpp). Same interface
// and member layout as FoxBlockedSequential: validation -> pre_processing ->
// run -> post_processing, each returning true on success.
class FoxBlockedParallel : public ppc::core::Task {
 public:
  explicit FoxBlockedParallel(std::shared_ptr<ppc::core::TaskData> taskData_) : Task(std::move(taskData_)) {}
  bool pre_processing() override;
  bool validation() override;
  bool run() override;
  bool post_processing() override;

 private:
  Matrix A{};        // left operand — presumably unpacked from TaskData inputs; confirm in .cpp
  Matrix B{};        // right operand
  Matrix C{};        // result buffer, presumably C = A * B
  int block_size{};  // tile edge length for the Fox algorithm — semantics defined in .cpp
};
|
||
} // namespace BelanTBB |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
// Copyright 2024 Vadim Belan | ||
#include <gtest/gtest.h> | ||
|
||
#include "core/perf/include/perf.hpp" | ||
#include "tbb/belan_vadim_mat_fox_tbb/include/ops_tbb.hpp" | ||
|
||
using namespace BelanTBB; | ||
|
||
TEST(belan_fox_blocked_tbb, test_pipeline_run) {
  // 512x512 operands with deterministic integer-valued contents, so the
  // double comparison below is exact.
  const int rows = 512;
  const int cols = 512;
  std::vector<double> lhs(rows * cols);
  std::vector<double> rhs(rows * cols);
  std::vector<double> product(rows * cols, 0.0);

  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      lhs[r * cols + c] = r + c;
      rhs[r * cols + c] = r - c;
    }
  }

  // Package operands and result buffer; counts are (rows, cols) per matrix.
  auto taskDataSeq = std::make_shared<ppc::core::TaskData>();
  taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t *>(lhs.data()));
  taskDataSeq->inputs_count.emplace_back(rows);
  taskDataSeq->inputs_count.emplace_back(cols);
  taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t *>(rhs.data()));
  taskDataSeq->inputs_count.emplace_back(rows);
  taskDataSeq->inputs_count.emplace_back(cols);
  taskDataSeq->outputs.emplace_back(reinterpret_cast<uint8_t *>(product.data()));
  taskDataSeq->outputs_count.emplace_back(rows);
  taskDataSeq->outputs_count.emplace_back(cols);

  auto testTaskTBB = std::make_shared<FoxBlockedParallel>(taskDataSeq);

  // Perf attributes: 10 runs timed against a wall-clock lambda in seconds.
  auto perfAttr = std::make_shared<ppc::core::PerfAttr>();
  perfAttr->num_running = 10;
  const auto t0 = std::chrono::high_resolution_clock::now();
  perfAttr->current_timer = [t0] {
    const auto now = std::chrono::high_resolution_clock::now();
    const auto elapsed_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count();
    return static_cast<double>(elapsed_ns) * 1e-9;
  };

  // Measure the whole pipeline and report statistics.
  auto perfResults = std::make_shared<ppc::core::PerfResults>();
  auto perfAnalyzer = std::make_shared<ppc::core::Perf>(testTaskTBB);
  perfAnalyzer->pipeline_run(perfAttr, perfResults);
  ppc::core::Perf::print_perf_statistic(perfResults);

  // Cross-check the task output against a naive O(n^3) reference product.
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      double reference = 0;
      for (int k = 0; k < cols; ++k) {
        reference += lhs[r * cols + k] * rhs[k * cols + c];
      }
      ASSERT_EQ(product[r * cols + c], reference);
    }
  }
}
|
||
TEST(belan_fox_blocked_tbb, test_task_run) {
  // 512x512 operands with deterministic integer-valued contents, so the
  // double comparison below is exact.
  const int rows = 512;
  const int cols = 512;
  std::vector<double> lhs(rows * cols);
  std::vector<double> rhs(rows * cols);
  std::vector<double> product(rows * cols, 0.0);

  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      lhs[r * cols + c] = r + c;
      rhs[r * cols + c] = r - c;
    }
  }

  // Package operands and result buffer; counts are (rows, cols) per matrix.
  auto taskDataSeq = std::make_shared<ppc::core::TaskData>();
  taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t *>(lhs.data()));
  taskDataSeq->inputs_count.emplace_back(rows);
  taskDataSeq->inputs_count.emplace_back(cols);
  taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t *>(rhs.data()));
  taskDataSeq->inputs_count.emplace_back(rows);
  taskDataSeq->inputs_count.emplace_back(cols);
  taskDataSeq->outputs.emplace_back(reinterpret_cast<uint8_t *>(product.data()));
  taskDataSeq->outputs_count.emplace_back(rows);
  taskDataSeq->outputs_count.emplace_back(cols);

  auto testTaskTBB = std::make_shared<FoxBlockedParallel>(taskDataSeq);

  // Perf attributes: 10 runs timed against a wall-clock lambda in seconds.
  auto perfAttr = std::make_shared<ppc::core::PerfAttr>();
  perfAttr->num_running = 10;
  const auto t0 = std::chrono::high_resolution_clock::now();
  perfAttr->current_timer = [t0] {
    const auto now = std::chrono::high_resolution_clock::now();
    const auto elapsed_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - t0).count();
    return static_cast<double>(elapsed_ns) * 1e-9;
  };

  // Measure only the run() stage (task_run) and report statistics.
  auto perfResults = std::make_shared<ppc::core::PerfResults>();
  auto perfAnalyzer = std::make_shared<ppc::core::Perf>(testTaskTBB);
  perfAnalyzer->task_run(perfAttr, perfResults);
  ppc::core::Perf::print_perf_statistic(perfResults);

  // Cross-check the task output against a naive O(n^3) reference product.
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      double reference = 0;
      for (int k = 0; k < cols; ++k) {
        reference += lhs[r * cols + k] * rhs[k * cols + c];
      }
      ASSERT_EQ(product[r * cols + c], reference);
    }
  }
}
Oops, something went wrong.