This repository has been archived by the owner on Jun 30, 2024. It is now read-only.
forked from mahbhlddnhakkh/ppc-2024-threads
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4d4d8a9
commit c4bd406
Showing
4 changed files
with
192 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
// Copyright 2024 Vadim Belan | ||
#pragma once | ||
|
||
#include <memory> | ||
#include <utility> | ||
#include <vector> | ||
#include <thread> | ||
#include <algorithm> | ||
|
||
#include "core/task/include/task.hpp" | ||
|
||
namespace BelanSTL { | ||
|
||
using Matrix = std::vector<std::vector<double>>; | ||
|
||
class FoxBlockedSequential : public ppc::core::Task { | ||
public: | ||
explicit FoxBlockedSequential(std::shared_ptr<ppc::core::TaskData> taskData_) : Task(std::move(taskData_)) {} | ||
bool pre_processing() override; | ||
bool validation() override; | ||
bool run() override; | ||
bool post_processing() override; | ||
|
||
private: | ||
Matrix A{}; | ||
Matrix B{}; | ||
Matrix C{}; | ||
int block_size{}; | ||
}; | ||
|
||
class FoxBlockedParallel : public ppc::core::Task { | ||
public: | ||
explicit FoxBlockedParallel(std::shared_ptr<ppc::core::TaskData> taskData_) : Task(std::move(taskData_)) {} | ||
bool pre_processing() override; | ||
bool validation() override; | ||
bool run() override; | ||
bool post_processing() override; | ||
|
||
private: | ||
Matrix A{}; | ||
Matrix B{}; | ||
Matrix C{}; | ||
int block_size{}; | ||
}; | ||
|
||
} // namespace BelanSTL |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
// Copyright 2024 Vadim Belan | ||
#include "stl/belan_vadim_mat_fox_stl/include/ops_stl.hpp" | ||
|
||
using BelanSTL::FoxBlockedParallel; | ||
using BelanSTL::FoxBlockedSequential; | ||
using BelanSTL::Matrix; | ||
|
||
bool FoxBlockedSequential::validation() { | ||
internal_order_test(); | ||
|
||
return taskData->inputs_count[0] == taskData->inputs_count[1] && | ||
taskData->inputs_count[0] == taskData->outputs_count[0]; | ||
} | ||
|
||
bool FoxBlockedSequential::pre_processing() { | ||
internal_order_test(); | ||
|
||
auto* matrixA = reinterpret_cast<double*>(taskData->inputs[0]); | ||
auto* matrixB = reinterpret_cast<double*>(taskData->inputs[1]); | ||
|
||
int rows = taskData->inputs_count[0]; | ||
int cols = taskData->inputs_count[1]; | ||
|
||
block_size = 32; | ||
|
||
A.resize(rows, std::vector<double>(cols)); | ||
B.resize(rows, std::vector<double>(cols)); | ||
C.resize(rows, std::vector<double>(cols)); | ||
|
||
for (int i = 0; i < rows; ++i) { | ||
for (int j = 0; j < cols; ++j) { | ||
A[i][j] = matrixA[i * cols + j]; | ||
B[i][j] = matrixB[i * cols + j]; | ||
} | ||
} | ||
|
||
return true; | ||
} | ||
|
||
bool FoxBlockedSequential::run() { | ||
internal_order_test(); | ||
|
||
for (std::vector<double>::size_type i = 0; i < A.size(); i += block_size) { | ||
for (std::vector<double>::size_type j = 0; j < B[0].size(); j += block_size) { | ||
for (std::vector<double>::size_type k = 0; k < A[0].size(); ++k) { | ||
for (std::vector<double>::size_type ii = i; | ||
ii < std::min(i + static_cast<std::vector<double>::size_type>(block_size), A.size()); ++ii) { | ||
for (std::vector<double>::size_type jj = j; | ||
jj < std::min(j + static_cast<std::vector<double>::size_type>(block_size), B[0].size()); ++jj) { | ||
C[ii][jj] += A[ii][k] * B[k][jj]; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
return true; | ||
} | ||
|
||
bool FoxBlockedSequential::post_processing() { | ||
internal_order_test(); | ||
|
||
auto* out_ptr = reinterpret_cast<double*>(taskData->outputs[0]); | ||
|
||
for (std::vector<double>::size_type i = 0; i < C.size(); ++i) { | ||
for (std::vector<double>::size_type j = 0; j < C[0].size(); ++j) { | ||
out_ptr[i * C[0].size() + j] = C[i][j]; | ||
} | ||
} | ||
|
||
return true; | ||
} | ||
|
||
bool FoxBlockedParallel::validation() { | ||
internal_order_test(); | ||
|
||
return taskData->inputs_count[0] == taskData->inputs_count[1] && | ||
taskData->inputs_count[0] == taskData->outputs_count[0]; | ||
} | ||
|
||
bool FoxBlockedParallel::pre_processing() { | ||
internal_order_test(); | ||
|
||
auto* matrixA = reinterpret_cast<double*>(taskData->inputs[0]); | ||
auto* matrixB = reinterpret_cast<double*>(taskData->inputs[1]); | ||
|
||
int rows = taskData->inputs_count[0]; | ||
int cols = taskData->inputs_count[1]; | ||
|
||
block_size = 32; | ||
|
||
A.resize(rows, std::vector<double>(cols)); | ||
B.resize(rows, std::vector<double>(cols)); | ||
C.resize(rows, std::vector<double>(cols)); | ||
|
||
for (int i = 0; i < rows; ++i) { | ||
for (int j = 0; j < cols; ++j) { | ||
A[i][j] = matrixA[i * cols + j]; | ||
B[i][j] = matrixB[i * cols + j]; | ||
} | ||
} | ||
|
||
return true; | ||
} | ||
|
||
bool FoxBlockedParallel::run() { | ||
internal_order_test(); | ||
|
||
std::vector<std::thread> threads; | ||
|
||
for (int ii = 0; ii < static_cast<int>(A.size()); ii += block_size) { | ||
threads.emplace_back([&, ii]() { | ||
for (std::vector<double>::size_type jj = 0; jj < B[0].size(); | ||
jj += static_cast<std::vector<double>::size_type>(block_size)) { | ||
for (std::vector<double>::size_type k = 0; k < A[0].size(); ++k) { | ||
for (int i = ii; i < std::min(ii + block_size, static_cast<int>(A.size())); ++i) { | ||
for (std::vector<double>::size_type j = jj; | ||
j < std::min(jj + static_cast<std::vector<double>::size_type>(block_size), B[0].size()); ++j) { | ||
C[i][j] += A[i][k] * B[k][j]; | ||
} | ||
} | ||
} | ||
} | ||
}); | ||
} | ||
|
||
for (auto& thread : threads) { | ||
thread.join(); | ||
} | ||
|
||
return true; | ||
} | ||
|
||
bool FoxBlockedParallel::post_processing() { | ||
internal_order_test(); | ||
|
||
auto* out_ptr = reinterpret_cast<double*>(taskData->outputs[0]); | ||
|
||
for (std::vector<double>::size_type i = 0; i < C.size(); ++i) { | ||
for (std::vector<double>::size_type j = 0; j < C[0].size(); ++j) { | ||
out_ptr[i * C[0].size() + j] = C[i][j]; | ||
} | ||
} | ||
|
||
return true; | ||
} |