Skip to content
This repository has been archived by the owner on Jun 30, 2024. It is now read-only.

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
vadimbelan authored Jun 11, 2024
1 parent 4d4d8a9 commit c4bd406
Show file tree
Hide file tree
Showing 4 changed files with 192 additions and 0 deletions.
Empty file.
46 changes: 46 additions & 0 deletions tasks/stl/belan_vadim_mat_fox_stl/include/ops_stl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright 2024 Vadim Belan
#pragma once

#include <memory>
#include <utility>
#include <vector>
#include <thread>
#include <algorithm>

#include "core/task/include/task.hpp"

namespace BelanSTL {

using Matrix = std::vector<std::vector<double>>;

// Single-threaded task that multiplies two matrices block by block.
//
// The four overridden stages are implemented in ops_stl.cpp:
//   validation()      - compares the element counts announced in the task data;
//   pre_processing()  - copies the raw input buffers into A and B and sizes C;
//   run()             - accumulates the product of A and B into C;
//   post_processing() - writes C to the output buffer in row-major order.
class FoxBlockedSequential : public ppc::core::Task {
 public:
  explicit FoxBlockedSequential(std::shared_ptr<ppc::core::TaskData> data) : Task(std::move(data)) {}

  bool validation() override;
  bool pre_processing() override;
  bool run() override;
  bool post_processing() override;

 private:
  Matrix A;            // left operand
  Matrix B;            // right operand
  Matrix C;            // product accumulator
  int block_size = 0;  // tile edge length; assigned in pre_processing()
};

// Multi-threaded counterpart of the blocked matrix multiplication task.
//
// The four overridden stages are implemented in ops_stl.cpp:
//   validation()      - compares the element counts announced in the task data;
//   pre_processing()  - copies the raw input buffers into A and B and sizes C;
//   run()             - accumulates the product of A and B into C, using one
//                       std::thread per band of block_size rows;
//   post_processing() - writes C to the output buffer in row-major order.
class FoxBlockedParallel : public ppc::core::Task {
 public:
  explicit FoxBlockedParallel(std::shared_ptr<ppc::core::TaskData> data) : Task(std::move(data)) {}

  bool validation() override;
  bool pre_processing() override;
  bool run() override;
  bool post_processing() override;

 private:
  Matrix A;            // left operand
  Matrix B;            // right operand
  Matrix C;            // product accumulator
  int block_size = 0;  // tile edge length; assigned in pre_processing()
};

} // namespace BelanSTL
Empty file.
146 changes: 146 additions & 0 deletions tasks/stl/belan_vadim_mat_fox_stl/src/ops_stl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
// Copyright 2024 Vadim Belan
#include "stl/belan_vadim_mat_fox_stl/include/ops_stl.hpp"

using BelanSTL::FoxBlockedParallel;
using BelanSTL::FoxBlockedSequential;
using BelanSTL::Matrix;

// Accepts the task only when both input counts and the output count agree.
bool FoxBlockedSequential::validation() {
  internal_order_test();

  const auto first_count = taskData->inputs_count[0];

  // The output count is only inspected once the two input counts match.
  if (first_count != taskData->inputs_count[1]) {
    return false;
  }
  return first_count == taskData->outputs_count[0];
}

// Copies both row-major input buffers into A and B and prepares a zeroed C.
//
// inputs_count[0] is used as the row count and inputs_count[1] as the column
// count of every matrix; validation() requires the two to be equal.
//
// Always returns true.
bool FoxBlockedSequential::pre_processing() {
  internal_order_test();

  using Index = std::vector<double>::size_type;

  const auto* matrixA = reinterpret_cast<const double*>(taskData->inputs[0]);
  const auto* matrixB = reinterpret_cast<const double*>(taskData->inputs[1]);

  // Keep the dimensions unsigned so that the flat index i * cols + j below is
  // not computed in (overflow-prone) int arithmetic.
  const auto rows = static_cast<Index>(taskData->inputs_count[0]);
  const auto cols = static_cast<Index>(taskData->inputs_count[1]);

  block_size = 32;

  // assign() rather than resize(): resize() keeps existing elements when the
  // size is unchanged, so a second pre_processing() call would leave the old
  // product in C and run() would accumulate on top of it.
  A.assign(rows, std::vector<double>(cols));
  B.assign(rows, std::vector<double>(cols));
  C.assign(rows, std::vector<double>(cols, 0.0));

  for (Index i = 0; i < rows; ++i) {
    for (Index j = 0; j < cols; ++j) {
      A[i][j] = matrixA[i * cols + j];
      B[i][j] = matrixB[i * cols + j];
    }
  }

  return true;
}

// Computes C = A * B by walking C in block_size x block_size tiles.
//
// C is cleared first, so calling run() repeatedly yields the same product
// every time instead of adding a further A * B onto the previous result.
//
// Returns true on success (including the empty-matrix case) and false if
// block_size is not positive, which would make the tile loops never advance.
bool FoxBlockedSequential::run() {
  internal_order_test();

  using Index = std::vector<double>::size_type;

  for (auto& row : C) {
    std::fill(row.begin(), row.end(), 0.0);
  }

  // Nothing to multiply; also avoids touching A[0] / B[0] of an empty matrix.
  if (A.empty() || B.empty()) {
    return true;
  }
  if (block_size <= 0) {
    return false;
  }

  const Index row_count = A.size();
  const Index inner_count = A[0].size();
  const Index col_count = B[0].size();
  const auto step = static_cast<Index>(block_size);

  for (Index row_begin = 0; row_begin < row_count; row_begin += step) {
    const Index row_end = std::min(row_begin + step, row_count);
    for (Index col_begin = 0; col_begin < col_count; col_begin += step) {
      const Index col_end = std::min(col_begin + step, col_count);
      // For a fixed output tile, k runs in increasing order, so every C[i][j]
      // receives its terms in the same order as a plain triple loop would.
      for (Index k = 0; k < inner_count; ++k) {
        for (Index i = row_begin; i < row_end; ++i) {
          const double lhs = A[i][k];
          for (Index j = col_begin; j < col_end; ++j) {
            C[i][j] += lhs * B[k][j];
          }
        }
      }
    }
  }

  return true;
}

// Flattens C into the first output buffer in row-major order.
//
// The width of row 0 is used for every row. Always returns true.
bool FoxBlockedSequential::post_processing() {
  internal_order_test();

  auto* cursor = reinterpret_cast<double*>(taskData->outputs[0]);

  if (C.empty()) {
    return true;
  }

  const std::vector<double>::size_type width = C[0].size();
  for (const auto& row : C) {
    // cursor advances by exactly `width` per row, i.e. it always points at
    // flat index (row number * width + column).
    for (std::vector<double>::size_type col = 0; col < width; ++col) {
      *cursor = row[col];
      ++cursor;
    }
  }

  return true;
}

// Accepts the task only when both input counts and the output count agree.
bool FoxBlockedParallel::validation() {
  internal_order_test();

  const auto first_count = taskData->inputs_count[0];

  // The output count is only inspected once the two input counts match.
  if (first_count != taskData->inputs_count[1]) {
    return false;
  }
  return first_count == taskData->outputs_count[0];
}

// Copies both row-major input buffers into A and B and prepares a zeroed C.
//
// inputs_count[0] is used as the row count and inputs_count[1] as the column
// count of every matrix; validation() requires the two to be equal.
//
// Always returns true.
bool FoxBlockedParallel::pre_processing() {
  internal_order_test();

  using Index = std::vector<double>::size_type;

  const auto* matrixA = reinterpret_cast<const double*>(taskData->inputs[0]);
  const auto* matrixB = reinterpret_cast<const double*>(taskData->inputs[1]);

  // Keep the dimensions unsigned so that the flat index i * cols + j below is
  // not computed in (overflow-prone) int arithmetic.
  const auto rows = static_cast<Index>(taskData->inputs_count[0]);
  const auto cols = static_cast<Index>(taskData->inputs_count[1]);

  block_size = 32;

  // assign() rather than resize(): resize() keeps existing elements when the
  // size is unchanged, so a second pre_processing() call would leave the old
  // product in C and run() would accumulate on top of it.
  A.assign(rows, std::vector<double>(cols));
  B.assign(rows, std::vector<double>(cols));
  C.assign(rows, std::vector<double>(cols, 0.0));

  for (Index i = 0; i < rows; ++i) {
    for (Index j = 0; j < cols; ++j) {
      A[i][j] = matrixA[i * cols + j];
      B[i][j] = matrixB[i * cols + j];
    }
  }

  return true;
}

bool FoxBlockedParallel::run() {
internal_order_test();

std::vector<std::thread> threads;

for (int ii = 0; ii < static_cast<int>(A.size()); ii += block_size) {
threads.emplace_back([&, ii]() {
for (std::vector<double>::size_type jj = 0; jj < B[0].size();
jj += static_cast<std::vector<double>::size_type>(block_size)) {
for (std::vector<double>::size_type k = 0; k < A[0].size(); ++k) {
for (int i = ii; i < std::min(ii + block_size, static_cast<int>(A.size())); ++i) {
for (std::vector<double>::size_type j = jj;
j < std::min(jj + static_cast<std::vector<double>::size_type>(block_size), B[0].size()); ++j) {
C[i][j] += A[i][k] * B[k][j];
}
}
}
}
});
}

for (auto& thread : threads) {
thread.join();
}

return true;
}

// Flattens C into the first output buffer in row-major order.
//
// The width of row 0 is used for every row. Always returns true.
bool FoxBlockedParallel::post_processing() {
  internal_order_test();

  auto* cursor = reinterpret_cast<double*>(taskData->outputs[0]);

  if (C.empty()) {
    return true;
  }

  const std::vector<double>::size_type width = C[0].size();
  for (const auto& row : C) {
    // cursor advances by exactly `width` per row, i.e. it always points at
    // flat index (row number * width + column).
    for (std::vector<double>::size_type col = 0; col < width; ++col) {
      *cursor = row[col];
      ++cursor;
    }
  }

  return true;
}

0 comments on commit c4bd406

Please sign in to comment.