Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions tasks/kulik_a_mat_mul_double_ccs/omp/include/ops_omp.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#pragma once

#include <cstddef>
#include <vector>

#include "kulik_a_mat_mul_double_ccs/common/include/common.hpp"
#include "task/include/task.hpp"

namespace kulik_a_mat_mul_double_ccs {

// OpenMP variant of the sparse matrix product task for matrices of doubles
// stored in the CCS layout (see the CCS type from common.hpp).
class KulikAMatMulDoubleCcsOMP : public BaseTask {
public:
// Task kind reported for this implementation; always kOMP.
static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() {
return ppc::task::TypeOfTask::kOMP;
}
// Records the task kind and stores a copy of the input `in`.
explicit KulikAMatMulDoubleCcsOMP(const InType &in);

private:
// Pipeline stages overridden from BaseTask.
bool ValidationImpl() override;
bool PreProcessingImpl() override;
bool RunImpl() override;
bool PostProcessingImpl() override;
// Computes output column `j` of the product a * b.
//   j               - index of the column of `b` (and of the result) to process.
//   tid             - index selecting which per-thread scratch buffers to use.
//   a, b            - left and right operands in CCS form.
//   thread_accum    - per-thread dense accumulators, indexed [tid][row].
//   thread_nz       - per-thread "row already touched" flags, indexed [tid][row].
//   thread_nnz_rows - per-thread list of rows touched while processing the column.
//   local_values    - per-column output values; the results are appended to [j].
//   local_rows      - per-column output row indices; the results are appended to [j].
static void ProcessColumn(size_t j, int tid, const CCS &a, const CCS &b,
std::vector<std::vector<double>> &thread_accum, std::vector<std::vector<bool>> &thread_nz,
std::vector<std::vector<size_t>> &thread_nnz_rows,
std::vector<std::vector<double>> &local_values,
std::vector<std::vector<size_t>> &local_rows);
};

} // namespace kulik_a_mat_mul_double_ccs
114 changes: 114 additions & 0 deletions tasks/kulik_a_mat_mul_double_ccs/omp/src/ops_omp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#include "kulik_a_mat_mul_double_ccs/omp/include/ops_omp.hpp"

#include <omp.h>

#include <algorithm>
#include <cstddef>
#include <tuple>
#include <vector>

#include "kulik_a_mat_mul_double_ccs/common/include/common.hpp"

namespace kulik_a_mat_mul_double_ccs {

// Computes column `j` of the product a * b and appends its non-zero entries,
// ordered by ascending row index, to local_rows[j] / local_values[j].
//
// The scratch buffers selected by `tid` (thread_accum[tid], thread_nz[tid],
// thread_nnz_rows[tid]) are used as a dense accumulator, a "row touched" mask
// and a list of touched rows. Every entry touched here is reset before
// returning, so the same buffers can be reused for the next column.
void KulikAMatMulDoubleCcsOMP::ProcessColumn(size_t j, int tid, const CCS &a, const CCS &b,
                                             std::vector<std::vector<double>> &thread_accum,
                                             std::vector<std::vector<bool>> &thread_nz,
                                             std::vector<std::vector<size_t>> &thread_nnz_rows,
                                             std::vector<std::vector<double>> &local_values,
                                             std::vector<std::vector<size_t>> &local_rows) {
  auto &accum = thread_accum[tid];
  auto &seen = thread_nz[tid];
  auto &touched = thread_nnz_rows[tid];
  auto &out_rows = local_rows[j];
  auto &out_values = local_values[j];

  // Accumulate: every stored entry of column j of b scales one column of a.
  const size_t b_end = b.col_ind[j + 1];
  for (size_t pb = b.col_ind[j]; pb < b_end; ++pb) {
    const size_t a_col = b.row[pb];
    const double scale = b.value[pb];

    const size_t a_end = a.col_ind[a_col + 1];
    for (size_t pa = a.col_ind[a_col]; pa < a_end; ++pa) {
      const size_t r = a.row[pa];
      accum[r] += a.value[pa] * scale;
      if (seen[r]) {
        continue;
      }
      // First contribution to this row: remember it for the gather step.
      seen[r] = true;
      touched.push_back(r);
    }
  }

  // The output column lists its rows in ascending order.
  std::ranges::sort(touched);

  // Gather the results and restore the scratch buffers to their cleared state.
  for (const size_t r : touched) {
    const double sum = accum[r];
    accum[r] = 0.0;
    seen[r] = false;
    if (sum == 0.0) {
      // Contributions cancelled out exactly; do not store an explicit zero.
      continue;
    }
    out_rows.push_back(r);
    out_values.push_back(sum);
  }
  touched.clear();
}

// Registers the task kind for this implementation and stores a copy of `in`
// as the task input.
KulikAMatMulDoubleCcsOMP::KulikAMatMulDoubleCcsOMP(const InType &in) {
  const auto task_kind = GetStaticTypeOfTask();
  SetTypeOfTask(task_kind);
  GetInput() = in;
}

// Accepts the input only when the operands are dimensionally compatible:
// the `m` field of the first matrix must equal the `n` field of the second
// (RunImpl in this file iterates columns up to `m` and sizes row buffers by `n`).
bool KulikAMatMulDoubleCcsOMP::ValidationImpl() {
  const auto &input = GetInput();
  const auto &lhs = std::get<0>(input);
  const auto &rhs = std::get<1>(input);
  const bool inner_dims_match = (lhs.m == rhs.n);
  return inner_dims_match;
}

// No preparation is performed before the run stage; always reports success.
bool KulikAMatMulDoubleCcsOMP::PreProcessingImpl() {
return true;
}

// Multiplies the two CCS input matrices and writes the CCS result to the output.
//
// Three phases:
//   1. (parallel) each output column is computed independently into its own
//      pair of per-column vectors, using scratch buffers owned by the executing
//      thread;
//   2. (sequential) per-column sizes are prefix-summed into the column offsets;
//   3. (parallel) per-column results are copied into the final flat arrays.
// Always returns true.
bool KulikAMatMulDoubleCcsOMP::RunImpl() {
  const auto &input = GetInput();
  const auto &a = std::get<0>(input);
  const auto &b = std::get<1>(input);

  OutType &c = GetOutput();
  c.n = a.n;
  c.m = b.m;
  c.col_ind.assign(c.m + 1, 0);

  // One scratch set per possible thread: dense accumulator, touched-row mask,
  // and the list of touched rows.
  const int thread_count = omp_get_max_threads();
  std::vector<std::vector<double>> accum(thread_count, std::vector<double>(a.n, 0.0));
  std::vector<std::vector<bool>> seen(thread_count, std::vector<bool>(a.n, false));
  std::vector<std::vector<size_t>> touched(thread_count);

  // Per-column staging area; column j is written only by the iteration that owns j.
  std::vector<std::vector<double>> col_values(b.m);
  std::vector<std::vector<size_t>> col_rows(b.m);

#pragma omp parallel for default(none) schedule(static) \
    shared(a, b, accum, seen, touched, col_values, col_rows)
  for (size_t j = 0; j < b.m; ++j) {
    ProcessColumn(j, omp_get_thread_num(), a, b, accum, seen, touched, col_values, col_rows);
  }

  // Exclusive prefix sum of the column sizes gives the column start offsets.
  size_t running_nz = 0;
  for (size_t col = 0; col < b.m; ++col) {
    c.col_ind[col] = running_nz;
    running_nz += col_values[col].size();
  }
  c.col_ind[b.m] = running_nz;
  c.nz = running_nz;

  c.value.resize(running_nz);
  c.row.resize(running_nz);

  // Scatter each column into its disjoint slice of the flat output arrays.
#pragma omp parallel for default(none) schedule(static) shared(b, c, col_values, col_rows)
  for (size_t j = 0; j < b.m; ++j) {
    const auto &vals = col_values[j];
    const auto &rows = col_rows[j];
    size_t dst = c.col_ind[j];
    for (size_t k = 0; k < vals.size(); ++k, ++dst) {
      c.value[dst] = vals[k];
      c.row[dst] = rows[k];
    }
  }

  return true;
}

// No work is performed after the run stage; always reports success.
bool KulikAMatMulDoubleCcsOMP::PostProcessingImpl() {
return true;
}

} // namespace kulik_a_mat_mul_double_ccs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <vector>

#include "kulik_a_mat_mul_double_ccs/common/include/common.hpp"
#include "kulik_a_mat_mul_double_ccs/omp/include/ops_omp.hpp"
#include "kulik_a_mat_mul_double_ccs/seq/include/ops_seq.hpp"
#include "util/include/func_test_util.hpp"
#include "util/include/util.hpp"
Expand Down Expand Up @@ -99,7 +100,8 @@ TEST_P(KulikARunFuncTestsThreads, MatmulFromPic) {
const std::array<TestType, 1> kTestParam = {std::make_tuple(std::string("matrix_test"), std::string("matrix_test"))};

const auto kTestTasksList = std::tuple_cat(
ppc::util::AddFuncTask<KulikAMatMulDoubleCcsSEQ, InType>(kTestParam, PPC_SETTINGS_kulik_a_mat_mul_double_ccs));
ppc::util::AddFuncTask<KulikAMatMulDoubleCcsSEQ, InType>(kTestParam, PPC_SETTINGS_kulik_a_mat_mul_double_ccs),
ppc::util::AddFuncTask<KulikAMatMulDoubleCcsOMP, InType>(kTestParam, PPC_SETTINGS_kulik_a_mat_mul_double_ccs));

const auto kGtestValues = ppc::util::ExpandToValues(kTestTasksList);

Expand Down
5 changes: 3 additions & 2 deletions tasks/kulik_a_mat_mul_double_ccs/tests/performance/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <vector>

#include "kulik_a_mat_mul_double_ccs/common/include/common.hpp"
#include "kulik_a_mat_mul_double_ccs/omp/include/ops_omp.hpp"
#include "kulik_a_mat_mul_double_ccs/seq/include/ops_seq.hpp"
#include "util/include/perf_test_util.hpp"

Expand Down Expand Up @@ -125,8 +126,8 @@ TEST_P(KulikARunPerfTestThreads, RunPerfModes) {

namespace {

const auto kAllPerfTasks =
ppc::util::MakeAllPerfTasks<InType, KulikAMatMulDoubleCcsSEQ>(PPC_SETTINGS_kulik_a_mat_mul_double_ccs);
const auto kAllPerfTasks = ppc::util::MakeAllPerfTasks<InType, KulikAMatMulDoubleCcsSEQ, KulikAMatMulDoubleCcsOMP>(
PPC_SETTINGS_kulik_a_mat_mul_double_ccs);

const auto kGtestValues = ppc::util::TupleToGTestValues(kAllPerfTasks);

Expand Down
Loading