Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update simd warp example #80

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Exercises/simd_warp/Begin/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ endif

CXXFLAGS ?= -O3 -g
override CXXFLAGS += -I$(MAKEFILE_PATH)
override CXXFLAGS += -I$(KOKKOS_PATH)/../simd-math
override CXXFLAGS += -I$(KOKKOS_PATH)/simd/src

DEPFLAGS = -M
LINK = ${CXX}
Expand Down
20 changes: 11 additions & 9 deletions Exercises/simd_warp/Begin/simd_warp_begin.cpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
#include<Kokkos_Core.hpp>
//EXERCISE: include the right header (later Kokkos will include this)
//#include<simd.hpp>
//#include<Kokkos_SIMD.hpp>

void test_simd(int N_in, int M, int R, double a) {

//EXERCISE: get the right type here for CUDA/Non-Cuda
//#ifdef KOKKOS_ENABLE_CUDA
//using simd_t = ...;
//#else
//using simd_t = ...;
//#endif
//using simd_storage_t = ...;

//EXERCISE: What will the N now be?
int N = N_in;
Expand All @@ -33,15 +28,22 @@ void test_simd(int N_in, int M, int R, double a) {
});
Kokkos::deep_copy(results_scalar,0.0);

//EXERCISE: use TeamPolicy here
#ifdef KOKKOS_ENABLE_CUDA
constexpr int team_size = ...;
#else
constexpr int team_size = ...;
#endif

Kokkos::Timer timer;
for(int r = 0; r<R; r++) {
//EXERCISE: use TeamPolicy here
Kokkos::parallel_for("Combine",data.extent(0), KOKKOS_LAMBDA(const int i) {
//EXERCISE Use the correct type here
double tmp = 0.0;
double b = a;
//EXERCISE: how do you relate index i to the team policy member?
for(int j=0; j<data.extent(1); j++) {
//EXERCISE: add storage_type to temporary type conversion
tmp += b * data(i,j);
b+=a+1.0*(j+1);
}
Expand All @@ -64,7 +66,7 @@ void test_simd(int N_in, int M, int R, double a) {
void test_team_vector(int N, int M, int R, double a) {

constexpr int V = 32;
Kokkos::View<double**,Kokkos::LayoutLeft> data("D",N,M);
Kokkos::View<double**, Kokkos::LayoutLeft> data("D",N,M);
Kokkos::View<double*> results("R",N);

// Let's fill the input data
Expand Down Expand Up @@ -105,7 +107,7 @@ void test_team_vector(int N, int M, int R, double a) {
int main(int argc, char* argv[]) {
Kokkos::initialize(argc,argv);

int N = argc>1?atoi(argv[1]):320000;
int N = argc>1?atoi(argv[1]):32000000;
int M = argc>2?atoi(argv[2]):3;
int R = argc>3?atoi(argv[3]):10;
double scal = argc>4?atof(argv[4]):1.5;
Expand Down
2 changes: 1 addition & 1 deletion Exercises/simd_warp/Solution/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ endif

CXXFLAGS ?= -O3 -g
override CXXFLAGS += -I$(MAKEFILE_PATH)
override CXXFLAGS += -I$(KOKKOS_PATH)/../simd-math
override CXXFLAGS += -I$(KOKKOS_PATH)/simd/src

DEPFLAGS = -M
LINK = ${CXX}
Expand Down
31 changes: 16 additions & 15 deletions Exercises/simd_warp/Solution/simd_warp_solution.cpp
Original file line number Diff line number Diff line change
@@ -1,42 +1,43 @@
#include<Kokkos_Core.hpp>
#include<simd.hpp>
#include<Kokkos_SIMD.hpp>

void test_simd(int N_in, int M, int R, double a) {

#ifdef KOKKOS_ENABLE_CUDA
using simd_t = simd::simd<double,simd::simd_abi::cuda_warp<32>>;
#else
using simd_t = simd::simd<double,simd::simd_abi::native>;
#endif
using simd_storage_t = simd_t::storage_type;
using simd_t = Kokkos::Experimental::native_simd<double>;

int N = N_in/simd_t::size();

Kokkos::View<simd_storage_t**, Kokkos::LayoutLeft> data("D",N,M);
Kokkos::View<simd_storage_t*> results("R",N);
Kokkos::View<simd_t**,Kokkos::LayoutLeft> data("D",N,M);
Kokkos::View<simd_t*> results("R",N);

// For the final reduction we are going to need a scalar view of the data for now.
// This relies on knowing the data layout; we will add SIMD Layouts later
// so that simple copy construction/assignment will work.
Kokkos::View<double**, Kokkos::LayoutLeft> data_scalar((double*)data.data(),N_in,M);
Kokkos::View<double*> results_scalar((double*)results.data(),N_in);

// Lets fill the data
// Let's fill the input data using the scalar view
Kokkos::parallel_for("init",data_scalar.extent(0), KOKKOS_LAMBDA(const int i) {
for (int j=0; j<data_scalar.extent(1); j++)
data_scalar(i,j) = i%8;
});
Kokkos::deep_copy(results_scalar,0.0);

#ifdef KOKKOS_ENABLE_CUDA
constexpr int team_size = 32;
#else
constexpr int team_size = 1;
#endif

Kokkos::Timer timer;
for(int r = 0; r<R; r++) {
Kokkos::parallel_for("Combine",Kokkos::TeamPolicy<>(data.extent(0),1,simd_t::size()),
Kokkos::parallel_for("Combine",Kokkos::TeamPolicy<>(data.extent(0)/team_size,team_size,simd_t::size()),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Kokkos::parallel_for("Combine",Kokkos::TeamPolicy<>(data.extent(0)/team_size,team_size,simd_t::size()),
Kokkos::parallel_for("Combine",Kokkos::TeamPolicy<>(data.extent(0)/team_size,team_size,simd_t::size()),

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there is an indent issue here.

KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) {
simd_t tmp = 0.0;
double b = a;
const int i = team.league_rank();
const int i = team.league_rank() * team.team_size() + team.team_rank();
for(int j=0; j<data.extent(1); j++) {
tmp += b * simd_t(data(i,j));
tmp += b * data(i,j);
b+=a+1.0*(j+1);
}
results(i) = tmp;
Expand All @@ -58,7 +59,7 @@ void test_simd(int N_in, int M, int R, double a) {
void test_team_vector(int N, int M, int R, double a) {

constexpr int V = 32;
Kokkos::View<double**,Kokkos::LayoutLeft> data("D",N,M);
Kokkos::View<double**, Kokkos::LayoutLeft> data("D",N,M);
Kokkos::View<double*> results("R",N);

// Let's fill the input data
Expand Down Expand Up @@ -99,7 +100,7 @@ void test_team_vector(int N, int M, int R, double a) {
int main(int argc, char* argv[]) {
Kokkos::initialize(argc,argv);

int N = argc>1?atoi(argv[1]):320000;
int N = argc>1?atoi(argv[1]):32000000;
int M = argc>2?atoi(argv[2]):3;
int R = argc>3?atoi(argv[3]):10;
double scal = argc>4?atof(argv[4]):1.5;
Expand Down