Skip to content

Commit 6ab5170

Browse files
committed
add openmp examples
1 parent 572b621 commit 6ab5170

File tree

7 files changed

+173
-0
lines changed

7 files changed

+173
-0
lines changed

openmp/CMakeLists.txt

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
2+
# Top-level build script for the OpenMP target-offload samples.
#
# Configuration knobs (override with -D<name>=<value> on the cmake command line):
#   AOMP      - use clang/clang++ from an AOMP build instead of the ones on PATH
#   AOMP_DIR  - AOMP installation prefix (only consulted when AOMP is ON)
#   OMP_GPU   - GPU architecture handed to the device compiler through -march
#
# add_link_options(), used below, was introduced in CMake 3.13.
cmake_minimum_required(VERSION 3.13)

set(AOMP OFF CACHE BOOL "Is a compiler from AOMP build?")

# The compilers must be selected before project(): project() is what detects
# and records them. They are cached without FORCE so that an explicit
# -DCMAKE_<LANG>_COMPILER=... from the user still takes precedence.
if(AOMP)
  set(AOMP_DIR "$ENV{HOME}/rocm/aomp" CACHE PATH "AOMP installation prefix")
  set(CMAKE_CXX_COMPILER "${AOMP_DIR}/bin/clang++" CACHE FILEPATH "C++ compiler")
  set(CMAKE_C_COMPILER "${AOMP_DIR}/bin/clang" CACHE FILEPATH "C compiler")
else()
  set(CMAKE_CXX_COMPILER "clang++" CACHE FILEPATH "C++ compiler")
  set(CMAKE_C_COMPILER "clang" CACHE FILEPATH "C compiler")
endif()

project(OpenMP_Samples VERSION 1.0 LANGUAGES C CXX)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Force to code object v3
add_compile_options(-mcode-object-version=3)
add_link_options(-mcode-object-version=3)

# Debug builds with AOMP are meant to pick up the libraries in lib-debug.
# NOTE(review): CMAKE_CXX_LINKER / CMAKE_C_LINKER do not appear to be variables
# that CMake's built-in link rules read - confirm these assignments have the
# intended effect.
if(AOMP AND "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
  set(CMAKE_CXX_LINKER "env LD_LIBRARY_PATH=${AOMP_DIR}/lib-debug ${CMAKE_CXX_COMPILER}")
  set(CMAKE_C_LINKER "env LD_LIBRARY_PATH=${AOMP_DIR}/lib-debug ${CMAKE_C_COMPILER}")
endif()

# Offload flags. They are appended to CMAKE_<LANG>_FLAGS (rather than added
# with add_compile_options) so that they reach both the compile and the link
# command lines and so that "-Xopenmp-target=<triple> -march=<gpu>" stays
# together as an ordered pair.
set(OMP_OFFLOAD_TRIPLE "amdgcn-amd-amdhsa")
set(OMP_GPU "gfx900" CACHE STRING "GPU architecture for the OpenMP offload device code")
set(CLANG_OMP_TARGET_OFFLOAD_FLAGS "-target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=${OMP_OFFLOAD_TRIPLE} -Xopenmp-target=${OMP_OFFLOAD_TRIPLE} -march=${OMP_GPU}")

string(APPEND CMAKE_CXX_FLAGS " ${CLANG_OMP_TARGET_OFFLOAD_FLAGS}")
string(APPEND CMAKE_C_FLAGS " ${CLANG_OMP_TARGET_OFFLOAD_FLAGS}")

add_subdirectory(vmulsum)
add_subdirectory(launch_latency)

openmp/launch_latency/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# launch_latency: times the launch of an empty OpenMP target region.
# The OpenMP offload compile/link flags are inherited from the parent directory.
cmake_minimum_required(VERSION 3.10)

# Only C++ sources are built here, so only that language is declared.
project(launch_latency VERSION 1.0 LANGUAGES CXX)

add_executable(launch_latency
  launch_latency.cpp
)
+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#include <chrono>
2+
#include <iostream>
3+
#include <utility>
4+
#include <vector>
5+
6+
/**
 * Records a sequence of start()/stop() intervals measured with the given
 * std::chrono-style clock and reports their mean duration.
 *
 * @tparam chrono_clock_type  clock type providing time_point and a static now()
 */
template<class chrono_clock_type>
class timer {
    typedef chrono_clock_type clock;
    typedef typename clock::time_point time_point;
public:
    /**
     * @param prealloc_timeslots  number of intervals to reserve storage for up
     *                            front; more intervals may still be recorded.
     */
    timer(uint32_t prealloc_timeslots) {
        intervals.reserve(prealloc_timeslots);
    }

    /** Reserves room for default_timeslots intervals. */
    timer() : timer(default_timeslots) { }

    /** Opens a new interval, stamping its start time. */
    void start() {
        // Create the slot before sampling the clock: reserve() alone does not
        // make elements addressable, and any reallocation should not be timed.
        intervals.emplace_back();
        intervals[next_start++].first = now();
    }

    /**
     * Closes the oldest still-open interval, stamping its end time.
     * A stop() without a matching start() is ignored.
     */
    void stop() {
        // Sample first so the bookkeeping below is not part of the measurement.
        const time_point end_time = now();
        if (next_end < next_start)
            intervals[next_end++].second = end_time;
    }

    /**
     * @return mean length, in milliseconds, of all completed intervals;
     *         0.0f when no interval has been completed.
     */
    float get_avg_milliseconds() const {
        if (next_end == 0)
            return 0.0f;
        float total_ms = 0.0f;
        for (uint32_t i = 0; i < next_end; ++i)
            total_ms += std::chrono::duration<float, std::milli>(delta(intervals[i])).count();
        return total_ms / static_cast<float>(next_end);
    }

private:
    // Default number of interval slots reserved by the default constructor.
    constexpr static uint32_t default_timeslots = 1024;

    // (start, end) time stamps of one measured interval.
    typedef std::pair<time_point, time_point> interval;

    static time_point now() { return clock::now(); }

    // Elapsed time between the two stamps of an interval.
    static typename time_point::duration delta(const interval &iv) {
        return iv.second - iv.first;
    }

    std::vector<interval> intervals;
    uint32_t next_start = 0;  // index of the next interval to be opened
    uint32_t next_end = 0;    // index of the next interval to be closed
};
41+
42+
43+
// Runs an n-iteration loop with an empty body as an OpenMP
// "target teams distribute parallel for" region, so a call does nothing
// beyond launching and completing that region.
static void launch_empty(const int n)
{
#pragma omp target teams distribute parallel for
    for (int iter = 0; iter < n; ++iter) { }
}
48+
49+
int main() {
50+
// warm up
51+
launch_empty(1);
52+
53+
constexpr int N = 1000;
54+
timer<std::chrono::high_resolution_clock> timer(N);
55+
for (int i = 0; i < N; ++i) {
56+
timer.start();
57+
launch_empty(1);
58+
timer.stop();
59+
}
60+
std::cout << "Average latency: " << timer.get_avg_milliseconds() * 1000.0f << " ms" << std::endl;
61+
return 0;
62+
}

openmp/vmulsum/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# vmulsum: element-wise vector multiply and sum, each offloaded from its own
# C source file. The OpenMP offload compile/link flags are inherited from the
# parent directory.
cmake_minimum_required(VERSION 3.10)

# Only C sources are built here, so only that language is declared.
project(vmulsum VERSION 1.0 LANGUAGES C)

add_executable(vmulsum
  main.c
  vmul.c
  vsum.c
)

openmp/vmulsum/main.c

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//
2+
// main.c: Demo of multi-target multi-source OpenMP offload
3+
// Sources are main.c, vmul.c, and vsum.c
4+
// offload targets are nvptx64 and amdgcn
5+
//
6+
7+
#include <stdio.h>
8+
9+
void vmul(int*a, int*b, int*c, int N);
10+
void vsum(int*a, int*b, int*c, int N);
11+
12+
/*
 * Fills two input vectors, runs vmul() and vsum() on them and compares the
 * results against values computed directly in this function.
 *
 * Returns 0 and prints "Success" when every element matches; otherwise prints
 * the last mismatching index for both outputs and returns 1.
 */
int main(){
  /*
   * Enumeration constants are integer constant expressions, so the arrays
   * below can have static storage instead of being ~2.4 MB of automatic
   * variable-length arrays.
   */
  enum { N = 100000, VALUE_RANGE = 1000 };
  static int a[N],b[N],p[N],pcheck[N],s[N],scheck[N];
  int flag=-1;

  for(int i=0;i<N;i++) {
    /*
     * Keep the operands small: (i+1)*(i+2) no longer fits in a 32-bit int
     * once i reaches 46340, and signed overflow is undefined behaviour.
     */
    a[i]=i%VALUE_RANGE+1;
    b[i]=i%VALUE_RANGE+2;
    pcheck[i]=a[i]*b[i];
    scheck[i]=a[i]+b[i];
  }

  vmul(a,b,p,N);
  vsum(a,b,s,N);

  // check the results (flag ends up holding the last mismatching index)
  for(int i=0;i<N;i++)
    if(p[i]!=pcheck[i] || s[i]!=scheck[i]) flag=i;

  if (flag != -1) {
    printf("Fail p[%d]=%d pcheck[%d]=%d\n",
           flag,p[flag],flag,pcheck[flag]);
    printf("Fail s[%d]=%d scheck[%d]=%d\n",
           flag,s[flag],flag,scheck[flag]);
    return 1;
  } else {
    printf("Success\n");
    return 0;
  }
}

openmp/vmulsum/vmul.c

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
//
2+
// vmul.c : Demo of multi-target multi-source OpenMP offload
3+
//
4+
5+
#include <stdio.h>
6+
7+
// Element-wise product: c[k] = a[k] * b[k] for every k in [0, N).
// The loop is an OpenMP target region; a[0:N] and b[0:N] are mapped to the
// device and c[0:N] is mapped back from it.
void vmul(int*a, int*b, int*c, int N)
{
#pragma omp target teams distribute parallel for map(to: a[0:N],b[0:N]) map(from:c[0:N])
  for (int k = 0; k < N; ++k)
    c[k] = a[k] * b[k];
}

openmp/vmulsum/vsum.c

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
//
2+
// vsum.c : Demo of multi-target multi-source OpenMP offload
3+
//
4+
5+
#include <stdio.h>
6+
7+
// Element-wise sum: c[k] = a[k] + b[k] for every k in [0, N).
// The loop is an OpenMP target region; a[0:N] and b[0:N] are mapped to the
// device and c[0:N] is mapped back from it.
void vsum(int*a, int*b, int*c, int N)
{
#pragma omp target teams distribute parallel for map(to: a[0:N],b[0:N]) map(from:c[0:N])
  for (int k = 0; k < N; ++k)
    c[k] = a[k] + b[k];
}

0 commit comments

Comments
 (0)