Skip to content

Commit 01f920f

Browse files
authored
Merge pull request #218 from pkestene/fix/examples
Fix examples that fail because Kokkos::finalize called too early
2 parents ea78f3a + a9cff5a commit 01f920f

File tree

17 files changed

+128
-87
lines changed

17 files changed

+128
-87
lines changed

examples/BabelStream/functor/babel_stream.py

+8-5
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ def dot(self, index: int, acc: pk.Acc[float]):
4343
acc += self.a[index] * self.b[index]
4444

4545

46-
if __name__ == "__main__":
46+
def run() -> None:
4747
array_size: int = 2**25 # 100000
4848
startA: float = 0.1
4949
startB: float = 0.2
@@ -92,7 +92,7 @@ def dot(self, index: int, acc: pk.Acc[float]):
9292
timings[4].append(timer.seconds())
9393
timer.reset()
9494

95-
goldA = startA
95+
goldA = startA
9696
goldB = startB
9797
goldC = startC
9898

@@ -108,9 +108,9 @@ def dot(self, index: int, acc: pk.Acc[float]):
108108
errB /= len(w.b)
109109
errC = reduce(lambda s, val: s + abs(val - goldC), w.c)
110110
errC /= len(w.c)
111-
112-
# epsi = sys.float_info.epsilon * 100
113-
epsi = 1e-8
111+
112+
# epsi = sys.float_info.epsilon * 100
113+
epsi = 1e-8
114114
if (errA > epsi):
115115
print(f"Validation failed on a[]. Average error {errA}")
116116
if (errB > epsi):
@@ -143,3 +143,6 @@ def dot(self, index: int, acc: pk.Acc[float]):
143143
# bandwidth = 1.0e-9 * (total_bytes / runtime)
144144
# print(f"Runtime (seconds): {runtime}")
145145
# print(f"Bandwidth (GB/s): {bandwidth}")
146+
147+
if __name__ == "__main__":
148+
run()

examples/BabelStream/standalone/babel_stream.py

+8-5
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def dot(index, acc, a_view, b_view):
3131
acc += a_view[index] * b_view[index]
3232

3333

34-
if __name__ == "__main__":
34+
def run() -> None:
3535
array_size: int = 2**25 # 100000
3636
startA: float = 0.1
3737
startB: float = 0.2
@@ -85,7 +85,7 @@ def dot(index, acc, a_view, b_view):
8585
timings[4].append(timer.seconds())
8686
timer.reset()
8787

88-
goldA = startA
88+
goldA = startA
8989
goldB = startB
9090
goldC = startC
9191

@@ -101,9 +101,9 @@ def dot(index, acc, a_view, b_view):
101101
errB /= len(b)
102102
errC = reduce(lambda s, val: s + abs(val - goldC), c)
103103
errC /= len(c)
104-
105-
# epsi = sys.float_info.epsilon * 100
106-
epsi = 1e-8
104+
105+
# epsi = sys.float_info.epsilon * 100
106+
epsi = 1e-8
107107
if (errA > epsi):
108108
print(f"Validation failed on a[]. Average error {errA}")
109109
if (errB > epsi):
@@ -136,3 +136,6 @@ def dot(index, acc, a_view, b_view):
136136
# bandwidth = 1.0e-9 * (total_bytes / runtime)
137137
# print(f"Runtime (seconds): {runtime}")
138138
# print(f"Bandwidth (GB/s): {bandwidth}")
139+
140+
if __name__ == "__main__":
141+
run()

examples/BabelStream/workload/babel_stream.py

+38-35
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
@pk.workload
88
class KokkosStream:
9-
def __init__(self, ARRAY_SIZE: int, initA: float, initB: float, initC: float,
9+
def __init__(self, ARRAY_SIZE: int, initA: float, initB: float, initC: float,
1010
scalar: float, num_times: int):
1111
self.array_size: int = ARRAY_SIZE
1212

@@ -18,7 +18,7 @@ def __init__(self, ARRAY_SIZE: int, initA: float, initB: float, initC: float,
1818
self.initB: pk.double = initB
1919
self.initC: pk.double = initC
2020
self.scalar: pk.double = scalar
21-
self.num_times: int = num_times
21+
self.num_times: int = num_times
2222
self.sum: pk.double = 0
2323

2424
self.runtime: float = 0
@@ -48,38 +48,38 @@ def run(self):
4848

4949
self.runtime = timer.seconds()
5050

51-
# @pk.callback
52-
# def results(self):
53-
# goldA = self.initA
54-
# goldB = self.initB
55-
# goldC = self.initC
56-
57-
# for i in range(self.num_times):
58-
# goldC = goldA
59-
# goldB = self.scalar * goldC
60-
# goldC = goldA + goldB
61-
# goldA = goldB + self.scalar * goldC
62-
63-
# errA = reduce(lambda s, val: s + abs(val - goldA), self.a)
64-
# errA /= len(self.a)
65-
# errB = reduce(lambda s, val: s + abs(val - goldB), self.b)
66-
# errB /= len(self.b)
67-
# errC = reduce(lambda s, val: s + abs(val - goldC), self.c)
68-
# errC /= len(self.c)
69-
70-
# # epsi = sys.float_info.epsilon * 100
71-
# epsi = 1e-8
72-
# if (errA > epsi):
73-
# print(f"Validation failed on a[]. Average error {errA}")
74-
# if (errB > epsi):
75-
# print(f"Validation failed on b[]. Average error {errB}")
76-
# if (errC > epsi):
77-
# print(f"Validation failed on c[]. Average error {errC}")
78-
79-
# goldSum = goldA * goldB * self.array_size
80-
# errSum = self.sum - goldSum
81-
# if (abs(errSum) > 1e-8):
82-
# print(f"Validation failed on sum. Error {errSum}")
51+
@pk.callback
52+
def results(self):
53+
goldA = self.initA
54+
goldB = self.initB
55+
goldC = self.initC
56+
57+
for i in range(self.num_times):
58+
goldC = goldA
59+
goldB = self.scalar * goldC
60+
goldC = goldA + goldB
61+
goldA = goldB + self.scalar * goldC
62+
63+
errA = reduce(lambda s, val: s + abs(val - goldA), self.a)
64+
errA /= len(self.a)
65+
errB = reduce(lambda s, val: s + abs(val - goldB), self.b)
66+
errB /= len(self.b)
67+
errC = reduce(lambda s, val: s + abs(val - goldC), self.c)
68+
errC /= len(self.c)
69+
70+
# epsi = sys.float_info.epsilon * 100
71+
epsi = 1e-8
72+
if (errA > epsi):
73+
print(f"Validation failed on a[]. Average error {errA}")
74+
if (errB > epsi):
75+
print(f"Validation failed on b[]. Average error {errB}")
76+
if (errC > epsi):
77+
print(f"Validation failed on c[]. Average error {errC}")
78+
79+
goldSum = goldA * goldB * self.array_size
80+
errSum = self.sum - goldSum
81+
if (abs(errSum) > 1e-8):
82+
print(f"Validation failed on sum. Error {errSum}")
8383

8484
# total_bytes = 3 * sys.getsizeof(0.0) * self.array_size * num_times;
8585
# bandwidth = 1.0e-9 * (total_bytes / self.runtime)
@@ -114,7 +114,7 @@ def dot(self, index: int, acc: pk.Acc[float]):
114114
acc += self.a[index] * self.b[index]
115115

116116

117-
if __name__ == "__main__":
117+
def run() -> None:
118118
array_size: int = 2**25 # 100000
119119
startA: float = 0.1
120120
startB: float = 0.2
@@ -138,3 +138,6 @@ def dot(self, index: int, acc: pk.Acc[float]):
138138

139139
pk.set_default_space(space)
140140
pk.execute(space, KokkosStream(array_size, startA, startB, startC, startScalar, num_times))
141+
142+
if __name__ == "__main__":
143+
run()

examples/ParRes/workload/nstream.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ def __init__(self, iterations, length, offset):
1616
self.scalar: float = 3
1717
self.asum: float = 0
1818

19-
self.nstream_time: float = 0
19+
self.nstream_time: float = 0
2020

2121
@pk.main
2222
def run(self):
@@ -66,7 +66,7 @@ def init(self, i: int):
6666
self.B[i] = 2
6767
self.C[i] = 2
6868

69-
if __name__ == "__main__":
69+
def run() -> None:
7070
parser = argparse.ArgumentParser()
7171
parser.add_argument('iterations', type=int)
7272
parser.add_argument('length', type=int)
@@ -100,3 +100,5 @@ def init(self, i: int):
100100
print("Offset = " , offset)
101101
pk.execute(pk.ExecutionSpace.Default, main(iterations, length, offset))
102102

103+
if __name__ == "__main__":
104+
run()

examples/ParRes/workload/stencil.py

+11-9
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,14 @@ def __init__(self, iterations, n, tile_size, star, radius):
1818
self.out: pk.View2D[pk.double] = pk.View([self.n, self.n], pk.double, layout=pk.Layout.LayoutRight)
1919
self.norm: float = 0
2020

21-
self.stencil_time: float = 0
21+
self.stencil_time: float = 0
2222

2323
@pk.main
2424
def run(self):
2525
t: int = tile_size
2626
r: int = radius
2727

28-
pk.parallel_for(pk.MDRangePolicy([0,0], [n, n], [t, t]),
28+
pk.parallel_for(pk.MDRangePolicy([0,0], [n, n], [t, t]),
2929
self.init)
3030
pk.fence()
3131

@@ -34,7 +34,7 @@ def run(self):
3434
for i in range(iterations):
3535
if (i == 1):
3636
pk.fence()
37-
37+
3838
if r == 1:
3939
# star1 stencil
4040
pk.parallel_for("stencil", pk.MDRangePolicy([r,r], [n-r, n-r], [t, t]), self.star1)
@@ -45,8 +45,8 @@ def run(self):
4545
# star3 stencil
4646
pk.parallel_for("stencil", pk.MDRangePolicy([r,r], [n-r, n-r], [t, t]), self.star3)
4747

48-
49-
pk.parallel_for(pk.MDRangePolicy([0,0], [n, n], [t, t]),
48+
49+
pk.parallel_for(pk.MDRangePolicy([0,0], [n, n], [t, t]),
5050
self.increment)
5151

5252
pk.fence()
@@ -55,7 +55,7 @@ def run(self):
5555
active_points: int = (n-2*r)*(n-2*r)
5656

5757
# verify correctness
58-
self.norm = pk.parallel_reduce(pk.MDRangePolicy([r, r], [n-r, n-r], [t, t]),
58+
self.norm = pk.parallel_reduce(pk.MDRangePolicy([r, r], [n-r, n-r], [t, t]),
5959
self.norm_reduce)
6060
pk.fence()
6161
self.norm /= active_points
@@ -78,7 +78,7 @@ def increment(self, i: int, j: int):
7878

7979
@pk.workunit
8080
def norm_reduce(self, i: int, j: int, acc: pk.Acc[pk.double]):
81-
acc += abs(self.out[i][j])
81+
acc += abs(self.out[i][j])
8282

8383
# @pk.callback
8484
# def print_result(self):
@@ -121,7 +121,7 @@ def star3(self, i: int, j: int):
121121
+self.inp[i][j+2] * 0.08333333333333333 \
122122
+self.inp[i][j+3] * 0.05555555555555555
123123

124-
if __name__ == "__main__":
124+
def run() -> None:
125125
parser = argparse.ArgumentParser()
126126
parser.add_argument('iterations', type=int)
127127
parser.add_argument('n', type=int)
@@ -169,9 +169,11 @@ def star3(self, i: int, j: int):
169169

170170
n = 2 ** n
171171
print("Number of iterations = ", iterations)
172-
print("Grid size = ", n)
172+
print("Grid size = ", n)
173173
print("Tile size = ", tile_size)
174174
print("Type of stencil = ", "star" if star else "grid")
175175
print("Radius of stencil = ", radius)
176176
pk.execute(pk.ExecutionSpace.Default, main(iterations, n, tile_size, star, radius))
177177

178+
if __name__ == "__main__":
179+
run()

examples/ParRes/workload/transpose.py

+9-6
Original file line number | Diff line number | Diff line change
@@ -11,19 +11,19 @@ def __init__(self, iterations, order, tile_size, permute):
1111
self.iterations: int = iterations
1212
self.order: int = order
1313
self.tile_size: int = tile_size
14-
self.permute: int = permute
14+
self.permute: int = permute
1515

1616
self.A: pk.View2D[pk.double] = pk.View([self.order, self.order], pk.double, layout=pk.LayoutRight)
1717
self.B: pk.View2D[pk.double] = pk.View([self.order, self.order], pk.double, layout=pk.LayoutRight)
1818

1919
self.abserr: float = 0
20-
self.transpose_time: float = 0
20+
self.transpose_time: float = 0
2121
self.addit: float = (self.iterations) * (0.5 * (self.iterations - 1))
2222

2323
@pk.main
2424
def run(self):
2525
pk.parallel_for(
26-
pk.MDRangePolicy([0,0], [self.order, self.order], [self.tile_size, self.tile_size]), self.init)
26+
pk.MDRangePolicy([0,0], [self.order, self.order], [self.tile_size, self.tile_size]), self.init)
2727
pk.fence()
2828

2929
timer = pk.Timer()
@@ -39,7 +39,7 @@ def run(self):
3939
self.transpose_time = timer.seconds()
4040

4141
self.abserr = pk.parallel_reduce(
42-
pk.MDRangePolicy([0,0], [self.order, self.order], [self.tile_size, self.tile_size]),
42+
pk.MDRangePolicy([0,0], [self.order, self.order], [self.tile_size, self.tile_size]),
4343
self.abserr_reduce)
4444

4545
pk.printf("%f\n", self.abserr)
@@ -69,9 +69,9 @@ def abserr_reduce(self, i: int, j: int, acc: pk.Acc[pk.double]):
6969
def tranpose(self, i: int, j: int):
7070
self.B[i][j] += self.A[j][i]
7171
self.A[j][i] += 1
72-
7372

74-
if __name__ == "__main__":
73+
74+
def run() -> None:
7575
parser = argparse.ArgumentParser()
7676
parser.add_argument('iterations', type=int)
7777
parser.add_argument('order', type=int)
@@ -112,3 +112,6 @@ def tranpose(self, i: int, j: int):
112112
print("Tile size = " , tile_size)
113113
print("Permute loops = " , "yes" if permute else "no")
114114
pk.execute(pk.ExecutionSpace.Default, main(iterations, order, tile_size, permute))
115+
116+
if __name__ == "__main__":
117+
run()

examples/kokkos-benchmarks/functor/bytes_and_flops.py

+13-9
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ def benchmark(self, team: pk.TeamMember):
2626
n: int = team.league_rank()
2727
for r in range(self.R):
2828
def team_for(i: int):
29-
a1: pk.double = self.A[n][i][0]
29+
a1: pk.double = self.A[n][i][0]
3030
b: pk.double = self.B[n][i][0]
3131
a2: pk.double = a1 * 1.3
3232
a3: pk.double = a2 * 1.1
@@ -51,13 +51,13 @@ def team_for(i: int):
5151

5252
pk.parallel_for(pk.TeamThreadRange(team, self.K), team_for)
5353

54-
if __name__ == "__main__":
54+
def run() -> None:
5555
# example args
56-
# Bandwidth Bound : 2 100000 1024 1 1 1 8 256 0
57-
# Cache Bound : 2 100000 1024 64 1 1 8 512 0
58-
# Compute Bound : 2 100000 1024 1 1 8 64 256 0
59-
# Load Slots Used : 2 20000 256 32 16 8 1 256 0
60-
# Inefficient Load: 2 20000 256 32 2 8 1 256 0
56+
# Bandwidth Bound : 2 100000 1024 1 1 1 8 256 0
57+
# Cache Bound : 2 100000 1024 64 1 1 8 512 0
58+
# Compute Bound : 2 100000 1024 1 1 8 64 256 0
59+
# Load Slots Used : 2 20000 256 32 16 8 1 256 0
60+
# Inefficient Load: 2 20000 256 32 2 8 1 256 0
6161
# NOTE P and U are hard coded to double and 8 because otherwise we would have a lot of duplicates
6262
parser = argparse.ArgumentParser()
6363
parser.add_argument("P", type=int, help="Precision (1==float, 2==double)")
@@ -84,7 +84,7 @@ def team_for(i: int):
8484
exit(1)
8585
if args.S != 0:
8686
print("S must be 0 (shared scratch memory not supported)")
87-
exit(1)
87+
exit(1)
8888

8989
space = pk.ExecutionSpace.OpenMP
9090
if args.execution_space:
@@ -98,7 +98,7 @@ def team_for(i: int):
9898
T = args.T
9999
S = args.S
100100
scalar_size = 8
101-
101+
102102
pk.set_default_space(space)
103103

104104
r = pk.TeamPolicy(N, T)
@@ -113,3 +113,7 @@ def team_for(i: int):
113113
print(f"NKRUFTS: {N} {K} {R} {U} {F} {T} {S} Time: {seconds} " +
114114
f"Bandwidth: {1.0 * num_bytes / seconds / (1024**3)} GiB/s GFlop/s: {1e-9 * flops / seconds}")
115115
print(w.C)
116+
117+
118+
if __name__ == "__main__":
119+
run()

0 commit comments

Comments
 (0)