Skip to content

Commit 4f149b5

Browse files
committed
Support running only once to obtain gpu_time for the speed task.
1 parent c65f4be commit 4f149b5

File tree

5 files changed

+105
-67
lines changed

5 files changed

+105
-67
lines changed

Diff for: api/common/benchmark.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,12 @@ def layers(self, api_name, module_name=None, **kwargs):
5252
def append_gradients(self, targets, inputs):
5353
pass
5454

55-
def get_running_stats(self, use_gpu, config, runtimes, walltimes=None):
55+
def get_running_stats(self,
56+
use_gpu,
57+
config,
58+
runtimes,
59+
walltimes=None,
60+
repeat=None):
5661
try:
5762
module_name = "torch" if self._framework == "pytorch" else self._framework
5863
module = importlib.import_module(module_name)
@@ -73,9 +78,17 @@ def get_running_stats(self, use_gpu, config, runtimes, walltimes=None):
7378
if walltimes is not None:
7479
stats["wall_time"] = walltimes
7580

76-
flop, byte = self.compute_flop_and_byte(config)
77-
if flop is not None:
78-
stats["flop"] = flop
79-
if byte is not None:
80-
stats["byte"] = byte
81+
if repeat is not None:
82+
stats["repeat"] = repeat
83+
84+
try:
85+
flop, byte = self.compute_flop_and_byte(config)
86+
if flop is not None:
87+
stats["flop"] = flop
88+
if byte is not None:
89+
stats["byte"] = byte
90+
except Exception:
91+
print("Failed to call compute_flops_and_byte for %s." %
92+
(self._framework))
93+
8194
return stats

Diff for: api/common/launch.py

+2
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,8 @@ def _set_args(args, arg, value):
458458
if task == "speed":
459459
args.benchmark_script_args.append(" --gpu_time ")
460460
args.benchmark_script_args.append(str(output_time))
461+
_set_args(args.benchmark_script_args,
462+
"--get_status_without_running", "True")
461463
if task == "scheduling":
462464
args.benchmark_script_args.append(" --scheduling_times ")
463465
args.benchmark_script_args.append("\"" + str(output_time) + "\"")

Diff for: api/common/main.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,13 @@ def parse_args():
135135
"task should be paddle, tensorflow, tf, pytorch, torch, both")
136136

137137
if args.get_status_without_running:
138-
assert args.task == "scheduling", "task must be 'scheduling' if get_status_without_running is True."
139-
assert args.scheduling_times != "{}", "scheduling_times can't be {} if task is 'scheduling' and get_status_without_running is True."
138+
assert args.task in [
139+
"speed", "scheduling"
140+
], "task must be 'speed' or 'scheduling' if get_status_without_running is True."
141+
if args.task == "speed":
142+
assert args.gpu_time != 0, "gpu_time can't be 0 if task is 'speed' and get_status_without_running is True."
143+
if args.task == "scheduling":
144+
assert args.scheduling_times != "{}", "scheduling_times can't be {} if task is 'scheduling' and get_status_without_running is True."
140145

141146
if args.task == "accuracy":
142147
args.repeat = 1

Diff for: api/common/paddle_op_benchmark.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,8 @@ def _run_main_iter(step=1):
442442
# "_run_main_iter" needs to be executed firstly because
443443
# parameter "self._backward" needs to be update.
444444
if get_status_without_running:
445-
stats = self.get_running_stats(use_gpu, config, None)
445+
stats = self.get_running_stats(
446+
use_gpu, config, runtimes=None, repeat=repeat)
446447
return None, stats
447448

448449
runtimes = []

Diff for: api/common/utils.py

+75-58
Original file line numberDiff line numberDiff line change
@@ -270,33 +270,27 @@ def check_outputs(output_list,
270270
sys.exit(1)
271271

272272

273-
def print_benchmark_result(result,
274-
task="speed",
275-
log_level=0,
276-
config_params=None):
277-
assert isinstance(result, dict), "Input result should be a dict."
273+
def _print_runtime(log_level, runtimes, walltimes):
274+
if runtimes is None:
275+
return
278276

279-
status = collections.OrderedDict()
280-
status["framework"] = result["framework"]
281-
status["version"] = result["version"]
282-
status["name"] = result["name"]
283-
status["device"] = result["device"]
284-
status["backward"] = result["backward"]
277+
# print all times
278+
repeat = len(runtimes)
279+
seg_range = [0, 0]
280+
if log_level == 0:
281+
seg_range = [0, repeat]
282+
elif log_level == 1 and repeat > 20:
283+
seg_range = [10, repeat - 10]
284+
for i in range(repeat):
285+
if i < seg_range[0] or i >= seg_range[1]:
286+
walltime = walltimes[i] if walltimes is not None else 0
287+
print("Iter %4d, Runtime: %.5f ms, Walltime: %.5f ms" %
288+
(i, runtimes[i], walltime))
285289

286-
scheduling_times = result.get("scheduling_times", "{}")
287-
if task == "scheduling" and scheduling_times is not None:
288-
status["scheduling"] = eval(scheduling_times)
289290

290-
runtimes = result.get("total", None)
291+
def _compute_average_runtime(runtimes, walltimes):
291292
if runtimes is None:
292-
status["parameters"] = config_params
293-
print(json.dumps(status))
294-
return
295-
296-
walltimes = result.get("wall_time", None)
297-
gpu_time = result.get("gpu_time", None)
298-
stable = result.get("stable", None)
299-
diff = result.get("diff", None)
293+
return 0, 0, 0, 0
300294

301295
repeat = len(runtimes)
302296
for i in range(repeat):
@@ -320,47 +314,70 @@ def print_benchmark_result(result,
320314
avg_walltime = np.average(np.sort(walltimes)[begin:end])
321315
else:
322316
avg_walltime = 0
317+
return begin, end, avg_runtime, avg_walltime
323318

324-
# print all times
325-
seg_range = [0, 0]
326-
if log_level == 0:
327-
seg_range = [0, repeat]
328-
elif log_level == 1 and repeat > 20:
329-
seg_range = [10, repeat - 10]
330-
for i in range(len(runtimes)):
331-
if i < seg_range[0] or i >= seg_range[1]:
332-
walltime = walltimes[i] if walltimes is not None else 0
333-
print("Iter %4d, Runtime: %.5f ms, Walltime: %.5f ms" %
334-
(i, runtimes[i], walltime))
335319

336-
if avg_runtime - avg_walltime > 0.001:
337-
total = avg_runtime - avg_walltime
338-
else:
339-
print(
340-
"Average runtime (%.5f ms) is less than average walltime (%.5f ms)."
341-
% (avg_runtime, avg_walltime))
342-
total = 0.001
320+
def print_benchmark_result(result,
321+
task="speed",
322+
log_level=0,
323+
config_params=None):
324+
assert isinstance(result, dict), "Input result should be a dict."
325+
326+
status = collections.OrderedDict()
327+
status["framework"] = result["framework"]
328+
status["version"] = result["version"]
329+
status["name"] = result["name"]
330+
status["device"] = result["device"]
331+
status["backward"] = result["backward"]
343332

333+
scheduling_times = result.get("scheduling_times", "{}")
334+
if task == "scheduling" and scheduling_times is not None:
335+
status["scheduling"] = eval(scheduling_times)
336+
status["parameters"] = config_params
337+
print(json.dumps(status))
338+
return
339+
340+
stable = result.get("stable", None)
341+
diff = result.get("diff", None)
344342
if stable is not None and diff is not None:
345343
status["precision"] = collections.OrderedDict()
346344
status["precision"]["stable"] = stable
347345
status["precision"]["diff"] = diff
348-
status["speed"] = collections.OrderedDict()
349-
status["speed"]["repeat"] = repeat
350-
status["speed"]["begin"] = begin
351-
status["speed"]["end"] = end
352-
status["speed"]["total"] = total
353-
status["speed"]["wall_time"] = avg_walltime
354-
status["speed"]["total_include_wall_time"] = avg_runtime
355-
if gpu_time is not None:
356-
avg_gpu_time = gpu_time / repeat
357-
status["speed"]["gpu_time"] = avg_gpu_time
358-
359-
flop = result.get("flop", None)
360-
byte = result.get("byte", None)
361-
if flop is not None and abs(avg_gpu_time) > 1E-6:
362-
status["speed"]["gflops"] = float(flop) * 1E-6 / avg_gpu_time
363-
if byte is not None and abs(avg_gpu_time) > 1E-6:
364-
status["speed"]["gbs"] = float(byte) * 1E-6 / avg_gpu_time
346+
347+
if task == "speed":
348+
runtimes = result.get("total", None)
349+
walltimes = result.get("wall_time", None)
350+
gpu_time = result.get("gpu_time", None)
351+
352+
repeat = len(runtimes) if runtimes is not None else result.get(
353+
"repeat", 1)
354+
begin, end, avg_runtime, avg_walltime = _compute_average_runtime(
355+
runtimes, walltimes)
356+
_print_runtime(log_level, runtimes, walltimes)
357+
if avg_runtime - avg_walltime > 0.001:
358+
total = avg_runtime - avg_walltime
359+
else:
360+
print(
361+
"Average runtime (%.5f ms) is less than average walltime (%.5f ms)."
362+
% (avg_runtime, avg_walltime))
363+
total = 0.001
364+
365+
status["speed"] = collections.OrderedDict()
366+
status["speed"]["repeat"] = repeat
367+
status["speed"]["begin"] = begin
368+
status["speed"]["end"] = end
369+
status["speed"]["total"] = total
370+
status["speed"]["wall_time"] = avg_walltime
371+
status["speed"]["total_include_wall_time"] = avg_runtime
372+
if gpu_time is not None:
373+
avg_gpu_time = gpu_time / repeat
374+
status["speed"]["gpu_time"] = avg_gpu_time
375+
376+
flop = result.get("flop", None)
377+
byte = result.get("byte", None)
378+
if flop is not None and abs(avg_gpu_time) > 1E-6:
379+
status["speed"]["gflops"] = float(flop) * 1E-6 / avg_gpu_time
380+
if byte is not None and abs(avg_gpu_time) > 1E-6:
381+
status["speed"]["gbs"] = float(byte) * 1E-6 / avg_gpu_time
365382
status["parameters"] = config_params
366383
print(json.dumps(status))

0 commit comments

Comments
 (0)