@@ -270,33 +270,27 @@ def check_outputs(output_list,
270
270
sys .exit (1 )
271
271
272
272
273
- def print_benchmark_result (result ,
274
- task = "speed" ,
275
- log_level = 0 ,
276
- config_params = None ):
277
- assert isinstance (result , dict ), "Input result should be a dict."
273
def _print_runtime(log_level, runtimes, walltimes):
    """Print the per-iteration runtime (and walltime) lines to stdout.

    Which iterations are printed depends on ``log_level``:

    * ``log_level == 0``: nothing is printed.
    * ``log_level == 1`` and more than 20 iterations: only the first 10
      and the last 10 iterations are printed.
    * anything else: every iteration is printed.

    Args:
        log_level: Verbosity selector, interpreted as described above.
        runtimes: Sequence of per-iteration runtimes, or None. When None the
            function returns without printing anything.
        walltimes: Sequence of per-iteration walltimes, or None. A missing
            entry (None, or a sequence shorter than ``runtimes``) is printed
            as 0 instead of raising IndexError.
    """
    if runtimes is None:
        return

    repeat = len(runtimes)
    # [skip_begin, skip_end) is the half-open range of iteration indices
    # that is NOT printed; an empty range therefore means "print everything".
    if log_level == 0:
        skip_begin, skip_end = 0, repeat
    elif log_level == 1 and repeat > 20:
        skip_begin, skip_end = 10, repeat - 10
    else:
        skip_begin, skip_end = 0, 0

    num_walltimes = len(walltimes) if walltimes is not None else 0
    for i, runtime in enumerate(runtimes):
        if skip_begin <= i < skip_end:
            continue
        # Fall back to 0 when no walltime was recorded for this iteration.
        walltime = walltimes[i] if i < num_walltimes else 0
        print("Iter %4d, Runtime: %.5f ms, Walltime: %.5f ms" %
              (i, runtime, walltime))
285
289
286
- scheduling_times = result .get ("scheduling_times" , "{}" )
287
- if task == "scheduling" and scheduling_times is not None :
288
- status ["scheduling" ] = eval (scheduling_times )
289
290
290
- runtimes = result . get ( "total" , None )
291
+ def _compute_average_runtime ( runtimes , walltimes ):
291
292
if runtimes is None :
292
- status ["parameters" ] = config_params
293
- print (json .dumps (status ))
294
- return
295
-
296
- walltimes = result .get ("wall_time" , None )
297
- gpu_time = result .get ("gpu_time" , None )
298
- stable = result .get ("stable" , None )
299
- diff = result .get ("diff" , None )
293
+ return 0 , 0 , 0 , 0
300
294
301
295
repeat = len (runtimes )
302
296
for i in range (repeat ):
@@ -320,47 +314,70 @@ def print_benchmark_result(result,
320
314
avg_walltime = np .average (np .sort (walltimes )[begin :end ])
321
315
else :
322
316
avg_walltime = 0
317
+ return begin , end , avg_runtime , avg_walltime
323
318
324
- # print all times
325
- seg_range = [0 , 0 ]
326
- if log_level == 0 :
327
- seg_range = [0 , repeat ]
328
- elif log_level == 1 and repeat > 20 :
329
- seg_range = [10 , repeat - 10 ]
330
- for i in range (len (runtimes )):
331
- if i < seg_range [0 ] or i >= seg_range [1 ]:
332
- walltime = walltimes [i ] if walltimes is not None else 0
333
- print ("Iter %4d, Runtime: %.5f ms, Walltime: %.5f ms" %
334
- (i , runtimes [i ], walltime ))
335
319
336
- if avg_runtime - avg_walltime > 0.001 :
337
- total = avg_runtime - avg_walltime
338
- else :
339
- print (
340
- "Average runtime (%.5f ms) is less than average walltime (%.5f ms)."
341
- % (avg_runtime , avg_walltime ))
342
- total = 0.001
320
def print_benchmark_result(result,
                           task="speed",
                           log_level=0,
                           config_params=None):
    """Summarize a benchmark result dict and print it as one JSON line.

    The printed object always carries the ``framework``, ``version``,
    ``name``, ``device`` and ``backward`` entries of ``result`` plus a
    ``parameters`` entry. Depending on ``task`` and on which optional keys
    are present in ``result`` it additionally carries ``scheduling``,
    ``precision`` and/or ``speed`` sections.

    Args:
        result: Dict with the mandatory keys listed above. Optional keys read
            here: ``scheduling_times``, ``stable``, ``diff``, ``total``,
            ``wall_time``, ``gpu_time``, ``repeat``, ``flop``, ``byte``.
        task: ``"scheduling"`` prints only the scheduling section;
            ``"speed"`` adds the speed section; any other value prints only
            the common fields (and precision, when available).
        log_level: Forwarded to ``_print_runtime`` to select which
            per-iteration lines are printed.
        config_params: Stored verbatim under ``"parameters"``; must be
            JSON-serializable.

    Raises:
        AssertionError: If ``result`` is not a dict.
        KeyError: If one of the mandatory keys is missing from ``result``.
    """
    assert isinstance(result, dict), "Input result should be a dict."

    status = collections.OrderedDict()
    for key in ("framework", "version", "name", "device", "backward"):
        status[key] = result[key]

    scheduling_times = result.get("scheduling_times", "{}")
    if task == "scheduling" and scheduling_times is not None:
        # SECURITY NOTE(review): eval() executes whatever expression the
        # string contains. This is only acceptable while "scheduling_times"
        # is produced by trusted benchmark code; if it is always a plain
        # literal, ast.literal_eval would be the safe replacement - confirm
        # the producer's format before switching.
        status["scheduling"] = eval(scheduling_times)
        status["parameters"] = config_params
        print(json.dumps(status))
        return

    stable = result.get("stable", None)
    diff = result.get("diff", None)
    if stable is not None and diff is not None:
        status["precision"] = collections.OrderedDict()
        status["precision"]["stable"] = stable
        status["precision"]["diff"] = diff

    if task == "speed":
        runtimes = result.get("total", None)
        walltimes = result.get("wall_time", None)
        gpu_time = result.get("gpu_time", None)

        if runtimes is not None:
            repeat = len(runtimes)
        else:
            # No per-iteration samples: rely on the reported repeat count.
            repeat = result.get("repeat", 1)

        begin, end, avg_runtime, avg_walltime = _compute_average_runtime(
            runtimes, walltimes)
        _print_runtime(log_level, runtimes, walltimes)

        if avg_runtime - avg_walltime > 0.001:
            total = avg_runtime - avg_walltime
        else:
            # Only warn when runtimes were actually measured; without
            # samples both averages are 0 and the message would be noise.
            if runtimes is not None:
                print(
                    "Average runtime (%.5f ms) is less than average walltime (%.5f ms)."
                    % (avg_runtime, avg_walltime))
            # Clamp to a small positive value so "total" is never <= 0.
            total = 0.001

        speed = collections.OrderedDict()
        speed["repeat"] = repeat
        speed["begin"] = begin
        speed["end"] = end
        speed["total"] = total
        speed["wall_time"] = avg_walltime
        speed["total_include_wall_time"] = avg_runtime
        status["speed"] = speed

        # Skip the GPU statistics when repeat is 0/None: the per-iteration
        # average would otherwise raise ZeroDivisionError/TypeError.
        if gpu_time is not None and repeat:
            avg_gpu_time = gpu_time / repeat
            speed["gpu_time"] = avg_gpu_time

            flop = result.get("flop", None)
            byte = result.get("byte", None)
            # Guard against dividing by a (near-)zero average GPU time.
            gpu_time_is_usable = abs(avg_gpu_time) > 1E-6
            if flop is not None and gpu_time_is_usable:
                speed["gflops"] = float(flop) * 1E-6 / avg_gpu_time
            if byte is not None and gpu_time_is_usable:
                speed["gbs"] = float(byte) * 1E-6 / avg_gpu_time

    status["parameters"] = config_params
    print(json.dumps(status))
0 commit comments