diff --git a/api/common/benchmark.py b/api/common/benchmark.py index 1cc7e713bd..4482fde66f 100644 --- a/api/common/benchmark.py +++ b/api/common/benchmark.py @@ -122,7 +122,12 @@ def layers(self, api_name, module_name=None, **kwargs): def append_gradients(self, targets, inputs): pass - def get_running_stats(self, use_gpu, config, runtimes, walltimes=None): + def get_running_stats(self, + use_gpu, + config, + runtimes, + walltimes=None, + repeat=None): try: module_name = "torch" if self._framework == "pytorch" else self._framework module = importlib.import_module(module_name) @@ -143,9 +148,17 @@ def get_running_stats(self, use_gpu, config, runtimes, walltimes=None): if walltimes is not None: stats["wall_time"] = walltimes - flop, byte = self.compute_flop_and_byte(config) - if flop is not None: - stats["flop"] = flop - if byte is not None: - stats["byte"] = byte + if repeat is not None: + stats["repeat"] = repeat + + try: + flop, byte = self.compute_flop_and_byte(config) + if flop is not None: + stats["flop"] = flop + if byte is not None: + stats["byte"] = byte + except Exception: + print("Failed to call compute_flop_and_byte for %s." 
% + (self._framework)) + return stats diff --git a/api/common/launch.py b/api/common/launch.py index cdd05027c4..d9a8135b5b 100644 --- a/api/common/launch.py +++ b/api/common/launch.py @@ -474,6 +474,8 @@ def _set_args(args, arg, value): if task == "speed": args.benchmark_script_args.append(" --gpu_time ") args.benchmark_script_args.append(str(output_time)) + _set_args(args.benchmark_script_args, + "--get_status_without_running", "True") if task == "scheduling": args.benchmark_script_args.append(" --scheduling_times ") args.benchmark_script_args.append("\"" + str(output_time) + "\"") diff --git a/api/common/main.py b/api/common/main.py index 4f4abfe7b9..c1b8a073e7 100644 --- a/api/common/main.py +++ b/api/common/main.py @@ -135,8 +135,13 @@ def parse_args(): "task should be paddle, tensorflow, tf, pytorch, torch, both") if args.get_status_without_running: - assert args.task == "scheduling", "task must be 'scheduling' if get_status_without_running is True." - assert args.scheduling_times != "{}", "scheduling_times can't be {} if task is 'scheduling' and get_status_without_running is True." + assert args.task in [ + "speed", "scheduling" + ], "task must be 'speed' or 'scheduling' if get_status_without_running is True." + if args.task == "speed": + assert args.gpu_time != 0, "gpu_time can't be 0 if task is 'speed' and get_status_without_running is True." + if args.task == "scheduling": + assert args.scheduling_times != "{}", "scheduling_times can't be {} if task is 'scheduling' and get_status_without_running is True." if args.task == "accuracy": args.repeat = 1 diff --git a/api/common/paddle_op_benchmark.py b/api/common/paddle_op_benchmark.py index 827d0b55aa..3ad931aeb6 100644 --- a/api/common/paddle_op_benchmark.py +++ b/api/common/paddle_op_benchmark.py @@ -442,7 +442,8 @@ def _run_main_iter(step=1): # "_run_main_iter" needs to be executed firstly because # parameter "self._backward" needs to be update. 
if get_status_without_running: - stats = self.get_running_stats(use_gpu, config, None) + stats = self.get_running_stats( + use_gpu, config, runtimes=None, repeat=repeat) return None, stats runtimes = [] diff --git a/api/common/utils.py b/api/common/utils.py index 34f027fa39..6435bd5a68 100644 --- a/api/common/utils.py +++ b/api/common/utils.py @@ -270,33 +270,27 @@ def check_outputs(output_list, sys.exit(1) -def print_benchmark_result(result, - task="speed", - log_level=0, - config_params=None): - assert isinstance(result, dict), "Input result should be a dict." +def _print_runtime(log_level, runtimes, walltimes): + if runtimes is None: + return - status = collections.OrderedDict() - status["framework"] = result["framework"] - status["version"] = result["version"] - status["name"] = result["name"] - status["device"] = result["device"] - status["backward"] = result["backward"] + # print all times + repeat = len(runtimes) + seg_range = [0, 0] + if log_level == 0: + seg_range = [0, repeat] + elif log_level == 1 and repeat > 20: + seg_range = [10, repeat - 10] + for i in range(repeat): + if i < seg_range[0] or i >= seg_range[1]: + walltime = walltimes[i] if walltimes is not None else 0 + print("Iter %4d, Runtime: %.5f ms, Walltime: %.5f ms" % + (i, runtimes[i], walltime)) - scheduling_times = result.get("scheduling_times", "{}") - if task == "scheduling" and scheduling_times is not None: - status["scheduling"] = eval(scheduling_times) - runtimes = result.get("total", None) +def _compute_average_runtime(runtimes, walltimes): if runtimes is None: - status["parameters"] = config_params - print(json.dumps(status)) - return - - walltimes = result.get("wall_time", None) - gpu_time = result.get("gpu_time", None) - stable = result.get("stable", None) - diff = result.get("diff", None) + return 0, 0, 0, 0 repeat = len(runtimes) for i in range(repeat): @@ -320,47 +314,70 @@ def print_benchmark_result(result, avg_walltime = np.average(np.sort(walltimes)[begin:end]) else: 
avg_walltime = 0 + return begin, end, avg_runtime, avg_walltime - # print all times - seg_range = [0, 0] - if log_level == 0: - seg_range = [0, repeat] - elif log_level == 1 and repeat > 20: - seg_range = [10, repeat - 10] - for i in range(len(runtimes)): - if i < seg_range[0] or i >= seg_range[1]: - walltime = walltimes[i] if walltimes is not None else 0 - print("Iter %4d, Runtime: %.5f ms, Walltime: %.5f ms" % - (i, runtimes[i], walltime)) - if avg_runtime - avg_walltime > 0.001: - total = avg_runtime - avg_walltime - else: - print( - "Average runtime (%.5f ms) is less than average walltime (%.5f ms)." - % (avg_runtime, avg_walltime)) - total = 0.001 +def print_benchmark_result(result, + task="speed", + log_level=0, + config_params=None): + assert isinstance(result, dict), "Input result should be a dict." + + status = collections.OrderedDict() + status["framework"] = result["framework"] + status["version"] = result["version"] + status["name"] = result["name"] + status["device"] = result["device"] + status["backward"] = result["backward"] + scheduling_times = result.get("scheduling_times", "{}") + if task == "scheduling" and scheduling_times is not None: + status["scheduling"] = eval(scheduling_times) + status["parameters"] = config_params + print(json.dumps(status)) + return + + stable = result.get("stable", None) + diff = result.get("diff", None) if stable is not None and diff is not None: status["precision"] = collections.OrderedDict() status["precision"]["stable"] = stable status["precision"]["diff"] = diff - status["speed"] = collections.OrderedDict() - status["speed"]["repeat"] = repeat - status["speed"]["begin"] = begin - status["speed"]["end"] = end - status["speed"]["total"] = total - status["speed"]["wall_time"] = avg_walltime - status["speed"]["total_include_wall_time"] = avg_runtime - if gpu_time is not None: - avg_gpu_time = gpu_time / repeat - status["speed"]["gpu_time"] = avg_gpu_time - - flop = result.get("flop", None) - byte = result.get("byte", 
None) - if flop is not None and abs(avg_gpu_time) > 1E-6: - status["speed"]["gflops"] = float(flop) * 1E-6 / avg_gpu_time - if byte is not None and abs(avg_gpu_time) > 1E-6: - status["speed"]["gbs"] = float(byte) * 1E-6 / avg_gpu_time + + if task == "speed": + runtimes = result.get("total", None) + walltimes = result.get("wall_time", None) + gpu_time = result.get("gpu_time", None) + + repeat = len(runtimes) if runtimes is not None else result.get( + "repeat", 1) + begin, end, avg_runtime, avg_walltime = _compute_average_runtime( + runtimes, walltimes) + _print_runtime(log_level, runtimes, walltimes) + if avg_runtime - avg_walltime > 0.001: + total = avg_runtime - avg_walltime + else: + print( + "Average runtime (%.5f ms) is less than average walltime (%.5f ms)." + % (avg_runtime, avg_walltime)) + total = 0.001 + + status["speed"] = collections.OrderedDict() + status["speed"]["repeat"] = repeat + status["speed"]["begin"] = begin + status["speed"]["end"] = end + status["speed"]["total"] = total + status["speed"]["wall_time"] = avg_walltime + status["speed"]["total_include_wall_time"] = avg_runtime + if gpu_time is not None: + avg_gpu_time = gpu_time / repeat + status["speed"]["gpu_time"] = avg_gpu_time + + flop = result.get("flop", None) + byte = result.get("byte", None) + if flop is not None and abs(avg_gpu_time) > 1E-6: + status["speed"]["gflops"] = float(flop) * 1E-6 / avg_gpu_time + if byte is not None and abs(avg_gpu_time) > 1E-6: + status["speed"]["gbs"] = float(byte) * 1E-6 / avg_gpu_time status["parameters"] = config_params print(json.dumps(status))