From 54a9bb6f7b7bc6565d7e77deb9329ca64aa34fcd Mon Sep 17 00:00:00 2001
From: sunboquan
Date: Thu, 28 Sep 2023 16:10:34 +0800
Subject: [PATCH] remove memory trace from L0 L1

---
 .../profiler/analysis/prof_common_func/global_var.py |  3 ++-
 .../profiler/analysis/prof_view/trace_view_parser.py |  9 ++++++++-
 torch_npu/profiler/analysis/profiler_config.py       | 10 +++++++++-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/torch_npu/profiler/analysis/prof_common_func/global_var.py b/torch_npu/profiler/analysis/prof_common_func/global_var.py
index 99414b764b..1c49756a39 100644
--- a/torch_npu/profiler/analysis/prof_common_func/global_var.py
+++ b/torch_npu/profiler/analysis/prof_common_func/global_var.py
@@ -29,7 +29,8 @@ class GlobalVar:
         for level1_node in root_node.child_node_list:
             if level1_node.is_profiler_step():
                 step_id = level1_node.event.name.split("#")[-1]
-                cls.step_range.append([step_id, level1_node.device_start, level1_node.device_end])
+                cls.step_range.append([step_id, level1_node.device_start,
+                                       max(level1_node.device_end, level1_node.end_time)])
         cls.torch_op_tree_node = TreeBuilder.go_through_tree(root_node)
 
     @classmethod
diff --git a/torch_npu/profiler/analysis/prof_view/trace_view_parser.py b/torch_npu/profiler/analysis/prof_view/trace_view_parser.py
index 394323b969..ff04dcdbc2 100644
--- a/torch_npu/profiler/analysis/prof_view/trace_view_parser.py
+++ b/torch_npu/profiler/analysis/prof_view/trace_view_parser.py
@@ -36,15 +36,22 @@ class TraceViewParser(BaseViewParser):
     @staticmethod
     def _prune_trace_by_level(json_data: list) -> list:
         prune_config = ProfilerConfig().get_prune_config()
+        prune_process = ProfilerConfig().get_prune_process()
         if not prune_config or not json_data:
             return json_data
         result = []
+        prune_pid = []
+        for data in json_data:
+            if data.get("name", "") == "process_name" and data.get("args", {}).get("name", "") in prune_process:
+                prune_pid.append(data.get("pid", ""))
         for data in json_data:
             prune_flag = False
+            if data.get("pid") in prune_pid:
+                continue
             for prune_key in prune_config:
                 if data.get("name", "").startswith(prune_key) or data.get("args", {}).get("name", "") == prune_key:
                     prune_flag = True
-                    continue
+                    break
             if not prune_flag:
                 result.append(data)
         return result
diff --git a/torch_npu/profiler/analysis/profiler_config.py b/torch_npu/profiler/analysis/profiler_config.py
index f76678d8c9..e770628888 100644
--- a/torch_npu/profiler/analysis/profiler_config.py
+++ b/torch_npu/profiler/analysis/profiler_config.py
@@ -19,8 +19,13 @@ class ProfilerConfig:
         Constant.LEVEL1: [],
         Constant.LEVEL2: [(CANNDataEnum.AI_CPU, AiCpuBean)]
     }
+    LEVEL_PROCESS_PRUNE_CONFIG = {
+        Constant.LEVEL0: ['CANN', 'HBM', 'LLC', 'NPU_MEM'],
+        Constant.LEVEL1: ['HBM', 'LLC', 'NPU_MEM'],
+        Constant.LEVEL2: []
+    }
     LEVEL_TRACE_PRUNE_CONFIG = {
-        Constant.LEVEL0: ['CANN', 'AscendCL', 'Runtime', 'GE', 'Node', 'Model', 'Hccl', 'acl_to_npu'],
+        Constant.LEVEL0: ['HostToDevice'],
         Constant.LEVEL1: ['Runtime', 'GE', 'Node', 'Model', 'Hccl'],
         Constant.LEVEL2: []
     }
@@ -82,6 +87,9 @@ class ProfilerConfig:
     def get_prune_config(self):
         return self.LEVEL_TRACE_PRUNE_CONFIG.get(self._profiler_level)
 
+    def get_prune_process(self):
+        return self.LEVEL_PROCESS_PRUNE_CONFIG.get(self._profiler_level)
+
     def is_all_kernel_headers(self):
         if self._ai_core_metrics != Constant.AicMetricsNone:
             return True
-- 
Gitee