diff --git a/torch_npu/profiler/analysis/prof_common_func/global_var.py b/torch_npu/profiler/analysis/prof_common_func/global_var.py index 99414b764bbe449935f598a31dbaef56cfac199b..1c49756a399261e55a4c81fbcc72123209019c59 100644 --- a/torch_npu/profiler/analysis/prof_common_func/global_var.py +++ b/torch_npu/profiler/analysis/prof_common_func/global_var.py @@ -29,7 +29,8 @@ class GlobalVar: for level1_node in root_node.child_node_list: if level1_node.is_profiler_step(): step_id = level1_node.event.name.split("#")[-1] - cls.step_range.append([step_id, level1_node.device_start, level1_node.device_end]) + cls.step_range.append([step_id, level1_node.device_start, + max(level1_node.device_end, level1_node.end_time)]) cls.torch_op_tree_node = TreeBuilder.go_through_tree(root_node) @classmethod diff --git a/torch_npu/profiler/analysis/prof_view/trace_view_parser.py b/torch_npu/profiler/analysis/prof_view/trace_view_parser.py index 394323b9698e1166596db4a25383fa1f0f8eafd1..ff04dcdbc28a37d60b2ee95fbf0297c730d80b78 100644 --- a/torch_npu/profiler/analysis/prof_view/trace_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/trace_view_parser.py @@ -36,15 +36,22 @@ class TraceViewParser(BaseViewParser): @staticmethod def _prune_trace_by_level(json_data: list) -> list: prune_config = ProfilerConfig().get_prune_config() + prune_process = ProfilerConfig().get_prune_process() if not prune_config or not json_data: return json_data result = [] + prune_pid = [] + for data in json_data: + if data.get("name", "") == "process_name" and data.get("args", {}).get("name", "") in prune_process: + prune_pid.append(data.get("pid", "")) for data in json_data: prune_flag = False + if data.get("pid") in prune_pid: + continue for prune_key in prune_config: if data.get("name", "").startswith(prune_key) or data.get("args", {}).get("name", "") == prune_key: prune_flag = True - continue + break if not prune_flag: result.append(data) return result diff --git a/torch_npu/profiler/analysis/profiler_config.py b/torch_npu/profiler/analysis/profiler_config.py index f76678d8c9f41e4875a219e9618ccab7d39fbd23..e770628888ba711177298abe78ad52ad840a7f53 100644 --- a/torch_npu/profiler/analysis/profiler_config.py +++ b/torch_npu/profiler/analysis/profiler_config.py @@ -19,8 +19,13 @@ class ProfilerConfig: Constant.LEVEL1: [], Constant.LEVEL2: [(CANNDataEnum.AI_CPU, AiCpuBean)] } + LEVEL_PROCESS_PRUNE_CONFIG = { + Constant.LEVEL0: ['CANN', 'HBM', 'LLC', 'NPU_MEM'], + Constant.LEVEL1: ['HBM', 'LLC', 'NPU_MEM'], + Constant.LEVEL2: [] + } LEVEL_TRACE_PRUNE_CONFIG = { - Constant.LEVEL0: ['CANN', 'AscendCL', 'Runtime', 'GE', 'Node', 'Model', 'Hccl', 'acl_to_npu'], + Constant.LEVEL0: ['HostToDevice'], Constant.LEVEL1: ['Runtime', 'GE', 'Node', 'Model', 'Hccl'], Constant.LEVEL2: [] } @@ -82,6 +87,9 @@ class ProfilerConfig: def get_prune_config(self): return self.LEVEL_TRACE_PRUNE_CONFIG.get(self._profiler_level) + def get_prune_process(self): + return self.LEVEL_PROCESS_PRUNE_CONFIG.get(self._profiler_level) + def is_all_kernel_headers(self): if self._ai_core_metrics != Constant.AicMetricsNone: return True