From 4d7419970cc9e25ff997281c044eb07401acc324 Mon Sep 17 00:00:00 2001 From: tangmengcheng <745274877@qq.com> Date: Sun, 3 Aug 2025 23:34:42 +0800 Subject: [PATCH 1/3] profiler analysis --- .../analysis/prof_bean/_memory_use_bean.py | 64 +++++---- .../analysis/prof_bean/_op_mark_bean.py | 64 ++++++--- .../analysis/prof_bean/_torch_op_bean.py | 86 ++++++++---- .../analysis/prof_common_func/_constant.py | 3 + .../analysis/prof_common_func/_db_manager.py | 2 +- .../analysis/prof_common_func/_log.py | 2 +- .../prof_common_func/_task_manager.py | 58 ++++++++ .../prof_common_func/_trace_event_manager.py | 4 +- .../prof_common_func/_tree_builder.py | 4 +- .../analysis/prof_config/_parser_config.py | 30 +++- .../prof_config/_parser_deps_config.py | 34 +++-- .../prof_parse/_fwk_cann_relation_parser.py | 19 ++- .../analysis/prof_parse/_fwk_file_parser.py | 128 +++++------------- .../prof_parse/_python_trace_parser.py | 22 +-- .../prof_view/_communication_parser.py | 2 + .../analysis/prof_view/_integrate_parser.py | 2 + .../analysis/prof_view/_kernel_view_parser.py | 2 + .../prof_view/_memory_prepare_parser.py | 43 +++--- .../analysis/prof_view/_memory_view_parser.py | 8 +- .../prof_view/_operator_view_parser.py | 6 +- .../analysis/prof_view/_stack_view_parser.py | 11 +- .../prof_view/_trace_step_time_parser.py | 6 +- .../analysis/prof_view/_trace_view_parser.py | 17 ++- .../prof_view/cann_parse/_cann_analyze.py | 2 + .../prof_view/cann_parse/_cann_export.py | 7 + .../prepare_parse/_fwk_pre_parser.py | 88 +++++++++++- .../prepare_parse/_relation_parser.py | 6 +- .../prof_db_parse/_basic_db_parser.py | 2 + .../prof_view/prof_db_parse/_db_parser.py | 2 + .../prof_db_parse/_fwk_api_db_parser.py | 39 ++++-- .../prof_db_parse/_gc_record_db_parser.py | 2 + .../prof_db_parse/_memory_db_parser.py | 12 +- .../prof_db_parse/_step_info_db_parser.py | 2 + .../_trace_step_time_db_parser.py | 6 +- 34 files changed, 517 insertions(+), 268 deletions(-) diff --git 
a/torch_npu/profiler/analysis/prof_bean/_memory_use_bean.py b/torch_npu/profiler/analysis/prof_bean/_memory_use_bean.py index 0385af8d79..566fb7a697 100644 --- a/torch_npu/profiler/analysis/prof_bean/_memory_use_bean.py +++ b/torch_npu/profiler/analysis/prof_bean/_memory_use_bean.py @@ -35,79 +35,93 @@ class MemoryUseBean(CommonBean): def __init__(self, data: dict): super().__init__(data) self._constant_data = struct.unpack(self.CONSTANT_STRUCT, self._data.get(Constant.CONSTANT_BYTES)) + self._ptr = int(self._constant_data[MemoryEnum.PTR.value]) + self._stream_ptr = int(self._constant_data[MemoryEnum.STREAM_PTR.value]) + self._time_ns = ProfilerConfig().get_local_time( + ProfilerConfig().get_timestamp_from_syscnt(self._constant_data[MemoryEnum.TIME_NS.value])) + self._alloc_size = int(self._constant_data[MemoryEnum.ALLOC_SIZE.value]) + self._total_allocated = int(self._constant_data[MemoryEnum.TOTAL_ALLOCATED.value]) + self._total_reserved = int(self._constant_data[MemoryEnum.TOTAL_RESERVED.value]) + self._total_active = int(self._constant_data[MemoryEnum.TOTAL_ACTIVE.value]) + self._device_type = int(self._constant_data[MemoryEnum.DEVICE_TYPE.value]) + self._device_index = int(self._constant_data[MemoryEnum.DEVICE_INDEX.value]) + self._component_type = int(self._constant_data[MemoryEnum.COMPONENT_TYPE.value]) + self._data_type = int(self._constant_data[MemoryEnum.DATA_TYPE.value]) + self._allocator_type = int(self._constant_data[MemoryEnum.ALLOCATOR_TYPE.value]) + self._thread_id = int(self._constant_data[MemoryEnum.THREAD_ID.value]) + self._process_id = int(self._constant_data[MemoryEnum.PROCESS_ID.value]) @property def ptr(self) -> int: - return int(self._constant_data[MemoryEnum.PTR.value]) + return self._ptr @property def stream_ptr(self) -> int: - return int(self._constant_data[MemoryEnum.STREAM_PTR.value]) + return self._stream_ptr @property def time_ns(self) -> int: - time_ns = 
ProfilerConfig().get_timestamp_from_syscnt(self._constant_data[MemoryEnum.TIME_NS.value]) - return ProfilerConfig().get_local_time(time_ns) + return self._time_ns @property - def alloc_size(self) -> int: - return int(self._constant_data[MemoryEnum.ALLOC_SIZE.value]) / Constant.B_TO_KB + def alloc_size(self) -> float: + return self._alloc_size / Constant.B_TO_KB @property - def alloc_size_for_db(self) -> int: - return int(self._constant_data[MemoryEnum.ALLOC_SIZE.value]) + def alloc_size_for_db(self) -> float: + return self._alloc_size @property - def total_allocated(self) -> int: - return int(self._constant_data[MemoryEnum.TOTAL_ALLOCATED.value]) / Constant.B_TO_MB + def total_allocated(self) -> float: + return self._total_allocated / Constant.B_TO_MB @property def total_allocated_for_db(self) -> int: - return int(self._constant_data[MemoryEnum.TOTAL_ALLOCATED.value]) + return self._total_allocated @property - def total_reserved(self) -> int: - return int(self._constant_data[MemoryEnum.TOTAL_RESERVED.value]) / Constant.B_TO_MB + def total_reserved(self) -> float: + return self._total_reserved / Constant.B_TO_MB @property def total_reserved_for_db(self) -> int: - return int(self._constant_data[MemoryEnum.TOTAL_RESERVED.value]) + return self._total_reserved @property - def total_active(self) -> int: - return int(self._constant_data[MemoryEnum.TOTAL_ACTIVE.value]) / Constant.B_TO_MB + def total_active(self) -> float: + return self._total_active / Constant.B_TO_MB @property def total_active_for_db(self) -> int: - return int(self._constant_data[MemoryEnum.TOTAL_ACTIVE.value]) + return self._total_active @property def device_type(self) -> int: - return int(self._constant_data[MemoryEnum.DEVICE_TYPE.value]) + return self._device_type @property def device_index(self) -> int: - return int(self._constant_data[MemoryEnum.DEVICE_INDEX.value]) + return self._device_index @property def component_type(self) -> int: - return 
int(self._constant_data[MemoryEnum.COMPONENT_TYPE.value]) + return self._component_type @property def data_type(self) -> int: - return int(self._constant_data[MemoryEnum.DATA_TYPE.value]) - + return self._data_type + @property def allocator_type(self) -> int: - return int(self._constant_data[MemoryEnum.ALLOCATOR_TYPE.value]) + return self._allocator_type @property def tid(self) -> int: - return int(self._constant_data[MemoryEnum.THREAD_ID.value]) + return self._thread_id @property def pid(self) -> int: - return int(self._constant_data[MemoryEnum.PROCESS_ID.value]) + return self._process_id def is_npu(self) -> bool: return self.device_type == self.NPU_ID diff --git a/torch_npu/profiler/analysis/prof_bean/_op_mark_bean.py b/torch_npu/profiler/analysis/prof_bean/_op_mark_bean.py index 34d95e97fd..788599d5f4 100644 --- a/torch_npu/profiler/analysis/prof_bean/_op_mark_bean.py +++ b/torch_npu/profiler/analysis/prof_bean/_op_mark_bean.py @@ -27,65 +27,91 @@ class OpMarkBean: Constant.NAME: 1 } CONSTANT_STRUCT = " int: + if self._pid is None: + self._pid = int(self._constant_data[OpMarkEnum.PROCESS_ID.value]) return self._pid @property def tid(self) -> int: + if self._tid is None: + self._tid = int(self._constant_data[OpMarkEnum.THREAD_ID.value]) return self._tid @property def time_ns(self) -> int: + if self._time_ns is None: + self._init_time_ns() return self._time_ns @property def corr_id(self) -> int: + if self._corr_id is None: + self._corr_id = int(self._constant_data[OpMarkEnum.CORRELATION_ID.value]) return self._corr_id @property def origin_name(self) -> str: + if self._origin_name is None: + self._origin_name = self._origin_data.get(self._NAME_ID, "") return self._origin_name @property def name(self) -> str: - if self.is_dequeue_start or self.is_dequeue_end: - return "Dequeue@" + str(self._origin_data[self.TLV_TYPE_DICT.get(Constant.NAME)]) - return "Enqueue" + if self._name is None: + if self.is_dequeue_start or self.is_dequeue_end: + self._name = "Dequeue@" + 
self.origin_name + else: + self._name = "Enqueue" + return self._name @property def args(self) -> dict: - return {"correlation_id": self.corr_id} + if self._args is None: + self._args = {"correlation_id": self.corr_id} + return self._args + + @property + def category(self): + if self._category is None: + self._category = _OpMarkCategoryEnum(int(self._constant_data[OpMarkEnum.CATEGORY.value])) + return self._category @property def is_enqueue_start(self) -> bool: - return self._category == _OpMarkCategoryEnum.ENQUEUE_START + return self.category == _OpMarkCategoryEnum.ENQUEUE_START @property def is_enqueue_end(self) -> bool: - return self._category == _OpMarkCategoryEnum.ENQUEUE_END + return self.category == _OpMarkCategoryEnum.ENQUEUE_END @property def is_dequeue_start(self) -> bool: - return self._category == _OpMarkCategoryEnum.DEQUEUE_START + return self.category == _OpMarkCategoryEnum.DEQUEUE_START @property def is_dequeue_end(self) -> bool: - return self._category == _OpMarkCategoryEnum.DEQUEUE_END + return self.category == _OpMarkCategoryEnum.DEQUEUE_END @property def is_dequeue(self) -> bool: @@ -114,3 +140,9 @@ class OpMarkBean: @dur.setter def dur(self, dur: int): self._dur = dur + + def _init_time_ns(self): + profiler_config = ProfilerConfig() + syscnt = self._constant_data[OpMarkEnum.TIME_NS.value] + self._time_ns = profiler_config.get_local_time( + profiler_config.get_timestamp_from_syscnt(syscnt)) diff --git a/torch_npu/profiler/analysis/prof_bean/_torch_op_bean.py b/torch_npu/profiler/analysis/prof_bean/_torch_op_bean.py index d001269eb2..8e0fb4e734 100644 --- a/torch_npu/profiler/analysis/prof_bean/_torch_op_bean.py +++ b/torch_npu/profiler/analysis/prof_bean/_torch_op_bean.py @@ -32,11 +32,22 @@ class TorchOpBean: Constant.FLOPS: 10 } CONSTANT_STRUCT = "<3q4QB?" 
+ _constant_unpacker = struct.Struct(CONSTANT_STRUCT) + + _OP_NAME_ID = TLV_TYPE_DICT[Constant.OP_NAME] + _CALL_STACK_ID = TLV_TYPE_DICT[Constant.CALL_STACK] + _INPUT_TENSORS_ID = TLV_TYPE_DICT[Constant.INPUT_TENSORS] + _INPUT_TENSORLISTS_ID = TLV_TYPE_DICT[Constant.INPUT_TENSORLISTS] + _INPUT_SCALARS_ID = TLV_TYPE_DICT[Constant.INPUT_SCALARS] + + _REPLACE_FIELDS = {Constant.INPUT_SHAPES, Constant.INPUT_DTYPES, Constant.CALL_STACK} + _SKIP_FIELDS = {Constant.OP_NAME, Constant.INPUT_TENSORS, Constant.INPUT_TENSORLISTS, Constant.INPUT_SCALARS} def __init__(self, data: dict): self._origin_data = data - self._constant_data = struct.unpack(self.CONSTANT_STRUCT, data.get(Constant.CONSTANT_BYTES)) + self._constant_data = self._constant_unpacker.unpack(data.get(Constant.CONSTANT_BYTES)) self._kernel_list = [] + self._pid = None self._tid = None self._name = None @@ -44,81 +55,102 @@ class TorchOpBean: self._end_ns = None self._call_stack = None self._args = None - self.init() + self._inputs = None + self._scope = None + self._dur = None @property def pid(self) -> int: + if self._pid is None: + self._pid = self._constant_data[TorchOpEnum.PROCESS_ID.value] return self._pid @property def tid(self) -> int: + if self._tid is None: + self._tid = self._constant_data[TorchOpEnum.START_THREAD_ID.value] return self._tid @property def name(self) -> str: + if self._name is None: + self._name = str(self._origin_data.get(self._OP_NAME_ID, "")) return self._name @property def ts(self) -> int: + if self._start_ns is None: + self._init_timestamps() return self._start_ns @property def dur(self) -> int: - return int(self._end_ns) - int(self._start_ns) + if self._dur is None: + if self._start_ns is None or self._end_ns is None: + self._init_timestamps() + self._dur = self._end_ns - self._start_ns + return self._dur @property def end_ns(self): + if self._end_ns is None: + self._init_timestamps() return self._end_ns @property def call_stack(self): + if self._call_stack is None: + call_stack_raw 
= self._origin_data.get(self._CALL_STACK_ID, "") + self._call_stack = call_stack_raw.replace(";", ";\r\n") if call_stack_raw else "" return self._call_stack @property def inputs(self): + if self._inputs is None: + self._inputs = { + Constant.INPUT_TENSORS: self._origin_data.get(self._INPUT_TENSORS_ID), + Constant.INPUT_TENSORLISTS: self._origin_data.get(self._INPUT_TENSORLISTS_ID), + Constant.INPUT_SCALARS: self._origin_data.get(self._INPUT_SCALARS_ID) + } return self._inputs - + @property def scope(self): + if self._scope is None: + self._scope = self._constant_data[TorchOpEnum.SCOPE.value] return self._scope - + @property def args(self): + if self._args is None: + self._args = self.get_args() return self._args @property def is_torch_op(self): return True + + def _init_timestamps(self): + profiler_config = ProfilerConfig() + start_syscnt = self._constant_data[TorchOpEnum.START_NS.value] + end_syscnt = self._constant_data[TorchOpEnum.END_NS.value] - def init(self): - self._pid = int(self._constant_data[TorchOpEnum.PROCESS_ID.value]) - self._tid = int(self._constant_data[TorchOpEnum.START_THREAD_ID.value]) - self._name = str(self._origin_data.get(self.TLV_TYPE_DICT.get(Constant.OP_NAME), "")) - self._start_ns = ProfilerConfig().get_local_time( - ProfilerConfig().get_timestamp_from_syscnt(self._constant_data[TorchOpEnum.START_NS.value])) - self._end_ns = ProfilerConfig().get_local_time( - ProfilerConfig().get_timestamp_from_syscnt(self._constant_data[TorchOpEnum.END_NS.value])) - self._scope = int(self._constant_data[TorchOpEnum.SCOPE.value]) - self._call_stack = self._origin_data.get(self.TLV_TYPE_DICT.get(Constant.CALL_STACK), "").replace(";", ";\r\n") - self._args = self.get_args() - self._inputs = { - Constant.INPUT_TENSORS: self._origin_data.get(self.TLV_TYPE_DICT.get(Constant.INPUT_TENSORS)), - Constant.INPUT_TENSORLISTS: self._origin_data.get(self.TLV_TYPE_DICT.get(Constant.INPUT_TENSORLISTS)), - Constant.INPUT_SCALARS: 
self._origin_data.get(self.TLV_TYPE_DICT.get(Constant.INPUT_SCALARS))} + self._start_ns = profiler_config.get_local_time(profiler_config.get_timestamp_from_syscnt(start_syscnt)) + self._end_ns = profiler_config.get_local_time(profiler_config.get_timestamp_from_syscnt(end_syscnt)) def get_args(self) -> dict: args = { Constant.SEQUENCE_NUMBER: int(self._constant_data[TorchOpEnum.SEQUENCE_NUMBER.value]), Constant.FORWARD_THREAD_ID: int( self._constant_data[TorchOpEnum.FORWARD_THREAD_ID.value])} + origin_keys = self._origin_data.keys() for type_name, type_id in self.TLV_TYPE_DICT.items(): - if type_name in [Constant.OP_NAME, Constant.INPUT_TENSORS, - Constant.INPUT_TENSORLISTS, Constant.INPUT_SCALARS]: - continue - if type_id not in self._origin_data.keys(): + if type_name in self._SKIP_FIELDS or type_id not in origin_keys: continue - if type_name in [Constant.INPUT_SHAPES, Constant.INPUT_DTYPES, Constant.CALL_STACK]: - args[type_name] = self._origin_data.get(type_id).replace(";", ";\r\n") + + value = self._origin_data[type_id] + if type_name in self._REPLACE_FIELDS: + args[type_name] = value.replace(";", ";\r\n") if value else "" else: - args[type_name] = self._origin_data.get(type_id) + args[type_name] = value return args diff --git a/torch_npu/profiler/analysis/prof_common_func/_constant.py b/torch_npu/profiler/analysis/prof_common_func/_constant.py index 1a62c54d6f..5ede83023c 100644 --- a/torch_npu/profiler/analysis/prof_common_func/_constant.py +++ b/torch_npu/profiler/analysis/prof_common_func/_constant.py @@ -181,6 +181,9 @@ class Constant(object): FAIL = 1 # parser name + TASK_QUEUE_PARSER = "task_queue" + TORCH_OP_PARSER = "torch_op" + PYTHON_TRACE_PRE_PARSER = "python_trace_prepare" TRACE_PRE_PARSER = "trace_prepare" TREE_BUILD_PARSER = "build_tree" CANN_EXPORT_PARSER = "export" diff --git a/torch_npu/profiler/analysis/prof_common_func/_db_manager.py b/torch_npu/profiler/analysis/prof_common_func/_db_manager.py index 593a44dd78..e95bb0b684 100644 --- 
a/torch_npu/profiler/analysis/prof_common_func/_db_manager.py +++ b/torch_npu/profiler/analysis/prof_common_func/_db_manager.py @@ -1,6 +1,6 @@ import os import sqlite3 - +import time from ._constant import Constant, print_warn_msg, print_error_msg from ._file_manager import FileManager from ._singleton import Singleton diff --git a/torch_npu/profiler/analysis/prof_common_func/_log.py b/torch_npu/profiler/analysis/prof_common_func/_log.py index 0fecde48c4..7054fb13ee 100644 --- a/torch_npu/profiler/analysis/prof_common_func/_log.py +++ b/torch_npu/profiler/analysis/prof_common_func/_log.py @@ -23,7 +23,7 @@ class ProfilerLogger: BACKUP_COUNT: Number of backup files to keep """ - LOG_FORMAT = "[%(asctime)s] [%(levelname)s] [%(name)s:%(lineno)d] %(message)s" + LOG_FORMAT = "[%(asctime)s.%(msecs)03d] [%(levelname)s] [%(name)s:%(lineno)d] %(message)s" DATE_FORMAT = "%Y-%m-%d-%H:%M:%S" DEFAULT_LOGGER_NAME = "AscendProfiler" DEFAULT_LOG_LEVEL = logging.INFO diff --git a/torch_npu/profiler/analysis/prof_common_func/_task_manager.py b/torch_npu/profiler/analysis/prof_common_func/_task_manager.py index 1753752ae0..3484d38bb9 100644 --- a/torch_npu/profiler/analysis/prof_common_func/_task_manager.py +++ b/torch_npu/profiler/analysis/prof_common_func/_task_manager.py @@ -12,6 +12,7 @@ from enum import Enum from abc import ABC, abstractmethod from torch_npu.utils._error_code import ErrCode, prof_error from ._constant import print_error_msg, Constant +from ._log import ProfilerLogger __all__ = [] @@ -109,6 +110,9 @@ class TaskInfo: self.handler = None self.pipe = (-1, -1) self.recv_buffer = None + self.start_time = None + self.end_time = None + self.execution_time = None class ConcurrentTasksManager: @@ -123,6 +127,7 @@ class ConcurrentTasksManager: self.epoll = None self.max_concurrent_num = max_concurrent_num self.progress_bar = progress_bar + self.logger = None def add_task(self, task): if not isinstance(task, ConcurrentTask): @@ -153,6 +158,7 @@ class 
ConcurrentTasksManager: print_error_msg(f"An error occurred: {e}") finally: self.finalize() + self.log_task_execution_summary() def finalize(self): for task_info in self.task_infos.values(): @@ -184,6 +190,7 @@ class ConcurrentTasksManager: def __run_one_task(self, task_info): task_info.status = TaskStatus.Running + task_info.start_time = time.time() if (task_info.task.mode & ConcurrentMode.SUB_PROCESS) != 0: self.__run_in_subprocess(task_info) elif (task_info.task.mode & ConcurrentMode.PTHREAD) != 0: @@ -258,6 +265,9 @@ class ConcurrentTasksManager: def __on_task_done(self, task_info, ret_code, output): """ be called when task.run is finish(listening thread receives ret_code) """ + task_info.end_time = time.time() + task_info.execution_time = task_info.end_time - task_info.start_time + if ret_code == 0: task_info.status = TaskStatus.Succeed if output is not None: @@ -421,3 +431,51 @@ class ConcurrentTasksManager: def __del__(self): self.clear() + + def __get_mode_string(self, mode): + modes = [] + if mode & ConcurrentMode.MAIN_PROCESS: + modes.append("MAIN_PROCESS") + if mode & ConcurrentMode.SUB_PROCESS: + modes.append("SUB_PROCESS") + if mode & ConcurrentMode.PTHREAD: + modes.append("PTHREAD") + if mode & ConcurrentMode.NON_BLOCKING: + modes.append("NON_BLOCKING") + return "|".join(modes) if modes else "UNKNOWN" + + def get_task_execution_summary(self): + summary = [] + + for task_name, task_info in self.task_infos.items(): + if task_info.execution_time is not None: + mode_str = self.__get_mode_string(task_info.task.mode) + status_str = task_info.status.name + deps_names = [dep for dep in task_info.task.deps] + deps_str = ", ".join(deps_names) if deps_names else "None" + + summary.append({ + 'task_name': task_name, + 'mode': mode_str, + 'status': status_str, + 'execution_time': task_info.execution_time, + 'start_time': task_info.start_time, + 'end_time': task_info.end_time, + 'deps': deps_str + }) + + return { + 'tasks': summary, + 'task_count': len(summary) + } 
+ + def log_task_execution_summary(self): + self.logger = ProfilerLogger.get_instance() + summary = self.get_task_execution_summary() + + self.logger.info("="*60) + self.logger.info("TASK EXECUTION SUMMARY") + self.logger.info("="*60) + + for task in summary['tasks']: + self.logger.info(f"{task['task_name']:<25} | {task['mode']:<15} | {task['status']:<10} | {task['execution_time']:.3f}s | deps: {task['deps']}") diff --git a/torch_npu/profiler/analysis/prof_common_func/_trace_event_manager.py b/torch_npu/profiler/analysis/prof_common_func/_trace_event_manager.py index fcbff9cc0c..dd10cafaa7 100644 --- a/torch_npu/profiler/analysis/prof_common_func/_trace_event_manager.py +++ b/torch_npu/profiler/analysis/prof_common_func/_trace_event_manager.py @@ -52,9 +52,9 @@ class TraceEventManager: "tid": event.tid, "ts": convert_ns2us_str(event.ts), "cat": "async_task_queue"} @classmethod - def create_fwd_flow(cls, event: any) -> list: + def create_fwd_flow(cls, fwd_dict: dict) -> list: fwd_list = [] - for fwd_id, node in event.items(): + for fwd_id, node in fwd_dict.items(): if node.get('start') and node.get('end'): if node['start']['tid'] == node['end']['tid']: continue diff --git a/torch_npu/profiler/analysis/prof_common_func/_tree_builder.py b/torch_npu/profiler/analysis/prof_common_func/_tree_builder.py index 06ec8b7d6b..949ef78c34 100644 --- a/torch_npu/profiler/analysis/prof_common_func/_tree_builder.py +++ b/torch_npu/profiler/analysis/prof_common_func/_tree_builder.py @@ -8,9 +8,9 @@ __all__ = [] class TreeBuilder: @classmethod - def build_tree(cls, event_list: list, enqueue_list: list) -> TorchOpNode: + def build_tree(cls, event_list: list, enqueue_list: list) -> list: all_node_list = [None] * (len(event_list) + 1) - event_list.extend(enqueue_list) + event_list = event_list + enqueue_list event_list.sort(key=lambda x: x.ts) root_node = TorchOpNode() last_node = root_node diff --git a/torch_npu/profiler/analysis/prof_config/_parser_config.py 
b/torch_npu/profiler/analysis/prof_config/_parser_config.py index 986ff3bb30..0789ecd834 100644 --- a/torch_npu/profiler/analysis/prof_config/_parser_config.py +++ b/torch_npu/profiler/analysis/prof_config/_parser_config.py @@ -17,7 +17,13 @@ from ..prof_common_func._constant import Constant from ..prof_view.cann_parse._cann_analyze import CANNAnalyzeParser from ..prof_view.cann_parse._cann_export import CANNExportParser, CANNTimelineParser from ..prof_view._memory_prepare_parser import MemoryPrepareParser -from ..prof_view.prepare_parse._fwk_pre_parser import TracePreParser, TreeBuildParser +from ..prof_view.prepare_parse._fwk_pre_parser import ( + PythonTracePreParser, + TracePreParser, + TreeBuildParser, + TaskQueueParser, + TorchOpParser +) from ..prof_view._kernel_view_parser import KernelViewParser from ..prof_view._operator_view_parser import OperatorViewParser from ..prof_view.prepare_parse._relation_parser import RelationParser @@ -37,6 +43,9 @@ class ParserConfig: LEVEL_NONE_CONFIG = { Constant.Text: { Constant.TENSORBOARD_TRACE_HANDLER: [ + TorchOpParser, + TaskQueueParser, + PythonTracePreParser, TracePreParser, TreeBuildParser, CANNExportParser, @@ -52,6 +61,9 @@ class ParserConfig: }, Constant.Db: { Constant.TENSORBOARD_TRACE_HANDLER: [ + TorchOpParser, + TaskQueueParser, + PythonTracePreParser, CANNExportParser, CANNTimelineParser, CANNAnalyzeParser, @@ -65,6 +77,9 @@ class ParserConfig: COMMON_CONFIG = { Constant.Text: { Constant.TENSORBOARD_TRACE_HANDLER: [ + TorchOpParser, + TaskQueueParser, + PythonTracePreParser, TracePreParser, TreeBuildParser, CANNExportParser, @@ -80,13 +95,17 @@ class ParserConfig: IntegrateParser, CommunicationParser ], - Constant.EXPORT_CHROME_TRACE: [TracePreParser, TreeBuildParser, CANNExportParser, CANNTimelineParser, - TraceViewParser], - Constant.EXPORT_STACK: [TreeBuildParser, CANNExportParser, CANNTimelineParser, StackViewParser], + Constant.EXPORT_CHROME_TRACE: [TorchOpParser, TaskQueueParser, TracePreParser, 
TreeBuildParser, + CANNExportParser, CANNTimelineParser, TraceViewParser], + Constant.EXPORT_STACK: [TorchOpParser, TaskQueueParser, TreeBuildParser, CANNExportParser, CANNTimelineParser, + StackViewParser], Constant.EXPORT_MEMORY_TIMELINE: [MemoryTimelineParser] }, Constant.Db: { Constant.TENSORBOARD_TRACE_HANDLER: [ + TorchOpParser, + TaskQueueParser, + PythonTracePreParser, CANNExportParser, CANNTimelineParser, CANNAnalyzeParser, @@ -111,6 +130,9 @@ class ParserConfig: PARSER_NAME_MAP = { # text parser + TorchOpParser: Constant.TORCH_OP_PARSER, + TaskQueueParser: Constant.TASK_QUEUE_PARSER, + PythonTracePreParser: Constant.PYTHON_TRACE_PRE_PARSER, TracePreParser: Constant.TRACE_PRE_PARSER, TreeBuildParser: Constant.TREE_BUILD_PARSER, CANNExportParser: Constant.CANN_EXPORT_PARSER, diff --git a/torch_npu/profiler/analysis/prof_config/_parser_deps_config.py b/torch_npu/profiler/analysis/prof_config/_parser_deps_config.py index e61ecc3690..267ce7207d 100644 --- a/torch_npu/profiler/analysis/prof_config/_parser_deps_config.py +++ b/torch_npu/profiler/analysis/prof_config/_parser_deps_config.py @@ -21,41 +21,53 @@ __all__ = [] class ParserDepsConfig: COMMON_CONFIG = { - Constant.TRACE_PRE_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, Constant.DEPS: []}, - Constant.TREE_BUILD_PARSER: {Constant.MODE: ConcurrentMode.PTHREAD, Constant.DEPS: []}, + Constant.TORCH_OP_PARSER: {Constant.MODE: ConcurrentMode.PTHREAD, Constant.DEPS: []}, + Constant.TASK_QUEUE_PARSER: {Constant.MODE: ConcurrentMode.PTHREAD, Constant.DEPS: []}, + Constant.PYTHON_TRACE_PRE_PARSER: {Constant.MODE: ConcurrentMode.PTHREAD, + Constant.DEPS: [Constant.TORCH_OP_PARSER, Constant.TASK_QUEUE_PARSER]}, + Constant.TRACE_PRE_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, + Constant.DEPS: [Constant.TORCH_OP_PARSER, Constant.TASK_QUEUE_PARSER, + Constant.PYTHON_TRACE_PRE_PARSER]}, + Constant.TREE_BUILD_PARSER: {Constant.MODE: ConcurrentMode.PTHREAD, + Constant.DEPS: [Constant.TORCH_OP_PARSER, 
Constant.TASK_QUEUE_PARSER]}, Constant.CANN_EXPORT_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, Constant.DEPS: []}, Constant.CANN_TIMELINE_PARSER: {Constant.MODE: ConcurrentMode.NON_BLOCKING | ConcurrentMode.PTHREAD, Constant.DEPS: []}, Constant.RELATION_PARSER: {Constant.MODE: ConcurrentMode.PTHREAD, - Constant.DEPS: [Constant.CANN_TIMELINE_PARSER]}, + Constant.DEPS: [Constant.TASK_QUEUE_PARSER, Constant.CANN_TIMELINE_PARSER]}, Constant.CANN_ANALYZE_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, Constant.DEPS: [Constant.CANN_TIMELINE_PARSER]}, Constant.OPERATOR_VIEW_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, Constant.DEPS: [Constant.TREE_BUILD_PARSER, Constant.CANN_TIMELINE_PARSER, - Constant.RELATION_PARSER]}, + Constant.RELATION_PARSER, Constant.TORCH_OP_PARSER]}, Constant.TRACE_VIEW_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, Constant.DEPS: [Constant.TREE_BUILD_PARSER, Constant.TRACE_PRE_PARSER, - Constant.CANN_TIMELINE_PARSER]}, + Constant.CANN_TIMELINE_PARSER, Constant.TASK_QUEUE_PARSER, + Constant.PYTHON_TRACE_PRE_PARSER]}, Constant.KERNEL_VIEW_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, Constant.DEPS: [Constant.TREE_BUILD_PARSER, Constant.CANN_EXPORT_PARSER, Constant.RELATION_PARSER]}, Constant.TRACE_STEP_TIME_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, Constant.DEPS: [Constant.TREE_BUILD_PARSER, Constant.CANN_TIMELINE_PARSER, - Constant.RELATION_PARSER]}, + Constant.RELATION_PARSER, Constant.TORCH_OP_PARSER]}, Constant.MEMORY_VIEW_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, - Constant.DEPS: [Constant.CANN_EXPORT_PARSER, Constant.MEMORY_PREPARE]}, + Constant.DEPS: [Constant.CANN_EXPORT_PARSER, Constant.MEMORY_PREPARE, + Constant.TORCH_OP_PARSER]}, Constant.INTEGRATE_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, Constant.DEPS: [Constant.CANN_EXPORT_PARSER]}, Constant.COMMUNICATION_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, Constant.DEPS: [Constant.TREE_BUILD_PARSER, 
Constant.CANN_ANALYZE_PARSER, Constant.RELATION_PARSER]}, Constant.STACK_VIEW_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, - Constant.DEPS: [Constant.TREE_BUILD_PARSER, Constant.CANN_TIMELINE_PARSER]}, + Constant.DEPS: [Constant.TASK_QUEUE_PARSER, Constant.TREE_BUILD_PARSER, + Constant.CANN_TIMELINE_PARSER, Constant.TORCH_OP_PARSER]}, Constant.MEMORY_PREPARE: {Constant.MODE: ConcurrentMode.PTHREAD, - Constant.DEPS: [Constant.TREE_BUILD_PARSER]}, + Constant.DEPS: [Constant.TASK_QUEUE_PARSER, Constant.TREE_BUILD_PARSER, + Constant.TORCH_OP_PARSER]}, Constant.DB_PARSER: {Constant.MODE: ConcurrentMode.PTHREAD, - Constant.DEPS: [Constant.CANN_EXPORT_PARSER, Constant.MEMORY_PREPARE, - Constant.TREE_BUILD_PARSER, Constant.CANN_ANALYZE_PARSER]}, + Constant.DEPS: [Constant.CANN_EXPORT_PARSER, Constant.MEMORY_PREPARE, Constant.TORCH_OP_PARSER, + Constant.TREE_BUILD_PARSER, Constant.CANN_ANALYZE_PARSER, Constant.TASK_QUEUE_PARSER, + Constant.PYTHON_TRACE_PRE_PARSER]}, Constant.MEMORY_TIMELINE_PARSER: {} } diff --git a/torch_npu/profiler/analysis/prof_parse/_fwk_cann_relation_parser.py b/torch_npu/profiler/analysis/prof_parse/_fwk_cann_relation_parser.py index dc7142738a..6e437f3c9e 100644 --- a/torch_npu/profiler/analysis/prof_parse/_fwk_cann_relation_parser.py +++ b/torch_npu/profiler/analysis/prof_parse/_fwk_cann_relation_parser.py @@ -1,5 +1,5 @@ import os - +from collections import defaultdict from ._fwk_file_parser import FwkFileParser from .._profiler_config import ProfilerConfig from ..prof_bean._torch_op_node import TorchOpNode @@ -20,16 +20,14 @@ class FwkCANNRelationParser: def combine_kernel_dict(cls, acl_to_npu_dict: dict, dequeue_data_list: list): if not dequeue_data_list: return acl_to_npu_dict - kernel_dict = {} + kernel_dict = defaultdict(list) index = 0 - acl_start_time_list = sorted(list(acl_to_npu_dict.keys())) - for acl_start_time in acl_start_time_list: + for acl_start_time in sorted(acl_to_npu_dict.keys()): while index < len(dequeue_data_list): if 
dequeue_data_list[index].ts > acl_start_time: break if acl_start_time <= dequeue_data_list[index].ts + dequeue_data_list[index].dur: - kernel_dict.setdefault(dequeue_data_list[index].corr_id, []).extend( - acl_to_npu_dict.get(acl_start_time, [])) + kernel_dict[dequeue_data_list[index].corr_id].extend(acl_to_npu_dict.get(acl_start_time, [])) break index += 1 return kernel_dict @@ -48,7 +46,7 @@ class FwkCANNRelationParser: break index += 1 - def get_kernel_dict(self) -> dict: + def get_kernel_dict(self, dequeue_data: list) -> dict: acl_to_npu_dict = CANNFileParser(self._profiler_path).get_acl_to_npu_data() if not acl_to_npu_dict and ProfilerConfig().get_level() != Constant.LEVEL_NONE: error_msg = ( @@ -57,8 +55,7 @@ class FwkCANNRelationParser: ) print_error_msg(error_msg) return acl_to_npu_dict - dequeue_data_list = FwkFileParser(self._profiler_path).get_dequeue_data() - return self.combine_kernel_dict(acl_to_npu_dict, dequeue_data_list) + return self.combine_kernel_dict(acl_to_npu_dict, dequeue_data) def get_step_range(self, root_node: TorchOpNode, kernel_dict: dict): if not kernel_dict: @@ -69,10 +66,10 @@ class FwkCANNRelationParser: if not step_node_list: self.logger.warning("Get step range failed, the step node list is empty.") return [] - + # Gather flow events start time in each step node if not FwkFileParser(self._profiler_path).has_task_queue_data(): - acl_start_time_list = sorted(list(kernel_dict.keys())) + acl_start_time_list = sorted(kernel_dict.keys()) self._update_step_node_info(step_node_list, acl_start_time_list) # Get step range on device by flow events step_range = [] diff --git a/torch_npu/profiler/analysis/prof_parse/_fwk_file_parser.py b/torch_npu/profiler/analysis/prof_parse/_fwk_file_parser.py index f78e3821d6..f6bb4f6814 100644 --- a/torch_npu/profiler/analysis/prof_parse/_fwk_file_parser.py +++ b/torch_npu/profiler/analysis/prof_parse/_fwk_file_parser.py @@ -1,6 +1,7 @@ import os import re from collections import defaultdict +from typing 
import List, Tuple from ..prof_bean._torch_op_bean import TorchOpBean from ..prof_common_func._binary_decoder import BinaryDecoder @@ -13,7 +14,6 @@ from ..prof_common_func._trace_event_manager import TraceEventManager from ..prof_common_func._tree_builder import TreeBuilder from ..prof_common_func._log import ProfilerLogger from ..prof_config._fwk_file_parser_config import FwkFileParserConfig -from ._python_trace_parser import PythonTraceParser __all__ = [] @@ -28,7 +28,7 @@ class FwkFileParser: ProfilerLogger.init(self._profiler_path, "FwkFileParser") self.logger = ProfilerLogger.get_instance() - def get_file_data_by_tag(self, file_tag: int) -> list: + def get_file_data_by_tag(self, file_tag: FileTag) -> list: file_path = self._file_list.get(file_tag) if not file_path: return [] @@ -41,63 +41,7 @@ class FwkFileParser: else: return BinaryDecoder.decode(all_bytes, file_bean, struct_size) - def get_enqueue_data(self) -> list: - enqueue_data_list = [] - op_mark_data = self.get_file_data_by_tag(FileTag.OP_MARK) - if not op_mark_data: - self.logger.error("Get enqueue data failed, the op mark data is empty.") - return enqueue_data_list - op_mark_data.sort(key=lambda x: x.time_ns) - tid_op_dict = defaultdict(lambda: defaultdict(list)) - match_failed_num = 0 - for op_mark in op_mark_data: - if not op_mark.is_enqueue: - continue - if op_mark.is_enqueue_start: - tid_op_dict[op_mark.tid][op_mark.origin_name].append(op_mark) - continue - start_op_list = tid_op_dict.get(op_mark.tid, {}).get(op_mark.origin_name, []) - if not start_op_list: - match_failed_num += 1 - continue - start_op = start_op_list.pop() - op_mark.ts = start_op.time_ns - op_mark.dur = op_mark.time_ns - start_op.time_ns - enqueue_data_list.append(op_mark) - start_op_list.clear() - if match_failed_num: - self.logger.warning(f"{match_failed_num} enqueue data match failed.") - return enqueue_data_list - - def get_dequeue_data(self) -> list: - dequeue_data_list = [] - op_mark_data = 
self.get_file_data_by_tag(FileTag.OP_MARK) - if not op_mark_data: - self.logger.error("Get dequeue data failed, the op mark data is empty.") - return dequeue_data_list - op_mark_data.sort(key=lambda x: x.time_ns) - tid_op_dict = defaultdict(lambda: defaultdict(list)) - match_failed_num = 0 - for op_mark in op_mark_data: - if not op_mark.is_dequeue: - continue - if op_mark.is_dequeue_start: - tid_op_dict[op_mark.tid][op_mark.origin_name].append(op_mark) - continue - start_op_list = tid_op_dict.get(op_mark.tid, {}).get(op_mark.origin_name, []) - if not start_op_list: - match_failed_num += 1 - continue - start_op = start_op_list.pop() - op_mark.ts = start_op.time_ns - op_mark.dur = op_mark.time_ns - start_op.time_ns - dequeue_data_list.append(op_mark) - start_op_list.clear() - if match_failed_num: - self.logger.warning(f"{match_failed_num} enqueue data match failed.") - return dequeue_data_list - - def get_task_queue_data(self) -> any: + def get_task_queue_data(self) -> Tuple[List, List]: enqueue_data_list, dequeue_data_list = [], [] op_mark_data = self.get_file_data_by_tag(FileTag.OP_MARK) if not op_mark_data: @@ -140,20 +84,15 @@ class FwkFileParser: self.logger.warning(f"{dequeue_match_failed_num} dequeue data match failed.") return enqueue_data_list, dequeue_data_list - def get_torch_op_tree_node(self, only_fwk: bool = False) -> list: - torch_op_list = self.get_file_data_by_tag(FileTag.TORCH_OP) - if not torch_op_list: + def get_torch_op_tree_node(self, torch_op_data: list, enqueue_data: list = []) -> list: + if not torch_op_data: self.logger.error("Get torch op tree node failed, the torch op data is empty.") return [] - enqueue_data_list = [] - if not only_fwk: - enqueue_data_list = self.get_enqueue_data() - result_data = TreeBuilder.build_tree(torch_op_list, enqueue_data_list) + result_data = TreeBuilder.build_tree(torch_op_data, enqueue_data) return result_data - def get_fwk_trace_data(self): - torch_op_data = self.get_file_data_by_tag(FileTag.TORCH_OP) - 
enqueue_data_list, dequeue_data_list = self.get_task_queue_data() + def get_fwk_trace_data(self, torch_op_data: list, enqueue_data_list: list, dequeue_data_list: list, + python_trace_data: list) -> list: if torch_op_data: pid = torch_op_data[0].pid elif enqueue_data_list or dequeue_data_list: @@ -163,9 +102,9 @@ class FwkFileParser: return [] tid_dict = {} fwk_x_event_list = [None] * ( - len(torch_op_data) + len(enqueue_data_list) * 2 + len(dequeue_data_list) * 2) + len(torch_op_data) + len(enqueue_data_list) * 2 + len(dequeue_data_list) * 2 + len(python_trace_data)) index = 0 - fwd_dict = {} + fwd_dict = defaultdict(dict) correlation_id_name_dict = {} for torch_op in torch_op_data: self.filter_fwd_bwd_event(fwd_dict, torch_op) @@ -188,25 +127,19 @@ class FwkFileParser: index += 1 fwk_x_event_list[index] = TraceEventManager.create_task_queue_flow(Constant.FLOW_START_PH, enqueue_data) index += 1 + for python_trace in python_trace_data: + fwk_x_event_list[index] = TraceEventManager.create_x_event(python_trace, "python_function") + index += 1 other_event_list = TraceEventManager.create_m_event(pid, tid_dict) other_event_list.extend(TraceEventManager.create_fwd_flow(fwd_dict)) fwk_x_event_list.extend(other_event_list) - python_trace_data = self.get_python_trace_data(set(tid_dict.keys())) - if python_trace_data: - fwk_x_event_list.extend(python_trace_data) gc_record_data = self.get_gc_record_trace_data() if gc_record_data: fwk_x_event_list.extend(gc_record_data) return fwk_x_event_list - def get_python_trace_data(self, torch_tids: set) -> list: - trace_hash_data = self.get_file_data_by_tag(FileTag.PYTHON_TRACER_HASH) - func_call_data = self.get_file_data_by_tag(FileTag.PYTHON_TRACER_FUNC) - python_trace_parser = PythonTraceParser(torch_tids, trace_hash_data, func_call_data) - return python_trace_parser.get_python_trace_data() - @classmethod - def filter_fwd_bwd_event(cls, fwd_dict: dict, torch_op: TorchOpBean): + def filter_fwd_bwd_event(cls, fwd_dict: defaultdict, 
torch_op: TorchOpBean): seq_num = torch_op.args.get("Sequence number", -1) if seq_num < 0: return @@ -214,7 +147,7 @@ class FwkFileParser: mode = "start" if torch_op.args.get("Fwd thread id") == 0 else "end" if fwd_event.get(mode, {}).get("ts", -float('inf')) < torch_op.ts: node = {mode: {'pid': torch_op.pid, 'tid': torch_op.tid, 'ts': torch_op.ts}} - fwd_dict.setdefault(seq_num, {}).update(node) + fwd_dict[seq_num].update(node) def has_task_queue_data(self): return bool(self._file_list.get(FileTag.OP_MARK)) @@ -249,9 +182,8 @@ class FwkFileParser: start_connection_id += 1 - def get_fwk_api(self) -> dict: - torch_op_data = self.get_file_data_by_tag(FileTag.TORCH_OP) - enqueue_data_list, dequeue_data_list = self.get_task_queue_data() + def get_fwk_api(self, torch_op_data: list, enqueue_data_list: list, dequeue_data_list: list, + python_trace_data: list) -> dict: if torch_op_data: pid = torch_op_data[0].pid elif enqueue_data_list or dequeue_data_list: @@ -264,7 +196,6 @@ class FwkFileParser: fwd_bwd_dict = {} torch_op_idx = 0 mstx_mark_apis = [] - torch_tids = set() for torch_op in torch_op_data: api = [torch_op.ts, torch_op.end_ns, contact_2num(pid, torch_op.tid), [], torch_op.name, @@ -277,7 +208,6 @@ class FwkFileParser: torch_op_apis.append(api) self.filter_fwd_bwd_api(fwd_bwd_dict, torch_op, torch_op_idx) torch_op_idx += 1 - torch_tids.add(torch_op.tid) connection_ids = [] task_enqueues = [] @@ -288,7 +218,7 @@ class FwkFileParser: [dequeue_data.ts, dequeue_data.ts + dequeue_data.dur, contact_2num(pid, dequeue_data.tid), dequeue_data.corr_id, dequeue_data.name]) correlation_id_name_dict[dequeue_data.corr_id] = dequeue_data.origin_name - torch_tids.add(dequeue_data.tid) + for enqueue_data in enqueue_data_list: name = enqueue_data.name if enqueue_data.corr_id in correlation_id_name_dict: @@ -298,30 +228,34 @@ class FwkFileParser: [enqueue_data.ts, enqueue_data.ts + enqueue_data.dur, contact_2num(pid, enqueue_data.tid), enqueue_data.corr_id, name]) 
connection_ids.append(enqueue_data.corr_id) - torch_tids.add(enqueue_data.tid) start_connection_id = max(connection_ids) + 1 if connection_ids else 0 self.update_fwd_bwd_connection_id(fwd_bwd_dict, torch_op_apis, start_connection_id) - trace_hash_data = self.get_file_data_by_tag(FileTag.PYTHON_TRACER_HASH) - func_call_data = self.get_file_data_by_tag(FileTag.PYTHON_TRACER_FUNC) - python_trace_parser = PythonTraceParser(torch_tids, trace_hash_data, func_call_data) - python_trace_apis = python_trace_parser.get_python_trace_api_data() + python_trace_apis = [None] * len(python_trace_data) + for i, event in enumerate(python_trace_data): + python_trace_apis[i] = [event.ts, event.ts + event.dur, contact_2num(event.pid, event.tid), event.name] + return {"torch_op": torch_op_apis, "task_enqueues": task_enqueues, "task_dequeues": task_dequeues, "python_trace": python_trace_apis, "mstx_op": mstx_mark_apis} - def get_first_fwk_op(self): - torch_op_data = self.get_file_data_by_tag(FileTag.TORCH_OP) + def get_first_fwk_op(self, torch_op_data: list): if not torch_op_data: return None return min(torch_op_data, key=lambda op: op.ts) - def get_torch_op_tids(self): - torch_op_data = self.get_file_data_by_tag(FileTag.TORCH_OP) + def get_torch_op_tids(self, torch_op_data: list = []): + if not torch_op_data: + torch_op_data = self.get_file_data_by_tag(FileTag.TORCH_OP) if not torch_op_data: return set() return {op.tid for op in torch_op_data} + def get_task_queue_tids(self, enqueue_data: list, dequeue_data: list): + enqueue_tids = {op.tid for op in enqueue_data} + dequeue_tids = {op.tid for op in dequeue_data} + return enqueue_tids | dequeue_tids + def get_gc_record_db_data(self): gc_events = self.get_file_data_by_tag(FileTag.GC_RECORD) if not gc_events: diff --git a/torch_npu/profiler/analysis/prof_parse/_python_trace_parser.py b/torch_npu/profiler/analysis/prof_parse/_python_trace_parser.py index d97c6786ce..5f3f541613 100644 --- 
a/torch_npu/profiler/analysis/prof_parse/_python_trace_parser.py +++ b/torch_npu/profiler/analysis/prof_parse/_python_trace_parser.py @@ -131,27 +131,9 @@ class PythonTraceParser: self._hash_map = {} self._param_map = {} - def get_python_trace_data(self) -> list: - trace_event_list = self._gen_python_trace_event_data() - if not trace_event_list: - return [] - trace_data = [None] * len(trace_event_list) - for i, event in enumerate(trace_event_list): - trace_data[i] = TraceEventManager.create_x_event(event, "python_function") - return trace_data - - def get_python_trace_api_data(self) -> list: - trace_event_list = self._gen_python_trace_event_data() - if not trace_event_list: - return [] - trace_api_data = [None] * len(trace_event_list) - for i, event in enumerate(trace_event_list): - trace_api_data[i] = [event.ts, event.ts + event.dur, contact_2num(event.pid, event.tid), event.name] - return trace_api_data - def get_pycall_data(self) -> list: self._gen_param_map() - return self._gen_python_trace_event_data() + return self.gen_python_trace_event_data() def _group_tarce_data_by_tid(self): trace_data_by_tid = defaultdict(lambda: []) @@ -160,7 +142,7 @@ class PythonTraceParser: trace_data_by_tid[call_bean.tid].append(call_bean) return trace_data_by_tid - def _gen_python_trace_event_data(self): + def gen_python_trace_event_data(self): self._gen_hash_map() trace_event_by_tid = self._group_tarce_data_by_tid() trace_event_list = [] diff --git a/torch_npu/profiler/analysis/prof_view/_communication_parser.py b/torch_npu/profiler/analysis/prof_view/_communication_parser.py index e07f68b785..d0b9ab4bf7 100644 --- a/torch_npu/profiler/analysis/prof_view/_communication_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_communication_parser.py @@ -63,12 +63,14 @@ class CommunicationParser(BaseParser): def run(self, deps_data: dict): ProfilerLogger.init(self._profiler_path, "CommunicationParser") self.logger = ProfilerLogger.get_instance() + 
self.logger.info("CommunicationParser start.") try: self._init_step_list(deps_data) self.generate_view() except Exception as e: self.logger.error("Failed to generate communication.json or communication_matrix.json, error: %s", str(e), exc_info=True) return Constant.FAIL, None + self.logger.info("CommunicationParser finish.") return Constant.SUCCESS, None def generate_view(self) -> None: diff --git a/torch_npu/profiler/analysis/prof_view/_integrate_parser.py b/torch_npu/profiler/analysis/prof_view/_integrate_parser.py index 28472a2411..1c296c9534 100644 --- a/torch_npu/profiler/analysis/prof_view/_integrate_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_integrate_parser.py @@ -30,12 +30,14 @@ class IntegrateParser(BaseParser): def run(self, deps_data: dict): ProfilerLogger.init(self._profiler_path, "IntegrateParser") self.logger = ProfilerLogger.get_instance() + self.logger.info("IntegrateParser start.") try: ProfilerConfig().load_info(self._profiler_path) self.generate_view() except Exception as e: self.logger.error("Failed to generate data_preprocess.csv or l2_cache.csv, error: %s", str(e), exc_info=True) return Constant.FAIL, None + self.logger.info("IntegrateParser finish.") return Constant.SUCCESS, None def generate_view(self) -> None: diff --git a/torch_npu/profiler/analysis/prof_view/_kernel_view_parser.py b/torch_npu/profiler/analysis/prof_view/_kernel_view_parser.py index ded9a612c6..b913f5ac3c 100644 --- a/torch_npu/profiler/analysis/prof_view/_kernel_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_kernel_view_parser.py @@ -35,6 +35,7 @@ class KernelViewParser(BaseParser): def run(self, deps_data: dict): ProfilerLogger.init(self._profiler_path, "KernelViewParser") self.logger = ProfilerLogger.get_instance() + self.logger.info("KernelViewParser start.") try: ProfilerConfig().load_info(self._profiler_path) self._init_step_range(deps_data) @@ -42,6 +43,7 @@ class KernelViewParser(BaseParser): except Exception as e: 
self.logger.error("Failed to generate kernel_details.csv, error: %s", str(e), exc_info=True) return Constant.FAIL, None + self.logger.info("KernelViewParser finish.") return Constant.SUCCESS, None def generate_view(self) -> None: diff --git a/torch_npu/profiler/analysis/prof_view/_memory_prepare_parser.py b/torch_npu/profiler/analysis/prof_view/_memory_prepare_parser.py index 4cb4ed35db..0ea896036e 100644 --- a/torch_npu/profiler/analysis/prof_view/_memory_prepare_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_memory_prepare_parser.py @@ -40,8 +40,11 @@ class MemoryPrepareParser(BaseParser): self.pta_record_list = [] self.memory_data = dict() self._torch_op_node = [] + self._torch_op_data = [] self._incomplete_num = 0 self._is_malloc_workspace_in_dequeue_enabled = False + self._enqueue_data = [] + self._dequeue_data = [] self._dequeue_record_dict = defaultdict(list) # {(pid, tid): [dequeue_records]} self._enqueue_record_dict = {} # {corrid: enqueue} self._dequeue_pids = set() @@ -62,19 +65,26 @@ class MemoryPrepareParser(BaseParser): return left def run(self, deps_data: dict): + self.logger.info("MemoryPrepareParser start.") try: self._torch_op_node = deps_data.get(Constant.TREE_BUILD_PARSER, []) + self._torch_op_data = deps_data.get(Constant.TORCH_OP_PARSER, []) + task_queue_data = deps_data.get(Constant.TASK_QUEUE_PARSER, {}) + self._enqueue_data = task_queue_data.get("enqueue_data", []) + self._dequeue_data = task_queue_data.get("dequeue_data", []) self.generate_view() except Exception as e: self.logger.error("Failed to generate pytorch memory data, error: %s", str(e), exc_info=True) return Constant.FAIL, {} if self._incomplete_num > 0: print_warn_msg(f"{self._incomplete_num} memory record(s) are incomplete.") + self.logger.info("MemoryPrepareParser finish.") return Constant.SUCCESS, {"pta_record_list": self.pta_record_list, "memory_data": self.memory_data} def generate_view(self) -> None: ProfilerConfig().load_info(self._profiler_path) 
self._init_torch_op() + self._init_queue_info() self._add_pta_memory_data() def _find_matched_torch_op_name(self, mem_start_ts: int, torch_ops: list) -> str: @@ -88,35 +98,30 @@ class MemoryPrepareParser(BaseParser): return matched_torch_op.name def _init_queue_info(self): - enqueue_records = FwkFileParser(self._profiler_path).get_enqueue_data() - for enqueue_record in enqueue_records: - self._enqueue_record_dict[enqueue_record.corr_id] = enqueue_record - dequeue_records = FwkFileParser(self._profiler_path).get_dequeue_data() - for dequeue_record in dequeue_records: - self._dequeue_pids.add(dequeue_record.pid) - self._dequeue_tids.add(dequeue_record.tid) - key = (dequeue_record.pid, dequeue_record.tid) - self._dequeue_record_dict.setdefault(key, []).append(dequeue_record) + self._enqueue_record_dict = {record.corr_id: record for record in self._enqueue_data} + for record in self._dequeue_data: + self._dequeue_pids.add(record.pid) + self._dequeue_tids.add(record.tid) + self._dequeue_record_dict[(record.pid, record.tid)].append(record) def _add_pta_memory_data(self): - self._init_queue_info() pta_memory_data = FwkFileParser(self._profiler_path).get_file_data_by_tag(FileTag.MEMORY) - npu_memory_dict = {} - torch_op_dict = {} - pta_memory_data = sorted(pta_memory_data, key=lambda x: x.time_ns) + npu_memory_dict = defaultdict(list) + torch_op_dict = defaultdict(list) + pta_memory_data.sort(key=lambda x: x.time_ns) for record in pta_memory_data: if record.is_npu(): if record.is_inner_allocator(): - npu_memory_dict.setdefault(record.pid, []).append(record) + npu_memory_dict[record.pid].append(record) self.pta_record_list.append(record) for torch_op in self._torch_op_node: - torch_op_dict.setdefault(torch_op.pid, []).append(torch_op) + torch_op_dict[torch_op.pid].append(torch_op) for pid_key, memory_records in npu_memory_dict.items(): torch_ops = torch_op_dict.get(pid_key, []) if not torch_ops: warn(f"Lack of torch ops to connect memory record, whose process id is 
{pid_key}") continue - torch_ops = sorted(torch_ops, key=lambda x: x.start_time) + torch_ops.sort(key=lambda x: x.start_time) memory_dict = defaultdict(list) for record in memory_records: memory_dict[record.ptr].append(record) @@ -236,7 +241,7 @@ class MemoryPrepareParser(BaseParser): active_duration_time, records[0].total_allocated_for_db, records[0].total_reserved_for_db, records[0].total_active_for_db, records[free_idx].total_allocated_for_db, records[free_idx].total_reserved_for_db, records[free_idx].total_active_for_db, records[0].stream_ptr, records[0].device_index] - ret_list.append(combine_data[:]) + ret_list.append(combine_data) return ret_list def _complete_record_entry(self, ptr_records: list, torch_ops: list) -> list: @@ -281,11 +286,11 @@ class MemoryPrepareParser(BaseParser): active_duration_time, records[0].total_allocated, records[0].total_reserved, records[0].total_active, records[free_idx].total_allocated, records[free_idx].total_reserved, records[free_idx].total_active, records[0].stream_ptr, records[0].device_tag] - ret_list.append(combine_data[:]) + ret_list.append(combine_data) return ret_list def _init_torch_op(self): if not ProfilerPathManager.get_cann_path(self._profiler_path): - self._torch_op_node = FwkFileParser(self._profiler_path).get_torch_op_tree_node(only_fwk=True) + self._torch_op_node = FwkFileParser(self._profiler_path).get_torch_op_tree_node(self._torch_op_data) if self._torch_op_node: self._torch_op_node = self._torch_op_node[1:] diff --git a/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py b/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py index 47255efd09..997ec642b6 100644 --- a/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_memory_view_parser.py @@ -34,6 +34,7 @@ class MemoryViewParser(BaseParser): self.ge_record_list = [] self.memory_data = [] self.component_list = [] + self._torch_op_data = [] @staticmethod def _get_data_from_file(file_set: 
set, file_type_bean: any, bean_list: bool = False) -> list: @@ -73,13 +74,16 @@ class MemoryViewParser(BaseParser): def run(self, deps_data: dict): ProfilerLogger.init(self._profiler_path, "MemoryViewParser") self.logger = ProfilerLogger.get_instance() + self.logger.info("MemoryViewParser start.") try: self.memory_data = deps_data.get(Constant.MEMORY_PREPARE, {}).get("memory_data", {}).get(Constant.Text, []) self.pta_record_list = deps_data.get(Constant.MEMORY_PREPARE, {}).get("pta_record_list", []) + self._torch_op_data = deps_data.get(Constant.TORCH_OP_PARSER, []) self.generate_view() except Exception as e: self.logger.error("Failed to generate operator_memory.csv or memory_record.csv, error: %s", str(e), exc_info=True) return Constant.FAIL, None + self.logger.info("MemoryViewParser finish.") return Constant.SUCCESS, None def generate_view(self) -> None: @@ -151,8 +155,8 @@ class MemoryViewParser(BaseParser): def _init_pta_data(self): if not ProfilerPathManager.get_cann_path(self._profiler_path): - torch_nop_node = FwkFileParser(self._profiler_path).get_torch_op_tree_node(only_fwk=True) - deps_data = {Constant.TREE_BUILD_PARSER: torch_nop_node} + torch_op_node = FwkFileParser(self._profiler_path).get_torch_op_tree_node(self._torch_op_data) + deps_data = {Constant.TREE_BUILD_PARSER: torch_op_node, Constant.TORCH_OP_PARSER: self._torch_op_data} _, pta_data = MemoryPrepareParser(Constant.MEMORY_PREPARE, self._param_dict).run(deps_data) self.memory_data = pta_data.get("memory_data", {}).get(Constant.Text, []) self.pta_record_list = pta_data.get("pta_record_list", []) diff --git a/torch_npu/profiler/analysis/prof_view/_operator_view_parser.py b/torch_npu/profiler/analysis/prof_view/_operator_view_parser.py index 7c10e9d4bf..a75625fa1c 100644 --- a/torch_npu/profiler/analysis/prof_view/_operator_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_operator_view_parser.py @@ -20,19 +20,23 @@ class OperatorViewParser(BaseParser): def __init__(self, name: str, 
param_dict: dict): super().__init__(name, param_dict) self._torch_op_node = [] + self._torch_op_data = [] self._root_node = None self._kernel_dict = {} def run(self, deps_data: dict): ProfilerLogger.init(self._profiler_path, "OperatorViewParser") self.logger = ProfilerLogger.get_instance() + self.logger.info("OperatorViewParser start.") try: self._torch_op_node = deps_data.get(Constant.TREE_BUILD_PARSER, []) self._kernel_dict = deps_data.get(Constant.RELATION_PARSER, {}) + self._torch_op_data = deps_data.get(Constant.TORCH_OP_PARSER, []) self.generate_view() except Exception as e: self.logger.error("Failed to generate operator_details.csv, error: %s", str(e), exc_info=True) return Constant.FAIL, None + self.logger.info("OperatorViewParser finish.") return Constant.SUCCESS, None def generate_view(self) -> None: @@ -70,7 +74,7 @@ class OperatorViewParser(BaseParser): def _init_torch_op(self): if not ProfilerPathManager.get_cann_path(self._profiler_path): - self._torch_op_node = FwkFileParser(self._profiler_path).get_torch_op_tree_node(only_fwk=True) + self._torch_op_node = FwkFileParser(self._profiler_path).get_torch_op_tree_node(self._torch_op_data) if self._torch_op_node: self._root_node = self._torch_op_node[0] self._torch_op_node = self._torch_op_node[1:] diff --git a/torch_npu/profiler/analysis/prof_view/_stack_view_parser.py b/torch_npu/profiler/analysis/prof_view/_stack_view_parser.py index b4a85271d9..07936e7e55 100644 --- a/torch_npu/profiler/analysis/prof_view/_stack_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_stack_view_parser.py @@ -11,7 +11,6 @@ from ..prof_common_func._file_manager import FileManager from ..prof_common_func._log import ProfilerLogger from ..prof_parse._fwk_cann_relation_parser import FwkCANNRelationParser from ..prof_parse._fwk_file_parser import FwkFileParser -from ....utils._path_manager import PathManager __all__ = [] @@ -19,20 +18,26 @@ __all__ = [] class StackViewParser(BaseParser): def __init__(self, name: str, 
param_dict: dict): super().__init__(name, param_dict) + self._torch_op_data = [] self._torch_op_node = [] self._root_node = None + self._dequeue_data = [] self._kernel_dict = {} self._metric = param_dict.get("metric") def run(self, deps_data: dict): ProfilerLogger.init(self._profiler_path, "StackViewParser") self.logger = ProfilerLogger.get_instance() + self.logger.info("StackViewParser start.") try: self._torch_op_node = deps_data.get(Constant.TREE_BUILD_PARSER, []) + self._torch_op_data = deps_data.get(Constant.TORCH_OP_PARSER, []) + self._dequeue_data = deps_data.get(Constant.TASK_QUEUE_PARSER, {}).get("dequeue_data", []) self.generate_view() except Exception as e: self.logger.error("Failed to export stack, error: %s", str(e), exc_info=True) return Constant.FAIL, None + self.logger.info("StackViewParser finish.") return Constant.SUCCESS, None def generate_view(self) -> None: @@ -70,14 +75,14 @@ class StackViewParser(BaseParser): def _init_data(self): if not ProfilerPathManager.get_cann_path(self._profiler_path): - self._torch_op_node = FwkFileParser(self._profiler_path).get_torch_op_tree_node(only_fwk=True) + self._torch_op_node = FwkFileParser(self._profiler_path).get_torch_op_tree_node(self._torch_op_data) if not self._torch_op_node: return self._root_node = self._torch_op_node[0] self._torch_op_node = self._torch_op_node[1:] if self._metric == Constant.METRIC_NPU_TIME: - self._kernel_dict = FwkCANNRelationParser(self._profiler_path).get_kernel_dict() + self._kernel_dict = FwkCANNRelationParser(self._profiler_path).get_kernel_dict(self._dequeue_data) if not FwkFileParser(self._profiler_path).has_task_queue_data(): for acl_ts in self._kernel_dict.keys(): TreeBuilder.update_tree_node_info(acl_ts, self._root_node) diff --git a/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py b/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py index bcdb7d2c6e..19fd2db8cd 100644 --- a/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py 
+++ b/torch_npu/profiler/analysis/prof_view/_trace_step_time_parser.py @@ -50,6 +50,7 @@ class TraceStepTimeParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) + self.torch_op_data = [] self.step_range = [] @classmethod @@ -102,7 +103,7 @@ class TraceStepTimeParser(BaseParser): if cur_step[StepInfoIndex.ID.value] == step: first_task_start_ts = cur_step[StepInfoIndex.FIRST_TASK_TS.value] if step is None: - first_fwk_op = FwkFileParser(self._profiler_path).get_first_fwk_op() + first_fwk_op = FwkFileParser(self._profiler_path).get_first_fwk_op(self.torch_op_data) return (first_task_start_ts - convert_ns2us_float(first_fwk_op.ts)) if first_fwk_op else 0 return first_task_start_ts - cur_step[StepInfoIndex.FWK_START_TS.value] return 0 @@ -165,12 +166,15 @@ class TraceStepTimeParser(BaseParser): def run(self, deps_data: dict): ProfilerLogger.init(self._profiler_path, "TraceStepTimeParser") self.logger = ProfilerLogger.get_instance() + self.logger.info("TraceStepTimeParser start.") try: self._init_step_range(deps_data) + self.torch_op_data = deps_data.get(Constant.TORCH_OP_PARSER, []) self.generate_view() except Exception as e: self.logger.error("Failed to generate step_trace_time.csv, error: %s", str(e), exc_info=True) return Constant.FAIL, None + self.logger.info("TraceStepTimeParser finish.") return Constant.SUCCESS, None def generate_view(self) -> None: diff --git a/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py b/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py index c5e572e1bc..2438bac3ec 100644 --- a/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py @@ -26,6 +26,10 @@ class TraceViewParser(BaseParser): self._output_path) else self._output_path self._trace_data = [] self._torch_op_node = [] + self._torch_op_data = [] + self._enqueue_data = [] + self._dequeue_data = [] + self._python_trace_data = [] self._root_node = 
None @staticmethod @@ -47,21 +51,29 @@ class TraceViewParser(BaseParser): def run(self, deps_data: dict): ProfilerLogger.init(self._profiler_path, "TraceViewParser") self.logger = ProfilerLogger.get_instance() + self.logger.info("TraceViewParser start.") try: ProfilerConfig().load_info(self._profiler_path) torch_op_node = deps_data.get(Constant.TREE_BUILD_PARSER, []) if torch_op_node: self._root_node = torch_op_node[0] self._torch_op_node = torch_op_node[1:] + self._torch_op_data = deps_data.get(Constant.TORCH_OP_PARSER, []) + task_queue_data = deps_data.get(Constant.TASK_QUEUE_PARSER, {}) + self._enqueue_data = task_queue_data.get("enqueue_data", []) + self._dequeue_data = task_queue_data.get("dequeue_data", []) + self._python_trace_data = deps_data.get(Constant.PYTHON_TRACE_PRE_PARSER, []) self.generate_view() except Exception as e: self.logger.error("Failed to generate trace_view.json, error: %s", str(e), exc_info=True) return Constant.FAIL, None + self.logger.info("TraceViewParser finish.") return Constant.SUCCESS, None def generate_view(self) -> None: if not ProfilerPathManager.get_cann_path(self._profiler_path): - self._trace_data = FwkFileParser(self._profiler_path).get_fwk_trace_data() + self._trace_data = FwkFileParser(self._profiler_path).get_fwk_trace_data( + self._torch_op_data, self._enqueue_data, self._dequeue_data, self._python_trace_data) else: msprof_timeline_data = CANNFileParser(self._profiler_path).get_timeline_all_data() self._trace_data.extend( @@ -86,8 +98,7 @@ class TraceViewParser(BaseParser): flow_event_list.extend( TraceEventManager.create_torch_to_npu_flow(matched_torch_op.event, kernel)) return flow_event_list - dequeue_data_list = FwkFileParser(self._profiler_path).get_dequeue_data() - kernel_dict = FwkCANNRelationParser.combine_kernel_dict(acl_to_npu_dict, dequeue_data_list) + kernel_dict = FwkCANNRelationParser.combine_kernel_dict(acl_to_npu_dict, self._dequeue_data) for torch_op_node in self._torch_op_node: for corr_id in 
torch_op_node.corr_id_self: kernel_list = kernel_dict.get(corr_id, []) diff --git a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_analyze.py b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_analyze.py index 9c1916753f..8a1439b963 100644 --- a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_analyze.py +++ b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_analyze.py @@ -38,6 +38,7 @@ class CANNAnalyzeParser(BaseParser): def run(self, deps_data: dict): ProfilerLogger.init(self._profiler_path, "CANNAnalyzeParser") self.logger = ProfilerLogger.get_instance() + self.logger.info("CANNAnalyzeParser start.") try: ProfilerConfig().load_info(self._profiler_path) if not os.path.isdir(self._cann_path): @@ -63,4 +64,5 @@ class CANNAnalyzeParser(BaseParser): print_error_msg("Failed to analyze CANN Profiling data.") self.logger.error("Failed to analyze CANN Profiling data, error: %s", str(e), exc_info=True) return Constant.FAIL, None + self.logger.info("CANNAnalyzeParser finish.") return Constant.SUCCESS, None diff --git a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py index 278bdbb7f9..d6e409d152 100644 --- a/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py +++ b/torch_npu/profiler/analysis/prof_view/cann_parse/_cann_export.py @@ -47,6 +47,7 @@ class CANNExportParser(BaseParser): def run(self, deps_data: dict): ProfilerLogger.init(self._profiler_path, "CANNExportParser") self.logger = ProfilerLogger.get_instance() + self.logger.info("CANNExportParser start.") try: ProfilerConfig().load_info(self._profiler_path) if not os.path.isdir(self._cann_path): @@ -74,6 +75,7 @@ class CANNExportParser(BaseParser): return Constant.FAIL, None end_time = datetime.utcnow() print_info_msg(f"CANN profiling data parsed in a total time of {end_time - start_time}") + self.logger.info("CANNExportParser finish.") return Constant.SUCCESS, None def 
_check_msprof_environment(self): @@ -143,6 +145,9 @@ class CANNTimelineParser(BaseParser): self._cann_path = ProfilerPathManager.get_cann_path(self._profiler_path) def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "CANNTimelineParser") + self.logger = ProfilerLogger.get_instance() + self.logger.info("CANNTimelineParser start.") if not os.path.isdir(self._cann_path): return Constant.SUCCESS, None ProfilerConfig().load_info(self._profiler_path) @@ -152,6 +157,7 @@ class CANNTimelineParser(BaseParser): if os.path.exists(output_path): for file_name in os.listdir(output_path): if file_name.endswith('.csv'): + self.logger.info("CANNTimelineParser finish.") return Constant.SUCCESS, None try: time.sleep(Constant.SLEEP_TIME) @@ -162,6 +168,7 @@ class CANNTimelineParser(BaseParser): while True: for file in os.listdir(self._cann_path): if re.match(patten, file) and os.path.isfile(os.path.join(self._cann_path, file)): + self.logger.info("CANNTimelineParser finish.") return Constant.SUCCESS, None try: time.sleep(Constant.SLEEP_TIME) diff --git a/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py b/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py index a54ec86d40..cce9455f7d 100644 --- a/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py @@ -17,8 +17,10 @@ import os from ...prof_common_func._constant import Constant from ...prof_common_func._file_manager import FileManager +from ...prof_common_func._file_tag import FileTag from ...prof_common_func._log import ProfilerLogger from ...prof_parse._fwk_file_parser import FwkFileParser +from ...prof_parse._python_trace_parser import PythonTraceParser from ...prof_view._base_parser import BaseParser __all__ = [] @@ -32,14 +34,22 @@ class TracePreParser(BaseParser): def run(self, deps_data: dict): ProfilerLogger.init(self._profiler_path, "TracePreParser") self.logger = 
ProfilerLogger.get_instance() + self.logger.info("TracePreParser start.") try: - fwk_trace_data = FwkFileParser(self._profiler_path).get_fwk_trace_data() + torch_op_data = deps_data.get(Constant.TORCH_OP_PARSER, []) + task_queue_data = deps_data.get(Constant.TASK_QUEUE_PARSER, {}) + enqueue_data = task_queue_data.get("enqueue_data", []) + dequeue_data = task_queue_data.get("dequeue_data", []) + python_trace_data = deps_data.get(Constant.PYTHON_TRACE_PRE_PARSER, []) + fwk_trace_data = FwkFileParser(self._profiler_path).get_fwk_trace_data( + torch_op_data, enqueue_data, dequeue_data, python_trace_data) trace_file_path = os.path.join(self._output_path, Constant.TRACE_VIEW_TEMP) if os.path.isdir( self._output_path) else self._output_path FileManager.create_prepare_trace_json_by_path(trace_file_path, fwk_trace_data) except Exception as e: self.logger.error("Failed to create prepare trace json, error: %s", str(e), exc_info=True) return Constant.FAIL, None + self.logger.info("TracePreParser finish.") return Constant.SUCCESS, None @@ -47,13 +57,83 @@ class TreeBuildParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) - ProfilerLogger.init(self._profiler_path, "TracePreParser") - self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "TreeBuildParser") + self.logger = ProfilerLogger.get_instance() + self.logger.info("TreeBuildParser start.") + enqueue_data = deps_data.get(Constant.TASK_QUEUE_PARSER, {}).get("enqueue_data", []) + torch_op_data = deps_data.get(Constant.TORCH_OP_PARSER, []) try: - torch_op_node = FwkFileParser(self._profiler_path).get_torch_op_tree_node() + torch_op_node = FwkFileParser(self._profiler_path).get_torch_op_tree_node(torch_op_data, enqueue_data) except Exception as e: self.logger.error("Failed to build torch op tree, error: %s", str(e), exc_info=True) return Constant.FAIL, [] + self.logger.info("TreeBuildParser finish.") return 
Constant.SUCCESS, torch_op_node + + +class TaskQueueParser(BaseParser): + + def __init__(self, name: str, param_dict: dict): + super().__init__(name, param_dict) + + def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "TaskQueueParser") + self.logger = ProfilerLogger.get_instance() + self.logger.info("TaskQueueParser start.") + try: + enqueue_data, dequeue_data = FwkFileParser(self._profiler_path).get_task_queue_data() + except Exception as e: + self.logger.error("Failed to get task queue data, error: %s", str(e), exc_info=True) + return Constant.FAIL, {} + self.logger.info("TaskQueueParser finish.") + return Constant.SUCCESS, {"enqueue_data": enqueue_data, "dequeue_data": dequeue_data} + + +class TorchOpParser(BaseParser): + + def __init__(self, name: str, param_dict: dict): + super().__init__(name, param_dict) + + def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "TorchOpParser") + self.logger = ProfilerLogger.get_instance() + self.logger.info("TorchOpParser start.") + try: + torch_op_data = FwkFileParser(self._profiler_path).get_file_data_by_tag(FileTag.TORCH_OP) + except Exception as e: + self.logger.error("Failed to get torch op tree, error: %s", str(e), exc_info=True) + return Constant.FAIL, [] + self.logger.info("TorchOpParser finish.") + return Constant.SUCCESS, torch_op_data + + +class PythonTracePreParser(BaseParser): + + def __init__(self, name: str, param_dict: dict): + super().__init__(name, param_dict) + + def run(self, deps_data: dict): + ProfilerLogger.init(self._profiler_path, "PythonTracePreParser") + self.logger = ProfilerLogger.get_instance() + self.logger.info("PythonTracePreParser start.") + try: + fwk_file_parser = FwkFileParser(self._profiler_path) + trace_hash_data = fwk_file_parser.get_file_data_by_tag(FileTag.PYTHON_TRACER_HASH) + func_call_data = fwk_file_parser.get_file_data_by_tag(FileTag.PYTHON_TRACER_FUNC) + if not (trace_hash_data and func_call_data): + return Constant.SUCCESS, [] + 
torch_op_data = deps_data.get(Constant.TORCH_OP_PARSER, []) + task_queue_data = deps_data.get(Constant.TASK_QUEUE_PARSER, {}) + enqueue_data = task_queue_data.get("enqueue_data", []) + dequeue_data = task_queue_data.get("dequeue_data", []) + torch_op_tids = fwk_file_parser.get_torch_op_tids(torch_op_data) + task_queue_tids = fwk_file_parser.get_task_queue_tids(enqueue_data, dequeue_data) + python_trace_parser = PythonTraceParser(torch_op_tids | task_queue_tids, trace_hash_data, func_call_data) + python_tracer_data = python_trace_parser.gen_python_trace_event_data() + except Exception as e: + self.logger.error("Failed to get python tracer data, error: %s", str(e), exc_info=True) + return Constant.FAIL, [] + self.logger.info("PythonTracePreParser finish.") + return Constant.SUCCESS, python_tracer_data diff --git a/torch_npu/profiler/analysis/prof_view/prepare_parse/_relation_parser.py b/torch_npu/profiler/analysis/prof_view/prepare_parse/_relation_parser.py index 27437eaa65..f48c50713b 100644 --- a/torch_npu/profiler/analysis/prof_view/prepare_parse/_relation_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prepare_parse/_relation_parser.py @@ -23,13 +23,17 @@ __all__ = [] class RelationParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) + self._dequeue_data = [] def run(self, deps_data: dict): ProfilerLogger.init(self._profiler_path, "RelationParser") self.logger = ProfilerLogger.get_instance() + self.logger.info("RelationParser start.") + self._dequeue_data = deps_data.get(Constant.TASK_QUEUE_PARSER, {}).get("dequeue_data", []) try: - kernel_dict = FwkCANNRelationParser(self._profiler_path).get_kernel_dict() + kernel_dict = FwkCANNRelationParser(self._profiler_path).get_kernel_dict(self._dequeue_data) except Exception as e: self.logger.error("Failed to get acl to npu flow dict, error: %s", str(e), exc_info=True) return Constant.FAIL, {} + self.logger.info("RelationParser finish.") return Constant.SUCCESS, 
kernel_dict diff --git a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_basic_db_parser.py b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_basic_db_parser.py index 940fa6417d..0726e59920 100644 --- a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_basic_db_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_basic_db_parser.py @@ -24,6 +24,7 @@ class BasicDbParser(BaseParser): self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + self.logger.info("BasicDbParser start.") try: cann_db_path = self.get_cann_db_path() if cann_db_path: @@ -36,6 +37,7 @@ class BasicDbParser(BaseParser): except Exception as error: self.logger.error("Failed to generate basic db file. Error: %s", str(error), exc_info=True) return Constant.FAIL, "" + self.logger.info("BasicDbParser finish.") return Constant.SUCCESS, "" def get_cann_db_path(self): diff --git a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_db_parser.py b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_db_parser.py index 89cc322980..83cc061c6a 100644 --- a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_db_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_db_parser.py @@ -37,6 +37,7 @@ class DbParser(BaseParser): self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + self.logger.info("DbParser start.") ProfilerConfig().load_info(self._profiler_path) torch_db_path = DbConstant.DB_ASCEND_PYTORCH_PROFILER if ProfilerConfig().rank_id != -1: @@ -60,4 +61,5 @@ class DbParser(BaseParser): finally: TorchDb().close() AnalysisDb().close() + self.logger.info("DbParser finish.") return Constant.SUCCESS, "" diff --git a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_fwk_api_db_parser.py b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_fwk_api_db_parser.py index 2ae2ac6474..eae0578621 100644 --- a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_fwk_api_db_parser.py +++ 
b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_fwk_api_db_parser.py @@ -61,16 +61,26 @@ class FwkApiDbParser(BaseParser): self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + self.logger.info("FwkApiDbParser start.") try: self.init_db_connect() self.set_start_string_id() self.get_max_cann_id() - fwk_api_data = FwkFileParser(self._profiler_path).get_fwk_api() + torch_op_data = deps_data.get(Constant.TORCH_OP_PARSER, []) + task_queue_data = deps_data.get(Constant.TASK_QUEUE_PARSER, {}) + enqueue_data = task_queue_data.get("enqueue_data", []) + dequeue_data = task_queue_data.get("dequeue_data", []) + python_trace_data = deps_data.get(Constant.PYTHON_TRACE_PRE_PARSER, []) + fwk_api_data = FwkFileParser(self._profiler_path).get_fwk_api( + torch_op_data, enqueue_data, dequeue_data, python_trace_data) + self.logger.info("FwkApiDbParser get fwk api data.") self.get_api_data_for_db(fwk_api_data) + self.logger.info("FwkApiDbParser get api data for db.") self.save_api_data_to_db() except Exception as error: self.logger.error("Failed to generate framework api table, error: %s", str(error), exc_info=True) return Constant.FAIL, None + self.logger.info("FwkApiDbParser finish.") return Constant.SUCCESS, None def get_api_data_for_db(self, fwk_api_data: dict): @@ -78,21 +88,24 @@ class FwkApiDbParser(BaseParser): return task_enqueues = fwk_api_data.get("task_enqueues", []) task_dequeues = fwk_api_data.get("task_dequeues", []) + connectionId_manager = ConnectionIdManager() + str2Id_manager = Str2IdManager() + callchainId_manager = CallChainIdManager() for enqueue in task_enqueues: self._fwk_apis.append([enqueue[TaskQueueDataOri.START_NS.value], enqueue[TaskQueueDataOri.END_NS.value], enqueue[TaskQueueDataOri.GLOBAL_TID.value], - ConnectionIdManager().get_id_from_connection_ids( + connectionId_manager.get_id_from_connection_ids( [enqueue[TaskQueueDataOri.CORRELATION_ID.value] + self._max_cann_connection_id]), - 
Str2IdManager().get_id_from_str(enqueue[TaskQueueDataOri.NAME.value]), + str2Id_manager.get_id_from_str(enqueue[TaskQueueDataOri.NAME.value]), None, None, None, None, None, ApiType.TASK_QUEUE.value]) for dequeue in task_dequeues: self._fwk_apis.append([dequeue[TaskQueueDataOri.START_NS.value], dequeue[TaskQueueDataOri.END_NS.value], dequeue[TaskQueueDataOri.GLOBAL_TID.value], - ConnectionIdManager().get_id_from_connection_ids( + connectionId_manager.get_id_from_connection_ids( [dequeue[TaskQueueDataOri.CORRELATION_ID.value] + self._max_cann_connection_id]), - Str2IdManager().get_id_from_str(dequeue[TaskQueueDataOri.NAME.value]), + str2Id_manager.get_id_from_str(dequeue[TaskQueueDataOri.NAME.value]), None, None, None, None, None, ApiType.TASK_QUEUE.value]) python_trace_apis = fwk_api_data.get("python_trace", []) for python_trace_api in python_trace_apis: @@ -100,7 +113,7 @@ class FwkApiDbParser(BaseParser): python_trace_api[PythonTraceApiDataOri.END_NS.value], python_trace_api[PythonTraceApiDataOri.GLOBAL_TID.value], None, - Str2IdManager().get_id_from_str(python_trace_api[PythonTraceApiDataOri.NAME.value]), + str2Id_manager.get_id_from_str(python_trace_api[PythonTraceApiDataOri.NAME.value]), None, None, None, None, None, ApiType.PYTHON_TRACE.value]) torch_op_apis = fwk_api_data.get("torch_op", []) if not torch_op_apis: @@ -116,13 +129,13 @@ class FwkApiDbParser(BaseParser): self._fwk_apis.append([torch_op_api[TorchOpDataOri.START_NS.value], torch_op_api[TorchOpDataOri.END_NS.value], torch_op_api[TorchOpDataOri.GLOBAL_TID.value], - None if not torch_op_api[TorchOpDataOri.CONNECTION_ID.value] else ConnectionIdManager().get_id_from_connection_ids(torch_op_api[TorchOpDataOri.CONNECTION_ID.value]), - Str2IdManager().get_id_from_str(torch_op_api[TorchOpDataOri.NAME.value]), + None if not torch_op_api[TorchOpDataOri.CONNECTION_ID.value] else connectionId_manager.get_id_from_connection_ids(torch_op_api[TorchOpDataOri.CONNECTION_ID.value]), + 
str2Id_manager.get_id_from_str(torch_op_api[TorchOpDataOri.NAME.value]), torch_op_api[TorchOpDataOri.SEQUENCE_NUM.value], torch_op_api[TorchOpDataOri.FWD_THREAD_ID.value], - None if not torch_op_api[TorchOpDataOri.INPUT_DIMS.value] else Str2IdManager().get_id_from_str(torch_op_api[TorchOpDataOri.INPUT_DIMS.value]), - None if not torch_op_api[TorchOpDataOri.INPUT_SHAPES.value] else Str2IdManager().get_id_from_str(torch_op_api[TorchOpDataOri.INPUT_SHAPES.value]), - None if not torch_op_api[TorchOpDataOri.CALL_STACK.value] else CallChainIdManager().get_callchain_id_from_callstack(torch_op_api[TorchOpDataOri.CALL_STACK.value]), + None if not torch_op_api[TorchOpDataOri.INPUT_DIMS.value] else str2Id_manager.get_id_from_str(torch_op_api[TorchOpDataOri.INPUT_DIMS.value]), + None if not torch_op_api[TorchOpDataOri.INPUT_SHAPES.value] else str2Id_manager.get_id_from_str(torch_op_api[TorchOpDataOri.INPUT_SHAPES.value]), + None if not torch_op_api[TorchOpDataOri.CALL_STACK.value] else callchainId_manager.get_callchain_id_from_callstack(torch_op_api[TorchOpDataOri.CALL_STACK.value]), ApiType.TORCH_OP.value]) mstx_mark_apis = fwk_api_data.get("mstx_op", []) if not mstx_mark_apis: @@ -130,8 +143,8 @@ class FwkApiDbParser(BaseParser): self.get_mstx_mark_op_connection_ids_with_cann_api(task_enqueues, task_dequeues, mstx_mark_apis) for mstx_mark_api in mstx_mark_apis: self._fwk_apis.append([mstx_mark_api[TorchOpDataOri.START_NS.value], mstx_mark_api[TorchOpDataOri.END_NS.value], mstx_mark_api[TorchOpDataOri.GLOBAL_TID.value], - None if not mstx_mark_api[TorchOpDataOri.CONNECTION_ID.value] else ConnectionIdManager().get_id_from_connection_ids(mstx_mark_api[TorchOpDataOri.CONNECTION_ID.value]), - Str2IdManager().get_id_from_str(mstx_mark_api[TorchOpDataOri.NAME.value]), + None if not mstx_mark_api[TorchOpDataOri.CONNECTION_ID.value] else connectionId_manager.get_id_from_connection_ids(mstx_mark_api[TorchOpDataOri.CONNECTION_ID.value]), + 
str2Id_manager.get_id_from_str(mstx_mark_api[TorchOpDataOri.NAME.value]), None, mstx_mark_api[TorchOpDataOri.FWD_THREAD_ID.value], None, None, None, ApiType.MSTX_OP.value]) diff --git a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_gc_record_db_parser.py b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_gc_record_db_parser.py index a570e909e3..b464241f7c 100644 --- a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_gc_record_db_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_gc_record_db_parser.py @@ -30,6 +30,7 @@ class GCRecordDbParser(BaseParser): self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + self.logger.info("GCRecordDbParser start.") try: self.init_db_connect() self._gc_record_data = FwkFileParser(self._profiler_path).get_gc_record_db_data() @@ -37,6 +38,7 @@ class GCRecordDbParser(BaseParser): except Exception as error: self.logger.error("Failed to generate gc record table, error: %s", str(error), exc_info=True) return Constant.FAIL, None + self.logger.info("GCRecordDbParser finish.") return Constant.SUCCESS, None def init_db_connect(self) -> None: diff --git a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_memory_db_parser.py b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_memory_db_parser.py index 34a5fc27f8..cd7482e803 100644 --- a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_memory_db_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_memory_db_parser.py @@ -60,6 +60,7 @@ class MemoryDbParser(BaseParser): self._pta_record_list = [] self._ge_record_list = [] self._record_list = [] + self._torch_op_data = [] ProfilerLogger.init(self._profiler_path, "MemoryDbParser") self.logger = ProfilerLogger.get_instance() @@ -77,16 +78,19 @@ class MemoryDbParser(BaseParser): return [cur_record, pta_ge_record_list] def run(self, deps_data: dict): + self.logger.info("MemoryDbParser start.") try: self.init_db_connect() self.set_start_string_id() 
self._pta_op_memory_data = deps_data.get(Constant.MEMORY_PREPARE, {}).get("memory_data", {}).get(Constant.Db, []) self._pta_memory_bean_list = deps_data.get(Constant.MEMORY_PREPARE, {}).get("pta_record_list", []) + self._torch_op_data = deps_data.get(Constant.TORCH_OP_PARSER, []) self.init_pta_memory_data() self.save_memory_data_to_db() except Exception as error: self.logger.error("Failed to generate memory_record table or op_memory table, error: %s", str(error), exc_info=True) return Constant.FAIL, None + self.logger.info("MemoryDbParser finish.") return Constant.SUCCESS, None def init_db_connect(self): @@ -140,14 +144,16 @@ class MemoryDbParser(BaseParser): operator_value = namedtuple('operator_value', ['size', 'timestamp', 'total_allocate', 'total_reserve']) for mem_record in ge_mem_records: if mem_record[GeOpMemRecordsOri.SIZE.value] > 0: - record_key = operator_key(operator=mem_record[GeOpMemRecordsOri.NAME.value], addr=mem_record[GeOpMemRecordsOri.ADDR.value], device_id=mem_record[GeOpMemRecordsOri.DEVICE_ID.value]) + record_key = operator_key(operator=mem_record[GeOpMemRecordsOri.NAME.value], + addr=mem_record[GeOpMemRecordsOri.ADDR.value], device_id=mem_record[GeOpMemRecordsOri.DEVICE_ID.value]) record_value = operator_value(size=mem_record[GeOpMemRecordsOri.SIZE.value], timestamp=mem_record[GeOpMemRecordsOri.TIME_STAMP.value], total_allocate=mem_record[GeOpMemRecordsOri.TOTAL_ALLOCATED.value], total_reserve=mem_record[GeOpMemRecordsOri.TOTAL_RESERVED.value]) allocated_datas[record_key] = record_value elif mem_record[GeOpMemRecordsOri.SIZE.value] < 0: - record_key = operator_key(operator=mem_record[GeOpMemRecordsOri.NAME.value], addr=mem_record[GeOpMemRecordsOri.ADDR.value], device_id=mem_record[GeOpMemRecordsOri.DEVICE_ID.value]) + record_key = operator_key(operator=mem_record[GeOpMemRecordsOri.NAME.value], + addr=mem_record[GeOpMemRecordsOri.ADDR.value], device_id=mem_record[GeOpMemRecordsOri.DEVICE_ID.value]) record_value = 
operator_value(size=mem_record[GeOpMemRecordsOri.SIZE.value], timestamp=mem_record[GeOpMemRecordsOri.TIME_STAMP.value], total_allocate=mem_record[GeOpMemRecordsOri.TOTAL_ALLOCATED.value], @@ -238,7 +244,7 @@ class MemoryDbParser(BaseParser): def init_pta_memory_data(self): if not ProfilerPathManager.get_cann_path(self._profiler_path): - torch_nop_node = FwkFileParser(self._profiler_path).get_torch_op_tree_node(only_fwk=True) + torch_nop_node = FwkFileParser(self._profiler_path).get_torch_op_tree_node(self._torch_op_data) deps_data = {Constant.TREE_BUILD_PARSER: torch_nop_node} _, pta_data = MemoryPrepareParser(Constant.MEMORY_PREPARE, self._param_dict).run(deps_data) self._pta_op_memory_data = pta_data.get("memory_data", {}).get(Constant.Db, []) diff --git a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_step_info_db_parser.py b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_step_info_db_parser.py index df3b8fea4f..3c6e5674eb 100644 --- a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_step_info_db_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_step_info_db_parser.py @@ -31,12 +31,14 @@ class StepInfoDbParser(BaseParser): self.logger = ProfilerLogger.get_instance() def run(self, deps_data: dict): + self.logger.info("StepInfoDbParser start.") try: torch_op_node = deps_data.get(Constant.TREE_BUILD_PARSER, []) step_range = self.get_step_range(torch_op_node[0] if torch_op_node else None) except Exception as error: self.logger.error("Failed to get step info from db, error: %s", str(error), exc_info=True) return Constant.FAIL, [] + self.logger.info("StepInfoDbParser finish.") return Constant.SUCCESS, step_range def get_api_data_in_time_range(self, begin_ts, end_ts) -> list: diff --git a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_trace_step_time_db_parser.py b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_trace_step_time_db_parser.py index db82064fde..b13da495ec 100644 --- 
a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_trace_step_time_db_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_trace_step_time_db_parser.py @@ -37,6 +37,7 @@ class TraceStepTimeDbParser(BaseParser): def __init__(self, name: str, param_dict: dict): super().__init__(name, param_dict) + self.torch_op_data = [] self.step_range = [] self.compute_task_info = defaultdict(list) self.communication_op_info = defaultdict(list) @@ -58,7 +59,7 @@ class TraceStepTimeDbParser(BaseParser): if not first_task_start_ts: return 0 if step_info.get(Constant.STEP_ID) is None: - first_fwk_op = FwkFileParser(self._profiler_path).get_first_fwk_op() + first_fwk_op = FwkFileParser(self._profiler_path).get_first_fwk_op(self.torch_op_data) return (first_task_start_ts - first_fwk_op.ts) if first_fwk_op else 0 return first_task_start_ts - step_info.get(Constant.FWK_START_TS, 0) @@ -71,13 +72,16 @@ class TraceStepTimeDbParser(BaseParser): AnalysisDb().insert_data_into_table(DbConstant.TABLE_STEP_TRACE_TIME, step_trace_data) def run(self, deps_data: dict): + self.logger.info("TraceStepTimeDbParser start.") try: + self.torch_op_data = deps_data.get(Constant.TORCH_OP_PARSER, []) self._init_step_range(deps_data) self._init_task_info_from_db() self.generate_view() except Exception as error: self.logger.error("Failed to generate step_trace_time table, error: %s", str(error), exc_info=True) return Constant.FAIL, None + self.logger.info("TraceStepTimeDbParser finish.") return Constant.SUCCESS, None def generate_view(self) -> None: -- Gitee From 8784d1bfb65496a2098be6d38c824d4ddcc2acbc Mon Sep 17 00:00:00 2001 From: tangmengcheng <745274877@qq.com> Date: Sun, 10 Aug 2025 17:02:25 +0800 Subject: [PATCH 2/3] python trace event optima --- .../prof_bean/_python_tracer_func_bean.py | 31 ++++++++++++++----- .../prof_bean/_python_tracer_hash_bean.py | 14 +++++++-- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git 
a/torch_npu/profiler/analysis/prof_bean/_python_tracer_func_bean.py b/torch_npu/profiler/analysis/prof_bean/_python_tracer_func_bean.py index 94d0b46115..aff1f35463 100644 --- a/torch_npu/profiler/analysis/prof_bean/_python_tracer_func_bean.py +++ b/torch_npu/profiler/analysis/prof_bean/_python_tracer_func_bean.py @@ -1,6 +1,7 @@ import struct from enum import Enum from .._profiler_config import ProfilerConfig +from ..prof_common_func._constant import Constant __all__ = [] @@ -16,32 +17,46 @@ class PythonTracerFuncEnum(Enum): class PythonTracerFuncBean: CONSTANT_STRUCT = "<4QB" + _constant_unpacker = struct.Struct(CONSTANT_STRUCT) - def __init__(self, data): - self._constant_data = struct.unpack(self.CONSTANT_STRUCT, data) - self._start_ns = ProfilerConfig().get_local_time( - ProfilerConfig().get_timestamp_from_syscnt(self._constant_data[PythonTracerFuncEnum.START_NS.value])) - self._tid = int(self._constant_data[PythonTracerFuncEnum.THREAD_ID.value]) - self._pid = int(self._constant_data[PythonTracerFuncEnum.PROCESS_ID.value]) - self._key = int(self._constant_data[PythonTracerFuncEnum.HASH_KEY.value]) - self._trace_tag = int(self._constant_data[PythonTracerFuncEnum.TRACE_TAG.value]) + def __init__(self, data: dict): + self._origin_data = data + self._constant_data = self._constant_unpacker.unpack(data) + + self._start_ns = None + self._tid = None + self._pid = None + self._key = None + self._trace_tag = None @property def start_ns(self) -> int: + if self._start_ns is None: + cfg = ProfilerConfig() + start_syscnt = self._constant_data[PythonTracerFuncEnum.START_NS.value] + self._start_ns = cfg.get_local_time(cfg.get_timestamp_from_syscnt(start_syscnt)) return self._start_ns @property def tid(self) -> int: + if self._tid is None: + self._tid = self._constant_data[PythonTracerFuncEnum.THREAD_ID.value] return self._tid @property def pid(self) -> int: + if self._pid is None: + self._pid = self._constant_data[PythonTracerFuncEnum.PROCESS_ID.value] return self._pid 
@property def trace_tag(self) -> int: + if self._trace_tag is None: + self._trace_tag = self._constant_data[PythonTracerFuncEnum.TRACE_TAG.value] return self._trace_tag @property def key(self) -> int: + if self._key is None: + self._key = self._constant_data[PythonTracerFuncEnum.HASH_KEY.value] return self._key diff --git a/torch_npu/profiler/analysis/prof_bean/_python_tracer_hash_bean.py b/torch_npu/profiler/analysis/prof_bean/_python_tracer_hash_bean.py index c80b173455..3e89d779a5 100644 --- a/torch_npu/profiler/analysis/prof_bean/_python_tracer_hash_bean.py +++ b/torch_npu/profiler/analysis/prof_bean/_python_tracer_hash_bean.py @@ -12,20 +12,28 @@ class PythonTracerHashEnum(Enum): class PythonTracerHashBean: CONSTANT_STRUCT = " int: + if self._key is None: + self._key = self._constant_data[PythonTracerHashEnum.KEY.value] return self._key @property def value(self) -> str: + if self._value is None: + self._value = self._origin_data.get(self._VALUE_ID, "") return self._value -- Gitee From 2458c6851a91d690fa2b7dcbdfba41f97df734dc Mon Sep 17 00:00:00 2001 From: tangmengcheng <745274877@qq.com> Date: Mon, 11 Aug 2025 22:37:15 +0800 Subject: [PATCH 3/3] python trace optima --- .../profiler/analysis/prof_config/_parser_config.py | 6 ------ .../analysis/prof_config/_parser_deps_config.py | 11 +++-------- .../profiler/analysis/prof_view/_trace_view_parser.py | 1 - .../prof_view/prepare_parse/_fwk_pre_parser.py | 2 +- .../prof_view/prof_db_parse/_fwk_api_db_parser.py | 3 ++- 5 files changed, 6 insertions(+), 17 deletions(-) diff --git a/torch_npu/profiler/analysis/prof_config/_parser_config.py b/torch_npu/profiler/analysis/prof_config/_parser_config.py index 0789ecd834..d0c9573740 100644 --- a/torch_npu/profiler/analysis/prof_config/_parser_config.py +++ b/torch_npu/profiler/analysis/prof_config/_parser_config.py @@ -18,7 +18,6 @@ from ..prof_view.cann_parse._cann_analyze import CANNAnalyzeParser from ..prof_view.cann_parse._cann_export import CANNExportParser, 
CANNTimelineParser from ..prof_view._memory_prepare_parser import MemoryPrepareParser from ..prof_view.prepare_parse._fwk_pre_parser import ( - PythonTracePreParser, TracePreParser, TreeBuildParser, TaskQueueParser, @@ -45,7 +44,6 @@ class ParserConfig: Constant.TENSORBOARD_TRACE_HANDLER: [ TorchOpParser, TaskQueueParser, - PythonTracePreParser, TracePreParser, TreeBuildParser, CANNExportParser, @@ -63,7 +61,6 @@ class ParserConfig: Constant.TENSORBOARD_TRACE_HANDLER: [ TorchOpParser, TaskQueueParser, - PythonTracePreParser, CANNExportParser, CANNTimelineParser, CANNAnalyzeParser, @@ -79,7 +76,6 @@ class ParserConfig: Constant.TENSORBOARD_TRACE_HANDLER: [ TorchOpParser, TaskQueueParser, - PythonTracePreParser, TracePreParser, TreeBuildParser, CANNExportParser, @@ -105,7 +101,6 @@ class ParserConfig: Constant.TENSORBOARD_TRACE_HANDLER: [ TorchOpParser, TaskQueueParser, - PythonTracePreParser, CANNExportParser, CANNTimelineParser, CANNAnalyzeParser, @@ -132,7 +127,6 @@ class ParserConfig: # text parser TorchOpParser: Constant.TORCH_OP_PARSER, TaskQueueParser: Constant.TASK_QUEUE_PARSER, - PythonTracePreParser: Constant.PYTHON_TRACE_PRE_PARSER, TracePreParser: Constant.TRACE_PRE_PARSER, TreeBuildParser: Constant.TREE_BUILD_PARSER, CANNExportParser: Constant.CANN_EXPORT_PARSER, diff --git a/torch_npu/profiler/analysis/prof_config/_parser_deps_config.py b/torch_npu/profiler/analysis/prof_config/_parser_deps_config.py index 267ce7207d..2324843442 100644 --- a/torch_npu/profiler/analysis/prof_config/_parser_deps_config.py +++ b/torch_npu/profiler/analysis/prof_config/_parser_deps_config.py @@ -23,11 +23,8 @@ class ParserDepsConfig: COMMON_CONFIG = { Constant.TORCH_OP_PARSER: {Constant.MODE: ConcurrentMode.PTHREAD, Constant.DEPS: []}, Constant.TASK_QUEUE_PARSER: {Constant.MODE: ConcurrentMode.PTHREAD, Constant.DEPS: []}, - Constant.PYTHON_TRACE_PRE_PARSER: {Constant.MODE: ConcurrentMode.PTHREAD, - Constant.DEPS: [Constant.TORCH_OP_PARSER, Constant.TASK_QUEUE_PARSER]}, 
Constant.TRACE_PRE_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, - Constant.DEPS: [Constant.TORCH_OP_PARSER, Constant.TASK_QUEUE_PARSER, - Constant.PYTHON_TRACE_PRE_PARSER]}, + Constant.DEPS: [Constant.TORCH_OP_PARSER, Constant.TASK_QUEUE_PARSER]}, Constant.TREE_BUILD_PARSER: {Constant.MODE: ConcurrentMode.PTHREAD, Constant.DEPS: [Constant.TORCH_OP_PARSER, Constant.TASK_QUEUE_PARSER]}, Constant.CANN_EXPORT_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, Constant.DEPS: []}, @@ -42,8 +39,7 @@ class ParserDepsConfig: Constant.RELATION_PARSER, Constant.TORCH_OP_PARSER]}, Constant.TRACE_VIEW_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, Constant.DEPS: [Constant.TREE_BUILD_PARSER, Constant.TRACE_PRE_PARSER, - Constant.CANN_TIMELINE_PARSER, Constant.TASK_QUEUE_PARSER, - Constant.PYTHON_TRACE_PRE_PARSER]}, + Constant.CANN_TIMELINE_PARSER, Constant.TASK_QUEUE_PARSER]}, Constant.KERNEL_VIEW_PARSER: {Constant.MODE: ConcurrentMode.SUB_PROCESS, Constant.DEPS: [Constant.TREE_BUILD_PARSER, Constant.CANN_EXPORT_PARSER, Constant.RELATION_PARSER]}, @@ -66,8 +62,7 @@ class ParserDepsConfig: Constant.TORCH_OP_PARSER]}, Constant.DB_PARSER: {Constant.MODE: ConcurrentMode.PTHREAD, Constant.DEPS: [Constant.CANN_EXPORT_PARSER, Constant.MEMORY_PREPARE, Constant.TORCH_OP_PARSER, - Constant.TREE_BUILD_PARSER, Constant.CANN_ANALYZE_PARSER, Constant.TASK_QUEUE_PARSER, - Constant.PYTHON_TRACE_PRE_PARSER]}, + Constant.TREE_BUILD_PARSER, Constant.CANN_ANALYZE_PARSER, Constant.TASK_QUEUE_PARSER]}, Constant.MEMORY_TIMELINE_PARSER: {} } diff --git a/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py b/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py index 2438bac3ec..158b7ae025 100644 --- a/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py +++ b/torch_npu/profiler/analysis/prof_view/_trace_view_parser.py @@ -62,7 +62,6 @@ class TraceViewParser(BaseParser): task_queue_data = deps_data.get(Constant.TASK_QUEUE_PARSER, {}) self._enqueue_data = 
task_queue_data.get("enqueue_data", []) self._dequeue_data = task_queue_data.get("dequeue_data", []) - self._python_trace_data = deps_data.get(Constant.PYTHON_TRACE_PRE_PARSER, []) self.generate_view() except Exception as e: self.logger.error("Failed to generate trace_view.json, error: %s", str(e), exc_info=True) diff --git a/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py b/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py index cce9455f7d..e26e10ab56 100644 --- a/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prepare_parse/_fwk_pre_parser.py @@ -40,7 +40,7 @@ class TracePreParser(BaseParser): task_queue_data = deps_data.get(Constant.TASK_QUEUE_PARSER, {}) enqueue_data = task_queue_data.get("enqueue_data", []) dequeue_data = task_queue_data.get("dequeue_data", []) - python_trace_data = deps_data.get(Constant.PYTHON_TRACE_PRE_PARSER, []) + _, python_trace_data = PythonTracePreParser(Constant.PYTHON_TRACE_PRE_PARSER, self._param_dict).run(deps_data) fwk_trace_data = FwkFileParser(self._profiler_path).get_fwk_trace_data( torch_op_data, enqueue_data, dequeue_data, python_trace_data) trace_file_path = os.path.join(self._output_path, Constant.TRACE_VIEW_TEMP) if os.path.isdir( diff --git a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_fwk_api_db_parser.py b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_fwk_api_db_parser.py index eae0578621..d8e1dcf598 100644 --- a/torch_npu/profiler/analysis/prof_view/prof_db_parse/_fwk_api_db_parser.py +++ b/torch_npu/profiler/analysis/prof_view/prof_db_parse/_fwk_api_db_parser.py @@ -5,6 +5,7 @@ from ...prof_common_func._constant import Constant, DbConstant, TableColumnsMana from .._base_parser import BaseParser from ...prof_common_func._log import ProfilerLogger from ...prof_parse._fwk_file_parser import FwkFileParser +from ...prof_view.prepare_parse._fwk_pre_parser import PythonTracePreParser __all__ = [] @@ 
-70,7 +71,7 @@ class FwkApiDbParser(BaseParser): task_queue_data = deps_data.get(Constant.TASK_QUEUE_PARSER, {}) enqueue_data = task_queue_data.get("enqueue_data", []) dequeue_data = task_queue_data.get("dequeue_data", []) - python_trace_data = deps_data.get(Constant.PYTHON_TRACE_PRE_PARSER, []) + _, python_trace_data = PythonTracePreParser(Constant.PYTHON_TRACE_PRE_PARSER, self._param_dict).run(deps_data) fwk_api_data = FwkFileParser(self._profiler_path).get_fwk_api( torch_op_data, enqueue_data, dequeue_data, python_trace_data) self.logger.info("FwkApiDbParser get fwk api data.") -- Gitee