From 8e4bc9615ae2fc23e06fd59ee03b1a147b0d0f2f Mon Sep 17 00:00:00 2001
From: wjchuee
Date: Thu, 12 Jun 2025 14:14:06 +0800
Subject: [PATCH] Ant msmonitor add start

---
Reviewer notes (free-form text between the "---" marker and the first
"diff --git" line is not part of the commit message or of the diff):

* Layout: this patch was restored from a copy in which all line breaks had
  been collapsed. Hunk bodies were re-split using the line counts in each
  "@@" header; the indentation of every line and the position of blank
  context lines are reconstructed, not original. TODO confirm with
  `git apply --check` against the target tree, or regenerate the patch with
  `git format-patch` from commit 8e4bc961.
* The "From:" header carries no e-mail address in this copy.
  NOTE(review): restore it from the original commit if `git am` rejects it.
* _parse_start (non-dyno branch) stores json_data.get("start", False)
  without any type check, so a non-boolean JSON value is kept as-is and is
  later evaluated for truthiness in _DynamicProfile.
* _parse_start (dyno branch) only accepts string values; any non-string
  "PROFILE_START" value results in False.
* dynamic_profile.py: when cfg_ctx.start() is truthy the profiler is
  enabled on that step regardless of cfg_ctx.start_step().
* _non_intrusive_profile.py now reads KINETO_USE_DAEMON_NPU only; the
  previous name KINETO_USE_DAEMON is no longer consulted by this code.
  NOTE(review): confirm that existing launch scripts are updated.

 .../_dynamic_profiler_config_context.py           | 15 +++++++++++++++
 .../_dynamic_profiler_monitor_shm.py              |  1 +
 torch_npu/profiler/_non_intrusive_profile.py      |  4 ++--
 torch_npu/profiler/dynamic_profile.py             |  3 ++-
 4 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/torch_npu/profiler/_dynamic_profiler/_dynamic_profiler_config_context.py b/torch_npu/profiler/_dynamic_profiler/_dynamic_profiler_config_context.py
index ab8a5abfe6..675da67d7b 100644
--- a/torch_npu/profiler/_dynamic_profiler/_dynamic_profiler_config_context.py
+++ b/torch_npu/profiler/_dynamic_profiler/_dynamic_profiler_config_context.py
@@ -25,6 +25,7 @@ class ConfigContext:
         self.experimental_config = None
         self._active = 1
         self._start_step = 0
+        self._start = False
         self._warmup = 0
         self.is_valid = False
         self._meta_data = {}
@@ -52,6 +53,7 @@ class ConfigContext:
         self._parse_active(json_data)
         self._parse_warmup(json_data)
         self._parse_start_step(json_data)
+        self._parse_start(json_data)
         self._parse_exp_cfg(json_data)
         self._parse_ranks(json_data)

@@ -72,6 +74,16 @@ class ConfigContext:
         DynamicProfilerUtils.out_log("Start step will be set to {}.".format(
             self._start_step), DynamicProfilerUtils.LoggerLevelEnum.INFO)

+    def _parse_start(self, json_data: dict):
+        if not self._is_dyno:
+            self._start = json_data.get("start", False)
+        else:
+            start = json_data.get("PROFILE_START", False)
+            if isinstance(start, str):
+                self._start = self.BOOL_MAP.get(start.lower(), False)
+            else:
+                self._start = False
+
     def _parse_prof_dir(self, json_data: dict):
         if not self._is_dyno:
             self.prof_path = json_data.get('prof_dir', self.DEADLINE_PROF_DIR)
@@ -343,6 +355,9 @@ class ConfigContext:
     def start_step(self) -> int:
         return self._start_step

+    def start(self) -> bool:
+        return self._start
+
     def experimental_config(self) -> _ExperimentalConfig:
         return self.experimental_config

diff --git a/torch_npu/profiler/_dynamic_profiler/_dynamic_profiler_monitor_shm.py b/torch_npu/profiler/_dynamic_profiler/_dynamic_profiler_monitor_shm.py
index 9ffbce78f7..dd8e4820be 100644
--- a/torch_npu/profiler/_dynamic_profiler/_dynamic_profiler_monitor_shm.py
+++ b/torch_npu/profiler/_dynamic_profiler/_dynamic_profiler_monitor_shm.py
@@ -27,6 +27,7 @@ class DynamicProfilerShareMemory:
         "active": 1,
         "warmup": 0,
         "start_step": 0,
+        "start": False,
         "is_rank": False,
         "rank_list": [],
         "experimental_config": {
diff --git a/torch_npu/profiler/_non_intrusive_profile.py b/torch_npu/profiler/_non_intrusive_profile.py
index a60303adec..1d8115b637 100644
--- a/torch_npu/profiler/_non_intrusive_profile.py
+++ b/torch_npu/profiler/_non_intrusive_profile.py
@@ -59,11 +59,11 @@ class _NonIntrusiveProfile:
     @staticmethod
     def init():
         prof_config_path = os.getenv("PROF_CONFIG_PATH", "")
-        dyno_enable_flag = os.getenv("KINETO_USE_DAEMON", 0)
+        dyno_enable_flag = os.getenv("KINETO_USE_DAEMON_NPU", 0)
         try:
             dyno_enable_flag = int(dyno_enable_flag)
         except ValueError:
-            print_error_msg("Environment variable KINETO_USE_DAEMON value not valid, will be set to 0 !")
+            print_error_msg("Environment variable KINETO_USE_DAEMON_NPU value not valid, will be set to 0 !")
             dyno_enable_flag = 0
         if not prof_config_path and dyno_enable_flag != 1:
             return
diff --git a/torch_npu/profiler/dynamic_profile.py b/torch_npu/profiler/dynamic_profile.py
index 99bfd76f72..b5ea180ff6 100644
--- a/torch_npu/profiler/dynamic_profile.py
+++ b/torch_npu/profiler/dynamic_profile.py
@@ -85,7 +85,8 @@ class _DynamicProfile:
                 self.prof = None
                 DynamicProfilerUtils.out_log("Stop Dynamic Profiler at {} step.".format(
                     self.cur_step), DynamicProfilerUtils.LoggerLevelEnum.INFO)
-        elif self.prof is None and self.cfg_ctx is not None and self.cur_step == self.cfg_ctx.start_step():
+        elif self.prof is None and self.cfg_ctx is not None and \
+                (self.cur_step == self.cfg_ctx.start_step() or self.cfg_ctx.start()):
             self.step_num = self.cfg_ctx.active() + self.cfg_ctx.warmup()
             self.enable_prof()
             self.cfg_ctx = None
-- 
Gitee