diff --git a/torch_npu/csrc/core/npu/NPUException.cpp b/torch_npu/csrc/core/npu/NPUException.cpp index 399e84051413024c5ed7e8f10aa6c7f48a1d59ea..c338a0647a82ef9a47e48723ef65f727d7d01e71 100644 --- a/torch_npu/csrc/core/npu/NPUException.cpp +++ b/torch_npu/csrc/core/npu/NPUException.cpp @@ -154,6 +154,9 @@ const char *c10_npu_get_error_message() if (c10_npu::option::OptionsManager::IsCompactErrorOutput()) { std::string log(errmsg); std::string errmsg_ = c10_npu::c10_npu_check_error_message(log); + if (errmsg_ == "") { + return errmsg; + } thread_local std::string processedErrMsg = "CANN error: " + errmsg_; c10_npu::setRepoErrMsg(processedErrMsg.c_str()); return processedErrMsg.c_str(); diff --git a/torch_npu/csrc/core/npu/NPUException.h b/torch_npu/csrc/core/npu/NPUException.h index 2fa8bff52a0239c928f1c0594abe02fc3d8aa3c1..ce65ac1b927103211839cc407b943e97ca9d0225 100644 --- a/torch_npu/csrc/core/npu/NPUException.h +++ b/torch_npu/csrc/core/npu/NPUException.h @@ -163,10 +163,11 @@ inline const char* getErrorFunction(const char* /* msg */, const char* args) << "\n"; \ std::string err_msg = oss.str(); \ ASCEND_LOGE("%s", err_msg.c_str()); \ + std::string errmsg(c10_npu::c10_npu_get_error_message()); \ TORCH_CHECK( \ false, \ (device_error_msg.empty() ? "" : device_error_msg), \ - c10_npu::c10_npu_get_error_message()); \ + errmsg.empty() ? err_msg : errmsg); \ } else if (error_code == ACL_ERROR_RT_DEVICE_TASK_ABORT) { \ TORCH_CHECK( \ false, \ @@ -218,9 +219,10 @@ inline const char* getErrorFunction(const char* /* msg */, const char* args) err_map.error_code_map[Error] : ".") + "\n"; \ std::string err_msg = oss.str(); \ ASCEND_LOGE("%s", err_msg.c_str()); \ + std::string errmsg(c10_npu::c10_npu_get_error_message()); \ TORCH_CHECK( \ false, \ - c10_npu::c10_npu_get_error_message()); \ + errmsg.empty() ? err_msg : errmsg); \ } else { \ TORCH_CHECK( \ false, \ diff --git a/torch_npu/csrc/distributed/HCCLUtils.hpp b/torch_npu/csrc/distributed/HCCLUtils.hpp index 1033d8de97f5d0d5eaa099041fd7089616fa3589..07930b27075e8e8c252c5804db925983069c5ea3 100644 --- a/torch_npu/csrc/distributed/HCCLUtils.hpp +++ b/torch_npu/csrc/distributed/HCCLUtils.hpp @@ -24,9 +24,10 @@ << DIST_ERROR(ErrCode::HCCL) + ".\n"; \ std::string err_msg = oss.str(); \ ASCEND_LOGE("%s", err_msg.c_str()); \ + std::string errmsg(c10_npu::c10_npu_get_error_message()); \ TORCH_CHECK( \ false, \ - c10_npu::c10_npu_get_error_message()); \ + errmsg.empty() ? err_msg : errmsg); \ } else { \ TORCH_CHECK( \ false, \ diff --git a/torch_npu/csrc/framework/utils/CalcuOpUtil.h b/torch_npu/csrc/framework/utils/CalcuOpUtil.h index 5ee41e7d64b4988d356dcb69a9b06e332277e0f7..ef11595d3d6f0425f8f414bf74aa05c1f309e233 100644 --- a/torch_npu/csrc/framework/utils/CalcuOpUtil.h +++ b/torch_npu/csrc/framework/utils/CalcuOpUtil.h @@ -46,7 +46,8 @@ using std::vector; << OPS_ERROR(ErrCode::INTERNAL); \ std::string err_msg=oss.str(); \ ASCEND_LOGE("%s", err_msg.c_str()); \ - TORCH_CHECK((expr) == 0, c10_npu::c10_npu_get_error_message()); \ + std::string errmsg(c10_npu::c10_npu_get_error_message()); \ + TORCH_CHECK((expr) == 0, errmsg.empty() ? err_msg : errmsg); \ } else { \ TORCH_CHECK((expr) == 0, __func__, ":", __FILE__, ":", __LINE__, \ " NPU error,NPU error code is:", expr, "\n", \