From 071c0f808a070fd0e153e21217cff82eade344ac Mon Sep 17 00:00:00 2001 From: SCh-zx <1325467101@qq.com> Date: Wed, 10 Sep 2025 11:16:24 +0800 Subject: [PATCH 1/3] safe --- torch_npu/csrc/aten/common/ResizeNpu.h | 1 + torch_npu/csrc/core/NPUStorageImpl.cpp | 1 + torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp | 1 + torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp | 1 + 4 files changed, 4 insertions(+) diff --git a/torch_npu/csrc/aten/common/ResizeNpu.h b/torch_npu/csrc/aten/common/ResizeNpu.h index c063534bc1..8c42324154 100644 --- a/torch_npu/csrc/aten/common/ResizeNpu.h +++ b/torch_npu/csrc/aten/common/ResizeNpu.h @@ -34,6 +34,7 @@ static void storage_resize_npu( } at::DataPtr new_data = storage.allocator()->allocate(size); + TORCH_CHECK(new_data, "Get new_data failed"); size_t itemsize = storage_desc.data_type_.itemsize(); at::DataPtr old_data = storage.set_data_ptr(std::move(new_data)); ptrdiff_t old_size = static_cast(storage.nbytes()); diff --git a/torch_npu/csrc/core/NPUStorageImpl.cpp b/torch_npu/csrc/core/NPUStorageImpl.cpp index 1dbd9da08d..0f51798a16 100644 --- a/torch_npu/csrc/core/NPUStorageImpl.cpp +++ b/torch_npu/csrc/core/NPUStorageImpl.cpp @@ -34,6 +34,7 @@ c10::intrusive_ptr make_npu_storage_impl( { if (data_ptr == nullptr) { data_ptr = allocator->allocate(size_bytes.as_int_unchecked()); + TORCH_CHECK(data_ptr, "Get data_ptr failed"); } // Correctly create NPUStorageImpl object. c10::intrusive_ptr npu_storage_impl = c10::make_intrusive( diff --git a/torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp b/torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp index c83bcbdabe..8b59b80628 100644 --- a/torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp +++ b/torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp @@ -60,6 +60,7 @@ public: // stream. c10_npu::NPUStreamGuard guard(stream); at::DataPtr dataPtr = c10_npu::NPUCachingAllocator::get()->allocate(length); + TORCH_CHECK(dataPtr, "Get dataPtr failed"); tensorpipe_npu::NPUBuffer buffer; buffer.ptr = dataPtr.get(); diff --git a/torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp b/torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp index 0f6cd5e6c2..7d5355819e 100644 --- a/torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp +++ b/torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp @@ -80,6 +80,7 @@ public: tensorpipe_npu::Allocation &allocation) const override { at::DataPtr dataPtr = at::getCPUAllocator()->allocate(length); + TORCH_CHECK(dataPtr, "Get dataPtr failed"); tensorpipe_npu::CpuBuffer buffer; buffer.ptr = dataPtr.get(); -- Gitee From e580f9c0f29cae64c79028d4cb3e4e660c513527 Mon Sep 17 00:00:00 2001 From: SCh-zx <1325467101@qq.com> Date: Wed, 10 Sep 2025 13:11:57 +0800 Subject: [PATCH 2/3] 1 --- torch_npu/csrc/aten/common/ResizeNpu.h | 4 +++- torch_npu/csrc/core/NPUStorageImpl.cpp | 4 +++- torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp | 4 +++- torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/torch_npu/csrc/aten/common/ResizeNpu.h b/torch_npu/csrc/aten/common/ResizeNpu.h index 8c42324154..6dec22c760 100644 --- a/torch_npu/csrc/aten/common/ResizeNpu.h +++ b/torch_npu/csrc/aten/common/ResizeNpu.h @@ -34,7 +34,9 @@ static void storage_resize_npu( } at::DataPtr new_data = storage.allocator()->allocate(size); - TORCH_CHECK(new_data, "Get new_data failed"); + if (size > 0) { + TORCH_CHECK(new_data, "Get new_data failed"); + } size_t itemsize = storage_desc.data_type_.itemsize(); at::DataPtr old_data = storage.set_data_ptr(std::move(new_data)); ptrdiff_t old_size = static_cast(storage.nbytes()); diff --git a/torch_npu/csrc/core/NPUStorageImpl.cpp b/torch_npu/csrc/core/NPUStorageImpl.cpp index 0f51798a16..55c3e8815e 100644 --- a/torch_npu/csrc/core/NPUStorageImpl.cpp +++ b/torch_npu/csrc/core/NPUStorageImpl.cpp @@ -34,7 +34,9 @@ c10::intrusive_ptr make_npu_storage_impl( { if (data_ptr == nullptr) { data_ptr = allocator->allocate(size_bytes.as_int_unchecked()); - TORCH_CHECK(data_ptr, "Get data_ptr failed"); + if (size_bytes.as_int_unchecked() > 0) { + TORCH_CHECK(data_ptr, "Get data_ptr failed"); + } } // Correctly create NPUStorageImpl object. c10::intrusive_ptr npu_storage_impl = c10::make_intrusive( diff --git a/torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp b/torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp index 8b59b80628..fb08c6544b 100644 --- a/torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp +++ b/torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp @@ -60,7 +60,9 @@ public: // stream. c10_npu::NPUStreamGuard guard(stream); at::DataPtr dataPtr = c10_npu::NPUCachingAllocator::get()->allocate(length); - TORCH_CHECK(dataPtr, "Get dataPtr failed"); + if (length > 0) { + TORCH_CHECK(dataPtr, "Get dataPtr failed"); + } tensorpipe_npu::NPUBuffer buffer; buffer.ptr = dataPtr.get(); diff --git a/torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp b/torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp index 7d5355819e..56c98b5443 100644 --- a/torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp +++ b/torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp @@ -80,7 +80,9 @@ public: tensorpipe_npu::Allocation &allocation) const override { at::DataPtr dataPtr = at::getCPUAllocator()->allocate(length); - TORCH_CHECK(dataPtr, "Get dataPtr failed"); + if (length > 0) { + TORCH_CHECK(dataPtr, "Get dataPtr failed"); + } tensorpipe_npu::CpuBuffer buffer; buffer.ptr = dataPtr.get(); -- Gitee From 8e108ffbd946f9067604d86b8c0c8eb0e8fbb6d8 Mon Sep 17 00:00:00 2001 From: SCh-zx <1325467101@qq.com> Date: Wed, 10 Sep 2025 18:33:58 +0800 Subject: [PATCH 3/3] err --- torch_npu/csrc/aten/common/ResizeNpu.h | 2 +- torch_npu/csrc/core/NPUStorageImpl.cpp | 2 +- torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp | 2 +- torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/torch_npu/csrc/aten/common/ResizeNpu.h b/torch_npu/csrc/aten/common/ResizeNpu.h index 6dec22c760..c771679130 100644 --- a/torch_npu/csrc/aten/common/ResizeNpu.h +++ b/torch_npu/csrc/aten/common/ResizeNpu.h @@ -35,7 +35,7 @@ static void storage_resize_npu( at::DataPtr new_data = storage.allocator()->allocate(size); if (size > 0) { - TORCH_CHECK(new_data, "Get new_data failed"); + TORCH_CHECK(new_data, "Get new_data failed", PTA_ERROR(ErrCode::PARAM)); } size_t itemsize = storage_desc.data_type_.itemsize(); at::DataPtr old_data = storage.set_data_ptr(std::move(new_data)); diff --git a/torch_npu/csrc/core/NPUStorageImpl.cpp b/torch_npu/csrc/core/NPUStorageImpl.cpp index 55c3e8815e..4d11f506fe 100644 --- a/torch_npu/csrc/core/NPUStorageImpl.cpp +++ b/torch_npu/csrc/core/NPUStorageImpl.cpp @@ -35,7 +35,7 @@ c10::intrusive_ptr make_npu_storage_impl( if (data_ptr == nullptr) { data_ptr = allocator->allocate(size_bytes.as_int_unchecked()); if (size_bytes.as_int_unchecked() > 0) { - TORCH_CHECK(data_ptr, "Get data_ptr failed"); + TORCH_CHECK(data_ptr, "Get data_ptr failed", PTA_ERROR(ErrCode::PARAM)); } } // Correctly create NPUStorageImpl object. diff --git a/torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp b/torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp index fb08c6544b..888f1fcb1a 100644 --- a/torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp +++ b/torch_npu/csrc/distributed/rpc/tensorpipe_npu.cpp @@ -61,7 +61,7 @@ public: c10_npu::NPUStreamGuard guard(stream); at::DataPtr dataPtr = c10_npu::NPUCachingAllocator::get()->allocate(length); if (length > 0) { - TORCH_CHECK(dataPtr, "Get dataPtr failed"); + TORCH_CHECK(dataPtr, "Get dataPtr failed", PTA_ERROR(ErrCode::PARAM)); } tensorpipe_npu::NPUBuffer buffer; diff --git a/torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp b/torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp index 56c98b5443..d3695aa3aa 100644 --- a/torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp +++ b/torch_npu/csrc/distributed/rpc/tensorpipe_utils.cpp @@ -81,7 +81,7 @@ public: { at::DataPtr dataPtr = at::getCPUAllocator()->allocate(length); if (length > 0) { - TORCH_CHECK(dataPtr, "Get dataPtr failed"); + TORCH_CHECK(dataPtr, "Get dataPtr failed", PTA_ERROR(ErrCode::PARAM)); } tensorpipe_npu::CpuBuffer buffer; -- Gitee