From a2583035195695a64855aed62eda0bec6278258c Mon Sep 17 00:00:00 2001 From: lijian Date: Tue, 23 Sep 2025 17:17:26 +0800 Subject: [PATCH] [perf] whisperx vad perf improve bind core --- ACL_PyTorch/built-in/audio/whisperx/README.md | 53 ++++++++++++++----- .../built-in/audio/whisperx/check_numa.sh | 24 +++++++++ .../audio/whisperx/patches/vad_model.patch | 32 ++++++++--- 3 files changed, 88 insertions(+), 21 deletions(-) create mode 100644 ACL_PyTorch/built-in/audio/whisperx/check_numa.sh diff --git a/ACL_PyTorch/built-in/audio/whisperx/README.md b/ACL_PyTorch/built-in/audio/whisperx/README.md index 4300502578..4c152b7cee 100644 --- a/ACL_PyTorch/built-in/audio/whisperx/README.md +++ b/ACL_PyTorch/built-in/audio/whisperx/README.md @@ -71,6 +71,7 @@ cd .. ```text 📁 whisper/ +├── check_numa.sh ├── audio.mp3 ├── infer.py ├── modeling_whisper.py @@ -90,19 +91,45 @@ cd .. | |── 📁 speech_fsmn_vad_zh-cn-16k-common-pytorch ``` -## 开始推理 -```SHELL -# 1. 激活环境变量 -source /usr/local/Ascend/ascend-toolkit/set_env.sh # 具体路径根据你自己的情况修改 -# 2. 指定使用NPU ID,默认为0 -export ASCEND_RT_VISIBLE_DEVICES=0 -# 3. 给funasr和torchaudio打补丁 -cd patches -python3 patch_apply.py -cd .. -# 4. 开始推理 -python3 infer.py --whisper_model_path ./weight/Whisper-large-v3/large-v3.pt -``` +## 模型推理 +1. 激活环境变量 + ```SHELL + source /usr/local/Ascend/ascend-toolkit/set_env.sh # 具体路径根据你自己的情况修改 + # 提升性能相关环境变量 + export TASK_QUEUE_ENABLE=2 + export PYTORCH_NPU_ALLOC_CONF='expandable_segments:True' + export HOST_CACHE_CAPACITY=20 + export ASCEND_ENHANCE_ENABLE=1 + ``` + +2. 指定使用NPU ID,默认为0 + ```SHELL + export ASCEND_RT_VISIBLE_DEVICES=0 + ``` +3. 给funasr和torchaudio打补丁 + ```SHELL + cd patches + python3 patch_apply.py + cd .. + ``` +4. 使能绑核,进一步提升性能 + ```SHELL + export CPU_AFFINITY_CONF=1 + apt-get update + apt-get install numactl + # 在容器外执行脚本查看NPU id对应的NUMA node和cpu + bash check_numa.sh + ``` + 回显如下: + ```SHELL + ... + >>>>设备 0 对应 NUMA 节点: 6, NUMA node6 CPU(s): 192-223 + ... + ``` +5. 
开始推理, 根据实际查询到的核数配置,比如 + ```SHELL + taskset -c 192-223 python3 infer.py --whisper_model_path ./weight/Whisper-large-v3/large-v3.pt + ``` infer.py推理参数: * --whisper_model_path:whisper模型权重路径,默认为"./weight/Whisper-large-v3/large-v3.pt" * --vad_model_path:vad模型权重路径,默认为"./weight/speech_fsmn_vad_zh-cn-16k-common-pytorch" diff --git a/ACL_PyTorch/built-in/audio/whisperx/check_numa.sh b/ACL_PyTorch/built-in/audio/whisperx/check_numa.sh new file mode 100644 index 0000000000..094624afc9 --- /dev/null +++ b/ACL_PyTorch/built-in/audio/whisperx/check_numa.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# 查询NPU设备信息并获取第四列的PCI地址 +pci_addresses=$(npu-smi info | grep 0000 | awk '{print $4}') + +if [ -z "$pci_addresses" ]; then + echo "未找到匹配的NPU设备信息" + exit 1 +fi + +echo "找到以下NPU设备PCI地址:" +echo "$pci_addresses" + +# 为每个PCI地址查询NUMA节点信息 +i=0 +for addr in $pci_addresses; do + echo "查询PCI地址 $addr 的NUMA信息" + + # 使用lspci获取节点信息 + node_info=$(lspci -vvv -s $addr | grep node | awk -F ':' '{print $2}' | awk '{print $1}') + numa_info=$(lscpu | grep "node${node_info}") + echo ">>>>设备 $i 对应 NUMA 节点: $node_info, $numa_info" + i=$((i+1)) +done \ No newline at end of file diff --git a/ACL_PyTorch/built-in/audio/whisperx/patches/vad_model.patch b/ACL_PyTorch/built-in/audio/whisperx/patches/vad_model.patch index 0e41152c38..15a60dd99b 100644 --- a/ACL_PyTorch/built-in/audio/whisperx/patches/vad_model.patch +++ b/ACL_PyTorch/built-in/audio/whisperx/patches/vad_model.patch @@ -1,16 +1,32 @@ ---- model.py 2025-09-09 14:29:16 -+++ model_modified.py 2025-09-09 14:43:59 -@@ -336,7 +336,7 @@ +--- model.py 2025-09-22 19:07:02 ++++ model_modified.py 2025-09-23 09:23:30 +@@ -336,19 +336,26 @@ (cache["stats"].data_buf_all, cache["stats"].waveform[0]) ) - waveform_numpy = cache["stats"].waveform.numpy() -+ waveform_numpy = cache["stats"].waveform.cpu().numpy() +- +- offsets = np.arange(0, waveform_numpy.shape[1] - frame_sample_length + 1, frame_shift_length) +- frames = waveform_numpy[0, offsets[:, np.newaxis] + 
np.arange(frame_sample_length)]
++    waveform = cache["stats"].waveform
 
-            offsets = np.arange(0, waveform_numpy.shape[1] - frame_sample_length + 1, frame_shift_length)
-            frames = waveform_numpy[0, offsets[:, np.newaxis] + np.arange(frame_sample_length)]
-@@ -348,7 +348,8 @@
+-            decibel_numpy = 10 * np.log10(np.sum(np.square(frames), axis=1) + 0.000001)
+-            decibel_numpy = decibel_numpy.tolist()
++            offsets = torch.arange(
++                0, waveform.shape[1] - frame_sample_length + 1, frame_shift_length,
++                device=waveform.device
++            )
 
+-            cache["stats"].decibel.extend(decibel_numpy)
++            indices = offsets.unsqueeze(1) + torch.arange(frame_sample_length, device=waveform.device)
++            frames = waveform[0].index_select(0, indices.view(-1)).view(offsets.size(0), frame_sample_length)
+
++            decibel = 10 * torch.log10(torch.sum(frames ** 2, dim=1) + 1e-6)
++
++            if isinstance(cache["stats"].decibel, torch.Tensor):
++                cache["stats"].decibel = torch.cat([cache["stats"].decibel, decibel])
++            else:
++                cache["stats"].decibel.extend(decibel.cpu().tolist())
 
     def ComputeScores(self, feats: torch.Tensor, cache: dict = {}) -> None:
-        scores = self.encoder(feats, cache=cache["encoder"]).to("cpu")  # return B * T * D
@@ -19,7 +35,7 @@
         assert (
             scores.shape[1] == feats.shape[1]
         ), "The shape between feats and scores does not match"
-@@ -688,7 +689,7 @@
+@@ -688,7 +695,7 @@
         meta_data["load_data"] = f"{time2 - time1:0.3f}"
 
         assert len(audio_sample_list) == 1, "batch_size must be set 1"
-- 
Gitee