From 79af928373a7c72d49a43548ac807f8d30e423be Mon Sep 17 00:00:00 2001 From: majorli Date: Fri, 9 Dec 2022 03:43:16 +0000 Subject: [PATCH] fix rnnt model running issue dependancy link #I64SU3 Signed-off-by: majorli --- .../speech_recognition/rnnt/pytorch/README.md | 14 ++++++---- .../pytorch/configs/baseline_v3-1023sp.yaml | 2 +- .../rnnt/pytorch/install.sh | 28 +++++++++++++++---- .../rnnt/pytorch/scripts/train_rnnt_1x1.sh | 2 +- 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/speech/speech_recognition/rnnt/pytorch/README.md b/speech/speech_recognition/rnnt/pytorch/README.md index 24c012c79..3d3571ae0 100644 --- a/speech/speech_recognition/rnnt/pytorch/README.md +++ b/speech/speech_recognition/rnnt/pytorch/README.md @@ -13,20 +13,24 @@ bash install.sh ## Step 2: Preparing datasets download LibriSpeech [http://www.openslr.org/12](http://www.openslr.org/12) ``` -bash scripts/download_librispeech.sh $DATASET_DIR +bash scripts/download_librispeech.sh ${DATA_ROOT_DIR} ``` preprocess LibriSpeech ``` -bash scripts/preprocess_librispeech.sh $DATASET_DIR +bash scripts/preprocess_librispeech.sh ${DATA_ROOT_DIR} ``` ## Step 3: Training +### Setup config yaml +```shell +sed -i "s#MODIFY_DATASET_DIR#${DATA_ROOT_DIR}/LibriSpeech#g" configs/baseline_v3-1023sp.yaml +``` ### Multiple GPUs on one machine ``` -cd scripts -bash train_rnnt_1x8.sh $OUTPUT_DIR $DATA_DIR +mkdir -p output/ +bash scripts/train_rnnt_1x8.sh output/ ${DATA_ROOT_DIR}/LibriSpeech ``` Following conditions were tested, you can run any of them below: @@ -46,4 +50,4 @@ Following conditions were tested, you can run any of them below: ## Reference -https://github.com/mlcommons/training/tree/master/rnn_speech_recognition/pytorch \ No newline at end of file +https://github.com/mlcommons/training/tree/master/rnn_speech_recognition/pytorch diff --git a/speech/speech_recognition/rnnt/pytorch/configs/baseline_v3-1023sp.yaml b/speech/speech_recognition/rnnt/pytorch/configs/baseline_v3-1023sp.yaml index 92aef3093..08532c8b9 100644 --- a/speech/speech_recognition/rnnt/pytorch/configs/baseline_v3-1023sp.yaml +++ b/speech/speech_recognition/rnnt/pytorch/configs/baseline_v3-1023sp.yaml @@ -17,7 +17,7 @@ # tokenizer: - sentpiece_model: /home/lin.wu/workspace/rnnt/datasets/sentencepieces/librispeech1023.model + sentpiece_model: MODIFY_DATASET_DIR/sentencepieces/librispeech1023.model labels: [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] diff --git a/speech/speech_recognition/rnnt/pytorch/install.sh b/speech/speech_recognition/rnnt/pytorch/install.sh index 394596dfc..e74589c58 100755 --- a/speech/speech_recognition/rnnt/pytorch/install.sh +++ b/speech/speech_recognition/rnnt/pytorch/install.sh @@ -13,14 +13,32 @@ yum install -y epel-release yum install -y jq pip install -r requirements.txt -######## install warprnnt_pytorch -git clone https://github.com/HawkAaron/warp-transducer deps/warp-transducer +######## prepare env +# clean deps/ +rm -rf deps/ +mkdir -p deps/ +# download openmp-13.0.1.src.tar.xz +cd ./deps +wget "https://github.com/llvm/llvm-project/releases/download/llvmorg-13.0.1/openmp-13.0.1.src.tar.xz" +tar -xvJf openmp-13.0.1.src.tar.xz && mv openmp-13.0.1.src openmp +cd openmp/ +mkdir build && cd build/ + +OPENMP_INSTALL_PREFIX=/usr/local/llvmopenmp +cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=${OPENMP_INSTALL_PREFIX} -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=On -DCMAKE_CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++ -DCMAKE_C_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/gcc ../ +make && make install + +cp ${OPENMP_INSTALL_PREFIX}/lib/libomp.so /opt/sw_home/local/lib64/libomp.so +cp ${OPENMP_INSTALL_PREFIX}/include/omp.h /opt/sw_home/local/lib64/clang/13.0.1/include/omp.h + +######## install warp-transducer +## back to deps/ +cd ../../ +git clone https://github.com/HawkAaron/warp-transducer COMMIT_SHA=f546575109111c455354861a0567c8aa794208a2 -cd deps/warp-transducer && git checkout $COMMIT_SHA +cd warp-transducer && git checkout $COMMIT_SHA mkdir build && cd build -######## solve lmp not find error -cp /opt/sw_home/local/lib64/libomp.so.1 /opt/sw_home/local/lib64/libomp.so export CUDA_HOME=/opt/sw_home/local/cuda export CC=/opt/sw_home/local/bin/clang export CXX=/opt/sw_home/local/bin/clang++ diff --git a/speech/speech_recognition/rnnt/pytorch/scripts/train_rnnt_1x1.sh b/speech/speech_recognition/rnnt/pytorch/scripts/train_rnnt_1x1.sh index e92ec1040..044cb5db0 100755 --- a/speech/speech_recognition/rnnt/pytorch/scripts/train_rnnt_1x1.sh +++ b/speech/speech_recognition/rnnt/pytorch/scripts/train_rnnt_1x1.sh @@ -23,4 +23,4 @@ set -a : ${NUM_GPUS:=1} : ${GRAD_ACCUMULATION_STEPS:=64} -bash ./scripts/train_rnnt_dist_1x8.sh "$@" +bash ./scripts/train_rnnt_1x8.sh "$@" -- Gitee