From 79af928373a7c72d49a43548ac807f8d30e423be Mon Sep 17 00:00:00 2001
From: majorli <mingjiang.li@iluvatar.ai>
Date: Fri, 9 Dec 2022 03:43:16 +0000
Subject: [PATCH] fix rnnt model running issue dependency

link #I64SU3

Signed-off-by: majorli <mingjiang.li@iluvatar.ai>
---
 .../speech_recognition/rnnt/pytorch/README.md | 14 ++++++----
 .../pytorch/configs/baseline_v3-1023sp.yaml   |  2 +-
 .../rnnt/pytorch/install.sh                   | 28 +++++++++++++++----
 .../rnnt/pytorch/scripts/train_rnnt_1x1.sh    |  2 +-
 4 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/speech/speech_recognition/rnnt/pytorch/README.md b/speech/speech_recognition/rnnt/pytorch/README.md
index 24c012c79..3d3571ae0 100644
--- a/speech/speech_recognition/rnnt/pytorch/README.md
+++ b/speech/speech_recognition/rnnt/pytorch/README.md
@@ -13,20 +13,24 @@ bash install.sh
 ## Step 2: Preparing datasets
 download LibriSpeech [http://www.openslr.org/12](http://www.openslr.org/12)
 ```
-bash scripts/download_librispeech.sh $DATASET_DIR
+bash scripts/download_librispeech.sh ${DATA_ROOT_DIR}
 ```
 preprocess LibriSpeech
 ```
-bash scripts/preprocess_librispeech.sh $DATASET_DIR
+bash scripts/preprocess_librispeech.sh ${DATA_ROOT_DIR}
 ```
 
 ## Step 3: Training
+### Set up config YAML
+```shell
+sed -i "s#MODIFY_DATASET_DIR#${DATA_ROOT_DIR}/LibriSpeech#g" configs/baseline_v3-1023sp.yaml
+```
 
 ### Multiple GPUs on one machine
 
 ```
-cd scripts
-bash train_rnnt_1x8.sh $OUTPUT_DIR $DATA_DIR
+mkdir -p output/
+bash scripts/train_rnnt_1x8.sh output/ ${DATA_ROOT_DIR}/LibriSpeech
 ```
 
 Following conditions were tested, you can run any of them below:
@@ -46,4 +50,4 @@ Following conditions were tested, you can run any of them below:
 
 
 ## Reference
-https://github.com/mlcommons/training/tree/master/rnn_speech_recognition/pytorch
\ No newline at end of file
+https://github.com/mlcommons/training/tree/master/rnn_speech_recognition/pytorch
diff --git a/speech/speech_recognition/rnnt/pytorch/configs/baseline_v3-1023sp.yaml b/speech/speech_recognition/rnnt/pytorch/configs/baseline_v3-1023sp.yaml
index 92aef3093..08532c8b9 100644
--- a/speech/speech_recognition/rnnt/pytorch/configs/baseline_v3-1023sp.yaml
+++ b/speech/speech_recognition/rnnt/pytorch/configs/baseline_v3-1023sp.yaml
@@ -17,7 +17,7 @@
 #
 
 tokenizer:
-    sentpiece_model: /home/lin.wu/workspace/rnnt/datasets/sentencepieces/librispeech1023.model
+    sentpiece_model: MODIFY_DATASET_DIR/sentencepieces/librispeech1023.model
     labels: [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
              "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"]
 
diff --git a/speech/speech_recognition/rnnt/pytorch/install.sh b/speech/speech_recognition/rnnt/pytorch/install.sh
index 394596dfc..e74589c58 100755
--- a/speech/speech_recognition/rnnt/pytorch/install.sh
+++ b/speech/speech_recognition/rnnt/pytorch/install.sh
@@ -13,14 +13,32 @@ yum install -y epel-release
 yum install -y jq
 pip install -r requirements.txt
 
-######## install warprnnt_pytorch
-git clone https://github.com/HawkAaron/warp-transducer deps/warp-transducer
+######## prepare env
+# clean deps/
+rm -rf deps/
+mkdir -p deps/
+# download openmp-13.0.1.src.tar.xz
+cd ./deps
+wget "https://github.com/llvm/llvm-project/releases/download/llvmorg-13.0.1/openmp-13.0.1.src.tar.xz"
+tar -xvJf openmp-13.0.1.src.tar.xz && mv openmp-13.0.1.src openmp
+cd openmp/
+mkdir build && cd build/
+
+OPENMP_INSTALL_PREFIX=/usr/local/llvmopenmp
+cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=${OPENMP_INSTALL_PREFIX} -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=On -DCMAKE_CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++ -DCMAKE_C_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/gcc ../
+make && make install
+
+cp ${OPENMP_INSTALL_PREFIX}/lib/libomp.so /opt/sw_home/local/lib64/libomp.so
+cp ${OPENMP_INSTALL_PREFIX}/include/omp.h /opt/sw_home/local/lib64/clang/13.0.1/include/omp.h
+
+######## install warp-transducer
+## back to deps/
+cd ../../
+git clone https://github.com/HawkAaron/warp-transducer
 COMMIT_SHA=f546575109111c455354861a0567c8aa794208a2
-cd deps/warp-transducer && git checkout $COMMIT_SHA
+cd warp-transducer && git checkout $COMMIT_SHA
 mkdir build && cd build
 
-######## solve lmp not find error
-cp /opt/sw_home/local/lib64/libomp.so.1 /opt/sw_home/local/lib64/libomp.so
 export CUDA_HOME=/opt/sw_home/local/cuda
 export CC=/opt/sw_home/local/bin/clang
 export CXX=/opt/sw_home/local/bin/clang++
diff --git a/speech/speech_recognition/rnnt/pytorch/scripts/train_rnnt_1x1.sh b/speech/speech_recognition/rnnt/pytorch/scripts/train_rnnt_1x1.sh
index e92ec1040..044cb5db0 100755
--- a/speech/speech_recognition/rnnt/pytorch/scripts/train_rnnt_1x1.sh
+++ b/speech/speech_recognition/rnnt/pytorch/scripts/train_rnnt_1x1.sh
@@ -23,4 +23,4 @@ set -a
 : ${NUM_GPUS:=1}
 : ${GRAD_ACCUMULATION_STEPS:=64}
 
-bash ./scripts/train_rnnt_dist_1x8.sh "$@"
+bash ./scripts/train_rnnt_1x8.sh "$@"
-- 
Gitee