diff --git a/nlp/llm/llama3_8b/openrlhf/README.md b/nlp/llm/llama3_8b/openrlhf/README.md
index e65b8c46b9be72b5e91a0d4e9bb72694845bf183..3847d65852e2a318bbaa0887cb3321b2b43b7ee2 100644
--- a/nlp/llm/llama3_8b/openrlhf/README.md
+++ b/nlp/llm/llama3_8b/openrlhf/README.md
@@ -13,6 +13,7 @@ language understanding and generation.
 
 | GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
 | :----: | :----: | :----: |
+| BI-V150 | 4.3.0 | 25.09 |
 | BI-V150 | 4.2.0 | 25.06 |
 
 ## Model Preparation
@@ -28,9 +29,11 @@ mkdir -p Dylan2048
 # for dpo: OpenRLHF/preference_dataset_mixture2_and_safe_pku
 # for kto: Dylan2048/ultrafeedback-unpaired-preferences
 # for ppo: OpenRLHF/prompt-collection-v0.1
+# for sft: Open-Orca/OpenOrca
 
 # get pretrain model from huggingface: https://huggingface.co/OpenRLHF/Llama-3-8b-sft-mixture
 # get reward_pretrain model from huggingface: https://huggingface.co/OpenRLHF/Llama-3-8b-rm-mixture
+# get model from https://huggingface.co/meta-llama/Meta-Llama-3-8B
 ```
 
 ### Install Dependencies
@@ -57,6 +60,9 @@ bash train_kto_llama.sh
 # train with ppo
 bash train_ppo_llama.sh
 
+# train with sft
+bash train_sft_llama.sh
+
 # Tips:
 # If you throw out: FileNotFoundError: Directory OpenRLHF/prompt-collection-v0.1 is neither a `Dataset` directory nor a `DatasetDict` directory.
 # please modify OpenRLHF/openrlhf/utils/utils.py:76 `data = load_from_disk(dataset)` --> `data = load_dataset(dataset, data_dir=data_dir)`
diff --git a/nlp/llm/llama3_8b/openrlhf/train_sft_llama.sh b/nlp/llm/llama3_8b/openrlhf/train_sft_llama.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c532ee313cb30fff6b9b69c895662dcd15569345
--- /dev/null
+++ b/nlp/llm/llama3_8b/openrlhf/train_sft_llama.sh
@@ -0,0 +1,30 @@
+set -x
+
+read -r -d '' training_commands <