From 99496b64e5c19d51c59faddfb2649c46a4d21915 Mon Sep 17 00:00:00 2001
From: "siyuan.lei"
Date: Thu, 21 Aug 2025 05:35:05 +0000
Subject: [PATCH 1/2] support llama3-8B for 4.3.0

---
 nlp/llm/llama3_8b/openorca/README.md          | 44 +++++++++++++++++++
 nlp/llm/llama3_8b/openorca/train_sft_llama.sh | 31 +++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 nlp/llm/llama3_8b/openorca/README.md
 create mode 100644 nlp/llm/llama3_8b/openorca/train_sft_llama.sh

diff --git a/nlp/llm/llama3_8b/openorca/README.md b/nlp/llm/llama3_8b/openorca/README.md
new file mode 100644
index 000000000..c1b126179
--- /dev/null
+++ b/nlp/llm/llama3_8b/openorca/README.md
@@ -0,0 +1,44 @@
+# Llama3-8B (OpenRLHF)
+
+## Model Description
+
+Llama3-8B is an advanced auto-regressive language model developed by Meta, featuring 8 billion parameters. It utilizes
+an optimized transformer architecture with Grouped-Query Attention (GQA) for improved inference efficiency. Trained on
+sequences of 8,192 tokens and using a 128K token vocabulary, it excels in various natural language tasks. The model
+incorporates supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human
+preferences, ensuring both helpfulness and safety in its responses. Llama3-8B offers state-of-the-art performance in
+language understanding and generation.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+| :----: | :----: | :----: |
+| BI-V150 | 4.3.0 | 25.06 |
+
+## Model Preparation
+
+### Install OpenRLHF
+
+```sh
+# install
+git clone https://github.com/OpenRLHF/OpenRLHF.git -b v0.5.7
+cd OpenRLHF
+pip install -e .
+```
+
+## Model Training
+
+```sh
+# Make sure you have 16 BI-V150 GPUs available
+cp *.sh OpenRLHF/examples/scripts/
+cd OpenRLHF/examples/scripts/
+
+# train sft
+bash train_sft_llama.sh
+```
+
+Tips: if you hit an out-of-memory (OOM) error during training, reduce micro_train_batch_size accordingly.
+
+## References
+
+- [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF)
diff --git a/nlp/llm/llama3_8b/openorca/train_sft_llama.sh b/nlp/llm/llama3_8b/openorca/train_sft_llama.sh
new file mode 100644
index 000000000..5eb67be28
--- /dev/null
+++ b/nlp/llm/llama3_8b/openorca/train_sft_llama.sh
@@ -0,0 +1,31 @@
+set -x
+
+read -r -d '' training_commands <

From a1b2c3d4e5f6789012345678901234567890abcd Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 22 Aug 2025 17:12:44 +0800
Subject: [PATCH 2/2] refine pr 500 and add Mixtral-8x7B-v0.1 model

---
 nlp/llm/llama3_8b/openrlhf/README.md          |  6 ++++
 .../{openorca => openrlhf}/train_sft_llama.sh |  9 +++--
 .../openorca => mixtral/openrlhf}/README.md   | 33 ++++++++++---------
 nlp/llm/mixtral/openrlhf/requirements.txt     | 18 ++++++++++
 .../openrlhf/train_sft_mixtral_lora.sh        | 30 +++++++++++++++++
 5 files changed, 76 insertions(+), 20 deletions(-)
 rename nlp/llm/llama3_8b/{openorca => openrlhf}/train_sft_llama.sh (82%)
 rename nlp/llm/{llama3_8b/openorca => mixtral/openrlhf}/README.md (40%)
 create mode 100644 nlp/llm/mixtral/openrlhf/requirements.txt
 create mode 100644 nlp/llm/mixtral/openrlhf/train_sft_mixtral_lora.sh

diff --git a/nlp/llm/llama3_8b/openrlhf/README.md b/nlp/llm/llama3_8b/openrlhf/README.md
index e65b8c46b..3847d6585 100644
--- a/nlp/llm/llama3_8b/openrlhf/README.md
+++ b/nlp/llm/llama3_8b/openrlhf/README.md
@@ -13,6 +13,7 @@ language understanding and generation.
 
 | GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
 | :----: | :----: | :----: |
+| BI-V150 | 4.3.0 | 25.09 |
 | BI-V150 | 4.2.0 | 25.06 |
 
 ## Model Preparation
@@ -28,9 +29,11 @@ mkdir -p Dylan2048
 # for dpo: OpenRLHF/preference_dataset_mixture2_and_safe_pku
 # for kto: Dylan2048/ultrafeedback-unpaired-preferences
 # for ppo: OpenRLHF/prompt-collection-v0.1
+# for sft: Open-Orca/OpenOrca
 
 # get pretrain model from huggingface: https://huggingface.co/OpenRLHF/Llama-3-8b-sft-mixture
 # get reward_pretrain model from huggingface: https://huggingface.co/OpenRLHF/Llama-3-8b-rm-mixture
+# get model from https://huggingface.co/meta-llama/Meta-Llama-3-8B
 ```
 
 ### Install Dependencies
@@ -57,6 +60,9 @@ bash train_kto_llama.sh
 # train with ppo
 bash train_ppo_llama.sh
 
+# train with sft
+bash train_sft_llama.sh
+
 # Tips:
 # If you throw out: FileNotFoundError: Directory OpenRLHF/prompt-collection-v0.1 is neither a `Dataset` directory nor a `DatasetDict` directory.
 # please modify OpenRLHF/openrlhf/utils/utils.py:76 `data = load_from_disk(dataset)` --> `data = load_dataset(dataset, data_dir=data_dir)`
diff --git a/nlp/llm/llama3_8b/openorca/train_sft_llama.sh b/nlp/llm/llama3_8b/openrlhf/train_sft_llama.sh
similarity index 82%
rename from nlp/llm/llama3_8b/openorca/train_sft_llama.sh
rename to nlp/llm/llama3_8b/openrlhf/train_sft_llama.sh
index 5eb67be28..c532ee313 100644
--- a/nlp/llm/llama3_8b/openorca/train_sft_llama.sh
+++ b/nlp/llm/llama3_8b/openrlhf/train_sft_llama.sh
@@ -6,8 +6,8 @@ openrlhf.cli.train_sft \
    --dataset Open-Orca/OpenOrca \
    --input_key question \
    --output_key response \
-   --train_batch_size 256 \
-   --micro_train_batch_size 2 \
+   --train_batch_size 128 \
+   --micro_train_batch_size 1 \
    --max_samples 500000 \
    --pretrain meta-llama/Meta-Llama-3-8B \
    --save_path ./checkpoint/llama3-8b-sft \
@@ -17,10 +17,9 @@
    --zero_stage 2 \
    --max_epochs 1 \
    --bf16 \
-   --attn_implementation flash_attention_2 \
+   --flash_attn \
    --learning_rate 5e-6 \
    --load_checkpoint \
-   --packing_samples \
    --gradient_checkpointing
 EOF
 # --wandb [WANDB_TOKENS]
@@ -28,4 +27,4 @@ EOF
 
 if [[ ${1} != "slurm" ]]; then
    deepspeed --module $training_commands
-f
\ No newline at end of file
+fi
\ No newline at end of file
diff --git a/nlp/llm/llama3_8b/openorca/README.md b/nlp/llm/mixtral/openrlhf/README.md
similarity index 40%
rename from nlp/llm/llama3_8b/openorca/README.md
rename to nlp/llm/mixtral/openrlhf/README.md
index c1b126179..622b90b58 100644
--- a/nlp/llm/llama3_8b/openorca/README.md
+++ b/nlp/llm/mixtral/openrlhf/README.md
@@ -1,27 +1,34 @@
-# Llama3-8B (OpenRLHF)
+# Mixtral 8x7B (OpenRLHF)
 
 ## Model Description
 
-Llama3-8B is an advanced auto-regressive language model developed by Meta, featuring 8 billion parameters. It utilizes
-an optimized transformer architecture with Grouped-Query Attention (GQA) for improved inference efficiency. Trained on
-sequences of 8,192 tokens and using a 128K token vocabulary, it excels in various natural language tasks. The model
-incorporates supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human
-preferences, ensuring both helpfulness and safety in its responses. Llama3-8B offers state-of-the-art performance in
-language understanding and generation.
+The Mixtral model is a Mixture of Experts (MoE)-based large language model developed by Mistral AI, an innovative
+company focusing on open-source AI models. Mixtral is designed to achieve high performance while maintaining
+computational efficiency, making it an excellent choice for real-world applications.
 
 ## Supported Environments
 
 | GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
 | :----: | :----: | :----: |
-| BI-V150 | 4.3.0 | 25.06 |
+| BI-V150 | 4.3.0 | 25.09 |
 
 ## Model Preparation
 
-### Install OpenRLHF
+### Prepare Resources
 
 ```sh
-# install
 git clone https://github.com/OpenRLHF/OpenRLHF.git -b v0.5.7
+cd OpenRLHF/examples/scripts/
+# get datasets from huggingface Open-Orca/OpenOrca
+# get model from https://huggingface.co/mistralai/Mixtral-8x7B-v0.1
+
+```
+
+### Install Dependencies
+
+```sh
+# install
+cp requirements.txt OpenRLHF/requirements.txt
 cd OpenRLHF
 pip install -e .
 ```
@@ -32,13 +39,9 @@ pip install -e .
 # Make sure you have 16 BI-V150 GPUs available
 cp *.sh OpenRLHF/examples/scripts/
 cd OpenRLHF/examples/scripts/
-
-# train sft
-bash train_sft_llama.sh
+bash train_sft_mixtral_lora.sh
 ```
 
-Tips: if you hit an out-of-memory (OOM) error during training, reduce micro_train_batch_size accordingly.
-
 ## References
 
 - [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF)
diff --git a/nlp/llm/mixtral/openrlhf/requirements.txt b/nlp/llm/mixtral/openrlhf/requirements.txt
new file mode 100644
index 000000000..47aa5ab23
--- /dev/null
+++ b/nlp/llm/mixtral/openrlhf/requirements.txt
@@ -0,0 +1,18 @@
+accelerate
+bitsandbytes
+datasets
+einops
+isort
+jsonlines
+loralib
+optimum
+packaging
+peft
+ray
+tensorboard
+torch
+torchmetrics
+tqdm
+transformers_stream_generator
+wandb
+wheel
\ No newline at end of file
diff --git a/nlp/llm/mixtral/openrlhf/train_sft_mixtral_lora.sh b/nlp/llm/mixtral/openrlhf/train_sft_mixtral_lora.sh
new file mode 100644
index 000000000..f702d4554
--- /dev/null
+++ b/nlp/llm/mixtral/openrlhf/train_sft_mixtral_lora.sh
@@ -0,0 +1,30 @@
+set -x
+
+read -r -d '' training_commands <