From 99496b64e5c19d51c59faddfb2649c46a4d21915 Mon Sep 17 00:00:00 2001
From: "siyuan.lei"
Date: Thu, 21 Aug 2025 05:35:05 +0000
Subject: [PATCH 1/2] support llama3-8B for 4.3.0

---
 nlp/llm/llama3_8b/openorca/README.md          | 44 +++++++++++++++++++
 nlp/llm/llama3_8b/openorca/train_sft_llama.sh | 31 +++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 nlp/llm/llama3_8b/openorca/README.md
 create mode 100644 nlp/llm/llama3_8b/openorca/train_sft_llama.sh

diff --git a/nlp/llm/llama3_8b/openorca/README.md b/nlp/llm/llama3_8b/openorca/README.md
new file mode 100644
index 000000000..c1b126179
--- /dev/null
+++ b/nlp/llm/llama3_8b/openorca/README.md
@@ -0,0 +1,44 @@
+# Llama3-8B (OpenRLHF)
+
+## Model Description
+
+Llama3-8B is an advanced auto-regressive language model developed by Meta, featuring 8 billion parameters. It utilizes
+an optimized transformer architecture with Grouped-Query Attention (GQA) for improved inference efficiency. Trained on
+sequences of 8,192 tokens and using a 128K token vocabulary, it excels in various natural language tasks. The model
+incorporates supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human
+preferences, ensuring both helpfulness and safety in its responses. Llama3-8B offers state-of-the-art performance in
+language understanding and generation.
+
+## Supported Environments
+
+| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
+| :----: | :----: | :----: |
+| BI-V150 | 4.3.0 | 25.06 |
+
+## Model Preparation
+
+### Install OpenRLHF
+
+```sh
+# install
+git clone https://github.com/OpenRLHF/OpenRLHF.git -b v0.5.7
+cd OpenRLHF
+pip install -e .
+```
+
+## Model Training
+
+```sh
+# Make sure you have 16 BI-V150 GPUs available
+cp *.sh OpenRLHF/examples/scripts/
+cd OpenRLHF/examples/scripts/
+
+# train sft
+bash train_sft_llama.sh
+```
+
+Tips: if you hit an out-of-memory (OOM) error during training, reduce micro_train_batch_size accordingly.
+
+## References
+
+- [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF)
diff --git a/nlp/llm/llama3_8b/openorca/train_sft_llama.sh b/nlp/llm/llama3_8b/openorca/train_sft_llama.sh
new file mode 100644
index 000000000..5eb67be28
--- /dev/null
+++ b/nlp/llm/llama3_8b/openorca/train_sft_llama.sh
@@ -0,0 +1,31 @@
+set -x
+
+read -r -d '' training_commands <

From a1b2c3d4e5f6789012345678901234567890abcd Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 22 Aug 2025 17:12:44 +0800
Subject: [PATCH 2/2] refine pr 500 and add Mixtral-8x7B-v0.1 model

---
 nlp/llm/llama3_8b/openrlhf/README.md          |  6 ++++
 .../{openorca => openrlhf}/train_sft_llama.sh |  9 +++--
 .../openorca => mixtral/openrlhf}/README.md   | 33 ++++++++++---------
 nlp/llm/mixtral/openrlhf/requirements.txt     | 18 ++++++++++
 .../openrlhf/train_sft_mixtral_lora.sh        | 30 +++++++++++++++++
 5 files changed, 76 insertions(+), 20 deletions(-)
 rename nlp/llm/llama3_8b/{openorca => openrlhf}/train_sft_llama.sh (82%)
 rename nlp/llm/{llama3_8b/openorca => mixtral/openrlhf}/README.md (40%)
 create mode 100644 nlp/llm/mixtral/openrlhf/requirements.txt
 create mode 100644 nlp/llm/mixtral/openrlhf/train_sft_mixtral_lora.sh

diff --git a/nlp/llm/llama3_8b/openrlhf/README.md b/nlp/llm/llama3_8b/openrlhf/README.md
index e65b8c46b..3847d6585 100644
--- a/nlp/llm/llama3_8b/openrlhf/README.md
+++ b/nlp/llm/llama3_8b/openrlhf/README.md
@@ -13,6 +13,7 @@ language understanding and generation.
 
 | GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
 | :----: | :----: | :----: |
+| BI-V150 | 4.3.0 | 25.09 |
 | BI-V150 | 4.2.0 | 25.06 |
 
 ## Model Preparation
@@ -28,9 +29,11 @@ mkdir -p Dylan2048
 # for dpo: OpenRLHF/preference_dataset_mixture2_and_safe_pku
 # for kto: Dylan2048/ultrafeedback-unpaired-preferences
 # for ppo: OpenRLHF/prompt-collection-v0.1
+# for sft: Open-Orca/OpenOrca
 
 # get pretrain model from huggingface: https://huggingface.co/OpenRLHF/Llama-3-8b-sft-mixture
 # get reward_pretrain model from huggingface: https://huggingface.co/OpenRLHF/Llama-3-8b-rm-mixture
+# get model from https://huggingface.co/meta-llama/Meta-Llama-3-8B
 ```
 
 ### Install Dependencies
@@ -57,6 +60,9 @@ bash train_kto_llama.sh
 # train with ppo
 bash train_ppo_llama.sh
 
+# train with sft
+bash train_sft_llama.sh
+
 # Tips:
 # If you throw out: FileNotFoundError: Directory OpenRLHF/prompt-collection-v0.1 is neither a `Dataset` directory nor a `DatasetDict` directory.
 # please modify OpenRLHF/openrlhf/utils/utils.py:76 `data = load_from_disk(dataset)` --> `data = load_dataset(dataset, data_dir=data_dir)`
diff --git a/nlp/llm/llama3_8b/openorca/train_sft_llama.sh b/nlp/llm/llama3_8b/openrlhf/train_sft_llama.sh
similarity index 82%
rename from nlp/llm/llama3_8b/openorca/train_sft_llama.sh
rename to nlp/llm/llama3_8b/openrlhf/train_sft_llama.sh
index 5eb67be28..c532ee313 100644
--- a/nlp/llm/llama3_8b/openorca/train_sft_llama.sh
+++ b/nlp/llm/llama3_8b/openrlhf/train_sft_llama.sh
@@ -6,8 +6,8 @@ openrlhf.cli.train_sft \
    --dataset Open-Orca/OpenOrca \
    --input_key question \
    --output_key response \
-   --train_batch_size 256 \
-   --micro_train_batch_size 2 \
+   --train_batch_size 128 \
+   --micro_train_batch_size 1 \
    --max_samples 500000 \
    --pretrain meta-llama/Meta-Llama-3-8B \
    --save_path ./checkpoint/llama3-8b-sft \
@@ -17,10 +17,9 @@
    --zero_stage 2 \
    --max_epochs 1 \
    --bf16 \
-   --attn_implementation flash_attention_2 \
+   --flash_attn \
    --learning_rate 5e-6 \
    --load_checkpoint \
-   --packing_samples \
    --gradient_checkpointing
 EOF
 # --wandb [WANDB_TOKENS]
@@ -28,4 +27,4 @@ EOF
 
 if [[ ${1} != "slurm" ]]; then
    deepspeed --module $training_commands
-f
\ No newline at end of file
+fi
\ No newline at end of file
diff --git a/nlp/llm/llama3_8b/openorca/README.md b/nlp/llm/mixtral/openrlhf/README.md
similarity index 40%
rename from nlp/llm/llama3_8b/openorca/README.md
rename to nlp/llm/mixtral/openrlhf/README.md
index c1b126179..622b90b58 100644
--- a/nlp/llm/llama3_8b/openorca/README.md
+++ b/nlp/llm/mixtral/openrlhf/README.md
@@ -1,27 +1,34 @@
-# Llama3-8B (OpenRLHF)
+# Mixtral 8x7B (OpenRLHF)
 
 ## Model Description
 
-Llama3-8B is an advanced auto-regressive language model developed by Meta, featuring 8 billion parameters. It utilizes
-an optimized transformer architecture with Grouped-Query Attention (GQA) for improved inference efficiency. Trained on
-sequences of 8,192 tokens and using a 128K token vocabulary, it excels in various natural language tasks. The model
-incorporates supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human
-preferences, ensuring both helpfulness and safety in its responses. Llama3-8B offers state-of-the-art performance in
-language understanding and generation.
+The Mixtral model is a Mixture of Experts (MoE)-based large language model developed by Mistral AI, an innovative
+company focusing on open-source AI models. Mixtral is designed to achieve high performance while maintaining
+computational efficiency, making it an excellent choice for real-world applications.
 
 ## Supported Environments
 
 | GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release |
 | :----: | :----: | :----: |
-| BI-V150 | 4.3.0 | 25.06 |
+| BI-V150 | 4.3.0 | 25.09 |
 
 ## Model Preparation
 
-### Install OpenRLHF
+### Prepare Resources
 
 ```sh
-# install
 git clone https://github.com/OpenRLHF/OpenRLHF.git -b v0.5.7
+cd OpenRLHF/examples/scripts/
+# get datasets from huggingface Open-Orca/OpenOrca
+# get model from https://huggingface.co/mistralai/Mixtral-8x7B-v0.1
+
+```
+
+### Install Dependencies
+
+```sh
+# install
+cp requirements.txt OpenRLHF/requirements.txt
 cd OpenRLHF
 pip install -e .
 ```
@@ -32,13 +39,9 @@ pip install -e .
 # Make sure you have 16 BI-V150 GPUs available
 cp *.sh OpenRLHF/examples/scripts/
 cd OpenRLHF/examples/scripts/
-
-# train sft
-bash train_sft_llama.sh
+bash train_sft_mixtral_lora.sh
 ```
 
-Tips: if you hit an out-of-memory (OOM) error during training, reduce micro_train_batch_size accordingly.
-
 ## References
 
 - [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF)
diff --git a/nlp/llm/mixtral/openrlhf/requirements.txt b/nlp/llm/mixtral/openrlhf/requirements.txt
new file mode 100644
index 000000000..47aa5ab23
--- /dev/null
+++ b/nlp/llm/mixtral/openrlhf/requirements.txt
@@ -0,0 +1,18 @@
+accelerate
+bitsandbytes
+datasets
+einops
+isort
+jsonlines
+loralib
+optimum
+packaging
+peft
+ray
+tensorboard
+torch
+torchmetrics
+tqdm
+transformers_stream_generator
+wandb
+wheel
\ No newline at end of file
diff --git a/nlp/llm/mixtral/openrlhf/train_sft_mixtral_lora.sh b/nlp/llm/mixtral/openrlhf/train_sft_mixtral_lora.sh
new file mode 100644
index 000000000..f702d4554
--- /dev/null
+++ b/nlp/llm/mixtral/openrlhf/train_sft_mixtral_lora.sh
@@ -0,0 +1,30 @@
+set -x
+
+read -r -d '' training_commands <