diff --git a/nlp/llm/qwen1.5-7b/README.md b/nlp/llm/qwen1.5-7b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..40bd566b9af5ff32303495a11aa3c982228c4167 --- /dev/null +++ b/nlp/llm/qwen1.5-7b/README.md @@ -0,0 +1,45 @@ +# Qwen1.5-7B + +## Model description + +Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previously released Qwen, the improvements include: 8 model sizes, including 0.5B, 1.8B, 4B, 7B, 14B, 32B and 72B dense models, and an MoE model of 14B with 2.7B activated; significant performance improvement in chat models; multilingual support of both base and chat models; stable support of 32K context length for models of all sizes; no need for trust_remote_code. + +## Step 1: Installation + +```bash +# install firefly +pushd /toolbox/firefly +pip3 install -r requirements.txt +python3 setup.py develop +popd +``` + +## Step 2: Preparing datasets and checkpoints + +```bash +mkdir -p /home/datasets/nlp +git clone -b school_math_0.25M git@gitee.com:sanghui-ilu/datasets.git +mv datasets/school_math_0.25M.jsonl /home/datasets/nlp +rm -rf datasets + +mkdir -p /home/model_zoo/nlp +pip install modelscope +python3 ./get_Qwen1.5.py --model=Qwen1.5-7B +mv /root/.cache/modelscope/hub/qwen/Qwen1___5-7B /home/model_zoo/nlp +``` + +## Step 3: Training + +```bash +bash train.sh 16 configs/qwen-7b-sft-full.json full +``` + +## Results + +| No. 
| model | peft | num_gpus | train_samples_per_second | +| --- | ---------- | -------- | -------- | ------------------------ | +| 1 | Qwen1.5-7B | Full sft | 16 | 9.805 | + +## Reference + +- [Firefly](https://github.com/yangjianxin1/Firefly) diff --git a/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config.json b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..914e1368ed2bab7f94b3cbf4a20b225d504207ab --- /dev/null +++ b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config.json @@ -0,0 +1,41 @@ +{ + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 200, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false, + + "optimizer": { + "type": "Adam", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + }, + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "zero_optimization": { + "stage": 2, + "allgather_partitions": true, + "allgather_bucket_size": 5e8, + "reduce_scatter": true, + "reduce_bucket_size": 5e8 + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } + } +} \ No newline at end of file diff --git a/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config_bf16.json b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config_bf16.json new file mode 100644 index 0000000000000000000000000000000000000000..7bf2f8567e6433020a3cf924e2c450c89fa3a5f9 --- /dev/null +++ b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config_bf16.json @@ -0,0 +1,36 @@ +{ + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 200, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false, + + "optimizer": { + "type": "Adam", + 
"params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + }, + "bf16": { + "enabled": true + }, + "zero_optimization": { + "stage": 2, + "allgather_partitions": true, + "allgather_bucket_size": 5e8, + "reduce_scatter": true, + "reduce_bucket_size": 5e8 + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } + } +} \ No newline at end of file diff --git a/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config_offload.json b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config_offload.json new file mode 100644 index 0000000000000000000000000000000000000000..e752e410e80fd4a9dd0ffe6809a951228d89c97d --- /dev/null +++ b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config_offload.json @@ -0,0 +1,49 @@ +{ + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 200, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false, + + "optimizer": { + "type": "Adam", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + }, + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "zero_optimization": { + "stage": 2, + "allgather_partitions": true, + "allgather_bucket_size": 5e8, + "reduce_scatter": true, + "reduce_bucket_size": 5e8, + "offload_optimizer": { + "device": "cpu", + "pin_memory": true + }, + "offload_param": { + "device": "cpu", + "pin_memory": true + } + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } + } +} \ No newline at end of file diff --git a/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config.json b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..18a0cee39ebc393d111e58c79579b7936301a4e6 --- /dev/null +++ b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config.json @@ -0,0 +1,46 @@ +{ + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 200, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false, + + "optimizer": { + "type": "Adam", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + }, + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "zero_optimization": { + "stage": 3, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": true + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } + } +} \ No newline at end of file diff --git a/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config_bf16.json b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config_bf16.json new file mode 100644 index 0000000000000000000000000000000000000000..b113d621a8678cc6c3dd674b667dc5d5da3d33d0 --- /dev/null +++ b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config_bf16.json @@ -0,0 +1,41 @@ +{ + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 200, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false, + + "optimizer": { + "type": "Adam", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + }, + "bf16": { + "enabled": true + }, + "zero_optimization": { + "stage": 3, + 
"overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": true + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } + } +} \ No newline at end of file diff --git a/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config_offload.json b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config_offload.json new file mode 100644 index 0000000000000000000000000000000000000000..15a8125c7014b3df651eda856392c4faaa233206 --- /dev/null +++ b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config_offload.json @@ -0,0 +1,54 @@ +{ + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 200, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false, + + "optimizer": { + "type": "Adam", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + }, + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "zero_optimization": { + "stage": 3, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": true, + "offload_optimizer": { + "device": "cpu", + "pin_memory": true + }, + "offload_param": { + "device": "cpu", + "pin_memory": true + } + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" 
+ } + } +} \ No newline at end of file diff --git a/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-full.json b/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-full.json new file mode 100644 index 0000000000000000000000000000000000000000..b5f826eaf82615ef53cac2fd1ea7feaaf260ab6c --- /dev/null +++ b/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-full.json @@ -0,0 +1,36 @@ +{ + "output_dir": "output/firefly-qwen-7b-full", + "model_name_or_path": "/home/model_zoo/nlp/Qwen1___5-7B", + "deepspeed": "configs/ds_config/ds_z3_config_bf16.json", + "train_file": "/home/datasets/nlp/school_math_0.25M.jsonl", + "num_train_epochs": 1, + "max_steps": 50, + "per_device_train_batch_size": 4, + "gradient_accumulation_steps": 4, + "learning_rate": 1e-5, + "max_seq_length": 2048, + "logging_steps": 200, + "save_steps": 500, + "save_total_limit": 1, + "lr_scheduler_type": "cosine", + "warmup_steps": 1000, + + "template_name": "llama2", + "peft_type": "full", + "task_type": "sft", + + "gradient_checkpointing": true, + "disable_tqdm": false, + "optim": "adamw_hf", + "seed": 42, + "bf16": true, + "report_to": "tensorboard", + "dataloader_num_workers": 5, + "save_strategy": "steps", + "weight_decay": 0, + "max_grad_norm": 1.0, + "remove_unused_columns": false +} + + + diff --git a/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-lora.json b/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-lora.json new file mode 100644 index 0000000000000000000000000000000000000000..be11e877cd87f6119e36aaad59e0e193684b61d1 --- /dev/null +++ b/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-lora.json @@ -0,0 +1,36 @@ +{ + "output_dir": "output/firefly-qwen-7b", + "model_name_or_path": "checkpoint/Qwen1.5-7B", + "train_file": "./data/school_math_0.25M.jsonl", + "num_train_epochs": 1, + "max_steps": 50, + "per_device_train_batch_size": 4, + "gradient_accumulation_steps": 2, + "learning_rate": 1e-4, + "max_seq_length": 1024, + "logging_steps": 300, + "save_steps": 500, + "save_total_limit": 1, + "lr_scheduler_type": "constant_with_warmup", + "warmup_steps": 3000, 
+ + "template_name": "qwen", + "peft_type": "lora", + "task_type": "sft", + "lora_rank": 64, + "lora_alpha": 16, + "lora_dropout": 0.05, + + "gradient_checkpointing": true, + "disable_tqdm": false, + "optim": "adamw_hf", + "seed": 42, + "bf16": true, + "report_to": "tensorboard", + "dataloader_num_workers": 10, + "save_strategy": "steps", + "weight_decay": 0, + "max_grad_norm": 0.3, + "remove_unused_columns": false, + "gradient_checkpointing_kwargs": {"use_reentrant":false} +} diff --git a/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-ptuning.json b/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-ptuning.json new file mode 100644 index 0000000000000000000000000000000000000000..704851b62385eae3ca32b01d706ded73494b93f5 --- /dev/null +++ b/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-ptuning.json @@ -0,0 +1,36 @@ +{ + "output_dir": "output/firefly-qwen-7b", + "model_name_or_path": "checkpoint/Qwen1.5-7B", + "train_file": "./data/school_math_0.25M.jsonl", + "num_train_epochs": 1, + "per_device_train_batch_size": 4, + "gradient_accumulation_steps": 2, + "learning_rate": 1e-4, + "max_seq_length": 1024, + "logging_steps": 300, + "save_steps": 500, + "save_total_limit": 1, + "lr_scheduler_type": "constant_with_warmup", + "warmup_steps": 3000, + + "template_name": "qwen", + "peft_type": "ptuning", + "task_type": "sft", + "num_virtual_tokens": 20, + "num_attention_heads": 32, + "encoder_num_layers": 32, + "encoder_hidden_size": 768, + "token_dim": 4096, + + "gradient_checkpointing": true, + "disable_tqdm": false, + "optim": "adamw_hf", + "seed": 42, + "bf16": true, + "report_to": "tensorboard", + "dataloader_num_workers": 10, + "save_strategy": "steps", + "weight_decay": 0, + "max_grad_norm": 0.3, + "remove_unused_columns": false +} diff --git a/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-qlora.json b/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-qlora.json new file mode 100644 index 0000000000000000000000000000000000000000..86b84df98802c7693f0fceeacb344061fa6f22bd --- /dev/null +++ 
b/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-qlora.json @@ -0,0 +1,35 @@ +{ + "output_dir": "output/firefly-qwen-7b", + "model_name_or_path": "checkpoint/Qwen1.5-7B", + "train_file": "./data/school_math_0.25M.jsonl", + "num_train_epochs": 1, + "per_device_train_batch_size": 6, + "gradient_accumulation_steps": 2, + "learning_rate": 2e-4, + "max_seq_length": 1024, + "logging_steps": 300, + "save_steps": 500, + "save_total_limit": 1, + "lr_scheduler_type": "constant_with_warmup", + "warmup_steps": 3000, + + "template_name": "qwen", + "peft_type": "qlora", + "task_type": "sft", + "lora_rank": 64, + "lora_alpha": 16, + "lora_dropout": 0.05, + + "gradient_checkpointing": true, + "disable_tqdm": false, + "optim": "adamw_hf", + "seed": 42, + "fp16": true, + "report_to": "tensorboard", + "dataloader_num_workers": 0, + "save_strategy": "steps", + "weight_decay": 0, + "max_grad_norm": 0.3, + "remove_unused_columns": false, + "gradient_checkpointing_kwargs": {"use_reentrant":false} +} diff --git a/nlp/llm/qwen1.5-7b/get_Qwen1.5.py b/nlp/llm/qwen1.5-7b/get_Qwen1.5.py new file mode 100644 index 0000000000000000000000000000000000000000..6827407ac47e3f4f48439717a0862c1539bbc462 --- /dev/null +++ b/nlp/llm/qwen1.5-7b/get_Qwen1.5.py @@ -0,0 +1,17 @@ +#模型下载 +from modelscope import snapshot_download +import argparse + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--model', default=None, type=str, help="appoint the model to download, can be Qwen1.5-7B or Qwen1.5-14B") + + args = parser.parse_args() + + if args.model == "Qwen1.5-7B": + model_dir = snapshot_download('qwen/Qwen1.5-7B') + elif args.model == "Qwen1.5-14B": + model_dir = snapshot_download('qwen/Qwen1.5-14B') + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/nlp/llm/qwen1.5-7b/main.py b/nlp/llm/qwen1.5-7b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..c92544a85bedd50260d5d1bb9a7754dfe66067c3 --- /dev/null +++ b/nlp/llm/qwen1.5-7b/main.py @@ 
-0,0 +1,26 @@ +import sys +from loguru import logger +from firefly.train import setup_everything, init_components + + +def train(): + # 进行一些配置和检查 + args, training_args = setup_everything() + # 加载各种组件 + trainer = init_components(args, training_args) + # 开始训练 + logger.info("*** starting training ***") + # todo resume from checkpoint + # https://github.com/huggingface/transformers/issues/24252 + train_result = trainer.train() + # 保存最后的checkpoint + trainer.save_model(training_args.output_dir) # Saves the tokenizer too + # 保存训练指标 + metrics = train_result.metrics + trainer.log_metrics("train", metrics) + trainer.save_metrics("train", metrics) + trainer.save_state() + + +if __name__ == "__main__": + train() \ No newline at end of file diff --git a/nlp/llm/qwen1.5-7b/train.sh b/nlp/llm/qwen1.5-7b/train.sh new file mode 100644 index 0000000000000000000000000000000000000000..0bc978c255dd38a1947773905e3536ccc7b60af5 --- /dev/null +++ b/nlp/llm/qwen1.5-7b/train.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
# Launch fine-tuning with torchrun on the local node.
#
# Usage: bash train.sh <num_gpus> <config_file> <peft_type>
#   num_gpus     number of processes torchrun starts (--nproc_per_node)
#   config_file  JSON file handed to main.py as --train_args_file
#   peft_type    value handed to main.py as --peft_type
#
# Example:
#   bash train.sh 16 configs/qwen-7b-sft-full.json full

set -euo pipefail

if (( $# < 3 )); then
  printf 'Usage: %s <num_gpus> <config_file> <peft_type>\n' "${0##*/}" >&2
  exit 2
fi

NUM_GPUS=$1
CONFIG_FILE=$2
PEFT=$3
MODELS_DIR="$PWD/models"
CHECKPOINT_DIR="$PWD/checkpoint"

if [[ ! -f "$CONFIG_FILE" ]]; then
  printf 'error: config file not found: %s\n' "$CONFIG_FILE" >&2
  exit 1
fi

# Pick a random port in 10000-65535 for torchrun's --master_port.
MASTER_PORT=$(shuf -n 1 -i 10000-65535)

# Copy the model-structure files from models/<name>/ into every
# checkpoint/<dir>/ whose directory name contains <name>, so they sit next to
# the pretrained weights when the checkpoint is loaded.
# nullglob: a missing or empty directory yields zero iterations instead of a
# literal, unmatched pattern being passed on to cp.
shopt -s nullglob
for source_dir in "$MODELS_DIR"/*/; do
  source_dir=${source_dir%/}
  source_dir_name=${source_dir##*/}

  for dest_dir in "$CHECKPOINT_DIR"/*/; do
    dest_dir=${dest_dir%/}
    dest_dir_name=${dest_dir##*/}

    if [[ "$dest_dir_name" == *"$source_dir_name"* ]]; then
      model_files=("$source_dir"/*)
      if (( ${#model_files[@]} > 0 )); then
        echo "Copying files from $source_dir to $dest_dir"
        cp -r -- "${model_files[@]}" "$dest_dir"/
      fi
    fi
  done
done
shopt -u nullglob

echo "==> Training with $NUM_GPUS gpus | config=$CONFIG_FILE | peft=$PEFT"

torchrun --master_port "$MASTER_PORT" --nproc_per_node="$NUM_GPUS" \
  main.py --train_args_file "$CONFIG_FILE" --peft_type "$PEFT"