From 3a186d66b7c3e8f73c9ddf8b94d3eea2759bd424 Mon Sep 17 00:00:00 2001 From: sanghui_ilu Date: Sat, 20 Jul 2024 19:09:38 +0800 Subject: [PATCH 1/4] add Qwen1.5-7B model link #IA9UFQ add Qwen1.5-7B model Signed-off-by: sanghui_ilu --- nlp/llm/Qwen1.5-7B/README.md | 44 +++++++++++++++ .../configs/ds_config/ds_z2_config.json | 41 ++++++++++++++ .../configs/ds_config/ds_z2_config_bf16.json | 36 +++++++++++++ .../ds_config/ds_z2_config_offload.json | 49 +++++++++++++++++ .../configs/ds_config/ds_z3_config.json | 46 ++++++++++++++++ .../configs/ds_config/ds_z3_config_bf16.json | 41 ++++++++++++++ .../ds_config/ds_z3_config_offload.json | 54 +++++++++++++++++++ .../Qwen1.5-7B/configs/qwen-7b-sft-full.json | 36 +++++++++++++ .../Qwen1.5-7B/configs/qwen-7b-sft-lora.json | 36 +++++++++++++ .../configs/qwen-7b-sft-ptuning.json | 36 +++++++++++++ .../Qwen1.5-7B/configs/qwen-7b-sft-qlora.json | 35 ++++++++++++ nlp/llm/Qwen1.5-7B/get_Qwen1.5.py | 17 ++++++ nlp/llm/Qwen1.5-7B/main.py | 26 +++++++++ nlp/llm/Qwen1.5-7B/train.sh | 34 ++++++++++++ 14 files changed, 531 insertions(+) create mode 100644 nlp/llm/Qwen1.5-7B/README.md create mode 100644 nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config.json create mode 100644 nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config_bf16.json create mode 100644 nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config_offload.json create mode 100644 nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config.json create mode 100644 nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config_bf16.json create mode 100644 nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config_offload.json create mode 100644 nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-full.json create mode 100644 nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-lora.json create mode 100644 nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-ptuning.json create mode 100644 nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-qlora.json create mode 100644 nlp/llm/Qwen1.5-7B/get_Qwen1.5.py create mode 100644 nlp/llm/Qwen1.5-7B/main.py create mode 100644 nlp/llm/Qwen1.5-7B/train.sh diff --git a/nlp/llm/Qwen1.5-7B/README.md b/nlp/llm/Qwen1.5-7B/README.md new file mode 100644 index 000000000..dff61d9a0 --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/README.md @@ -0,0 +1,44 @@ +# Qwen1.5-7B + +## Model description +Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen, the improvements include:8 model sizes, including 0.5B, 1.8B, 4B, 7B, 14B, 32B and 72B dense models, and an MoE model of 14B with 2.7B activated;Significant performance improvement in Chat models;Multilingual support of both base and chat models;Stable support of 32K context length for models of all sizes;No need of trust_remote_code. + + +## Step 1: Installation + +```bash +# install firefly +pushd /toolbox/firefly +pip3 install -r requirements.txt +python3 setup.py develop +popd +``` + +## Step 2: Preparing datasets and checkpoints + +```bash +mkdir -p /home/datasets/nlp +git clone -b school_math_0.25M git@gitee.com:sanghui-ilu/datasets.git +mv datasets/school_math_0.25M.jsonl /home/datasets/nlp +rm -rf datasets + +mkdir -p /home/model_zoo/nlp +pip install modelscope +python3 ./get_Qwen1.5-7B.py --model=Qwen1.5-7B +mv /root/.cache/modelscope/hub/qwen/Qwen1___5-7B /home/model_zoo/nlp +``` + +## Step 3: Training +```bash +$ bash train.sh 16 configs/qwen-7b-sft-full.json full +``` + +## Results + +| No. 
| model | peft | num_gpus |train_samples_per_second | +| ---- | --------- | ----------- | ------------------ | ---------------------- | +| 1 | qwn-7B | Full sft | 16 | 9.805 | + +## Reference + +- [Firefly](https://github.com/yangjianxin1/Firefly) diff --git a/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config.json b/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config.json new file mode 100644 index 000000000..914e1368e --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config.json @@ -0,0 +1,41 @@ +{ + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 200, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false, + + "optimizer": { + "type": "Adam", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + }, + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "zero_optimization": { + "stage": 2, + "allgather_partitions": true, + "allgather_bucket_size": 5e8, + "reduce_scatter": true, + "reduce_bucket_size": 5e8 + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } + } +} \ No newline at end of file diff --git a/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config_bf16.json b/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config_bf16.json new file mode 100644 index 000000000..7bf2f8567 --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config_bf16.json @@ -0,0 +1,36 @@ +{ + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 200, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false, + + "optimizer": { + "type": "Adam", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + }, + "bf16": { + "enabled": true + }, + "zero_optimization": { + "stage": 2, + "allgather_partitions": true, + "allgather_bucket_size": 5e8, + "reduce_scatter": true, + "reduce_bucket_size": 5e8 + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } + } +} \ No newline at end of file diff --git a/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config_offload.json b/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config_offload.json new file mode 100644 index 000000000..e752e410e --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config_offload.json @@ -0,0 +1,49 @@ +{ + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 200, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false, + + "optimizer": { + "type": "Adam", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + }, + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "zero_optimization": { + "stage": 2, + "allgather_partitions": true, + "allgather_bucket_size": 5e8, + "reduce_scatter": true, + "reduce_bucket_size": 5e8, + "offload_optimizer": { + "device": "cpu", + "pin_memory": true + }, + "offload_param": { + "device": "cpu", + "pin_memory": true + } + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + 
"warmup_num_steps": "auto" + } + } +} \ No newline at end of file diff --git a/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config.json b/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config.json new file mode 100644 index 000000000..18a0cee39 --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config.json @@ -0,0 +1,46 @@ +{ + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 200, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false, + + "optimizer": { + "type": "Adam", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + }, + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "zero_optimization": { + "stage": 3, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": true + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } + } +} \ No newline at end of file diff --git a/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config_bf16.json b/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config_bf16.json new file mode 100644 index 000000000..b113d621a --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config_bf16.json @@ -0,0 +1,41 @@ +{ + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 200, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false, + + "optimizer": { + "type": "Adam", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + }, + "bf16": { + "enabled": true + }, + "zero_optimization": { + "stage": 3, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": true + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } + } +} \ No newline at end of file diff --git a/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config_offload.json b/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config_offload.json new file mode 100644 index 000000000..15a8125c7 --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config_offload.json @@ -0,0 +1,54 @@ +{ + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 200, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false, + + "optimizer": { + "type": "Adam", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + }, + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "zero_optimization": { + "stage": 3, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + 
"stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": true, + "offload_optimizer": { + "device": "cpu", + "pin_memory": true + }, + "offload_param": { + "device": "cpu", + "pin_memory": true + } + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } + } +} \ No newline at end of file diff --git a/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-full.json b/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-full.json new file mode 100644 index 000000000..b5f826eaf --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-full.json @@ -0,0 +1,36 @@ +{ + "output_dir": "output/firefly-qwen-7b-full", + "model_name_or_path": "/home/model_zoo/nlp/Qwen1___5-7B", + "deepspeed": "configs/ds_config/ds_z3_config_bf16.json", + "train_file": "/home/datasets/nlp/school_math_0.25M.jsonl", + "num_train_epochs": 1, + "max_steps": 50, + "per_device_train_batch_size": 4, + "gradient_accumulation_steps": 4, + "learning_rate": 1e-5, + "max_seq_length": 2048, + "logging_steps": 200, + "save_steps": 500, + "save_total_limit": 1, + "lr_scheduler_type": "cosine", + "warmup_steps": 1000, + + "template_name": "llama2", + "peft_type": "full", + "task_type": "sft", + + "gradient_checkpointing": true, + "disable_tqdm": false, + "optim": "adamw_hf", + "seed": 42, + "bf16": true, + "report_to": "tensorboard", + "dataloader_num_workers": 5, + "save_strategy": "steps", + "weight_decay": 0, + "max_grad_norm": 1.0, + "remove_unused_columns": false +} + + + diff --git a/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-lora.json b/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-lora.json new file mode 100644 index 000000000..be11e877c --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-lora.json @@ -0,0 +1,36 @@ +{ + "output_dir": "output/firefly-qwen-7b", + "model_name_or_path": "checkpoint/Qwen1.5-7B", + "train_file": "./data/school_math_0.25M.jsonl", + "num_train_epochs": 1, + "max_steps": 50, + "per_device_train_batch_size": 4, + "gradient_accumulation_steps": 2, + "learning_rate": 1e-4, + "max_seq_length": 1024, + "logging_steps": 300, + "save_steps": 500, + "save_total_limit": 1, + "lr_scheduler_type": "constant_with_warmup", + "warmup_steps": 3000, + + "template_name": "qwen", + "peft_type": "lora", + "task_type": "sft", + "lora_rank": 64, + "lora_alpha": 16, + "lora_dropout": 0.05, + + "gradient_checkpointing": true, + "disable_tqdm": false, + "optim": "adamw_hf", + "seed": 42, + "bf16": true, + "report_to": "tensorboard", + "dataloader_num_workers": 10, + "save_strategy": "steps", + "weight_decay": 0, + "max_grad_norm": 0.3, + "remove_unused_columns": false, + "gradient_checkpointing_kwargs": {"use_reentrant":false} +} diff --git a/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-ptuning.json b/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-ptuning.json new file mode 100644 index 000000000..704851b62 --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-ptuning.json @@ -0,0 +1,36 @@ +{ + "output_dir": "output/firefly-qwen-7b", + "model_name_or_path": "checkpoint/Qwen1.5-7B", + "train_file": "./data/school_math_0.25M.jsonl", + "num_train_epochs": 1, + "per_device_train_batch_size": 4, + "gradient_accumulation_steps": 2, + "learning_rate": 1e-4, + "max_seq_length": 1024, + "logging_steps": 300, + "save_steps": 500, + "save_total_limit": 1, + "lr_scheduler_type": "constant_with_warmup", + "warmup_steps": 3000, + + 
"template_name": "qwen", + "peft_type": "ptuning", + "task_type": "sft", + "num_virtual_tokens": 20, + "num_attention_heads": 32, + "encoder_num_layers": 32, + "encoder_hidden_size": 768, + "token_dim": 4096, + + "gradient_checkpointing": true, + "disable_tqdm": false, + "optim": "adamw_hf", + "seed": 42, + "bf16": true, + "report_to": "tensorboard", + "dataloader_num_workers": 10, + "save_strategy": "steps", + "weight_decay": 0, + "max_grad_norm": 0.3, + "remove_unused_columns": false +} diff --git a/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-qlora.json b/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-qlora.json new file mode 100644 index 000000000..86b84df98 --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-qlora.json @@ -0,0 +1,35 @@ +{ + "output_dir": "output/firefly-qwen-7b", + "model_name_or_path": "checkpoint/Qwen1.5-7B", + "train_file": "./data/school_math_0.25M.jsonl", + "num_train_epochs": 1, + "per_device_train_batch_size": 6, + "gradient_accumulation_steps": 2, + "learning_rate": 2e-4, + "max_seq_length": 1024, + "logging_steps": 300, + "save_steps": 500, + "save_total_limit": 1, + "lr_scheduler_type": "constant_with_warmup", + "warmup_steps": 3000, + + "template_name": "qwen", + "peft_type": "qlora", + "task_type": "sft", + "lora_rank": 64, + "lora_alpha": 16, + "lora_dropout": 0.05, + + "gradient_checkpointing": true, + "disable_tqdm": false, + "optim": "adamw_hf", + "seed": 42, + "fp16": true, + "report_to": "tensorboard", + "dataloader_num_workers": 0, + "save_strategy": "steps", + "weight_decay": 0, + "max_grad_norm": 0.3, + "remove_unused_columns": false, + "gradient_checkpointing_kwargs": {"use_reentrant":false} +} diff --git a/nlp/llm/Qwen1.5-7B/get_Qwen1.5.py b/nlp/llm/Qwen1.5-7B/get_Qwen1.5.py new file mode 100644 index 000000000..6827407ac --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/get_Qwen1.5.py @@ -0,0 +1,17 @@ +#模型下载 +from modelscope import snapshot_download +import argparse + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--model', default=None, type=str, help="appoint the model to download, can be Qwen1.5-7B or Qwen1.5-14B") + + args = parser.parse_args() + + if args.model == "Qwen1.5-7B": + model_dir = snapshot_download('qwen/Qwen1.5-7B') + elif args.model == "Qwen1.5-14B": + model_dir = snapshot_download('qwen/Qwen1.5-14B') + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/nlp/llm/Qwen1.5-7B/main.py b/nlp/llm/Qwen1.5-7B/main.py new file mode 100644 index 000000000..c92544a85 --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/main.py @@ -0,0 +1,26 @@ +import sys +from loguru import logger +from firefly.train import setup_everything, init_components + + +def train(): + # 进行一些配置和检查 + args, training_args = setup_everything() + # 加载各种组件 + trainer = init_components(args, training_args) + # 开始训练 + logger.info("*** starting training ***") + # todo resume from checkpoint + # https://github.com/huggingface/transformers/issues/24252 + train_result = trainer.train() + # 保存最后的checkpoint + trainer.save_model(training_args.output_dir) # Saves the tokenizer too + # 保存训练指标 + metrics = train_result.metrics + trainer.log_metrics("train", metrics) + trainer.save_metrics("train", metrics) + trainer.save_state() + + +if __name__ == "__main__": + train() \ No newline at end of file diff --git a/nlp/llm/Qwen1.5-7B/train.sh b/nlp/llm/Qwen1.5-7B/train.sh new file mode 100644 index 000000000..7920b53a6 --- /dev/null +++ b/nlp/llm/Qwen1.5-7B/train.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +MASTER_PORT=$(shuf -n 1 -i 10000-65535) +NUM_GPUS=$1 +CONFIG_FILE=$2 
+PEFT=$3
+MODELS_DIR="$PWD/models"
+CHECKPOINT_DIR="$PWD/checkpoint"
+
+# Copy the model structure files into the matching checkpoint directories so they are available when the checkpoints are loaded
+for source_dir in "$MODELS_DIR"/*; do
+    if [ -d "$source_dir" ]; then
+        source_dir_name=$(basename "$source_dir")
+
+        for dest_dir in "$CHECKPOINT_DIR"/*; do
+            if [ -d "$dest_dir" ]; then
+                dest_dir_name=$(basename "$dest_dir")
+
+                if [[ "$dest_dir_name" == *"$source_dir_name"* ]]; then
+                    echo "Copying files from $source_dir to $dest_dir"
+                    cp -r "$source_dir"/* "$dest_dir"
+                fi
+            fi
+        done
+    fi
+done
+
+
+echo "==> Training with $NUM_GPUS gpus | config=$CONFIG_FILE | peft=$PEFT"
+
+torchrun --master_port $MASTER_PORT --nproc_per_node=$NUM_GPUS main.py --train_args_file $CONFIG_FILE --peft_type $PEFT
+
+# for example
+# bash run_peft.sh 1 configs/llama2-sft-qlora.json qlora
\ No newline at end of file
--
Gitee

From 28c8a5ec5b663ec91324a658f0fd0bc9ac787b7a Mon Sep 17 00:00:00 2001
From: majorli
Date: Thu, 1 Aug 2024 16:59:29 +0800
Subject: [PATCH 2/4] format qwen1.5 readme docs

Signed-off-by: majorli
---
 nlp/llm/Qwen1.5-7B/README.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/nlp/llm/Qwen1.5-7B/README.md b/nlp/llm/Qwen1.5-7B/README.md
index dff61d9a0..40bd566b9 100644
--- a/nlp/llm/Qwen1.5-7B/README.md
+++ b/nlp/llm/Qwen1.5-7B/README.md
@@ -1,8 +1,8 @@
 # Qwen1.5-7B
 
 ## Model description
-Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen, the improvements include:8 model sizes, including 0.5B, 1.8B, 4B, 7B, 14B, 32B and 72B dense models, and an MoE model of 14B with 2.7B activated;Significant performance improvement in Chat models;Multilingual support of both base and chat models;Stable support of 32K context length for models of all sizes;No need of trust_remote_code. 
+Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previously released Qwen, the improvements include: 8 model sizes (0.5B, 1.8B, 4B, 7B, 14B, 32B, and 72B dense models, plus a 14B MoE model with 2.7B activated parameters); significant performance improvements in Chat models; multilingual support for both base and chat models; stable support of 32K context length for models of all sizes; and no need for trust_remote_code.
 
 
 ## Step 1: Installation
 
 ```bash
@@ -29,15 +29,16 @@ mv /root/.cache/modelscope/hub/qwen/Qwen1___5-7B /home/model_zoo/nlp
 ```
 
 ## Step 3: Training
+
 ```bash
-$ bash train.sh 16 configs/qwen-7b-sft-full.json full
+bash train.sh 16 configs/qwen-7b-sft-full.json full
 ```
 
 ## Results
 
-| No. | model | peft | num_gpus |train_samples_per_second |
-| ---- | --------- | ----------- | ------------------ | ---------------------- |
-| 1 | qwn-7B | Full sft | 16 | 9.805 |
+| No. 
| model | peft | num_gpus | train_samples_per_second | +| --- | ---------- | -------- | -------- | ------------------------ | +| 1 | Qwen1.5-7B | Full sft | 16 | 9.805 | ## Reference -- Gitee From 4d251fc404a48e4ca793065f0c2874154ba54fef Mon Sep 17 00:00:00 2001 From: majorli Date: Thu, 1 Aug 2024 17:01:23 +0800 Subject: [PATCH 3/4] rename qwen1.5 7b location Signed-off-by: majorli --- nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/README.md | 0 .../configs/ds_config/ds_z2_config.json | 0 .../configs/ds_config/ds_z2_config_bf16.json | 0 .../configs/ds_config/ds_z2_config_offload.json | 0 .../configs/ds_config/ds_z3_config.json | 0 .../configs/ds_config/ds_z3_config_bf16.json | 0 .../configs/ds_config/ds_z3_config_offload.json | 0 nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/qwen-7b-sft-full.json | 0 nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/qwen-7b-sft-lora.json | 0 .../{Qwen1.5-7B => qwen1.5-7b}/configs/qwen-7b-sft-ptuning.json | 0 nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/qwen-7b-sft-qlora.json | 0 nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/get_Qwen1.5.py | 0 nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/main.py | 0 nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/train.sh | 0 14 files changed, 0 insertions(+), 0 deletions(-) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/README.md (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/ds_config/ds_z2_config.json (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/ds_config/ds_z2_config_bf16.json (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/ds_config/ds_z2_config_offload.json (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/ds_config/ds_z3_config.json (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/ds_config/ds_z3_config_bf16.json (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/ds_config/ds_z3_config_offload.json (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/qwen-7b-sft-full.json (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/qwen-7b-sft-lora.json (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/qwen-7b-sft-ptuning.json (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/configs/qwen-7b-sft-qlora.json (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/get_Qwen1.5.py (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/main.py (100%) rename nlp/llm/{Qwen1.5-7B => qwen1.5-7b}/train.sh (100%) diff --git a/nlp/llm/Qwen1.5-7B/README.md b/nlp/llm/qwen1.5-7b/README.md similarity index 100% rename from nlp/llm/Qwen1.5-7B/README.md rename to nlp/llm/qwen1.5-7b/README.md diff --git a/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config.json b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config.json similarity index 100% rename from nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config.json rename to nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config.json diff --git a/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config_bf16.json b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config_bf16.json similarity index 100% rename from nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config_bf16.json rename to nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config_bf16.json diff --git a/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config_offload.json b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config_offload.json similarity index 100% rename from nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z2_config_offload.json rename to nlp/llm/qwen1.5-7b/configs/ds_config/ds_z2_config_offload.json diff --git a/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config.json b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config.json similarity index 100% rename from 
nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config.json rename to nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config.json diff --git a/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config_bf16.json b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config_bf16.json similarity index 100% rename from nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config_bf16.json rename to nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config_bf16.json diff --git a/nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config_offload.json b/nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config_offload.json similarity index 100% rename from nlp/llm/Qwen1.5-7B/configs/ds_config/ds_z3_config_offload.json rename to nlp/llm/qwen1.5-7b/configs/ds_config/ds_z3_config_offload.json diff --git a/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-full.json b/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-full.json similarity index 100% rename from nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-full.json rename to nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-full.json diff --git a/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-lora.json b/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-lora.json similarity index 100% rename from nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-lora.json rename to nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-lora.json diff --git a/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-ptuning.json b/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-ptuning.json similarity index 100% rename from nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-ptuning.json rename to nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-ptuning.json diff --git a/nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-qlora.json b/nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-qlora.json similarity index 100% rename from nlp/llm/Qwen1.5-7B/configs/qwen-7b-sft-qlora.json rename to nlp/llm/qwen1.5-7b/configs/qwen-7b-sft-qlora.json diff --git a/nlp/llm/Qwen1.5-7B/get_Qwen1.5.py b/nlp/llm/qwen1.5-7b/get_Qwen1.5.py similarity index 100% rename from nlp/llm/Qwen1.5-7B/get_Qwen1.5.py rename to nlp/llm/qwen1.5-7b/get_Qwen1.5.py diff --git a/nlp/llm/Qwen1.5-7B/main.py b/nlp/llm/qwen1.5-7b/main.py similarity index 100% rename from nlp/llm/Qwen1.5-7B/main.py rename to nlp/llm/qwen1.5-7b/main.py diff --git a/nlp/llm/Qwen1.5-7B/train.sh b/nlp/llm/qwen1.5-7b/train.sh similarity index 100% rename from nlp/llm/Qwen1.5-7B/train.sh rename to nlp/llm/qwen1.5-7b/train.sh -- Gitee From 1e48bf82602885ee5cffa5ebbf0c991ab6700d69 Mon Sep 17 00:00:00 2001 From: majorli Date: Thu, 1 Aug 2024 17:05:33 +0800 Subject: [PATCH 4/4] update copyrights of bash scripts Signed-off-by: majorli --- nlp/llm/qwen1.5-7b/train.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/nlp/llm/qwen1.5-7b/train.sh b/nlp/llm/qwen1.5-7b/train.sh index 7920b53a6..0bc978c25 100644 --- a/nlp/llm/qwen1.5-7b/train.sh +++ b/nlp/llm/qwen1.5-7b/train.sh @@ -1,5 +1,20 @@ #!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + MASTER_PORT=$(shuf -n 1 -i 10000-65535) NUM_GPUS=$1 CONFIG_FILE=$2 -- Gitee