mirror of https://github.com/RYDE-WORK/MiniCPM.git
synced 2026-02-07 07:43:42 +08:00

Add README and update finetune scripts

parent 76845055c5
commit a250c200ed

finetune/README.md (new file, 88 lines)
@@ -0,0 +1,88 @@

# MiniCPM Fine-tuning

This directory provides fine-tuning examples for the MiniCPM-2B model, covering both full-parameter fine-tuning and PEFT. As for data formats, it provides a multi-turn dialogue fine-tuning example and an input/output-format fine-tuning example.

If you have downloaded the model to a local directory, replace every `OpenBMB/MiniCPM-2B` field in this document and in the code with that path so the model is loaded locally.

Running the examples requires `python>=3.10`. Besides the base `torch` dependency, the example code also needs the following dependencies.

**We provide an [example notebook](lora_finetune.ipynb) that demonstrates, using AdvertiseGen as an example, how to process the data and use the fine-tuning scripts.**

```bash
pip install -r requirements.txt
```

## Tested Hardware

We only provide single-node multi-GPU / multi-node multi-GPU examples, so you will need at least one machine with multiple GPUs. With the **default configuration files** in this repository, we recorded the following GPU memory usage:

+ SFT full-parameter fine-tuning: 4 GPUs with the load split evenly, `30245MiB` of GPU memory per card.
+ LoRA fine-tuning: 1 GPU, `10619MiB` of GPU memory.

> Note that these figures are for reference only; memory usage can vary with different parameters. Please adjust for your own hardware.

## Multi-turn Dialogue Format

The multi-turn dialogue fine-tuning example follows the ChatGLM3 dialogue format convention: a different `loss_mask` is applied to each role, so the `loss` for all assistant turns in a conversation is computed in a single pass (a sketch of this masking appears after the sample below).

The sample data files use the following format:

```json
[
  {
    "conversations": [
      {
        "role": "system",
        "content": "<system prompt text>"
      },
      {
        "role": "user",
        "content": "<user prompt text>"
      },
      {
        "role": "assistant",
        "content": "<assistant response text>"
      },
      // ... multi-turn
      {
        "role": "user",
        "content": "<user prompt text>"
      },
      {
        "role": "assistant",
        "content": "<assistant response text>"
      }
    ]
  }
  // ...
]
```
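
To make the role-dependent `loss_mask` concrete, here is a minimal sketch of how such a mask can be built. It assumes a generic `tokenize` function and a simple role-tagged turn template for illustration, not the exact logic of this repository's `finetune.py`:

```python
# Minimal sketch (assumed template and tokenizer, not the repository's exact code):
# build input_ids and a per-token loss mask from one "conversations" list.
def build_example(conversations, tokenize):
    input_ids, loss_mask = [], []
    for turn in conversations:
        ids = tokenize(f"<{turn['role']}>\n{turn['content']}\n")
        input_ids.extend(ids)
        # Only assistant tokens receive loss; system/user tokens are masked out,
        # so every assistant turn is trained in a single forward pass.
        mask_value = 1 if turn["role"] == "assistant" else 0
        loss_mask.extend([mask_value] * len(ids))
    return input_ids, loss_mask
```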

## Dataset Format Example

Here we take the AdvertiseGen dataset as an example.
You can download AdvertiseGen from [Google Drive](https://drive.google.com/file/d/13_vf0xRTQsyneRKdD1bZIr93vBGOczrk/view?usp=sharing)
or from [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/b3f119a008264b1cabd1/?dl=1).
Place the extracted AdvertiseGen directory under the `data` directory and convert it yourself into a dataset of the following format; a conversion sketch follows the sample below.

> Note that the fine-tuning code now includes a validation set, so a complete set of fine-tuning data must contain a training set and a validation set; a test set is optional, or the validation set can simply be reused in its place.

```
{"conversations": [{"role": "user", "content": "类型#裙*裙长#半身裙"}, {"role": "assistant", "content": "这款百搭时尚的仙女半身裙,整体设计非常的飘逸随性,穿上之后每个女孩子都能瞬间变成小仙女啦。料子非常的轻盈,透气性也很好,穿到夏天也很舒适。"}]}
```
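
A minimal conversion sketch is shown below. It assumes the raw AdvertiseGen files are JSON lines with `content` (the keyword tags) and `summary` (the ad text) fields; the exact file names are illustrative, while the output paths match those used by the fine-tuning scripts:

```python
# Hypothetical converter: raw AdvertiseGen JSONL -> the conversation format above.
# Assumes each raw line looks like {"content": "...", "summary": "..."}.
import json
import os

def convert(src_path, dst_path):
    os.makedirs(os.path.dirname(dst_path), exist_ok=True)
    with open(src_path, encoding="utf-8") as src, \
         open(dst_path, "w", encoding="utf-8") as dst:
        for line in src:
            raw = json.loads(line)
            example = {"conversations": [
                {"role": "user", "content": raw["content"]},
                {"role": "assistant", "content": raw["summary"]},
            ]}
            dst.write(json.dumps(example, ensure_ascii=False) + "\n")

convert("data/AdvertiseGen/train.json", "data/AdvertiseGenChatML/train.json")
convert("data/AdvertiseGen/dev.json", "data/AdvertiseGenChatML/dev.json")
```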

## Start Fine-tuning

Run **single-node multi-GPU / multi-node multi-GPU** training with:

```bash
cd finetune
bash sft_finetune.sh
```

Run **single-node single-GPU** training with:

```bash
cd finetune
bash lora_finetune.sh
```

@@ -1,8 +1,4 @@
 {
-    "train_batch_size": "auto",
-    "train_micro_batch_size_per_gpu": "auto",
-    "gradient_accumulation_steps": "auto",
-    "gradient_clipping": 1.0,
     "fp16": {
         "enabled": "auto",
         "loss_scale": 0,
@@ -16,10 +12,15 @@
     },
     "zero_optimization": {
         "stage": 2,
+        "allgather_partitions": true,
         "overlap_comm": true,
         "reduce_scatter": true,
         "contiguous_gradients": true
     },
+    "train_batch_size": "auto",
+    "train_micro_batch_size_per_gpu": "auto",
+    "gradient_accumulation_steps": "auto",
+    "gradient_clipping": 1.0,
     "wall_clock_breakdown": false,
     "flops_profiler": {
         "enabled": false,

@@ -1,8 +1,4 @@
 {
-    "train_batch_size": "auto",
-    "train_micro_batch_size_per_gpu": "auto",
-    "gradient_accumulation_steps": "auto",
-    "gradient_clipping": 1.0,
     "fp16": {
         "enabled": "auto",
         "loss_scale": 0,
@@ -16,16 +12,19 @@
     },
     "zero_optimization": {
         "stage": 2,
+        "allgather_partitions": true,
         "overlap_comm": true,
         "reduce_scatter": true,
         "contiguous_gradients": true,
         "offload_optimizer": {
-            "device": "cpu"
-        },
-        "offload_param": {
-            "device": "cpu"
+            "device": "cpu",
+            "pin_memory": true
         }
     },
+    "train_batch_size": "auto",
+    "train_micro_batch_size_per_gpu": "auto",
+    "gradient_accumulation_steps": "auto",
+    "gradient_clipping": 1.0,
     "wall_clock_breakdown": false,
     "flops_profiler": {
         "enabled": false,

@@ -1,16 +1,32 @@
 {
-    "train_batch_size": "auto",
-    "train_micro_batch_size_per_gpu": "auto",
-    "gradient_accumulation_steps": "auto",
-    "gradient_clipping": 1.0,
+    "fp16": {
+        "enabled": "auto",
+        "loss_scale": 0,
+        "loss_scale_window": 1000,
+        "initial_scale_power": 16,
+        "hysteresis": 2,
+        "min_loss_scale": 1
+    },
     "bf16": {
         "enabled": "auto"
     },
     "zero_optimization": {
         "stage": 3,
+        "allgather_partitions": true,
+        "allgather_bucket_size": 5e8,
+        "reduce_scatter": true,
+        "contiguous_gradients": true,
         "overlap_comm": true,
+        "reduce_bucket_size": "auto",
+        "stage3_prefetch_bucket_size": "auto",
+        "stage3_param_persistence_threshold": "auto",
         "stage3_gather_16bit_weights_on_model_save": true
     },
+    "train_batch_size": "auto",
+    "train_micro_batch_size_per_gpu": "auto",
+    "gradient_accumulation_steps": "auto",
+    "gradient_clipping": 1.0,
+    "wall_clock_breakdown": false,
     "flops_profiler": {
         "enabled": false,
         "profile_step": 1,

finetune/configs/ds_config_zero3_offload.json (new file, 46 lines)
@@ -0,0 +1,46 @@
{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },
    "bf16": {
        "enabled": "auto"
    },
    "zero_optimization": {
        "stage": 3,
        "allgather_partitions": true,
        "allgather_bucket_size": 5e8,
        "reduce_scatter": true,
        "contiguous_gradients": true,
        "overlap_comm": true,
        "reduce_bucket_size": "auto",
        "stage3_prefetch_bucket_size": "auto",
        "stage3_param_persistence_threshold": "auto",
        "stage3_gather_16bit_weights_on_model_save": true,
        "offload_optimizer": {
            "device": "cpu",
            "pin_memory": true
        },
        "offload_param": {
            "device": "cpu",
            "pin_memory": true
        }
    },
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
    "gradient_clipping": 1.0,
    "wall_clock_breakdown": false,
    "flops_profiler": {
        "enabled": false,
        "profile_step": 1,
        "module_depth": -1,
        "top_modules": 1,
        "detailed": true,
        "output_file": null
    }
}
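
For context on how these configs are consumed: the fine-tuning scripts pass one of the JSON files to the Hugging Face `Trainer` through the `--deepspeed` argument, and the `"auto"` entries are resolved from the matching `TrainingArguments` fields at launch time. A minimal sketch, with illustrative output path and hyperparameters:

```python
# Sketch: wiring a DeepSpeed JSON config into transformers' Trainer arguments.
# The "auto" fields in the JSON are filled from these values by the integration.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="output/example",                       # illustrative path
    per_device_train_batch_size=1,                     # -> train_micro_batch_size_per_gpu
    gradient_accumulation_steps=8,                     # -> gradient_accumulation_steps
    bf16=True,                                         # -> toggles "bf16.enabled"
    deepspeed="configs/ds_config_zero3_offload.json",  # the config added above
)
```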
@@ -1,8 +1,6 @@
 # -*- coding: utf-8 -*-
 import json
-import time
 from typing import Dict, Optional
-from datetime import datetime
 from dataclasses import dataclass, field
 
 import torch
@@ -155,7 +153,7 @@ def load_model_and_tokenizer(
     model = get_peft_model(model, lora_config)
     # trainable params: 2,949,120 || all params: 3,010,652,928 || trainable%: 0.09795616002669305
     model.print_trainable_parameters()
-    model.enable_input_require_grads()
+    # model.enable_input_require_grads() # need when using adapter
 
     return model, tokenizer
 
@@ -184,15 +182,12 @@ if __name__ == "__main__":
         model_max_length=training_args.model_max_length,
     )
 
-    formatted_time = datetime.now().strftime("%Y%m%d%H%M%S")
-
     trainer = Trainer(
         model=model,
         args=training_args,
         train_dataset=train_dataset,
         eval_dataset=eval_dataset,
         tokenizer=tokenizer,
-        # compute_metrics=compute_metrics,
     )
 
     trainer.train()
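
In the PEFT hunk above, `lora_config` is a `peft` `LoraConfig` defined earlier in the same function (not shown in this diff). A hypothetical configuration at roughly the reported trainable-parameter scale could look like the sketch below; the rank and target modules are assumptions, not the repository's actual values:

```python
# Hypothetical LoraConfig sketch; the repository's real rank/targets may differ.
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,                                   # illustrative LoRA rank
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],   # illustrative attention projections
)
```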

@@ -9,7 +9,7 @@
     "This notebook is a code example that LoRA fine-tunes MiniCPM-2B on the `AdvertiseGen` dataset so that it acquires professional ad-copy generation ability.\n",
     "\n",
     "## Hardware Requirements\n",
-    "- GPU memory: 24GB\n",
+    "- GPU memory: 12GB\n",
     "- GPU architecture: Ampere (recommended)\n",
     "- RAM: 16GB"
    ]
@@ -101,7 +101,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!bash lora_finetune.sh"
+    "!bash lora_finetune_ds.sh"
    ]
   }
 ],

@@ -1,16 +1,17 @@
 formatted_time=$(date +"%Y%m%d%H%M%S")
 echo $formatted_time
 
-CUDA_VISIBLE_DEVICES=0 python finetune.py \
+
+deepspeed --include localhost:0 finetune.py \
     --model_name_or_path <your_model_name_or_path> \
     --output_dir output/AdvertiseGenLoRA/$formatted_time/ \
     --train_data_path data/AdvertiseGenChatML/train.json \
     --eval_data_path data/AdvertiseGenChatML/dev.json \
     --learning_rate 1e-3 --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 32 --fp16\
-    --gradient_accumulation_steps 8 --warmup_steps 100 \
+    --per_device_eval_batch_size 1 --fp16 --use_lora \
+    --gradient_accumulation_steps 1 --warmup_steps 100 \
     --max_steps 3000 --weight_decay 0.01 \
     --evaluation_strategy steps --eval_steps 500 \
-    --save_strategy steps --save_steps 500 \
-    --use_lora true --seed 42 \
-    --log_level info --logging_strategy steps --logging_steps 10
+    --save_strategy steps --save_steps 500 --seed 42 \
+    --log_level info --logging_strategy steps --logging_steps 10 \
+    --deepspeed configs/ds_config_zero3_offload.json

@@ -1,17 +0,0 @@
-formatted_time=$(date +"%Y%m%d%H%M%S")
-echo $formatted_time
-
-
-deepspeed --include localhost:0,1 finetune.py \
-    --model_name_or_path <your_model_name_or_path> \
-    --output_dir output/AdvertiseGenLoRA/$formatted_time/ \
-    --train_data_path data/AdvertiseGenChatML/train.json \
-    --eval_data_path data/AdvertiseGenChatML/dev.json \
-    --learning_rate 1e-3 --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 1 --fp16 --use_lora \
-    --gradient_accumulation_steps 8 --warmup_steps 100 \
-    --max_steps 3000 --weight_decay 0.01 \
-    --evaluation_strategy steps --eval_steps 500 \
-    --save_strategy steps --save_steps 500 --seed 42 \
-    --log_level info --logging_strategy steps --logging_steps 10 \
-    --deepspeed configs/ds_config_zero2_offload.json

@@ -2,16 +2,16 @@ formatted_time=$(date +"%Y%m%d%H%M%S")
 echo $formatted_time
 
 
-deepspeed --include localhost:1,2 finetune.py \
+deepspeed --include localhost:1,2,4,5 finetune.py \
     --model_name_or_path <your_model_name_or_path> \
     --output_dir output/AdvertiseGenLoRA/$formatted_time/ \
     --train_data_path data/AdvertiseGenChatML/train.json \
     --eval_data_path data/AdvertiseGenChatML/dev.json \
     --learning_rate 1e-3 --per_device_train_batch_size 1 \
-    --per_device_eval_batch_size 32 --fp16 \
+    --per_device_eval_batch_size 4 --bf16 \
     --gradient_accumulation_steps 8 --warmup_steps 100 \
     --max_steps 3000 --weight_decay 0.01 \
     --evaluation_strategy steps --eval_steps 500 \
     --save_strategy steps --save_steps 500 --seed 42 \
     --log_level info --logging_strategy steps --logging_steps 10 \
-    --deepspeed configs/ds_config_zero2.json
+    --deepspeed configs/ds_config_zero3_offload.json