Added a bash script to run the quantization evaluation
parent 262840a805 · commit 2a86c6a287
README.md (23 lines changed)
@@ -271,6 +271,29 @@ print(model.response("<用户>山东省最高的山是哪座山, 它比黄山高

```
python quant_with_alpaca.py --pretrained_model_dir no_quantized_path --quantized_model_dir save_path --bits 4
```

5. You can run inference with ./AutoGPTQ/examples/quantization/inference.py, or apply vllm to the quantized model as described earlier; on a single 4090, vllm inference with the minicpm-1b-int4 model reaches roughly 2000 token/s (a minimal vllm sketch follows).
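For the vllm throughput quoted in step 5, a minimal inference sketch might look like the following; the model path, prompt, and sampling settings are illustrative assumptions rather than values from the repo, and MiniCPM support depends on your vllm build:

```
# Hedged sketch: load a quantized MiniCPM directory with vllm and generate.
# "save_path" and the sampling values are assumptions for illustration.
from vllm import LLM, SamplingParams

llm = LLM(model="save_path", trust_remote_code=True)
params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=256)

outputs = llm.generate(["山东省最高的山是哪座山?"], params)
print(outputs[0].outputs[0].text)
```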
**AWQ quantization**

1. In quantize/awq_quantize.py, edit the configuration parameters according to the comments in the file: model_path, quant_path, quant_data_path, quant_config, and quant_samples. If you need a custom dataset, also edit custom_data.

2. The alpaca and wiki_text datasets are already provided under quantize/quantize_data as quantization calibration sets. To use a custom dataset, modify the custom_data variable in quantize/awq_quantize.py, for example:

```
custom_data=[{'question':'过敏性鼻炎有什么症状?','answer':'过敏性鼻炎可能鼻塞,流鼻涕,头痛等症状反复发作,严重时建议及时就医。'},
             {'question':'1+1等于多少?','answer':'等于2'}]
```

3. Run quantize/awq_quantize.py; the AWQ-quantized model is written to the quant_path directory you configured (a sketch of this flow appears after this list).
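For orientation, this is roughly the flow such a script implements with AutoAWQ; the model path and quant_config values below are assumptions for illustration, and the real settings live in awq_quantize.py:

```
# Hedged AutoAWQ quantization sketch; paths and quant_config are assumed.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "openbmb/MiniCPM-1B-sft-bf16"   # assumption: any MiniCPM checkpoint
quant_path = "awq_cpm_1b_4bit"               # output directory, assumed name
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoAWQForCausalLM.from_pretrained(model_path, trust_remote_code=True)

# Calibrate and quantize; custom_data entries would be flattened into the
# calibration text used at this step.
model.quantize(tokenizer, quant_config=quant_config)

model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)
```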
**Quantization evaluation**

1. From the command line, enter the MiniCPM/quantize directory.

2. In quantize_eval.sh, set awq_path, gptq_path, and model_path; leave the entries you do not want to evaluate as empty strings. The following example evaluates only the AWQ model:

```
awq_path="/root/ld/ld_project/AutoAWQ/examples/awq_cpm_1b_4bit"
gptq_path=""
model_path=""
```

3. From the MiniCPM/quantize directory, run:

```
bash quantize_eval.sh
```

4. The terminal will print the model's memory footprint and perplexity (a sketch of a comparable perplexity measurement appears below).
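The perplexity figure can be read as exp of the average per-token negative log-likelihood over the evaluation text. A minimal sketch with an assumed fixed-window chunking (quantize_eval.py's exact batching and striding may differ):

```
# Hedged perplexity sketch: exp(mean next-token NLL) over one long text.
import math
import torch

@torch.no_grad()
def perplexity(model, tokenizer, text, max_len=2048, device="cuda:0"):
    ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
    total_nll, total_tokens = 0.0, 0
    for start in range(0, ids.size(1) - 1, max_len):
        chunk = ids[:, start:start + max_len + 1]  # inputs plus shifted targets
        if chunk.size(1) < 2:
            break
        inp, tgt = chunk[:, :-1], chunk[:, 1:]
        logits = model(inp).logits
        nll = torch.nn.functional.cross_entropy(
            logits.reshape(-1, logits.size(-1)), tgt.reshape(-1), reduction="sum")
        total_nll += nll.item()
        total_tokens += tgt.numel()
    return math.exp(total_nll / total_tokens)
```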
<p id="community"></p>

## Open-Source Community
quantize/quantize_eval.py

@@ -3,9 +3,7 @@ import torch.nn as nn
 from tqdm import tqdm
 from datasets import load_dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from awq import AutoAWQForCausalLM
-from auto_gptq import AutoGPTQForCausalLM
-import GPUtil
+#import GPUtil
 import argparse

 parser = argparse.ArgumentParser(description="========量化困惑度测试========")
@@ -31,7 +29,7 @@ parser.add_argument(
 parser.add_argument(
     "--data_path",
     type=str,
-    default='/root/ld/ld_project/pull_request/MiniCPM/quantize/quantize_data/wikitext',
+    default='quantize_data/wikitext',
     help="可以是以后的量化数据集,示例中默认为wiki_text"
 )
@@ -94,6 +92,8 @@ if __name__ == "__main__":
     del model

     if args.awq_path:
+        from awq import AutoAWQForCausalLM
+
         model = AutoAWQForCausalLM.from_quantized(args.awq_path, fuse_layers=True,device_map={"":'cuda:0'})
         tokenizer = AutoTokenizer.from_pretrained(args.awq_path)
         print("awq model:",args.awq_path.split('/')[-1])
@@ -104,6 +104,8 @@ if __name__ == "__main__":

     #we will support the autogptq later
     if args.gptq_path:
+        from auto_gptq import AutoGPTQForCausalLM
+
         tokenizer = AutoTokenizer.from_pretrained(args.gptq_path, use_fast=True)
         model = AutoGPTQForCausalLM.from_quantized(args.gptq_path, device="cuda:0",trust_remote_code=True)
         print("gptq model:",args.gptq_path.split('/')[-1])
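The import changes above apply the standard lazy-import pattern: each optional quantization backend is imported only inside the branch that actually needs it, so the evaluation script still starts when only one of autoawq / auto-gptq is installed. A stripped-down illustration of the pattern (argument names match the script; the rest is schematic):

```
# Lazy imports: a missing optional backend only fails if it is requested.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--awq_path", type=str, default="")
parser.add_argument("--gptq_path", type=str, default="")
args = parser.parse_args()

if args.awq_path:
    from awq import AutoAWQForCausalLM           # needs the autoawq package
    model = AutoAWQForCausalLM.from_quantized(args.awq_path, fuse_layers=True)

if args.gptq_path:
    from auto_gptq import AutoGPTQForCausalLM    # needs the auto-gptq package
    model = AutoGPTQForCausalLM.from_quantized(args.gptq_path, device="cuda:0")
```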
quantize/quantize_eval.sh (new file, 8 lines)
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+awq_path="/root/ld/ld_project/AutoAWQ/examples/awq_cpm_1b_4bit"
+gptq_path=""
+model_path=""
+
+python quantize_eval.py --awq_path "${awq_path}" \
+    --model_path "${model_path}" --gptq_path "${gptq_path}"
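The memory footprint reported by the evaluation presumably comes from querying the GPU once a model is loaded; the script references GPUtil, whose exact call is not shown in this diff. One way to take such a reading with torch alone (an assumed mechanism, for illustration):

```
# Hedged sketch: report CUDA memory after loading a model.
import torch

def report_cuda_memory(tag, device=0):
    allocated = torch.cuda.memory_allocated(device) / 2**20  # MiB held by tensors
    reserved = torch.cuda.memory_reserved(device) / 2**20    # MiB held by the caching allocator
    print(f"{tag}: allocated={allocated:.0f} MiB, reserved={reserved:.0f} MiB")

# e.g. report_cuda_memory("awq model") right after from_quantized(...).
```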