Langchain-Chatchat/configs/model_config.py.example

log_path: "logs"
log_level: "DEBUG"
api_server:
  host: "127.0.0.1"
  port: 8000
publish_server:
  host: "127.0.0.1"
  port: 8001
openai_plugins_folder:
  - "openai_plugins"
openai_plugins_load_folder:
  - "configs"
plugins:
  - openai:
      name: "openai"
  - fastchat:
      name: "fastchat"
      logdir: "logs"
      # Device to run the LLM on. "auto" detects the device automatically; it can
      # also be set manually to one of "cuda", "mps", "cpu".
      llm_device: "auto"
      model_names:
        - "internlm2-chat-7b"
      run_controller:
        host: "127.0.0.1"
        port: 20001
        dispatch_method: "shortest_queue"
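        # "shortest_queue" routes each request to the least-loaded worker;
        # upstream FastChat's controller also accepts "lottery" here
        # (random worker selection).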
      run_openai_api:
        host: "127.0.0.1"
        port: 20000
      fschat_model_workers:
        - default:
            host: "127.0.0.1"
            port: 20002
            device: "auto"
            infer_turbo: False
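            # Setting infer_turbo to "vllm" switches this worker to the vLLM
            # engine, configured with the vllm_worker parameters listed below.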
            # Parameters needed when model_worker loads a model across multiple
            # GPUs (a filled-in sketch follows this list):
            # "gpus": None,  # GPUs to use, as a string such as "0,1"; if it has no effect, set CUDA_VISIBLE_DEVICES="0,1" instead
            # "num_gpus": 1,  # number of GPUs to use
            # "max_gpu_memory": "20GiB",  # maximum VRAM to use per GPU
            # The following model_worker parameters are rarely needed; set them as required:
            # "load_8bit": False,  # enable 8-bit quantization
            # "cpu_offloading": None,
            # "gptq_ckpt": None,
            # "gptq_wbits": 16,
            # "gptq_groupsize": -1,
            # "gptq_act_order": False,
            # "awq_ckpt": None,
            # "awq_wbits": 16,
            # "awq_groupsize": -1,
            # "model_names": LLM_MODELS,
            # "conv_template": None,
            # "limit_worker_concurrency": 5,
            # "stream_interval": 2,
            # "no_register": False,
            # "embed_in_truncate": False,
            # The following are vllm_worker parameters; note that vllm requires a
            # GPU and has only been tested on Linux (an example worker entry
            # follows this block):
            # tokenizer = model_path  # set this if the tokenizer differs from model_path
            # 'tokenizer_mode': 'auto',
            # 'trust_remote_code': True,
            # 'download_dir': None,
            # 'load_format': 'auto',
            # 'dtype': 'auto',
            # 'seed': 0,
            # 'worker_use_ray': False,
            # 'pipeline_parallel_size': 1,
            # 'tensor_parallel_size': 1,
            # 'block_size': 16,
            # 'swap_space': 4,  # GiB
            # 'gpu_memory_utilization': 0.90,
            # 'max_num_batched_tokens': 2560,
            # 'max_num_seqs': 256,
            # 'disable_log_stats': False,
            # 'conv_template': None,
            # 'limit_worker_concurrency': 5,
            # 'no_register': False,
            # 'num_gpus': 1,
            # 'engine_use_ray': False,
            # 'disable_log_requests': False
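        # A hedged sketch (not part of the upstream example): one worker entry
        # with vLLM enabled; the model name, port, and parameter values below are
        # assumptions to adapt to your setup:
        # - Qwen-7B-Chat:
        #     host: "127.0.0.1"
        #     port: 20003
        #     device: "cuda"
        #     infer_turbo: "vllm"
        #     tensor_parallel_size: 2
        #     gpu_memory_utilization: 0.90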
        - chatglm3-6b:
            host: "127.0.0.1"
            device: "cuda"
            port: 20009
        - internlm2-chat-7b:
            host: "127.0.0.1"
            device: "cuda"
            port: 20010  # each model worker must listen on its own port
        # The entries below normally need no changes; select which models to
        # launch in model_config:
        - zhipu-api:
            port: 21001
        - minimax-api:
            port: 21002
        - xinghuo-api:
            port: 21003
        - qianfan-api:
            port: 21004
        - fangzhou-api:
            port: 21005
        - qwen-api:
            port: 21006
        - baichuan-api:
            port: 21007
        - azure-api:
            port: 21008
        - tiangong-api:
            port: 21009
      online_llm_model:
        # Online models. Assign each online API its own port in server_config.
        - "openai-api":
            "model_name": "gpt-3.5-turbo"
            "api_base_url": "https://api.openai.com/v1"
            "api_key": ""
            "openai_proxy": ""
        # For registration and API keys, visit http://open.bigmodel.cn
        - "zhipu-api":
            "api_key": ""
            "version": "chatglm_turbo"  # available options include "chatglm_turbo"
            "provider": "ChatGLMWorker"
        # For registration and API keys, visit https://api.minimax.chat/
        - "minimax-api":
            "group_id": ""
            "api_key": ""
            "is_pro": False
            "provider": "MiniMaxWorker"
        # For registration and API keys, visit https://xinghuo.xfyun.cn/
        - "xinghuo-api":
            "APPID": ""
            "APISecret": ""
            "api_key": ""
            "version": "v1.5"  # the iFLYTEK Spark model version in use; options include "v3.0", "v1.5", "v2.0"
            "provider": "XingHuoWorker"
        # Baidu Qianfan; for API access see https://cloud.baidu.com/doc/WENXINWORKSHOP/s/4lilb2lpf
        - "qianfan-api":
            "version": "ERNIE-Bot"  # case-sensitive; currently "ERNIE-Bot" or "ERNIE-Bot-turbo", see the official docs for more
            "version_url": ""  # alternatively, leave version empty and fill in the API URL of the model deployment created on Qianfan
            "api_key": ""
            "secret_key": ""
            "provider": "QianFanWorker"
        # Volcano Ark; see the API docs at https://www.volcengine.com/docs/82379
        - "fangzhou-api":
            "version": "chatglm-6b-model"  # currently supports "chatglm-6b-model"; see the Ark section of the supported-model list in the docs for more
            "version_url": ""  # alternatively, leave version empty and fill in the API URL of the model deployment created on Ark
            "api_key": ""
            "secret_key": ""
            "provider": "FangZhouWorker"
        # Alibaba Cloud Tongyi Qianwen; see the API docs at https://help.aliyun.com/zh/dashscope/developer-reference/api-details
        - "qwen-api":
            "version": "qwen-turbo"  # options include "qwen-turbo", "qwen-plus"
            "api_key": ""  # create one on the DashScope API-KEY management page in the Alibaba Cloud console
            "provider": "QwenWorker"
        # Baichuan; for API access see https://www.baichuan-ai.com/home#api-enter
        - "baichuan-api":
            "version": "Baichuan2-53B"  # currently supports "Baichuan2-53B"; see the official docs
            "api_key": ""
            "secret_key": ""
            "provider": "BaiChuanWorker"
        # Azure API
        - "azure-api":
            "deployment_name": ""  # name of the deployment
            "resource_name": ""  # fill in only the resource_name part of https://{resource_name}.openai.azure.com/openai/, nothing else
            "api_version": ""  # the API version, not the model version
            "api_key": ""
            "provider": "AzureWorker"
        # Kunlun Tech Tiangong API: https://model-platform.tiangong.cn/
        - "tiangong-api":
            "version": "SkyChat-MegaVerse"
            "api_key": ""
            "secret_key": ""
            "provider": "TianGongWorker"
"llm_model":
"chatglm2-6b": "THUDM/chatglm2-6b"
"chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
"chatglm3-6b": "THUDM/chatglm3-6b"
"chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
"Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf"
"Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf"
"Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf"
"Qwen-1_8B-Chat": "/media/checkpoint/Qwen-1_8B-Chat"
"Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"
"Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
"Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
"baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
"baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
"baichuan2-7b-chat": "baichuan-inc/Baichuan2-7B-Chat"
"baichuan2-13b-chat": "baichuan-inc/Baichuan2-13B-Chat"
"internlm-7b": "internlm/internlm-7b"
"internlm-chat-7b": "internlm/internlm-chat-7b"
"internlm2-chat-7b": "internlm/internlm2-chat-7b"
"internlm2-chat-20b": "internlm/internlm2-chat-20b"
"BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat"
"BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k"
"Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat"
"agentlm-7b": "THUDM/agentlm-7b"
"agentlm-13b": "THUDM/agentlm-13b"
"agentlm-70b": "THUDM/agentlm-70b"
"falcon-7b": "tiiuae/falcon-7b"
"falcon-40b": "tiiuae/falcon-40b"
"falcon-rw-7b": "tiiuae/falcon-rw-7b"
"aquila-7b": "BAAI/Aquila-7B"
"aquilachat-7b": "BAAI/AquilaChat-7B"
"open_llama_13b": "openlm-research/open_llama_13b"
"vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5"
"koala": "young-geng/koala"
"mpt-7b": "mosaicml/mpt-7b"
"mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter"
"mpt-30b": "mosaicml/mpt-30b"
"opt-66b": "facebook/opt-66b"
"opt-iml-max-30b": "facebook/opt-iml-max-30b"
"gpt2": "gpt2"
"gpt2-xl": "gpt2-xl"
"gpt-j-6b": "EleutherAI/gpt-j-6b"
"gpt4all-j": "nomic-ai/gpt4all-j"
"gpt-neox-20b": "EleutherAI/gpt-neox-20b"
"pythia-12b": "EleutherAI/pythia-12b"
"oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
"dolly-v2-12b": "databricks/dolly-v2-12b"
"stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"
      vllm_model_dict:
        "chatglm2-6b": "THUDM/chatglm2-6b"
        "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
        "chatglm3-6b": "THUDM/chatglm3-6b"
        "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
        "Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf"
        "Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf"
        "Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf"
        "Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat"
        "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"
        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
        "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
        "baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
        "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
        "baichuan2-7b-chat": "baichuan-inc/Baichuan2-7B-Chat"
        "baichuan2-13b-chat": "baichuan-inc/Baichuan2-13B-Chat"
        "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat"
        "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k"
        "internlm-7b": "internlm/internlm-7b"
        "internlm-chat-7b": "internlm/internlm-chat-7b"
        "internlm2-chat-7b": "internlm/internlm2-chat-7b"
        "internlm2-chat-20b": "internlm/internlm2-chat-20b"
        "aquila-7b": "BAAI/Aquila-7B"
        "aquilachat-7b": "BAAI/AquilaChat-7B"
        "falcon-7b": "tiiuae/falcon-7b"
        "falcon-40b": "tiiuae/falcon-40b"
        "falcon-rw-7b": "tiiuae/falcon-rw-7b"
        "gpt2": "gpt2"
        "gpt2-xl": "gpt2-xl"
        "gpt-j-6b": "EleutherAI/gpt-j-6b"
        "gpt4all-j": "nomic-ai/gpt4all-j"
        "gpt-neox-20b": "EleutherAI/gpt-neox-20b"
        "pythia-12b": "EleutherAI/pythia-12b"
        "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
        "dolly-v2-12b": "databricks/dolly-v2-12b"
        "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"
        "open_llama_13b": "openlm-research/open_llama_13b"
        "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3"
        "koala": "young-geng/koala"
        "mpt-7b": "mosaicml/mpt-7b"
        "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter"
        "mpt-30b": "mosaicml/mpt-30b"
        "opt-66b": "facebook/opt-66b"
        "opt-iml-max-30b": "facebook/opt-iml-max-30b"