From 61abd98409caf3f144ac87bff4e31014135d0d77 Mon Sep 17 00:00:00 2001
From: zR <2448370773@qq.com>
Date: Thu, 18 Jan 2024 21:58:06 +0800
Subject: [PATCH] Update the model execution list and tonight's changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 configs/loom.yaml                           | 195 +++----
 configs/loom.yaml.example                   | 310 ++++++++++++
 configs/model_config.py.example             | 521 +++++++++++---------
 openai_plugins/openai/app.py                |   1 -
 openai_plugins/openai/requirements.txt      |   1 +
 openai_plugins/zhipuai/app.py               |  48 ++
 openai_plugins/zhipuai/controller.py        |  47 ++
 openai_plugins/zhipuai/install.py           | 103 ++++
 openai_plugins/zhipuai/profile_endpoint.py  |  96 ++++
 openai_plugins/zhipuai/requirements.txt     |   1 +
 openai_plugins/zhipuai/zhipuai_plugins.json |  11 +
 webui_pages/dialogue/dialogue.py            |   2 -
 webui_pages/openai_plugins/base.py          |   5 +-
 13 files changed, 989 insertions(+), 352 deletions(-)
 create mode 100644 configs/loom.yaml.example
 create mode 100644 openai_plugins/zhipuai/app.py
 create mode 100644 openai_plugins/zhipuai/controller.py
 create mode 100644 openai_plugins/zhipuai/install.py
 create mode 100644 openai_plugins/zhipuai/profile_endpoint.py
 create mode 100644 openai_plugins/zhipuai/requirements.txt
 create mode 100644 openai_plugins/zhipuai/zhipuai_plugins.json

diff --git a/configs/loom.yaml b/configs/loom.yaml
index a73545f1..a9d608dd 100644
--- a/configs/loom.yaml
+++ b/configs/loom.yaml
@@ -10,9 +10,9 @@ publish_server:
   port: 8001
 
 openai_plugins_folder:
-  - "/media/gpt4-pdf-chatbot-langchain/langchain-chatchat-archive/openai_plugins"
+  - "openai_plugins"
 openai_plugins_load_folder:
-  - "/media/gpt4-pdf-chatbot-langchain/langchain-chatchat-archive/configs"
+  - "configs"
 
 
 plugins:
@@ -21,11 +21,11 @@ plugins:
 
   - fastchat:
       name: "fastchat"
-      logdir: "/media/gpt4-pdf-chatbot-langchain/langchain-chatchat-archive/logs"
+      logdir: "logs"
       # Device the LLM runs on. "auto" autodetects; it can also be set explicitly to "cuda", "mps", or "cpu".
       llm_device: "auto"
       model_names:
-        - "Qwen-1_8B-Chat"
+        - "chatglm3-6b"
       run_controller:
         host: "127.0.0.1"
         port: 20001
@@ -38,10 +38,6 @@ plugins:
           host: "127.0.0.1"
           port: 20002
           device: "auto"
-          # False or 'vllm': the inference acceleration framework to use. If vllm hits HuggingFace connectivity problems, see doc/FAQ.
-          # vllm support for some models is still immature, so it is disabled by default for now.
-          # fschat==0.2.33 has a bug; to use vllm, patch fastchat.server.vllm_worker in the source,
-          # changing the stop=list(stop) argument of sampling_params = SamplingParams on line 103 to stop=[i for i in stop if i != ""].
           infer_turbo: False
 
           # Parameters needed for multi-GPU loading in model_worker
@@ -90,11 +86,16 @@ plugins:
           # 'num_gpus': 1
           # 'engine_use_ray': False,
           # 'disable_log_requests': False
-        - Qwen-1_8B:
+
+        - chatglm3-6b:
            host: "127.0.0.1"
-           port: 20008
-        - chatglm3-6b: # uses the IP and port from default
-           device": "cuda"
+           device: "cuda"
+           port: 20009
+
+        - internlm2-chat-7b:
+           host: "127.0.0.1"
+           device: "cuda"
+           port: 20010  # must differ from the chatglm3-6b worker's port
 
       # The settings below need no changes; select the models to launch in model_config
       - zhipu-api:
          port: 21001
@@ -197,104 +198,95 @@ plugins:
         "secret_key": ""
         "provider": "TianGongWorker"
       "llm_model":
-        # Some of the models below are not fully tested; support is inferred from the fastchat and vllm model lists
+
         "chatglm2-6b": "THUDM/chatglm2-6b"
         "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
-        "chatglm3-6b": "THUDM/chatglm3-6b"
+        "chatglm3-6b": "/share/home/zyx/Models/chatglm3-6b"
         "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
-        "chatglm3-6b-base": "THUDM/chatglm3-6b-base"
-        "Qwen-1_8B": "/media/checkpoint/Qwen-1_8B"
+
+        "Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf"
+        "Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf"
+        "Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf"
"meta-llama/Llama-2-70b-chat-hf" + "Qwen-1_8B-Chat": "/media/checkpoint/Qwen-1_8B-Chat" - "Qwen-1_8B-Chat-Int8": "Qwen/Qwen-1_8B-Chat-Int8" - "Qwen-1_8B-Chat-Int4": "Qwen/Qwen-1_8B-Chat-Int4" - - "Qwen-7B": "Qwen/Qwen-7B" "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat" - - "Qwen-14B": "Qwen/Qwen-14B" "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat" - - "Qwen-14B-Chat-Int8": "Qwen/Qwen-14B-Chat-Int8" - # 在新版的transformers下需要手动修改模型的config.json文件,在quantization_config字典中 - # 增加`disable_exllama:true` 字段才能启动qwen的量化模型 - "Qwen-14B-Chat-Int4": "Qwen/Qwen-14B-Chat-Int4" - - "Qwen-72B": "Qwen/Qwen-72B" "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat" - "Qwen-72B-Chat-Int8": "Qwen/Qwen-72B-Chat-Int8" - "Qwen-72B-Chat-Int4": "Qwen/Qwen-72B-Chat-Int4" - "baichuan2-13b": "baichuan-inc/Baichuan2-13B-Chat" - "baichuan2-7b": "baichuan-inc/Baichuan2-7B-Chat" - - "baichuan-7b": "baichuan-inc/Baichuan-7B" - "baichuan-13b": "baichuan-inc/Baichuan-13B" + "baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat" "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat" - - "aquila-7b": "BAAI/Aquila-7B" - "aquilachat-7b": "BAAI/AquilaChat-7B" + "baichuan2-7b-chat": "baichuan-inc/Baichuan2-7B-Chat" + "baichuan2-13b-chat": "baichuan-inc/Baichuan2-13B-Chat" "internlm-7b": "internlm/internlm-7b" "internlm-chat-7b": "internlm/internlm-chat-7b" - - "falcon-7b": "tiiuae/falcon-7b" - "falcon-40b": "tiiuae/falcon-40b" - "falcon-rw-7b": "tiiuae/falcon-rw-7b" - - "gpt2": "gpt2" - "gpt2-xl": "gpt2-xl" - - "gpt-j-6b": "EleutherAI/gpt-j-6b" - "gpt4all-j": "nomic-ai/gpt4all-j" - "gpt-neox-20b": "EleutherAI/gpt-neox-20b" - "pythia-12b": "EleutherAI/pythia-12b" - "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5" - "dolly-v2-12b": "databricks/dolly-v2-12b" - "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b" - - "Llama-2-13b-hf": "meta-llama/Llama-2-13b-hf" - "Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf" - "open_llama_13b": "openlm-research/open_llama_13b" - "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3" - "koala": "young-geng/koala" - - "mpt-7b": "mosaicml/mpt-7b" - "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter" - "mpt-30b": "mosaicml/mpt-30b" - "opt-66b": "facebook/opt-66b" - "opt-iml-max-30b": "facebook/opt-iml-max-30b" - - "agentlm-7b": "THUDM/agentlm-7b" - "agentlm-13b": "THUDM/agentlm-13b" - "agentlm-70b": "THUDM/agentlm-70b" - - "Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat" - vllm_model_dict: - "aquila-7b": "BAAI/Aquila-7B" - "aquilachat-7b": "BAAI/AquilaChat-7B" - - "baichuan-7b": "baichuan-inc/Baichuan-7B" - "baichuan-13b": "baichuan-inc/Baichuan-13B" - "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat" - - "chatglm2-6b": "THUDM/chatglm2-6b" - "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k" - "chatglm3-6b": "THUDM/chatglm3-6b" - "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k" + "internlm2-chat-7b": "internlm/internlm2-chat-7b" + "internlm2-chat-20b": "internlm/internlm2-chat-20b" "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat" "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k" - # 注意:bloom系列的tokenizer与model是分离的,因此虽然vllm支持,但与fschat框架不兼容 - # "bloom": "bigscience/bloom", - # "bloomz": "bigscience/bloomz", - # "bloomz-560m": "bigscience/bloomz-560m", - # "bloomz-7b1": "bigscience/bloomz-7b1", - # "bloomz-1b7": "bigscience/bloomz-1b7", + "Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat" + + "agentlm-7b": "THUDM/agentlm-7b" + "agentlm-13b": "THUDM/agentlm-13b" + "agentlm-70b": "THUDM/agentlm-70b" + + "falcon-7b": "tiiuae/falcon-7b" + "falcon-40b": "tiiuae/falcon-40b" + "falcon-rw-7b": "tiiuae/falcon-rw-7b" + + 
"aquila-7b": "BAAI/Aquila-7B" + "aquilachat-7b": "BAAI/AquilaChat-7B" + "open_llama_13b": "openlm-research/open_llama_13b" + "vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5" + "koala": "young-geng/koala" + "mpt-7b": "mosaicml/mpt-7b" + "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter" + "mpt-30b": "mosaicml/mpt-30b" + "opt-66b": "facebook/opt-66b" + "opt-iml-max-30b": "facebook/opt-iml-max-30b" + "gpt2": "gpt2" + "gpt2-xl": "gpt2-xl" + "gpt-j-6b": "EleutherAI/gpt-j-6b" + "gpt4all-j": "nomic-ai/gpt4all-j" + "gpt-neox-20b": "EleutherAI/gpt-neox-20b" + "pythia-12b": "EleutherAI/pythia-12b" + "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5" + "dolly-v2-12b": "databricks/dolly-v2-12b" + "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b" + + vllm_model_dict: + "chatglm2-6b": "THUDM/chatglm2-6b" + "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k" + "chatglm3-6b": "THUDM/chatglm3-6b" + "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k" + + "Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf" + "Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf" + "Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf" + + "Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat" + "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat" + "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat" + "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat" + + "baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat" + "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat" + "baichuan2-7b-chat": "baichuan-inc/Baichuan-7B-Chat" + "baichuan2-13b-chat": "baichuan-inc/Baichuan-13B-Chat" + + "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat" + "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k" "internlm-7b": "internlm/internlm-7b" "internlm-chat-7b": "internlm/internlm-chat-7b" + "internlm2-chat-7b": "internlm/Models/internlm2-chat-7b" + "internlm2-chat-20b": "internlm/Models/internlm2-chat-20b" + + "aquila-7b": "BAAI/Aquila-7B" + "aquilachat-7b": "BAAI/AquilaChat-7B" + "falcon-7b": "tiiuae/falcon-7b" "falcon-40b": "tiiuae/falcon-40b" "falcon-rw-7b": "tiiuae/falcon-rw-7b" @@ -307,8 +299,6 @@ plugins: "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5" "dolly-v2-12b": "databricks/dolly-v2-12b" "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b" - "Llama-2-13b-hf": "meta-llama/Llama-2-13b-hf" - "Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf" "open_llama_13b": "openlm-research/open_llama_13b" "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3" "koala": "young-geng/koala" @@ -318,24 +308,3 @@ plugins: "opt-66b": "facebook/opt-66b" "opt-iml-max-30b": "facebook/opt-iml-max-30b" - "Qwen-1_8B": "Qwen/Qwen-1_8B" - "Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat" - "Qwen-1_8B-Chat-Int8": "Qwen/Qwen-1_8B-Chat-Int8" - "Qwen-1_8B-Chat-Int4": "Qwen/Qwen-1_8B-Chat-Int4" - - "Qwen-7B": "Qwen/Qwen-7B" - "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat" - - "Qwen-14B": "Qwen/Qwen-14B" - "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat" - "Qwen-14B-Chat-Int8": "Qwen/Qwen-14B-Chat-Int8" - "Qwen-14B-Chat-Int4": "Qwen/Qwen-14B-Chat-Int4" - - "Qwen-72B": "Qwen/Qwen-72B" - "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat" - "Qwen-72B-Chat-Int8": "Qwen/Qwen-72B-Chat-Int8" - "Qwen-72B-Chat-Int4": "Qwen/Qwen-72B-Chat-Int4" - - "agentlm-7b": "THUDM/agentlm-7b" - "agentlm-13b": "THUDM/agentlm-13b" - "agentlm-70b": "THUDM/agentlm-70b" diff --git a/configs/loom.yaml.example b/configs/loom.yaml.example new file mode 100644 index 00000000..f81df49f --- /dev/null +++ b/configs/loom.yaml.example @@ -0,0 +1,310 @@ +log_path: "logs" +log_level: "DEBUG" + +api_server: + host: "127.0.0.1" + port: 8000 + +publish_server: + host: 
"127.0.0.1" + port: 8001 + +openai_plugins_folder: + - "openai_plugins" +openai_plugins_load_folder: + - "configs" + + +plugins: + - openai: + name: "openai" + + - fastchat: + name: "fastchat" + logdir: "logs" + # LLM 运行设备。设为"auto"会自动检测,也可手动设定为"cuda","mps","cpu"其中之一。 + llm_device: "auto" + model_names: + - "chatglm3-6b" + run_controller: + host: "127.0.0.1" + port: 20001 + dispatch_method: "shortest_queue" + run_openai_api: + host: "127.0.0.1" + port: 20000 + fschat_model_workers: + - default: + host: "127.0.0.1" + port: 20002 + device: "auto" + infer_turbo: False + + # model_worker多卡加载需要配置的参数 + # "gpus": None, # 使用的GPU,以str的格式指定,如"0,1",如失效请使用CUDA_VISIBLE_DEVICES="0,1"等形式指定 + # "num_gpus": 1, # 使用GPU的数量 + # "max_gpu_memory": "20GiB", # 每个GPU占用的最大显存 + + # 以下为model_worker非常用参数,可根据需要配置 + # "load_8bit": False, # 开启8bit量化 + # "cpu_offloading": None, + # "gptq_ckpt": None, + # "gptq_wbits": 16, + # "gptq_groupsize": -1, + # "gptq_act_order": False, + # "awq_ckpt": None, + # "awq_wbits": 16, + # "awq_groupsize": -1, + # "model_names": LLM_MODELS, + # "conv_template": None, + # "limit_worker_concurrency": 5, + # "stream_interval": 2, + # "no_register": False, + # "embed_in_truncate": False, + + # 以下为vllm_worker配置参数,注意使用vllm必须有gpu,仅在Linux测试通过 + + # tokenizer = model_path # 如果tokenizer与model_path不一致在此处添加 + # 'tokenizer_mode':'auto', + # 'trust_remote_code':True, + # 'download_dir':None, + # 'load_format':'auto', + # 'dtype':'auto', + # 'seed':0, + # 'worker_use_ray':False, + # 'pipeline_parallel_size':1, + # 'tensor_parallel_size':1, + # 'block_size':16, + # 'swap_space':4 , # GiB + # 'gpu_memory_utilization':0.90, + # 'max_num_batched_tokens':2560, + # 'max_num_seqs':256, + # 'disable_log_stats':False, + # 'conv_template':None, + # 'limit_worker_concurrency':5, + # 'no_register':False, + # 'num_gpus': 1 + # 'engine_use_ray': False, + # 'disable_log_requests': False + + - chatglm3-6b: + host: "127.0.0.1" + device: "cuda" + port: 20009 + + - internlm2-chat-7b: + host: "127.0.0.1" + device: "cuda" + port: 20009 + + # 以下配置可以不用修改,在model_config中设置启动的模型 + - zhipu-api: + port: 21001 + + - minimax-api: + port: 21002 + + - xinghuo-api: + port: 21003 + + - qianfan-api: + port: 21004 + + - fangzhou-api: + port: 21005 + + - qwen-api: + port: 21006 + + - baichuan-api: + port: 21007 + + - azure-api: + port: 21008 + + - tiangong-api: + port: 21009 + online_llm_model: + # 线上模型。请在server_config中为每个在线API设置不同的端口 + + - "openai-api": + "model_name": "gpt-3.5-turbo" + "api_base_url": "https://api.openai.com/v1" + "api_key": "" + "openai_proxy": "" + + # 具体注册及api key获取请前往 http://open.bigmodel.cn + - "zhipu-api": + "api_key": "" + "version": "chatglm_turbo" # 可选包括 "chatglm_turbo" + "provider": "ChatGLMWorker" + + # 具体注册及api key获取请前往 https://api.minimax.chat/ + - "minimax-api": + "group_id": "" + "api_key": "" + "is_pro": False + "provider": "MiniMaxWorker" + + # 具体注册及api key获取请前往 https://xinghuo.xfyun.cn/ + - "xinghuo-api": + "APPID": "" + "APISecret": "" + "api_key": "" + "version": "v1.5" # 你使用的讯飞星火大模型版本,可选包括 "v3.0", "v1.5", "v2.0" + "provider": "XingHuoWorker" + + # 百度千帆 API,申请方式请参考 https://cloud.baidu.com/doc/WENXINWORKSHOP/s/4lilb2lpf + - "qianfan-api": + "version": "ERNIE-Bot" # 注意大小写。当前支持 "ERNIE-Bot" 或 "ERNIE-Bot-turbo", 更多的见官方文档。 + "version_url": "" # 也可以不填写version,直接填写在千帆申请模型发布的API地址 + "api_key": "" + "secret_key": "" + "provider": "QianFanWorker" + + # 火山方舟 API,文档参考 https://www.volcengine.com/docs/82379 + - "fangzhou-api": + "version": "chatglm-6b-model" # 当前支持 "chatglm-6b-model", 更多的见文档模型支持列表中方舟部分。 + 
"version_url": "" # 可以不填写version,直接填写在方舟申请模型发布的API地址 + "api_key": "" + "secret_key": "" + "provider": "FangZhouWorker" + + # 阿里云通义千问 API,文档参考 https://help.aliyun.com/zh/dashscope/developer-reference/api-details + - "qwen-api": + "version": "qwen-turbo" # 可选包括 "qwen-turbo", "qwen-plus" + "api_key": "" # 请在阿里云控制台模型服务灵积API-KEY管理页面创建 + "provider": "QwenWorker" + + # 百川 API,申请方式请参考 https://www.baichuan-ai.com/home#api-enter + - "baichuan-api": + "version": "Baichuan2-53B" # 当前支持 "Baichuan2-53B", 见官方文档。 + "api_key": "" + "secret_key": "" + "provider": "BaiChuanWorker" + + # Azure API + - "azure-api": + "deployment_name": "" # 部署容器的名字 + "resource_name": "" # https://{resource_name}.openai.azure.com/openai/ 填写resource_name的部分,其他部分不要填写 + "api_version": "" # API的版本,不是模型版本 + "api_key": "" + "provider": "AzureWorker" + + # 昆仑万维天工 API https://model-platform.tiangong.cn/ + - "tiangong-api": + "version": "SkyChat-MegaVerse" + "api_key": "" + "secret_key": "" + "provider": "TianGongWorker" + "llm_model": + + "chatglm2-6b": "THUDM/chatglm2-6b" + "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k" + "chatglm3-6b": "THUDM/chatglm3-6b" + "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k" + + "Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf" + "Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf" + "Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf" + + "Qwen-1_8B-Chat": "/media/checkpoint/Qwen-1_8B-Chat" + "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat" + "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat" + "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat" + + "baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat" + "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat" + "baichuan2-7b-chat": "baichuan-inc/Baichuan2-7B-Chat" + "baichuan2-13b-chat": "baichuan-inc/Baichuan2-13B-Chat" + + "internlm-7b": "internlm/internlm-7b" + "internlm-chat-7b": "internlm/internlm-chat-7b" + "internlm2-chat-7b": "internlm/internlm2-chat-7b" + "internlm2-chat-20b": "internlm/internlm2-chat-20b" + + "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat" + "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k" + + "Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat" + + "agentlm-7b": "THUDM/agentlm-7b" + "agentlm-13b": "THUDM/agentlm-13b" + "agentlm-70b": "THUDM/agentlm-70b" + + "falcon-7b": "tiiuae/falcon-7b" + "falcon-40b": "tiiuae/falcon-40b" + "falcon-rw-7b": "tiiuae/falcon-rw-7b" + + "aquila-7b": "BAAI/Aquila-7B" + "aquilachat-7b": "BAAI/AquilaChat-7B" + "open_llama_13b": "openlm-research/open_llama_13b" + "vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5" + "koala": "young-geng/koala" + "mpt-7b": "mosaicml/mpt-7b" + "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter" + "mpt-30b": "mosaicml/mpt-30b" + "opt-66b": "facebook/opt-66b" + "opt-iml-max-30b": "facebook/opt-iml-max-30b" + "gpt2": "gpt2" + "gpt2-xl": "gpt2-xl" + "gpt-j-6b": "EleutherAI/gpt-j-6b" + "gpt4all-j": "nomic-ai/gpt4all-j" + "gpt-neox-20b": "EleutherAI/gpt-neox-20b" + "pythia-12b": "EleutherAI/pythia-12b" + "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5" + "dolly-v2-12b": "databricks/dolly-v2-12b" + "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b" + + vllm_model_dict: + "chatglm2-6b": "THUDM/chatglm2-6b" + "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k" + "chatglm3-6b": "THUDM/chatglm3-6b" + "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k" + + "Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf" + "Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf" + "Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf" + + "Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat" + "Qwen-7B-Chat": 
"Qwen/Qwen-7B-Chat" + "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat" + "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat" + + "baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat" + "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat" + "baichuan2-7b-chat": "baichuan-inc/Baichuan-7B-Chat" + "baichuan2-13b-chat": "baichuan-inc/Baichuan-13B-Chat" + + "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat" + "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k" + + "internlm-7b": "internlm/internlm-7b" + "internlm-chat-7b": "internlm/internlm-chat-7b" + "internlm2-chat-7b": "internlm/Models/internlm2-chat-7b" + "internlm2-chat-20b": "internlm/Models/internlm2-chat-20b" + + "aquila-7b": "BAAI/Aquila-7B" + "aquilachat-7b": "BAAI/AquilaChat-7B" + + "falcon-7b": "tiiuae/falcon-7b" + "falcon-40b": "tiiuae/falcon-40b" + "falcon-rw-7b": "tiiuae/falcon-rw-7b" + "gpt2": "gpt2" + "gpt2-xl": "gpt2-xl" + "gpt-j-6b": "EleutherAI/gpt-j-6b" + "gpt4all-j": "nomic-ai/gpt4all-j" + "gpt-neox-20b": "EleutherAI/gpt-neox-20b" + "pythia-12b": "EleutherAI/pythia-12b" + "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5" + "dolly-v2-12b": "databricks/dolly-v2-12b" + "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b" + "open_llama_13b": "openlm-research/open_llama_13b" + "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3" + "koala": "young-geng/koala" + "mpt-7b": "mosaicml/mpt-7b" + "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter" + "mpt-30b": "mosaicml/mpt-30b" + "opt-66b": "facebook/opt-66b" + "opt-iml-max-30b": "facebook/opt-iml-max-30b" + diff --git a/configs/model_config.py.example b/configs/model_config.py.example index 0b4d0b92..5fd04a9b 100644 --- a/configs/model_config.py.example +++ b/configs/model_config.py.example @@ -1,253 +1,310 @@ -import os +log_path: "logs" +log_level: "DEBUG" -MODEL_ROOT_PATH = "" -EMBEDDING_MODEL = "bge-large-zh-v1.5" # bge-large-zh -EMBEDDING_DEVICE = "auto" +api_server: + host: "127.0.0.1" + port: 8000 -# 选用的reranker模型 -RERANKER_MODEL = "bge-reranker-large" -# 是否启用reranker模型 -USE_RERANKER = False -RERANKER_MAX_LENGTH = 1024 +publish_server: + host: "127.0.0.1" + port: 8001 -# 如果需要在 EMBEDDING_MODEL 中增加自定义的关键字时配置 -EMBEDDING_KEYWORD_FILE = "keywords.txt" -EMBEDDING_MODEL_OUTPUT_PATH = "output" +openai_plugins_folder: + - "openai_plugins" +openai_plugins_load_folder: + - "configs" -SUPPORT_AGENT_MODELS = [ - "chatglm3-6b", - "openai-api", - "Qwen-14B-Chat", - "Qwen-7B-Chat", -] -LLM_MODEL_CONFIG = { - "preprocess_model": { - # "Mixtral-8x7B-v0.1": { - # "temperature": 0.01, - # "max_tokens": 5, - # "prompt_name": "default", - # "callbacks": False - # }, - "chatglm3-6b": { - "temperature": 0.05, - "max_tokens": 4096, - "prompt_name": "default", - "callbacks": False - }, - }, - "llm_model": { - # "Mixtral-8x7B-v0.1": { - # "temperature": 0.9, - # "max_tokens": 4000, - # "history_len": 5, - # "prompt_name": "default", - # "callbacks": True - # }, - "chatglm3-6b": { - "temperature": 0.05, - "max_tokens": 4096, - "prompt_name": "default", - "history_len": 10, - "callbacks": True - }, - }, - "action_model": { - # "Qwen-14B-Chat": { - # "temperature": 0.05, - # "max_tokens": 4096, - # "prompt_name": "qwen", - # "callbacks": True - # }, - "chatglm3-6b": { - "temperature": 0.05, - "max_tokens": 4096, - "prompt_name": "ChatGLM3", - "callbacks": True - }, - # "zhipu-api": { - # "temperature": 0.01, - # "max_tokens": 4096, - # "prompt_name": "ChatGLM3", - # "callbacks": True - # } - }, - "postprocess_model": { - "zhipu-api": { - "temperature": 0.01, - "max_tokens": 4096, - "prompt_name": 
"default", - "callbacks": True - } - }, -} -LLM_DEVICE = "auto" -ONLINE_LLM_MODEL = { - "openai-api": { - "model_name": "gpt-4-1106-preview", - "api_base_url": "https://api.openai.com/v1", - "api_key": "sk-", - "openai_proxy": "", - }, - "zhipu-api": { - "api_key": "", - "version": "chatglm_turbo", - "provider": "ChatGLMWorker", - }, - "minimax-api": { - "group_id": "", - "api_key": "", - "is_pro": False, - "provider": "MiniMaxWorker", - }, - "xinghuo-api": { - "APPID": "", - "APISecret": "", - "api_key": "", - "version": "v3.0", - "provider": "XingHuoWorker", - }, - "qianfan-api": { - "version": "ernie-bot-4", - "version_url": "", - "api_key": "", - "secret_key": "", - "provider": "QianFanWorker", - }, - "fangzhou-api": { - "version": "chatglm-6b-model", - "version_url": "", - "api_key": "", - "secret_key": "", - "provider": "FangZhouWorker", - }, - "qwen-api": { - "version": "qwen-max", - "api_key": "", - "provider": "QwenWorker", - "embed_model": "text-embedding-v1" # embedding 模型名称 - }, - "baichuan-api": { - "version": "Baichuan2-53B", - "api_key": "", - "secret_key": "", - "provider": "BaiChuanWorker", - }, - "azure-api": { - "deployment_name": "", - "resource_name": "", - "api_version": "2023-07-01-preview", - "api_key": "", - "provider": "AzureWorker", - }, +plugins: + - openai: + name: "openai" - # 昆仑万维天工 API https://model-platform.tiangong.cn/ - "tiangong-api": { - "version": "SkyChat-MegaVerse", - "api_key": "", - "secret_key": "", - "provider": "TianGongWorker", - }, - # Gemini API https://makersuite.google.com/app/apikey - "gemini-api": { - "api_key": "", - "provider": "GeminiWorker", - } + - fastchat: + name: "fastchat" + logdir: "logs" + # LLM 运行设备。设为"auto"会自动检测,也可手动设定为"cuda","mps","cpu"其中之一。 + llm_device: "auto" + model_names: + - "internlm2-chat-7b" + run_controller: + host: "127.0.0.1" + port: 20001 + dispatch_method: "shortest_queue" + run_openai_api: + host: "127.0.0.1" + port: 20000 + fschat_model_workers: + - default: + host: "127.0.0.1" + port: 20002 + device: "auto" + infer_turbo: False -} + # model_worker多卡加载需要配置的参数 + # "gpus": None, # 使用的GPU,以str的格式指定,如"0,1",如失效请使用CUDA_VISIBLE_DEVICES="0,1"等形式指定 + # "num_gpus": 1, # 使用GPU的数量 + # "max_gpu_memory": "20GiB", # 每个GPU占用的最大显存 -# 在以下字典中修改属性值,以指定本地embedding模型存储位置。支持3种设置方法: -# 1、将对应的值修改为模型绝对路径 -# 2、不修改此处的值(以 text2vec 为例): -# 2.1 如果{MODEL_ROOT_PATH}下存在如下任一子目录: -# - text2vec -# - GanymedeNil/text2vec-large-chinese -# - text2vec-large-chinese -# 2.2 如果以上本地路径不存在,则使用huggingface模型 + # 以下为model_worker非常用参数,可根据需要配置 + # "load_8bit": False, # 开启8bit量化 + # "cpu_offloading": None, + # "gptq_ckpt": None, + # "gptq_wbits": 16, + # "gptq_groupsize": -1, + # "gptq_act_order": False, + # "awq_ckpt": None, + # "awq_wbits": 16, + # "awq_groupsize": -1, + # "model_names": LLM_MODELS, + # "conv_template": None, + # "limit_worker_concurrency": 5, + # "stream_interval": 2, + # "no_register": False, + # "embed_in_truncate": False, -MODEL_PATH = { - "embed_model": { - "ernie-tiny": "nghuyong/ernie-3.0-nano-zh", - "ernie-base": "nghuyong/ernie-3.0-base-zh", - "text2vec-base": "shibing624/text2vec-base-chinese", - "text2vec": "GanymedeNil/text2vec-large-chinese", - "text2vec-paraphrase": "shibing624/text2vec-base-chinese-paraphrase", - "text2vec-sentence": "shibing624/text2vec-base-chinese-sentence", - "text2vec-multilingual": "shibing624/text2vec-base-multilingual", - "text2vec-bge-large-chinese": "shibing624/text2vec-bge-large-chinese", - "m3e-small": "moka-ai/m3e-small", - "m3e-base": "moka-ai/m3e-base", - "m3e-large": "moka-ai/m3e-large", + # 
-        "bge-small-zh": "BAAI/bge-small-zh",
-        "bge-base-zh": "BAAI/bge-base-zh",
-        "bge-large-zh": "/media/zr/Data/Models/Embedding/bge-large-zh",
-        "bge-large-zh-noinstruct": "BAAI/bge-large-zh-noinstruct",
-        "bge-base-zh-v1.5": "BAAI/bge-base-zh-v1.5",
-        "bge-large-zh-v1.5": "/share/home/zyx/Models/bge-large-zh-v1.5",
-        "piccolo-base-zh": "sensenova/piccolo-base-zh",
-        "piccolo-large-zh": "sensenova/piccolo-large-zh",
-        "nlp_gte_sentence-embedding_chinese-large": "/Models/nlp_gte_sentence-embedding_chinese-large",
-        "text-embedding-ada-002": "sk-REDACTED",
-    }
-}
+           # tokenizer = model_path  # add here if the tokenizer differs from model_path
+           # 'tokenizer_mode':'auto',
+           # 'trust_remote_code':True,
+           # 'download_dir':None,
+           # 'load_format':'auto',
+           # 'dtype':'auto',
+           # 'seed':0,
+           # 'worker_use_ray':False,
+           # 'pipeline_parallel_size':1,
+           # 'tensor_parallel_size':1,
+           # 'block_size':16,
+           # 'swap_space':4 ,  # GiB
+           # 'gpu_memory_utilization':0.90,
+           # 'max_num_batched_tokens':2560,
+           # 'max_num_seqs':256,
+           # 'disable_log_stats':False,
+           # 'conv_template':None,
+           # 'limit_worker_concurrency':5,
+           # 'no_register':False,
+           # 'num_gpus': 1
+           # 'engine_use_ray': False,
+           # 'disable_log_requests': False
 
-NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data")
+        - chatglm3-6b:
+           host: "127.0.0.1"
+           device: "cuda"
+           port: 20009
 
-# Using VLLM may degrade model reasoning and break Agent tasks
-VLLM_MODEL_DICT = {
-    "aquila-7b": "BAAI/Aquila-7B",
-    "aquilachat-7b": "BAAI/AquilaChat-7B",
+        - internlm2-chat-7b:
+           host: "127.0.0.1"
+           device: "cuda"
+           port: 20010  # must differ from the chatglm3-6b worker's port
 
-    "baichuan-7b": "baichuan-inc/Baichuan-7B",
-    "baichuan-13b": "baichuan-inc/Baichuan-13B",
-    'baichuan-13b-chat': 'baichuan-inc/Baichuan-13B-Chat',
+      # The settings below need no changes; select the models to launch in model_config
+      - zhipu-api:
+          port: 21001
 
-    'chatglm2-6b': 'THUDM/chatglm2-6b',
-    'chatglm2-6b-32k': 'THUDM/chatglm2-6b-32k',
-    'chatglm3-6b': 'THUDM/chatglm3-6b',
-    'chatglm3-6b-32k': 'THUDM/chatglm3-6b-32k',
+      - minimax-api:
+          port: 21002
 
-    "internlm-7b": "internlm/internlm-7b",
-    "internlm-chat-7b": "internlm/internlm-chat-7b",
-    "internlm2-chat-7b": "internlm/Models/internlm2-chat-7b",
-    "internlm2-chat-20b": "internlm/Models/internlm2-chat-20b",
+      - xinghuo-api:
+          port: 21003
 
-    "aquila-7b": "BAAI/Aquila-7B",
-    "aquilachat-7b": "BAAI/AquilaChat-7B",
+      - qianfan-api:
+          port: 21004
 
-    "falcon-7b": "tiiuae/falcon-7b",
-    "falcon-40b": "tiiuae/falcon-40b",
-    "falcon-rw-7b": "tiiuae/falcon-rw-7b",
-    "gpt2": "gpt2",
-    "gpt2-xl": "gpt2-xl",
-    "gpt-j-6b": "EleutherAI/gpt-j-6b",
-    "gpt4all-j": "nomic-ai/gpt4all-j",
-    "gpt-neox-20b": "EleutherAI/gpt-neox-20b",
-    "pythia-12b": "EleutherAI/pythia-12b",
-    "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
-    "dolly-v2-12b": "databricks/dolly-v2-12b",
-    "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b",
-    "open_llama_13b": "openlm-research/open_llama_13b",
-    "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3",
-    "koala": "young-geng/koala",
-    "mpt-7b": "mosaicml/mpt-7b",
-    "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter",
-    "mpt-30b": "mosaicml/mpt-30b",
-    "opt-66b": "facebook/opt-66b",
-    "opt-iml-max-30b": "facebook/opt-iml-max-30b",
+      - fangzhou-api:
+          port: 21005
 
-    "Qwen-7B": "Qwen/Qwen-7B",
-    "Qwen-14B": "Qwen/Qwen-14B",
-    "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
-    "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
+      - qwen-api:
+          port: 21006
 
-    "agentlm-7b": "THUDM/agentlm-7b",
-    "agentlm-13b": "THUDM/agentlm-13b",
-    "agentlm-70b": "THUDM/agentlm-70b",
"THUDM/agentlm-70b", + - baichuan-api: + port: 21007 -} + - azure-api: + port: 21008 + + - tiangong-api: + port: 21009 + online_llm_model: + # 线上模型。请在server_config中为每个在线API设置不同的端口 + + - "openai-api": + "model_name": "gpt-3.5-turbo" + "api_base_url": "https://api.openai.com/v1" + "api_key": "" + "openai_proxy": "" + + # 具体注册及api key获取请前往 http://open.bigmodel.cn + - "zhipu-api": + "api_key": "" + "version": "chatglm_turbo" # 可选包括 "chatglm_turbo" + "provider": "ChatGLMWorker" + + # 具体注册及api key获取请前往 https://api.minimax.chat/ + - "minimax-api": + "group_id": "" + "api_key": "" + "is_pro": False + "provider": "MiniMaxWorker" + + # 具体注册及api key获取请前往 https://xinghuo.xfyun.cn/ + - "xinghuo-api": + "APPID": "" + "APISecret": "" + "api_key": "" + "version": "v1.5" # 你使用的讯飞星火大模型版本,可选包括 "v3.0", "v1.5", "v2.0" + "provider": "XingHuoWorker" + + # 百度千帆 API,申请方式请参考 https://cloud.baidu.com/doc/WENXINWORKSHOP/s/4lilb2lpf + - "qianfan-api": + "version": "ERNIE-Bot" # 注意大小写。当前支持 "ERNIE-Bot" 或 "ERNIE-Bot-turbo", 更多的见官方文档。 + "version_url": "" # 也可以不填写version,直接填写在千帆申请模型发布的API地址 + "api_key": "" + "secret_key": "" + "provider": "QianFanWorker" + + # 火山方舟 API,文档参考 https://www.volcengine.com/docs/82379 + - "fangzhou-api": + "version": "chatglm-6b-model" # 当前支持 "chatglm-6b-model", 更多的见文档模型支持列表中方舟部分。 + "version_url": "" # 可以不填写version,直接填写在方舟申请模型发布的API地址 + "api_key": "" + "secret_key": "" + "provider": "FangZhouWorker" + + # 阿里云通义千问 API,文档参考 https://help.aliyun.com/zh/dashscope/developer-reference/api-details + - "qwen-api": + "version": "qwen-turbo" # 可选包括 "qwen-turbo", "qwen-plus" + "api_key": "" # 请在阿里云控制台模型服务灵积API-KEY管理页面创建 + "provider": "QwenWorker" + + # 百川 API,申请方式请参考 https://www.baichuan-ai.com/home#api-enter + - "baichuan-api": + "version": "Baichuan2-53B" # 当前支持 "Baichuan2-53B", 见官方文档。 + "api_key": "" + "secret_key": "" + "provider": "BaiChuanWorker" + + # Azure API + - "azure-api": + "deployment_name": "" # 部署容器的名字 + "resource_name": "" # https://{resource_name}.openai.azure.com/openai/ 填写resource_name的部分,其他部分不要填写 + "api_version": "" # API的版本,不是模型版本 + "api_key": "" + "provider": "AzureWorker" + + # 昆仑万维天工 API https://model-platform.tiangong.cn/ + - "tiangong-api": + "version": "SkyChat-MegaVerse" + "api_key": "" + "secret_key": "" + "provider": "TianGongWorker" + "llm_model": + + "chatglm2-6b": "THUDM/chatglm2-6b" + "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k" + "chatglm3-6b": "THUDM/chatglm3-6b" + "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k" + + "Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf" + "Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf" + "Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf" + + "Qwen-1_8B-Chat": "/media/checkpoint/Qwen-1_8B-Chat" + "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat" + "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat" + "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat" + + "baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat" + "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat" + "baichuan2-7b-chat": "baichuan-inc/Baichuan2-7B-Chat" + "baichuan2-13b-chat": "baichuan-inc/Baichuan2-13B-Chat" + + "internlm-7b": "internlm/internlm-7b" + "internlm-chat-7b": "internlm/internlm-chat-7b" + "internlm2-chat-7b": "internlm/internlm2-chat-7b" + "internlm2-chat-20b": "internlm/internlm2-chat-20b" + + "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat" + "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k" + + "Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat" + + "agentlm-7b": "THUDM/agentlm-7b" + "agentlm-13b": "THUDM/agentlm-13b" + "agentlm-70b": "THUDM/agentlm-70b" + + "falcon-7b": "tiiuae/falcon-7b" + 
"falcon-40b": "tiiuae/falcon-40b" + "falcon-rw-7b": "tiiuae/falcon-rw-7b" + + "aquila-7b": "BAAI/Aquila-7B" + "aquilachat-7b": "BAAI/AquilaChat-7B" + "open_llama_13b": "openlm-research/open_llama_13b" + "vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5" + "koala": "young-geng/koala" + "mpt-7b": "mosaicml/mpt-7b" + "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter" + "mpt-30b": "mosaicml/mpt-30b" + "opt-66b": "facebook/opt-66b" + "opt-iml-max-30b": "facebook/opt-iml-max-30b" + "gpt2": "gpt2" + "gpt2-xl": "gpt2-xl" + "gpt-j-6b": "EleutherAI/gpt-j-6b" + "gpt4all-j": "nomic-ai/gpt4all-j" + "gpt-neox-20b": "EleutherAI/gpt-neox-20b" + "pythia-12b": "EleutherAI/pythia-12b" + "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5" + "dolly-v2-12b": "databricks/dolly-v2-12b" + "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b" + + vllm_model_dict: + "chatglm2-6b": "THUDM/chatglm2-6b" + "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k" + "chatglm3-6b": "THUDM/chatglm3-6b" + "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k" + + "Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf" + "Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf" + "Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf" + + "Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat" + "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat" + "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat" + "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat" + + "baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat" + "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat" + "baichuan2-7b-chat": "baichuan-inc/Baichuan-7B-Chat" + "baichuan2-13b-chat": "baichuan-inc/Baichuan-13B-Chat" + + "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat" + "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k" + + "internlm-7b": "internlm/internlm-7b" + "internlm-chat-7b": "internlm/internlm-chat-7b" + "internlm2-chat-7b": "internlm/Models/internlm2-chat-7b" + "internlm2-chat-20b": "internlm/Models/internlm2-chat-20b" + + "aquila-7b": "BAAI/Aquila-7B" + "aquilachat-7b": "BAAI/AquilaChat-7B" + + "falcon-7b": "tiiuae/falcon-7b" + "falcon-40b": "tiiuae/falcon-40b" + "falcon-rw-7b": "tiiuae/falcon-rw-7b" + "gpt2": "gpt2" + "gpt2-xl": "gpt2-xl" + "gpt-j-6b": "EleutherAI/gpt-j-6b" + "gpt4all-j": "nomic-ai/gpt4all-j" + "gpt-neox-20b": "EleutherAI/gpt-neox-20b" + "pythia-12b": "EleutherAI/pythia-12b" + "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5" + "dolly-v2-12b": "databricks/dolly-v2-12b" + "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b" + "open_llama_13b": "openlm-research/open_llama_13b" + "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3" + "koala": "young-geng/koala" + "mpt-7b": "mosaicml/mpt-7b" + "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter" + "mpt-30b": "mosaicml/mpt-30b" + "opt-66b": "facebook/opt-66b" + "opt-iml-max-30b": "facebook/opt-iml-max-30b" -LOOM_CONFIG = "/media/gpt4-pdf-chatbot-langchain/LooM/src/core/loom.yaml" -OPENAI_KEY = None -OPENAI_PROXY = None diff --git a/openai_plugins/openai/app.py b/openai_plugins/openai/app.py index 47140d51..e51443d8 100644 --- a/openai_plugins/openai/app.py +++ b/openai_plugins/openai/app.py @@ -6,7 +6,6 @@ import sys import logging logger = logging.getLogger(__name__) -# 为了能使用插件中的函数,需要将当前目录加入到sys.path中 root_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.append(root_dir) diff --git a/openai_plugins/openai/requirements.txt b/openai_plugins/openai/requirements.txt index e69de29b..c2fd4d73 100644 --- a/openai_plugins/openai/requirements.txt +++ b/openai_plugins/openai/requirements.txt @@ -0,0 +1 @@ +openai>=1.7.1 
\ No newline at end of file
diff --git a/openai_plugins/zhipuai/app.py b/openai_plugins/zhipuai/app.py
new file mode 100644
index 00000000..e8d4678a
--- /dev/null
+++ b/openai_plugins/zhipuai/app.py
@@ -0,0 +1,48 @@
+from loom_core.openai_plugins.core.adapter import ProcessesInfo
+from loom_core.openai_plugins.core.application import ApplicationAdapter
+
+import os
+import sys
+import logging
+
+logger = logging.getLogger(__name__)
+root_dir = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(root_dir)
+
+
+class ZhipuAIApplicationAdapter(ApplicationAdapter):
+
+    def __init__(self, cfg=None, state_dict: dict = None):
+        self.processesInfo = None
+        self._cfg = cfg
+        super().__init__(state_dict=state_dict)
+
+    def class_name(self) -> str:
+        """Get class name."""
+        return self.__class__.__name__
+
+    @classmethod
+    def from_config(cls, cfg=None):
+        _state_dict = {
+            "application_name": "zhipuai",
+            "application_version": "0.0.1",
+            "application_description": "zhipuai application",
+            "application_author": "zhipuai"
+        }
+        state_dict = cfg.get("state_dict", {})
+        if state_dict is not None and _state_dict is not None:
+            _state_dict = {**state_dict, **_state_dict}
+        else:
+            # handle the case where one or both are None
+            _state_dict = state_dict or _state_dict or {}
+        return cls(cfg=cfg, state_dict=_state_dict)
+
+    def init_processes(self, processesInfo: ProcessesInfo):
+
+        self.processesInfo = processesInfo
+
+    def start(self):
+        pass
+
+    def stop(self):
+        pass
diff --git a/openai_plugins/zhipuai/controller.py b/openai_plugins/zhipuai/controller.py
new file mode 100644
index 00000000..e83f2e4e
--- /dev/null
+++ b/openai_plugins/zhipuai/controller.py
@@ -0,0 +1,47 @@
+from loom_core.openai_plugins.core.control import ControlAdapter
+
+import os
+import sys
+import logging
+
+logger = logging.getLogger(__name__)
+# To use functions from the plugin, the current directory must be added to sys.path
+root_dir = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(root_dir)
+
+
+class ZhipuAIControlAdapter(ControlAdapter):
+
+    def __init__(self, cfg=None, state_dict: dict = None):
+        self._cfg = cfg
+        super().__init__(state_dict=state_dict)
+
+    def class_name(self) -> str:
+        """Get class name."""
+        return self.__class__.__name__
+
+    def start_model(self, new_model_name):
+        pass
+
+    def stop_model(self, model_name: str):
+        pass
+
+    def replace_model(self, model_name: str, new_model_name: str):
+        pass
+
+    @classmethod
+    def from_config(cls, cfg=None):
+        _state_dict = {
+            "controller_name": "zhipuai",
+            "controller_version": "0.0.1",
+            "controller_description": "zhipuai controller",
+            "controller_author": "zhipuai"
+        }
+        state_dict = cfg.get("state_dict", {})
+        if state_dict is not None and _state_dict is not None:
+            _state_dict = {**state_dict, **_state_dict}
+        else:
+            # handle the case where one or both are None
+            _state_dict = state_dict or _state_dict or {}
+
+        return cls(cfg=cfg, state_dict=_state_dict)
diff --git a/openai_plugins/zhipuai/install.py b/openai_plugins/zhipuai/install.py
new file mode 100644
index 00000000..d69b3457
--- /dev/null
+++ b/openai_plugins/zhipuai/install.py
@@ -0,0 +1,103 @@
+import logging
+import sys
+import os
+import subprocess
+import threading
+import re
+import locale
+
+logger = logging.getLogger(__name__)
+root_dir = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(root_dir)
+
+# Perform install
+processed_install = set()
+pip_list = None
+
+
+def handle_stream(stream, prefix):
+    stream.reconfigure(encoding=locale.getpreferredencoding(), errors='replace')
+    for msg in stream:
+        if prefix == '[!]' and ('it/s]' in msg or 's/it]' in msg) and ('%|' in msg or 'it [' in msg):
+            if msg.startswith('100%'):
+                print('\r' + msg, end="", file=sys.stderr)
+            else:
+                print('\r' + msg[:-1], end="", file=sys.stderr)
+        else:
+            if prefix == '[!]':
+                print(prefix, msg, end="", file=sys.stderr)
+            else:
+                print(prefix, msg, end="")
+
+
+def get_installed_packages():
+    global pip_list
+
+    if pip_list is None:
+        try:
+            result = subprocess.check_output([sys.executable, '-m', 'pip', 'list'], universal_newlines=True)
+            pip_list = set([line.split()[0].lower() for line in result.split('\n') if line.strip()])
+        except subprocess.CalledProcessError:
+            print("[openai_plugins] Failed to retrieve the information of installed pip packages.")
+            return set()
+
+    return pip_list
+
+
+def is_installed(name):
+    name = name.strip()
+
+    if name.startswith('#'):
+        return True
+
+    pattern = r'([^<>!=]+)([<>!=]=?)'
+    match = re.search(pattern, name)
+
+    if match:
+        name = match.group(1)
+
+    return name.lower() in get_installed_packages()
+
+
+def process_wrap(cmd_str, cwd_path, handler=None):
+    process = subprocess.Popen(cmd_str, cwd=cwd_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
+                               bufsize=1)
+
+    if handler is None:
+        handler = handle_stream
+
+    stdout_thread = threading.Thread(target=handler, args=(process.stdout, ""))
+    stderr_thread = threading.Thread(target=handler, args=(process.stderr, "[!]"))
+
+    stdout_thread.start()
+    stderr_thread.start()
+
+    stdout_thread.join()
+    stderr_thread.join()
+
+    return process.wait()
+
+
+def install():
+    try:
+        requirements_path = os.path.join(root_dir, 'requirements.txt')
+
+        this_exit_code = 0
+
+        if os.path.exists(requirements_path):
+            with open(requirements_path, 'r', encoding="UTF-8") as file:
+                for line in file:
+                    package_name = line.strip()
+                    if package_name and not is_installed(package_name):
+                        install_cmd = [sys.executable, "-m", "pip", "install", package_name]
+                        this_exit_code += process_wrap(install_cmd, root_dir)
+
+        if this_exit_code != 0:
+            logger.info("[openai_plugins] Failed to install the zhipuai plugin requirements.")
+
+    except Exception:
+        logger.error("[openai_plugins] Failed to install the zhipuai plugin requirements.", exc_info=True)
+
+
+if __name__ == "__main__":
+    install()
diff --git a/openai_plugins/zhipuai/profile_endpoint.py b/openai_plugins/zhipuai/profile_endpoint.py
new file mode 100644
index 00000000..513aba98
--- /dev/null
+++ b/openai_plugins/zhipuai/profile_endpoint.py
@@ -0,0 +1,96 @@
+from typing import List
+
+from loom_core.openai_plugins.core.adapter import LLMWorkerInfo
+from loom_core.openai_plugins.core.profile_endpoint.core import ProfileEndpointAdapter
+
+import os
+import sys
+import logging
+
+logger = logging.getLogger(__name__)
+
+root_dir = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(root_dir)
+
+
+class ZhipuAIProfileEndpointAdapter(ProfileEndpointAdapter):
+    """Adapter for the profile endpoint."""
+
+    def __init__(self, cfg=None, state_dict: dict = None):
+        self._cfg = cfg
+        super().__init__(state_dict=state_dict)
+
+    def class_name(self) -> str:
+        """Get class name."""
+        return self.__class__.__name__
+
+    def list_running_models(self) -> List[LLMWorkerInfo]:
+        """List the running models and their configuration."""
+        list_worker = []
+        list_worker.append(LLMWorkerInfo(worker_id="glm-4",
+                                         model_name="glm-4",
+                                         model_description="glm-4",
+                                         providers=["model", "embedding"],
+                                         model_extra_info="{}"))
+        list_worker.append(LLMWorkerInfo(worker_id="glm-3-turbo",
+                                         model_name="glm-3-turbo",
+                                         model_description="glm-3-turbo",
+                                         providers=["model", "embedding"],
+                                         model_extra_info="{}"))
+        list_worker.append(LLMWorkerInfo(worker_id="embedding-2",
+                                         model_name="embedding-2",
+                                         model_description="embedding-2",
+                                         providers=["embedding"],
+                                         model_extra_info="{}"))
+        return list_worker
+
+    def list_llm_models(self) -> List[LLMWorkerInfo]:
+        """List the configured models."""
+        list_worker = []
+        list_worker.append(LLMWorkerInfo(worker_id="glm-4",
+                                         model_name="glm-4",
+                                         model_description="glm-4",
+                                         providers=["model", "embedding"],
+                                         model_extra_info="{}"))
+        list_worker.append(LLMWorkerInfo(worker_id="glm-3-turbo",
+                                         model_name="glm-3-turbo",
+                                         model_description="glm-3-turbo",
+                                         providers=["model", "embedding"],
+                                         model_extra_info="{}"))
+        list_worker.append(LLMWorkerInfo(worker_id="embedding-2",
+                                         model_name="embedding-2",
+                                         model_description="embedding-2",
+                                         providers=["embedding"],
+                                         model_extra_info="{}"))
+        return list_worker
+
+    def get_model_config(self, model_name) -> LLMWorkerInfo:
+
+        '''
+        Get the (merged) configuration for an LLM model
+        '''
+
+        info_obj = LLMWorkerInfo(worker_id=model_name,
+                                 model_name=model_name,
+                                 model_description="",
+                                 providers=["model", "embedding"],
+                                 model_extra_info="{}")
+
+        return info_obj
+
+    @classmethod
+    def from_config(cls, cfg=None):
+        _state_dict = {
+            "profile_name": "zhipuai",
+            "profile_version": "0.0.1",
+            "profile_description": "zhipuai profile endpoint",
+            "profile_author": "zhipuai"
+        }
+        state_dict = cfg.get("state_dict", {})
+        if state_dict is not None and _state_dict is not None:
+            _state_dict = {**state_dict, **_state_dict}
+        else:
+            # handle the case where one or both are None
+            _state_dict = state_dict or _state_dict or {}
+
+        return cls(cfg=cfg, state_dict=_state_dict)
diff --git a/openai_plugins/zhipuai/requirements.txt b/openai_plugins/zhipuai/requirements.txt
new file mode 100644
index 00000000..bb53db5f
--- /dev/null
+++ b/openai_plugins/zhipuai/requirements.txt
@@ -0,0 +1 @@
+zhipuai>=2.0.1
\ No newline at end of file
diff --git a/openai_plugins/zhipuai/zhipuai_plugins.json b/openai_plugins/zhipuai/zhipuai_plugins.json
new file mode 100644
index 00000000..24a0708a
--- /dev/null
+++ b/openai_plugins/zhipuai/zhipuai_plugins.json
@@ -0,0 +1,11 @@
+{
+  "plugins_name": "zhipuai",
+  "endpoint_host": "https://open.bigmodel.cn/api/paas/v4/",
+  "install_file": "install.py",
+  "application_file": "app.py",
+  "application_class": "ZhipuAIApplicationAdapter",
+  "endpoint_controller_file": "controller.py",
+  "endpoint_controller_class": "ZhipuAIControlAdapter",
+  "profile_endpoint_file": "profile_endpoint.py",
+  "profile_endpoint_class": "ZhipuAIProfileEndpointAdapter"
+}
diff --git a/webui_pages/dialogue/dialogue.py b/webui_pages/dialogue/dialogue.py
index a2e0d440..24f4b693 100644
--- a/webui_pages/dialogue/dialogue.py
+++ b/webui_pages/dialogue/dialogue.py
@@ -345,8 +345,6 @@ def dialogue_page(api: ApiRequest, is_lite: bool = False):
             st.rerun()
 
     warning_placeholder = st.empty()
-    with warning_placeholder.container():
-        st.warning('Running in 8 x A100')
 
     export_btn.download_button(
         "导出记录",
diff --git a/webui_pages/openai_plugins/base.py b/webui_pages/openai_plugins/base.py
index 2ecae214..4f041d4e 100644
--- a/webui_pages/openai_plugins/base.py
+++ b/webui_pages/openai_plugins/base.py
@@ -69,7 +69,4 @@ def openai_plugins_page(api: ApiRequest, is_lite: bool = None):
 
         st.button("启动" + st.session_state.worker_id, key="start_worker",
                   on_click=start_worker)
         st.button("停止" + st.session_state.worker_id, key="stop_worker",
-                  on_click=stop_worker)
-
-
-
+                  on_click=stop_worker)
\ No newline at end of file
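
Editor's note: the sketch below is not part of the patch. It is a minimal smoke test for the endpoint the new zhipuai plugin wraps, written against the zhipuai>=2.0.1 SDK pinned in openai_plugins/zhipuai/requirements.txt. The model names match the workers hard-coded in ZhipuAIProfileEndpointAdapter; the ZHIPUAI_API_KEY environment variable is an assumption of this sketch, not something the plugin reads.

    # Minimal smoke test (sketch, not part of the patch): exercises the ZhipuAI
    # endpoint configured in zhipuai_plugins.json via the zhipuai>=2.0.1 SDK.
    # Assumes a valid key is exported in the ZHIPUAI_API_KEY environment variable.
    import os

    from zhipuai import ZhipuAI

    client = ZhipuAI(api_key=os.environ["ZHIPUAI_API_KEY"])

    # Chat against one of the workers ZhipuAIProfileEndpointAdapter advertises
    # ("glm-3-turbo" is the other chat worker it lists).
    reply = client.chat.completions.create(
        model="glm-4",
        messages=[{"role": "user", "content": "Say hello in one short sentence."}],
    )
    print(reply.choices[0].message.content)

    # Exercise the embedding worker ("embedding-2") listed by the same adapter.
    emb = client.embeddings.create(model="embedding-2", input="hello world")
    print(len(emb.data[0].embedding))

If the endpoint_host is reachable and the key is valid, both calls should return without error; the worker ids used here are exactly those returned by list_running_models() in profile_endpoint.py.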