Update the model execution list and tonight's changes

zR 2024-01-18 21:58:06 +08:00 committed by liunux4odoo
parent cce2b55719
commit 61abd98409
13 changed files with 989 additions and 352 deletions

View File

@@ -10,9 +10,9 @@ publish_server:
port: 8001
openai_plugins_folder:
- "/media/gpt4-pdf-chatbot-langchain/langchain-chatchat-archive/openai_plugins"
- "openai_plugins"
openai_plugins_load_folder:
- "/media/gpt4-pdf-chatbot-langchain/langchain-chatchat-archive/configs"
- "configs"
plugins:
@@ -21,11 +21,11 @@ plugins:
- fastchat:
name: "fastchat"
logdir: "/media/gpt4-pdf-chatbot-langchain/langchain-chatchat-archive/logs"
logdir: "logs"
# Device the LLM runs on. "auto" detects it automatically; it can also be set manually to one of "cuda", "mps", or "cpu".
llm_device: "auto"
model_names:
- "Qwen-1_8B-Chat"
- "chatglm3-6b"
run_controller:
host: "127.0.0.1"
port: 20001
@@ -38,10 +38,6 @@ plugins:
host: "127.0.0.1"
port: 20002
device: "auto"
# False or 'vllm': the inference acceleration framework to use. If vLLM runs into HuggingFace communication problems, see doc/FAQ
# vLLM support for some models is still immature, so it is disabled by default for now
# fschat==0.2.33 has a bug; to use vLLM, modify fastchat.serve.vllm_worker in the source:
# on line 103, change the stop=list(stop) argument of sampling_params = SamplingParams to stop=[i for i in stop if i != ""] (a sketch follows below)
infer_turbo: False
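A minimal sketch of that one-line patch, assuming fschat 0.2.33 (sanitize_stop is an illustrative helper, not part of FastChat):

def sanitize_stop(stop):
    # Empty strings in the stop list trigger the bug; drop them before building SamplingParams.
    return [s for s in stop if s != ""]

# In fastchat/serve/vllm_worker.py, around line 103:
#   sampling_params = SamplingParams(..., stop=sanitize_stop(stop))
print(sanitize_stop(["", "</s>"]))  # -> ['</s>']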
# Parameters required for multi-GPU loading in model_worker
@@ -90,11 +86,16 @@ plugins:
# 'num_gpus': 1
# 'engine_use_ray': False,
# 'disable_log_requests': False
- Qwen-1_8B:
- chatglm3-6b:
host: "127.0.0.1"
port: 20008
- chatglm3-6b: # uses the host and port from default
device: "cuda"
port: 20009
- internlm2-chat-7b:
host: "127.0.0.1"
device: "cuda"
port: 20009
# The entries below need no changes; the models to launch are set in model_config
- zhipu-api:
@@ -197,104 +198,95 @@ plugins:
"secret_key": ""
"provider": "TianGongWorker"
"llm_model":
# Some of the models below have not been fully tested; support is inferred from the fastchat and vllm model lists
"chatglm2-6b": "THUDM/chatglm2-6b"
"chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
"chatglm3-6b": "THUDM/chatglm3-6b"
"chatglm3-6b": "/share/home/zyx/Models/chatglm3-6b"
"chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
"chatglm3-6b-base": "THUDM/chatglm3-6b-base"
"Qwen-1_8B": "/media/checkpoint/Qwen-1_8B"
"Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf"
"Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf"
"Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf"
"Qwen-1_8B-Chat": "/media/checkpoint/Qwen-1_8B-Chat"
"Qwen-1_8B-Chat-Int8": "Qwen/Qwen-1_8B-Chat-Int8"
"Qwen-1_8B-Chat-Int4": "Qwen/Qwen-1_8B-Chat-Int4"
"Qwen-7B": "Qwen/Qwen-7B"
"Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"
"Qwen-14B": "Qwen/Qwen-14B"
"Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
"Qwen-14B-Chat-Int8": "Qwen/Qwen-14B-Chat-Int8"
# With newer transformers versions, you must manually edit the model's config.json and add
# the `disable_exllama: true` field to the quantization_config dict before Qwen's quantized models can start (see the sketch after this model list)
"Qwen-14B-Chat-Int4": "Qwen/Qwen-14B-Chat-Int4"
"Qwen-72B": "Qwen/Qwen-72B"
"Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
"Qwen-72B-Chat-Int8": "Qwen/Qwen-72B-Chat-Int8"
"Qwen-72B-Chat-Int4": "Qwen/Qwen-72B-Chat-Int4"
"baichuan2-13b": "baichuan-inc/Baichuan2-13B-Chat"
"baichuan2-7b": "baichuan-inc/Baichuan2-7B-Chat"
"baichuan-7b": "baichuan-inc/Baichuan-7B"
"baichuan-13b": "baichuan-inc/Baichuan-13B"
"baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
"baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
"aquila-7b": "BAAI/Aquila-7B"
"aquilachat-7b": "BAAI/AquilaChat-7B"
"baichuan2-7b-chat": "baichuan-inc/Baichuan2-7B-Chat"
"baichuan2-13b-chat": "baichuan-inc/Baichuan2-13B-Chat"
"internlm-7b": "internlm/internlm-7b"
"internlm-chat-7b": "internlm/internlm-chat-7b"
"falcon-7b": "tiiuae/falcon-7b"
"falcon-40b": "tiiuae/falcon-40b"
"falcon-rw-7b": "tiiuae/falcon-rw-7b"
"gpt2": "gpt2"
"gpt2-xl": "gpt2-xl"
"gpt-j-6b": "EleutherAI/gpt-j-6b"
"gpt4all-j": "nomic-ai/gpt4all-j"
"gpt-neox-20b": "EleutherAI/gpt-neox-20b"
"pythia-12b": "EleutherAI/pythia-12b"
"oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
"dolly-v2-12b": "databricks/dolly-v2-12b"
"stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"
"Llama-2-13b-hf": "meta-llama/Llama-2-13b-hf"
"Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf"
"open_llama_13b": "openlm-research/open_llama_13b"
"vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3"
"koala": "young-geng/koala"
"mpt-7b": "mosaicml/mpt-7b"
"mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter"
"mpt-30b": "mosaicml/mpt-30b"
"opt-66b": "facebook/opt-66b"
"opt-iml-max-30b": "facebook/opt-iml-max-30b"
"agentlm-7b": "THUDM/agentlm-7b"
"agentlm-13b": "THUDM/agentlm-13b"
"agentlm-70b": "THUDM/agentlm-70b"
"Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat"
vllm_model_dict:
"aquila-7b": "BAAI/Aquila-7B"
"aquilachat-7b": "BAAI/AquilaChat-7B"
"baichuan-7b": "baichuan-inc/Baichuan-7B"
"baichuan-13b": "baichuan-inc/Baichuan-13B"
"baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
"chatglm2-6b": "THUDM/chatglm2-6b"
"chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
"chatglm3-6b": "THUDM/chatglm3-6b"
"chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
"internlm2-chat-7b": "internlm/internlm2-chat-7b"
"internlm2-chat-20b": "internlm/internlm2-chat-20b"
"BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat"
"BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k"
# Note: the bloom family ships its tokenizer separately from the model, so although vLLM supports it, it is incompatible with the fschat framework
# "bloom": "bigscience/bloom",
# "bloomz": "bigscience/bloomz",
# "bloomz-560m": "bigscience/bloomz-560m",
# "bloomz-7b1": "bigscience/bloomz-7b1",
# "bloomz-1b7": "bigscience/bloomz-1b7",
"Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat"
"agentlm-7b": "THUDM/agentlm-7b"
"agentlm-13b": "THUDM/agentlm-13b"
"agentlm-70b": "THUDM/agentlm-70b"
"falcon-7b": "tiiuae/falcon-7b"
"falcon-40b": "tiiuae/falcon-40b"
"falcon-rw-7b": "tiiuae/falcon-rw-7b"
"aquila-7b": "BAAI/Aquila-7B"
"aquilachat-7b": "BAAI/AquilaChat-7B"
"open_llama_13b": "openlm-research/open_llama_13b"
"vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5"
"koala": "young-geng/koala"
"mpt-7b": "mosaicml/mpt-7b"
"mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter"
"mpt-30b": "mosaicml/mpt-30b"
"opt-66b": "facebook/opt-66b"
"opt-iml-max-30b": "facebook/opt-iml-max-30b"
"gpt2": "gpt2"
"gpt2-xl": "gpt2-xl"
"gpt-j-6b": "EleutherAI/gpt-j-6b"
"gpt4all-j": "nomic-ai/gpt4all-j"
"gpt-neox-20b": "EleutherAI/gpt-neox-20b"
"pythia-12b": "EleutherAI/pythia-12b"
"oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
"dolly-v2-12b": "databricks/dolly-v2-12b"
"stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"
vllm_model_dict:
"chatglm2-6b": "THUDM/chatglm2-6b"
"chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
"chatglm3-6b": "THUDM/chatglm3-6b"
"chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
"Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf"
"Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf"
"Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf"
"Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat"
"Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"
"Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
"Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
"baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
"baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
"baichuan2-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
"baichuan2-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
"BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat"
"BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k"
"internlm-7b": "internlm/internlm-7b"
"internlm-chat-7b": "internlm/internlm-chat-7b"
"internlm2-chat-7b": "internlm/Models/internlm2-chat-7b"
"internlm2-chat-20b": "internlm/Models/internlm2-chat-20b"
"aquila-7b": "BAAI/Aquila-7B"
"aquilachat-7b": "BAAI/AquilaChat-7B"
"falcon-7b": "tiiuae/falcon-7b"
"falcon-40b": "tiiuae/falcon-40b"
"falcon-rw-7b": "tiiuae/falcon-rw-7b"
@@ -307,8 +299,6 @@ plugins:
"oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
"dolly-v2-12b": "databricks/dolly-v2-12b"
"stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"
"Llama-2-13b-hf": "meta-llama/Llama-2-13b-hf"
"Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf"
"open_llama_13b": "openlm-research/open_llama_13b"
"vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3"
"koala": "young-geng/koala"
@@ -318,24 +308,3 @@ plugins:
"opt-66b": "facebook/opt-66b"
"opt-iml-max-30b": "facebook/opt-iml-max-30b"
"Qwen-1_8B": "Qwen/Qwen-1_8B"
"Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat"
"Qwen-1_8B-Chat-Int8": "Qwen/Qwen-1_8B-Chat-Int8"
"Qwen-1_8B-Chat-Int4": "Qwen/Qwen-1_8B-Chat-Int4"
"Qwen-7B": "Qwen/Qwen-7B"
"Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"
"Qwen-14B": "Qwen/Qwen-14B"
"Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
"Qwen-14B-Chat-Int8": "Qwen/Qwen-14B-Chat-Int8"
"Qwen-14B-Chat-Int4": "Qwen/Qwen-14B-Chat-Int4"
"Qwen-72B": "Qwen/Qwen-72B"
"Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
"Qwen-72B-Chat-Int8": "Qwen/Qwen-72B-Chat-Int8"
"Qwen-72B-Chat-Int4": "Qwen/Qwen-72B-Chat-Int4"
"agentlm-7b": "THUDM/agentlm-7b"
"agentlm-13b": "THUDM/agentlm-13b"
"agentlm-70b": "THUDM/agentlm-70b"

configs/loom.yaml.example Normal file
View File

@@ -0,0 +1,310 @@
log_path: "logs"
log_level: "DEBUG"
api_server:
host: "127.0.0.1"
port: 8000
publish_server:
host: "127.0.0.1"
port: 8001
openai_plugins_folder:
- "openai_plugins"
openai_plugins_load_folder:
- "configs"
plugins:
- openai:
name: "openai"
- fastchat:
name: "fastchat"
logdir: "logs"
# Device the LLM runs on. "auto" detects it automatically; it can also be set manually to one of "cuda", "mps", or "cpu".
llm_device: "auto"
model_names:
- "chatglm3-6b"
run_controller:
host: "127.0.0.1"
port: 20001
dispatch_method: "shortest_queue"
run_openai_api:
host: "127.0.0.1"
port: 20000
fschat_model_workers:
- default:
host: "127.0.0.1"
port: 20002
device: "auto"
infer_turbo: False
# Parameters required for multi-GPU loading in model_worker
# "gpus": None, # GPUs to use, as a str such as "0,1"; if this has no effect, set CUDA_VISIBLE_DEVICES="0,1" instead
# "num_gpus": 1, # number of GPUs to use
# "max_gpu_memory": "20GiB", # maximum VRAM to occupy per GPU
# The following are less common model_worker parameters; configure as needed
# "load_8bit": False, # enable 8-bit quantization
# "cpu_offloading": None,
# "gptq_ckpt": None,
# "gptq_wbits": 16,
# "gptq_groupsize": -1,
# "gptq_act_order": False,
# "awq_ckpt": None,
# "awq_wbits": 16,
# "awq_groupsize": -1,
# "model_names": LLM_MODELS,
# "conv_template": None,
# "limit_worker_concurrency": 5,
# "stream_interval": 2,
# "no_register": False,
# "embed_in_truncate": False,
# The following are vllm_worker parameters; note that vLLM requires a GPU and has only been tested on Linux
# tokenizer = model_path # set this here if the tokenizer differs from model_path
# 'tokenizer_mode':'auto',
# 'trust_remote_code':True,
# 'download_dir':None,
# 'load_format':'auto',
# 'dtype':'auto',
# 'seed':0,
# 'worker_use_ray':False,
# 'pipeline_parallel_size':1,
# 'tensor_parallel_size':1,
# 'block_size':16,
# 'swap_space':4 , # GiB
# 'gpu_memory_utilization':0.90,
# 'max_num_batched_tokens':2560,
# 'max_num_seqs':256,
# 'disable_log_stats':False,
# 'conv_template':None,
# 'limit_worker_concurrency':5,
# 'no_register':False,
# 'num_gpus': 1
# 'engine_use_ray': False,
# 'disable_log_requests': False
- chatglm3-6b:
host: "127.0.0.1"
device: "cuda"
port: 20009
- internlm2-chat-7b:
host: "127.0.0.1"
device: "cuda"
port: 20009
# The entries below need no changes; the models to launch are set in model_config
- zhipu-api:
port: 21001
- minimax-api:
port: 21002
- xinghuo-api:
port: 21003
- qianfan-api:
port: 21004
- fangzhou-api:
port: 21005
- qwen-api:
port: 21006
- baichuan-api:
port: 21007
- azure-api:
port: 21008
- tiangong-api:
port: 21009
online_llm_model:
# Online models. Assign each online API a distinct port in server_config
- "openai-api":
"model_name": "gpt-3.5-turbo"
"api_base_url": "https://api.openai.com/v1"
"api_key": ""
"openai_proxy": ""
# For registration and API keys, see http://open.bigmodel.cn
- "zhipu-api":
"api_key": ""
"version": "chatglm_turbo" # 可选包括 "chatglm_turbo"
"provider": "ChatGLMWorker"
# For registration and API keys, see https://api.minimax.chat/
- "minimax-api":
"group_id": ""
"api_key": ""
"is_pro": False
"provider": "MiniMaxWorker"
# For registration and API keys, see https://xinghuo.xfyun.cn/
- "xinghuo-api":
"APPID": ""
"APISecret": ""
"api_key": ""
"version": "v1.5" # 你使用的讯飞星火大模型版本,可选包括 "v3.0", "v1.5", "v2.0"
"provider": "XingHuoWorker"
# Baidu Qianfan API; for how to apply, see https://cloud.baidu.com/doc/WENXINWORKSHOP/s/4lilb2lpf
- "qianfan-api":
"version": "ERNIE-Bot" # 注意大小写。当前支持 "ERNIE-Bot" 或 "ERNIE-Bot-turbo" 更多的见官方文档。
"version_url": "" # 也可以不填写version直接填写在千帆申请模型发布的API地址
"api_key": ""
"secret_key": ""
"provider": "QianFanWorker"
# Volcano Engine Ark; API docs: https://www.volcengine.com/docs/82379
- "fangzhou-api":
"version": "chatglm-6b-model" # 当前支持 "chatglm-6b-model" 更多的见文档模型支持列表中方舟部分。
"version_url": "" # 可以不填写version直接填写在方舟申请模型发布的API地址
"api_key": ""
"secret_key": ""
"provider": "FangZhouWorker"
# Alibaba Cloud Tongyi Qianwen; API docs: https://help.aliyun.com/zh/dashscope/developer-reference/api-details
- "qwen-api":
"version": "qwen-turbo" # 可选包括 "qwen-turbo", "qwen-plus"
"api_key": "" # 请在阿里云控制台模型服务灵积API-KEY管理页面创建
"provider": "QwenWorker"
# Baichuan API; for how to apply, see https://www.baichuan-ai.com/home#api-enter
- "baichuan-api":
"version": "Baichuan2-53B" # 当前支持 "Baichuan2-53B" 见官方文档。
"api_key": ""
"secret_key": ""
"provider": "BaiChuanWorker"
# Azure API
- "azure-api":
"deployment_name": "" # 部署容器的名字
"resource_name": "" # https://{resource_name}.openai.azure.com/openai/ 填写resource_name的部分其他部分不要填写
"api_version": "" # API的版本不是模型版本
"api_key": ""
"provider": "AzureWorker"
# Kunlun Tiangong API: https://model-platform.tiangong.cn/
- "tiangong-api":
"version": "SkyChat-MegaVerse"
"api_key": ""
"secret_key": ""
"provider": "TianGongWorker"
"llm_model":
"chatglm2-6b": "THUDM/chatglm2-6b"
"chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
"chatglm3-6b": "THUDM/chatglm3-6b"
"chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
"Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf"
"Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf"
"Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf"
"Qwen-1_8B-Chat": "/media/checkpoint/Qwen-1_8B-Chat"
"Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"
"Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
"Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
"baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
"baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
"baichuan2-7b-chat": "baichuan-inc/Baichuan2-7B-Chat"
"baichuan2-13b-chat": "baichuan-inc/Baichuan2-13B-Chat"
"internlm-7b": "internlm/internlm-7b"
"internlm-chat-7b": "internlm/internlm-chat-7b"
"internlm2-chat-7b": "internlm/internlm2-chat-7b"
"internlm2-chat-20b": "internlm/internlm2-chat-20b"
"BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat"
"BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k"
"Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat"
"agentlm-7b": "THUDM/agentlm-7b"
"agentlm-13b": "THUDM/agentlm-13b"
"agentlm-70b": "THUDM/agentlm-70b"
"falcon-7b": "tiiuae/falcon-7b"
"falcon-40b": "tiiuae/falcon-40b"
"falcon-rw-7b": "tiiuae/falcon-rw-7b"
"aquila-7b": "BAAI/Aquila-7B"
"aquilachat-7b": "BAAI/AquilaChat-7B"
"open_llama_13b": "openlm-research/open_llama_13b"
"vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5"
"koala": "young-geng/koala"
"mpt-7b": "mosaicml/mpt-7b"
"mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter"
"mpt-30b": "mosaicml/mpt-30b"
"opt-66b": "facebook/opt-66b"
"opt-iml-max-30b": "facebook/opt-iml-max-30b"
"gpt2": "gpt2"
"gpt2-xl": "gpt2-xl"
"gpt-j-6b": "EleutherAI/gpt-j-6b"
"gpt4all-j": "nomic-ai/gpt4all-j"
"gpt-neox-20b": "EleutherAI/gpt-neox-20b"
"pythia-12b": "EleutherAI/pythia-12b"
"oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
"dolly-v2-12b": "databricks/dolly-v2-12b"
"stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"
vllm_model_dict:
"chatglm2-6b": "THUDM/chatglm2-6b"
"chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
"chatglm3-6b": "THUDM/chatglm3-6b"
"chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
"Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf"
"Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf"
"Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf"
"Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat"
"Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"
"Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
"Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
"baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
"baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
"baichuan2-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
"baichuan2-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
"BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat"
"BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k"
"internlm-7b": "internlm/internlm-7b"
"internlm-chat-7b": "internlm/internlm-chat-7b"
"internlm2-chat-7b": "internlm/Models/internlm2-chat-7b"
"internlm2-chat-20b": "internlm/Models/internlm2-chat-20b"
"aquila-7b": "BAAI/Aquila-7B"
"aquilachat-7b": "BAAI/AquilaChat-7B"
"falcon-7b": "tiiuae/falcon-7b"
"falcon-40b": "tiiuae/falcon-40b"
"falcon-rw-7b": "tiiuae/falcon-rw-7b"
"gpt2": "gpt2"
"gpt2-xl": "gpt2-xl"
"gpt-j-6b": "EleutherAI/gpt-j-6b"
"gpt4all-j": "nomic-ai/gpt4all-j"
"gpt-neox-20b": "EleutherAI/gpt-neox-20b"
"pythia-12b": "EleutherAI/pythia-12b"
"oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
"dolly-v2-12b": "databricks/dolly-v2-12b"
"stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"
"open_llama_13b": "openlm-research/open_llama_13b"
"vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3"
"koala": "young-geng/koala"
"mpt-7b": "mosaicml/mpt-7b"
"mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter"
"mpt-30b": "mosaicml/mpt-30b"
"opt-66b": "facebook/opt-66b"
"opt-iml-max-30b": "facebook/opt-iml-max-30b"

View File

@@ -1,253 +1,310 @@
import os
log_path: "logs"
log_level: "DEBUG"
MODEL_ROOT_PATH = ""
EMBEDDING_MODEL = "bge-large-zh-v1.5" # bge-large-zh
EMBEDDING_DEVICE = "auto"
api_server:
host: "127.0.0.1"
port: 8000
# Reranker model to use
RERANKER_MODEL = "bge-reranker-large"
# Whether to enable the reranker model
USE_RERANKER = False
RERANKER_MAX_LENGTH = 1024
publish_server:
host: "127.0.0.1"
port: 8001
# Configure this when you need to add custom keywords to EMBEDDING_MODEL
EMBEDDING_KEYWORD_FILE = "keywords.txt"
EMBEDDING_MODEL_OUTPUT_PATH = "output"
openai_plugins_folder:
- "openai_plugins"
openai_plugins_load_folder:
- "configs"
SUPPORT_AGENT_MODELS = [
"chatglm3-6b",
"openai-api",
"Qwen-14B-Chat",
"Qwen-7B-Chat",
]
LLM_MODEL_CONFIG = {
"preprocess_model": {
# "Mixtral-8x7B-v0.1": {
# "temperature": 0.01,
# "max_tokens": 5,
# "prompt_name": "default",
# "callbacks": False
# },
"chatglm3-6b": {
"temperature": 0.05,
"max_tokens": 4096,
"prompt_name": "default",
"callbacks": False
},
},
"llm_model": {
# "Mixtral-8x7B-v0.1": {
# "temperature": 0.9,
# "max_tokens": 4000,
# "history_len": 5,
# "prompt_name": "default",
# "callbacks": True
# },
"chatglm3-6b": {
"temperature": 0.05,
"max_tokens": 4096,
"prompt_name": "default",
"history_len": 10,
"callbacks": True
},
},
"action_model": {
# "Qwen-14B-Chat": {
# "temperature": 0.05,
# "max_tokens": 4096,
# "prompt_name": "qwen",
# "callbacks": True
# },
"chatglm3-6b": {
"temperature": 0.05,
"max_tokens": 4096,
"prompt_name": "ChatGLM3",
"callbacks": True
},
# "zhipu-api": {
# "temperature": 0.01,
# "max_tokens": 4096,
# "prompt_name": "ChatGLM3",
# "callbacks": True
# }
},
"postprocess_model": {
"zhipu-api": {
"temperature": 0.01,
"max_tokens": 4096,
"prompt_name": "default",
"callbacks": True
}
},
}
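A hedged sketch of how a caller might read these settings (the helper is illustrative, not this project's actual API):

def get_model_params(stage: str, model_name: str) -> dict:
    # stage is one of "preprocess_model", "llm_model", "action_model", "postprocess_model".
    return LLM_MODEL_CONFIG.get(stage, {}).get(model_name, {})

print(get_model_params("action_model", "chatglm3-6b")["prompt_name"])  # -> "ChatGLM3"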
LLM_DEVICE = "auto"
ONLINE_LLM_MODEL = {
"openai-api": {
"model_name": "gpt-4-1106-preview",
"api_base_url": "https://api.openai.com/v1",
"api_key": "sk-",
"openai_proxy": "",
},
"zhipu-api": {
"api_key": "",
"version": "chatglm_turbo",
"provider": "ChatGLMWorker",
},
"minimax-api": {
"group_id": "",
"api_key": "",
"is_pro": False,
"provider": "MiniMaxWorker",
},
"xinghuo-api": {
"APPID": "",
"APISecret": "",
"api_key": "",
"version": "v3.0",
"provider": "XingHuoWorker",
},
"qianfan-api": {
"version": "ernie-bot-4",
"version_url": "",
"api_key": "",
"secret_key": "",
"provider": "QianFanWorker",
},
"fangzhou-api": {
"version": "chatglm-6b-model",
"version_url": "",
"api_key": "",
"secret_key": "",
"provider": "FangZhouWorker",
},
"qwen-api": {
"version": "qwen-max",
"api_key": "",
"provider": "QwenWorker",
"embed_model": "text-embedding-v1" # embedding 模型名称
},
"baichuan-api": {
"version": "Baichuan2-53B",
"api_key": "",
"secret_key": "",
"provider": "BaiChuanWorker",
},
"azure-api": {
"deployment_name": "",
"resource_name": "",
"api_version": "2023-07-01-preview",
"api_key": "",
"provider": "AzureWorker",
},
plugins:
- openai:
name: "openai"
# Kunlun Tiangong API: https://model-platform.tiangong.cn/
"tiangong-api": {
"version": "SkyChat-MegaVerse",
"api_key": "",
"secret_key": "",
"provider": "TianGongWorker",
},
# Gemini API https://makersuite.google.com/app/apikey
"gemini-api": {
"api_key": "",
"provider": "GeminiWorker",
}
- fastchat:
name: "fastchat"
logdir: "logs"
# Device the LLM runs on. "auto" detects it automatically; it can also be set manually to one of "cuda", "mps", or "cpu".
llm_device: "auto"
model_names:
- "internlm2-chat-7b"
run_controller:
host: "127.0.0.1"
port: 20001
dispatch_method: "shortest_queue"
run_openai_api:
host: "127.0.0.1"
port: 20000
fschat_model_workers:
- default:
host: "127.0.0.1"
port: 20002
device: "auto"
infer_turbo: False
}
# Parameters required for multi-GPU loading in model_worker
# "gpus": None, # GPUs to use, as a str such as "0,1"; if this has no effect, set CUDA_VISIBLE_DEVICES="0,1" instead
# "num_gpus": 1, # number of GPUs to use
# "max_gpu_memory": "20GiB", # maximum VRAM to occupy per GPU
# Edit the values in the dict below to point at locally stored embedding models. Three setups are supported (a sketch of the lookup follows below):
# 1. Change the value to the model's absolute path
# 2. Leave the value unchanged (taking text2vec as an example):
#    2.1 If any of the following subdirectories exists under {MODEL_ROOT_PATH}, it is used:
#        - text2vec
#        - GanymedeNil/text2vec-large-chinese
#        - text2vec-large-chinese
#    2.2 If none of these local paths exists, fall back to the huggingface model
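A hedged sketch of that lookup order (the helper is illustrative, not the project's actual resolver):

import os

def resolve_embed_model(name, model_root, embed_paths):
    value = embed_paths.get(name, name)
    if os.path.isabs(value):                      # 1. absolute path configured directly
        return value
    for candidate in (name, value, os.path.basename(value)):
        local = os.path.join(model_root, candidate)
        if os.path.isdir(local):                  # 2. matching subdirectory under MODEL_ROOT_PATH
            return local
    return value                                  # 3. fall back to the huggingface repo id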
# The following are less common model_worker parameters; configure as needed
# "load_8bit": False, # enable 8-bit quantization
# "cpu_offloading": None,
# "gptq_ckpt": None,
# "gptq_wbits": 16,
# "gptq_groupsize": -1,
# "gptq_act_order": False,
# "awq_ckpt": None,
# "awq_wbits": 16,
# "awq_groupsize": -1,
# "model_names": LLM_MODELS,
# "conv_template": None,
# "limit_worker_concurrency": 5,
# "stream_interval": 2,
# "no_register": False,
# "embed_in_truncate": False,
MODEL_PATH = {
"embed_model": {
"ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
"ernie-base": "nghuyong/ernie-3.0-base-zh",
"text2vec-base": "shibing624/text2vec-base-chinese",
"text2vec": "GanymedeNil/text2vec-large-chinese",
"text2vec-paraphrase": "shibing624/text2vec-base-chinese-paraphrase",
"text2vec-sentence": "shibing624/text2vec-base-chinese-sentence",
"text2vec-multilingual": "shibing624/text2vec-base-multilingual",
"text2vec-bge-large-chinese": "shibing624/text2vec-bge-large-chinese",
"m3e-small": "moka-ai/m3e-small",
"m3e-base": "moka-ai/m3e-base",
"m3e-large": "moka-ai/m3e-large",
# The following are vllm_worker parameters; note that vLLM requires a GPU and has only been tested on Linux
"bge-small-zh": "BAAI/bge-small-zh",
"bge-base-zh": "BAAI/bge-base-zh",
"bge-large-zh": "/media/zr/Data/Models/Embedding/bge-large-zh",
"bge-large-zh-noinstruct": "BAAI/bge-large-zh-noinstruct",
"bge-base-zh-v1.5": "BAAI/bge-base-zh-v1.5",
"bge-large-zh-v1.5": "/share/home/zyx/Models/bge-large-zh-v1.5",
"piccolo-base-zh": "sensenova/piccolo-base-zh",
"piccolo-large-zh": "sensenova/piccolo-large-zh",
"nlp_gte_sentence-embedding_chinese-large": "/Models/nlp_gte_sentence-embedding_chinese-large",
"text-embedding-ada-002": "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",
}
}
# tokenizer = model_path # set this here if the tokenizer differs from model_path
# 'tokenizer_mode':'auto',
# 'trust_remote_code':True,
# 'download_dir':None,
# 'load_format':'auto',
# 'dtype':'auto',
# 'seed':0,
# 'worker_use_ray':False,
# 'pipeline_parallel_size':1,
# 'tensor_parallel_size':1,
# 'block_size':16,
# 'swap_space':4 , # GiB
# 'gpu_memory_utilization':0.90,
# 'max_num_batched_tokens':2560,
# 'max_num_seqs':256,
# 'disable_log_stats':False,
# 'conv_template':None,
# 'limit_worker_concurrency':5,
# 'no_register':False,
# 'num_gpus': 1
# 'engine_use_ray': False,
# 'disable_log_requests': False
NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data")
- chatglm3-6b:
host: "127.0.0.1"
device: "cuda"
port: 20009
# Using vLLM may degrade the model's reasoning ability and leave it unable to complete Agent tasks
VLLM_MODEL_DICT = {
"aquila-7b": "BAAI/Aquila-7B",
"aquilachat-7b": "BAAI/AquilaChat-7B",
- internlm2-chat-7b:
host: "127.0.0.1"
device: "cuda"
port: 20009
"baichuan-7b": "baichuan-inc/Baichuan-7B",
"baichuan-13b": "baichuan-inc/Baichuan-13B",
'baichuan-13b-chat': 'baichuan-inc/Baichuan-13B-Chat',
# The entries below need no changes; the models to launch are set in model_config
- zhipu-api:
port: 21001
'chatglm2-6b': 'THUDM/chatglm2-6b',
'chatglm2-6b-32k': 'THUDM/chatglm2-6b-32k',
'chatglm3-6b': 'THUDM/chatglm3-6b',
'chatglm3-6b-32k': 'THUDM/chatglm3-6b-32k',
- minimax-api:
port: 21002
"internlm-7b": "internlm/internlm-7b",
"internlm-chat-7b": "internlm/internlm-chat-7b",
"internlm2-chat-7b": "internlm/Models/internlm2-chat-7b",
"internlm2-chat-20b": "internlm/Models/internlm2-chat-20b",
- xinghuo-api:
port: 21003
"aquila-7b": "BAAI/Aquila-7B",
"aquilachat-7b": "BAAI/AquilaChat-7B",
- qianfan-api:
port: 21004
"falcon-7b": "tiiuae/falcon-7b",
"falcon-40b": "tiiuae/falcon-40b",
"falcon-rw-7b": "tiiuae/falcon-rw-7b",
"gpt2": "gpt2",
"gpt2-xl": "gpt2-xl",
"gpt-j-6b": "EleutherAI/gpt-j-6b",
"gpt4all-j": "nomic-ai/gpt4all-j",
"gpt-neox-20b": "EleutherAI/gpt-neox-20b",
"pythia-12b": "EleutherAI/pythia-12b",
"oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
"dolly-v2-12b": "databricks/dolly-v2-12b",
"stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b",
"open_llama_13b": "openlm-research/open_llama_13b",
"vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3",
"koala": "young-geng/koala",
"mpt-7b": "mosaicml/mpt-7b",
"mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter",
"mpt-30b": "mosaicml/mpt-30b",
"opt-66b": "facebook/opt-66b",
"opt-iml-max-30b": "facebook/opt-iml-max-30b",
- fangzhou-api:
port: 21005
"Qwen-7B": "Qwen/Qwen-7B",
"Qwen-14B": "Qwen/Qwen-14B",
"Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
"Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
- qwen-api:
port: 21006
"agentlm-7b": "THUDM/agentlm-7b",
"agentlm-13b": "THUDM/agentlm-13b",
"agentlm-70b": "THUDM/agentlm-70b",
- baichuan-api:
port: 21007
}
- azure-api:
port: 21008
- tiangong-api:
port: 21009
online_llm_model:
# Online models. Assign each online API a distinct port in server_config
- "openai-api":
"model_name": "gpt-3.5-turbo"
"api_base_url": "https://api.openai.com/v1"
"api_key": ""
"openai_proxy": ""
# For registration and API keys, see http://open.bigmodel.cn
- "zhipu-api":
"api_key": ""
"version": "chatglm_turbo" # 可选包括 "chatglm_turbo"
"provider": "ChatGLMWorker"
# For registration and API keys, see https://api.minimax.chat/
- "minimax-api":
"group_id": ""
"api_key": ""
"is_pro": False
"provider": "MiniMaxWorker"
# For registration and API keys, see https://xinghuo.xfyun.cn/
- "xinghuo-api":
"APPID": ""
"APISecret": ""
"api_key": ""
"version": "v1.5" # 你使用的讯飞星火大模型版本,可选包括 "v3.0", "v1.5", "v2.0"
"provider": "XingHuoWorker"
# Baidu Qianfan API; for how to apply, see https://cloud.baidu.com/doc/WENXINWORKSHOP/s/4lilb2lpf
- "qianfan-api":
"version": "ERNIE-Bot" # 注意大小写。当前支持 "ERNIE-Bot" 或 "ERNIE-Bot-turbo" 更多的见官方文档。
"version_url": "" # 也可以不填写version直接填写在千帆申请模型发布的API地址
"api_key": ""
"secret_key": ""
"provider": "QianFanWorker"
# Volcano Engine Ark; API docs: https://www.volcengine.com/docs/82379
- "fangzhou-api":
"version": "chatglm-6b-model" # 当前支持 "chatglm-6b-model" 更多的见文档模型支持列表中方舟部分。
"version_url": "" # 可以不填写version直接填写在方舟申请模型发布的API地址
"api_key": ""
"secret_key": ""
"provider": "FangZhouWorker"
# Alibaba Cloud Tongyi Qianwen; API docs: https://help.aliyun.com/zh/dashscope/developer-reference/api-details
- "qwen-api":
"version": "qwen-turbo" # 可选包括 "qwen-turbo", "qwen-plus"
"api_key": "" # 请在阿里云控制台模型服务灵积API-KEY管理页面创建
"provider": "QwenWorker"
# Baichuan API; for how to apply, see https://www.baichuan-ai.com/home#api-enter
- "baichuan-api":
"version": "Baichuan2-53B" # 当前支持 "Baichuan2-53B" 见官方文档。
"api_key": ""
"secret_key": ""
"provider": "BaiChuanWorker"
# Azure API
- "azure-api":
"deployment_name": "" # 部署容器的名字
"resource_name": "" # https://{resource_name}.openai.azure.com/openai/ 填写resource_name的部分其他部分不要填写
"api_version": "" # API的版本不是模型版本
"api_key": ""
"provider": "AzureWorker"
# Kunlun Tiangong API: https://model-platform.tiangong.cn/
- "tiangong-api":
"version": "SkyChat-MegaVerse"
"api_key": ""
"secret_key": ""
"provider": "TianGongWorker"
"llm_model":
"chatglm2-6b": "THUDM/chatglm2-6b"
"chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
"chatglm3-6b": "THUDM/chatglm3-6b"
"chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
"Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf"
"Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf"
"Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf"
"Qwen-1_8B-Chat": "/media/checkpoint/Qwen-1_8B-Chat"
"Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"
"Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
"Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
"baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
"baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
"baichuan2-7b-chat": "baichuan-inc/Baichuan2-7B-Chat"
"baichuan2-13b-chat": "baichuan-inc/Baichuan2-13B-Chat"
"internlm-7b": "internlm/internlm-7b"
"internlm-chat-7b": "internlm/internlm-chat-7b"
"internlm2-chat-7b": "internlm/internlm2-chat-7b"
"internlm2-chat-20b": "internlm/internlm2-chat-20b"
"BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat"
"BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k"
"Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat"
"agentlm-7b": "THUDM/agentlm-7b"
"agentlm-13b": "THUDM/agentlm-13b"
"agentlm-70b": "THUDM/agentlm-70b"
"falcon-7b": "tiiuae/falcon-7b"
"falcon-40b": "tiiuae/falcon-40b"
"falcon-rw-7b": "tiiuae/falcon-rw-7b"
"aquila-7b": "BAAI/Aquila-7B"
"aquilachat-7b": "BAAI/AquilaChat-7B"
"open_llama_13b": "openlm-research/open_llama_13b"
"vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5"
"koala": "young-geng/koala"
"mpt-7b": "mosaicml/mpt-7b"
"mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter"
"mpt-30b": "mosaicml/mpt-30b"
"opt-66b": "facebook/opt-66b"
"opt-iml-max-30b": "facebook/opt-iml-max-30b"
"gpt2": "gpt2"
"gpt2-xl": "gpt2-xl"
"gpt-j-6b": "EleutherAI/gpt-j-6b"
"gpt4all-j": "nomic-ai/gpt4all-j"
"gpt-neox-20b": "EleutherAI/gpt-neox-20b"
"pythia-12b": "EleutherAI/pythia-12b"
"oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
"dolly-v2-12b": "databricks/dolly-v2-12b"
"stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"
vllm_model_dict:
"chatglm2-6b": "THUDM/chatglm2-6b"
"chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
"chatglm3-6b": "THUDM/chatglm3-6b"
"chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
"Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf"
"Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf"
"Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf"
"Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat"
"Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"
"Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
"Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
"baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
"baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
"baichuan2-7b-chat": "baichuan-inc/Baichuan-7B-Chat"
"baichuan2-13b-chat": "baichuan-inc/Baichuan-13B-Chat"
"BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat"
"BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k"
"internlm-7b": "internlm/internlm-7b"
"internlm-chat-7b": "internlm/internlm-chat-7b"
"internlm2-chat-7b": "internlm/Models/internlm2-chat-7b"
"internlm2-chat-20b": "internlm/Models/internlm2-chat-20b"
"aquila-7b": "BAAI/Aquila-7B"
"aquilachat-7b": "BAAI/AquilaChat-7B"
"falcon-7b": "tiiuae/falcon-7b"
"falcon-40b": "tiiuae/falcon-40b"
"falcon-rw-7b": "tiiuae/falcon-rw-7b"
"gpt2": "gpt2"
"gpt2-xl": "gpt2-xl"
"gpt-j-6b": "EleutherAI/gpt-j-6b"
"gpt4all-j": "nomic-ai/gpt4all-j"
"gpt-neox-20b": "EleutherAI/gpt-neox-20b"
"pythia-12b": "EleutherAI/pythia-12b"
"oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
"dolly-v2-12b": "databricks/dolly-v2-12b"
"stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"
"open_llama_13b": "openlm-research/open_llama_13b"
"vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3"
"koala": "young-geng/koala"
"mpt-7b": "mosaicml/mpt-7b"
"mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter"
"mpt-30b": "mosaicml/mpt-30b"
"opt-66b": "facebook/opt-66b"
"opt-iml-max-30b": "facebook/opt-iml-max-30b"
LOOM_CONFIG = "/media/gpt4-pdf-chatbot-langchain/LooM/src/core/loom.yaml"
OPENAI_KEY = None
OPENAI_PROXY = None

View File

@@ -6,7 +6,6 @@ import sys
import logging
logger = logging.getLogger(__name__)
# Add the current directory to sys.path so functions from the plugins can be used
root_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(root_dir)

View File

@@ -0,0 +1 @@
openai>=1.7.1

View File

@@ -0,0 +1,48 @@
from loom_core.openai_plugins.core.adapter import ProcessesInfo
from loom_core.openai_plugins.core.application import ApplicationAdapter
import os
import sys
import logging

logger = logging.getLogger(__name__)

root_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(root_dir)


class ZhipuAIApplicationAdapter(ApplicationAdapter):

    def __init__(self, cfg=None, state_dict: dict = None):
        self.processesInfo = None
        self._cfg = cfg
        super().__init__(state_dict=state_dict)

    def class_name(self) -> str:
        """Get class name."""
        return self.__class__.__name__

    @classmethod
    def from_config(cls, cfg=None):
        _state_dict = {
            "application_name": "zhipuai",
            "application_version": "0.0.1",
            "application_description": "zhipuai application",
            "application_author": "zhipuai"
        }
        state_dict = cfg.get("state_dict", {}) if cfg else {}
        if state_dict is not None and _state_dict is not None:
            _state_dict = {**state_dict, **_state_dict}
        else:
            # handle the case where one or both are None
            _state_dict = state_dict or _state_dict or {}

        return cls(cfg=cfg, state_dict=_state_dict)

    def init_processes(self, processesInfo: ProcessesInfo):
        self.processesInfo = processesInfo

    def start(self):
        pass

    def stop(self):
        pass

View File

@@ -0,0 +1,47 @@
from loom_core.openai_plugins.core.control import ControlAdapter
import os
import sys
import logging

logger = logging.getLogger(__name__)

# Add the current directory to sys.path so functions from the plugin can be used
root_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(root_dir)


class ZhipuAIControlAdapter(ControlAdapter):

    def __init__(self, cfg=None, state_dict: dict = None):
        self._cfg = cfg
        super().__init__(state_dict=state_dict)

    def class_name(self) -> str:
        """Get class name."""
        return self.__class__.__name__

    def start_model(self, new_model_name):
        pass

    def stop_model(self, model_name: str):
        pass

    def replace_model(self, model_name: str, new_model_name: str):
        pass

    @classmethod
    def from_config(cls, cfg=None):
        _state_dict = {
            "controller_name": "zhipuai",
            "controller_version": "0.0.1",
            "controller_description": "zhipuai controller",
            "controller_author": "zhipuai"
        }
        state_dict = cfg.get("state_dict", {}) if cfg else {}
        if state_dict is not None and _state_dict is not None:
            _state_dict = {**state_dict, **_state_dict}
        else:
            # handle the case where one or both are None
            _state_dict = state_dict or _state_dict or {}

        return cls(cfg=cfg, state_dict=_state_dict)

View File

@@ -0,0 +1,103 @@
import logging
import sys
import os
import subprocess
import threading
import re
import locale

logger = logging.getLogger(__name__)

root_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(root_dir)

# Perform install
processed_install = set()
pip_list = None


def handle_stream(stream, prefix):
    # Re-decode with the system locale so progress output does not crash on non-UTF-8 consoles.
    stream.reconfigure(encoding=locale.getpreferredencoding(), errors='replace')
    for msg in stream:
        # tqdm-style progress lines (e.g. "45%|... 12.3it/s]") are redrawn in place with '\r'
        # instead of being echoed line by line.
        if prefix == '[!]' and ('it/s]' in msg or 's/it]' in msg) and ('%|' in msg or 'it [' in msg):
            if msg.startswith('100%'):
                print('\r' + msg, end="", file=sys.stderr)
            else:
                print('\r' + msg[:-1], end="", file=sys.stderr)
        else:
            if prefix == '[!]':
                print(prefix, msg, end="", file=sys.stderr)
            else:
                print(prefix, msg, end="")


def get_installed_packages():
    global pip_list
    if pip_list is None:
        try:
            result = subprocess.check_output([sys.executable, '-m', 'pip', 'list'], universal_newlines=True)
            # The first token on each line is the package name; lowercase for case-insensitive lookup.
            pip_list = set([line.split()[0].lower() for line in result.split('\n') if line.strip()])
        except subprocess.CalledProcessError:
            print("[openai_plugins] Failed to retrieve the list of installed pip packages.")
            return set()

    return pip_list


def is_installed(name):
    name = name.strip()
    if name.startswith('#'):
        # requirements.txt comment lines are treated as already satisfied
        return True

    # Strip any version specifier ("package>=1.0" -> "package") before the lookup.
    pattern = r'([^<>!=]+)([<>!=]=?)'
    match = re.search(pattern, name)
    if match:
        name = match.group(1)

    return name.lower() in get_installed_packages()


def process_wrap(cmd_str, cwd_path, handler=None):
    process = subprocess.Popen(cmd_str, cwd=cwd_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
                               bufsize=1)

    if handler is None:
        handler = handle_stream

    stdout_thread = threading.Thread(target=handler, args=(process.stdout, ""))
    stderr_thread = threading.Thread(target=handler, args=(process.stderr, "[!]"))

    stdout_thread.start()
    stderr_thread.start()

    stdout_thread.join()
    stderr_thread.join()

    return process.wait()


def install():
    try:
        requirements_path = os.path.join(root_dir, 'requirements.txt')

        this_exit_code = 0

        if os.path.exists(requirements_path):
            with open(requirements_path, 'r', encoding="UTF-8") as file:
                for line in file:
                    package_name = line.strip()
                    if package_name and not is_installed(package_name):
                        install_cmd = [sys.executable, "-m", "pip", "install", package_name]
                        this_exit_code += process_wrap(install_cmd, root_dir)

        if this_exit_code != 0:
            logger.info("[openai_plugins] Restoring fastchat failed.")
    except Exception:
        logger.error("[openai_plugins] Restoring fastchat failed.", exc_info=True)


if __name__ == "__main__":
    install()
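For example, a single pinned package could be installed through the same streaming wrapper (the pin mirrors this plugin's requirements.txt):

exit_code = process_wrap([sys.executable, "-m", "pip", "install", "zhipuai>=2.0.1"], root_dir)
print("pip exited with", exit_code)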

View File

@@ -0,0 +1,96 @@
from typing import List

from loom_core.openai_plugins.core.adapter import LLMWorkerInfo
from loom_core.openai_plugins.core.profile_endpoint.core import ProfileEndpointAdapter
import os
import sys
import logging

logger = logging.getLogger(__name__)

root_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(root_dir)


class ZhipuAIProfileEndpointAdapter(ProfileEndpointAdapter):
    """Adapter for the profile endpoint."""

    def __init__(self, cfg=None, state_dict: dict = None):
        self._cfg = cfg
        super().__init__(state_dict=state_dict)

    def class_name(self) -> str:
        """Get class name."""
        return self.__class__.__name__

    def list_running_models(self) -> List[LLMWorkerInfo]:
        """List the models and their configuration options."""
        list_worker = []
        list_worker.append(LLMWorkerInfo(worker_id="glm-4",
                                         model_name="glm-4",
                                         model_description="glm-4",
                                         providers=["model", "embedding"],
                                         model_extra_info="{}"))
        list_worker.append(LLMWorkerInfo(worker_id="glm-3-turbo",
                                         model_name="glm-3-turbo",
                                         model_description="glm-3-turbo",
                                         providers=["model", "embedding"],
                                         model_extra_info="{}"))
        list_worker.append(LLMWorkerInfo(worker_id="embedding-2",
                                         model_name="embedding-2",
                                         model_description="embedding-2",
                                         providers=["embedding"],
                                         model_extra_info="{}"))
        return list_worker

    def list_llm_models(self) -> List[LLMWorkerInfo]:
        """Get the list of configured models."""
        list_worker = []
        list_worker.append(LLMWorkerInfo(worker_id="glm-4",
                                         model_name="glm-4",
                                         model_description="glm-4",
                                         providers=["model", "embedding"],
                                         model_extra_info="{}"))
        list_worker.append(LLMWorkerInfo(worker_id="glm-3-turbo",
                                         model_name="glm-3-turbo",
                                         model_description="glm-3-turbo",
                                         providers=["model", "embedding"],
                                         model_extra_info="{}"))
        list_worker.append(LLMWorkerInfo(worker_id="embedding-2",
                                         model_name="embedding-2",
                                         model_description="embedding-2",
                                         providers=["embedding"],
                                         model_extra_info="{}"))
        return list_worker

    def get_model_config(self, model_name) -> LLMWorkerInfo:
        """Get the merged configuration options for an LLM model."""
        info_obj = LLMWorkerInfo(worker_id=model_name,
                                 model_name=model_name,
                                 model_description="",
                                 providers=["model", "embedding"],
                                 model_extra_info="{}")
        return info_obj

    @classmethod
    def from_config(cls, cfg=None):
        _state_dict = {
            "profile_name": "zhipuai",
            "profile_version": "0.0.1",
            "profile_description": "zhipuai profile endpoint",
            "profile_author": "zhipuai"
        }
        state_dict = cfg.get("state_dict", {}) if cfg else {}
        if state_dict is not None and _state_dict is not None:
            _state_dict = {**state_dict, **_state_dict}
        else:
            # handle the case where one or both are None
            _state_dict = state_dict or _state_dict or {}

        return cls(cfg=cfg, state_dict=_state_dict)

View File

@@ -0,0 +1 @@
zhipuai>=2.0.1

View File

@@ -0,0 +1,11 @@
{
"plugins_name": "zhipuai",
"endpoint_host": "https://open.bigmodel.cn/api/paas/v4/",
"install_file": "install.py",
"application_file": "app.py",
"application_class": "ZhipuAIApplicationAdapter",
"endpoint_controller_file": "controller.py",
"endpoint_controller_class": "ZhipuAIControlAdapter",
"profile_endpoint_file": "profile_endpoint.py",
"profile_endpoint_class": "ZhipuAIProfileEndpointAdapter"
}
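A hedged sketch of how a loader might consume this manifest; loom_core's actual loading code is not part of this diff, so the function below is illustrative only:

import importlib.util
import json
import os

def load_application_class(plugin_dir):
    # Read description.json, import the module named by application_file,
    # and return the class named by application_class.
    with open(os.path.join(plugin_dir, "description.json"), encoding="utf-8") as f:
        desc = json.load(f)
    spec = importlib.util.spec_from_file_location(
        desc["plugins_name"], os.path.join(plugin_dir, desc["application_file"]))
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return getattr(module, desc["application_class"])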

View File

@@ -345,8 +345,6 @@ def dialogue_page(api: ApiRequest, is_lite: bool = False):
st.rerun()
warning_placeholder = st.empty()
with warning_placeholder.container():
st.warning('Running in 8 x A100')
export_btn.download_button(
"导出记录",

View File

@@ -69,7 +69,4 @@ def openai_plugins_page(api: ApiRequest, is_lite: bool = None):
st.button("启动" + st.session_state.worker_id, key="start_worker",
on_click=start_worker)
st.button("停止" + st.session_state.worker_id, key="stop_worker",
on_click=stop_worker)