log_path: "logs"
log_level: "DEBUG"

api_server:
  host: "127.0.0.1"
  port: 8000

publish_server:
  host: "127.0.0.1"
  port: 8001

openai_plugins_folder:
  - "/media/gpt4-pdf-chatbot-langchain/langchain-chatchat-archive/openai_plugins"
openai_plugins_load_folder:
  - "/media/gpt4-pdf-chatbot-langchain/langchain-chatchat-archive/configs"


plugins:
  - openai:
      name: "openai"

  - fastchat:
      name: "fastchat"
      logdir: "/media/gpt4-pdf-chatbot-langchain/langchain-chatchat-archive/logs"
      # LLM 运行设备。设为"auto"会自动检测，也可手动设定为"cuda","mps","cpu"其中之一。
      llm_device: "auto"
      model_names:
        - "Qwen-1_8B-Chat"
      run_controller:
        host: "127.0.0.1"
        port: 20001
        dispatch_method: "shortest_queue"
      run_openai_api:
        host: "127.0.0.1"
        port: 20000
      fschat_model_workers:
        - default:
            host: "127.0.0.1"
            port: 20002
            device: "auto"
            # False,'vllm',使用的推理加速框架,使用vllm如果出现HuggingFace通信问题，参见doc/FAQ
            # vllm对一些模型支持还不成熟，暂时默认关闭
            # fschat=0.2.33的代码有bug, 如需使用，源码修改fastchat.server.vllm_worker，
            # 将103行中sampling_params = SamplingParams的参数stop=list(stop)修改为stop= [i for i in stop if i!=""]
            infer_turbo: False

            # model_worker多卡加载需要配置的参数
            # "gpus": None, # 使用的GPU，以str的格式指定，如"0,1"，如失效请使用CUDA_VISIBLE_DEVICES="0,1"等形式指定
            # "num_gpus": 1, # 使用GPU的数量
            # "max_gpu_memory": "20GiB", # 每个GPU占用的最大显存

            # 以下为model_worker非常用参数，可根据需要配置
            # "load_8bit": False, # 开启8bit量化
            # "cpu_offloading": None,
            # "gptq_ckpt": None,
            # "gptq_wbits": 16,
            # "gptq_groupsize": -1,
            # "gptq_act_order": False,
            # "awq_ckpt": None,
            # "awq_wbits": 16,
            # "awq_groupsize": -1,
            # "model_names": LLM_MODELS,
            # "conv_template": None,
            # "limit_worker_concurrency": 5,
            # "stream_interval": 2,
            # "no_register": False,
            # "embed_in_truncate": False,

            # 以下为vllm_worker配置参数,注意使用vllm必须有gpu，仅在Linux测试通过

            # tokenizer = model_path # 如果tokenizer与model_path不一致在此处添加
            # 'tokenizer_mode':'auto',
            # 'trust_remote_code':True,
            # 'download_dir':None,
            # 'load_format':'auto',
            # 'dtype':'auto',
            # 'seed':0,
            # 'worker_use_ray':False,
            # 'pipeline_parallel_size':1,
            # 'tensor_parallel_size':1,
            # 'block_size':16,
            # 'swap_space':4 , # GiB
            # 'gpu_memory_utilization':0.90,
            # 'max_num_batched_tokens':2560,
            # 'max_num_seqs':256,
            # 'disable_log_stats':False,
            # 'conv_template':None,
            # 'limit_worker_concurrency':5,
            # 'no_register':False,
            # 'num_gpus': 1
            # 'engine_use_ray': False,
            # 'disable_log_requests': False
        - Qwen-1_8B:
            host: "127.0.0.1"
            port: 20008
        - chatglm3-6b:   # 使用default中的IP和端口
            device": "cuda"

          # 以下配置可以不用修改，在model_config中设置启动的模型
        - zhipu-api:
            port: 21001

        - minimax-api:
            port: 21002

        - xinghuo-api:
            port: 21003

        - qianfan-api:
            port: 21004

        - fangzhou-api:
            port: 21005

        - qwen-api:
            port: 21006

        - baichuan-api:
            port: 21007

        - azure-api:
            port: 21008

        - tiangong-api:
            port: 21009
      online_llm_model:
        # 线上模型。请在server_config中为每个在线API设置不同的端口

        - "openai-api":
            "model_name": "gpt-3.5-turbo"
            "api_base_url": "https://api.openai.com/v1"
            "api_key": ""
            "openai_proxy": ""

        # 具体注册及api key获取请前往 http://open.bigmodel.cn
        - "zhipu-api":
            "api_key": ""
            "version": "chatglm_turbo"  # 可选包括 "chatglm_turbo"
            "provider": "ChatGLMWorker"

        # 具体注册及api key获取请前往 https://api.minimax.chat/
        - "minimax-api":
            "group_id": ""
            "api_key": ""
            "is_pro": False
            "provider": "MiniMaxWorker"

        # 具体注册及api key获取请前往 https://xinghuo.xfyun.cn/
        - "xinghuo-api":
            "APPID": ""
            "APISecret": ""
            "api_key": ""
            "version": "v1.5"  # 你使用的讯飞星火大模型版本，可选包括 "v3.0", "v1.5", "v2.0"
            "provider": "XingHuoWorker"

        # 百度千帆 API，申请方式请参考 https://cloud.baidu.com/doc/WENXINWORKSHOP/s/4lilb2lpf
        - "qianfan-api":
            "version": "ERNIE-Bot"  # 注意大小写。当前支持 "ERNIE-Bot" 或 "ERNIE-Bot-turbo"， 更多的见官方文档。
            "version_url": ""  # 也可以不填写version，直接填写在千帆申请模型发布的API地址
            "api_key": ""
            "secret_key": ""
            "provider": "QianFanWorker"

        # 火山方舟 API，文档参考 https://www.volcengine.com/docs/82379
        - "fangzhou-api":
            "version": "chatglm-6b-model"  # 当前支持 "chatglm-6b-model"， 更多的见文档模型支持列表中方舟部分。
            "version_url": ""  # 可以不填写version，直接填写在方舟申请模型发布的API地址
            "api_key": ""
            "secret_key": ""
            "provider": "FangZhouWorker"

        # 阿里云通义千问 API，文档参考 https://help.aliyun.com/zh/dashscope/developer-reference/api-details
        - "qwen-api":
            "version": "qwen-turbo"  # 可选包括 "qwen-turbo", "qwen-plus"
            "api_key": ""  # 请在阿里云控制台模型服务灵积API-KEY管理页面创建
            "provider": "QwenWorker"

        # 百川 API，申请方式请参考 https://www.baichuan-ai.com/home#api-enter
        - "baichuan-api":
            "version": "Baichuan2-53B"  # 当前支持 "Baichuan2-53B"， 见官方文档。
            "api_key": ""
            "secret_key": ""
            "provider": "BaiChuanWorker"

        # Azure API
        - "azure-api":
            "deployment_name": ""  # 部署容器的名字
            "resource_name": ""  # https://{resource_name}.openai.azure.com/openai/ 填写resource_name的部分，其他部分不要填写
            "api_version": ""  # API的版本，不是模型版本
            "api_key": ""
            "provider": "AzureWorker"

        # 昆仑万维天工 API https://model-platform.tiangong.cn/
        - "tiangong-api":
            "version": "SkyChat-MegaVerse"
            "api_key": ""
            "secret_key": ""
            "provider": "TianGongWorker"
      "llm_model":
        # 以下部分模型并未完全测试，仅根据fastchat和vllm模型的模型列表推定支持
        "chatglm2-6b": "THUDM/chatglm2-6b"
        "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
        "chatglm3-6b": "THUDM/chatglm3-6b"
        "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"
        "chatglm3-6b-base": "THUDM/chatglm3-6b-base"

        "Qwen-1_8B": "/media/checkpoint/Qwen-1_8B"
        "Qwen-1_8B-Chat": "/media/checkpoint/Qwen-1_8B-Chat"
        "Qwen-1_8B-Chat-Int8": "Qwen/Qwen-1_8B-Chat-Int8"
        "Qwen-1_8B-Chat-Int4": "Qwen/Qwen-1_8B-Chat-Int4"

        "Qwen-7B": "Qwen/Qwen-7B"
        "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"

        "Qwen-14B": "Qwen/Qwen-14B"
        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"

        "Qwen-14B-Chat-Int8": "Qwen/Qwen-14B-Chat-Int8"
        # 在新版的transformers下需要手动修改模型的config.json文件，在quantization_config字典中
        # 增加`disable_exllama:true` 字段才能启动qwen的量化模型
        "Qwen-14B-Chat-Int4": "Qwen/Qwen-14B-Chat-Int4"

        "Qwen-72B": "Qwen/Qwen-72B"
        "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
        "Qwen-72B-Chat-Int8": "Qwen/Qwen-72B-Chat-Int8"
        "Qwen-72B-Chat-Int4": "Qwen/Qwen-72B-Chat-Int4"

        "baichuan2-13b": "baichuan-inc/Baichuan2-13B-Chat"
        "baichuan2-7b": "baichuan-inc/Baichuan2-7B-Chat"

        "baichuan-7b": "baichuan-inc/Baichuan-7B"
        "baichuan-13b": "baichuan-inc/Baichuan-13B"
        "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"

        "aquila-7b": "BAAI/Aquila-7B"
        "aquilachat-7b": "BAAI/AquilaChat-7B"

        "internlm-7b": "internlm/internlm-7b"
        "internlm-chat-7b": "internlm/internlm-chat-7b"

        "falcon-7b": "tiiuae/falcon-7b"
        "falcon-40b": "tiiuae/falcon-40b"
        "falcon-rw-7b": "tiiuae/falcon-rw-7b"

        "gpt2": "gpt2"
        "gpt2-xl": "gpt2-xl"

        "gpt-j-6b": "EleutherAI/gpt-j-6b"
        "gpt4all-j": "nomic-ai/gpt4all-j"
        "gpt-neox-20b": "EleutherAI/gpt-neox-20b"
        "pythia-12b": "EleutherAI/pythia-12b"
        "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
        "dolly-v2-12b": "databricks/dolly-v2-12b"
        "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"

        "Llama-2-13b-hf": "meta-llama/Llama-2-13b-hf"
        "Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf"
        "open_llama_13b": "openlm-research/open_llama_13b"
        "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3"
        "koala": "young-geng/koala"

        "mpt-7b": "mosaicml/mpt-7b"
        "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter"
        "mpt-30b": "mosaicml/mpt-30b"
        "opt-66b": "facebook/opt-66b"
        "opt-iml-max-30b": "facebook/opt-iml-max-30b"

        "agentlm-7b": "THUDM/agentlm-7b"
        "agentlm-13b": "THUDM/agentlm-13b"
        "agentlm-70b": "THUDM/agentlm-70b"

        "Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat"
      vllm_model_dict:
        "aquila-7b": "BAAI/Aquila-7B"
        "aquilachat-7b": "BAAI/AquilaChat-7B"

        "baichuan-7b": "baichuan-inc/Baichuan-7B"
        "baichuan-13b": "baichuan-inc/Baichuan-13B"
        "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat"

        "chatglm2-6b": "THUDM/chatglm2-6b"
        "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k"
        "chatglm3-6b": "THUDM/chatglm3-6b"
        "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k"

        "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat"
        "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k"

        # 注意：bloom系列的tokenizer与model是分离的，因此虽然vllm支持，但与fschat框架不兼容
        # "bloom": "bigscience/bloom",
        # "bloomz": "bigscience/bloomz",
        # "bloomz-560m": "bigscience/bloomz-560m",
        # "bloomz-7b1": "bigscience/bloomz-7b1",
        # "bloomz-1b7": "bigscience/bloomz-1b7",

        "internlm-7b": "internlm/internlm-7b"
        "internlm-chat-7b": "internlm/internlm-chat-7b"
        "falcon-7b": "tiiuae/falcon-7b"
        "falcon-40b": "tiiuae/falcon-40b"
        "falcon-rw-7b": "tiiuae/falcon-rw-7b"
        "gpt2": "gpt2"
        "gpt2-xl": "gpt2-xl"
        "gpt-j-6b": "EleutherAI/gpt-j-6b"
        "gpt4all-j": "nomic-ai/gpt4all-j"
        "gpt-neox-20b": "EleutherAI/gpt-neox-20b"
        "pythia-12b": "EleutherAI/pythia-12b"
        "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
        "dolly-v2-12b": "databricks/dolly-v2-12b"
        "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b"
        "Llama-2-13b-hf": "meta-llama/Llama-2-13b-hf"
        "Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf"
        "open_llama_13b": "openlm-research/open_llama_13b"
        "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3"
        "koala": "young-geng/koala"
        "mpt-7b": "mosaicml/mpt-7b"
        "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter"
        "mpt-30b": "mosaicml/mpt-30b"
        "opt-66b": "facebook/opt-66b"
        "opt-iml-max-30b": "facebook/opt-iml-max-30b"

        "Qwen-1_8B": "Qwen/Qwen-1_8B"
        "Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat"
        "Qwen-1_8B-Chat-Int8": "Qwen/Qwen-1_8B-Chat-Int8"
        "Qwen-1_8B-Chat-Int4": "Qwen/Qwen-1_8B-Chat-Int4"

        "Qwen-7B": "Qwen/Qwen-7B"
        "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat"

        "Qwen-14B": "Qwen/Qwen-14B"
        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
        "Qwen-14B-Chat-Int8": "Qwen/Qwen-14B-Chat-Int8"
        "Qwen-14B-Chat-Int4": "Qwen/Qwen-14B-Chat-Int4"

        "Qwen-72B": "Qwen/Qwen-72B"
        "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat"
        "Qwen-72B-Chat-Int8": "Qwen/Qwen-72B-Chat-Int8"
        "Qwen-72B-Chat-Int4": "Qwen/Qwen-72B-Chat-Int4"

        "agentlm-7b": "THUDM/agentlm-7b"
        "agentlm-13b": "THUDM/agentlm-13b"
        "agentlm-70b": "THUDM/agentlm-70b"