Remove the fastchat configuration (删除fastchat的配置)

Authored by glide-the on 2024-01-18 01:17:11 +08:00, committed by liunux4odoo
parent 48fb6b83fd
commit 49f6760702
7 changed files with 55 additions and 333 deletions


@@ -36,3 +36,5 @@ BASE_TEMP_DIR = os.path.join(tempfile.gettempdir(), "chatchat")
 if os.path.isdir(BASE_TEMP_DIR):
     shutil.rmtree(BASE_TEMP_DIR)
 os.makedirs(BASE_TEMP_DIR, exist_ok=True)
+
+MEDIA_PATH = None

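The hunk above introduces MEDIA_PATH with a default of None rather than a directory path. A minimal sketch of how a consumer would have to guard against that default; the helper name is made up and is not code from this commit:

    import os

    MEDIA_PATH = None  # default introduced by the hunk above

    def resolve_media_file(filename: str):
        # Return an absolute path for a media file, or None when no media directory is configured.
        if MEDIA_PATH is None:
            return None
        return os.path.join(MEDIA_PATH, filename)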

@@ -1,6 +1,6 @@
 import os

-# 默认使用的知识库
 DEFAULT_KNOWLEDGE_BASE = "samples"

 # 默认向量库/全文检索引擎类型。可选faiss, milvus(离线) & zilliz(在线), pgvector,全文检索引擎es
@@ -24,15 +24,16 @@ VECTOR_SEARCH_TOP_K = 3
 # 知识库匹配相关度阈值取值范围在0-1之间SCORE越小相关度越高取到1相当于不筛选建议设置在0.5左右
 SCORE_THRESHOLD = 1

-# 是否开启中文标题加强,以及标题增强的相关配置
-# 通过增加标题判断判断哪些文本为标题并在metadata中进行标记
-# 然后将文本与往上一级的标题进行拼合,实现文本信息的增强。
-ZH_TITLE_ENHANCE = False
+# 默认搜索引擎。可选bing, duckduckgo, metaphor
+DEFAULT_SEARCH_ENGINE = "duckduckgo"
+
+# 搜索引擎匹配结题数量
+SEARCH_ENGINE_TOP_K = 3
+
+ZH_TITLE_ENHANCE = False

 # 每个知识库的初始化介绍用于在初始化知识库时显示和Agent调用没写则没有介绍不会被Agent调用。
 KB_INFO = {
-    "知识库名称": "知识库介绍",
     "samples": "关于本项目issue的解答",
 }

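With this hunk the knowledge-base config also carries the search-engine defaults (DEFAULT_SEARCH_ENGINE, SEARCH_ENGINE_TOP_K). A small illustrative dispatcher, assuming callers register engine callables themselves; none of this is project code:

    from typing import Callable, Dict, List

    DEFAULT_SEARCH_ENGINE = "duckduckgo"
    SEARCH_ENGINE_TOP_K = 3

    def search(query: str,
               engines: Dict[str, Callable[[str, int], List[dict]]],
               engine: str = DEFAULT_SEARCH_ENGINE,
               top_k: int = SEARCH_ENGINE_TOP_K) -> List[dict]:
        # Look up the configured engine and trim its results to top_k.
        if engine not in engines:
            raise ValueError(f"search engine {engine!r} is not registered")
        return engines[engine](query, top_k)[:top_k]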

@@ -22,29 +22,54 @@ SUPPORT_AGENT_MODELS = [
 ]

 LLM_MODEL_CONFIG = {
     "preprocess_model": {
+        # "Mixtral-8x7B-v0.1": {
+        #     "temperature": 0.01,
+        #     "max_tokens": 5,
+        #     "prompt_name": "default",
+        #     "callbacks": False
+        # },
         "chatglm3-6b": {
-            "temperature": 0.01,
-            "max_tokens": 5,
+            "temperature": 0.05,
+            "max_tokens": 4096,
             "prompt_name": "default",
             "callbacks": False
         },
     },
     "llm_model": {
+        # "Mixtral-8x7B-v0.1": {
+        #     "temperature": 0.9,
+        #     "max_tokens": 4000,
+        #     "history_len": 5,
+        #     "prompt_name": "default",
+        #     "callbacks": True
+        # },
         "chatglm3-6b": {
-            "temperature": 0.9,
-            "max_tokens": 4000,
-            "history_len": 5,
+            "temperature": 0.05,
+            "max_tokens": 4096,
             "prompt_name": "default",
+            "history_len": 10,
             "callbacks": True
         },
     },
     "action_model": {
+        # "Qwen-14B-Chat": {
+        #     "temperature": 0.05,
+        #     "max_tokens": 4096,
+        #     "prompt_name": "qwen",
+        #     "callbacks": True
+        # },
         "chatglm3-6b": {
             "temperature": 0.05,
             "max_tokens": 4096,
             "prompt_name": "ChatGLM3",
             "callbacks": True
         },
+        # "zhipu-api": {
+        #     "temperature": 0.01,
+        #     "max_tokens": 4096,
+        #     "prompt_name": "ChatGLM3",
+        #     "callbacks": True
+        # }
     },
     "postprocess_model": {
@@ -55,11 +80,6 @@ LLM_MODEL_CONFIG = {
             "callbacks": True
         }
     },
-    "image_model": {
-        "sd-turbo": {
-            "size": "1024x1024",
-        }
-    }
 }
 LLM_DEVICE = "auto"
 ONLINE_LLM_MODEL = {
@@ -69,11 +89,6 @@ ONLINE_LLM_MODEL = {
         "api_key": "sk-",
         "openai_proxy": "",
     },
-    "sd-turbo": {
-        "model_name": "sd-turbo",
-        "api_base_url": "http://127.0.0.1:9997/v1",
-        "api_key": "EMPTY",
-    },
     "zhipu-api": {
         "api_key": "",
         "version": "chatglm_turbo",
@@ -174,91 +189,8 @@ MODEL_PATH = {
         "piccolo-large-zh": "sensenova/piccolo-large-zh",
         "nlp_gte_sentence-embedding_chinese-large": "/Models/nlp_gte_sentence-embedding_chinese-large",
         "text-embedding-ada-002": "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",
-    },
-
-    "llm_model": {
-        "vicuna-7b-v1.5": "/share/official_pretrains/hf_home/vicuna-7b-v1.5",
-        "Mixtral-8x7B-v0.1": "/share/home/zyx/Models/Mixtral-8x7B-v0.1",
-        "chatglm2-6b": "THUDM/chatglm2-6b",
-        "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k",
-        "chatglm3-6b": "/share/home/zyx/Models/chatglm3-6b",
-        "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k",
-        "Yi-34B-Chat": "/share/home/zyx/Models/Yi-34B-Chat",
-        "BlueLM-7B-Chat": "/Models/BlueLM-7B-Chat",
-        "baichuan2-13b": "/media/zr/Data/Models/LLM/Baichuan2-13B-Chat",
-        "baichuan2-7b": "/media/zr/Data/Models/LLM/Baichuan2-7B-Chat",
-        "baichuan-7b": "baichuan-inc/Baichuan-7B",
-        "baichuan-13b": "baichuan-inc/Baichuan-13B",
-        'baichuan-13b-chat': 'baichuan-inc/Baichuan-13B-Chat',
-        "aquila-7b": "BAAI/Aquila-7B",
-        "aquilachat-7b": "BAAI/AquilaChat-7B",
-        "internlm-7b": "internlm/internlm-7b",
-        "internlm-chat-7b": "internlm/internlm-chat-7b",
-        "internlm2-chat-7b": "internlm/internlm2-chat-7b",
-        "internlm2-chat-20b": "internlm/internlm2-chat-20b",
-        "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat",
-        "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k",
-        "Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat",
-        "agentlm-7b": "THUDM/agentlm-7b",
-        "agentlm-13b": "THUDM/agentlm-13b",
-        "agentlm-70b": "THUDM/agentlm-70b",
-        "falcon-7b": "tiiuae/falcon-7b",
-        "falcon-40b": "tiiuae/falcon-40b",
-        "falcon-rw-7b": "tiiuae/falcon-rw-7b",
-        "aquila-7b": "BAAI/Aquila-7B",
-        "aquilachat-7b": "BAAI/AquilaChat-7B",
-        "open_llama_13b": "openlm-research/open_llama_13b",
-        "vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5",
-        "koala": "young-geng/koala",
-        "mpt-7b": "mosaicml/mpt-7b",
-        "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter",
-        "mpt-30b": "mosaicml/mpt-30b",
-        "opt-66b": "facebook/opt-66b",
-        "opt-iml-max-30b": "facebook/opt-iml-max-30b",
-        "gpt2": "gpt2",
-        "gpt2-xl": "gpt2-xl",
-        "gpt-j-6b": "EleutherAI/gpt-j-6b",
-        "gpt4all-j": "nomic-ai/gpt4all-j",
-        "gpt-neox-20b": "EleutherAI/gpt-neox-20b",
-        "pythia-12b": "EleutherAI/pythia-12b",
-        "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
-        "dolly-v2-12b": "databricks/dolly-v2-12b",
-        "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b",
-        "Llama-2-13b-hf": "meta-llama/Llama-2-13b-hf",
-        "Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf",
-        "open_llama_13b": "openlm-research/open_llama_13b",
-        "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3",
-        "koala": "young-geng/koala",
-        "mpt-7b": "mosaicml/mpt-7b",
-        "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter",
-        "mpt-30b": "mosaicml/mpt-30b",
-        "opt-66b": "facebook/opt-66b",
-        "opt-iml-max-30b": "facebook/opt-iml-max-30b",
-        "Qwen-1_8B-Chat": "/home/zr/Models/Qwen-1_8B-Chat",
-        "Qwen-7B": "Qwen/Qwen-7B",
-        "Qwen-14B": "Qwen/Qwen-14B",
-        "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
-        "Qwen-14B-Chat": "/share/home/zyx/Models/Qwen-14B-Chat",
-        "Qwen-14B-Chat-Int8": "Qwen/Qwen-14B-Chat-Int8",
-        "Qwen-14B-Chat-Int4": "/media/zr/Data/Models/LLM/Qwen-14B-Chat-Int4",
-    },
+    }
 }

 NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data")
@@ -315,3 +247,7 @@ VLLM_MODEL_DICT = {
     "agentlm-70b": "THUDM/agentlm-70b",
 }
+
+LOOM_CONFIG = "/media/gpt4-pdf-chatbot-langchain/LooM/src/core/loom.yaml"
+OPENAI_KEY = None
+OPENAI_PROXY = None

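The model config now keeps only the chatglm3-6b entries (plus commented-out alternatives) and drops the local llm_model path table and the sd-turbo image model. A hedged sketch of how such per-model settings are typically handed to the ChatOpenAI class that server/utils.py imports; build_chat_model, the endpoint URL and the key are assumptions, not code from this repository:

    from langchain_openai import ChatOpenAI

    LLM_MODEL_CONFIG = {
        "llm_model": {
            "chatglm3-6b": {
                "temperature": 0.05,
                "max_tokens": 4096,
                "prompt_name": "default",
                "history_len": 10,
                "callbacks": True,
            },
        },
    }

    def build_chat_model(model_name: str) -> ChatOpenAI:
        # Read the per-model settings and build an OpenAI-compatible chat client.
        cfg = LLM_MODEL_CONFIG["llm_model"][model_name]
        return ChatOpenAI(
            model=model_name,
            temperature=cfg["temperature"],
            max_tokens=cfg["max_tokens"],
            base_url="http://127.0.0.1:9997/v1",  # placeholder OpenAI-compatible endpoint
            api_key="EMPTY",                      # placeholder key
        )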

@@ -1,7 +1,7 @@
 PROMPT_TEMPLATES = {
     "preprocess_model": {
         "default":
             '你只要回复0 和 1 ,代表不需要使用工具。以下几种问题需要使用工具:'
             '1. 需要联网查询的内容\n'
             '2. 需要计算的内容\n'
             '3. 需要查询实时性的内容\n'
@@ -45,9 +45,9 @@ PROMPT_TEMPLATES = {
             'Thought: I now know the final answer\n'
             'Final Answer: the final answer to the original input question\n'
             'Begin! Reminder to always use the exact characters `Final Answer` when responding.\n'
+            'history: {history}\n'
             'Question:{input}\n'
             'Thought:{agent_scratchpad}\n',
         "ChatGLM3":
             'You can answer using the tools.Respond to the human as helpfully and accurately as possible.\n'
             'You have access to the following tools:\n'
@@ -81,7 +81,7 @@ PROMPT_TEMPLATES = {
             'Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary.\n'
             'Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.\n'
             'Question: {input}\n\n'
-            'Thought: {agent_scratchpad}\n',
+            '{agent_scratchpad}\n',
         "qwen":
             'Answer the following question as best you can. You have access to the following APIs:\n\n'
             '{tools}\n\n'
@@ -130,7 +130,7 @@ TOOL_CONFIG = {
     "bing": {
         "result_len": 3,
         "bing_search_url": "https://api.bing.microsoft.com/v7.0/search",
-        "bing_key": "your bing key",
+        "bing_key": "680a39347d7242c5bd2d7a9576a125b7",
     },
     "metaphor": {
         "result_len": 3,
@@ -160,7 +160,7 @@ TOOL_CONFIG = {
     },
     "weather_check": {
         "use": False,
-        "api-key": "your key",
+        "api-key": "S8vrB4U_-c5mvAMiK",
     },
     "search_youtube": {
         "use": False,
@@ -171,21 +171,12 @@ TOOL_CONFIG = {
     "calculate": {
         "use": False,
     },
-    "text2images": {
-        "use": False,
-    },
-
-    # Use THUDM/cogvlm-chat-hf as default
     "vqa_processor": {
         "use": False,
         "model_path": "your model path",
         "tokenizer_path": "your tokenizer path",
         "device": "cuda:1"
     },
-
-    # Use Qwen/Qwen-Audio-Chat as default
     "aqa_processor": {
         "use": False,
         "model_path": "your model path",

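The ChatGLM3 agent template now ends with a bare '{agent_scratchpad}' instead of 'Thought: {agent_scratchpad}', presumably because the scratchpad string already carries its own step formatting. A small sketch (assumed, not project code) of how such a template is rendered, with the placeholders filled by the agent executor on every step; only the tail of the template is reproduced:

    from langchain_core.prompts import PromptTemplate

    template = (
        "Question: {input}\n\n"
        "{agent_scratchpad}\n"
    )
    prompt = PromptTemplate.from_template(template)
    # The executor supplies the user question and the accumulated scratchpad.
    print(prompt.format(input="What's the weather in Shanghai today?", agent_scratchpad=""))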

@@ -6,15 +6,15 @@ HTTPX_DEFAULT_TIMEOUT = 300.0

 # API 是否开启跨域默认为False如果需要开启请设置为True
 # is open cross domain
-OPEN_CROSS_DOMAIN = False
+OPEN_CROSS_DOMAIN = True

 # 各服务器默认绑定host。如改为"0.0.0.0"需要修改下方所有XX_SERVER的host
-DEFAULT_BIND_HOST = "0.0.0.0" if sys.platform != "win32" else "127.0.0.1"
+DEFAULT_BIND_HOST = "127.0.0.1" if sys.platform != "win32" else "127.0.0.1"

 # webui.py server
 WEBUI_SERVER = {
     "host": DEFAULT_BIND_HOST,
-    "port": 8501,
+    "port": 7870,
 }

 # api.py server
@@ -23,121 +23,3 @@ API_SERVER = {
     "port": 7861,
 }
-
-# fastchat openai_api server
-FSCHAT_OPENAI_API = {
-    "host": DEFAULT_BIND_HOST,
-    "port": 20000,
-}
-
-# fastchat model_worker server
-# 这些模型必须是在model_config.MODEL_PATH或ONLINE_MODEL中正确配置的。
-# 在启动startup.py时可用通过`--model-name xxxx yyyy`指定模型不指定则为LLM_MODELS
-FSCHAT_MODEL_WORKERS = {
-    # 所有模型共用的默认配置,可在模型专项配置中进行覆盖。
-    "default": {
-        "host": DEFAULT_BIND_HOST,
-        "port": 20002,
-        "device": LLM_DEVICE,
-        # False,'vllm',使用的推理加速框架,使用vllm如果出现HuggingFace通信问题参见doc/FAQ
-        # vllm对一些模型支持还不成熟暂时默认关闭
-        # fschat=0.2.33的代码有bug, 如需使用源码修改fastchat.server.vllm_worker
-        # 将103行中sampling_params = SamplingParams的参数stop=list(stop)修改为stop= [i for i in stop if i!=""]
-        "infer_turbo": False,
-
-        # model_worker多卡加载需要配置的参数
-        # "gpus": None, # 使用的GPU以str的格式指定如"0,1"如失效请使用CUDA_VISIBLE_DEVICES="0,1"等形式指定
-        # "num_gpus": 1, # 使用GPU的数量
-        # "max_gpu_memory": "20GiB", # 每个GPU占用的最大显存
-
-        # 以下为model_worker非常用参数可根据需要配置
-        # "load_8bit": False, # 开启8bit量化
-        # "cpu_offloading": None,
-        # "gptq_ckpt": None,
-        # "gptq_wbits": 16,
-        # "gptq_groupsize": -1,
-        # "gptq_act_order": False,
-        # "awq_ckpt": None,
-        # "awq_wbits": 16,
-        # "awq_groupsize": -1,
-        # "model_names": None,
-        # "conv_template": None,
-        # "limit_worker_concurrency": 5,
-        # "stream_interval": 2,
-        # "no_register": False,
-        # "embed_in_truncate": False,
-
-        # 以下为vllm_worker配置参数,注意使用vllm必须有gpu仅在Linux测试通过
-        # tokenizer = model_path # 如果tokenizer与model_path不一致在此处添加
-        # 'tokenizer_mode':'auto',
-        # 'trust_remote_code':True,
-        # 'download_dir':None,
-        # 'load_format':'auto',
-        # 'dtype':'auto',
-        # 'seed':0,
-        # 'worker_use_ray':False,
-        # 'pipeline_parallel_size':1,
-        # 'tensor_parallel_size':1,
-        # 'block_size':16,
-        # 'swap_space':4 , # GiB
-        # 'gpu_memory_utilization':0.90,
-        # 'max_num_batched_tokens':2560,
-        # 'max_num_seqs':256,
-        # 'disable_log_stats':False,
-        # 'conv_template':None,
-        # 'limit_worker_concurrency':5,
-        # 'no_register':False,
-        # 'num_gpus': 1
-        # 'engine_use_ray': False,
-        # 'disable_log_requests': False
-    },
-
-    # 可以如下示例方式更改默认配置
-    # "Qwen-1_8B-Chat": { # 使用default中的IP和端口
-    #     "device": "cpu",
-    # },
-    "chatglm3-6b": {  # 使用default中的IP和端口
-        "device": "cuda",
-    },
-
-    # 以下配置可以不用修改在model_config中设置启动的模型
-    "zhipu-api": {
-        "port": 21001,
-    },
-    "minimax-api": {
-        "port": 21002,
-    },
-    "xinghuo-api": {
-        "port": 21003,
-    },
-    "qianfan-api": {
-        "port": 21004,
-    },
-    "fangzhou-api": {
-        "port": 21005,
-    },
-    "qwen-api": {
-        "port": 21006,
-    },
-    "baichuan-api": {
-        "port": 21007,
-    },
-    "azure-api": {
-        "port": 21008,
-    },
-    "tiangong-api": {
-        "port": 21009,
-    },
-}
-
-# fastchat multi model worker server
-FSCHAT_MULTI_MODEL_WORKERS = {
-    # TODO:
-}
-
-# fastchat controller server
-FSCHAT_CONTROLLER = {
-    "host": DEFAULT_BIND_HOST,
-    "port": 20001,
-    "dispatch_method": "shortest_queue",
-}

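With the FSCHAT_* sections removed, the server config reduces to the webui and api blocks shown above. A minimal sketch of what remains, plus an api_address helper like the one referenced in server/utils.py; the helper body is an assumption, not code from this commit:

    import sys

    DEFAULT_BIND_HOST = "127.0.0.1" if sys.platform != "win32" else "127.0.0.1"  # both branches identical in the new config

    WEBUI_SERVER = {"host": DEFAULT_BIND_HOST, "port": 7870}
    API_SERVER = {"host": DEFAULT_BIND_HOST, "port": 7861}

    def api_address() -> str:
        # Turn the bind address into a client-usable base URL.
        host = API_SERVER["host"]
        if host == "0.0.0.0":
            host = "127.0.0.1"
        return f"http://{host}:{API_SERVER['port']}"

    print(api_address())  # http://127.0.0.1:7861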

@@ -6,7 +6,7 @@ from pathlib import Path
 import asyncio
 from configs import (LLM_MODEL_CONFIG, LLM_DEVICE, EMBEDDING_DEVICE,
                      MODEL_PATH, MODEL_ROOT_PATH, ONLINE_LLM_MODEL, logger, log_verbose,
-                     FSCHAT_MODEL_WORKERS, HTTPX_DEFAULT_TIMEOUT)
+                     HTTPX_DEFAULT_TIMEOUT)
 import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from langchain_openai.chat_models import ChatOpenAI
@@ -319,20 +319,6 @@ def list_embed_models() -> List[str]:
     return list(MODEL_PATH["embed_model"])


-def list_config_llm_models() -> Dict[str, Dict]:
-    '''
-    get configured llm models with different types.
-    return {config_type: {model_name: config}, ...}
-    '''
-    workers = FSCHAT_MODEL_WORKERS.copy()
-    workers.pop("default", None)
-
-    return {
-        "local": MODEL_PATH["llm_model"].copy(),
-        "online": ONLINE_LLM_MODEL.copy(),
-        "worker": workers,
-    }


 def get_model_path(model_name: str, type: str = None) -> Optional[str]:
     if type in MODEL_PATH:
@@ -368,14 +354,8 @@ def get_model_worker_config(model_name: str = None) -> dict:
     加载model worker的配置项
     优先级:FSCHAT_MODEL_WORKERS[model_name] > ONLINE_LLM_MODEL[model_name] > FSCHAT_MODEL_WORKERS["default"]
     '''
-    from configs.model_config import ONLINE_LLM_MODEL, MODEL_PATH
-    from configs.server_config import FSCHAT_MODEL_WORKERS
-
-    config = FSCHAT_MODEL_WORKERS.get("default", {}).copy()
-    config.update(ONLINE_LLM_MODEL.get(model_name, {}).copy())
-    config.update(FSCHAT_MODEL_WORKERS.get(model_name, {}).copy())
-    return config
+    return {}


 def api_address() -> str:

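After this change get_model_worker_config() always returns an empty dict, so any caller that used to rely on keys from FSCHAT_MODEL_WORKERS or ONLINE_LLM_MODEL now has to supply its own defaults. A caller-side sketch (not from the commit) of the consequence:

    def get_model_worker_config(model_name: str = None) -> dict:
        # Behaviour after this commit: the fastchat-based lookup is gone.
        return {}

    config = get_model_worker_config("chatglm3-6b")
    device = config.get("device", "auto")  # falls back instead of raising KeyError
    print(device)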

@@ -1,70 +0,0 @@
-import requests
-import json
-import sys
-from pathlib import Path
-
-root_path = Path(__file__).parent.parent.parent
-sys.path.append(str(root_path))
-from configs.server_config import FSCHAT_MODEL_WORKERS
-from server.utils import api_address, get_model_worker_config
-
-from pprint import pprint
-import random
-from typing import List
-
-
-def get_configured_models() -> List[str]:
-    model_workers = list(FSCHAT_MODEL_WORKERS)
-    if "default" in model_workers:
-        model_workers.remove("default")
-    return model_workers
-
-
-api_base_url = api_address()
-
-
-def get_running_models(api="/llm_model/list_models"):
-    url = api_base_url + api
-    r = requests.post(url)
-    if r.status_code == 200:
-        return r.json()["data"]
-    return []
-
-
-def test_running_models(api="/llm_model/list_running_models"):
-    url = api_base_url + api
-    r = requests.post(url)
-    assert r.status_code == 200
-    print("\n获取当前正在运行的模型列表:")
-    pprint(r.json())
-    assert isinstance(r.json()["data"], list)
-    assert len(r.json()["data"]) > 0
-
-
-# 不建议使用stop_model功能。按现在的实现停止了就只能手动再启动
-# def test_stop_model(api="/llm_model/stop"):
-#     url = api_base_url + api
-#     r = requests.post(url, json={""})
-
-
-def test_change_model(api="/llm_model/change_model"):
-    url = api_base_url + api
-
-    running_models = get_running_models()
-    assert len(running_models) > 0
-
-    model_workers = get_configured_models()
-
-    availabel_new_models = list(set(model_workers) - set(running_models))
-    assert len(availabel_new_models) > 0
-    print(availabel_new_models)
-
-    local_models = [x for x in running_models if not get_model_worker_config(x).get("online_api")]
-    model_name = random.choice(local_models)
-    new_model_name = random.choice(availabel_new_models)
-    print(f"\n尝试将模型从 {model_name} 切换到 {new_model_name}")
-
-    r = requests.post(url, json={"model_name": model_name, "new_model_name": new_model_name})
-    assert r.status_code == 200
-
-    running_models = get_running_models()
-    assert new_model_name in running_models
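The deleted test exercised the fastchat model-management endpoints (list_running_models, change_model), which no longer exist after this commit. A minimal replacement smoke test one might keep is sketched below; the /docs path is only an assumption based on the FastAPI server at port 7861:

    import requests

    def test_api_alive(api_base_url: str = "http://127.0.0.1:7861"):
        # FastAPI serves interactive docs at /docs by default.
        r = requests.get(api_base_url + "/docs")
        assert r.status_code == 200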