Remove the fastchat configuration

glide-the 2024-01-18 01:17:11 +08:00 committed by liunux4odoo
parent 48fb6b83fd
commit 49f6760702
7 changed files with 55 additions and 333 deletions

View File

@@ -36,3 +36,5 @@ BASE_TEMP_DIR = os.path.join(tempfile.gettempdir(), "chatchat")
if os.path.isdir(BASE_TEMP_DIR):
shutil.rmtree(BASE_TEMP_DIR)
os.makedirs(BASE_TEMP_DIR, exist_ok=True)
MEDIA_PATH = None
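For readers unfamiliar with the new MEDIA_PATH setting, here is a minimal sketch of how a None value might be treated as "media storage disabled"; the helper below is hypothetical and not part of this commit.

import os

MEDIA_PATH = None  # as configured above

def resolve_media_file(filename: str, media_path=MEDIA_PATH):
    # Hypothetical helper: a None media_path is treated as "media storage disabled".
    if media_path is None:
        return None
    return os.path.join(media_path, filename)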

View File

@@ -1,6 +1,6 @@
import os
# Default knowledge base to use
DEFAULT_KNOWLEDGE_BASE = "samples"
# Default vector store / full-text search engine type. Options: faiss, milvus (offline) & zilliz (online), pgvector; full-text search engine: es
@@ -24,15 +24,16 @@ VECTOR_SEARCH_TOP_K = 3
# Relevance threshold for knowledge base matching, in the range 0-1. A lower SCORE means higher relevance; 1 is equivalent to no filtering. A value around 0.5 is recommended.
SCORE_THRESHOLD = 1
# Whether to enable Chinese title enhancement, and its related settings.
# Title detection marks which chunks are titles and tags them in the metadata,
# then each chunk is concatenated with its nearest parent title to enrich the text.
ZH_TITLE_ENHANCE = False
# Default search engine. Options: bing, duckduckgo, metaphor
DEFAULT_SEARCH_ENGINE = "duckduckgo"
# Number of search engine results to return
SEARCH_ENGINE_TOP_K = 3
ZH_TITLE_ENHANCE = False
# Introductory description for each knowledge base, shown at initialization and used by Agent calls. If omitted, the knowledge base has no description and will not be called by the Agent.
KB_INFO = {
"知识库名称": "知识库介绍",
"samples": "关于本项目issue的解答",
}
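As a rough illustration of the ZH_TITLE_ENHANCE behaviour described in the comments above, here is a self-contained sketch; the function names and the title heuristic are assumptions for illustration, not the project's actual implementation. The idea: detect title-like chunks, tag them in metadata, and prefix each following chunk with the nearest preceding title.

def is_probable_title(text: str) -> bool:
    # Naive stand-in heuristic: a short line without sentence-ending punctuation.
    text = text.strip()
    return 0 < len(text) <= 30 and not text.endswith(("。", ".", "!", "?"))

def enhance_with_titles(chunks: list[dict]) -> list[dict]:
    current_title = None
    for chunk in chunks:
        if is_probable_title(chunk["text"]):
            chunk.setdefault("metadata", {})["is_title"] = True
            current_title = chunk["text"]
        elif current_title:
            # Concatenate the chunk with its parent title to enrich retrieval.
            chunk["text"] = f"{current_title}\n{chunk['text']}"
    return chunks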

View File

@@ -22,29 +22,54 @@ SUPPORT_AGENT_MODELS = [
]
LLM_MODEL_CONFIG = {
"preprocess_model": {
# "Mixtral-8x7B-v0.1": {
# "temperature": 0.01,
# "max_tokens": 5,
# "prompt_name": "default",
# "callbacks": False
# },
"chatglm3-6b": {
"temperature": 0.01,
"max_tokens": 5,
"temperature": 0.05,
"max_tokens": 4096,
"prompt_name": "default",
"callbacks": False
},
},
"llm_model": {
# "Mixtral-8x7B-v0.1": {
# "temperature": 0.9,
# "max_tokens": 4000,
# "history_len": 5,
# "prompt_name": "default",
# "callbacks": True
# },
"chatglm3-6b": {
"temperature": 0.9,
"max_tokens": 4000,
"history_len": 5,
"temperature": 0.05,
"max_tokens": 4096,
"prompt_name": "default",
"history_len": 10,
"callbacks": True
},
},
"action_model": {
# "Qwen-14B-Chat": {
# "temperature": 0.05,
# "max_tokens": 4096,
# "prompt_name": "qwen",
# "callbacks": True
# },
"chatglm3-6b": {
"temperature": 0.05,
"max_tokens": 4096,
"prompt_name": "ChatGLM3",
"callbacks": True
},
# "zhipu-api": {
# "temperature": 0.01,
# "max_tokens": 4096,
# "prompt_name": "ChatGLM3",
# "callbacks": True
# }
},
"postprocess_model": {
@@ -55,11 +80,6 @@ LLM_MODEL_CONFIG = {
"callbacks": True
}
},
"image_model": {
"sd-turbo": {
"size": "1024x1024",
}
}
}
LLM_DEVICE = "auto"
ONLINE_LLM_MODEL = {
@@ -69,11 +89,6 @@ ONLINE_LLM_MODEL = {
"api_key": "sk-",
"openai_proxy": "",
},
"sd-turbo": {
"model_name": "sd-turbo",
"api_base_url": "http://127.0.0.1:9997/v1",
"api_key": "EMPTY",
},
"zhipu-api": {
"api_key": "",
"version": "chatglm_turbo",
@@ -174,91 +189,8 @@ MODEL_PATH = {
"piccolo-large-zh": "sensenova/piccolo-large-zh",
"nlp_gte_sentence-embedding_chinese-large": "/Models/nlp_gte_sentence-embedding_chinese-large",
"text-embedding-ada-002": "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",
},
"llm_model": {
"vicuna-7b-v1.5": "/share/official_pretrains/hf_home/vicuna-7b-v1.5",
"Mixtral-8x7B-v0.1": "/share/home/zyx/Models/Mixtral-8x7B-v0.1",
"chatglm2-6b": "THUDM/chatglm2-6b",
"chatglm2-6b-32k": "THUDM/chatglm2-6b-32k",
"chatglm3-6b": "/share/home/zyx/Models/chatglm3-6b",
"chatglm3-6b-32k": "THUDM/chatglm3-6b-32k",
"Yi-34B-Chat": "/share/home/zyx/Models/Yi-34B-Chat",
"BlueLM-7B-Chat": "/Models/BlueLM-7B-Chat",
"baichuan2-13b": "/media/zr/Data/Models/LLM/Baichuan2-13B-Chat",
"baichuan2-7b": "/media/zr/Data/Models/LLM/Baichuan2-7B-Chat",
"baichuan-7b": "baichuan-inc/Baichuan-7B",
"baichuan-13b": "baichuan-inc/Baichuan-13B",
'baichuan-13b-chat': 'baichuan-inc/Baichuan-13B-Chat',
"aquila-7b": "BAAI/Aquila-7B",
"aquilachat-7b": "BAAI/AquilaChat-7B",
"internlm-7b": "internlm/internlm-7b",
"internlm-chat-7b": "internlm/internlm-chat-7b",
"internlm2-chat-7b": "internlm/internlm2-chat-7b",
"internlm2-chat-20b": "internlm/internlm2-chat-20b",
"BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat",
"BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k",
"Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat",
"agentlm-7b": "THUDM/agentlm-7b",
"agentlm-13b": "THUDM/agentlm-13b",
"agentlm-70b": "THUDM/agentlm-70b",
"falcon-7b": "tiiuae/falcon-7b",
"falcon-40b": "tiiuae/falcon-40b",
"falcon-rw-7b": "tiiuae/falcon-rw-7b",
"aquila-7b": "BAAI/Aquila-7B",
"aquilachat-7b": "BAAI/AquilaChat-7B",
"open_llama_13b": "openlm-research/open_llama_13b",
"vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5",
"koala": "young-geng/koala",
"mpt-7b": "mosaicml/mpt-7b",
"mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter",
"mpt-30b": "mosaicml/mpt-30b",
"opt-66b": "facebook/opt-66b",
"opt-iml-max-30b": "facebook/opt-iml-max-30b",
"gpt2": "gpt2",
"gpt2-xl": "gpt2-xl",
"gpt-j-6b": "EleutherAI/gpt-j-6b",
"gpt4all-j": "nomic-ai/gpt4all-j",
"gpt-neox-20b": "EleutherAI/gpt-neox-20b",
"pythia-12b": "EleutherAI/pythia-12b",
"oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
"dolly-v2-12b": "databricks/dolly-v2-12b",
"stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b",
"Llama-2-13b-hf": "meta-llama/Llama-2-13b-hf",
"Llama-2-70b-hf": "meta-llama/Llama-2-70b-hf",
"open_llama_13b": "openlm-research/open_llama_13b",
"vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3",
"koala": "young-geng/koala",
"mpt-7b": "mosaicml/mpt-7b",
"mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter",
"mpt-30b": "mosaicml/mpt-30b",
"opt-66b": "facebook/opt-66b",
"opt-iml-max-30b": "facebook/opt-iml-max-30b",
"Qwen-1_8B-Chat": "/home/zr/Models/Qwen-1_8B-Chat",
"Qwen-7B": "Qwen/Qwen-7B",
"Qwen-14B": "Qwen/Qwen-14B",
"Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
"Qwen-14B-Chat": "/share/home/zyx/Models/Qwen-14B-Chat",
"Qwen-14B-Chat-Int8": "Qwen/Qwen-14B-Chat-Int8",
"Qwen-14B-Chat-Int4": "/media/zr/Data/Models/LLM/Qwen-14B-Chat-Int4",
},
}
}
NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data")
@@ -315,3 +247,7 @@ VLLM_MODEL_DICT = {
"agentlm-70b": "THUDM/agentlm-70b",
}
LOOM_CONFIG = "/media/gpt4-pdf-chatbot-langchain/LooM/src/core/loom.yaml"
OPENAI_KEY = None
OPENAI_PROXY = None
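For orientation, a hedged sketch of how an entry from LLM_MODEL_CONFIG["llm_model"] and an OpenAI-compatible endpoint from ONLINE_LLM_MODEL might be combined into a client; the wiring below is an assumption for illustration, not the project's actual factory code.

from langchain_openai.chat_models import ChatOpenAI

def build_chat_model(model_name: str, llm_model_config: dict, online_llm_model: dict) -> ChatOpenAI:
    # Assumed wiring only: read sampling parameters and endpoint details from the dicts above.
    params = llm_model_config.get("llm_model", {}).get(model_name, {})
    endpoint = online_llm_model.get(model_name, {})
    return ChatOpenAI(
        model=model_name,
        temperature=params.get("temperature", 0.05),
        max_tokens=params.get("max_tokens", 4096),
        openai_api_base=endpoint.get("api_base_url"),
        openai_api_key=endpoint.get("api_key", "EMPTY"),
    )

# e.g. build_chat_model("chatglm3-6b", LLM_MODEL_CONFIG, ONLINE_LLM_MODEL)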

View File

@@ -1,7 +1,7 @@
PROMPT_TEMPLATES = {
"preprocess_model": {
"default":
'你只要回复0 和 1 ,代表不需要使用工具。以下几种问题需要使用工具:'
'你只要回复0 和 1 ,代表不需要使用工具。以下几种问题需要使用工具:'
'1. 需要联网查询的内容\n'
'2. 需要计算的内容\n'
'3. 需要查询实时性的内容\n'
@@ -45,9 +45,9 @@ PROMPT_TEMPLATES = {
'Thought: I now know the final answer\n'
'Final Answer: the final answer to the original input question\n'
'Begin! Reminder to always use the exact characters `Final Answer` when responding.\n'
'history: {history}\n'
'Question:{input}\n'
'Thought:{agent_scratchpad}\n',
"ChatGLM3":
'You can answer using the tools.Respond to the human as helpfully and accurately as possible.\n'
'You have access to the following tools:\n'
@@ -81,7 +81,7 @@ PROMPT_TEMPLATES = {
'Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary.\n'
'Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.\n'
'Question: {input}\n\n'
'Thought: {agent_scratchpad}\n',
'{agent_scratchpad}\n',
"qwen":
'Answer the following question as best you can. You have access to the following APIs:\n\n'
'{tools}\n\n'
@@ -130,7 +130,7 @@ TOOL_CONFIG = {
"bing": {
"result_len": 3,
"bing_search_url": "https://api.bing.microsoft.com/v7.0/search",
"bing_key": "your bing key",
"bing_key": "680a39347d7242c5bd2d7a9576a125b7",
},
"metaphor": {
"result_len": 3,
@@ -160,7 +160,7 @@ TOOL_CONFIG = {
},
"weather_check": {
"use": False,
"api-key": "your key",
"api-key": "S8vrB4U_-c5mvAMiK",
},
"search_youtube": {
"use": False,
@@ -171,21 +171,12 @@ TOOL_CONFIG = {
"calculate": {
"use": False,
},
"text2images": {
"use": False,
},
# Use THUDM/cogvlm-chat-hf as default
"vqa_processor": {
"use": False,
"model_path": "your model path",
"tokenizer_path": "your tokenizer path",
"device": "cuda:1"
},
# Use Qwen/Qwen-Audio-Chat as default
"aqa_processor": {
"use": False,
"model_path": "your model path",

View File

@@ -6,15 +6,15 @@ HTTPX_DEFAULT_TIMEOUT = 300.0
# Whether the API allows cross-origin requests; defaults to False. Set to True to enable.
# is open cross domain
OPEN_CROSS_DOMAIN = False
OPEN_CROSS_DOMAIN = True
# Default bind host for each server. If changed to "0.0.0.0", the host of every XX_SERVER below must be changed accordingly.
DEFAULT_BIND_HOST = "0.0.0.0" if sys.platform != "win32" else "127.0.0.1"
DEFAULT_BIND_HOST = "127.0.0.1" if sys.platform != "win32" else "127.0.0.1"
# webui.py server
WEBUI_SERVER = {
"host": DEFAULT_BIND_HOST,
"port": 8501,
"port": 7870,
}
# api.py server
@@ -23,121 +23,3 @@ API_SERVER = {
"port": 7861,
}
# fastchat openai_api server
FSCHAT_OPENAI_API = {
"host": DEFAULT_BIND_HOST,
"port": 20000,
}
# fastchat model_worker server
# These models must be correctly configured in model_config.MODEL_PATH or ONLINE_MODEL.
# When launching startup.py, models can be specified with `--model-name xxxx yyyy`; if not specified, LLM_MODELS is used.
FSCHAT_MODEL_WORKERS = {
# Default settings shared by all models; they can be overridden in each model's own section.
"default": {
"host": DEFAULT_BIND_HOST,
"port": 20002,
"device": LLM_DEVICE,
# Inference acceleration framework: False or 'vllm'. If HuggingFace connectivity issues appear when using vllm, see doc/FAQ.
# vllm support for some models is still immature, so it is disabled by default for now.
# fschat==0.2.33 has a bug; to use vllm, patch fastchat.server.vllm_worker in the source:
# on line 103, change the stop=list(stop) argument of sampling_params = SamplingParams to stop=[i for i in stop if i != ""]
"infer_turbo": False,
# Parameters needed for multi-GPU loading in model_worker
# "gpus": None, # GPUs to use, given as a str such as "0,1"; if this has no effect, set CUDA_VISIBLE_DEVICES="0,1" instead
# "num_gpus": 1, # number of GPUs to use
# "max_gpu_memory": "20GiB", # maximum VRAM to use per GPU
# The following are rarely used model_worker parameters; configure them as needed
# "load_8bit": False, # enable 8-bit quantization
# "cpu_offloading": None,
# "gptq_ckpt": None,
# "gptq_wbits": 16,
# "gptq_groupsize": -1,
# "gptq_act_order": False,
# "awq_ckpt": None,
# "awq_wbits": 16,
# "awq_groupsize": -1,
# "model_names": None,
# "conv_template": None,
# "limit_worker_concurrency": 5,
# "stream_interval": 2,
# "no_register": False,
# "embed_in_truncate": False,
# The following are vllm_worker parameters; note that vllm requires a GPU and has only been tested on Linux
# tokenizer = model_path # add here if the tokenizer differs from model_path
# 'tokenizer_mode':'auto',
# 'trust_remote_code':True,
# 'download_dir':None,
# 'load_format':'auto',
# 'dtype':'auto',
# 'seed':0,
# 'worker_use_ray':False,
# 'pipeline_parallel_size':1,
# 'tensor_parallel_size':1,
# 'block_size':16,
# 'swap_space':4 , # GiB
# 'gpu_memory_utilization':0.90,
# 'max_num_batched_tokens':2560,
# 'max_num_seqs':256,
# 'disable_log_stats':False,
# 'conv_template':None,
# 'limit_worker_concurrency':5,
# 'no_register':False,
# 'num_gpus': 1
# 'engine_use_ray': False,
# 'disable_log_requests': False
},
# The defaults can be overridden per model as in the following example
# "Qwen-1_8B-Chat": { # uses the host and port from "default"
# "device": "cpu",
# },
"chatglm3-6b": { # 使用default中的IP和端口
"device": "cuda",
},
# The settings below usually need no changes; set which models to launch in model_config
"zhipu-api": {
"port": 21001,
},
"minimax-api": {
"port": 21002,
},
"xinghuo-api": {
"port": 21003,
},
"qianfan-api": {
"port": 21004,
},
"fangzhou-api": {
"port": 21005,
},
"qwen-api": {
"port": 21006,
},
"baichuan-api": {
"port": 21007,
},
"azure-api": {
"port": 21008,
},
"tiangong-api": {
"port": 21009,
},
}
# fastchat multi model worker server
FSCHAT_MULTI_MODEL_WORKERS = {
# TODO:
}
# fastchat controller server
FSCHAT_CONTROLLER = {
"host": DEFAULT_BIND_HOST,
"port": 20001,
"dispatch_method": "shortest_queue",
}
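As a quick reference for how the host/port settings above are typically turned into base URLs, here is a sketch written from the config alone; the project's own api_address() helper in server.utils may differ in detail.

def make_address(server: dict) -> str:
    host = server["host"]
    if host == "0.0.0.0":
        # Clients should not dial the wildcard bind address.
        host = "127.0.0.1"
    return f"http://{host}:{server['port']}"

# make_address(API_SERVER)   -> "http://127.0.0.1:7861"
# make_address(WEBUI_SERVER) -> "http://127.0.0.1:7870"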

View File

@@ -6,7 +6,7 @@ from pathlib import Path
import asyncio
from configs import (LLM_MODEL_CONFIG, LLM_DEVICE, EMBEDDING_DEVICE,
MODEL_PATH, MODEL_ROOT_PATH, ONLINE_LLM_MODEL, logger, log_verbose,
FSCHAT_MODEL_WORKERS, HTTPX_DEFAULT_TIMEOUT)
HTTPX_DEFAULT_TIMEOUT)
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from langchain_openai.chat_models import ChatOpenAI
@@ -319,20 +319,6 @@ def list_embed_models() -> List[str]:
return list(MODEL_PATH["embed_model"])
def list_config_llm_models() -> Dict[str, Dict]:
'''
get configured llm models with different types.
return {config_type: {model_name: config}, ...}
'''
workers = FSCHAT_MODEL_WORKERS.copy()
workers.pop("default", None)
return {
"local": MODEL_PATH["llm_model"].copy(),
"online": ONLINE_LLM_MODEL.copy(),
"worker": workers,
}
def get_model_path(model_name: str, type: str = None) -> Optional[str]:
if type in MODEL_PATH:
@@ -368,14 +354,8 @@ def get_model_worker_config(model_name: str = None) -> dict:
Load the model worker configuration.
Priority: FSCHAT_MODEL_WORKERS[model_name] > ONLINE_LLM_MODEL[model_name] > FSCHAT_MODEL_WORKERS["default"]
'''
from configs.model_config import ONLINE_LLM_MODEL, MODEL_PATH
from configs.server_config import FSCHAT_MODEL_WORKERS
config = FSCHAT_MODEL_WORKERS.get("default", {}).copy()
config.update(ONLINE_LLM_MODEL.get(model_name, {}).copy())
config.update(FSCHAT_MODEL_WORKERS.get(model_name, {}).copy())
return config
return {}
def api_address() -> str:
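Since get_model_worker_config() is reduced to returning an empty dict, callers that still need per-model settings can read the remaining configuration sources directly. A hypothetical sketch follows; the helper name and merge order are assumptions, not part of this commit.

def get_model_config(model_name: str) -> dict:
    # Hypothetical fallback lookup now that the fastchat worker config is gone.
    from configs.model_config import ONLINE_LLM_MODEL, MODEL_PATH
    config = dict(ONLINE_LLM_MODEL.get(model_name, {}))
    if model_name in MODEL_PATH.get("llm_model", {}):
        config["model_path"] = MODEL_PATH["llm_model"][model_name]
    return config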

View File

@@ -1,70 +0,0 @@
import requests
import json
import sys
from pathlib import Path
root_path = Path(__file__).parent.parent.parent
sys.path.append(str(root_path))
from configs.server_config import FSCHAT_MODEL_WORKERS
from server.utils import api_address, get_model_worker_config
from pprint import pprint
import random
from typing import List
def get_configured_models() -> List[str]:
model_workers = list(FSCHAT_MODEL_WORKERS)
if "default" in model_workers:
model_workers.remove("default")
return model_workers
api_base_url = api_address()
def get_running_models(api="/llm_model/list_models"):
url = api_base_url + api
r = requests.post(url)
if r.status_code == 200:
return r.json()["data"]
return []
def test_running_models(api="/llm_model/list_running_models"):
url = api_base_url + api
r = requests.post(url)
assert r.status_code == 200
print("\n获取当前正在运行的模型列表:")
pprint(r.json())
assert isinstance(r.json()["data"], list)
assert len(r.json()["data"]) > 0
# Using the stop_model feature is not recommended; with the current implementation, a stopped model can only be restarted manually.
# def test_stop_model(api="/llm_model/stop"):
# url = api_base_url + api
# r = requests.post(url, json={""})
def test_change_model(api="/llm_model/change_model"):
url = api_base_url + api
running_models = get_running_models()
assert len(running_models) > 0
model_workers = get_configured_models()
available_new_models = list(set(model_workers) - set(running_models))
assert len(available_new_models) > 0
print(available_new_models)
local_models = [x for x in running_models if not get_model_worker_config(x).get("online_api")]
model_name = random.choice(local_models)
new_model_name = random.choice(available_new_models)
print(f"\n尝试将模型从 {model_name} 切换到 {new_model_name}")
r = requests.post(url, json={"model_name": model_name, "new_model_name": new_model_name})
assert r.status_code == 200
running_models = get_running_models()
assert new_model_name in running_models