diff --git a/configs/__init__.py b/configs/__init__.py
index 86f8dcf7..0e2d830a 100644
--- a/configs/__init__.py
+++ b/configs/__init__.py
@@ -1,8 +1,8 @@
-from .basic_config import *
-from .model_config import *
-from .kb_config import *
-from .server_config import *
-from .prompt_config import *
-
-
-VERSION = "v0.3.0-preview"
+from .basic_config import *
+from .model_config import *
+from .kb_config import *
+from .server_config import *
+from .prompt_config import *
+
+
+VERSION = "v0.3.0-preview"
diff --git a/configs/basic_config.py.example b/configs/basic_config.py.example
index 7b50365f..a22fb977 100644
--- a/configs/basic_config.py.example
+++ b/configs/basic_config.py.example
@@ -25,8 +25,6 @@ if not os.path.exists(LOG_PATH):
 # 临时文件目录,主要用于文件对话
 BASE_TEMP_DIR = os.path.join(tempfile.gettempdir(), "chatchat")
-try:
+if os.path.isdir(BASE_TEMP_DIR):
     shutil.rmtree(BASE_TEMP_DIR)
-except Exception:
-    pass
 os.makedirs(BASE_TEMP_DIR, exist_ok=True)
diff --git a/configs/kb_config.py.example b/configs/kb_config.py.example
index 00a12991..12e989b6 100644
--- a/configs/kb_config.py.example
+++ b/configs/kb_config.py.example
@@ -3,7 +3,7 @@ import os

 # 默认使用的知识库
 DEFAULT_KNOWLEDGE_BASE = "samples"

-# 默认向量库/全文检索引擎类型。可选:faiss, milvus(离线) & zilliz(在线), pgvector, chromadb 全文检索引擎es
+# 默认向量库/全文检索引擎类型。可选:faiss, milvus(离线) & zilliz(在线), pgvector,全文检索引擎es
 DEFAULT_VS_TYPE = "faiss"
 # 缓存向量库数量(针对FAISS)
@@ -21,44 +21,14 @@ OVERLAP_SIZE = 50
 # 知识库匹配向量数量
 VECTOR_SEARCH_TOP_K = 3

-# 知识库匹配的距离阈值,一般取值范围在0-1之间,SCORE越小,距离越小从而相关度越高。
-# 但有用户报告遇到过匹配分值超过1的情况,为了兼容性默认设为1,在WEBUI中调整范围为0-2
-SCORE_THRESHOLD = 1.0
-
-# 默认搜索引擎。可选:bing, duckduckgo, metaphor
-DEFAULT_SEARCH_ENGINE = "duckduckgo"
-
-# 搜索引擎匹配结题数量
-SEARCH_ENGINE_TOP_K = 3
-
-
-# Bing 搜索必备变量
-# 使用 Bing 搜索需要使用 Bing Subscription Key,需要在azure port中申请试用bing search
-# 具体申请方式请见
-# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource
-# 使用python创建bing api 搜索实例详见:
-# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python
-BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
-# 注意不是bing Webmaster Tools的api key,
-
-# 此外,如果是在服务器上,报Failed to establish a new connection: [Errno 110] Connection timed out
-# 是因为服务器加了防火墙,需要联系管理员加白名单,如果公司的服务器的话,就别想了GG
-BING_SUBSCRIPTION_KEY = ""
-
-# metaphor搜索需要KEY
-METAPHOR_API_KEY = ""
-
-# 心知天气 API KEY,用于天气Agent。申请:https://www.seniverse.com/
-SENIVERSE_API_KEY = ""
+# 知识库匹配相关度阈值,取值范围在0-1之间,SCORE越小,相关度越高,取到1相当于不筛选,建议设置在0.5左右
+SCORE_THRESHOLD = 1

 # 是否开启中文标题加强,以及标题增强的相关配置
 # 通过增加标题判断,判断哪些文本为标题,并在metadata中进行标记;
 # 然后将文本与往上一级的标题进行拼合,实现文本信息的增强。
 ZH_TITLE_ENHANCE = False

-# PDF OCR 控制:只对宽高超过页面一定比例(图片宽/页面宽,图片高/页面高)的图片进行 OCR。
-# 这样可以避免 PDF 中一些小图片的干扰,提高非扫描版 PDF 处理速度
-PDF_OCR_THRESHOLD = (0.6, 0.6)

 # 每个知识库的初始化介绍,用于在初始化知识库时显示和Agent调用,没写则没有介绍,不会被Agent调用。
 KB_INFO = {
@@ -106,12 +76,7 @@ kbs_config = {
         "index_name": "test_index",
         "user": "",
         "password": ""
-    },
-    "milvus_kwargs":{
-        "search_params":{"metric_type": "L2"}, #在此处增加search_params
-        "index_params":{"metric_type": "L2","index_type": "HNSW"} # 在此处增加index_params
-    },
-    "chromadb": {}
+    }
 }

 # TextSplitter配置项,如果你不明白其中的含义,就不要修改。
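Note on the kb_config hunk above: it rewrites SCORE_THRESHOLD from a distance threshold (WEBUI range 0-2, values above 1 tolerated) to a relevance threshold on 0-1 where 1 disables filtering. Either way the mechanics are the same: retrieval keeps only hits whose score clears the threshold, then truncates to VECTOR_SEARCH_TOP_K. A minimal sketch, assuming FAISS-style (document, score) pairs where a smaller L2 distance means a closer match; the function is illustrative, not the project's actual retrieval API:

```python
from typing import Any, List, Tuple

def filter_by_score(hits: List[Tuple[Any, float]],
                    top_k: int = 3,               # VECTOR_SEARCH_TOP_K
                    score_threshold: float = 1.0  # SCORE_THRESHOLD; 1 keeps everything here
                    ) -> List[Tuple[Any, float]]:
    """Keep the top_k closest hits whose distance is within the threshold."""
    kept = [(doc, score) for doc, score in hits if score <= score_threshold]
    kept.sort(key=lambda pair: pair[1])  # smallest distance first
    return kept[:top_k]
```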
diff --git a/configs/model_config.py.example b/configs/model_config.py.example
index 610694f2..ea7ac90d 100644
--- a/configs/model_config.py.example
+++ b/configs/model_config.py.example
@@ -1,4 +1,5 @@
 import os
+
 MODEL_ROOT_PATH = ""
 EMBEDDING_MODEL = "bge-large-zh-v1.5"  # bge-large-zh
 EMBEDDING_DEVICE = "auto"
@@ -12,32 +13,24 @@ RERANKER_MAX_LENGTH = 1024
 # 如果需要在 EMBEDDING_MODEL 中增加自定义的关键字时配置
 EMBEDDING_KEYWORD_FILE = "keywords.txt"
 EMBEDDING_MODEL_OUTPUT_PATH = "output"
+
 SUPPORT_AGENT_MODELS = [
     "chatglm3-6b",
-    "Qwen-14b-Chat",
-    "Qwen-1_8B-Chat",
-    "openai-api"
+    "openai-api",
+    "Qwen-14B-Chat",
+    "Qwen-7B-Chat",
 ]

 LLM_MODEL_CONFIG = {
-    # 意图识别不需要输出,模型后台知道就行
     "preprocess_model": {
-        "zhipu-api": {
-            "temperature": 0.4,
-            "max_tokens": 2048,
-            "history_len": 100,
+        "chatglm3-6b": {
+            "temperature": 0.01,
+            "max_tokens": 5,
             "prompt_name": "default",
             "callbacks": False
         },
     },
     "llm_model": {
         "chatglm3-6b": {
-            "temperature": 0.9,
-            "max_tokens": 4096,
-            "history_len": 3,
-            "prompt_name": "default",
-            "callbacks": True
-        },
-        "zhipu-api": {
             "temperature": 0.9,
             "max_tokens": 4000,
             "history_len": 5,
@@ -47,111 +40,28 @@ LLM_MODEL_CONFIG = {
     },
     "action_model": {
         "chatglm3-6b": {
-            "temperature": 0.01,
+            "temperature": 0.05,
             "max_tokens": 4096,
             "prompt_name": "ChatGLM3",
             "callbacks": True
         },
-        "openai-api": {
-            "temperature": 0.01,
-            "max_tokens": 4096,
-            "prompt_name": "GPT-4",
-            "callbacks": True
-        },
+
     },
     "postprocess_model": {
-        "chatglm3-6b": {
+        "zhipu-api": {
             "temperature": 0.01,
             "max_tokens": 4096,
             "prompt_name": "default",
             "callbacks": True
         }
     },
-    }
-
-TOOL_CONFIG = {
-    "search_local_knowledgebase": {
-        "use": True,
-        "top_k": 10,
-        "score_threshold": 1,
-        "conclude_prompt": {
-            "with_result":
-                '<指令>根据已知信息,简洁和专业的来回答问题。如果无法从中得到答案,请说 "根据已知信息无法回答该问题",'
-                '不允许在答案中添加编造成分,答案请使用中文。 </指令>\n'
-                '<已知信息>{{ context }}</已知信息>\n'
-                '<问题>{{ question }}</问题>\n',
-            "without_result":
-                '请你根据我的提问回答我的问题:\n'
-                '{{ question }}\n'
-                '请注意,你必须在回答结束后强调,你的回答是根据你的经验回答而不是参考资料回答的。\n',
-        }
-    },
-    "search_internet": {
-        "use": True,
-        "search_engine_name": "bing",
-        "search_engine_config":
-            {
-                "bing": {
-                    "result_len": 3,
-                    "bing_search_url": "https://api.bing.microsoft.com/v7.0/search",
-                    "bing_key": "",
-                },
-                "metaphor": {
-                    "result_len": 3,
-                    "metaphor_api_key": "",
-                    "split_result": False,
-                    "chunk_size": 500,
-                    "chunk_overlap": 0,
-                },
-                "duckduckgo": {
-                    "result_len": 3
-                }
-            },
-        "top_k": 10,
-        "verbose": "Origin",
-        "conclude_prompt":
-            "<指令>这是搜索到的互联网信息,请你根据这些信息进行提取并有调理,简洁的回答问题。如果无法从中得到答案,请说 “无法搜索到能回答问题的内容”。 "
-            "</指令>\n<已知信息>{{ context }}</已知信息>\n"
-            "<问题>\n"
-            "{{ question }}\n"
-            "</问题>\n"
-    },
-    "arxiv": {
-        "use": True,
-    },
-    "shell": {
-        "use": True,
-    },
-    "weather_check": {
-        "use": True,
-        "api-key": "",
-    },
-    "search_youtube": {
-        "use": False,
-    },
-    "wolfram": {
-        "use": False,
-    },
-    "calculate": {
-        "use": False,
-    },
-    "aqa_processor": {
-        "use": False,
-    },
-    "vqa_processor": {
-        "use": False,
-    },
-}
-
-# LLM 模型运行设备。设为"auto"会自动检测(会有警告),也可手动设定为 "cuda","mps","cpu","xpu" 其中之一。
 LLM_DEVICE = "auto"
-
 ONLINE_LLM_MODEL = {
     "openai-api": {
         "model_name": "gpt-4-1106-preview",
         "api_base_url": "https://api.openai.com/v1",
-        "api_key": "",
+        "api_key": "sk-",
         "openai_proxy": "",
     },
     "zhipu-api": {
@@ -249,24 +159,31 @@ MODEL_PATH = {
         "bge-large-zh": "/media/zr/Data/Models/Embedding/bge-large-zh",
         "bge-large-zh-noinstruct": "BAAI/bge-large-zh-noinstruct",
         "bge-base-zh-v1.5": "BAAI/bge-base-zh-v1.5",
-        "bge-large-zh-v1.5": "/Models/bge-large-zh-v1.5",
+        "bge-large-zh-v1.5": "/share/home/zyx/Models/bge-large-zh-v1.5",
         "piccolo-base-zh": "sensenova/piccolo-base-zh",
         "piccolo-large-zh": "sensenova/piccolo-large-zh",
         "nlp_gte_sentence-embedding_chinese-large": "/Models/nlp_gte_sentence-embedding_chinese-large",
-        "text-embedding-ada-002": "Just write your OpenAI key like "sk-o3IGBhC9g8AiFvTGWVKsT*****" ",
+        "text-embedding-ada-002": "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",
     },

     "llm_model": {
+
+        "vicuna-7b-v1.5": "/share/official_pretrains/hf_home/vicuna-7b-v1.5",
+
+        "Mixtral-8x7B-v0.1": "/share/home/zyx/Models/Mixtral-8x7B-v0.1",
+
         "chatglm2-6b": "THUDM/chatglm2-6b",
         "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k",
-        "chatglm3-6b": "/Models/chatglm3-6b",
+
+        "chatglm3-6b": "/share/home/zyx/Models/chatglm3-6b",
         "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k",
-        "Yi-34B-Chat": "/data/share/models/Yi-34B-Chat",
+        "Yi-34B-Chat": "/share/home/zyx/Models/Yi-34B-Chat",
         "BlueLM-7B-Chat": "/Models/BlueLM-7B-Chat",
         "baichuan2-13b": "/media/zr/Data/Models/LLM/Baichuan2-13B-Chat",
         "baichuan2-7b": "/media/zr/Data/Models/LLM/Baichuan2-7B-Chat",
+
         "baichuan-7b": "baichuan-inc/Baichuan-7B",
         "baichuan-13b": "baichuan-inc/Baichuan-13B",
         'baichuan-13b-chat': 'baichuan-inc/Baichuan-13B-Chat',
@@ -324,13 +241,13 @@ MODEL_PATH = {
         "opt-66b": "facebook/opt-66b",
         "opt-iml-max-30b": "facebook/opt-iml-max-30b",

-        "Qwen-1_8B-Chat":"Qwen/Qwen-1_8B-Chat"
+        "Qwen-1_8B-Chat": "/home/zr/Models/Qwen-1_8B-Chat",
         "Qwen-7B": "Qwen/Qwen-7B",
         "Qwen-14B": "Qwen/Qwen-14B",
         "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
-        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
-        "Qwen-14B-Chat-Int8": "Qwen/Qwen-14B-Chat-Int8",  # 确保已经安装了auto-gptq optimum flash-attn
-        "Qwen-14B-Chat-Int4": "/media/zr/Data/Models/LLM/Qwen-14B-Chat-Int4",  # 确保已经安装了auto-gptq optimum flash-attn
+        "Qwen-14B-Chat": "/share/home/zyx/Models/Qwen-14B-Chat",
+        "Qwen-14B-Chat-Int8": "Qwen/Qwen-14B-Chat-Int8",
+        "Qwen-14B-Chat-Int4": "/media/zr/Data/Models/LLM/Qwen-14B-Chat-Int4",
     },
 }

 NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data")
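MODEL_PATH mixes absolute local checkouts (e.g. /share/home/zyx/Models/chatglm3-6b) with Hugging Face repo ids (e.g. Qwen/Qwen-7B-Chat), with MODEL_ROOT_PATH as an optional common prefix. A sketch of the lookup order such a table implies — the fallback logic below is an assumption about typical behavior, not a copy of the project's loader:

```python
import os

def resolve_model_path(name: str, paths: dict, root: str = "") -> str:
    """Illustrative resolution: absolute dir -> dir under MODEL_ROOT_PATH -> hub repo id."""
    entry = paths.get(name, name)
    if os.path.isdir(entry):
        return entry                      # absolute local checkout
    candidate = os.path.join(root, entry) if root else ""
    if candidate and os.path.isdir(candidate):
        return candidate                  # found under MODEL_ROOT_PATH
    return entry                          # fall back to a Hugging Face repo id
```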
"vicuna-7b-v1.5": "/share/official_pretrains/hf_home/vicuna-7b-v1.5", + + "Mixtral-8x7B-v0.1": "/share/home/zyx/Models/Mixtral-8x7B-v0.1", + "chatglm2-6b": "THUDM/chatglm2-6b", "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k", - "chatglm3-6b": "/Models/chatglm3-6b", + + "chatglm3-6b": "/share/home/zyx/Models/chatglm3-6b", "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k", - "Yi-34B-Chat": "/data/share/models/Yi-34B-Chat", + "Yi-34B-Chat": "/share/home/zyx/Models/Yi-34B-Chat", "BlueLM-7B-Chat": "/Models/BlueLM-7B-Chat", "baichuan2-13b": "/media/zr/Data/Models/LLM/Baichuan2-13B-Chat", "baichuan2-7b": "/media/zr/Data/Models/LLM/Baichuan2-7B-Chat", + "baichuan-7b": "baichuan-inc/Baichuan-7B", "baichuan-13b": "baichuan-inc/Baichuan-13B", 'baichuan-13b-chat': 'baichuan-inc/Baichuan-13B-Chat', @@ -324,13 +241,13 @@ MODEL_PATH = { "opt-66b": "facebook/opt-66b", "opt-iml-max-30b": "facebook/opt-iml-max-30b", - "Qwen-1_8B-Chat":"Qwen/Qwen-1_8B-Chat" + "Qwen-1_8B-Chat": "/home/zr/Models/Qwen-1_8B-Chat", "Qwen-7B": "Qwen/Qwen-7B", "Qwen-14B": "Qwen/Qwen-14B", "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat", - "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat", - "Qwen-14B-Chat-Int8": "Qwen/Qwen-14B-Chat-Int8", # 确保已经安装了auto-gptq optimum flash-attn - "Qwen-14B-Chat-Int4": "/media/zr/Data/Models/LLM/Qwen-14B-Chat-Int4", # 确保已经安装了auto-gptq optimum flash-attn + "Qwen-14B-Chat": "/share/home/zyx/Models/Qwen-14B-Chat", + "Qwen-14B-Chat-Int8": "Qwen/Qwen-14B-Chat-Int8", + "Qwen-14B-Chat-Int4": "/media/zr/Data/Models/LLM/Qwen-14B-Chat-Int4", }, NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data") diff --git a/configs/prompt_config.py.example b/configs/prompt_config.py.example index b17c4a1b..2f8dd2aa 100644 --- a/configs/prompt_config.py.example +++ b/configs/prompt_config.py.example @@ -1,15 +1,14 @@ PROMPT_TEMPLATES = { "preprocess_model": { "default": - '根据我们对话的历史,判断本次跟我交流是否需要使用工具,还是可以直接凭借你的知识或者历史记录跟我对话。' - '以下几种情况要使用工具:\n' - '1. 实时性的问题,例如查询天气,日期,地点等信息\n' - '2. 需要数学计算的问题\n' - '3. 需要查询数据,地点等精确数据\n' - '4. 需要行业知识的问题\n' - '5. 需要联网的内容\n' - '你只要回答一个数字:1代表需要使用工具,你无法为我直接提供服务。0代表不需要使用工具。你应该尽量使用工具\n' - '你只能回答0或者1' + '你只要回复0 和 1 ,代表不需要使用工具。以下几种问题不需要使用工具:' + '1. 介绍一下你自己, 回复0\n' + '2. 讲一个故事, 回复0\n' + '3. 给我开一个玩笑, 回复0\n' + '4. 我当前运行的文件夹是, 回复1\n' + '5. where is this cat, 回复1\n' + '6. 介绍一下像极了我大学, 回复1\n' + '这是我的问题:' }, "llm_model": { "default": @@ -83,7 +82,6 @@ PROMPT_TEMPLATES = { '}}}}\n' 'Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary.\n' 'Respond directly if appropriate. 
diff --git a/configs/server_config.py.example b/configs/server_config.py.example
index 3cc51dd5..c8699840 100644
--- a/configs/server_config.py.example
+++ b/configs/server_config.py.example
@@ -40,6 +40,8 @@ FSCHAT_MODEL_WORKERS = {
         "device": LLM_DEVICE,
         # False,'vllm',使用的推理加速框架,使用vllm如果出现HuggingFace通信问题,参见doc/FAQ
         # vllm对一些模型支持还不成熟,暂时默认关闭
+        # fschat=0.2.33的代码有bug, 如需使用,源码修改fastchat.server.vllm_worker,
+        # 将103行中sampling_params = SamplingParams的参数stop=list(stop)修改为stop= [i for i in stop if i!=""]
         "infer_turbo": False,

         # model_worker多卡加载需要配置的参数
@@ -90,12 +92,14 @@ FSCHAT_MODEL_WORKERS = {
         # 'disable_log_requests': False
     },

-    "chatglm3-6b": {
-        "device": "cuda",
-    },
-    "Qwen1.5-0.5B-Chat": {
+    # 可以如下示例方式更改默认配置
+    # "Qwen-1_8B-Chat": {  # 使用default中的IP和端口
+    #     "device": "cpu",
+    # },
+    "chatglm3-6b": {  # 使用default中的IP和端口
         "device": "cuda",
     },
+
     # 以下配置可以不用修改,在model_config中设置启动的模型
     "zhipu-api": {
         "port": 21001,
@@ -124,11 +128,14 @@ FSCHAT_MODEL_WORKERS = {
     "tiangong-api": {
         "port": 21009,
     },
-    "gemini-api": {
-        "port": 21010,
-    },
 }

+# fastchat multi model worker server
+FSCHAT_MULTI_MODEL_WORKERS = {
+    # TODO:
+}
+
+# fastchat controller server
 FSCHAT_CONTROLLER = {
     "host": DEFAULT_BIND_HOST,
     "port": 20001,
diff --git a/server/agent/agent_factory/glm3_agent.py b/server/agent/agent_factory/glm3_agent.py
index 9deaa71f..52a16ae3 100644
--- a/server/agent/agent_factory/glm3_agent.py
+++ b/server/agent/agent_factory/glm3_agent.py
@@ -227,5 +227,6 @@ def initialize_glm3_agent(
         tools=tools,
         memory=memory,
         tags=tags_,
+        intermediate_steps=[],
         **kwargs,
     )
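The comment added to server_config documents a real pitfall: with fschat==0.2.33, the vLLM worker passes stop=list(stop) straight through, and an empty string in that list breaks generation. The one-line patch it describes, sketched here (assumes the vllm package is installed; the surrounding worker code is approximate):

```python
from vllm import SamplingParams  # assumes vllm is installed

stop = ["<|im_end|>", ""]  # illustrative stop list containing an empty string
# fastchat.serve.vllm_worker, around line 103 — was: stop=list(stop)
sampling_params = SamplingParams(stop=[i for i in stop if i != ""])
```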
diff --git a/server/agent/agent_factory/qwen_agent.py b/server/agent/agent_factory/qwen_agent.py
index b2cb3b38..5b081c53 100644
--- a/server/agent/agent_factory/qwen_agent.py
+++ b/server/agent/agent_factory/qwen_agent.py
@@ -20,7 +20,6 @@ from langchain.tools.base import BaseTool
 from langchain.tools.render import format_tool_to_openai_function
 from server.utils import get_prompt_template

-
 HUMAN_MESSAGE_TEMPLATE = "{input}\n\n{agent_scratchpad}"

 logger = logging.getLogger(__name__)
@@ -108,16 +107,16 @@ class QwenChatAgent(LLMSingleActionAgent):

     @classmethod
     def from_llm_and_tools(
-        cls,
-        llm: BaseLanguageModel,
-        tools: Sequence[BaseTool],
-        prompt: str = None,
-        callback_manager: Optional[BaseCallbackManager] = None,
-        output_parser: Optional[AgentOutputParser] = None,
-        human_message_template: str = HUMAN_MESSAGE_TEMPLATE,
-        input_variables: Optional[List[str]] = None,
-        memory_prompts: Optional[List[BaseChatPromptTemplate]] = None,
-        **kwargs: Any,
+            cls,
+            llm: BaseLanguageModel,
+            tools: Sequence[BaseTool],
+            prompt: str = None,
+            callback_manager: Optional[BaseCallbackManager] = None,
+            output_parser: Optional[AgentOutputParser] = None,
+            human_message_template: str = HUMAN_MESSAGE_TEMPLATE,
+            input_variables: Optional[List[str]] = None,
+            memory_prompts: Optional[List[BaseChatPromptTemplate]] = None,
+            **kwargs: Any,
     ) -> QwenChatAgent:
         """Construct an agent from an LLM and tools."""
         cls._validate_tools(tools)
@@ -148,28 +147,29 @@ class QwenChatAgent(LLMSingleActionAgent):


 def initialize_qwen_agent(
-    tools: Sequence[BaseTool],
-    llm: BaseLanguageModel,
-    prompt: str = None,
-    callback_manager: Optional[BaseCallbackManager] = None,
-    memory: Optional[ConversationBufferWindowMemory] = None,
-    agent_kwargs: Optional[dict] = None,
-    *,
-    return_direct: Optional[bool] = None,
-    tags: Optional[Sequence[str]] = None,
-    **kwargs: Any,
+        tools: Sequence[BaseTool],
+        llm: BaseLanguageModel,
+        prompt: str = None,
+        callback_manager: Optional[BaseCallbackManager] = None,
+        memory: Optional[ConversationBufferWindowMemory] = None,
+        agent_kwargs: Optional[dict] = None,
+        *,
+        return_direct: Optional[bool] = None,
+        tags: Optional[Sequence[str]] = None,
+        **kwargs: Any,
 ) -> AgentExecutor:
     tags_ = list(tags) if tags else []
     agent_kwargs = agent_kwargs or {}
-    if isinstance(return_direct, bool): # can make all tools return directly
+    if isinstance(return_direct, bool):  # can make all tools return directly
         tools = [t.copy(update={"return_direct": return_direct}) for t in tools]
     agent_obj = QwenChatAgent.from_llm_and_tools(
         llm=llm,
         tools=tools,
         prompt=prompt,
-        callback_manager=callback_manager, **agent_kwargs
+        callback_manager=callback_manager,
+        **agent_kwargs
     )
     return AgentExecutor.from_agent_and_tools(
         agent=agent_obj,
diff --git a/server/chat/chat.py b/server/chat/chat.py
index 1752aa4f..23a7624c 100644
--- a/server/chat/chat.py
+++ b/server/chat/chat.py
@@ -82,7 +82,6 @@ def create_models_chains(history, history_len, prompts, models, tools, callbacks
         llm=models["action_model"],
         tools=tools,
         prompt=prompts["action_model"],
-        input_variables=["input", "intermediate_steps", "history"],
         memory=memory,
         # callback_manager=BaseCallbackManager(handlers=callbacks),
         verbose=True,
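In initialize_qwen_agent, the return_direct block clones every tool with the flag overridden, so a tool's output goes straight back to the caller instead of feeding another agent turn. A standalone sketch of that pydantic copy(update=...) pattern, using a toy tool rather than the project's:

```python
from langchain.tools import Tool

def echo(text: str) -> str:
    return text

tools = [Tool(name="echo", func=echo, description="Echo the input back.")]
# clone each tool with return_direct forced on, as initialize_qwen_agent does
tools = [t.copy(update={"return_direct": True}) for t in tools]
assert all(t.return_direct for t in tools)
```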
diff --git a/webui_pages/dialogue/dialogue.py b/webui_pages/dialogue/dialogue.py
index d40a5e27..4e48efdc 100644
--- a/webui_pages/dialogue/dialogue.py
+++ b/webui_pages/dialogue/dialogue.py
@@ -10,7 +10,7 @@ from datetime import datetime
 import os
 import re
 import time
-from configs import (LLM_MODEL_CONFIG, SUPPORT_AGENT_MODELS)
+from configs import (LLM_MODEL_CONFIG, SUPPORT_AGENT_MODELS, TOOL_CONFIG)
 import uuid
 from typing import List, Dict
@@ -204,12 +204,12 @@ def dialogue_page(api: ApiRequest, is_lite: bool = False):
         import importlib
         importlib.reload(model_config_py)

-        tools = list(model_config_py.TOOL_CONFIG.keys())
+        tools = list(TOOL_CONFIG.keys())
         with st.expander("工具栏"):
             for tool in tools:
-                is_selected = st.checkbox(tool, value=model_config_py.TOOL_CONFIG[tool]["use"], key=tool)
+                is_selected = st.checkbox(tool, value=TOOL_CONFIG[tool]["use"], key=tool)
                 if is_selected:
-                    selected_tool_configs[tool] = model_config_py.TOOL_CONFIG[tool]
+                    selected_tool_configs[tool] = TOOL_CONFIG[tool]

     if llm_model is not None:
         model_config['llm_model'][llm_model] = LLM_MODEL_CONFIG['llm_model'][llm_model]
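With TOOL_CONFIG now exported from configs (it moved from model_config to prompt_config in this diff), dialogue.py no longer needs the importlib.reload(model_config_py) indirection to read tool defaults. Stripped of the Streamlit widgets, the selection logic reduces to filtering on each tool's "use" flag — a toy sketch with made-up entries:

```python
TOOL_CONFIG = {
    "arxiv": {"use": False},
    "shell": {"use": True},
    "weather_check": {"use": False, "api-key": "your key"},
}
selected_tool_configs = {name: cfg for name, cfg in TOOL_CONFIG.items() if cfg["use"]}
print(list(selected_tool_configs))  # ['shell']
```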