fix bugs; make poetry use the Tsinghua PyPI mirror
@@ -3,3 +3,7 @@ in-project = true

 [installer]
 modern-installation = false

+[plugins]
+
+[plugins.pypi_mirror]
+url = "https://pypi.tuna.tsinghua.edu.cn/simple"

@@ -6,7 +6,7 @@ authors = ["chatchat"]
 readme = "README.md"

 [tool.poetry.dependencies]
-python = ">=3.8,<4.0,!=3.9.7"
+python = ">=3.8.1,<4.0,!=3.9.7"
 chatchat-model-providers = "^0.3.0"
 langchain = "0.1.5"
 langchainhub = "0.1.14"

@@ -35,9 +35,9 @@ strsimpy = ">=0.2.1"
 markdownify = ">=0.11.6"
 tqdm = ">=4.66.1"
 websockets = ">=12.0"
-numpy = ">=1.26.3"
+numpy = "1.24.4"
 pandas = "~2.1.4"
-pydantic = "1.10.14"
+pydantic = "2.6.4"
 httpx = {version = ">=0.25.2", extras = ["brotli", "http2", "socks"]}
 python-multipart = "0.0.9"

@@ -194,7 +194,7 @@ omit = [
 ]

 [build-system]
-requires = ["poetry-core>=1.0.0"]
+requires = ["poetry-core>=1.0.0", "poetry-plugin-pypi-mirror==0.4.2"]
 build-backend = "poetry.core.masonry.api"

 [tool.pytest.ini_options]
chatchat/chatchat/configs/basic_config.py (new file, 47 lines)
@@ -0,0 +1,47 @@
import logging
import os
from pathlib import Path

import langchain


# Whether to show verbose logs
log_verbose = True
langchain.verbose = log_verbose

# Normally you do not need to change anything below

# Root directory for user data
DATA_PATH = str(Path(__file__).absolute().parent.parent / "data")
if not os.path.exists(DATA_PATH):
    os.mkdir(DATA_PATH)

# Storage path for nltk models
NLTK_DATA_PATH = os.path.join(DATA_PATH, "nltk_data")
import nltk
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path

# Log format
LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logging.basicConfig(format=LOG_FORMAT)


# Log storage path
LOG_PATH = os.path.join(DATA_PATH, "logs")
if not os.path.exists(LOG_PATH):
    os.mkdir(LOG_PATH)

# Where model-generated content (images, video, audio, etc.) is stored
MEDIA_PATH = os.path.join(DATA_PATH, "media")
if not os.path.exists(MEDIA_PATH):
    os.mkdir(MEDIA_PATH)
    os.mkdir(os.path.join(MEDIA_PATH, "image"))
    os.mkdir(os.path.join(MEDIA_PATH, "audio"))
    os.mkdir(os.path.join(MEDIA_PATH, "video"))

# Directory for temporary files, mainly used for file-based chat
BASE_TEMP_DIR = os.path.join(DATA_PATH, "temp")
if not os.path.exists(BASE_TEMP_DIR):
    os.mkdir(BASE_TEMP_DIR)
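The per-directory `exists`/`mkdir` pattern above works on a fresh data directory, but `os.mkdir` raises if a parent is missing and the `image`/`audio`/`video` sub-directories are skipped once `MEDIA_PATH` already exists. A more forgiving variant (a sketch, not the project's code) could look like:

```python
import os

def ensure_dirs(*paths: str) -> None:
    """Create each directory, including missing parents, if it does not exist yet."""
    for p in paths:
        os.makedirs(p, exist_ok=True)

# e.g. ensure_dirs(LOG_PATH, BASE_TEMP_DIR, os.path.join(MEDIA_PATH, "image"))
```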
chatchat/chatchat/configs/kb_config.py (new file, 148 lines)
@@ -0,0 +1,148 @@
|
||||
import os
|
||||
|
||||
from configs.basic_config import DATA_PATH
|
||||
|
||||
|
||||
# 默认使用的知识库
|
||||
DEFAULT_KNOWLEDGE_BASE = "samples"
|
||||
|
||||
# 默认向量库/全文检索引擎类型。可选:faiss, milvus(离线) & zilliz(在线), pgvector,全文检索引擎es
|
||||
DEFAULT_VS_TYPE = "faiss"
|
||||
|
||||
# 缓存向量库数量(针对FAISS)
|
||||
CACHED_VS_NUM = 1
|
||||
|
||||
# 缓存临时向量库数量(针对FAISS),用于文件对话
|
||||
CACHED_MEMO_VS_NUM = 10
|
||||
|
||||
# 知识库中单段文本长度(不适用MarkdownHeaderTextSplitter)
|
||||
CHUNK_SIZE = 250
|
||||
|
||||
# 知识库中相邻文本重合长度(不适用MarkdownHeaderTextSplitter)
|
||||
OVERLAP_SIZE = 50
|
||||
|
||||
# 知识库匹配向量数量
|
||||
VECTOR_SEARCH_TOP_K = 3
|
||||
|
||||
# 知识库匹配相关度阈值,取值范围在0-1之间,SCORE越小,相关度越高,取到1相当于不筛选,建议设置在0.5左右
|
||||
SCORE_THRESHOLD = 1
|
||||
|
||||
# 默认搜索引擎。可选:bing, duckduckgo, metaphor
|
||||
DEFAULT_SEARCH_ENGINE = "metaphor"
|
||||
|
||||
# 搜索引擎匹配结题数量
|
||||
SEARCH_ENGINE_TOP_K = 3
|
||||
|
||||
|
||||
# Bing 搜索必备变量
|
||||
# 使用 Bing 搜索需要使用 Bing Subscription Key,需要在azure port中申请试用bing search
|
||||
# 具体申请方式请见
|
||||
# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource
|
||||
# 使用python创建bing api 搜索实例详见:
|
||||
# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python
|
||||
BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
|
||||
# 注意不是bing Webmaster Tools的api key,
|
||||
|
||||
# 此外,如果是在服务器上,报Failed to establish a new connection: [Errno 110] Connection timed out
|
||||
# 是因为服务器加了防火墙,需要联系管理员加白名单,如果公司的服务器的话,就别想了GG
|
||||
BING_SUBSCRIPTION_KEY = "b31d23d7b96742ab959f4cc07a605f72"
|
||||
|
||||
# metaphor搜索需要KEY
|
||||
METAPHOR_API_KEY = "f8c9f98f-141a-4a55-9be7-ae675ccacd7a"
|
||||
|
||||
# 心知天气 API KEY,用于天气Agent。申请:https://www.seniverse.com/
|
||||
SENIVERSE_API_KEY = ""
|
||||
|
||||
# 是否开启中文标题加强,以及标题增强的相关配置
|
||||
# 通过增加标题判断,判断哪些文本为标题,并在metadata中进行标记;
|
||||
# 然后将文本与往上一级的标题进行拼合,实现文本信息的增强。
|
||||
ZH_TITLE_ENHANCE = False
|
||||
|
||||
# PDF OCR 控制:只对宽高超过页面一定比例(图片宽/页面宽,图片高/页面高)的图片进行 OCR。
|
||||
# 这样可以避免 PDF 中一些小图片的干扰,提高非扫描版 PDF 处理速度
|
||||
PDF_OCR_THRESHOLD = (0.6, 0.6)
|
||||
|
||||
# 每个知识库的初始化介绍,用于在初始化知识库时显示和Agent调用,没写则没有介绍,不会被Agent调用。
|
||||
KB_INFO = {
|
||||
"samples": "关于本项目issue的解答",
|
||||
}
|
||||
|
||||
|
||||
# 通常情况下不需要更改以下内容
|
||||
|
||||
# 知识库默认存储路径
|
||||
KB_ROOT_PATH = os.path.join(DATA_PATH, "knowledge_base")
|
||||
if not os.path.exists(KB_ROOT_PATH):
|
||||
os.mkdir(KB_ROOT_PATH)
|
||||
|
||||
# 数据库默认存储路径。
|
||||
# 如果使用sqlite,可以直接修改DB_ROOT_PATH;如果使用其它数据库,请直接修改SQLALCHEMY_DATABASE_URI。
|
||||
DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db")
|
||||
SQLALCHEMY_DATABASE_URI = f"sqlite:///{DB_ROOT_PATH}"
|
||||
|
||||
# 可选向量库类型及对应配置
|
||||
kbs_config = {
|
||||
"faiss": {
|
||||
},
|
||||
"milvus": {
|
||||
"host": "127.0.0.1",
|
||||
"port": "19530",
|
||||
"user": "",
|
||||
"password": "",
|
||||
"secure": False,
|
||||
},
|
||||
"zilliz": {
|
||||
"host": "in01-a7ce524e41e3935.ali-cn-hangzhou.vectordb.zilliz.com.cn",
|
||||
"port": "19530",
|
||||
"user": "",
|
||||
"password": "",
|
||||
"secure": True,
|
||||
},
|
||||
"pg": {
|
||||
"connection_uri": "postgresql://postgres:postgres@127.0.0.1:5432/langchain_chatchat",
|
||||
},
|
||||
|
||||
"es": {
|
||||
"host": "127.0.0.1",
|
||||
"port": "9200",
|
||||
"index_name": "test_index",
|
||||
"user": "",
|
||||
"password": ""
|
||||
},
|
||||
"milvus_kwargs":{
|
||||
"search_params":{"metric_type": "L2"}, #在此处增加search_params
|
||||
"index_params":{"metric_type": "L2","index_type": "HNSW"} # 在此处增加index_params
|
||||
},
|
||||
"chromadb": {}
|
||||
}
|
||||
|
||||
# TextSplitter配置项,如果你不明白其中的含义,就不要修改。
|
||||
text_splitter_dict = {
|
||||
"ChineseRecursiveTextSplitter": {
|
||||
"source": "", ## 选择tiktoken则使用openai的方法
|
||||
"tokenizer_name_or_path": "",
|
||||
},
|
||||
"SpacyTextSplitter": {
|
||||
"source": "",
|
||||
"tokenizer_name_or_path": "",
|
||||
},
|
||||
"RecursiveCharacterTextSplitter": {
|
||||
"source": "tiktoken",
|
||||
"tokenizer_name_or_path": "cl100k_base",
|
||||
},
|
||||
"MarkdownHeaderTextSplitter": {
|
||||
"headers_to_split_on":
|
||||
[
|
||||
("#", "head1"),
|
||||
("##", "head2"),
|
||||
("###", "head3"),
|
||||
("####", "head4"),
|
||||
]
|
||||
},
|
||||
}
|
||||
|
||||
# TEXT_SPLITTER 名称
|
||||
TEXT_SPLITTER_NAME = "ChineseRecursiveTextSplitter"
|
||||
|
||||
# Embedding模型定制词语的词表文件
|
||||
EMBEDDING_KEYWORD_FILE = "embedding_keywords.txt"
|
||||
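For readers unfamiliar with how `text_splitter_dict` combines with `CHUNK_SIZE` and `OVERLAP_SIZE`, here is a minimal sketch (not the project's actual loader) of instantiating the `RecursiveCharacterTextSplitter` entry above via LangChain's tiktoken-based constructor:

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Mirrors the "RecursiveCharacterTextSplitter" entry in text_splitter_dict above;
# chunk_size / chunk_overlap correspond to CHUNK_SIZE (250) and OVERLAP_SIZE (50).
splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    encoding_name="cl100k_base",
    chunk_size=250,
    chunk_overlap=50,
)
chunks = splitter.split_text("需要入库的长文本 ...")
```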
chatchat/chatchat/configs/model_config.py (new file, 170 lines)
@@ -0,0 +1,170 @@
|
||||
import os
|
||||
|
||||
|
||||
# 默认选用的 LLM 名称
|
||||
DEFAULT_LLM_MODEL = "qwen"
|
||||
|
||||
# 默认选用的 Embedding 名称
|
||||
DEFAULT_EMBEDDING_MODEL = "bge"
|
||||
|
||||
|
||||
# AgentLM模型的名称 (可以不指定,指定之后就锁定进入Agent之后的Chain的模型,不指定就是LLM_MODELS[0])
|
||||
Agent_MODEL = None
|
||||
|
||||
# 历史对话轮数
|
||||
HISTORY_LEN = 3
|
||||
|
||||
# 大模型最长支持的长度,如果不填写,则使用模型默认的最大长度,如果填写,则为用户设定的最大长度
|
||||
MAX_TOKENS = None
|
||||
|
||||
# LLM通用对话参数
|
||||
TEMPERATURE = 0.7
|
||||
# TOP_P = 0.95 # ChatOpenAI暂不支持该参数
|
||||
|
||||
SUPPORT_AGENT_MODELS = [
|
||||
"chatglm3-6b",
|
||||
"openai-api",
|
||||
"Qwen-14B-Chat",
|
||||
"Qwen-7B-Chat",
|
||||
"qwen",
|
||||
]
|
||||
|
||||
|
||||
LLM_MODEL_CONFIG = {
|
||||
# 意图识别不需要输出,模型后台知道就行
|
||||
"preprocess_model": {
|
||||
DEFAULT_LLM_MODEL: {
|
||||
"temperature": 0.05,
|
||||
"max_tokens": 4096,
|
||||
"history_len": 100,
|
||||
"prompt_name": "default",
|
||||
"callbacks": False
|
||||
},
|
||||
},
|
||||
"llm_model": {
|
||||
DEFAULT_LLM_MODEL: {
|
||||
"temperature": 0.9,
|
||||
"max_tokens": 4096,
|
||||
"history_len": 10,
|
||||
"prompt_name": "default",
|
||||
"callbacks": True
|
||||
},
|
||||
},
|
||||
"action_model": {
|
||||
DEFAULT_LLM_MODEL: {
|
||||
"temperature": 0.01,
|
||||
"max_tokens": 4096,
|
||||
"callbacks": True
|
||||
},
|
||||
},
|
||||
"postprocess_model": {
|
||||
DEFAULT_LLM_MODEL: {
|
||||
"temperature": 0.01,
|
||||
"max_tokens": 4096,
|
||||
"prompt_name": "default",
|
||||
"callbacks": True
|
||||
}
|
||||
},
|
||||
"image_model": {
|
||||
"sd-turbo": {
|
||||
"size": "256*256",
|
||||
}
|
||||
},
|
||||
"multimodal_model": {
|
||||
"qwen-vl": {}
|
||||
},
|
||||
}
|
||||
|
||||
# 可以通过 loom/xinference/oneapi/fastchat 启动模型服务,然后将其 URL 和 KEY 配置过来即可。
|
||||
# - platform_name 可以任意填写,不要重复即可
|
||||
# - platform_type 可选:openai, xinference, oneapi, fastchat。以后可能根据平台类型做一些功能区分
|
||||
# - 将框架部署的模型填写到对应列表即可。不同框架可以加载同名模型,项目会自动做负载均衡。
|
||||
|
||||
MODEL_PLATFORMS = [
|
||||
# {
|
||||
# "platform_name": "openai-api",
|
||||
# "platform_type": "openai",
|
||||
# "api_base_url": "https://api.openai.com/v1",
|
||||
# "api_key": "sk-yBuaCpqEVUBarBP9700e7224A2D743AeA329334d19C0A336",
|
||||
# "api_proxy": "https://qujhzynu.cloud.sealos.io/v1",
|
||||
# "api_concurrencies": 5,
|
||||
# "llm_models": [
|
||||
# "gpt-3.5-turbo",
|
||||
# ],
|
||||
# "embed_models": [],
|
||||
# "image_models": [],
|
||||
# "multimodal_models": [],
|
||||
# },
|
||||
|
||||
{
|
||||
"platform_name": "xinference",
|
||||
"platform_type": "xinference",
|
||||
"api_base_url": "http://127.0.0.1:9997/v1",
|
||||
"api_key": "EMPTY",
|
||||
"api_concurrencies": 5,
|
||||
# 注意:这里填写的是 xinference 部署的模型 UID,而非模型名称
|
||||
"llm_models": [
|
||||
"qwen",
|
||||
"glm3",
|
||||
],
|
||||
"embed_models": [
|
||||
"bge",
|
||||
],
|
||||
"image_models": [
|
||||
"sd-turbo",
|
||||
],
|
||||
"multimodal_models": [
|
||||
"qwen-vl",
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
"platform_name": "oneapi",
|
||||
"platform_type": "oneapi",
|
||||
"api_base_url": "http://127.0.0.1:3000/v1",
|
||||
"api_key": "sk-Mlft68FXoTYqLfQr06F0E2D77e6e4220B6F420999d25383f",
|
||||
"api_concurrencies": 5,
|
||||
"llm_models": [
|
||||
# 智谱 API
|
||||
"chatglm_pro",
|
||||
"chatglm_turbo",
|
||||
"chatglm_std",
|
||||
"chatglm_lite",
|
||||
# 千问 API
|
||||
"qwen-turbo",
|
||||
"qwen-plus",
|
||||
"qwen-max",
|
||||
"qwen-max-longcontext",
|
||||
# 千帆 API
|
||||
"ERNIE-Bot",
|
||||
"ERNIE-Bot-turbo",
|
||||
"ERNIE-Bot-4",
|
||||
# 星火 API
|
||||
"SparkDesk",
|
||||
],
|
||||
"embed_models": [
|
||||
# 千问 API
|
||||
"text-embedding-v1",
|
||||
# 千帆 API
|
||||
"Embedding-V1",
|
||||
],
|
||||
"image_models": [],
|
||||
"multimodal_models": [],
|
||||
},
|
||||
|
||||
# {
|
||||
# "platform_name": "loom",
|
||||
# "platform_type": "loom",
|
||||
# "api_base_url": "http://127.0.0.1:7860/v1",
|
||||
# "api_key": "88296d2f9bbd9ab222c1086e39f5fbb2.FbC0YSrAMcaEF2gB",
|
||||
# "api_concurrencies": 5,
|
||||
# "llm_models": [
|
||||
# "chatglm3-6b",
|
||||
# ],
|
||||
# "embed_models": [],
|
||||
# "image_models": [],
|
||||
# "multimodal_models": [],
|
||||
# },
|
||||
]
|
||||
|
||||
LOOM_CONFIG = os.path.join(os.path.dirname(os.path.abspath(__file__)), "loom.yaml")
|
||||
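Each entry in `MODEL_PLATFORMS` points at an OpenAI-compatible endpoint, so a quick smoke test against the `xinference` platform above can use the `openai` Python client. A sketch, assuming `openai>=1.0` is installed and an Xinference server is serving a model with UID `qwen` at that address:

```python
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:9997/v1", api_key="EMPTY")
resp = client.chat.completions.create(
    model="qwen",  # the Xinference model UID configured above
    messages=[{"role": "user", "content": "你好"}],
)
print(resp.choices[0].message.content)
```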
chatchat/chatchat/configs/prompt_config.py (new file, 209 lines)
@@ -0,0 +1,209 @@
|
||||
PROMPT_TEMPLATES = {
|
||||
"preprocess_model": {
|
||||
"default":
|
||||
'你只要回复0 和 1 ,代表不需要使用工具。以下几种问题不需要使用工具:'
|
||||
'1. 需要联网查询的内容\n'
|
||||
'2. 需要计算的内容\n'
|
||||
'3. 需要查询实时性的内容\n'
|
||||
'如果我的输入满足这几种情况,返回1。其他输入,请你回复0,你只要返回一个数字\n'
|
||||
'这是我的问题:'
|
||||
},
|
||||
"llm_model": {
|
||||
"default":
|
||||
'{{input}}',
|
||||
"with_history":
|
||||
'The following is a friendly conversation between a human and an AI. '
|
||||
'The AI is talkative and provides lots of specific details from its context. '
|
||||
'If the AI does not know the answer to a question, it truthfully says it does not know.\n\n'
|
||||
'Current conversation:\n'
|
||||
'{history}\n'
|
||||
'Human: {input}\n'
|
||||
'AI:',
|
||||
},
|
||||
"action_model": {
|
||||
"GPT-4":
|
||||
'Answer the following questions as best you can. You have access to the following tools:\n'
|
||||
'The way you use the tools is by specifying a json blob.\n'
|
||||
'Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n'
|
||||
'The only values that should be in the "action" field are: {tool_names}\n'
|
||||
'The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\n'
|
||||
'```\n\n'
|
||||
'{{{{\n'
|
||||
' "action": $TOOL_NAME,\n'
|
||||
' "action_input": $INPUT\n'
|
||||
'}}}}\n'
|
||||
'```\n\n'
|
||||
'ALWAYS use the following format:\n'
|
||||
'Question: the input question you must answer\n'
|
||||
'Thought: you should always think about what to do\n'
|
||||
'Action:\n'
|
||||
'```\n\n'
|
||||
'$JSON_BLOB'
|
||||
'```\n\n'
|
||||
'Observation: the result of the action\n'
|
||||
'... (this Thought/Action/Observation can repeat N times)\n'
|
||||
'Thought: I now know the final answer\n'
|
||||
'Final Answer: the final answer to the original input question\n'
|
||||
'Begin! Reminder to always use the exact characters `Final Answer` when responding.\n'
|
||||
'Question:{input}\n'
|
||||
'Thought:{agent_scratchpad}\n',
|
||||
|
||||
"ChatGLM3":
|
||||
'You can answer using the tools.Respond to the human as helpfully and accurately as possible.\n'
|
||||
'You have access to the following tools:\n'
|
||||
'{tools}\n'
|
||||
'Use a json blob to specify a tool by providing an action key (tool name)\n'
|
||||
'and an action_input key (tool input).\n'
|
||||
'Valid "action" values: "Final Answer" or [{tool_names}]\n'
|
||||
'Provide only ONE action per $JSON_BLOB, as shown:\n\n'
|
||||
'```\n'
|
||||
'{{{{\n'
|
||||
' "action": $TOOL_NAME,\n'
|
||||
' "action_input": $INPUT\n'
|
||||
'}}}}\n'
|
||||
'```\n\n'
|
||||
'Follow this format:\n\n'
|
||||
'Question: input question to answer\n'
|
||||
'Thought: consider previous and subsequent steps\n'
|
||||
'Action:\n'
|
||||
'```\n'
|
||||
'$JSON_BLOB\n'
|
||||
'```\n'
|
||||
'Observation: action result\n'
|
||||
'... (repeat Thought/Action/Observation N times)\n'
|
||||
'Thought: I know what to respond\n'
|
||||
'Action:\n'
|
||||
'```\n'
|
||||
'{{{{\n'
|
||||
' "action": "Final Answer",\n'
|
||||
' "action_input": "Final response to human"\n'
|
||||
'}}}}\n'
|
||||
'Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary.\n'
|
||||
'Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.\n'
|
||||
'Question: {input}\n\n'
|
||||
'{agent_scratchpad}\n',
|
||||
"qwen":
|
||||
'Answer the following questions as best you can. You have access to the following APIs:\n\n'
|
||||
'{tools}\n\n'
|
||||
'Use the following format:\n\n'
|
||||
'Question: the input question you must answer\n'
|
||||
'Thought: you should always think about what to do\n'
|
||||
'Action: the action to take, should be one of [{tool_names}]\n'
|
||||
'Action Input: the input to the action\n'
|
||||
'Observation: the result of the action\n'
|
||||
'... (this Thought/Action/Action Input/Observation can be repeated zero or more times)\n'
|
||||
'Thought: I now know the final answer\n'
|
||||
'Final Answer: the final answer to the original input question\n\n'
|
||||
'Format the Action Input as a JSON object.\n\n'
|
||||
'Begin!\n\n'
|
||||
'Question: {input}\n\n'
|
||||
'{agent_scratchpad}\n\n',
|
||||
"structured-chat-agent":
|
||||
'Respond to the human as helpfully and accurately as possible. You have access to the following tools:\n\n'
|
||||
'{tools}\n\n'
|
||||
'Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).\n\n'
|
||||
'Valid "action" values: "Final Answer" or {tool_names}\n\n'
|
||||
'Provide only ONE action per $JSON_BLOB, as shown:\n\n'
|
||||
'```\n{{\n "action": $TOOL_NAME,\n "action_input": $INPUT\n}}\n```\n\n'
|
||||
'Follow this format:\n\n'
|
||||
'Question: input question to answer\n'
|
||||
'Thought: consider previous and subsequent steps\n'
|
||||
'Action:\n```\n$JSON_BLOB\n```\n'
|
||||
'Observation: action result\n'
|
||||
'... (repeat Thought/Action/Observation N times)\n'
|
||||
'Thought: I know what to respond\n'
|
||||
'Action:\n```\n{{\n "action": "Final Answer",\n "action_input": "Final response to human"\n}}\n\n'
|
||||
'Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation\n'
|
||||
'{input}\n\n'
|
||||
'{agent_scratchpad}\n\n'
|
||||
# '(reminder to respond in a JSON blob no matter what)'
|
||||
},
|
||||
"postprocess_model": {
|
||||
"default": "{{input}}",
|
||||
}
|
||||
}
|
||||
|
||||
TOOL_CONFIG = {
|
||||
"search_local_knowledgebase": {
|
||||
"use": False,
|
||||
"top_k": 3,
|
||||
"score_threshold": 1,
|
||||
"conclude_prompt": {
|
||||
"with_result":
|
||||
'<指令>根据已知信息,简洁和专业的来回答问题。如果无法从中得到答案,请说 "根据已知信息无法回答该问题",'
|
||||
'不允许在答案中添加编造成分,答案请使用中文。 </指令>\n'
|
||||
'<已知信息>{{ context }}</已知信息>\n'
|
||||
'<问题>{{ question }}</问题>\n',
|
||||
"without_result":
|
||||
'请你根据我的提问回答我的问题:\n'
|
||||
'{{ question }}\n'
|
||||
'请注意,你必须在回答结束后强调,你的回答是根据你的经验回答而不是参考资料回答的。\n',
|
||||
}
|
||||
},
|
||||
"search_internet": {
|
||||
"use": False,
|
||||
"search_engine_name": "bing",
|
||||
"search_engine_config":
|
||||
{
|
||||
"bing": {
|
||||
"result_len": 3,
|
||||
"bing_search_url": "https://api.bing.microsoft.com/v7.0/search",
|
||||
"bing_key": "680a39347d7242c5bd2d7a9576a125b7",
|
||||
},
|
||||
"metaphor": {
|
||||
"result_len": 3,
|
||||
"metaphor_api_key": "",
|
||||
"split_result": False,
|
||||
"chunk_size": 500,
|
||||
"chunk_overlap": 0,
|
||||
},
|
||||
"duckduckgo": {
|
||||
"result_len": 3
|
||||
}
|
||||
},
|
||||
"top_k": 10,
|
||||
"verbose": "Origin",
|
||||
"conclude_prompt":
|
||||
"<指令>这是搜索到的互联网信息,请你根据这些信息进行提取并有调理,简洁的回答问题。如果无法从中得到答案,请说 “无法搜索到能回答问题的内容”。 "
|
||||
"</指令>\n<已知信息>{{ context }}</已知信息>\n"
|
||||
"<问题>\n"
|
||||
"{{ question }}\n"
|
||||
"</问题>\n"
|
||||
},
|
||||
"arxiv": {
|
||||
"use": False,
|
||||
},
|
||||
"shell": {
|
||||
"use": False,
|
||||
},
|
||||
"weather_check": {
|
||||
"use": False,
|
||||
"api-key": "S8vrB4U_-c5mvAMiK",
|
||||
},
|
||||
"search_youtube": {
|
||||
"use": False,
|
||||
},
|
||||
"wolfram": {
|
||||
"use": False,
|
||||
},
|
||||
"calculate": {
|
||||
"use": False,
|
||||
},
|
||||
"vqa_processor": {
|
||||
"use": False,
|
||||
"model_path": "your model path",
|
||||
"tokenizer_path": "your tokenizer path",
|
||||
"device": "cuda:1"
|
||||
},
|
||||
"aqa_processor": {
|
||||
"use": False,
|
||||
"model_path": "your model path",
|
||||
"tokenizer_path": "yout tokenizer path",
|
||||
"device": "cuda:2"
|
||||
},
|
||||
|
||||
"text2images": {
|
||||
"use": False,
|
||||
},
|
||||
|
||||
}
|
||||
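The `{{ context }}` / `{{ question }}` placeholders in the tool prompts above are Jinja2-style (the wiki's prompt-configuration section notes that the templates use Jinja2 syntax), so filling one in is a single render call. A sketch with an abbreviated prompt, assuming `jinja2` is installed:

```python
from jinja2 import Template

conclude_prompt = (
    "<指令>根据已知信息,简洁和专业的来回答问题。</指令>\n"
    "<已知信息>{{ context }}</已知信息>\n"
    "<问题>{{ question }}</问题>\n"
)
print(Template(conclude_prompt).render(context="检索到的文档内容", question="用户的问题"))
```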
chatchat/chatchat/configs/server_config.py (new file, 25 lines)
@@ -0,0 +1,25 @@
import sys


# Default timeout for httpx requests (seconds). If model loading or chat is slow
# and you hit timeout errors, increase this value.
HTTPX_DEFAULT_TIMEOUT = 300.0

# Whether the API allows cross-origin requests; defaults to False, set to True to enable it.
# is open cross domain
OPEN_CROSS_DOMAIN = True

# Default bind host for each server. If you change it to "0.0.0.0", update the host of every XX_SERVER below.
DEFAULT_BIND_HOST = "127.0.0.1" if sys.platform != "win32" else "127.0.0.1"


# webui.py server
WEBUI_SERVER = {
    "host": DEFAULT_BIND_HOST,
    "port": 8501,
}

# api.py server
API_SERVER = {
    "host": DEFAULT_BIND_HOST,
    "port": 7861,
}
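With the defaults above, the API server listens on `http://127.0.0.1:7861`. A minimal reachability check (a sketch, assuming the server is already running and `httpx` is installed, as it is in the project's dependencies):

```python
import httpx

API_BASE = "http://127.0.0.1:7861"                 # host/port from API_SERVER above
r = httpx.get(f"{API_BASE}/docs", timeout=300.0)   # HTTPX_DEFAULT_TIMEOUT
print(r.status_code)                               # 200 means the FastAPI docs page is up
```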
chatchat/chatchat/data/knowledge_base/info.db (new binary file)
chatchat/chatchat/data/knowledge_base/samples/content/README.md (new file, 159 lines)
@@ -0,0 +1,159 @@
|
||||

|
||||
|
||||
|
||||
🌍 [READ THIS IN ENGLISH](README_en.md)
|
||||
|
||||
📃 **LangChain-Chatchat** (原 Langchain-ChatGLM)
|
||||
|
||||
基于 ChatGLM 等大语言模型与 Langchain 等应用框架实现,开源、可离线部署的检索增强生成(RAG)大模型知识库项目。
|
||||
|
||||
---
|
||||
|
||||
## 目录
|
||||
|
||||
* [介绍](README.md#介绍)
|
||||
* [解决的痛点](README.md#解决的痛点)
|
||||
* [快速上手](README.md#快速上手)
|
||||
* [1. 环境配置](README.md#1-环境配置)
|
||||
* [2. 模型下载](README.md#2-模型下载)
|
||||
* [3. 初始化知识库和配置文件](README.md#3-初始化知识库和配置文件)
|
||||
* [4. 一键启动](README.md#4-一键启动)
|
||||
* [5. 启动界面示例](README.md#5-启动界面示例)
|
||||
* [联系我们](README.md#联系我们)
|
||||
|
||||
|
||||
## 介绍
|
||||
|
||||
🤖️ 一种利用 [langchain](https://github.com/hwchase17/langchain) 思想实现的基于本地知识库的问答应用,目标期望建立一套对中文场景与开源模型支持友好、可离线运行的知识库问答解决方案。
|
||||
|
||||
💡 受 [GanymedeNil](https://github.com/GanymedeNil) 的项目 [document.ai](https://github.com/GanymedeNil/document.ai) 和 [AlexZhangji](https://github.com/AlexZhangji) 创建的 [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) 启发,建立了全流程可使用开源模型实现的本地知识库问答应用。本项目的最新版本中通过使用 [FastChat](https://github.com/lm-sys/FastChat) 接入 Vicuna, Alpaca, LLaMA, Koala, RWKV 等模型,依托于 [langchain](https://github.com/langchain-ai/langchain) 框架支持通过基于 [FastAPI](https://github.com/tiangolo/fastapi) 提供的 API 调用服务,或使用基于 [Streamlit](https://github.com/streamlit/streamlit) 的 WebUI 进行操作。
|
||||
|
||||
✅ 依托于本项目支持的开源 LLM 与 Embedding 模型,本项目可实现全部使用**开源**模型**离线私有部署**。与此同时,本项目也支持 OpenAI GPT API 的调用,并将在后续持续扩充对各类模型及模型 API 的接入。
|
||||
|
||||
⛓️ 本项目实现原理如下图所示,过程包括加载文件 -> 读取文本 -> 文本分割 -> 文本向量化 -> 问句向量化 -> 在文本向量中匹配出与问句向量最相似的 `top k`个 -> 匹配出的文本作为上下文和问题一起添加到 `prompt`中 -> 提交给 `LLM`生成回答。
|
||||
|
||||
📺 [原理介绍视频](https://www.bilibili.com/video/BV13M4y1e7cN/?share_source=copy_web&vd_source=e6c5aafe684f30fbe41925d61ca6d514)
|
||||
|
||||

|
||||
|
||||
从文档处理角度来看,实现流程如下:
|
||||
|
||||

|
||||
|
||||
🚩 本项目未涉及微调、训练过程,但可利用微调或训练对本项目效果进行优化。
|
||||
|
||||
🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/chatchat-space/Langchain-Chatchat/Langchain-Chatchat) 中 `v11` 版本所使用代码已更新至本项目 `v0.2.7` 版本。
|
||||
|
||||
🐳 [Docker 镜像](registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.6) 已经更新到 ```0.2.7``` 版本。
|
||||
|
||||
🌲 一行命令运行 Docker :
|
||||
|
||||
```shell
|
||||
docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.7
|
||||
```
|
||||
|
||||
🧩 本项目有一个非常完整的[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/) , README只是一个简单的介绍,__仅仅是入门教程,能够基础运行__。 如果你想要更深入的了解本项目,或者想对本项目做出贡献。请移步 [Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/) 界面
|
||||
|
||||
## 解决的痛点
|
||||
|
||||
该项目是一个可以实现 __完全本地化__推理的知识库增强方案, 重点解决数据安全保护,私域化部署的企业痛点。
|
||||
本开源方案采用```Apache License```,可以免费商用,无需付费。
|
||||
|
||||
我们支持市面上主流的本地大预言模型和Embedding模型,支持开源的本地向量数据库。
|
||||
支持列表详见[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
|
||||
|
||||
|
||||
## 快速上手
|
||||
|
||||
### 1. 环境配置
|
||||
|
||||
+ 首先,确保你的机器安装了 Python 3.8 - 3.10
|
||||
```
|
||||
$ python --version
|
||||
Python 3.10.12
|
||||
```
|
||||
接着,创建一个虚拟环境,并在虚拟环境内安装项目的依赖
|
||||
```shell
|
||||
|
||||
# 拉取仓库
|
||||
$ git clone https://github.com/chatchat-space/Langchain-Chatchat.git
|
||||
|
||||
# 进入目录
|
||||
$ cd Langchain-Chatchat
|
||||
|
||||
# 安装全部依赖
|
||||
$ pip install -r requirements.txt
|
||||
$ pip install -r requirements_api.txt
|
||||
$ pip install -r requirements_webui.txt
|
||||
|
||||
# 默认依赖包括基本运行环境(FAISS向量库)。如果要使用 milvus/pg_vector 等向量库,请将 requirements.txt 中相应依赖取消注释再安装。
|
||||
```
|
||||
### 2, 模型下载
|
||||
|
||||
如需在本地或离线环境下运行本项目,需要首先将项目所需的模型下载至本地,通常开源 LLM 与 Embedding 模型可以从 [HuggingFace](https://huggingface.co/models) 下载。
|
||||
|
||||
以本项目中默认使用的 LLM 模型 [THUDM/ChatGLM2-6B](https://huggingface.co/THUDM/chatglm2-6b) 与 Embedding 模型 [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base) 为例:
|
||||
|
||||
下载模型需要先[安装 Git LFS](https://docs.github.com/zh/repositories/working-with-files/managing-large-files/installing-git-large-file-storage),然后运行
|
||||
|
||||
```Shell
|
||||
$ git lfs install
|
||||
$ git clone https://huggingface.co/THUDM/chatglm2-6b
|
||||
$ git clone https://huggingface.co/moka-ai/m3e-base
|
||||
```
|
||||
### 3. 初始化知识库和配置文件
|
||||
|
||||
按照下列方式初始化自己的知识库和简单的复制配置文件
|
||||
```shell
|
||||
$ python copy_config_example.py
|
||||
$ python init_database.py --recreate-vs
|
||||
```
|
||||
### 4. 一键启动
|
||||
|
||||
按照以下命令启动项目
|
||||
```shell
|
||||
$ python startup.py -a
|
||||
```
|
||||
### 5. 启动界面示例
|
||||
|
||||
如果正常启动,你将能看到以下界面
|
||||
|
||||
1. FastAPI Docs 界面
|
||||
|
||||

|
||||
|
||||
2. Web UI 启动界面示例:
|
||||
|
||||
- Web UI 对话界面:
|
||||
|
||||

|
||||
|
||||
- Web UI 知识库管理页面:
|
||||
|
||||

|
||||
|
||||
|
||||
### 注意
|
||||
|
||||
以上方式只是为了快速上手,如果需要更多的功能和自定义启动方式 ,请参考[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
|
||||
|
||||
|
||||
---
|
||||
## 项目里程碑
|
||||
|
||||
|
||||
---
|
||||
## 联系我们
|
||||
### Telegram
|
||||
[](https://t.me/+RjliQ3jnJ1YyN2E9)
|
||||
|
||||
### 项目交流群
|
||||
<img src="img/qr_code_76.jpg" alt="二维码" width="300" />
|
||||
|
||||
🎉 Langchain-Chatchat 项目微信交流群,如果你也对本项目感兴趣,欢迎加入群聊参与讨论交流。
|
||||
|
||||
### 公众号
|
||||
|
||||
<img src="img/official_wechat_mp_account.png" alt="二维码" width="300" />
|
||||
|
||||
🎉 Langchain-Chatchat 项目官方公众号,欢迎扫码关注。
|
||||
|
(image diff metadata: 33 image files moved/renamed, each with identical before/after dimensions and size)
chatchat/chatchat/data/knowledge_base/samples/content/webui2.py (new file, 237 lines)
@@ -0,0 +1,237 @@
|
||||
from nicegui import ui, Client, app, run
|
||||
from nicegui.events import ValueChangeEventArguments
|
||||
from configs import (VERSION, LLM_MODELS, TEMPERATURE, HISTORY_LEN,
|
||||
VECTOR_SEARCH_TOP_K, SEARCH_ENGINE_TOP_K)
|
||||
from webui_pages.utils import AsyncApiRequest
|
||||
import asyncio
|
||||
from typing import Any, List, Dict, Any
|
||||
|
||||
|
||||
app.add_static_files("/image", "img")
|
||||
|
||||
|
||||
class Session:
|
||||
def __init__(self) -> None:
|
||||
user = app.storage.user
|
||||
for k, v in self._attrs().items():
|
||||
user.setdefault(k, v)
|
||||
|
||||
def _attrs(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"messages": [],
|
||||
"query": "",
|
||||
"thinking": False,
|
||||
"cur_kb": "",
|
||||
"cur_temperature": TEMPERATURE,
|
||||
"chat_list": [],
|
||||
"cur_chat": "",
|
||||
}
|
||||
|
||||
@property
|
||||
def user(self):
|
||||
return app.storage.user
|
||||
|
||||
def __getattr__(self, attr: str) -> Any:
|
||||
if attr in self._attrs():
|
||||
return self.user[attr]
|
||||
else:
|
||||
raise AttributeError(attr)
|
||||
|
||||
def __setattr__(self, attr: str, val: Any) -> None:
|
||||
if attr in self._attrs():
|
||||
self.user[attr] = val
|
||||
else:
|
||||
raise AttributeError(attr)
|
||||
|
||||
|
||||
def make_header(left_drawer, right_drawer):
|
||||
with ui.header().classes("bg-black p-2") as header:
|
||||
with ui.link():
|
||||
ui.icon("menu", size="md").on("click", lambda: left_drawer.toggle())
|
||||
ui.image("img/logo-long-chatchat-trans-v2.png").props("fit=scale-down").classes("h-8 w-48 float-left")
|
||||
left_header = ui.row().props('id="left-header"')
|
||||
ui.element("q-space")
|
||||
right_header = ui.row().props('id="right-header"')
|
||||
ui.label(f"(Version: {VERSION})").classes("text-grey text-xs pt-4")
|
||||
with ui.link():
|
||||
ui.icon("menu", size="md").on("click", lambda: right_drawer.toggle())
|
||||
return left_header, right_header
|
||||
|
||||
|
||||
def make_left_drawer(links: List, current: str):
|
||||
with ui.left_drawer(bordered=True, elevated=True) as drawer:
|
||||
return drawer
|
||||
|
||||
|
||||
@ui.refreshable
|
||||
async def output_messages():
|
||||
session = Session()
|
||||
|
||||
for msg in session.messages:
|
||||
is_user = msg["role"] == "user"
|
||||
if is_user:
|
||||
name = "User"
|
||||
avatar = "/image/user_avatar.png"
|
||||
else:
|
||||
name = "AI"
|
||||
avatar = "/image/chatchat_icon_blue_square_v2.png"
|
||||
ele = ui.chat_message([], sent=False, name=None, avatar=avatar)
|
||||
with ele.add_slot("default"):
|
||||
ui.markdown(msg["content"])
|
||||
|
||||
ui.query("img.q-message-avatar").classes("self-start")
|
||||
(ui.query("div.q-message-text--received")
|
||||
.classes("bg-green-100")
|
||||
.style("border-radius: 5px;"))
|
||||
# (ui.query("div.q-message-text--received")
|
||||
# .run_method("remove_classes", ["q-message-text--received"]))
|
||||
# await ui.run_javascript("window.sc")
|
||||
|
||||
|
||||
@ui.page("/", title="Langchain-Chatchat WebUI")
|
||||
async def index(client: Client):
|
||||
ui.add_head_html('''<style>
|
||||
p > code {color: green;padding: 2px;}
|
||||
pre:has(code) {background-color: #eee; padding: 10px;} !important
|
||||
</style>''')
|
||||
|
||||
async def send():
|
||||
question = query.value.strip()
|
||||
query.value = ""
|
||||
|
||||
if not question:
|
||||
return
|
||||
|
||||
if question == "/clear":
|
||||
session.messages = []
|
||||
output_messages.refresh()
|
||||
return
|
||||
|
||||
session.thinking = True
|
||||
session.messages.append({"role": "user", "content": question})
|
||||
session.messages.append({"role": "assistant", "content": "Thinking..."})
|
||||
output_messages.refresh()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
text = ""
|
||||
async for chunk in api.chat_chat(question,
|
||||
stream=True,
|
||||
conversation_id=None,
|
||||
model=cur_llm_model.value,
|
||||
temperature=temperature.value):
|
||||
text += chunk.get("text", "")
|
||||
tail = " ▌"
|
||||
if text.count("```") % 2 == 1:
|
||||
if text[-1] != "`":
|
||||
tail += "\n```\n"
|
||||
elif text[-2:] == "``":
|
||||
tail += "`\n"
|
||||
elif text[-1:] == "`":
|
||||
tail += "``\n"
|
||||
session.messages[-1]["content"] = text + tail
|
||||
output_messages.refresh()
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
session.messages[-1]["content"] = text
|
||||
output_messages.refresh()
|
||||
await asyncio.sleep(0.1)
|
||||
session.thinking = False
|
||||
|
||||
session = Session()
|
||||
api = AsyncApiRequest()
|
||||
|
||||
left_drawer = make_left_drawer([], "")
|
||||
|
||||
with ui.right_drawer(bordered=True, elevated=True) as right_drawer:
|
||||
ui.markdown("### 灵感大全")
|
||||
user_name = ui.input("用户名称", value="用户")
|
||||
system_message = (ui.input("AI系统消息",
|
||||
value="你是一个聪明的人工智能助手,可以回答用户提出的问题。")
|
||||
.props("autogrow"))
|
||||
chat_image = ui.upload(label="上传图片").classes("w-full mt-5")
|
||||
chat_file = ui.upload(label="上传文件").classes("w-full mt-5")
|
||||
|
||||
left_header, right_header = make_header(left_drawer, right_drawer)
|
||||
|
||||
with left_header:
|
||||
chat_session = (ui.radio(["会话1", "会话2"], value="会话1")
|
||||
.props("inline")
|
||||
.classes("p-0"))
|
||||
|
||||
with left_drawer:
|
||||
ui.markdown("### 配置项")
|
||||
|
||||
def on_chat_mode_change(e: ValueChangeEventArguments):
|
||||
if e.value == "Agent对话":
|
||||
session.cur_temperature = temperature.value
|
||||
temperature.set_value(0.01)
|
||||
else:
|
||||
temperature.set_value(session.cur_temperature)
|
||||
|
||||
chat_mode = ui.select(["LLM 对话", "知识库问答", "搜索引擎问答", "Agent对话"],
|
||||
label="对话模式",
|
||||
value="LLM 对话",
|
||||
on_change=on_chat_mode_change,
|
||||
)
|
||||
ui.separator()
|
||||
|
||||
with ui.expansion("模型配置", icon="psychology", value=True):
|
||||
running_models = await api.list_running_models()
|
||||
config_models = await api.list_config_models()
|
||||
models = {x: f"{x}(running)" for x in running_models}
|
||||
for v in config_models.values():
|
||||
for m in v:
|
||||
if m not in running_models:
|
||||
models.update({m: m})
|
||||
cur_llm_model = ui.select(models, label="LLM模型", value=LLM_MODELS[0], with_input=True, clearable=True)
|
||||
temperature = ui.number("Temperature", value=TEMPERATURE, min=0, max=1, step=0.01)
|
||||
history_len = ui.number("历史对话轮数", value=HISTORY_LEN, min=0, max=10)
|
||||
|
||||
with (ui.expansion("知识库配置", icon="book", value=True)
|
||||
.bind_visibility_from(chat_mode, "value", value="知识库问答")):
|
||||
def on_kb_change(e: ValueChangeEventArguments):
|
||||
session.cur_kb = e.value
|
||||
|
||||
kb_names = await api.list_knowledge_bases()
|
||||
kb_name = ui.select(kb_names,
|
||||
label="知识库",
|
||||
value=session.cur_kb or kb_names[0],
|
||||
on_change=on_kb_change,
|
||||
)
|
||||
vector_top_k = ui.number("Top K", value=VECTOR_SEARCH_TOP_K, min=1, max=10)
|
||||
|
||||
with (ui.expansion("搜索引擎配置", icon="travel_explore", value=True)
|
||||
.bind_visibility_from(chat_mode, "value", value="搜索引擎问答")):
|
||||
search_engine = ui.select(["Bing", "Duckduckgo"], value="Bing")
|
||||
search_top_k = ui.number("Top K", value=SEARCH_ENGINE_TOP_K, min=1, max=10)
|
||||
|
||||
await client.connected()
|
||||
with ui.column():
|
||||
await output_messages()
|
||||
|
||||
with ui.row().classes("absolute bottom-2 left-20 right-20"):
|
||||
# command = ui.select(["/clear", "/upload"]).classes("w-1/4")
|
||||
query = (ui.input(autocomplete=["/clear", "/upload"],
|
||||
placeholder="input your question here.")
|
||||
.classes("flex-grow")
|
||||
.props('autogrow outlined autofocus counter dense clearable')
|
||||
.bind_value(session, "query")
|
||||
.on("keydown.enter.prevent", send)
|
||||
)
|
||||
with query.add_slot("after"):
|
||||
ui.button(icon="send", on_click=send).classes("self-center").props("small dense p-0 m-0")
|
||||
# query._props["autofocus"] = True
|
||||
# query._props["autogrow"] = True
|
||||
# query._props["placeholder"] = "input your question here."
|
||||
# query._props[":list"] = '["/clear", "/upload"]'
|
||||
# query._props["shadow-text"] = ["/clear", "/upload"]
|
||||
# ui.input(autocomplete=["/clear", "/upload"])
|
||||
|
||||
|
||||
|
||||
# TODO:
|
||||
# 右侧栏上下文:system_message, picture, file, 知识库文档预览
|
||||
|
||||
|
||||
if __name__ in {"__main__", "__mp_main__"}:
|
||||
ui.run(port=5000, storage_secret="111111", reload=True)
|
||||
@@ -0,0 +1,74 @@
|
||||
|
||||

|
||||
|
||||
> 欢迎来到 Langchain‐Chatchat 的 Wiki , 在这里开启 Langchain 与大模型的邂逅!
|
||||
|
||||
|
||||
## 项目简介
|
||||
|
||||
📃 **LangChain-Chatchat** (原 Langchain-ChatGLM): 基于 Langchain 与 ChatGLM 等大语言模型的本地知识库问答应用实现。
|
||||
|
||||
🤖️ 一种利用 [langchain](https://github.com/hwchase17/langchain) 思想实现的基于本地知识库的问答应用,目标期望建立一套对中文场景与开源模型支持友好、可离线运行的知识库问答解决方案。
|
||||
|
||||
💡 受 [GanymedeNil](https://github.com/GanymedeNil) 的项目 [document.ai](https://github.com/GanymedeNil/document.ai) 和 [AlexZhangji](https://github.com/AlexZhangji) 创建的 [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) 启发,建立了全流程可使用开源模型实现的本地知识库问答应用。本项目的最新版本中通过使用 [FastChat](https://github.com/lm-sys/FastChat) 接入 Vicuna, Alpaca, LLaMA, Koala, RWKV 等模型,依托于 [langchain](https://github.com/langchain-ai/langchain) 框架支持通过基于 [FastAPI](https://github.com/tiangolo/fastapi) 提供的 API 调用服务,或使用基于 [Streamlit](https://github.com/streamlit/streamlit) 的 WebUI 进行操作。
|
||||
|
||||
✅ 依托于本项目支持的开源 LLM 与 Embedding 模型,本项目可实现全部使用**开源**模型**离线私有部署**。与此同时,本项目也支持 OpenAI GPT API 的调用,并将在后续持续扩充对各类模型及模型 API 的接入。
|
||||
|
||||
⛓️ 本项目实现原理如下图所示,过程包括加载文件 -> 读取文本 -> 文本分割 -> 文本向量化 -> 问句向量化 -> 在文本向量中匹配出与问句向量最相似的 `top k`个 -> 匹配出的文本作为上下文和问题一起添加到 `prompt`中 -> 提交给 `LLM`生成回答。
|
||||
|
||||
|
||||
## 算法流程
|
||||
|
||||
大家可以前往Bilibili平台查看原理介绍视频:
|
||||
|
||||
📺 [原理介绍视频](https://www.bilibili.com/video/BV13M4y1e7cN/?share_source=copy_web&vd_source=e6c5aafe684f30fbe41925d61ca6d514)
|
||||
|
||||
开发组也为大家绘制了一张实现原理图,效果如下:
|
||||
|
||||

|
||||
|
||||
从文档处理角度来看,实现流程如下:
|
||||
|
||||

|
||||
|
||||
|
||||
## 技术路线图(截止0.2.10)
|
||||
|
||||
- [X] Langchain 应用
|
||||
- [X] 本地数据接入
|
||||
- [X] 接入非结构化文档
|
||||
- [X] .txt, .rtf, .epub, .srt
|
||||
- [X] .eml, .msg
|
||||
- [X] .html, .xml, .toml, .mhtml
|
||||
- [X] .json, .jsonl
|
||||
- [X] .md, .rst
|
||||
- [X] .docx, .doc, .pptx, .ppt, .odt
|
||||
- [X] .enex
|
||||
- [X] .pdf
|
||||
- [X] .jpg, .jpeg, .png, .bmp
|
||||
- [X] .py, .ipynb
|
||||
- [X] 结构化数据接入
|
||||
- [X] .csv, .tsv
|
||||
- [X] .xlsx, .xls, .xlsd
|
||||
- [X] 分词及召回
|
||||
- [X] 接入不同类型 TextSplitter
|
||||
- [X] 优化依据中文标点符号设计的 ChineseTextSplitter
|
||||
- [X] 搜索引擎接入
|
||||
- [X] Bing 搜索
|
||||
- [X] DuckDuckGo 搜索
|
||||
- [X] Metaphor 搜索
|
||||
- [X] Agent 实现
|
||||
- [X] 基础React形式的Agent实现,包括调用计算器等
|
||||
- [X] Langchain 自带的Agent实现和调用
|
||||
- [X] 智能调用不同的数据库和联网知识
|
||||
- [X] LLM 模型接入
|
||||
- [X] 支持通过调用 [FastChat](https://github.com/lm-sys/fastchat) api 调用 llm
|
||||
- [X] 支持 ChatGLM API 等 LLM API 的接入
|
||||
- [X] 支持 Langchain 框架支持的LLM API 接入
|
||||
- [X] Embedding 模型接入
|
||||
- [X] 支持调用 HuggingFace 中各开源 Emebdding 模型
|
||||
- [X] 支持 OpenAI Embedding API 等 Embedding API 的接入
|
||||
- [X] 支持 智谱AI、百度千帆、千问、MiniMax 等在线 Embedding API 的接入
|
||||
- [X] 基于 FastAPI 的 API 方式调用
|
||||
- [X] Web UI
|
||||
- [X] 基于 Streamlit 的 Web UI
|
||||
@@ -0,0 +1,58 @@
|
||||
__导航栏,一切从这里出发__
|
||||
## [Home](https://github.com/chatchat-space/Langchain-Chatchat/wiki)
|
||||
## [支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
|
||||
* [LLM 模型支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#llm-%E6%A8%A1%E5%9E%8B%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
|
||||
* [Embedding 模型支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#embedding-%E6%A8%A1%E5%9E%8B%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
|
||||
* [分词器支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#%E5%88%86%E8%AF%8D%E5%99%A8%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
|
||||
* [向量数据库支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#%E5%90%91%E9%87%8F%E6%95%B0%E6%8D%AE%E5%BA%93%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
|
||||
* [工具支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#%E5%B7%A5%E5%85%B7%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
|
||||
|
||||
## [开发环境部署](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2)
|
||||
|
||||
### 前期准备
|
||||
* [软件要求](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E8%BD%AF%E4%BB%B6%E8%A6%81%E6%B1%82)
|
||||
* [硬件要求](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E7%A1%AC%E4%BB%B6%E8%A6%81%E6%B1%82)
|
||||
* [VPN](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#vpn)
|
||||
|
||||
### 部署代码
|
||||
* [Docker 部署](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#docker-%E9%83%A8%E7%BD%B2)
|
||||
* [最轻模式部署方案](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E6%9C%80%E8%BD%BB%E6%A8%A1%E5%BC%8F%E6%9C%AC%E5%9C%B0%E9%83%A8%E7%BD%B2%E6%96%B9%E6%A1%88)
|
||||
* [常规模式本地部署方案](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E5%B8%B8%E8%A7%84%E6%A8%A1%E5%BC%8F%E6%9C%AC%E5%9C%B0%E9%83%A8%E7%BD%B2%E6%96%B9%E6%A1%88)
|
||||
+ [环境安装](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E6%9C%AC%E5%9C%B0%E9%83%A8%E7%BD%B2%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85)
|
||||
+ [模型下载](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E6%A8%A1%E5%9E%8B%E4%B8%8B%E8%BD%BD)
|
||||
+ [初始化知识库](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E5%88%9D%E5%A7%8B%E5%8C%96%E7%9F%A5%E8%AF%86%E5%BA%93)
|
||||
+ [一键启动](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E4%B8%80%E9%94%AE%E5%90%AF%E5%8A%A8)
|
||||
+ [多卡加载](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E5%A4%9A%E5%8D%A1%E5%8A%A0%E8%BD%BD)
|
||||
|
||||
## [参数配置](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE)
|
||||
|
||||
* [基础配置项 basic_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E5%9F%BA%E7%A1%80%E9%85%8D%E7%BD%AE%E9%A1%B9-basic_configpy)
|
||||
* [模型配置项 model_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E6%A8%A1%E5%9E%8B%E9%85%8D%E7%BD%AE%E9%A1%B9-model_configpy)
|
||||
* [提示词配置项 prompt_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E6%8F%90%E7%A4%BA%E8%AF%8D%E9%85%8D%E7%BD%AE%E9%A1%B9-prompt_configpy)
|
||||
* [数据库配置 kb_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E6%95%B0%E6%8D%AE%E5%BA%93%E9%85%8D%E7%BD%AE-kb_configpy)
|
||||
* [服务和端口配置项 server_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E6%9C%8D%E5%8A%A1%E5%92%8C%E7%AB%AF%E5%8F%A3%E9%85%8D%E7%BD%AE%E9%A1%B9-server_configpy)
|
||||
* [覆盖配置文件 或者配置 startup.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E8%A6%86%E7%9B%96%E9%85%8D%E7%BD%AE%E6%96%87%E4%BB%B6-%E6%88%96%E8%80%85%E9%85%8D%E7%BD%AE-startuppy)
|
||||
|
||||
## [自定义](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89)
|
||||
|
||||
* [使用自定义的分词器](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E5%88%86%E8%AF%8D%E5%99%A8)
|
||||
* [使用自定义的 Agent 工具](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84-agent-%E5%B7%A5%E5%85%B7)
|
||||
* [使用自定义的微调模型](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E5%BE%AE%E8%B0%83%E6%A8%A1%E5%9E%8B)
|
||||
* [使用自定义的嵌入模型](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E5%B5%8C%E5%85%A5%E6%A8%A1%E5%9E%8B)
|
||||
* [日志功能](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E6%97%A5%E5%BF%97%E5%8A%9F%E8%83%BD)
|
||||
|
||||
## [最佳实践](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5)
|
||||
* [推荐的模型组合](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E6%8E%A8%E8%8D%90%E7%9A%84%E6%A8%A1%E5%9E%8B%E7%BB%84%E5%90%88)
|
||||
* [微调模型加载实操](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E5%BE%AE%E8%B0%83%E6%A8%A1%E5%9E%8B%E5%8A%A0%E8%BD%BD%E5%AE%9E%E6%93%8D)
|
||||
* [预处理知识库文件](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E9%A2%84%E5%A4%84%E7%90%86%E7%9F%A5%E8%AF%86%E5%BA%93%E6%96%87%E4%BB%B6)
|
||||
* [自定义的关键词调整Embedding模型](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E5%85%B3%E9%94%AE%E8%AF%8D%E8%B0%83%E6%95%B4embedding%E6%A8%A1%E5%9E%8B)
|
||||
* [实际使用效果](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E5%AE%9E%E9%99%85%E4%BD%BF%E7%94%A8%E6%95%88%E6%9E%9C)
|
||||
|
||||
## [做出贡献](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%81%9A%E5%87%BA%E8%B4%A1%E7%8C%AE)
|
||||
|
||||
* [Issue 规范](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%81%9A%E5%87%BA%E8%B4%A1%E7%8C%AE#issue-%E8%A7%84%E8%8C%83)
|
||||
* [PR 规范](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%81%9A%E5%87%BA%E8%B4%A1%E7%8C%AE#pr-%E8%A7%84%E8%8C%83)
|
||||
|
||||
## [合作伙伴](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%90%88%E4%BD%9C%E4%BC%99%E4%BC%B4)
|
||||
|
||||
## [常见问题](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)
|
||||
@@ -0,0 +1,51 @@
|
||||
## Issue 规范
|
||||
> 什么样的 issue 是不会被回复的
|
||||
|
||||
1. 在提出issue前,请查看您的提出的问题是否已经在 issue 列表或者 discussion 内出现,提出重复的问题将 **被关闭** 。
|
||||
2. 非项目推荐配置的任何关于环境配置问题的 issue 通常将 **不会由官方回复**,请您在微信沟通群内咨询。
|
||||
3. 与项目无关的 issue 将 **不会被回复** 。
|
||||
4. 超过30天没有更新动态的 issue 将 **被关闭** 。
|
||||
5. 语言非中文和英语的 issue 将 **被关闭** 。
|
||||
6. 没有尝试过解决方案的 issue 将 **被关闭** 。
|
||||
7. 没有提出任何贡献(例如PR,论文)的 feature / enhancement 将会 **被关闭** 。您可以在 discussion 中的 **希望开发的功能** 讨论区中留言,我们开发组会进行回复。
|
||||
8. 不按照 Issue 规范提出的 issue 可能将 **被关闭** 。
|
||||
|
||||
> 如何提 issue
|
||||
|
||||
1. 简要阐述你的问题
|
||||
2. 配上报错日志以(运行报错)或者运行不理想的效果图(原本期望和现实的)
|
||||
3. 配上对应的配置文件以你的环境
|
||||
4. 你尝试过的解决方法。(非常重要)
|
||||
5. 按照模板提出Issue
|
||||
|
||||
## PR 规范
|
||||
|
||||
> 什么样的 PR 是不会被接受的
|
||||
1. 非紧急bug修复的PR并直接提交到```master```的PR。
|
||||
2. 仅仅修改```Readme.md```和```配置文件```的。
|
||||
3. 跟项目组已经开发的内容冲突的(dev版本),将可能被拒绝。
|
||||
|
||||
首先请注意所有的PR需要以dev分支为基准,master分支仅用来发行与紧急bug修复。
|
||||
|
||||
> 提出新的通用自定义分词器
|
||||
|
||||
1. 将您的分词器所在的代码文件放在```text_splitter```文件夹下,文件名为您的分词器名字`my_splitter.py`,然后在`__init__.py`中导入您的分词器。
|
||||
2. 发起PR,并说明您的分词器面向的场景或者改进之处。我们非常期待您能举例一个具体的应用场景。
|
||||
|
||||
> 提出新的 Agent 工具
|
||||
|
||||
1. 将您的Agent工具所在的代码放在 ```server/agent```文件夹下,文件名为您的工具名字`my_tools.py`,然后在`tools.py`中导入您的工具。
|
||||
2. 发起PR,说明您的工具面向的场景或改进之处,并说明如何进行测试和调用。我们非常期待您能举例一个具体的应用场景。
|
||||
|
||||
> 提出新的自定义模型
|
||||
|
||||
1. 将您的模型贡献到huggingface平台上,并开放给开发人员下载。
|
||||
2. 发起PR,说明您的工具面向的场景或改进之处,并说明如何进行测试和调用。我们非常期待您能举例一个具体的应用场景。
|
||||
3. 由开发人员测试通过后,将您的模型添加到合作模型名单中。
|
||||
|
||||
|
||||
> 修复 Bug & 增加其他新功能
|
||||
|
||||
1. 一个 PR 中必须 **只有一个或者一类功能增加,或者修复一个bug** ,多个功能混合的 PR 将 **不会被接受** 。
|
||||
2. 说明您增加的功能或者改进之处,并说明如何进行测试和调用。我们非常期待您能举例一个具体的应用场景。
|
||||
|
||||
@@ -0,0 +1,156 @@
|
||||
在开始参数配置之前,先执行以下脚本
|
||||
```shell
|
||||
python copy_config_example.py
|
||||
```
|
||||
该脚本将会将所有```config```目录下的配置文件样例复制一份到```config```目录下,方便开发者进行配置。
|
||||
接着,开发者可以根据自己的需求,对配置文件进行修改。
|
||||
|
||||
## 基础配置项 basic_config.py
|
||||
该配置基负责记录日志的格式和储存路径,通常不需要修改。
|
||||
|
||||
## 模型配置项 model_config.py
|
||||
本文件包含本地LLM模型、本地Embeddings模型、在线LLM模型API的相关配置。
|
||||
|
||||
- 本地模型路径配置。建议将所有下载的模型放到一个统一的目录下,然后将`MODEL_ROOT_PATH`指定为该目录,只要模型目录名称符合下列情况之一(以text2vec为例),即可自动识别加载:
|
||||
- text2vec,即MODEL_PATH中的键
|
||||
- GanymedeNil/text2vec-large-chinese,即MODEL_PATH中的值
|
||||
- text2vec-large-chinese,即MODEL_PATH中的值的简写形式
|
||||
|
||||
- 在线模型API配置。在`ONLINE_LLM_MODEL`已经预先写好了所有支持的在线API服务,通常只需要把申请的API_KEY等填入即可。
|
||||
有些在线API服务需要安装额外的依赖:
|
||||
- zhipu-api: zhipuai
|
||||
- fangzhou-api: volcengine>=1.0.106
|
||||
- qianfan-api: qianfan
|
||||
- qwen-api: dashscope
|
||||
|
||||
- HISTORY_LEN。历史对话轮数通常不建议设置超过10,因为这可能导致以下问题
|
||||
1. 显存占用过高:尤其是部分模型,本身就已经要占用满显存的情况下,保留太多历史,一次传入token太多,可能会爆显存。
|
||||
2. 速度处理很慢:还是因为一次传入了太多token,导致速度很慢。
|
||||
|
||||
- TEMPERATURE。通常不建议设置过高。
|
||||
在Agent对话模式和知识库问答中,我们强烈建议将要其设置成0或者接近于0。
|
||||
|
||||
- Agent_MODEL = None
|
||||
我们支持用户使用“模型接力赛”的用法,即:
|
||||
选择的大模型仅能调用工具,但是在工具中表现较差,则这个工具作为 “模型调用工具”
|
||||
如果用户设置了Agent_MODEL,则在 Agent 中,使用Agent_MODEL来执行任务,否则,使用LLM_MODEL
|
||||
|
||||
|
||||
## 提示词配置项 prompt_config.py
|
||||
|
||||
提示词配置分为三个板块,分别对应三种聊天类型。
|
||||
- llm_chat: 基础的对话提示词, 通常来说,直接是用户输入的内容,没有系统提示词。
|
||||
- knowledge_base_chat: 与知识库对话的提示词,在模板中,我们为开发者设计了一个系统提示词,开发者可以自行更改。
|
||||
- agent_chat: 与Agent对话的提示词,同样,我们为开发者设计了一个系统提示词,开发者可以自行更改。
|
||||
|
||||
prompt模板使用Jinja2语法,简单点就是用双大括号代替f-string的单大括号
|
||||
请注意,本配置文件支持热加载,修改prompt模板后无需重启服务。
|
||||
|
||||
## 数据库配置 kb_config.py
|
||||
请确认本地分词器路径是否已经填写,如:
|
||||
|
||||
```
|
||||
text_splitter_dict = {
|
||||
"ChineseRecursiveTextSplitter": {
|
||||
"source":"huggingface", # 选择tiktoken则使用openai的方法,不填写则默认为字符长度切割方法。
|
||||
"tokenizer_name_or_path":"", # 空格不填则默认使用大模型的分词器。
|
||||
}
|
||||
}
|
||||
```
|
||||
设置好的分词器需要再```TEXT_SPLITTER_NAME```中指定并应用。
|
||||
|
||||
在这里,通常使用```huggingface```的方法,并且,我们推荐使用大模型自带的分词器来完成任务。
|
||||
|
||||
请注意,使用```gpt2```分词器将要访问huggingface官网下载权重。
|
||||
|
||||
我们还支持使用```tiktoken``` 和传统的 按照长度分词的方式,开发者可以自行配置。
|
||||
|
||||
如果希望调用自己的分词器,请参考[最佳实践]部分。
|
||||
|
||||
```kbs_config```设置了使用的向量数据库,目前可以选择
|
||||
- ```faiss```: 使用faiss数据库,需要安装faiss-gpu
|
||||
- ```milvus```: 使用milvus数据库,需要安装milvus并进行端口配置
|
||||
- ```pg```: 使用pg数据库,需要配置connection_uri
|
||||
|
||||
## 服务和端口配置项 server_config.py
|
||||
|
||||
通常,这个页面并不需要进行大量的修改,仅需确保对应的端口打开,并不互相冲突即可。
|
||||
|
||||
如果你是Linux系统推荐设置
|
||||
|
||||
```
|
||||
DEFAULT_BIND_HOST ="0.0.0.0"
|
||||
```
|
||||
如果使用联网模型,则需要关注联网模型的端口。
|
||||
|
||||
这些模型必须是在model_config.MODEL_PATH或ONLINE_MODEL中正确配置的。
|
||||
|
||||
#在启动startup.py时,可用通过`--model-worker --model-name xxxx`指定模型,不指定则为LLM_MODEL
|
||||
|
||||
|
||||
## 覆盖配置文件 或者配置 startup.py
|
||||
|
||||
在 ```server_config.py```中有以下配置文件被注释了
|
||||
|
||||
```
|
||||
"gpus": None, # 使用的GPU,以str的格式指定,如"0,1",如失效请使用CUDA_VISIBLE_DEVICES="0,1"等形式指定
|
||||
"num_gpus": 1, # 使用GPU的数量
|
||||
"max_gpu_memory":"20GiB", # 每个GPU占用的最大显存
|
||||
|
||||
以下为model_worker非常用参数,可根据需要配置
|
||||
"load_8bit": False, # 开启8bit量化
|
||||
"cpu_offloading": None,
|
||||
"gptq_ckpt": None,
|
||||
"gptq_wbits": 16,
|
||||
"gptq_groupsize": -1,
|
||||
"gptq_act_order": False,
|
||||
"awq_ckpt": None,
|
||||
"awq_wbits": 16,
|
||||
"awq_groupsize": -1,
|
||||
"model_names": [LLM_MODEL],
|
||||
"conv_template": None,
|
||||
"limit_worker_concurrency": 5,
|
||||
"stream_interval": 2,
|
||||
"no_register": False,
|
||||
"embed_in_truncate": False,
|
||||
|
||||
以下为vllm_woker配置参数,注意使用vllm必须有gpu,仅在Linux测试通过
|
||||
|
||||
tokenizer = model_path # 如果tokenizer与model_path不一致在此处添加
|
||||
'tokenizer_mode':'auto',
|
||||
'trust_remote_code':True,
|
||||
'download_dir':None,
|
||||
'load_format':'auto',
|
||||
'dtype':'auto',
|
||||
'seed':0,
|
||||
'worker_use_ray':False,
|
||||
'pipeline_parallel_size':1,
|
||||
'tensor_parallel_size':1,
|
||||
'block_size':16,
|
||||
'swap_space':4 , # GiB
|
||||
'gpu_memory_utilization':0.90,
|
||||
'max_num_batched_tokens':2560,
|
||||
'max_num_seqs':256,
|
||||
'disable_log_stats':False,
|
||||
'conv_template':None,
|
||||
'limit_worker_concurrency':5,
|
||||
'no_register':False,
|
||||
'num_gpus': 1
|
||||
'engine_use_ray': False,
|
||||
'disable_log_requests': False
|
||||
```
|
||||
|
||||
在这些参数中,如果没有设置,则使用```startup.py```中的默认值,如果设置了,则使用设置的值。
|
||||
因此,强烈建议开发不要在```startup.py```中进行配置,而应该在```server_config.py```中进行配置。避免配置文件覆盖。
|
||||
|
||||
## 选择使用的模型
|
||||
在```model_config.py```完成模型配置后,还不能直接使用,需要在该文件下配置本地模型的运行方式或在线模型的API,例如
|
||||
```
|
||||
"agentlm-7b": { # 使用default中的IP和端口
|
||||
"device": "cuda",
|
||||
},
|
||||
"zhipu-api": { # 请为每个要运行的在线API设置不同的端口
|
||||
"port": 21001,
|
||||
},
|
||||
```
|
||||
本地模型使用default中的IP和端口,在线模型可以自己选择端口
|
||||
@@ -0,0 +1,37 @@
|
||||
## 合作伙伴名单
|
||||
🎉 Langchain-Chatchat 项目合作伙伴,感谢以下合作伙伴对本项目的支持。
|
||||
|
||||
<table style="width:100%; border-collapse:collapse;">
|
||||
<tr>
|
||||
<td style="width:30%; text-align:center; vertical-align:middle;">
|
||||
<img src="https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/partners/chatglm.svg" alt="ChatGLM Logo" width="300" height="100">
|
||||
</td>
|
||||
<td style="width:80%; vertical-align:middle;">
|
||||
<a href="https://chatglm.cn/" target="_blank" style="text-decoration:none;">ChatGLM: 国内最早的开源中文大模型之一</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="width:30%; text-align:center; vertical-align:middle;">
|
||||
<img src="https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/partners/autodl.svg" alt="ChatGLM Logo" width="300" height="100">
|
||||
</td>
|
||||
<td style="width:80%; vertical-align:middle;">
|
||||
<a href="https://www.autodl.com/" target="_blank" style="text-decoration:none;"> AutoDL 提供弹性、好用、省钱的云GPU租用服务。缺显卡就上 AutoDL.com </a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="width:30%; text-align:center; vertical-align:middle;">
|
||||
<img src="https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/partners/aws.svg" alt="ChatGLM Logo" width="300" height="100">
|
||||
</td>
|
||||
<td style="width:80%; vertical-align:middle;">
|
||||
<a href="https://aws.amazon.com/" target="_blank" style="text-decoration:none;"> 全球云计算领导者 </a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="width:30%; text-align:center; vertical-align:middle;">
|
||||
<img src="https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/partners/zhenfund.svg" alt="Zhenge Logo" width="300" height="100">
|
||||
</td>
|
||||
<td style="width:80%; vertical-align:middle;">
|
||||
<a href="https://www.zhenfund.com/" target="_blank" style="text-decoration:none;">我们相信预测未来的最好方式是自己来创造。我们在这里等你。</a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
@@ -0,0 +1,277 @@
|
||||
> 以下是一些常见的问题和回答
|
||||
#### Q: 我要提出问题,怎么办
|
||||
|
||||
A: 首先,你要观察一下你的问题是否有没有被解决,建议翻看以往的Issue和Discussion,如果有,先按照他们的方法来做。
|
||||
如果没有,按照以下步骤
|
||||
1. 这是一个bug还是一个讨论问题,如果是讨论问题,放在disscusion,如果是bug和feature,放在issue。
|
||||
2. 如果要提出feature,提交一份对应的PR会让开发者更重视你的问题,否则你的问题很有可能被直接关闭。
|
||||
|
||||
#### Q: ValueError: Found modules on cpu/disk. Using Exllama backend requires all the modules to be on GPU. You can deactivate exllama backend by setting disable_exllama=True in the quantization config object.
|
||||
|
||||
A: 这是Fschat依赖源码的问题,请查看以下解决方式,通过修改'Fschat'库中的对应内容。
|
||||
|
||||
https://github.com/lm-sys/FastChat/issues/2459
|
||||
|
||||
https://stackoverflow.com/questions/76983305/fine-tuning-thebloke-llama-2-13b-chat-gptq-model-with-hugging-face-transformers
|
||||
|
||||
---
|
||||
|
||||
#### Q: AttributeError: 'ChatGLMTokenizer' object has no attribute 'tokenizer'
|
||||
|
||||
A: 查看以下Issue
|
||||
|
||||
https://github.com/chatchat-space/Langchain-Chatchat/issues/1835
|
||||
|
||||
---
|
||||
|
||||
#### Q: Using a Qwen API key raises `multiple widgets with the same key=...`

A: Make sure the key is a `dashscope` platform key, and that the installed `dashscope` package satisfies the version pinned in our dependencies.
|
||||
|
||||
---
|
||||
|
||||
#### Q:linux下向量化PDF文件时出错:`ImportError: 从文件 *.pdf 加载文档时出错:libGL.so.1: cannot open shared object file: No such file or directory`
|
||||
|
||||
A: 这是系统缺少必要的动态库,可以手动安装:`libgl1-mesa-glx` 和 `libglib2.0-0`
|
||||
|
||||
---
|
||||
|
||||
#### Q: 各种Int4模型无法载入
|
||||
A. 由于各种Int4模型与Fp16模型并不相似,且量化技术可能有所不同,无法载入可能是因为fschat不支持或者缺少对应的依赖,需要查看对应仓库的issue获得更多信息。开发组没有针对Int4模型进行优化。
|
||||
|
||||
---
|
||||
|
||||
#### Q1: 本项目支持哪些文件格式?
|
||||
|
||||
A1: 目前已测试支持 txt、docx、md、pdf、csv、html、json 等格式文件
|
||||
|
||||
更多文件格式请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)。目前已知文档中若含有特殊字符,可能存在文件无法加载的问题。
|
||||
|
||||
---
|
||||
|
||||
#### Q2: 使用过程中 Python 包 `nltk`发生了 `Resource punkt not found.`报错,该如何解决?
|
||||
|
||||
A2: 方法一:下载 https://github.com/nltk/nltk_data/raw/gh-pages/packages/tokenizers/punkt.zip 并解压,将解压得到的文件夹放到 `nltk_data/tokenizers` 存储路径下。
|
||||
|
||||
`nltk_data` 存储路径可以通过 `nltk.data.path` 查询。
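例如(示意):

```python
import nltk

print(nltk.data.path)  # 列出 nltk 搜索 nltk_data 的所有路径
```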
|
||||
|
||||
方法二:执行python代码
|
||||
|
||||
```python
import nltk
nltk.download("punkt")  # 仅下载 punkt 资源;不带参数时会打开交互式下载器
```
|
||||
|
||||
---
|
||||
|
||||
#### Q3: 使用过程中 Python 包 `nltk`发生了 `Resource averaged_perceptron_tagger not found.`报错,该如何解决?
|
||||
|
||||
A3:
|
||||
|
||||
方法一:将 https://github.com/nltk/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip 下载,解压放到 `nltk_data/taggers` 存储路径下。
|
||||
|
||||
`nltk_data` 存储路径可以通过 `nltk.data.path` 查询。
|
||||
|
||||
方法二:执行python代码
|
||||
|
||||
```python
import nltk
nltk.download("averaged_perceptron_tagger")  # 仅下载该资源;不带参数时会打开交互式下载器
```
|
||||
|
||||
---
|
||||
|
||||
#### Q4: 本项目可否在 colab 中运行?
|
||||
|
||||
A4: 可以尝试使用 chatglm-6b-int4 模型在 colab 中运行,需要注意的是,如需在 colab 中运行 Web UI,需将 `webui.py`中 `demo.queue(concurrency_count=3).launch( server_name='0.0.0.0', share=False, inbrowser=False)`中参数 `share`设置为 `True`。
|
||||
|
||||
---
|
||||
|
||||
#### Q5: 在 Anaconda 中使用 pip 安装包无效如何解决?
|
||||
|
||||
A5: 此问题是系统环境问题,详细见 [在Anaconda中使用pip安装包无效问题](在Anaconda中使用pip安装包无效问题.md)
|
||||
|
||||
---
|
||||
|
||||
#### Q6: 本项目中所需模型如何下载至本地?
|
||||
|
||||
A6: 本项目中使用的模型均为 `huggingface.co` 中可下载的开源模型,以默认选择的 `chatglm-6b`和 `text2vec-large-chinese`模型为例,下载模型可执行如下代码:
|
||||
|
||||
```shell
|
||||
# 安装 git lfs
|
||||
$ git lfs install
|
||||
|
||||
# 下载 LLM 模型
|
||||
$ git clone https://huggingface.co/THUDM/chatglm-6b /your_path/chatglm-6b
|
||||
|
||||
# 下载 Embedding 模型
|
||||
$ git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese /your_path/text2vec
|
||||
|
||||
# 模型需要更新时,可打开模型所在文件夹后拉取最新模型文件/代码
|
||||
$ git pull
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### Q7: `huggingface.co`中模型下载速度较慢怎么办?
|
||||
|
||||
A7: 可使用本项目用到的模型权重文件百度网盘地址:
|
||||
|
||||
- ernie-3.0-base-zh.zip 链接: https://pan.baidu.com/s/1CIvKnD3qzE-orFouA8qvNQ?pwd=4wih
|
||||
- ernie-3.0-nano-zh.zip 链接: https://pan.baidu.com/s/1Fh8fgzVdavf5P1omAJJ-Zw?pwd=q6s5
|
||||
- text2vec-large-chinese.zip 链接: https://pan.baidu.com/s/1sMyPzBIXdEzHygftEoyBuA?pwd=4xs7
|
||||
- chatglm-6b-int4-qe.zip 链接: https://pan.baidu.com/s/1DDKMOMHtNZccOOBGWIOYww?pwd=22ji
|
||||
- chatglm-6b-int4.zip 链接: https://pan.baidu.com/s/1pvZ6pMzovjhkA6uPcRLuJA?pwd=3gjd
|
||||
- chatglm-6b.zip 链接: https://pan.baidu.com/s/1B-MpsVVs1GHhteVBetaquw?pwd=djay
|
||||
|
||||
---
|
||||
|
||||
#### Q8: 老版本和新版本无法兼容怎么办?
|
||||
|
||||
A8: 保存老版本的配置文件,删除老版本代码并下载新版本代码后,根据新版本的配置文件格式进行修改。
|
||||
|
||||
在 ```0.2.6```后,运行环境和配置文件发生重大变化,建议重新配置环境和配置文件,并重建知识库。
|
||||
|
||||
|
||||
---
|
||||
|
||||
#### Q9: 显卡内存爆了,提示 "OutOfMemoryError: CUDA out of memory"
|
||||
|
||||
A9: 将 `VECTOR_SEARCH_TOP_K` 和 `HISTORY_LEN` 的值调低,比如 `VECTOR_SEARCH_TOP_K = 3` 和 `LLM_HISTORY_LEN = 2`,这样由 `query` 和 `context` 拼接得到的 `prompt` 会变短,减少显存的占用;或者使用量化模型减少显存占用。
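示意写法(具体在哪个配置文件中设置,以你所用版本的配置为准):

```python
# 调低检索条数与历史轮数,缩短拼接后的 prompt,从而降低显存占用(数值取自上文示例)
VECTOR_SEARCH_TOP_K = 3
LLM_HISTORY_LEN = 2
```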
|
||||
|
||||
---
|
||||
|
||||
#### Q10: 执行 `pip install -r requirements.txt` 过程中遇到 python 包,如 langchain 找不到对应版本的问题
|
||||
|
||||
A10: 更换 pypi 源后重新安装,如阿里源、清华源等,网络条件允许时建议直接使用 pypi.org 源,具体操作命令如下:
|
||||
|
||||
```shell
|
||||
# 使用 pypi 源
|
||||
$ pip install -r requirements.txt -i https://pypi.python.org/simple
|
||||
```
|
||||
|
||||
或
|
||||
|
||||
```shell
|
||||
# 使用阿里源
|
||||
$ pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/
|
||||
```
|
||||
|
||||
或
|
||||
|
||||
```shell
|
||||
# 使用清华源
|
||||
$ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### Q11: 启动 api.py 时 upload_file 接口抛出 `partially initialized module 'charset_normalizer' has no attribute 'md__mypyc' (most likely due to a circular import)`
|
||||
|
||||
A11: 这是由于 charset_normalizer 模块版本过高导致的,需要降低 charset_normalizer 的版本,测试在 charset_normalizer==2.1.0 上可用。
|
||||
|
||||
---
|
||||
|
||||
#### Q12: 调用api中的 `bing_search_chat` 接口时,报出 `Failed to establish a new connection: [Errno 110] Connection timed out`
|
||||
|
||||
A12: 这是因为服务器加了防火墙,需要联系管理员加白名单;如果是公司的服务器,通常无法自行解决。
|
||||
|
||||
---
|
||||
|
||||
#### Q13: 加载 chatglm-6b-int8 或 chatglm-6b-int4 抛出 `RuntimeError: Only Tensors of floating point and complex dtype can require gradients`
|
||||
|
||||
A13: 疑为 chatglm 的 quantization 或 torch 版本差异问题:针对已经变为 Parameter 的 torch.zeros 矩阵再次执行 Parameter 操作,从而抛出 `RuntimeError: Only Tensors of floating point and complex dtype can require gradients`。解决办法是将 chatglm 项目原始文件中 quantization.py 文件的第 374 行改为:
|
||||
|
||||
```python
try:
    self.weight = Parameter(self.weight.to(kwargs["device"]), requires_grad=False)
except Exception as e:
    pass
```
|
||||
|
||||
如果上述方式不起作用,则在 .cache/huggingface/modules/ 目录下针对 chatglm 项目的原始文件中的 quantization.py 文件执行上述操作;若软链接不止一个,按照错误提示选择正确的路径。
|
||||
|
||||
注:虽然模型可以顺利加载但在cpu上仍存在推理失败的可能:即针对每个问题,模型一直输出gugugugu。
|
||||
|
||||
因此,最好不要试图用cpu加载量化模型,原因可能是目前python主流量化包的量化操作是在gpu上执行的,会天然地存在gap。
|
||||
|
||||
---
|
||||
|
||||
#### Q14: 修改配置中路径后,加载 text2vec-large-chinese 依然提示 `WARNING: No sentence-transformers model found with name text2vec-large-chinese. Creating a new one with MEAN pooling.`
|
||||
|
||||
A14: 尝试更换 embedding,如 text2vec-base-chinese,请在 [configs/model_config.py](../configs/model_config.py) 文件中,修改 `text2vec-base`参数为本地路径,绝对路径或者相对路径均可
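一个示意写法(这里的 `MODEL_PATH` 字典结构只是假设,实际键名请以你所用版本的 model_config.py 为准):

```python
# 将嵌入模型名指向本地目录,绝对路径或相对路径均可
MODEL_PATH = {
    "embed_model": {
        "text2vec-base": "/your_path/text2vec-base-chinese",
    },
}
```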
|
||||
|
||||
---
|
||||
|
||||
#### Q15: 使用pg向量库建表报错
|
||||
|
||||
A15: 需要手动安装对应的vector扩展(连接pg执行 CREATE EXTENSION IF NOT EXISTS vector)
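如果希望用脚本完成,下面是一个用 psycopg2 执行该语句的示意(连接参数为占位,需按实际环境修改):

```python
import psycopg2

# 连接到 pgvector 所在的数据库(参数仅为示例)
conn = psycopg2.connect(host="localhost", dbname="your_db", user="postgres", password="your_password")
conn.autocommit = True
with conn.cursor() as cur:
    cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
conn.close()
```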
|
||||
|
||||
---
|
||||
|
||||
#### Q16: pymilvus 连接超时
|
||||
|
||||
A16: pymilvus 版本需要与 milvus 版本匹配,否则会超时,可参考 pymilvus==2.1.3。
|
||||
|
||||
---
|
||||
|
||||
#### Q17: 使用vllm推理加速框架时,已经下载了模型但出现HuggingFace通信问题
|
||||
|
||||
A17: 参照如下代码,修改 python 环境下 /site-packages/vllm/model_executor/weight_utils.py 文件中 prepare_hf_model_weights 函数的对应代码:
|
||||
|
||||
```python
|
||||
|
||||
if not is_local:
|
||||
# Use file lock to prevent multiple processes from
|
||||
# downloading the same model weights at the same time.
|
||||
model_path_temp = os.path.join(
|
||||
os.getenv("HOME"),
|
||||
".cache/huggingface/hub",
|
||||
"models--" + model_name_or_path.replace("/", "--"),
|
||||
"snapshots/",
|
||||
)
|
||||
downloaded = False
|
||||
if os.path.exists(model_path_temp):
|
||||
temp_last_dir = os.listdir(model_path_temp)[-1]
|
||||
model_path_temp = os.path.join(model_path_temp, temp_last_dir)
|
||||
base_pattern = os.path.join(model_path_temp, "pytorch_model*.bin")
|
||||
files = glob.glob(base_pattern)
|
||||
if len(files) > 0:
|
||||
downloaded = True
|
||||
|
||||
if downloaded:
|
||||
hf_folder = model_path_temp
|
||||
else:
|
||||
with get_lock(model_name_or_path, cache_dir):
|
||||
hf_folder = snapshot_download(model_name_or_path,
|
||||
allow_patterns=allow_patterns,
|
||||
cache_dir=cache_dir,
|
||||
tqdm_class=Disabledtqdm)
|
||||
else:
|
||||
hf_folder = model_name_or_path
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### Q18: `/xxx/base_model_worker.py` 报 `assert r.status_code == 200` 错误
|
||||
|
||||
A18: 这个错误是本地模型进程注册到 fastchat controller 失败导致的。一般有两种原因:1、开了系统全局代理,关闭即可;2、DEFAULT_BIND_HOST 设为'0.0.0.0',改成'127.0.0.1' 或本机实际 IP 即可。或者更新到最新版本代码也可以解决。
|
||||
|
||||
|
||||
#### Q19: 使用vllm后端加速,无返回且不报错。
|
||||
|
||||
A19: fschat==0.2.33 的 vllm_worker 脚本代码有 bug,如需使用,需修改 fastchat.serve.vllm_worker 源码,将 103 行中 sampling_params = SamplingParams 的参数 stop=list(stop) 修改为 stop=[i for i in stop if i != ""]。
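改动示意(行号与上下文随 fschat 版本可能不同,仅演示对 stop 列表的过滤):

```python
# 原写法: sampling_params = SamplingParams(..., stop=list(stop), ...)
# 修改为先过滤掉空字符串,再传入 SamplingParams
def clean_stop(stop):
    return [i for i in stop if i != ""]

print(clean_stop(["</s>", ""]))  # ['</s>'],空字符串被移除
```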
|
||||
|
||||
|
||||
#### Q20: chatglm3-6b对话中出现"<|user|>"标签,且自问自答。
|
||||
|
||||
A20: chatglm3官方目前已经修复了chatglm3-6b的问题,若使用的模型为chatglm3-6b,仅需更新chatglm3-6b模型代码即可;请前往 Huggingface 下载最新的权重。
|
||||
并更新fschat版本到 0.2.34以上。
|
||||
|
||||
#### Q21: 为什么启动的时候一直出现
|
||||
```
|
||||
"device not in ['cuda', 'mps', 'cpu','xpu'], device = auto"
|
||||
```
|
||||
的警告
|
||||
|
||||
A21: 这是因为你没有在对应的启动选项中设定设备。请在`model_config.py`中设定 DEVICE;不过,就算不设定,auto 也能正常使用。
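示意写法(变量名以实际 model_config.py 为准,以下名称仅为假设):

```python
# 显式指定推理设备,可避免 "device not in [...]" 的警告
LLM_DEVICE = "cuda"        # 可选 "cuda" / "mps" / "cpu" / "xpu";保持 "auto" 也能正常运行
EMBEDDING_DEVICE = "cuda"
```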
|
||||
@ -0,0 +1,281 @@
|
||||
## 软件要求
|
||||
|
||||
要顺利运行本代码,请按照以下系统要求进行配置
|
||||
|
||||
**已经测试过的系统**
|
||||
|
||||
+ Linux Ubuntu 22.04.5 kernel version 6.7
|
||||
|
||||
其他系统可能出现系统兼容性问题。
|
||||
|
||||
**最低要求**
|
||||
|
||||
该要求仅针对标准模式,轻量模式使用在线模型,不需要安装torch等库,也不需要显卡即可运行。
|
||||
|
||||
+ Python 版本: >= 3.8(很不稳定), < 3.12
|
||||
+ CUDA 版本: >= 12.1
|
||||
|
||||
**推荐要求**
|
||||
|
||||
开发者在以下环境下进行代码调试,在该环境下能够避免最多环境问题。
|
||||
|
||||
+ Python 版本 == 3.11.7
|
||||
+ CUDA 版本: == 12.1
|
||||
|
||||
## 硬件要求
|
||||
|
||||
本框架使用 `fschat` 驱动,统一使用 `huggingface` 进行推理;其他推理方式(如 `llama-cpp`、`TensorRT` 加速引擎)建议通过推理引擎以 API 形式接入我们的框架。
|
||||
|
||||
同时,我们没有对 `Int4` 模型进行适配,不保证 `Int4` 模型能够正常运行。因此,量化版本暂时需要由开发者自行适配,我们可能会在未来的版本中提供支持。
|
||||
|
||||
如果想要顺利在GPU运行本地模型的 **FP16** 版本,你至少需要以下的硬件配置,来保证在我们框架下能够实现 **稳定连续对话**
|
||||
|
||||
+ ChatGLM3-6B & LLaMA-7B-Chat 等 7B模型
|
||||
+ 最低显存要求: 14GB
|
||||
+ 推荐显卡: RTX 4080
|
||||
+ Qwen-14B-Chat 等 14B模型
|
||||
+ 最低显存要求: 30GB
|
||||
+ 推荐显卡: V100
|
||||
+ Yi-34B-Chat 等 34B模型
|
||||
+ 最低显存要求: 69GB
|
||||
+ 推荐显卡: A100
|
||||
+ Qwen-72B-Chat 等 72B模型
|
||||
+ 最低显存要求: 145GB
|
||||
+ 推荐显卡:多卡 A100 以上
|
||||
|
||||
一种简单的估算方式为:
|
||||
```
|
||||
FP16: 显存占用(GB) = 模型量级 x 2
|
||||
Int4: 显存占用(GB) = 模型量级 x 0.75
|
||||
```
|
||||
以上数据仅为估算,实际情况以 **nvidia-smi** 占用为准。
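按照上面的公式,可以用一小段脚本做粗略估算(仅为示意):

```python
# 估算不同精度下的显存占用(单位 GB),系数与上文公式一致
def estimate_vram_gb(model_size_b: float, precision: str = "fp16") -> float:
    factor = {"fp16": 2.0, "int4": 0.75}[precision]
    return model_size_b * factor

print(estimate_vram_gb(7))           # 7B 模型 FP16 约 14 GB
print(estimate_vram_gb(34))          # 34B 模型 FP16 约 68 GB
print(estimate_vram_gb(7, "int4"))   # 7B 模型 Int4 约 5.25 GB
```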
|
||||
请注意,如果使用最低配置,仅能保证代码能够运行,但运行速度较慢,体验不佳。
|
||||
|
||||
同时,Embedding 模型将会占用 1-2G 的显存,历史记录最多会占用 数GB 的显存,因此,需要多冗余一些显存。
|
||||
|
||||
内存最低要求: 内存要求至少应该比模型运行的显存大。
|
||||
|
||||
例如,运行ChatGLM3-6B `FP16` 模型,显存占用13G,推荐使用16G以上内存。
|
||||
|
||||
### 部分测试用机配置参考

开发组成员已在以下机器上进行过原生模拟测试(创建新环境并按要求下载后运行),确保能流畅运行框架的全部功能。
|
||||
+ 服务器
|
||||
```
|
||||
处理器: Intel® Xeon® Platinum 8558P Processor (260M Cache, 2.7 GHz)
|
||||
内存: 4 TB
|
||||
显卡组: NVIDIA H800 SXM5 80GB x 8
|
||||
硬盘: 6 PB
|
||||
操作系统: Ubuntu 22.04 LTS,Linux kernel 5.15.0-60-generic
|
||||
显卡驱动版本: 535.129.03
|
||||
Cuda版本: 12.1
|
||||
Python版本: 3.11.7
|
||||
网络IP地址:美国,洛杉矶
|
||||
```
|
||||
+ 个人PC
|
||||
```
|
||||
处理器: Intel® Core™ i9 processor 14900K
|
||||
内存: 256 GB DDR5
|
||||
显卡组: NVIDIA RTX4090 X 1 / NVIDIA RTXA6000 X 1
|
||||
硬盘: 1 TB
|
||||
操作系统: Ubuntu 22.04 LTS / Arch Linux, Linux Kernel 6.6.7
|
||||
显卡驱动版本: 545.29.06
|
||||
Cuda版本: 12.3 Update 1
|
||||
Python版本: 3.11.7
|
||||
网络IP地址:中国,上海
|
||||
```
|
||||
|
||||
## VPN
|
||||
|
||||
如果您位于中国(含港,澳,台) 需要调用 OpenAI 或者 其他境外模型的 API,需要使用 VPN 工具或访问镜像站。
|
||||
|
||||
从 Huggingface 下载模型或者从本仓库拉取最新的代码时,需要开发者自行设置代理。本项目不涉及任何代理工具设置和使用,也不解决任何关于代理的问题。
|
||||
|
||||
## Docker 部署
|
||||
|
||||
开发组为开发者们提供了一键部署的 docker 镜像文件懒人包。开发者们可以在 AutoDL 平台和 Docker 平台一键部署。
|
||||
|
||||
🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/chatchat-space/Langchain-Chatchat/Langchain-Chatchat) ,已经更新到`V13`版本,对应`0.2.9`
|
||||
|
||||
🐳 [Docker 镜像](registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.7)
|
||||
|
||||
💻 一行命令运行 Docker 🌲:
|
||||
|
||||
```shell
|
||||
docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.7
|
||||
```
|
||||
|
||||
- 该版本镜像大小 `43.1GB`,使用 `v0.2.6`,以 `nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04` 为基础镜像
|
||||
- 该版本为正常版本,非轻量化版本
|
||||
- 该版本内置两个 Embedding 模型:`m3e-large`,`text2vec-bge-large-chinese`,默认启用后者,内置 `chatglm2-6b-32k`
|
||||
- 该版本目标为方便一键部署使用,请确保您已经在 Linux 发行版上安装了 NVIDIA 驱动程序
|
||||
- 请注意,您不需要在主机系统上安装 CUDA 工具包,但需要安装 `NVIDIA Driver` 以及 `NVIDIA Container Toolkit`,请参考[安装指南](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
|
||||
- 首次拉取和启动均需要一定时间,首次启动时请参照下图使用 `docker logs -f <container id>` 查看日志
|
||||
- 如遇到启动过程卡在 `Waiting..` 步骤,建议使用 `docker exec -it <container id> bash` 进入 `/logs/` 目录查看对应阶段日志
|
||||
|
||||
## 常规模式本地部署方案
|
||||
|
||||
```shell
|
||||
# 首先,确信你的机器安装了 Python 3.8 - 3.10 版本
|
||||
$ python --version
|
||||
Python 3.8.13
|
||||
|
||||
# 如果低于这个版本,可使用conda安装环境
|
||||
$ conda create -p /your_path/env_name python=3.8
|
||||
|
||||
# 激活环境
|
||||
$ source activate /your_path/env_name
|
||||
|
||||
# 或者使用 conda 安装且不指定路径;注意:此时需将以下命令中的 /your_path/env_name 替换为 env_name
|
||||
$ conda create -n env_name python=3.8
|
||||
$ conda activate env_name # Activate the environment
|
||||
|
||||
# 更新py库
|
||||
$ pip3 install --upgrade pip
|
||||
|
||||
# 关闭环境
|
||||
$ conda deactivate
|
||||
|
||||
# 删除环境
|
||||
$ conda env remove -p /your_path/env_name
|
||||
```
|
||||
接着,开始安装项目的依赖
|
||||
|
||||
```shell
|
||||
# 拉取仓库
|
||||
$ git clone --recursive https://github.com/chatchat-space/Langchain-Chatchat.git
|
||||
|
||||
# 进入目录
|
||||
$ cd Langchain-Chatchat
|
||||
|
||||
# 安装全部依赖
|
||||
$ pip install -r requirements.txt
|
||||
|
||||
# 默认依赖包括基本运行环境(FAISS向量库)。以下是可选依赖:
|
||||
# - 如果要使用 milvus/pg_vector 等向量库,请将 requirements.txt 中相应依赖取消注释再安装。
# - 如果要开启 OCR GPU 加速,请安装 rapidocr_paddle[gpu]
# - 如果要使用在线 API 模型,请安装对应的 SDK
|
||||
|
||||
```
|
||||
|
||||
此外,为方便用户 API 与 webui 分离运行,可单独根据运行需求安装依赖包。
|
||||
|
||||
- 如果只需运行 API,可执行:
|
||||
```shell
|
||||
$ pip install -r requirements_api.txt
|
||||
|
||||
# 默认依赖包括基本运行环境(FAISS向量库)。如果要使用 milvus/pg_vector 等向量库,请将 requirements.txt 中相应依赖取消注释再安装。
|
||||
```
|
||||
|
||||
- 如果只需运行 WebUI,可执行:
|
||||
```shell
|
||||
$ pip install -r requirements_webui.txt
|
||||
```
|
||||
|
||||
注:使用 `langchain.document_loaders.UnstructuredFileLoader`进行 `.docx` 等格式非结构化文件接入时,可能需要依据文档进行其他依赖包的安装,请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)。
|
||||
|
||||
|
||||
需要注意的是,对于以下依赖,我们建议从源码安装或定期检查其是否为最新版本,因为我们的框架可能会大量使用这些依赖的最新特性。
|
||||
+ transformers
|
||||
+ fastchat
|
||||
+ fastapi
|
||||
+ streamlit 以及其组件
|
||||
+ langchain 以及其组件
|
||||
+ xformers
|
||||
|
||||
## 模型下载
|
||||
|
||||
如需在本地或离线环境下运行本项目,需要首先将项目所需的模型下载至本地,通常开源 LLM 与 Embedding 模型可以从 [HuggingFace](https://huggingface.co/models) 下载。
|
||||
|
||||
以本项目中默认使用的 LLM 模型 [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b) 与 Embedding 模型 [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base) 为例:
|
||||
|
||||
下载模型需要先[安装Git LFS](https://docs.github.com/zh/repositories/working-with-files/managing-large-files/installing-git-large-file-storage),然后运行
|
||||
|
||||
```Shell
|
||||
$ git lfs install
|
||||
$ git clone https://huggingface.co/THUDM/chatglm2-6b
|
||||
$ git clone https://huggingface.co/moka-ai/m3e-base
|
||||
```
|
||||
|
||||
## 初始化知识库
|
||||
|
||||
当前项目的知识库信息存储在数据库中,在正式运行项目之前请先初始化数据库(我们强烈建议您在执行操作前备份您的知识文件)。
|
||||
- 如果您已经有创建过知识库,可以先执行以下命令创建或更新数据库表:
|
||||
```shell
|
||||
$ python init_database.py --create-tables
|
||||
```
|
||||
如果可以正常运行,则无需再重建知识库。
|
||||
|
||||
- 如果您是第一次运行本项目,知识库尚未建立,或者之前使用的是低于最新master分支版本的框架,或者配置文件中的知识库类型、嵌入模型发生变化,或者之前的向量库没有开启 `normalize_L2`,需要以下命令初始化或重建知识库:
|
||||
|
||||
```shell
|
||||
$ python init_database.py --recreate-vs
|
||||
```
|
||||
|
||||
## 一键启动
|
||||
启动前,确保已经按照[参数配置](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE)正确配置各config模块。
|
||||
|
||||
一键启动脚本 startup.py 可一次性启动所有 Fastchat 服务、API 服务、WebUI 服务,示例代码:
|
||||
|
||||
```shell
|
||||
$ python startup.py -a
|
||||
```
|
||||
|
||||
并可使用 `Ctrl + C` 直接关闭所有运行服务。如果一次结束不了,可以多按几次。
|
||||
|
||||
可选参数包括 `-a (或--all-webui)`, `--all-api`, `--llm-api`, `-c (或--controller)`, `--openai-api`,
|
||||
`-m (或--model-worker)`, `--api`, `--webui`,其中:
|
||||
|
||||
- `--all-webui` 为一键启动 WebUI 所有依赖服务;
|
||||
- `--all-api` 为一键启动 API 所有依赖服务;
|
||||
- `--llm-api` 为一键启动 Fastchat 所有依赖的 LLM 服务;
|
||||
- `--openai-api` 为仅启动 FastChat 的 controller 和 openai-api-server 服务;
|
||||
- 其他为单独服务启动选项。
|
||||
|
||||
若想指定非默认模型,需要用 `--model-name` 选项,示例:
|
||||
|
||||
```shell
|
||||
$ python startup.py --all-webui --model-name Qwen-7B-Chat
|
||||
```
|
||||
|
||||
更多信息可通过 `python startup.py -h` 查看。
|
||||
|
||||
## 多卡加载
|
||||
项目支持多卡加载,需在 startup.py 中的 create_model_worker_app 函数中,修改如下三个参数:
|
||||
|
||||
```python
|
||||
gpus=None,
|
||||
num_gpus= 1,
|
||||
max_gpu_memory="20GiB"
|
||||
```
|
||||
|
||||
其中,`gpus` 控制使用的显卡的ID,例如 "0,1";
|
||||
|
||||
`num_gpus` 控制使用的卡数;
|
||||
|
||||
`max_gpu_memory` 控制每个卡使用的显存容量。
|
||||
|
||||
注1:server_config.py的FSCHAT_MODEL_WORKERS字典中也增加了相关配置,如有需要也可通过修改FSCHAT_MODEL_WORKERS字典中对应参数实现多卡加载,且需注意server_config.py的配置会覆盖create_model_worker_app 函数的配置。
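一个配置示意(仅演示与多卡相关的三个参数,字典的完整结构以实际 server_config.py 为准):

```python
FSCHAT_MODEL_WORKERS = {
    "default": {
        "gpus": "0,1",              # 使用的显卡 ID
        "num_gpus": 2,              # 使用的卡数
        "max_gpu_memory": "20GiB",  # 每张卡使用的显存上限
    },
}
```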
|
||||
|
||||
注2:少数情况下,gpus参数会不生效,此时需要通过设置环境变量CUDA_VISIBLE_DEVICES来指定torch可见的gpu,示例代码:
|
||||
|
||||
```shell
|
||||
CUDA_VISIBLE_DEVICES=0,1 python startup.py -a
|
||||
```
|
||||
|
||||
## 最轻模式本地部署方案
|
||||
|
||||
该模式的配置方式与常规模式相同,但无需安装 `torch` 等重依赖,通过在线 API 实现 LLM 和 Embeddings 相关功能,适合没有显卡的电脑使用。
|
||||
|
||||
```shell
|
||||
$ pip install -r requirements_lite.txt
|
||||
$ python startup.py -a --lite
|
||||
```
|
||||
|
||||
该模式支持的在线 Embeddings 包括:
|
||||
- [智谱AI](http://open.bigmodel.cn)
|
||||
- [MiniMax](https://api.minimax.chat)
|
||||
- [百度千帆](https://cloud.baidu.com/product/wenxinworkshop?track=dingbutonglan)
|
||||
- [阿里云通义千问](https://dashscope.aliyun.com/)
|
||||
|
||||
在 model_config.py 中 将 LLM_MODELS 和 EMBEDDING_MODEL 设置为可用的在线 API 名称即可。
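示意写法(模型名称需与你在配置中登记的在线 API 名称一致,以下名称仅为示例):

```python
# model_config.py
LLM_MODELS = ["zhipu-api"]
EMBEDDING_MODEL = "zhipu-api"
```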
|
||||
|
||||
注意:在对话过程中并不要求 LLM 模型与 Embeddings 模型一致,你可以在知识库管理页面中使用 zhipu-api 作为嵌入模型,在知识库对话页面使用其它模型。
|
||||
@ -0,0 +1,176 @@
|
||||
## LLM 模型支持列表
|
||||
> 本地模型
|
||||
|
||||
本地 LLM 模型接入基于 [FastChat](https://github.com/lm-sys/FastChat) 实现,支持模型如下:
|
||||
|
||||
- [ChatGLM 全系类对话模型](https://huggingface.co/THUDM/)
|
||||
- [Orion 全系列对话模型](https://huggingface.co/OrionStarAI/),必须安装flash-attn 才能使用
|
||||
- [Qwen 全系列对话模型](https://huggingface.co/Qwen/)
|
||||
- [internlm 全系列对话模型](https://huggingface.co/internlm)
|
||||
- [Baichuan 全系列对话模型](https://huggingface.co/baichuan-inc),必须降级 transformers 才能使用
|
||||
- [llama 全系列对话模型](https://huggingface.co/meta-llama)
|
||||
- [Vicuna 全系列对话模型](https://huggingface.co/lmsys)
|
||||
- [mistral 全系列对话模型](https://huggingface.co/mistralai)
|
||||
|
||||
- [vivo-ai/BlueLM-7B-Chat](https://huggingface.co/vivo-ai/BlueLM-7B-Chat)
|
||||
- [01-ai/Yi-34B-Chat](https://huggingface.co/01-ai/Yi-34B-Chat)
|
||||
- [BlinkDL/RWKV-4-Raven](https://huggingface.co/BlinkDL/rwkv-4-raven)
|
||||
- [camel-ai/CAMEL-13B-Combined-Data](https://huggingface.co/camel-ai/CAMEL-13B-Combined-Data)
|
||||
- [databricks/dolly-v2-12b](https://huggingface.co/databricks/dolly-v2-12b)
|
||||
- [FreedomIntelligence/phoenix-inst-chat-7b](https://huggingface.co/FreedomIntelligence/phoenix-inst-chat-7b)
|
||||
- [h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b](https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b)
|
||||
- [lcw99/polyglot-ko-12.8b-chang-instruct-chat](https://huggingface.co/lcw99/polyglot-ko-12.8b-chang-instruct-chat)
|
||||
- [lmsys/fastchat-t5-3b-v1.0](https://huggingface.co/lmsys/fastchat-t5)
|
||||
- [mosaicml/mpt-7b-chat](https://huggingface.co/mosaicml/mpt-7b-chat)
|
||||
- [Neutralzz/BiLLa-7B-SFT](https://huggingface.co/Neutralzz/BiLLa-7B-SFT)
|
||||
- [nomic-ai/gpt4all-13b-snoozy](https://huggingface.co/nomic-ai/gpt4all-13b-snoozy)
|
||||
- [NousResearch/Nous-Hermes-13b](https://huggingface.co/NousResearch/Nous-Hermes-13b)
|
||||
- [openaccess-ai-collective/manticore-13b-chat-pyg](https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg)
|
||||
- [OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5](https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5)
|
||||
- [project-baize/baize-v2-7b](https://huggingface.co/project-baize/baize-v2-7b)
|
||||
- [Salesforce/codet5p-6b](https://huggingface.co/Salesforce/codet5p-6b)
|
||||
- [StabilityAI/stablelm-tuned-alpha-7b](https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b)
|
||||
- [tiiuae/falcon-40b](https://huggingface.co/tiiuae/falcon-40b)
|
||||
- [timdettmers/guanaco-33b-merged](https://huggingface.co/timdettmers/guanaco-33b-merged)
|
||||
- [togethercomputer/RedPajama-INCITE-7B-Chat](https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Chat)
|
||||
- [WizardLM/WizardLM-13B-V1.0](https://huggingface.co/WizardLM/WizardLM-13B-V1.0)
|
||||
- [WizardLM/WizardCoder-15B-V1.0](https://huggingface.co/WizardLM/WizardCoder-15B-V1.0)
|
||||
- [HuggingFaceH4/starchat-beta](https://huggingface.co/HuggingFaceH4/starchat-beta)
|
||||
- [FlagAlpha/Llama2-Chinese-13b-Chat](https://huggingface.co/FlagAlpha/Llama2-Chinese-13b-Chat) and others
|
||||
- [BAAI/AquilaChat-7B](https://huggingface.co/BAAI/AquilaChat-7B)
|
||||
- [all models of OpenOrca](https://huggingface.co/Open-Orca)
|
||||
- [Spicyboros](https://huggingface.co/jondurbin/spicyboros-7b-2.2?not-for-all-audiences=true)
|
||||
- [airoboros 2.2](https://huggingface.co/jondurbin/airoboros-l2-13b-2.2)
|
||||
- [VMware's OpenLLaMa OpenInstruct](https://huggingface.co/VMware/open-llama-7b-open-instruct)
|
||||
- 任何 [EleutherAI](https://huggingface.co/EleutherAI) 的 pythia 模型,如 [pythia-6.9b](https://huggingface.co/EleutherAI/pythia-6.9b)
|
||||
- 在以上模型基础上训练的任何 [Peft](https://github.com/huggingface/peft) 适配器。为了激活,模型路径中必须有 `peft` 。注意:如果加载多个peft模型,你可以通过在任何模型工作器中设置环境变量 `PEFT_SHARE_BASE_WEIGHTS=true` 来使它们共享基础模型的权重。
|
||||
|
||||
以上模型支持列表可能随 [FastChat](https://github.com/lm-sys/FastChat) 更新而持续更新,可参考 [FastChat 已支持模型列表](https://github.com/lm-sys/FastChat/blob/main/docs/model_support.md)。
|
||||
|
||||
> 联网模型
|
||||
|
||||
支持的联网模型
|
||||
- [智谱AI](http://open.bigmodel.cn) **临时解决方案,不支持流式输出,敬请期待0.3.x**
|
||||
- [阿里云通义千问](https://dashscope.aliyun.com/)
|
||||
- [百川](https://www.baichuan-ai.com/)
|
||||
- [ChatGPT](https://api.openai.com)
|
||||
- [Gemini](https://makersuite.google.com/app/apikey)
|
||||
- [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
|
||||
- [MiniMax](https://api.minimax.chat)
|
||||
- [讯飞星火](https://xinghuo.xfyun.cn)
|
||||
- [百度千帆](https://cloud.baidu.com/product/wenxinworkshop?track=dingbutonglan)
|
||||
- [字节火山方舟](https://www.volcengine.com)
|
||||
|
||||
## Embedding 模型支持列表
|
||||
|
||||
> 本地模型
|
||||
|
||||
本项目支持调用 [HuggingFace](https://huggingface.co/models?pipeline_tag=sentence-similarity) 中的 Embedding 模型,已支持的 Embedding 模型如下:
|
||||
|
||||
|
||||
MokaAI系列嵌入模型
|
||||
|
||||
- [moka-ai/m3e-small](https://huggingface.co/moka-ai/m3e-small)
|
||||
- [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base)
|
||||
- [moka-ai/m3e-large](https://huggingface.co/moka-ai/m3e-large)
|
||||
|
||||
BAAI系列嵌入模型
|
||||
- [BAAI/bge-small-zh](https://huggingface.co/BAAI/bge-small-zh)
|
||||
- [BAAI/bge-base-zh](https://huggingface.co/BAAI/bge-base-zh)
|
||||
- [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh)
|
||||
- [BAAI/bge-small-zh-v1.5](https://huggingface.co/BAAI/bge-small-zh-v1.5)
|
||||
- [BAAI/bge-base-zh-v1.5](https://huggingface.co/BAAI/bge-base-zh-v1.5)
|
||||
- [BAAI/bge-large-zh-v1.5](https://huggingface.co/BAAI/bge-large-zh-v1.5)
|
||||
- [BAAI/bge-large-zh-noinstruct](https://huggingface.co/BAAI/bge-large-zh-noinstruct)
|
||||
- [BAAI/bge-reranker-large](https://huggingface.co/BAAI/bge-reranker-large)
|
||||
- [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base)
|
||||
|
||||
|
||||
text2vec系列嵌入模型
|
||||
- [shibing624/text2vec-base-chinese-sentence](https://huggingface.co/shibing624/text2vec-base-chinese-sentence)
|
||||
- [shibing624/text2vec-base-chinese-paraphrase](https://huggingface.co/shibing624/text2vec-base-chinese-paraphrase)
|
||||
- [shibing624/text2vec-base-multilingual](https://huggingface.co/shibing624/text2vec-base-multilingual)
|
||||
- [shibing624/text2vec-base-chinese](https://huggingface.co/shibing624/text2vec-base-chinese)
|
||||
- [shibing624/text2vec-bge-large-chinese](https://huggingface.co/shibing624/text2vec-bge-large-chinese)
|
||||
- [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese)
|
||||
|
||||
其他模型
|
||||
- [sensenova/piccolo-base-zh](https://huggingface.co/sensenova/piccolo-base-zh)
|
||||
- [sensenova/piccolo-large-zh](https://huggingface.co/sensenova/piccolo-large-zh)
|
||||
- [nghuyong/ernie-3.0-nano-zh](https://huggingface.co/nghuyong/ernie-3.0-nano-zh)
|
||||
- [nghuyong/ernie-3.0-base-zh](https://huggingface.co/nghuyong/ernie-3.0-base-zh)
|
||||
|
||||
达摩院系列嵌入模型
|
||||
- [damo/nlp_gte_sentence-embedding_chinese-large](https://modelscope.cn/models/damo/nlp_gte_sentence-embedding_chinese-large)
|
||||
|
||||
> 联网模型
|
||||
|
||||
除本地模型外,本项目也支持直接接入 OpenAI 等在线嵌入模型。
|
||||
支持的联网模型
|
||||
- [OpenAI/text-embedding-ada-002](https://platform.openai.com/docs/guides/embeddings)
|
||||
- [智谱AI](http://open.bigmodel.cn)
|
||||
- [MiniMax](https://api.minimax.chat)
|
||||
- [百度千帆](https://cloud.baidu.com/product/wenxinworkshop?track=dingbutonglan)
|
||||
- [阿里云通义千问](https://dashscope.aliyun.com/)
|
||||
|
||||
## 分词器支持列表
|
||||
|
||||
> Langchain 中的分词器
|
||||
|
||||
本项目支持调用 [Langchain](https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.text_splitter) 的 Text Splitter 分词器以及基于此改进的自定义分词器,已支持的 Text Splitter 类型如下:
|
||||
- CharacterTextSplitter
|
||||
- LatexTextSplitter
|
||||
- MarkdownHeaderTextSplitter
|
||||
- MarkdownTextSplitter
|
||||
- NLTKTextSplitter
|
||||
- PythonCodeTextSplitter
|
||||
- RecursiveCharacterTextSplitter
|
||||
- SentenceTransformersTokenTextSplitter
|
||||
- SpacyTextSplitter
|
||||
|
||||
> 自定义分词器
|
||||
|
||||
已经支持的定制分词器如下:
|
||||
|
||||
- [AliTextSplitter](text_splitter/ali_text_splitter.py)
|
||||
- [ChineseRecursiveTextSplitter](text_splitter/chinese_recursive_text_splitter.py)
|
||||
- [ChineseTextSplitter](text_splitter/chinese_text_splitter.py)
|
||||
|
||||
|
||||
## 向量数据库支持列表
|
||||
|
||||
> 本地向量数据库
|
||||
|
||||
目前支持的本地向量数据库列表如下:
|
||||
|
||||
- [FAISS](https://github.com/facebookresearch/faiss)
|
||||
- [Milvus](https://github.com/milvus-io/milvus)
|
||||
- [PGVector](https://github.com/pgvector/pgvector)
|
||||
|
||||
> 联网向量数据库
|
||||
|
||||
- [Zilliz](https://zilliz.com)
|
||||
|
||||
## 工具支持列表
|
||||
> Langchain工具
|
||||
|
||||
- Shell 工具,用于模拟当前的Linux Shell环境
|
||||
- Youtube 工具,用于搜索Youtube的相关视频链接
|
||||
- Wolfram 工具,用Wolfram来实现数学计算等
|
||||
|
||||
其他 Langchain 自带的工具也可以参照上述三个工具的方式自行接入。
|
||||
|
||||
> 本地工具
|
||||
|
||||
- 翻译工具,实现对输入的任意语言翻译。
|
||||
- 数学工具,使用LLMMathChain 实现数学计算。
|
||||
- 高级知识库工具,智能选择调用多个或者单个知识库并查询内容。
|
||||
- 进阶知识库工具,智能选择调用一个最相近的知识库并查询内容。
|
||||
- 基础知识库工具,选择指定的一个知识库并回答。
|
||||
|
||||
> 联网工具
|
||||
|
||||
- 天气工具,使用自定义的LLMWetherChain实现天气查询,调用和风天气API。
|
||||
- 搜索工具,使用我们的搜索API来实现搜索并概括内容。
|
||||
|
||||
- 我们期待开发者共享更多的工具,帮助项目生态完善
|
||||
@ -0,0 +1,889 @@
|
||||
## 推荐的模型组合
|
||||
|
||||
+ 在默认的配置文件中,我们提供了以下模型组合
|
||||
```
|
||||
LLM: Chatglm2-6b
|
||||
Embedding Models: m3e-base
|
||||
TextSplitter: ChineseRecursiveTextSplitter
|
||||
Kb_dataset: faiss
|
||||
```
|
||||
|
||||
+ 我们推荐开发者根据自己的业务需求进行模型微调,如果不需要微调且配置充足,可选择以下性能较好的配置
|
||||
```
|
||||
model_config.py
|
||||
LLM: Qwen-14B-Chat 或 Baichuan2-13B-Chat
|
||||
Embedding Models: piccolo-large-zh 或 bge-large-zh-v1.5
|
||||
HISTORY_LEN = 20
|
||||
TEMPERATURE = 0.1
|
||||
```
|
||||
使用该模型组合将需要更高的硬件配置
|
||||
```
|
||||
1张 RTX A6000 或者 A40 等 48GB 显存以上的显卡。推荐 1 x A100 以上。
|
||||
(使用多张显卡拼接也能运行,但是速度非常慢,2张4090拼接运行大概为一秒一个字的速度)
|
||||
|
||||
64GB 内存用于加载模型而不被Kill
|
||||
|
||||
服务器级的CPU,推荐 Xeon(R) Platinum 8358P 以上
|
||||
```
|
||||
|
||||
+ 如果开发者知识库较大,有大量文档,大文件,我们推荐开发者使用 ```pg``` 向量数据库
|
||||
+ 如果开发者的知识库具有一定的关键词特征,例如:
|
||||
+ 问答对文件(以Q + A 为一个组合的json文件)
|
||||
+ Markdown文件
|
||||
+ 并排的pdf文件
|
||||
+ 具有多个表格的pdf文件
|
||||
|
||||
我们推荐开发者自行开发分词器,以达到更好的效果。
|
||||
|
||||
+ 如果开发者想使用更全面的 Agent 功能,我们推荐开发者使用以下配置
|
||||
```
|
||||
LLM: Qwen-14B-Chat, AgentLM-70B 或 GPT-4
|
||||
Tools 的工具控制在10个之内
|
||||
```
|
||||
|
||||
## 微调模型加载实操
|
||||
|
||||
### 非p-tuning类PEFT加载
|
||||
本项目基于 FastChat 加载 LLM 服务,故需以 FastChat 加载 PEFT 路径。针对 chatglm、falcon、codet5p 以外的模型,以及 p-tuning 以外的 peft 方法,需对 peft 文件进行修改,步骤如下:
|
||||
|
||||
1. 将config.json文件修改为adapter_config.json;
|
||||
2. 保证文件夹包含pytorch_model.bin文件;
|
||||
3. 修改文件夹名称,保证文件夹包含'peft'一词;
|
||||
4. 将peft文件夹移入项目目录下;
|
||||
5. 确保adapter_config.json文件中base_model_name_or_path指向基础模型;
|
||||
6. 将peft路径添加到model_config.py的llm_dict中,键为模型名,值为peft路径,注意使用相对路径,如"peft"(登记方式示例见本节列表下方);
|
||||
7. 开启 `PEFT_SHARE_BASE_WEIGHTS=true`环境变量,再执行python startup.py -a
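第 6 步的登记方式示意(字典名与结构以实际 model_config.py 为准,此处仅为假设):

```python
# 键为模型名,值为包含 adapter_config.json 与 pytorch_model.bin 的 peft 文件夹(相对路径)
llm_dict = {
    "chatglm2-6b": "peft",
}
```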
|
||||
|
||||
针对p-tuning和chatglm模型,需要对fastchat进行较大幅度的修改。
|
||||
|
||||
### p-tuning加载
|
||||
|
||||
P-tuning 虽然是一种 peft 方法,但并不能与 huggingface 的 peft python 包兼容,而 fastchat 在多处以字符串匹配的方式硬编码加载模型,因此导致 fastchat 和 chatchat 不能直接兼容 p-tuning。经 langchain-chatchat 开发组多次尝试,给出如下指南进行 p-tuning 加载。
|
||||
|
||||
#### 1. peft文件夹修改
|
||||
|
||||
1. 将config.json文件修改为adapter_config.json;
|
||||
2. 保证文件夹包含pytorch_model.bin文件;
|
||||
3. 修改文件夹名称,保证文件夹包含'peft'一词;
|
||||
4. 在adapter_config.json文件中增加如下字段:
|
||||
|
||||
```json
|
||||
"base_model_name_or_path": "/root/model/chatglm2-6b/"
|
||||
"task_type": "CAUSAL_LM",
|
||||
"peft_type": "PREFIX_TUNING",
|
||||
"inference_mode": true,
|
||||
"revision": "main",
|
||||
"num_virtual_tokens": 16
|
||||
```
|
||||
|
||||
**其中,"base_model_name_or_path"为基础模型的存在位置**;
|
||||
5. 将文件夹移入项目文件夹中,如Langchain-Chatchat项目文件夹目录下;
|
||||
|
||||
#### 2. fastchat包代码修改
|
||||
|
||||
##### 2.1 fastchat.model.model_adapter文件修改
|
||||
|
||||
1. 将fastchat.model.model_adapter.py文件的load_model函数修改为:
|
||||
|
||||
```python
|
||||
def load_model(
|
||||
model_path: str,
|
||||
device: str = "cuda",
|
||||
num_gpus: int = 1,
|
||||
max_gpu_memory: Optional[str] = None,
|
||||
dtype: Optional[torch.dtype] = None,
|
||||
load_8bit: bool = False,
|
||||
cpu_offloading: bool = False,
|
||||
gptq_config: Optional[GptqConfig] = None,
|
||||
awq_config: Optional[AWQConfig] = None,
|
||||
revision: str = "main",
|
||||
debug: bool = False,
|
||||
load_kwargs = {}
|
||||
):
|
||||
"""Load a model from Hugging Face."""
|
||||
# get model adapter
|
||||
adapter = get_model_adapter(model_path)
|
||||
kwargs = load_kwargs
|
||||
# Handle device mapping
|
||||
cpu_offloading = raise_warning_for_incompatible_cpu_offloading_configuration(
|
||||
device, load_8bit, cpu_offloading
|
||||
)
|
||||
if device == "cpu":
|
||||
kwargs["torch_dtype"]= torch.float32
|
||||
if CPU_ISA in ["avx512_bf16", "amx"]:
|
||||
try:
|
||||
import intel_extension_for_pytorch as ipex
|
||||
|
||||
kwargs ["torch_dtype"]= torch.bfloat16
|
||||
except ImportError:
|
||||
warnings.warn(
|
||||
"Intel Extension for PyTorch is not installed, it can be installed to accelerate cpu inference"
|
||||
)
|
||||
elif device == "cuda":
|
||||
kwargs["torch_dtype"] = torch.float16
|
||||
if num_gpus != 1:
|
||||
kwargs["device_map"] = "auto"
|
||||
if max_gpu_memory is None:
|
||||
kwargs[
|
||||
"device_map"
|
||||
] = "sequential" # This is important for not the same VRAM sizes
|
||||
available_gpu_memory = get_gpu_memory(num_gpus)
|
||||
kwargs["max_memory"] = {
|
||||
i: str(int(available_gpu_memory[i] * 0.85)) + "GiB"
|
||||
for i in range(num_gpus)
|
||||
}
|
||||
else:
|
||||
kwargs["max_memory"] = {i: max_gpu_memory for i in range(num_gpus)}
|
||||
elif device == "mps":
|
||||
kwargs["torch_dtype"] = torch.float16
|
||||
# Avoid bugs in mps backend by not using in-place operations.
|
||||
replace_llama_attn_with_non_inplace_operations()
|
||||
elif device == "xpu":
|
||||
kwargs["torch_dtype"] = torch.bfloat16
|
||||
# Try to load ipex, while it looks unused, it links into torch for xpu support
|
||||
try:
|
||||
import intel_extension_for_pytorch as ipex
|
||||
except ImportError:
|
||||
warnings.warn(
|
||||
"Intel Extension for PyTorch is not installed, but is required for xpu inference."
|
||||
)
|
||||
elif device == "npu":
|
||||
kwargs["torch_dtype"]= torch.float16
|
||||
# Try to load ipex, while it looks unused, it links into torch for xpu support
|
||||
try:
|
||||
import torch_npu
|
||||
except ImportError:
|
||||
warnings.warn("Ascend Extension for PyTorch is not installed.")
|
||||
else:
|
||||
raise ValueError(f"Invalid device: {device}")
|
||||
|
||||
if cpu_offloading:
|
||||
# raises an error on incompatible platforms
|
||||
from transformers import BitsAndBytesConfig
|
||||
|
||||
if "max_memory" in kwargs:
|
||||
kwargs["max_memory"]["cpu"] = (
|
||||
str(math.floor(psutil.virtual_memory().available / 2**20)) + "Mib"
|
||||
)
|
||||
kwargs["quantization_config"] = BitsAndBytesConfig(
|
||||
load_in_8bit_fp32_cpu_offload=cpu_offloading
|
||||
)
|
||||
kwargs["load_in_8bit"] = load_8bit
|
||||
elif load_8bit:
|
||||
if num_gpus != 1:
|
||||
warnings.warn(
|
||||
"8-bit quantization is not supported for multi-gpu inference."
|
||||
)
|
||||
else:
|
||||
model, tokenizer = adapter.load_compress_model(
|
||||
model_path=model_path,
|
||||
device=device,
|
||||
torch_dtype=kwargs["torch_dtype"],
|
||||
revision=revision,
|
||||
)
|
||||
if debug:
|
||||
print(model)
|
||||
return model, tokenizer
|
||||
elif awq_config and awq_config.wbits < 16:
|
||||
assert (
|
||||
awq_config.wbits == 4
|
||||
), "Currently we only support 4-bit inference for AWQ."
|
||||
model, tokenizer = load_awq_quantized(model_path, awq_config, device)
|
||||
if num_gpus != 1:
|
||||
device_map = accelerate.infer_auto_device_map(
|
||||
model,
|
||||
max_memory=kwargs["max_memory"],
|
||||
no_split_module_classes=[
|
||||
"OPTDecoderLayer",
|
||||
"LlamaDecoderLayer",
|
||||
"BloomBlock",
|
||||
"MPTBlock",
|
||||
"DecoderLayer",
|
||||
],
|
||||
)
|
||||
model = accelerate.dispatch_model(
|
||||
model, device_map=device_map, offload_buffers=True
|
||||
)
|
||||
else:
|
||||
model.to(device)
|
||||
return model, tokenizer
|
||||
elif gptq_config and gptq_config.wbits < 16:
|
||||
model, tokenizer = load_gptq_quantized(model_path, gptq_config)
|
||||
if num_gpus != 1:
|
||||
device_map = accelerate.infer_auto_device_map(
|
||||
model,
|
||||
max_memory=kwargs["max_memory"],
|
||||
no_split_module_classes=["LlamaDecoderLayer"],
|
||||
)
|
||||
model = accelerate.dispatch_model(
|
||||
model, device_map=device_map, offload_buffers=True
|
||||
)
|
||||
else:
|
||||
model.to(device)
|
||||
return model, tokenizer
|
||||
kwargs["revision"] = revision
|
||||
|
||||
if dtype is not None: # Overwrite dtype if it is provided in the arguments.
|
||||
kwargs["torch_dtype"] = dtype
|
||||
|
||||
# Load model
|
||||
model, tokenizer = adapter.load_model(model_path, kwargs)
|
||||
|
||||
if (
|
||||
device == "cpu"
|
||||
and kwargs["torch_dtype"] is torch.bfloat16
|
||||
and CPU_ISA is not None
|
||||
):
|
||||
model = ipex.optimize(model, dtype=kwargs["torch_dtype"])
|
||||
|
||||
if (device == "cuda" and num_gpus == 1 and not cpu_offloading) or device in (
|
||||
"mps",
|
||||
"xpu",
|
||||
"npu",
|
||||
):
|
||||
model.to(device)
|
||||
|
||||
if device == "xpu":
|
||||
model = torch.xpu.optimize(model, dtype=kwargs["torch_dtype"], inplace=True)
|
||||
|
||||
if debug:
|
||||
print(model)
|
||||
|
||||
return model, tokenizer
|
||||
```
|
||||
2. 将fastchat.model.model_adapter.py的get_generate_stream_function函数修改为:
|
||||
|
||||
```python
|
||||
def get_generate_stream_function(model: torch.nn.Module, model_path: str):
|
||||
"""Get the generate_stream function for inference."""
|
||||
from fastchat.serve.inference import generate_stream
|
||||
|
||||
model_type = str(type(model)).lower()
|
||||
|
||||
is_chatglm = "chatglm" in model_type
|
||||
is_falcon = "rwforcausallm" in model_type
|
||||
is_codet5p = "codet5p" in model_type
|
||||
is_peft = "peft" in model_type
|
||||
|
||||
if is_chatglm:
|
||||
return generate_stream_chatglm
|
||||
elif is_falcon:
|
||||
return generate_stream_falcon
|
||||
elif is_codet5p:
|
||||
return generate_stream_codet5p
|
||||
elif peft_share_base_weights and is_peft:
|
||||
# Return a curried stream function that loads the right adapter
|
||||
# according to the model_name available in this context. This ensures
|
||||
# the right weights are available.
|
||||
@torch.inference_mode()
|
||||
def generate_stream_peft(
|
||||
model,
|
||||
tokenizer,
|
||||
params: Dict,
|
||||
device: str,
|
||||
context_len: int,
|
||||
stream_interval: int = 2,
|
||||
judge_sent_end: bool = False,
|
||||
):
|
||||
|
||||
model.set_adapter(model_path)
|
||||
if "chatglm" in str(type(model.base_model)).lower():
|
||||
model.disable_adapter()
|
||||
prefix_state_dict = torch.load(os.path.join(model_path, "pytorch_model.bin"))
|
||||
new_prefix_state_dict = {}
|
||||
|
||||
for k, v in prefix_state_dict.items():
|
||||
if k.startswith("transformer.prefix_encoder."):
|
||||
new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
|
||||
elif k.startswith("transformer.prompt_encoder."):
|
||||
new_prefix_state_dict[k[len("transformer.prompt_encoder."):]] = v
|
||||
model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
|
||||
for x in generate_stream_chatglm(
|
||||
model,
|
||||
tokenizer,
|
||||
params,
|
||||
device,
|
||||
context_len,
|
||||
stream_interval,
|
||||
judge_sent_end,
|
||||
):
|
||||
yield x
|
||||
elif "rwforcausallm" in str(type(model.base_model)).lower():
|
||||
|
||||
for x in generate_stream_falcon(
|
||||
model,
|
||||
tokenizer,
|
||||
params,
|
||||
device,
|
||||
context_len,
|
||||
stream_interval,
|
||||
judge_sent_end,
|
||||
):
|
||||
yield x
|
||||
elif "codet5p" in str(type(model.base_model)).lower():
|
||||
|
||||
for x in generate_stream_codet5p(
|
||||
model,
|
||||
tokenizer,
|
||||
params,
|
||||
device,
|
||||
context_len,
|
||||
stream_interval,
|
||||
judge_sent_end,
|
||||
):
|
||||
yield x
|
||||
else:
|
||||
|
||||
for x in generate_stream(
|
||||
model,
|
||||
tokenizer,
|
||||
params,
|
||||
device,
|
||||
context_len,
|
||||
stream_interval,
|
||||
judge_sent_end,
|
||||
):
|
||||
yield x
|
||||
|
||||
return generate_stream_peft
|
||||
else:
|
||||
return generate_stream
|
||||
```
|
||||
3. 将fastchat.model.model_adapter.py的PeftModelAdapter类的load_model方法修改为:
|
||||
|
||||
```python
|
||||
def load_model(self, model_path: str, from_pretrained_kwargs: dict):
|
||||
"""Loads the base model then the (peft) adapter weights"""
|
||||
from peft import PeftConfig, PeftModel
|
||||
|
||||
config = PeftConfig.from_pretrained(model_path)
|
||||
base_model_path = config.base_model_name_or_path
|
||||
if "peft" in base_model_path:
|
||||
raise ValueError(
|
||||
f"PeftModelAdapter cannot load a base model with 'peft' in the name: {config.base_model_name_or_path}"
|
||||
)
|
||||
|
||||
# Basic proof of concept for loading peft adapters that share the base
|
||||
# weights. This is pretty messy because Peft re-writes the underlying
|
||||
# base model and internally stores a map of adapter layers.
|
||||
# So, to make this work we:
|
||||
# 1. Cache the first peft model loaded for a given base models.
|
||||
# 2. Call `load_model` for any follow on Peft models.
|
||||
# 3. Make sure we load the adapters by the model_path. Why? This is
|
||||
# what's accessible during inference time.
|
||||
# 4. In get_generate_stream_function, make sure we load the right
|
||||
# adapter before doing inference. This *should* be safe when calls
|
||||
# are blocked the same semaphore.
|
||||
if peft_share_base_weights:
|
||||
if base_model_path in peft_model_cache:
|
||||
model, tokenizer = peft_model_cache[base_model_path]
|
||||
# Super important: make sure we use model_path as the
|
||||
# `adapter_name`.
|
||||
model.load_adapter(model_path, adapter_name=model_path)
|
||||
else:
|
||||
base_adapter = get_model_adapter(base_model_path)
|
||||
base_model, tokenizer = base_adapter.load_model(
|
||||
base_model_path, from_pretrained_kwargs
|
||||
)
|
||||
# Super important: make sure we use model_path as the
|
||||
# `adapter_name`.
|
||||
from peft import get_peft_model
|
||||
model = get_peft_model(base_model,config,adapter_name=model_path)
|
||||
peft_model_cache[base_model_path] = (model, tokenizer)
|
||||
return model, tokenizer
|
||||
|
||||
# In the normal case, load up the base model weights again.
|
||||
base_adapter = get_model_adapter(base_model_path)
|
||||
base_model, tokenizer = base_adapter.load_model(
|
||||
base_model_path, from_pretrained_kwargs
|
||||
)
|
||||
from peft import get_peft_model
|
||||
model = get_peft_model(base_model,config,adapter_name=model_path)
|
||||
return model, tokenizer
|
||||
|
||||
```
|
||||
4. 将fastchat.model.model_adapter.py的ChatglmAdapter类的load_model方法修改为:
|
||||
|
||||
```python
|
||||
def load_model(self, model_path: str, from_pretrained_kwargs: dict):
|
||||
revision = from_pretrained_kwargs.get("revision", "main")
|
||||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
model_path, trust_remote_code=True, revision=revision
|
||||
)
|
||||
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True,**from_pretrained_kwargs)
|
||||
model = AutoModel.from_pretrained(
|
||||
model_path, trust_remote_code=True, config=config
|
||||
)
|
||||
return model, tokenizer
|
||||
```
|
||||
|
||||
##### 2.2 fastchat.serve.model_worker文件修改
|
||||
|
||||
1. 将fastchat.serve.model_worker文件的ModelWorker的__init__方法修改如下:
|
||||
|
||||
```python
|
||||
class ModelWorker(BaseModelWorker):
|
||||
def __init__(
|
||||
self,
|
||||
controller_addr: str,
|
||||
worker_addr: str,
|
||||
worker_id: str,
|
||||
model_path: str,
|
||||
model_names: List[str],
|
||||
limit_worker_concurrency: int,
|
||||
no_register: bool,
|
||||
device: str,
|
||||
num_gpus: int,
|
||||
max_gpu_memory: str,
|
||||
dtype: Optional[torch.dtype] = None,
|
||||
load_8bit: bool = False,
|
||||
cpu_offloading: bool = False,
|
||||
gptq_config: Optional[GptqConfig] = None,
|
||||
awq_config: Optional[AWQConfig] = None,
|
||||
stream_interval: int = 2,
|
||||
conv_template: Optional[str] = None,
|
||||
embed_in_truncate: bool = False,
|
||||
seed: Optional[int] = None,
|
||||
load_kwargs = {}, #修改点
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(
|
||||
controller_addr,
|
||||
worker_addr,
|
||||
worker_id,
|
||||
model_path,
|
||||
model_names,
|
||||
limit_worker_concurrency,
|
||||
conv_template=conv_template,
|
||||
)
|
||||
|
||||
logger.info(f"Loading the model {self.model_names} on worker {worker_id} ...")
|
||||
self.model, self.tokenizer = load_model(
|
||||
model_path,
|
||||
device=device,
|
||||
num_gpus=num_gpus,
|
||||
max_gpu_memory=max_gpu_memory,
|
||||
dtype=dtype,
|
||||
load_8bit=load_8bit,
|
||||
cpu_offloading=cpu_offloading,
|
||||
gptq_config=gptq_config,
|
||||
awq_config=awq_config,
|
||||
load_kwargs=load_kwargs #修改点
|
||||
)
|
||||
self.device = device
|
||||
if self.tokenizer.pad_token == None:
|
||||
self.tokenizer.pad_token = self.tokenizer.eos_token
|
||||
self.context_len = get_context_length(self.model.config)
|
||||
print("**"*100)
|
||||
self.generate_stream_func = get_generate_stream_function(self.model, model_path)
|
||||
print(f"self.generate_stream_func{self.generate_stream_func}")
|
||||
print("*"*100)
|
||||
self.stream_interval = stream_interval
|
||||
self.embed_in_truncate = embed_in_truncate
|
||||
self.seed = seed
|
||||
|
||||
if not no_register:
|
||||
self.init_heart_beat()
|
||||
```
|
||||
2. 在fastchat.serve.model_worker文件的create_model_worker增加如下args参数:
|
||||
|
||||
```python
|
||||
parser.add_argument("--load_kwargs",type=dict,default={})
|
||||
```
|
||||
|
||||
并将如下语句:
|
||||
|
||||
```python
|
||||
worker = ModelWorker(
|
||||
args.controller_address,
|
||||
args.worker_address,
|
||||
worker_id,
|
||||
args.model_path,
|
||||
args.model_names,
|
||||
args.limit_worker_concurrency,
|
||||
no_register=args.no_register,
|
||||
device=args.device,
|
||||
num_gpus=args.num_gpus,
|
||||
max_gpu_memory=args.max_gpu_memory,
|
||||
dtype=str_to_torch_dtype(args.dtype),
|
||||
load_8bit=args.load_8bit,
|
||||
cpu_offloading=args.cpu_offloading,
|
||||
gptq_config=gptq_config,
|
||||
awq_config=awq_config,
|
||||
stream_interval=args.stream_interval,
|
||||
conv_template=args.conv_template,
|
||||
embed_in_truncate=args.embed_in_truncate,
|
||||
seed=args.seed,
|
||||
)
|
||||
```
|
||||
|
||||
修改为:
|
||||
|
||||
```python
|
||||
worker = ModelWorker(
|
||||
args.controller_address,
|
||||
args.worker_address,
|
||||
worker_id,
|
||||
args.model_path,
|
||||
args.model_names,
|
||||
args.limit_worker_concurrency,
|
||||
no_register=args.no_register,
|
||||
device=args.device,
|
||||
num_gpus=args.num_gpus,
|
||||
max_gpu_memory=args.max_gpu_memory,
|
||||
dtype=str_to_torch_dtype(args.dtype),
|
||||
load_8bit=args.load_8bit,
|
||||
cpu_offloading=args.cpu_offloading,
|
||||
gptq_config=gptq_config,
|
||||
awq_config=awq_config,
|
||||
stream_interval=args.stream_interval,
|
||||
conv_template=args.conv_template,
|
||||
embed_in_truncate=args.embed_in_truncate,
|
||||
seed=args.seed,
|
||||
load_kwargs=args.load_kwargs
|
||||
)
|
||||
```
|
||||
|
||||
至此,我们完成了 fastchat 加载 p-tuning 的所有修改。在调用 fastchat 加载 p-tuning 时,加入 `PEFT_SHARE_BASE_WEIGHTS=true` 环境变量,并以字典形式将 --load_kwargs 参数设置为训练 p-tuning 时的 pre_seq_len 值即可,例如将 2.2.2 步骤中的 `parser.add_argument("--load_kwargs",type=dict,default={})` 修改为:
|
||||
|
||||
`parser.add_argument("--load_kwargs",type=dict,default={"pre_seq_len":16})`
|
||||
|
||||
#### 3. langchain-chatchat代码修改
|
||||
|
||||
1. 在configs/serve_config.py中的FSCHAT_MODEL_WORKERS字典中增加如下字段:
|
||||
|
||||
```
|
||||
"load_kwargs": {"pre_seq_len": 16} #值修改为adapter_config.json中的pre_seq_len值
|
||||
```
|
||||
2. 将startup.py中的create_model_worker_app修改为:
|
||||
|
||||
```python
|
||||
def create_model_worker_app(log_level: str = "INFO", **kwargs) -> FastAPI:
|
||||
"""
|
||||
kwargs包含的字段如下:
|
||||
host:
|
||||
port:
|
||||
model_names:[`model_name`]
|
||||
controller_address:
|
||||
worker_address:
|
||||
|
||||
|
||||
对于online_api:
|
||||
online_api:True
|
||||
worker_class: `provider`
|
||||
对于离线模型:
|
||||
model_path: `model_name_or_path`,huggingface的repo-id或本地路径
|
||||
device:`LLM_DEVICE`
|
||||
"""
|
||||
import fastchat.constants
|
||||
fastchat.constants.LOGDIR = LOG_PATH
|
||||
from fastchat.serve.model_worker import worker_id, logger
|
||||
import argparse
|
||||
logger.setLevel(log_level)
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
args = parser.parse_args([])
|
||||
|
||||
for k, v in kwargs.items():
|
||||
setattr(args, k, v)
|
||||
|
||||
# 在线模型API
|
||||
if worker_class := kwargs.get("worker_class"):
|
||||
from fastchat.serve.model_worker import app
|
||||
worker = worker_class(model_names=args.model_names,
|
||||
controller_addr=args.controller_address,
|
||||
worker_addr=args.worker_address)
|
||||
sys.modules["fastchat.serve.model_worker"].worker = worker
|
||||
# 本地模型
|
||||
else:
|
||||
from configs.model_config import VLLM_MODEL_DICT
|
||||
if kwargs["model_names"][0] in VLLM_MODEL_DICT and args.infer_turbo == "vllm":
|
||||
import fastchat.serve.vllm_worker
|
||||
from fastchat.serve.vllm_worker import VLLMWorker,app
|
||||
from vllm import AsyncLLMEngine
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs,EngineArgs
|
||||
args.tokenizer = args.model_path # 如果tokenizer与model_path不一致在此处添加
|
||||
args.tokenizer_mode = 'auto'
|
||||
args.trust_remote_code= True
|
||||
args.download_dir= None
|
||||
args.load_format = 'auto'
|
||||
args.dtype = 'auto'
|
||||
args.seed = 0
|
||||
args.worker_use_ray = False
|
||||
args.pipeline_parallel_size = 1
|
||||
args.tensor_parallel_size = 1
|
||||
args.block_size = 16
|
||||
args.swap_space = 4 # GiB
|
||||
args.gpu_memory_utilization = 0.90
|
||||
args.max_num_batched_tokens = 2560
|
||||
args.max_num_seqs = 256
|
||||
args.disable_log_stats = False
|
||||
args.conv_template = None
|
||||
args.limit_worker_concurrency = 5
|
||||
args.no_register = False
|
||||
args.num_gpus = 1 # vllm worker的切分是tensor并行,这里填写显卡的数量
|
||||
args.engine_use_ray = False
|
||||
args.disable_log_requests = False
|
||||
if args.model_path:
|
||||
args.model = args.model_path
|
||||
if args.num_gpus > 1:
|
||||
args.tensor_parallel_size = args.num_gpus
|
||||
|
||||
for k, v in kwargs.items():
|
||||
setattr(args, k, v)
|
||||
|
||||
engine_args = AsyncEngineArgs.from_cli_args(args)
|
||||
engine = AsyncLLMEngine.from_engine_args(engine_args)
|
||||
|
||||
worker = VLLMWorker(
|
||||
controller_addr = args.controller_address,
|
||||
worker_addr = args.worker_address,
|
||||
worker_id = worker_id,
|
||||
model_path = args.model_path,
|
||||
model_names = args.model_names,
|
||||
limit_worker_concurrency = args.limit_worker_concurrency,
|
||||
no_register = args.no_register,
|
||||
llm_engine = engine,
|
||||
conv_template = args.conv_template,
|
||||
)
|
||||
sys.modules["fastchat.serve.vllm_worker"].engine = engine
|
||||
sys.modules["fastchat.serve.vllm_worker"].worker = worker
|
||||
|
||||
else:
|
||||
from fastchat.serve.model_worker import app, GptqConfig, AWQConfig, ModelWorker
|
||||
args.gpus = "0" # GPU的编号,如果有多个GPU,可以设置为"0,1,2,3"
|
||||
args.max_gpu_memory = "20GiB"
|
||||
args.num_gpus = 1 # model worker的切分是model并行,这里填写显卡的数量
|
||||
|
||||
args.load_8bit = False
|
||||
args.cpu_offloading = None
|
||||
args.gptq_ckpt = None
|
||||
args.gptq_wbits = 16
|
||||
args.gptq_groupsize = -1
|
||||
args.gptq_act_order = False
|
||||
args.awq_ckpt = None
|
||||
args.awq_wbits = 16
|
||||
args.awq_groupsize = -1
|
||||
args.model_names = []
|
||||
args.conv_template = None
|
||||
args.limit_worker_concurrency = 5
|
||||
args.stream_interval = 2
|
||||
args.no_register = False
|
||||
args.embed_in_truncate = False
|
||||
args.load_kwargs = {"pre_seq_len": 16} # 改*************************
|
||||
for k, v in kwargs.items():
|
||||
setattr(args, k, v)
|
||||
if args.gpus:
|
||||
if args.num_gpus is None:
|
||||
args.num_gpus = len(args.gpus.split(','))
|
||||
if len(args.gpus.split(",")) < args.num_gpus:
|
||||
raise ValueError(
|
||||
f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!"
|
||||
)
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
|
||||
gptq_config = GptqConfig(
|
||||
ckpt=args.gptq_ckpt or args.model_path,
|
||||
wbits=args.gptq_wbits,
|
||||
groupsize=args.gptq_groupsize,
|
||||
act_order=args.gptq_act_order,
|
||||
)
|
||||
awq_config = AWQConfig(
|
||||
ckpt=args.awq_ckpt or args.model_path,
|
||||
wbits=args.awq_wbits,
|
||||
groupsize=args.awq_groupsize,
|
||||
)
|
||||
|
||||
worker = ModelWorker(
|
||||
controller_addr=args.controller_address,
|
||||
worker_addr=args.worker_address,
|
||||
worker_id=worker_id,
|
||||
model_path=args.model_path,
|
||||
model_names=args.model_names,
|
||||
limit_worker_concurrency=args.limit_worker_concurrency,
|
||||
no_register=args.no_register,
|
||||
device=args.device,
|
||||
num_gpus=args.num_gpus,
|
||||
max_gpu_memory=args.max_gpu_memory,
|
||||
load_8bit=args.load_8bit,
|
||||
cpu_offloading=args.cpu_offloading,
|
||||
gptq_config=gptq_config,
|
||||
awq_config=awq_config,
|
||||
stream_interval=args.stream_interval,
|
||||
conv_template=args.conv_template,
|
||||
embed_in_truncate=args.embed_in_truncate,
|
||||
load_kwargs=args.load_kwargs #改*************************
|
||||
)
|
||||
sys.modules["fastchat.serve.model_worker"].args = args
|
||||
sys.modules["fastchat.serve.model_worker"].gptq_config = gptq_config
|
||||
|
||||
sys.modules["fastchat.serve.model_worker"].worker = worker
|
||||
|
||||
MakeFastAPIOffline(app)
|
||||
app.title = f"FastChat LLM Server ({args.model_names[0]})"
|
||||
app._worker = worker
|
||||
return app
|
||||
```
|
||||
|
||||
至此,我们完成了langchain-chatchat加载p-tuning的全部操作,将p-tuning的路径添加到model_config的llm_dict中,如
|
||||
```
|
||||
chatglm2-6b: 'p-tuning-peft'
|
||||
```
|
||||
|
||||
即可以如下方式加载p-tuning:
|
||||
|
||||
```shell
|
||||
PEFT_SHARE_BASE_WEIGHTS=true python startup.py -a
|
||||
|
||||
```
|
||||
|
||||
|
||||
## 预处理知识库文件
|
||||
|
||||
在载入知识库文件的时候,直接上传文档虽然能实现基础的问答,但是,其效果并不能发挥到最佳水平。因此,我们建议开发者对知识库文件做出以下的预处理。
|
||||
以下预处理方式如果执行,有较大概率提升模型的召回率。
|
||||
|
||||
### 1. 使用``` TXT / Markdown ``` 等格式化文件,并按照要点排版
|
||||
例如,以下段落应该被处理成如下内容后再嵌入知识库,会有更好的效果。
|
||||
```
|
||||
原文: PDF类型
|
||||
查特查特团队荣获AGI Playground Hackathon黑客松“生产力工具的新想象”赛道季军
|
||||
2023年10月16日, Founder Park在近日结束的AGI Playground Hackathon黑客松比赛中,查特查特团队展现出色的实力,荣获了“生产力工具的新想象”赛道季军。本次比赛由Founder Park主办,并由智谱、Dify、Zilliz、声网、AWS云服务等企业协办。
|
||||
比赛吸引了120多支参赛团队,最终有36支队伍进入决赛,其中34支队伍成功完成了路演。比赛规定,所有参赛选手必须在短短的48小时内完成一个应用产品开发,同时要求使用智谱大模型及Zilliz向量数据库进行开发。
|
||||
查特查特团队的现场参赛人员由两名项目成员组成:
|
||||
来自A大学的小明负责了Agent旅游助手的开发、场地协调以及团队住宿和行程的安排;在保证团队完赛上做出了主要贡献。作为队长,栋宇坚持自信,创新,沉着的精神,不断提出改进方案并抓紧落实,遇到相关问题积极请教老师,提高了团队开发效率。
|
||||
作为核心开发者的B公司小蓝,他则主管Agent智能知识库查询开发、Agent底层框架设计、相关API调整和UI调整。在最后,他代表团队在规定的时间内呈现了产品的特点和优势,并完美的展示了产品demo。为团队最终产品能够得到奖项做出了重要贡献。
|
||||
```
|
||||
修改后的Markdown文件,具有更高的召回率
|
||||
```
|
||||
# 查特查特团队荣获AGI Playground Hackathon黑客松“生产力工具的新想象”赛道季军。
|
||||
|
||||
## 报道简介
|
||||
2023年10月16日, Founder Park在近日结束的AGI Playground Hackathon黑客松比赛中,查特查特团队展现出色的实力,荣获了“生产力工具的新想象”赛道季军。本次比赛由Founder Park主办,并由智谱、Dify、Zilliz、声网、AWS云服务等企业协办。
|
||||
|
||||
## 比赛介绍
|
||||
|
||||
比赛吸引了120多支参赛团队,最终有36支队伍进入决赛,其中34支队伍成功完成了路演。比赛规定,所有参赛选手必须在短短的48小时内完成一个应用产品开发,同时要求使用智谱大模型及Zilliz向量数据库进行开发。
|
||||
|
||||
## 获奖队员简介
|
||||
|
||||
+ 小明,A大学
|
||||
+ 负责Agent旅游助手的开发、场地协调以及团队住宿和行程的安排
|
||||
+ 在保证团队完赛上做出了主要贡献。作为队长,栋宇坚持自信,创新,沉着的精神,不断提出改进方案并抓紧落实,遇到相关问题积极请教老师,提高了团队开发效率。
|
||||
|
||||
+ 小蓝,B公司
|
||||
+ 主管Agent智能知识库查询开发、Agent底层框架设计、相关API调整和UI调整。
|
||||
+ 代表团队在规定的时间内呈现了产品的特点和优势,并完美的展示了产品demo。
|
||||
```
|
||||
|
||||
### 2. 减少文件中冲突的内容,分门别类存放数据
|
||||
|
||||
就像人类寻找相关点一样,如果在多份文件中存在相似的内容,可能会导致模型无法准确的搜索到相关内容。
|
||||
因此,需要减少文件中相似的内容,或将其分在不同的知识库中。
|
||||
例如,以下两个句子中,如果搜索外籍教师,则具有歧义,非常容易搜索到错误答案。
|
||||
|
||||
```
|
||||
文件一:
|
||||
在大数据专业中,我们已经拥有超过1/3的外籍博士和教师。
|
||||
|
||||
文件二:
|
||||
|
||||
本专业具有40%的外籍教师比例,
|
||||
本专业有博士生10人,研究生12人。
|
||||
```
|
||||
|
||||
### 3. 减少具有歧义的句子
|
||||
知识库中应该减少具有歧义的句子和段落,或者汉语的高级用法,例如
|
||||
```
|
||||
1. 他说他会杀了那个人。
|
||||
2. 你说啥子?
|
||||
3. 我喜欢你的头发。
|
||||
4. 地板真的滑,我差点没摔倒。
|
||||
```
|
||||
在相似度模型对比的时候,仅仅能搜索句子的表面意思,因此,使用有歧义的句子和段落可能导致搜索错误。
|
||||
|
||||
### 4. 减少单个文件的大小,减少文件中的特殊符号
|
||||
+ 上传知识库的单个文件不建议超过5MB,以免出现向量化中断卡死等情况。同时,上传大文件不要使用faiss数据库。
|
||||
+ 减少上传文件中的中文符号,特殊符号,无意义空格等。
|
||||
|
||||
## 自定义的关键词调整Embedding模型
|
||||
|
||||
1. 首先准备一个关键字的文本文件,每一行是一个关键字。例如:
|
||||
```
|
||||
文件key_words.txt:
|
||||
iphone13pro
|
||||
中石油
|
||||
```
|
||||
2. 配置kb_config.py
|
||||
```
|
||||
EMBEDDING_KEYWORD_FILE = "key_words.txt"  # 指向第 1 步准备的关键字文件
|
||||
```
|
||||
3. 运行```embeddings/add_embedding_keywords.py```
|
||||
```
|
||||
输入的文本(这里只是一个没分隔的一串字符):iphone13pro
|
||||
生成的token id序列:[101, 21128, 102]
|
||||
token到token id的映射:
|
||||
[CLS]->101
|
||||
iphone13pro->21128
|
||||
[SEP]->102
|
||||
|
||||
输入的文本:中石油
|
||||
生成的token id序列:[101, 21129, 102]
|
||||
token到token id的映射:
|
||||
[CLS]->101
|
||||
中石油->21129
|
||||
[SEP]->102
|
||||
```
|
||||
这样,你就获得了一个新的带有关键词调整的Embedding模型
|
||||
## 实际使用效果
|
||||
在这里,我们放置了一些成功调用的效果图,方便开发者查看自己是否成功运行了框架。
|
||||
|
||||
### 检查是否成功上传/管理自己的知识库
|
||||
|
||||
在WebUI界面上传知识库文件后,必须保证其完成向量化;成功之后,文件会被切分,并在向量一栏打钩。
|
||||
下图展示了成功上传知识库的画面
|
||||
|
||||

|
||||
|
||||
请确保所有知识库都已经进行了向量化。
|
||||
|
||||
### 检查是否成功开启LLM对话
|
||||
|
||||
若打开webui后,在该模式下能成功跟大模型对话即成功调用。
|
||||
|
||||
下图为成功调用LLM的效果图:
|
||||
|
||||

|
||||
|
||||
### 检查是否成功调用知识库/搜索
|
||||
若成功调用知识库,则你应该能看到,在大模型回答的下方有一个```知识库匹配结果```的展开框,并且内部显示了相关的匹配结果。
|
||||
如果没有搜索到相关内容,则会提示```根据已知信息无法回答问题```,并且下拉框中没有任何内容。
|
||||
|
||||
下图为成功调用知识库效果图:
|
||||
|
||||

|
||||
|
||||
在这个案例中,第一次用户的提问无法在知识库中寻找到合适的答案,因此,大模型回答了```根据已知信息无法回答问题```。
|
||||
|
||||
第二次用户的提问能在知识库中寻找到合适的答案,因此,大模型给出了一个正确的回答。
|
||||
|
||||
__注意__: 知识库的搜索情况取决于嵌入模型的准度,分词器的设置,知识库的排版和大模型的数量,提示词设定等多个因素。因此,需要开发者进行深度的优化和调试。
|
||||
|
||||
### 检查是否成功调用Agent工具
|
||||
|
||||
若成功调用Agent工具,则你应该能看到大模型完整的思维过程,这会在```思考过程```下拉框中显示出来,同时还会看到以Markdown引用格式呈现的工具使用情况。
|
||||
在Agent对话模式中,```思考过程```中显示的是大模型的思考过程,而下拉框之前的内容为大模型的```Final Answer```,缺乏中间的运算过程。
|
||||
|
||||
下图展现了一个成功调用Agent工具的效果图:
|
||||
|
||||

|
||||
|
||||
本框架支持模型连续调用多个Agent工具,下图展示了在一个提问中大模型连续调用多个Agent工具的效果图:
|
||||
|
||||

|
||||
|
||||
在这个案例中,```3900```是大模型的最终答案,其余都是思考过程。
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,159 @@
|
||||
## 使用自定义的分词器
|
||||
1. 在```text_splitter```文件夹下新建一个文件,文件名为您的分词器名字,比如`my_splitter.py`,然后在`__init__.py`中导入您的分词器,如下所示:
|
||||
```python
|
||||
from .my_splitter import MySplitter
|
||||
```
|
||||
|
||||
2. 修改```configs/model_config.py```文件,将您的分词器名字添加到```text_splitter_dict```中,如下所示:
|
||||
```python
|
||||
"MySplitter": {
    "source": "huggingface",  # 选择 tiktoken 则使用 openai 的方法
    "tokenizer_name_or_path": "your tokenizer",  # 如果选择 huggingface 则使用 huggingface 的方法,部分 tokenizer 需要从 Huggingface 下载
},
TEXT_SPLITTER = "MySplitter"
|
||||
```
|
||||
|
||||
完成上述步骤后,就能使用自己的分词器了。
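下面给出第 1 步中 `my_splitter.py` 的一个最小示意(假设继承 Langchain 的 CharacterTextSplitter,切分逻辑仅作演示,实际请按业务需要实现):

```python
# text_splitter/my_splitter.py —— 示意性实现
from typing import List

from langchain.text_splitter import CharacterTextSplitter


class MySplitter(CharacterTextSplitter):
    def __init__(self, separator: str = "\n\n", **kwargs):
        super().__init__(separator=separator, **kwargs)

    def split_text(self, text: str) -> List[str]:
        # 简单按空行切分并去掉空串;真实场景可替换为自己的切分逻辑
        return [s.strip() for s in text.split(self._separator) if s.strip()]
```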
|
||||
|
||||
## 使用自定义的 Agent 工具
|
||||
|
||||
1. 创建自己的Agent工具
|
||||
|
||||
+ 开发者在```server/agent```文件夹中创建一个自己的文件,并将其添加到```tools_select.py```中。这样就完成了Tools的设定。
|
||||
|
||||
+ 当您创建了一个```custom_agent.py```文件,其中包含一个```work```函数,那么您需要在```tools_select.py```中添加如下代码:
|
||||
```python
|
||||
from langchain.tools import Tool  # 补充所需的导入
from custom_agent import work

Tool.from_function(
    func=work,
    name="该函数的名字",
    description="对该工具功能的简要描述,便于模型正确选择工具",
)
|
||||
```
|
||||
+ 请注意,如果你确定在某一个工程中不会使用到某个工具,可以将其从Tools中移除,降低模型分类错误导致使用错误工具的风险。
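作为补充,上面示例中导入的 `custom_agent.work` 可以是如下的最小实现(仅为示意,内部逻辑为占位):

```python
# server/agent/custom_agent.py —— 示意性工具实现
def work(query: str) -> str:
    """接收模型给出的 Action Input,返回工具执行结果(此处仅作回显演示)。"""
    return f"工具收到的输入:{query}"
```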
|
||||
|
||||
2. 修改 ```custom_template.py``` 文件
|
||||
|
||||
开发者需要根据自己选择的大模型设定适合该模型的Agent Prompt和自定义返回格式。
````
"""
Answer the following questions as best you can. You have access to the following tools:
{tools}
Use the following format:
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
Begin!
history:
{history}
Question: {input}
Thought: {agent_scratchpad}
"""
````

Besides this `Zero React` style prompt, developers can modify the prompt themselves or use the other Agent structures provided by LangChain. For example, if you are using the `ChatGLM3-6B` model, we provide an Agent prompt that runs `ChatGLM3-6B` correctly; it is similar to LangChain's `structured chat` Agent, and its content is as follows:

````

"ChatGLM3":
"""
You can answer using the tools, or answer directly using your knowledge without using the tools. Respond to the human as helpfully and accurately as possible.
You have access to the following tools:
{tools}
Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
Valid "action" values: "Final Answer" or [{tool_names}]
Provide only ONE action per $JSON_BLOB, as shown:

```
{{{{
"action": $TOOL_NAME,
"action_input": $INPUT
}}}}
```
Follow this format:

Question: input question to answer
Thought: consider previous and subsequent steps
Action:
```
$JSON_BLOB
```
Observation: action result
... (repeat Thought/Action/Observation N times)
Thought: I know what to respond
Action:
```
{{{{
"action": "Final Answer",
"action_input": "Final response to human"
}}}}
```
Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.

history: {history}

Question: {input}

Thought: {agent_scratchpad}
""",
````

3. Enabling Agent-capable models that do not follow LangChain's calling convention

Models such as **ChatGLM3-6B** do have Function Call capability, but their alignment format does not match the default Agent format provided by LangChain, so LangChain alone cannot drive their Function Call. In our framework, you can add your own implementations of more models' Agent capabilities in the ```server/agent/custom_agent/``` folder.

After completing the steps above, you also need to import your module in ```server/chat/agent_chat/``` to add the special-case handling.

You should also use the matching custom template when calling tools. Taking the `GLM` family as an example: if you use a `GLM` model for tool calling, you should use the `ChatGLM3` template in `model_config.py`.

4. Limitations

- Because React Agents are fragile, the temperature setting has a large impact on results. We recommend setting it below 0.1, whichever model you use with a custom Agent, to get better results.
- So far, the official code only activates this non-LangChain alignment path for **ChatGLM3-6B**. We welcome developers to explore other models and submit the corresponding PRs so the framework supports more Agent models.
- In `0.2.x` we did not optimize the `Plan` step, so the ability to call tools consecutively is weak; this will be improved in `0.3.x`. In addition, our tests show local models perform worse than online models at tool calling, so we recommend using `gpt4-1106-Preview` for tool-calling tasks.

## Using a Custom Fine-tuned Model

- This project loads LLM services through FastChat, so the PEFT path must be loaded the FastChat way.
- The path name must contain the word peft.
- The configuration file must be named ```adapter_config.json```.
- The peft path must contain PEFT weights in .bin format, and the path is passed via ```args.model_names``` in the ```create_model_worker_app``` function of startup.py:

```python
args.model_names = ["/home/ubuntu/your_peft_folder/peft"]
```

- Before running the code, set the environment variable:

```
PEFT_SHARE_BASE_WEIGHTS=true
```
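
For example, on Linux the variable can be set inline when launching the project; the command below assumes the usual 0.2.x startup entry point, so adjust it to however you normally start the stack:

```
PEFT_SHARE_BASE_WEIGHTS=true python startup.py -a
```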

Note: if the approach above fails to start, launch the standard FastChat services step by step instead; see the following issue for the detailed PEFT loading steps:

[Loading a LoRA fine-tuned model fails to take effect](https://github.com/chatchat-space/Langchain-Chatchat/issues/1130#issuecomment-1685291822)

The ```最佳实践``` (Best Practices) chapter contains more detailed documentation on loading models.

__This feature may still have bugs; developers need to adapt it carefully.__

## Using a Custom Embedding Model

- To use a custom embedding model, merge it into the original embedding model, then simply add its path to ```configs/model_config.py``` and select it at startup.
- If you want the embedding model to support your own keywords, list them in ```embeddings/embedding_keywords.txt```.
- Run ```embeddings/add_embedding_keywords.py```.
- Put the path of the newly generated embedding model into ```configs/model_config.py``` and select it:

```
"custom-embedding": "your path",
```

and set

```
EMBEDDING_MODEL = "custom-embedding"
```

to use the keyword-enhanced embedding model.

In the ```最佳实践``` (Best Practices) chapter, we build such an embedding model customized for a few specific keywords.

## Logging

- The logs record the large model's heartbeats and the traffic over the network ports; developers can use them to check how the models are running.