fix bugs; make poetry using tsinghua mirror of pypi

2026-01-19 13:23:16 +08:00 · 2024-03-26 12:27:02 +08:00 · 2024-03-26 12:27:02 +08:00 · 24ffa2f362
commit 24ffa2f362
parent b9c08cbefd
214 changed files with 3354 additions and 18 deletions
--- a/chatchat-server/poetry.toml
+++ b/chatchat-server/poetry.toml
@ -3,3 +3,7 @@ in-project = true

 [installer]
 modern-installation = false
+
+[plugins]
+[plugins.pypi_mirror]
+url = "https://pypi.tuna.tsinghua.edu.cn/simple"
--- a/chatchat-server/pyproject.toml
+++ b/chatchat-server/pyproject.toml
@ -6,7 +6,7 @@ authors = ["chatchat"]
 readme = "README.md"

 [tool.poetry.dependencies]
-python = ">=3.8,<4.0,!=3.9.7"
+python = ">=3.8.1,<4.0,!=3.9.7"
 chatchat-model-providers = "^0.3.0"
 langchain = "0.1.5"
 langchainhub = "0.1.14"
@ -35,9 +35,9 @@ strsimpy = ">=0.2.1"
 markdownify = ">=0.11.6"
 tqdm = ">=4.66.1"
 websockets = ">=12.0"
-numpy = ">=1.26.3"
+numpy = "1.24.4"
 pandas = "~2.1.4"
-pydantic = "1.10.14"
+pydantic = "2.6.4"
 httpx = {version = ">=0.25.2", extras = ["brotli", "http2", "socks"]}
 python-multipart = "0.0.9"

@ -194,7 +194,7 @@ omit = [
 ]

 [build-system]
-requires = ["poetry-core>=1.0.0"]
+requires = ["poetry-core>=1.0.0", "poetry-plugin-pypi-mirror==0.4.2"]
 build-backend = "poetry.core.masonry.api"

 [tool.pytest.ini_options]
--- a/chatchat/chatchat/configs/init.py
+++ b/chatchat/chatchat/configs/init.py
--- a/chatchat/chatchat/configs/basic_config.py
+++ b/chatchat/chatchat/configs/basic_config.py
@ -0,0 +1,47 @@
+import logging
+import os
+from pathlib import Path
+
+import langchain
+
+
+# Whether to emit verbose logs; the same flag is forwarded to langchain.
+log_verbose = True
+langchain.verbose = log_verbose
+
+# The settings below normally do not need to be changed.
+
+# Root directory for user data (a "data" folder two levels above this file).
+DATA_PATH = str(Path(__file__).absolute().parent.parent / "data")
+# makedirs(exist_ok=True) replaces the racy "check, then mkdir" pattern and
+# is a no-op when the directory is already present.
+os.makedirs(DATA_PATH, exist_ok=True)
+
+# Storage path for nltk data; prepended so it is searched before the defaults.
+NLTK_DATA_PATH = os.path.join(DATA_PATH, "nltk_data")
+import nltk
+nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
+
+# Log format applied to the root logger.
+LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+logging.basicConfig(format=LOG_FORMAT)
+
+
+# Directory where log files are stored.
+LOG_PATH = os.path.join(DATA_PATH, "logs")
+os.makedirs(LOG_PATH, exist_ok=True)
+
+# Where model-generated content (images, video, audio, ...) is saved.
+MEDIA_PATH = os.path.join(DATA_PATH, "media")
+# Create every sub-directory unconditionally: previously they were only
+# created together with MEDIA_PATH, so an existing "media" folder that lacked
+# one of them was never repaired.
+os.makedirs(os.path.join(MEDIA_PATH, "image"), exist_ok=True)
+os.makedirs(os.path.join(MEDIA_PATH, "audio"), exist_ok=True)
+os.makedirs(os.path.join(MEDIA_PATH, "video"), exist_ok=True)
+
+# Temporary file directory, mainly used for file chat.
+BASE_TEMP_DIR = os.path.join(DATA_PATH, "temp")
+os.makedirs(BASE_TEMP_DIR, exist_ok=True)
--- a/chatchat/chatchat/configs/basic_config.py.example
+++ b/chatchat/chatchat/configs/basic_config.py.example
--- a/chatchat/chatchat/configs/kb_config.py
+++ b/chatchat/chatchat/configs/kb_config.py
@ -0,0 +1,148 @@
+import os
+
+from configs.basic_config import DATA_PATH
+
+
+# Knowledge base used by default.
+DEFAULT_KNOWLEDGE_BASE = "samples"
+
+# Default vector store / full-text search engine type.
+# Options: faiss, milvus (offline) & zilliz (online), pgvector, and the
+# full-text search engine es.
+DEFAULT_VS_TYPE = "faiss"
+
+# Number of cached vector stores (FAISS only).
+CACHED_VS_NUM = 1
+
+# Number of cached temporary vector stores (FAISS only), used for file chat.
+CACHED_MEMO_VS_NUM = 10
+
+# Length of a single text chunk in the knowledge base
+# (does not apply to MarkdownHeaderTextSplitter).
+CHUNK_SIZE = 250
+
+# Overlap length between adjacent text chunks
+# (does not apply to MarkdownHeaderTextSplitter).
+OVERLAP_SIZE = 50
+
+# Number of vectors returned by a knowledge base match.
+VECTOR_SEARCH_TOP_K = 3
+
+# Relevance threshold for knowledge base matching, in the range 0-1. A smaller
+# SCORE means higher relevance; 1 is equivalent to no filtering. A value
+# around 0.5 is recommended.
+SCORE_THRESHOLD = 1
+
+# Default search engine. Options: bing, duckduckgo, metaphor
+DEFAULT_SEARCH_ENGINE = "metaphor"
+
+# Number of search engine results to match.
+SEARCH_ENGINE_TOP_K = 3
+
+
+# Variables required for Bing search.
+# Bing search needs a Bing Subscription Key, which is requested through the
+# Azure portal (bing search trial).
+# For how to apply, see
+# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource
+# For creating a Bing API search instance with Python, see:
+# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python
+BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
+# Note: this is not the API key of Bing Webmaster Tools.
+
+# Also, if a server reports
+# "Failed to establish a new connection: [Errno 110] Connection timed out",
+# the server is behind a firewall and an administrator has to whitelist the
+# endpoint; on a locked-down company server this is unlikely to be possible.
+# NOTE(review): this looks like a real credential committed to version
+# control — confirm, rotate it, and load it from the environment instead.
+BING_SUBSCRIPTION_KEY = "b31d23d7b96742ab959f4cc07a605f72"
+
+# Metaphor search requires a KEY.
+# NOTE(review): same concern as above — looks like a live key; verify/rotate.
+METAPHOR_API_KEY = "f8c9f98f-141a-4a55-9be7-ae675ccacd7a"
+
+# Seniverse weather API KEY, used by the weather Agent.
+# Apply at: https://www.seniverse.com/
+SENIVERSE_API_KEY = ""
+
+# Whether to enable Chinese title enhancement, and its related settings.
+# Title detection marks which texts are titles in the metadata;
+# each text is then joined with its parent title to enrich the text.
+ZH_TITLE_ENHANCE = False
+
+# PDF OCR control: only OCR images whose width and height exceed the given
+# fraction of the page (image width / page width, image height / page height).
+# This avoids interference from small images in a PDF and speeds up
+# processing of non-scanned PDFs.
+PDF_OCR_THRESHOLD = (0.6, 0.6)
+
+# Initial description of each knowledge base, shown when the knowledge base is
+# initialised and used for Agent calls. A knowledge base without a description
+# has none displayed and will not be called by the Agent.
+KB_INFO = {
+    "samples": "关于本项目issue的解答",
+}
+
+
+# The settings below normally do not need to be changed.
+
+# Default storage path for knowledge bases.
+KB_ROOT_PATH = os.path.join(DATA_PATH, "knowledge_base")
+if not os.path.exists(KB_ROOT_PATH):
+    os.mkdir(KB_ROOT_PATH)
+
+# Default storage path for the database.
+# With sqlite, change DB_ROOT_PATH directly; with any other database, change
+# SQLALCHEMY_DATABASE_URI directly.
+DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db")
+SQLALCHEMY_DATABASE_URI = f"sqlite:///{DB_ROOT_PATH}"
+
+# 可选向量库类型及对应配置
+kbs_config = {
+    "faiss": {
+    },
+    "milvus": {
+        "host": "127.0.0.1",
+        "port": "19530",
+        "user": "",
+        "password": "",
+        "secure": False,
+    },
+    "zilliz": {
+        "host": "in01-a7ce524e41e3935.ali-cn-hangzhou.vectordb.zilliz.com.cn",
+        "port": "19530",
+        "user": "",
+        "password": "",
+        "secure": True,
+        },
+    "pg": {
+        "connection_uri": "postgresql://postgres:postgres@127.0.0.1:5432/langchain_chatchat",
+    },
+
+    "es": {
+        "host": "127.0.0.1",
+        "port": "9200",
+        "index_name": "test_index",
+        "user": "",
+        "password": ""
+    },
+    "milvus_kwargs":{
+        "search_params":{"metric_type": "L2"}, #在此处增加search_params
+        "index_params":{"metric_type": "L2","index_type": "HNSW"} # 在此处增加index_params
+    },
+    "chromadb": {}
+}
+
+# TextSplitter配置项，如果你不明白其中的含义，就不要修改。
+text_splitter_dict = {
+    "ChineseRecursiveTextSplitter": {
+        "source": "",  ## 选择tiktoken则使用openai的方法
+        "tokenizer_name_or_path": "",
+    },
+    "SpacyTextSplitter": {
+        "source": "",
+        "tokenizer_name_or_path": "",
+    },
+    "RecursiveCharacterTextSplitter": {
+        "source": "tiktoken",
+        "tokenizer_name_or_path": "cl100k_base",
+    },
+    "MarkdownHeaderTextSplitter": {
+        "headers_to_split_on":
+            [
+                ("#", "head1"),
+                ("##", "head2"),
+                ("###", "head3"),
+                ("####", "head4"),
+            ]
+    },
+}
+
+# TEXT_SPLITTER 名称
+TEXT_SPLITTER_NAME = "ChineseRecursiveTextSplitter"
+
+# Embedding模型定制词语的词表文件
+EMBEDDING_KEYWORD_FILE = "embedding_keywords.txt"
--- a/chatchat/chatchat/configs/kb_config.py.example
+++ b/chatchat/chatchat/configs/kb_config.py.example
--- a/chatchat/chatchat/configs/loom.yaml.example
+++ b/chatchat/chatchat/configs/loom.yaml.example
--- a/chatchat/chatchat/configs/model_config.py
+++ b/chatchat/chatchat/configs/model_config.py
@ -0,0 +1,170 @@
+import os
+
+
+# 默认选用的 LLM 名称
+DEFAULT_LLM_MODEL = "qwen"
+
+# 默认选用的 Embedding 名称
+DEFAULT_EMBEDDING_MODEL = "bge"
+
+
+# AgentLM模型的名称 (可以不指定，指定之后就锁定进入Agent之后的Chain的模型，不指定就是LLM_MODELS[0])
+Agent_MODEL = None
+
+# 历史对话轮数
+HISTORY_LEN = 3
+
+# 大模型最长支持的长度，如果不填写，则使用模型默认的最大长度，如果填写，则为用户设定的最大长度
+MAX_TOKENS = None
+
+# LLM通用对话参数
+TEMPERATURE = 0.7
+# TOP_P = 0.95 # ChatOpenAI暂不支持该参数
+
+SUPPORT_AGENT_MODELS = [
+    "chatglm3-6b",
+    "openai-api",
+    "Qwen-14B-Chat",
+    "Qwen-7B-Chat",
+    "qwen",
+]
+
+
+LLM_MODEL_CONFIG = {
+    # 意图识别不需要输出，模型后台知道就行
+    "preprocess_model": {
+        DEFAULT_LLM_MODEL: {
+            "temperature": 0.05,
+            "max_tokens": 4096,
+            "history_len": 100,
+            "prompt_name": "default",
+            "callbacks": False
+        },
+    },
+    "llm_model": {
+        DEFAULT_LLM_MODEL: {
+            "temperature": 0.9,
+            "max_tokens": 4096,
+            "history_len": 10,
+            "prompt_name": "default",
+            "callbacks": True
+        },
+    },
+    "action_model": {
+        DEFAULT_LLM_MODEL: {
+            "temperature": 0.01,
+            "max_tokens": 4096,
+            "callbacks": True
+        },
+    },
+    "postprocess_model": {
+        DEFAULT_LLM_MODEL: {
+            "temperature": 0.01,
+            "max_tokens": 4096,
+            "prompt_name": "default",
+            "callbacks": True
+        }
+    },
+    "image_model": {
+        "sd-turbo": {
+            "size": "256*256",
+        }
+    },
+    "multimodal_model": {
+        "qwen-vl": {}
+    },
+}
+
+# 可以通过 loom/xinference/oneapi/fastchat 启动模型服务，然后将其 URL 和 KEY 配置过来即可。
+#   - platform_name 可以任意填写，不要重复即可
+#   - platform_type 可选：openai, xinference, oneapi, fastchat。以后可能根据平台类型做一些功能区分
+#   - 将框架部署的模型填写到对应列表即可。不同框架可以加载同名模型，项目会自动做负载均衡。
+
+MODEL_PLATFORMS = [
+    # {
+    #     "platform_name": "openai-api",
+    #     "platform_type": "openai",
+    #     "api_base_url": "https://api.openai.com/v1",
+    #     "api_key": "sk-yBuaCpqEVUBarBP9700e7224A2D743AeA329334d19C0A336",
+    #     "api_proxy": "https://qujhzynu.cloud.sealos.io/v1",
+    #     "api_concurrencies": 5,
+    #     "llm_models": [
+    #         "gpt-3.5-turbo",
+    #     ],
+    #     "embed_models": [],
+    #     "image_models": [],
+    #     "multimodal_models": [],
+    # },
+
+    {
+        "platform_name": "xinference",
+        "platform_type": "xinference",
+        "api_base_url": "http://127.0.0.1:9997/v1",
+        "api_key": "EMPTY",
+        "api_concurrencies": 5,
+        # 注意：这里填写的是 xinference 部署的模型 UID，而非模型名称
+        "llm_models": [
+            "qwen",
+            "glm3",
+        ],
+        "embed_models": [
+            "bge",
+        ],
+        "image_models": [
+            "sd-turbo",
+        ],
+        "multimodal_models": [
+            "qwen-vl",
+        ],
+    },
+
+    {
+        "platform_name": "oneapi",
+        "platform_type": "oneapi",
+        "api_base_url": "http://127.0.0.1:3000/v1",
+        "api_key": "sk-Mlft68FXoTYqLfQr06F0E2D77e6e4220B6F420999d25383f",
+        "api_concurrencies": 5,
+        "llm_models": [
+            # 智谱 API
+            "chatglm_pro",
+            "chatglm_turbo",
+            "chatglm_std",
+            "chatglm_lite",
+            # 千问 API
+            "qwen-turbo",
+            "qwen-plus",
+            "qwen-max",
+            "qwen-max-longcontext",
+            # 千帆 API
+            "ERNIE-Bot",
+            "ERNIE-Bot-turbo",
+            "ERNIE-Bot-4",
+            # 星火 API
+            "SparkDesk",
+        ],
+        "embed_models": [
+            # 千问 API
+            "text-embedding-v1",
+            # 千帆 API
+            "Embedding-V1",
+        ],
+        "image_models": [],
+        "multimodal_models": [],
+    },
+
+    # {
+    #     "platform_name": "loom",
+    #     "platform_type": "loom",
+    #     "api_base_url": "http://127.0.0.1:7860/v1",
+    #     "api_key": "88296d2f9bbd9ab222c1086e39f5fbb2.FbC0YSrAMcaEF2gB",
+    #     "api_concurrencies": 5,
+    #     "llm_models": [
+    #         "chatglm3-6b",
+    #     ],
+    #     "embed_models": [],
+    #     "image_models": [],
+    #     "multimodal_models": [],
+    # },
+]
+
+LOOM_CONFIG = os.path.join(os.path.dirname(os.path.abspath(__file__)), "loom.yaml")
--- a/chatchat/chatchat/configs/model_config.py.example
+++ b/chatchat/chatchat/configs/model_config.py.example
--- a/chatchat/chatchat/configs/openai-plugins-list.json
+++ b/chatchat/chatchat/configs/openai-plugins-list.json
--- a/chatchat/chatchat/configs/prompt_config.py
+++ b/chatchat/chatchat/configs/prompt_config.py
@ -0,0 +1,209 @@
+PROMPT_TEMPLATES = {
+    # Intent pre-check prompt: the model must answer with a single digit,
+    # 1 when the question needs a tool and 0 otherwise. The previous wording
+    # said the listed cases did NOT need tools while still asking for 1 on
+    # them, and ran the first list item onto the intro line.
+    "preprocess_model": {
+        "default":
+            '你只要回复0 或 1 ，代表是否需要使用工具。以下几种问题需要使用工具:\n'
+            '1. 需要联网查询的内容\n'
+            '2. 需要计算的内容\n'
+            '3. 需要查询实时性的内容\n'
+            '如果我的输入满足这几种情况，返回1。其他输入，请你回复0，你只要返回一个数字\n'
+            '这是我的问题:'
+    },
+    "llm_model": {
+        "default":
+            '{{input}}',
+        "with_history":
+            'The following is a friendly conversation between a human and an AI. '
+            'The AI is talkative and provides lots of specific details from its context. '
+            'If the AI does not know the answer to a question, it truthfully says it does not know.\n\n'
+            'Current conversation:\n'
+            '{history}\n'
+            'Human: {input}\n'
+            'AI:',
+    },
+    "action_model": {
+        "GPT-4":
+            'Answer the following questions as best you can. You have access to the following tools:\n'
+            'The way you use the tools is by specifying a json blob.\n'
+            'Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n'
+            'The only values that should be in the "action" field are: {tool_names}\n'
+            'The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\n'
+            '```\n\n'
+            '{{{{\n'
+            '  "action": $TOOL_NAME,\n'
+            '  "action_input": $INPUT\n'
+            '}}}}\n'
+            '```\n\n'
+            'ALWAYS use the following format:\n'
+            'Question: the input question you must answer\n'
+            'Thought: you should always think about what to do\n'
+            'Action:\n'
+            '```\n\n'
+            '$JSON_BLOB'
+            '```\n\n'
+            'Observation: the result of the action\n'
+            '... (this Thought/Action/Observation can repeat N times)\n'
+            'Thought: I now know the final answer\n'
+            'Final Answer: the final answer to the original input question\n'
+            'Begin! Reminder to always use the exact characters `Final Answer` when responding.\n'
+            'Question:{input}\n'
+            'Thought:{agent_scratchpad}\n',
+
+        "ChatGLM3":
+            'You can answer using the tools.Respond to the human as helpfully and accurately as possible.\n'
+            'You have access to the following tools:\n'
+            '{tools}\n'
+            'Use a json blob to specify a tool by providing an action key (tool name)\n'
+            'and an action_input key (tool input).\n'
+            'Valid "action" values: "Final Answer" or  [{tool_names}]\n'
+            'Provide only ONE action per $JSON_BLOB, as shown:\n\n'
+            '```\n'
+            '{{{{\n'
+            '  "action": $TOOL_NAME,\n'
+            '  "action_input": $INPUT\n'
+            '}}}}\n'
+            '```\n\n'
+            'Follow this format:\n\n'
+            'Question: input question to answer\n'
+            'Thought: consider previous and subsequent steps\n'
+            'Action:\n'
+            '```\n'
+            '$JSON_BLOB\n'
+            '```\n'
+            'Observation: action result\n'
+            '... (repeat Thought/Action/Observation N times)\n'
+            'Thought: I know what to respond\n'
+            'Action:\n'
+            '```\n'
+            '{{{{\n'
+            '  "action": "Final Answer",\n'
+            '  "action_input": "Final response to human"\n'
+            '}}}}\n'
+            'Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary.\n'
+            'Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.\n'
+            'Question: {input}\n\n'
+            '{agent_scratchpad}\n',
+        "qwen":
+            'Answer the following questions as best you can. You have access to the following APIs:\n\n'
+            '{tools}\n\n'
+            'Use the following format:\n\n'
+            'Question: the input question you must answer\n'
+            'Thought: you should always think about what to do\n'
+            'Action: the action to take, should be one of [{tool_names}]\n'
+            'Action Input: the input to the action\n'
+            'Observation: the result of the action\n'
+            '... (this Thought/Action/Action Input/Observation can be repeated zero or more times)\n'
+            'Thought: I now know the final answer\n'
+            'Final Answer: the final answer to the original input question\n\n'
+            'Format the Action Input as a JSON object.\n\n'
+            'Begin!\n\n'
+            'Question: {input}\n\n'
+            '{agent_scratchpad}\n\n',
+        "structured-chat-agent":
+            'Respond to the human as helpfully and accurately as possible. You have access to the following tools:\n\n'
+            '{tools}\n\n'
+            'Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).\n\n'
+            'Valid "action" values: "Final Answer" or {tool_names}\n\n'
+            'Provide only ONE action per $JSON_BLOB, as shown:\n\n'
+            '```\n{{\n  "action": $TOOL_NAME,\n  "action_input": $INPUT\n}}\n```\n\n'
+            'Follow this format:\n\n'
+            'Question: input question to answer\n'
+            'Thought: consider previous and subsequent steps\n'
+            'Action:\n```\n$JSON_BLOB\n```\n'
+            'Observation: action result\n'
+            '... (repeat Thought/Action/Observation N times)\n'
+            'Thought: I know what to respond\n'
+            'Action:\n```\n{{\n  "action": "Final Answer",\n  "action_input": "Final response to human"\n}}\n\n'
+            'Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation\n'
+            '{input}\n\n'
+            '{agent_scratchpad}\n\n'
+            # '(reminder to respond in a JSON blob no matter what)'
+    },
+    "postprocess_model": {
+        "default": "{{input}}",
+    }
+}
+
+TOOL_CONFIG = {
+    "search_local_knowledgebase": {
+        "use": False,
+        "top_k": 3,
+        "score_threshold": 1,
+        "conclude_prompt": {
+            "with_result":
+                '<指令>根据已知信息，简洁和专业的来回答问题。如果无法从中得到答案，请说 "根据已知信息无法回答该问题"，'
+                '不允许在答案中添加编造成分，答案请使用中文。 </指令>\n'
+                '<已知信息>{{ context }}</已知信息>\n'
+                '<问题>{{ question }}</问题>\n',
+            "without_result":
+                '请你根据我的提问回答我的问题:\n'
+                '{{ question }}\n'
+                '请注意，你必须在回答结束后强调，你的回答是根据你的经验回答而不是参考资料回答的。\n',
+        }
+    },
+    "search_internet": {
+        "use": False,
+        "search_engine_name": "bing",
+        "search_engine_config":
+            {
+                "bing": {
+                    "result_len": 3,
+                    "bing_search_url": "https://api.bing.microsoft.com/v7.0/search",
+                    "bing_key": "680a39347d7242c5bd2d7a9576a125b7",
+                },
+                "metaphor": {
+                    "result_len": 3,
+                    "metaphor_api_key": "",
+                    "split_result": False,
+                    "chunk_size": 500,
+                    "chunk_overlap": 0,
+                },
+                "duckduckgo": {
+                    "result_len": 3
+                }
+            },
+        "top_k": 10,
+        "verbose": "Origin",
+        "conclude_prompt":
+            "<指令>这是搜索到的互联网信息，请你根据这些信息进行提取并有调理，简洁的回答问题。如果无法从中得到答案，请说 “无法搜索到能回答问题的内容”。 "
+            "</指令>\n<已知信息>{{ context }}</已知信息>\n"
+            "<问题>\n"
+            "{{ question }}\n"
+            "</问题>\n"
+    },
+    "arxiv": {
+        "use": False,
+    },
+    "shell": {
+        "use": False,
+    },
+    "weather_check": {
+        "use": False,
+        "api-key": "S8vrB4U_-c5mvAMiK",
+    },
+    "search_youtube": {
+        "use": False,
+    },
+    "wolfram": {
+        "use": False,
+    },
+    "calculate": {
+        "use": False,
+    },
+    "vqa_processor": {
+        "use": False,
+        "model_path": "your model path",
+        "tokenizer_path": "your tokenizer path",
+        "device": "cuda:1"
+    },
+    "aqa_processor": {
+        "use": False,
+        "model_path": "your model path",
+        "tokenizer_path": "yout tokenizer path",
+        "device": "cuda:2"
+    },
+
+    "text2images": {
+        "use": False,
+    },
+
+}
--- a/chatchat/chatchat/configs/prompt_config.py.example
+++ b/chatchat/chatchat/configs/prompt_config.py.example
--- a/chatchat/chatchat/configs/server_config.py
+++ b/chatchat/chatchat/configs/server_config.py
@ -0,0 +1,25 @@
+import sys
+
+
+# Default timeout (in seconds) for httpx requests. Raise it if model loading
+# or chat responses are slow enough to trigger timeout errors.
+HTTPX_DEFAULT_TIMEOUT = 300.0
+
+# Whether the API allows cross-origin requests. It is enabled here; set it to
+# False to turn it off.
+OPEN_CROSS_DOMAIN = True
+
+# Default bind host for the servers below (the same value on every platform).
+# If you switch to "0.0.0.0", check the host of every XX_SERVER entry below.
+DEFAULT_BIND_HOST = "127.0.0.1"
+
+
+# webui.py server
+WEBUI_SERVER = dict(host=DEFAULT_BIND_HOST, port=8501)
+
+# api.py server
+API_SERVER = dict(host=DEFAULT_BIND_HOST, port=7861)
--- a/chatchat/chatchat/configs/server_config.py.example
+++ b/chatchat/chatchat/configs/server_config.py.example
--- a/chatchat/chatchat/copy_config_example.py
+++ b/chatchat/chatchat/copy_config_example.py
--- a/chatchat/chatchat/data/knowledge_base/info.db
+++ b/chatchat/chatchat/data/knowledge_base/info.db
--- a/chatchat/chatchat/data/knowledge_base/samples/content/README.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/README.md
@ -0,0 +1,159 @@
+![](img/logo-long-chatchat-trans-v2.png)
+
+
+🌍 [READ THIS IN ENGLISH](README_en.md)
+
+📃 **LangChain-Chatchat** (原 Langchain-ChatGLM)
+
+基于 ChatGLM 等大语言模型与 Langchain 等应用框架实现，开源、可离线部署的检索增强生成(RAG)大模型知识库项目。
+
+---
+
+## 目录
+
+* [介绍](README.md#介绍)
+* [解决的痛点](README.md#解决的痛点)
+* [快速上手](README.md#快速上手)
+  * [1. 环境配置](README.md#1-环境配置)
+  * [2. 模型下载](README.md#2-模型下载)
+  * [3. 初始化知识库和配置文件](README.md#3-初始化知识库和配置文件)
+  * [4. 一键启动](README.md#4-一键启动)
+  * [5. 启动界面示例](README.md#5-启动界面示例)
+* [联系我们](README.md#联系我们)
+
+
+## 介绍
+
+🤖️ 一种利用 [langchain](https://github.com/hwchase17/langchain) 思想实现的基于本地知识库的问答应用，目标期望建立一套对中文场景与开源模型支持友好、可离线运行的知识库问答解决方案。
+
+💡 受 [GanymedeNil](https://github.com/GanymedeNil) 的项目 [document.ai](https://github.com/GanymedeNil/document.ai) 和 [AlexZhangji](https://github.com/AlexZhangji) 创建的 [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) 启发，建立了全流程可使用开源模型实现的本地知识库问答应用。本项目的最新版本中通过使用 [FastChat](https://github.com/lm-sys/FastChat) 接入 Vicuna, Alpaca, LLaMA, Koala, RWKV 等模型，依托于 [langchain](https://github.com/langchain-ai/langchain) 框架支持通过基于 [FastAPI](https://github.com/tiangolo/fastapi) 提供的 API 调用服务，或使用基于 [Streamlit](https://github.com/streamlit/streamlit) 的 WebUI 进行操作。
+
+✅ 依托于本项目支持的开源 LLM 与 Embedding 模型，本项目可实现全部使用**开源**模型**离线私有部署**。与此同时，本项目也支持 OpenAI GPT API 的调用，并将在后续持续扩充对各类模型及模型 API 的接入。
+
+⛓️ 本项目实现原理如下图所示，过程包括加载文件 -> 读取文本 -> 文本分割 -> 文本向量化 -> 问句向量化 -> 在文本向量中匹配出与问句向量最相似的 `top k`个 -> 匹配出的文本作为上下文和问题一起添加到 `prompt`中 -> 提交给 `LLM`生成回答。
+
+📺 [原理介绍视频](https://www.bilibili.com/video/BV13M4y1e7cN/?share_source=copy_web&vd_source=e6c5aafe684f30fbe41925d61ca6d514)
+
+![实现原理图](img/langchain+chatglm.png)
+
+从文档处理角度来看，实现流程如下：
+
+![实现原理图2](img/langchain+chatglm2.png)
+
+🚩 本项目未涉及微调、训练过程，但可利用微调或训练对本项目效果进行优化。
+
+🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/chatchat-space/Langchain-Chatchat/Langchain-Chatchat) 中 `v11` 版本所使用代码已更新至本项目 `v0.2.7` 版本。
+
+🐳 [Docker 镜像](registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.7) 已经更新到 ```0.2.7``` 版本。
+
+🌲 一行命令运行 Docker ：
+
+```shell
+docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.7
+```
+
+🧩 本项目有一个非常完整的[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/) ， README只是一个简单的介绍，__仅仅是入门教程，能够基础运行__。 如果你想要更深入的了解本项目，或者想对本项目做出贡献。请移步 [Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)  界面
+
+## 解决的痛点
+
+该项目是一个可以实现**完全本地化**推理的知识库增强方案，重点解决数据安全保护、私域化部署的企业痛点。
+本开源方案采用```Apache License```，可以免费商用，无需付费。
+
+我们支持市面上主流的本地大语言模型和 Embedding 模型，支持开源的本地向量数据库。
+支持列表详见[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
+
+
+## 快速上手
+
+### 1. 环境配置
+
+ 首先，确保你的机器安装了 Python 3.8 - 3.10
+```
+$ python --version
+Python 3.10.12
+```
+接着，创建一个虚拟环境，并在虚拟环境内安装项目的依赖
+```shell
+
+# 拉取仓库
+$ git clone https://github.com/chatchat-space/Langchain-Chatchat.git
+
+# 进入目录
+$ cd Langchain-Chatchat
+
+# 安装全部依赖
+$ pip install -r requirements.txt 
+$ pip install -r requirements_api.txt
+$ pip install -r requirements_webui.txt  
+
+# 默认依赖包括基本运行环境（FAISS向量库）。如果要使用 milvus/pg_vector 等向量库，请将 requirements.txt 中相应依赖取消注释再安装。
+```
+### 2. 模型下载
+
+如需在本地或离线环境下运行本项目，需要首先将项目所需的模型下载至本地，通常开源 LLM 与 Embedding 模型可以从 [HuggingFace](https://huggingface.co/models) 下载。
+
+以本项目中默认使用的 LLM 模型 [THUDM/ChatGLM2-6B](https://huggingface.co/THUDM/chatglm2-6b) 与 Embedding 模型 [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base) 为例：
+
+下载模型需要先[安装 Git LFS](https://docs.github.com/zh/repositories/working-with-files/managing-large-files/installing-git-large-file-storage)，然后运行
+
+```Shell
+$ git lfs install
+$ git clone https://huggingface.co/THUDM/chatglm2-6b
+$ git clone https://huggingface.co/moka-ai/m3e-base
+```
+### 3. 初始化知识库和配置文件
+
+按照下列方式初始化自己的知识库和简单的复制配置文件
+```shell
+$ python copy_config_example.py
+$ python init_database.py --recreate-vs
+ ```
+### 4. 一键启动
+
+按照以下命令启动项目
+```shell
+$ python startup.py -a
+```
+### 5. 启动界面示例
+
+如果正常启动，你将能看到以下界面
+
+1. FastAPI Docs 界面
+
+![](img/fastapi_docs_026.png)
+
+2. Web UI 启动界面示例：
+
+- Web UI 对话界面：
+
+![img](img/LLM_success.png)
+
+- Web UI 知识库管理页面：
+
+![](img/init_knowledge_base.jpg)
+
+
+### 注意
+
+以上方式只是为了快速上手，如果需要更多的功能和自定义启动方式 ，请参考[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
+
+
+---
+## 项目里程碑
+
+
+---
+## 联系我们
+### Telegram
+[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white "langchain-chatglm")](https://t.me/+RjliQ3jnJ1YyN2E9)
+
+### 项目交流群
+<img src="img/qr_code_76.jpg" alt="二维码" width="300" />
+
+🎉 Langchain-Chatchat 项目微信交流群，如果你也对本项目感兴趣，欢迎加入群聊参与讨论交流。
+
+### 公众号
+
+<img src="img/official_wechat_mp_account.png" alt="二维码" width="300" />
+
+🎉 Langchain-Chatchat 项目官方公众号，欢迎扫码关注。
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-124076-270516.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-124076-270516.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-20096-279847.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-20096-279847.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-220157-552735.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-220157-552735.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-36114-765327.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-36114-765327.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-392521-261326.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-392521-261326.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-42284-124759.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-42284-124759.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-57107-679259.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-57107-679259.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-618350-869132.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-618350-869132.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-838373-426344.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-838373-426344.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-906937-836104.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-906937-836104.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-108319-429731.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-108319-429731.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-580318-260070.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-580318-260070.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-793118-735987.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-793118-735987.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-918388-323086.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-918388-323086.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-19929-302935.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-19929-302935.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-299768-254064.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-299768-254064.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-454007-940199.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-454007-940199.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-628857-182232.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-628857-182232.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-729151-372321.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-729151-372321.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-81470-404273.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-81470-404273.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-17565-176537.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-17565-176537.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-349153-657791.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-349153-657791.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-350029-666381.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-350029-666381.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-759487-923925.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-759487-923925.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-805089-731888.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-805089-731888.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-95996-523276.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-95996-523276.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-276446-401476.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-276446-401476.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-380552-579242.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-380552-579242.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-590671-36787.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-590671-36787.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-699343-219844.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-699343-219844.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-789705-122117.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-789705-122117.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-923924-83386.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-923924-83386.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-930255-616209.jpg
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/img/大模型推理优化策略-幕布图片-930255-616209.jpg
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/分布式训练技术原理.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/分布式训练技术原理.md
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/大模型应用技术原理.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/大模型应用技术原理.md
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/大模型技术栈-实战与应用.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/大模型技术栈-实战与应用.md
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/大模型技术栈-算法与原理.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/大模型技术栈-算法与原理.md
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/大模型指令对齐训练原理.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/大模型指令对齐训练原理.md
--- a/chatchat/chatchat/data/knowledge_base/samples/content/llm/大模型推理优化策略.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/llm/大模型推理优化策略.md
--- a/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain-ChatGLM_closed.csv
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain-ChatGLM_closed.csv
--- a/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain-ChatGLM_closed.jsonl
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain-ChatGLM_closed.jsonl
--- a/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain-ChatGLM_closed.xlsx
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain-ChatGLM_closed.xlsx
--- a/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain-ChatGLM_open.csv
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain-ChatGLM_open.csv
--- a/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain-ChatGLM_open.jsonl
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain-ChatGLM_open.jsonl
--- a/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain-ChatGLM_open.xlsx
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain-ChatGLM_open.xlsx
--- a/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain.pdf
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/test_files/langchain.pdf
--- a/chatchat/chatchat/data/knowledge_base/samples/content/test_files/test.txt
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/test_files/test.txt
--- a/chatchat/chatchat/data/knowledge_base/samples/content/webui2.py
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/webui2.py
@ -0,0 +1,237 @@
+from nicegui import ui, Client, app, run
+from nicegui.events import ValueChangeEventArguments
+from configs import (VERSION, LLM_MODELS, TEMPERATURE, HISTORY_LEN,
+                     VECTOR_SEARCH_TOP_K, SEARCH_ENGINE_TOP_K)
+from webui_pages.utils import AsyncApiRequest
+import asyncio
+from typing import Any, List, Dict, Any
+
+
+app.add_static_files("/image", "img")  # expose the local "img" directory under the "/image" URL prefix (used by the avatar URLs below)
+
+
+class Session:
+    def __init__(self) -> None:
+        user = app.storage.user
+        for k, v in self._attrs().items():
+            user.setdefault(k, v)
+
+    def _attrs(self) -> Dict[str, Any]:
+        return {
+            "messages": [],
+            "query": "",
+            "thinking": False,
+            "cur_kb": "",
+            "cur_temperature": TEMPERATURE,
+            "chat_list": [],
+            "cur_chat": "",
+        }
+
+    @property
+    def user(self):
+        return app.storage.user
+
+    def __getattr__(self, attr: str) -> Any:
+        if attr in self._attrs():
+            return self.user[attr]
+        else:
+            raise AttributeError(attr)
+
+    def __setattr__(self, attr: str, val: Any) -> None:
+        if attr in self._attrs():
+            self.user[attr] = val
+        else:
+            raise AttributeError(attr)
+
+
+def make_header(left_drawer, right_drawer):
+    with ui.header().classes("bg-black p-2") as header:
+        with ui.link():
+            ui.icon("menu", size="md").on("click", lambda: left_drawer.toggle())
+        ui.image("img/logo-long-chatchat-trans-v2.png").props("fit=scale-down").classes("h-8 w-48 float-left")
+        left_header = ui.row().props('id="left-header"')
+        ui.element("q-space")
+        right_header = ui.row().props('id="right-header"')
+        ui.label(f"(Version: {VERSION})").classes("text-grey text-xs pt-4")
+        with ui.link():
+            ui.icon("menu", size="md").on("click", lambda: right_drawer.toggle())
+        return left_header, right_header
+
+
+def make_left_drawer(links: List, current: str):
+    with ui.left_drawer(bordered=True, elevated=True) as drawer:
+        return drawer
+
+
+@ui.refreshable
+async def output_messages():
+    session = Session()
+
+    for msg in session.messages:
+        is_user = msg["role"] == "user"
+        if is_user:
+            name = "User"
+            avatar = "/image/user_avatar.png"
+        else:
+            name = "AI"
+            avatar = "/image/chatchat_icon_blue_square_v2.png"
+        ele = ui.chat_message([], sent=False, name=None, avatar=avatar)
+        with ele.add_slot("default"):
+            ui.markdown(msg["content"])
+    
+    ui.query("img.q-message-avatar").classes("self-start")
+    (ui.query("div.q-message-text--received")
+     .classes("bg-green-100")
+     .style("border-radius: 5px;"))
+    # (ui.query("div.q-message-text--received")
+    #  .run_method("remove_classes", ["q-message-text--received"]))
+    # await ui.run_javascript("window.sc")
+
+
+@ui.page("/", title="Langchain-Chatchat WebUI")
+async def index(client: Client):
+    ui.add_head_html('''<style>
+                     p > code {color: green;padding: 2px;}
+                     pre:has(code) {background-color: #eee; padding: 10px;} !important
+                     </style>''')
+
+    async def send():
+        question = query.value.strip()
+        query.value = ""
+
+        if not question:
+            return
+
+        if question == "/clear":
+            session.messages = []
+            output_messages.refresh()
+            return
+        
+        session.thinking = True
+        session.messages.append({"role": "user", "content": question})
+        session.messages.append({"role": "assistant", "content": "Thinking..."})
+        output_messages.refresh()
+        await asyncio.sleep(0.1)
+
+        text = ""
+        async for chunk in api.chat_chat(question,
+                                   stream=True,
+                                   conversation_id=None,
+                                   model=cur_llm_model.value,
+                                   temperature=temperature.value):
+            text += chunk.get("text", "")
+            tail = " ▌"
+            if text.count("```") % 2 == 1:
+                if text[-1] != "`":
+                    tail += "\n```\n"
+                elif text[-2:] == "``":
+                    tail += "`\n"
+                elif text[-1:] == "`":
+                    tail += "``\n"
+            session.messages[-1]["content"] = text + tail
+            output_messages.refresh()
+            await asyncio.sleep(0.1)
+
+        session.messages[-1]["content"] = text
+        output_messages.refresh()
+        await asyncio.sleep(0.1)
+        session.thinking = False
+
+    session = Session()
+    api = AsyncApiRequest()
+
+    left_drawer = make_left_drawer([], "")
+
+    with ui.right_drawer(bordered=True, elevated=True) as right_drawer:
+        ui.markdown("### 灵感大全")
+        user_name = ui.input("用户名称", value="用户")
+        system_message = (ui.input("AI系统消息",
+                                   value="你是一个聪明的人工智能助手，可以回答用户提出的问题。")
+                            .props("autogrow"))
+        chat_image = ui.upload(label="上传图片").classes("w-full mt-5")
+        chat_file = ui.upload(label="上传文件").classes("w-full mt-5")
+
+    left_header, right_header = make_header(left_drawer, right_drawer)
+
+    with left_header:
+        chat_session = (ui.radio(["会话1", "会话2"], value="会话1")
+                        .props("inline")
+                        .classes("p-0"))
+
+    with left_drawer:
+        ui.markdown("### 配置项")
+
+        def on_chat_mode_change(e: ValueChangeEventArguments):
+            if e.value == "Agent对话":
+                session.cur_temperature = temperature.value
+                temperature.set_value(0.01)
+            else:
+                temperature.set_value(session.cur_temperature)
+
+        chat_mode = ui.select(["LLM 对话", "知识库问答", "搜索引擎问答", "Agent对话"],
+                            label="对话模式",
+                            value="LLM 对话",
+                            on_change=on_chat_mode_change,
+                            )
+        ui.separator()
+
+        with ui.expansion("模型配置", icon="psychology", value=True):
+            running_models = await api.list_running_models()
+            config_models = await api.list_config_models()
+            models = {x: f"{x}(running)" for x in running_models}
+            for v in config_models.values():
+                for m in v:
+                    if m not in running_models:
+                        models.update({m: m})
+            cur_llm_model = ui.select(models, label="LLM模型", value=LLM_MODELS[0], with_input=True, clearable=True)
+            temperature = ui.number("Temperature", value=TEMPERATURE, min=0, max=1, step=0.01)
+            history_len = ui.number("历史对话轮数", value=HISTORY_LEN, min=0, max=10)
+
+        with (ui.expansion("知识库配置", icon="book", value=True)
+              .bind_visibility_from(chat_mode, "value", value="知识库问答")):
+            def on_kb_change(e: ValueChangeEventArguments):
+                session.cur_kb = e.value
+
+            kb_names = await api.list_knowledge_bases()
+            kb_name = ui.select(kb_names,
+                                label="知识库",
+                                value=session.cur_kb or kb_names[0],
+                                on_change=on_kb_change,
+                                )
+            vector_top_k = ui.number("Top K", value=VECTOR_SEARCH_TOP_K, min=1, max=10)
+
+        with (ui.expansion("搜索引擎配置", icon="travel_explore", value=True)
+              .bind_visibility_from(chat_mode, "value", value="搜索引擎问答")):
+            search_engine = ui.select(["Bing", "Duckduckgo"], value="Bing")
+            search_top_k = ui.number("Top K", value=SEARCH_ENGINE_TOP_K, min=1, max=10)
+
+    await client.connected()
+    with ui.column():
+        await output_messages()
+
+    with ui.row().classes("absolute bottom-2 left-20 right-20"):
+        # command = ui.select(["/clear", "/upload"]).classes("w-1/4")
+        query = (ui.input(autocomplete=["/clear", "/upload"],
+                          placeholder="input your question here.")
+                          .classes("flex-grow")
+                          .props('autogrow outlined autofocus counter dense clearable')
+                          .bind_value(session, "query")
+                          .on("keydown.enter.prevent", send)
+        )
+        with query.add_slot("after"):
+            ui.button(icon="send", on_click=send).classes("self-center").props("small dense p-0 m-0")
+        # query._props["autofocus"] = True
+        # query._props["autogrow"] = True
+        # query._props["placeholder"] = "input your question here."
+        # query._props[":list"] = '["/clear", "/upload"]'
+        # query._props["shadow-text"] = ["/clear", "/upload"]
+        # ui.input(autocomplete=["/clear", "/upload"])
+
+
+
+# TODO: 
+# 右侧栏上下文：system_message, picture, file, 知识库文档预览
+
+
+if __name__ in {"__main__", "__mp_main__"}:
+    ui.run(port=5000, storage_secret="111111", reload=True)
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/Home.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/Home.md
@ -0,0 +1,74 @@
+
+![](https://github.com/chatchat-space/Langchain-Chatchat/blob/master/img/logo-long-chatchat-trans-v2.png)
+
+> 欢迎来到 Langchain‐Chatchat 的 Wiki , 在这里开启 Langchain 与大模型的邂逅!
+
+
+## 项目简介
+
+📃 **LangChain-Chatchat** (原 Langchain-ChatGLM):  基于 Langchain 与 ChatGLM 等大语言模型的本地知识库问答应用实现。
+
+🤖️ 一种利用 [langchain](https://github.com/hwchase17/langchain) 思想实现的基于本地知识库的问答应用，目标期望建立一套对中文场景与开源模型支持友好、可离线运行的知识库问答解决方案。
+
+💡 受 [GanymedeNil](https://github.com/GanymedeNil) 的项目 [document.ai](https://github.com/GanymedeNil/document.ai) 和 [AlexZhangji](https://github.com/AlexZhangji) 创建的 [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) 启发，建立了全流程可使用开源模型实现的本地知识库问答应用。本项目的最新版本中通过使用 [FastChat](https://github.com/lm-sys/FastChat) 接入 Vicuna, Alpaca, LLaMA, Koala, RWKV 等模型，依托于 [langchain](https://github.com/langchain-ai/langchain) 框架支持通过基于 [FastAPI](https://github.com/tiangolo/fastapi) 提供的 API 调用服务，或使用基于 [Streamlit](https://github.com/streamlit/streamlit) 的 WebUI 进行操作。
+
+✅ 依托于本项目支持的开源 LLM 与 Embedding 模型，本项目可实现全部使用**开源**模型**离线私有部署**。与此同时，本项目也支持 OpenAI GPT API 的调用，并将在后续持续扩充对各类模型及模型 API 的接入。
+
+⛓️ 本项目实现原理如下图所示，过程包括加载文件 -> 读取文本 -> 文本分割 -> 文本向量化 -> 问句向量化 -> 在文本向量中匹配出与问句向量最相似的 `top k`个 -> 匹配出的文本作为上下文和问题一起添加到 `prompt`中 -> 提交给 `LLM`生成回答。
+
+
+## 算法流程
+
+大家可以前往Bilibili平台查看原理介绍视频：
+
+📺 [原理介绍视频](https://www.bilibili.com/video/BV13M4y1e7cN/?share_source=copy_web&vd_source=e6c5aafe684f30fbe41925d61ca6d514)
+
+开发组也为大家绘制了一张实现原理图，效果如下：
+
+![实现原理图](https://github.com/chatchat-space/Langchain-Chatchat/blob/master/img/langchain+chatglm.png)
+
+从文档处理角度来看，实现流程如下：
+
+![实现原理图2](https://github.com/chatchat-space/Langchain-Chatchat/blob/master/img/langchain+chatglm2.png)
+
+
+## 技术路线图（截止0.2.10）
+
+- [X] Langchain 应用
+  - [X] 本地数据接入
+    - [X] 接入非结构化文档
+      - [X] .txt, .rtf, .epub, .srt
+      - [X] .eml, .msg
+      - [X] .html, .xml, .toml, .mhtml
+      - [X] .json, .jsonl
+      - [X] .md, .rst
+      - [X] .docx, .doc, .pptx, .ppt, .odt
+      - [X] .enex
+      - [X] .pdf
+      - [X] .jpg, .jpeg, .png, .bmp
+      - [X] .py, .ipynb
+    - [X] 结构化数据接入
+      - [X] .csv, .tsv
+      - [X] .xlsx, .xls, .xlsd
+    - [X] 分词及召回
+      - [X] 接入不同类型 TextSplitter
+      - [X] 优化依据中文标点符号设计的 ChineseTextSplitter
+  - [X] 搜索引擎接入
+    - [X] Bing 搜索
+    - [X] DuckDuckGo 搜索
+    - [X] Metaphor 搜索
+  - [X] Agent 实现
+    - [X] 基础React形式的Agent实现，包括调用计算器等
+    - [X] Langchain 自带的Agent实现和调用
+    - [X] 智能调用不同的数据库和联网知识
+- [X] LLM 模型接入
+  - [X] 支持通过调用 [FastChat](https://github.com/lm-sys/fastchat) api 调用 llm
+  - [X] 支持 ChatGLM API 等 LLM API 的接入
+  - [X] 支持 Langchain 框架支持的LLM API 接入
+- [X] Embedding 模型接入
+  - [X] 支持调用 HuggingFace 中各开源 Embedding 模型
+  - [X] 支持 OpenAI Embedding API 等 Embedding API 的接入
+  - [X] 支持 智谱AI、百度千帆、千问、MiniMax 等在线 Embedding API 的接入
+- [X] 基于 FastAPI 的 API 方式调用
+- [X] Web UI
+  - [X] 基于 Streamlit 的 Web UI
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/_Sidebar.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/_Sidebar.md
@ -0,0 +1,58 @@
+__导航栏，一切从这里出发__
+## [Home](https://github.com/chatchat-space/Langchain-Chatchat/wiki)
+## [支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
+* [LLM 模型支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#llm-%E6%A8%A1%E5%9E%8B%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
+* [Embedding 模型支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#embedding-%E6%A8%A1%E5%9E%8B%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
+* [分词器支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#%E5%88%86%E8%AF%8D%E5%99%A8%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
+* [向量数据库支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#%E5%90%91%E9%87%8F%E6%95%B0%E6%8D%AE%E5%BA%93%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
+* [工具支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#%E5%B7%A5%E5%85%B7%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
+
+## [开发环境部署](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2)
+
+### 前期准备
+  * [软件要求](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E8%BD%AF%E4%BB%B6%E8%A6%81%E6%B1%82)
+  * [硬件要求](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E7%A1%AC%E4%BB%B6%E8%A6%81%E6%B1%82)
+  * [VPN](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#vpn)
+
+### 部署代码
+  * [Docker 部署](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#docker-%E9%83%A8%E7%BD%B2)
+  * [最轻模式部署方案](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E6%9C%80%E8%BD%BB%E6%A8%A1%E5%BC%8F%E6%9C%AC%E5%9C%B0%E9%83%A8%E7%BD%B2%E6%96%B9%E6%A1%88)
+  * [常规模式本地部署方案](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E5%B8%B8%E8%A7%84%E6%A8%A1%E5%BC%8F%E6%9C%AC%E5%9C%B0%E9%83%A8%E7%BD%B2%E6%96%B9%E6%A1%88)
+    + [环境安装](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E6%9C%AC%E5%9C%B0%E9%83%A8%E7%BD%B2%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85)
+    + [模型下载](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E6%A8%A1%E5%9E%8B%E4%B8%8B%E8%BD%BD)
+    + [初始化知识库](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E5%88%9D%E5%A7%8B%E5%8C%96%E7%9F%A5%E8%AF%86%E5%BA%93)
+    + [一键启动](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E4%B8%80%E9%94%AE%E5%90%AF%E5%8A%A8)
+    + [多卡加载](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E5%A4%9A%E5%8D%A1%E5%8A%A0%E8%BD%BD)
+
+## [参数配置](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE)
+
+* [基础配置项 basic_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E5%9F%BA%E7%A1%80%E9%85%8D%E7%BD%AE%E9%A1%B9-basic_configpy)
+* [模型配置项 model_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E6%A8%A1%E5%9E%8B%E9%85%8D%E7%BD%AE%E9%A1%B9-model_configpy)
+* [提示词配置项 prompt_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E6%8F%90%E7%A4%BA%E8%AF%8D%E9%85%8D%E7%BD%AE%E9%A1%B9-prompt_configpy)
+* [数据库配置 kb_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E6%95%B0%E6%8D%AE%E5%BA%93%E9%85%8D%E7%BD%AE-kb_configpy)
+* [服务和端口配置项 server_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E6%9C%8D%E5%8A%A1%E5%92%8C%E7%AB%AF%E5%8F%A3%E9%85%8D%E7%BD%AE%E9%A1%B9-server_configpy)
+* [覆盖配置文件 或者配置 startup.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E8%A6%86%E7%9B%96%E9%85%8D%E7%BD%AE%E6%96%87%E4%BB%B6-%E6%88%96%E8%80%85%E9%85%8D%E7%BD%AE-startuppy)
+
+## [自定义](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89)
+
+* [使用自定义的分词器](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E5%88%86%E8%AF%8D%E5%99%A8)
+* [使用自定义的 Agent 工具](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84-agent-%E5%B7%A5%E5%85%B7)
+* [使用自定义的微调模型](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E5%BE%AE%E8%B0%83%E6%A8%A1%E5%9E%8B)
+* [使用自定义的嵌入模型](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E5%B5%8C%E5%85%A5%E6%A8%A1%E5%9E%8B)
+* [日志功能](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E6%97%A5%E5%BF%97%E5%8A%9F%E8%83%BD)
+
+## [最佳实践](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5)
+* [推荐的模型组合](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E6%8E%A8%E8%8D%90%E7%9A%84%E6%A8%A1%E5%9E%8B%E7%BB%84%E5%90%88)
+* [微调模型加载实操](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E5%BE%AE%E8%B0%83%E6%A8%A1%E5%9E%8B%E5%8A%A0%E8%BD%BD%E5%AE%9E%E6%93%8D)
+* [预处理知识库文件](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E9%A2%84%E5%A4%84%E7%90%86%E7%9F%A5%E8%AF%86%E5%BA%93%E6%96%87%E4%BB%B6)
+* [自定义的关键词调整Embedding模型](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E5%85%B3%E9%94%AE%E8%AF%8D%E8%B0%83%E6%95%B4embedding%E6%A8%A1%E5%9E%8B)
+* [实际使用效果](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E5%AE%9E%E9%99%85%E4%BD%BF%E7%94%A8%E6%95%88%E6%9E%9C)
+
+## [做出贡献](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%81%9A%E5%87%BA%E8%B4%A1%E7%8C%AE)
+
+* [Issue 规范](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%81%9A%E5%87%BA%E8%B4%A1%E7%8C%AE#issue-%E8%A7%84%E8%8C%83)
+* [PR 规范](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%81%9A%E5%87%BA%E8%B4%A1%E7%8C%AE#pr-%E8%A7%84%E8%8C%83)
+
+## [合作伙伴](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%90%88%E4%BD%9C%E4%BC%99%E4%BC%B4)
+
+## [常见问题](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/做出贡献.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/做出贡献.md
@ -0,0 +1,51 @@
+## Issue 规范
+> 什么样的 issue 是不会被回复的
+
+1. 在提出issue前，请查看您提出的问题是否已经在 issue 列表或者 discussion 内出现，提出重复的问题将 **被关闭** 。
+2. 非项目推荐配置的任何关于环境配置问题的 issue 通常将  **不会由官方回复**，请您在微信沟通群内咨询。
+3. 与项目无关的 issue 将  **不会被回复** 。
+4. 超过30天没有更新动态的 issue 将  **被关闭** 。
+5. 语言非中文和英语的 issue 将  **被关闭** 。
+6. 没有尝试过解决方案的 issue 将  **被关闭** 。
+7. 没有提出任何贡献（例如PR，论文）的 feature / enhancement 将会 **被关闭** 。您可以在 discussion 中的 **希望开发的功能** 讨论区中留言，我们开发组会进行回复。
+8. 不按照 Issue 规范提出的 issue 可能将 **被关闭** 。
+
+> 如何提 issue
+
+1. 简要阐述你的问题
+2. 配上报错日志（运行报错时）或者运行效果不理想的截图（并说明原本的期望和实际的结果）
+3. 配上对应的配置文件以及你的运行环境
+4. 你尝试过的解决方法。（非常重要）
+5. 按照模板提出Issue
+
+## PR 规范 
+
+> 什么样的 PR 是不会被接受的
+1. 不属于紧急bug修复却直接提交到```master```的PR。
+2. 仅仅修改```Readme.md```和```配置文件```的。
+3. 跟项目组已经开发的内容冲突的(dev版本)，将可能被拒绝。
+
+首先请注意所有的PR需要以dev分支为基准，master分支仅用来发行与紧急bug修复。
+
+> 提出新的通用自定义分词器
+
+1. 将您的分词器所在的代码文件放在```text_splitter```文件夹下，文件名为您的分词器名字`my_splitter.py`，然后在`__init__.py`中导入您的分词器。
+2. 发起PR，并说明您的分词器面向的场景或者改进之处。我们非常期待您能举例一个具体的应用场景。
+
+> 提出新的 Agent 工具
+
+1. 将您的Agent工具所在的代码放在 ```server/agent```文件夹下，文件名为您的工具名字`my_tools.py`，然后在`tools.py`中导入您的工具。
+2. 发起PR，说明您的工具面向的场景或改进之处，并说明如何进行测试和调用。我们非常期待您能举例一个具体的应用场景。
+
+> 提出新的自定义模型
+
+1. 将您的模型贡献到huggingface平台上，并开放给开发人员下载。
+2. 发起PR，说明您的工具面向的场景或改进之处，并说明如何进行测试和调用。我们非常期待您能举例一个具体的应用场景。
+3. 由开发人员测试通过后，将您的模型添加到合作模型名单中。
+
+
+> 修复 Bug & 增加其他新功能
+
+1. 一个 PR 中必须 **只有一个或者一类功能增加，或者修复一个bug** ，多个功能混合的 PR 将 **不会被接受** 。
+2. 说明您增加的功能或者改进之处，并说明如何进行测试和调用。我们非常期待您能举例一个具体的应用场景。
+
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/参数配置.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/参数配置.md
@ -0,0 +1,156 @@
+在开始参数配置之前，先执行以下脚本
+```shell
+python copy_config_example.py
+```
+该脚本会把```config```目录下的所有配置文件样例复制一份到```config```目录下，方便开发者进行配置。
+接着，开发者可以根据自己的需求，对配置文件进行修改。
+
+## 基础配置项 basic_config.py
+该配置负责记录日志的格式和储存路径，通常不需要修改。
+
+## 模型配置项 model_config.py
+本文件包含本地LLM模型、本地Embeddings模型、在线LLM模型API的相关配置。
+
+- 本地模型路径配置。建议将所有下载的模型放到一个统一的目录下，然后将`MODEL_ROOT_PATH`指定为该目录，只要模型目录名称符合下列情况之一（以text2vec为例），即可自动识别加载：
+  - text2vec，即MODEL_PATH中的键
+  - GanymedeNil/text2vec-large-chinese，即MODEL_PATH中的值
+  - text2vec-large-chinese，即MODEL_PATH中的值的简写形式
+
+- 在线模型API配置。在`ONLINE_LLM_MODEL`已经预先写好了所有支持的在线API服务，通常只需要把申请的API_KEY等填入即可。
+有些在线API服务需要安装额外的依赖：
+  - zhipu-api: zhipuai
+  - fangzhou-api: volcengine>=1.0.106
+  - qianfan-api: qianfan
+  - qwen-api: dashscope
+
+- HISTORY_LEN。历史对话轮数通常不建议设置超过10，因为这可能导致以下问题
+  1. 显存占用过高：尤其是部分模型，本身就已经要占用满显存的情况下，保留太多历史，一次传入token太多，可能会爆显存。
+  2. 速度处理很慢：还是因为一次传入了太多token，导致速度很慢。
+
+- TEMPERATURE。通常不建议设置过高。
+在Agent对话模式和知识库问答中，我们强烈建议将其设置成0或者接近于0。
+
+- Agent_MODEL = None
+我们支持用户使用“模型接力赛”的用法，即：
+选择的大模型仅能调用工具，但是在工具中表现较差，则这个工具作为 “模型调用工具”
+如果用户设置了Agent_MODEL，则在 Agent 中，使用Agent_MODEL来执行任务，否则，使用LLM_MODEL
+
+
+## 提示词配置项 prompt_config.py
+
+提示词配置分为三个板块，分别对应三种聊天类型。
+- llm_chat: 基础的对话提示词， 通常来说，直接是用户输入的内容，没有系统提示词。
+- knowledge_base_chat: 与知识库对话的提示词，在模板中，我们为开发者设计了一个系统提示词，开发者可以自行更改。
+- agent_chat: 与Agent对话的提示词，同样，我们为开发者设计了一个系统提示词，开发者可以自行更改。
+
+prompt模板使用Jinja2语法，简单点就是用双大括号代替f-string的单大括号
+请注意，本配置文件支持热加载，修改prompt模板后无需重启服务。
+
+## 数据库配置 kb_config.py
+请确认本地分词器路径是否已经填写，如：
+
+```
+text_splitter_dict = {
+   "ChineseRecursiveTextSplitter": {
+       "source":"huggingface",  # 选择tiktoken则使用openai的方法,不填写则默认为字符长度切割方法。
+       "tokenizer_name_or_path":"", # 空格不填则默认使用大模型的分词器。 
+    }
+}
+```
+设置好的分词器需要在```TEXT_SPLITTER_NAME```中指定并应用。
+
+在这里，通常使用```huggingface```的方法，并且，我们推荐使用大模型自带的分词器来完成任务。
+
+请注意，使用```gpt2```分词器将要访问huggingface官网下载权重。
+
+我们还支持使用```tiktoken``` 和传统的 按照长度分词的方式，开发者可以自行配置。
+
+如果希望调用自己的分词器，请参考[最佳实践]部分。
+
+```kbs_config```设置了使用的向量数据库，目前可以选择
+- ```faiss```: 使用faiss数据库，需要安装faiss-gpu
+- ```milvus```: 使用milvus数据库，需要安装milvus并进行端口配置
+- ```pg```: 使用pg数据库，需要配置connection_uri
+
+## 服务和端口配置项 server_config.py
+
+通常，这个页面并不需要进行大量的修改，仅需确保对应的端口打开，并不互相冲突即可。
+
+如果你是Linux系统推荐设置
+
+```
+DEFAULT_BIND_HOST ="0.0.0.0"
+```
+如果使用联网模型，则需要关注联网模型的端口。
+
+这些模型必须是在model_config.MODEL_PATH或ONLINE_MODEL中正确配置的。
+
+在启动startup.py时，可以通过`--model-worker --model-name xxxx`指定模型，不指定则为LLM_MODEL
+
+
+## 覆盖配置文件 或者配置 startup.py
+
+在 ```server_config.py```中有以下配置项被注释了
+
+```
+"gpus": None, # 使用的GPU，以str的格式指定，如"0,1"，如失效请使用CUDA_VISIBLE_DEVICES="0,1"等形式指定
+"num_gpus": 1, # 使用GPU的数量
+"max_gpu_memory":"20GiB", # 每个GPU占用的最大显存
+
+ 以下为model_worker非常用参数，可根据需要配置
+"load_8bit": False, # 开启8bit量化
+"cpu_offloading": None,
+"gptq_ckpt": None,
+"gptq_wbits": 16,
+"gptq_groupsize": -1,
+"gptq_act_order": False,
+"awq_ckpt": None,
+"awq_wbits": 16,
+"awq_groupsize": -1,
+"model_names": [LLM_MODEL],
+"conv_template": None,
+"limit_worker_concurrency": 5,
+"stream_interval": 2,
+"no_register": False,
+"embed_in_truncate": False,
+
+ 以下为vllm_woker配置参数,注意使用vllm必须有gpu，仅在Linux测试通过
+
+ tokenizer = model_path # 如果tokenizer与model_path不一致在此处添加
+ 'tokenizer_mode':'auto',
+ 'trust_remote_code':True,
+ 'download_dir':None,
+ 'load_format':'auto',
+ 'dtype':'auto',
+ 'seed':0,
+ 'worker_use_ray':False,
+ 'pipeline_parallel_size':1,
+ 'tensor_parallel_size':1,
+ 'block_size':16,
+ 'swap_space':4 , # GiB
+ 'gpu_memory_utilization':0.90,
+ 'max_num_batched_tokens':2560,
+ 'max_num_seqs':256,
+ 'disable_log_stats':False,
+ 'conv_template':None,
+ 'limit_worker_concurrency':5,
+ 'no_register':False,
+ 'num_gpus': 1,
+ 'engine_use_ray': False,
+ 'disable_log_requests': False
+```
+
+在这些参数中，如果没有设置，则使用```startup.py```中的默认值，如果设置了，则使用设置的值。
+因此，强烈建议开发者不要在```startup.py```中进行配置，而应该在```server_config.py```中进行配置，以避免配置被覆盖。
+
+## 选择使用的模型
+在```model_config.py```完成模型配置后，还不能直接使用，需要在该文件下配置本地模型的运行方式或在线模型的API，例如
+```
+    "agentlm-7b": { # 使用default中的IP和端口
+       "device": "cuda",
+    },
+    "zhipu-api": { # 请为每个要运行的在线API设置不同的端口
+        "port": 21001,
+    },
+```
+本地模型使用default中的IP和端口，在线模型可以自己选择端口
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/合作伙伴.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/合作伙伴.md
@ -0,0 +1,37 @@
+## 合作伙伴名单
+🎉 Langchain-Chatchat 项目合作伙伴，感谢以下合作伙伴对本项目的支持。
+
+<table style="width:100%; border-collapse:collapse;">
+  <tr>
+    <td style="width:30%; text-align:center; vertical-align:middle;">
+      <img src="https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/partners/chatglm.svg" alt="ChatGLM Logo" width="300" height="100">
+    </td>
+    <td style="width:80%; vertical-align:middle;">
+      <a href="https://chatglm.cn/" target="_blank" style="text-decoration:none;">ChatGLM: 国内最早的开源中文大模型之一</a>
+    </td>
+  </tr>
+  <tr>
+    <td style="width:30%; text-align:center; vertical-align:middle;">
+      <img src="https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/partners/autodl.svg" alt="AutoDL Logo" width="300" height="100">
+    </td>
+    <td style="width:80%; vertical-align:middle;">
+      <a href="https://www.autodl.com/" target="_blank" style="text-decoration:none;"> AutoDL 提供弹性、好用、省钱的云GPU租用服务。缺显卡就上 AutoDL.com </a>
+    </td>
+  </tr>
+  <tr>
+    <td style="width:30%; text-align:center; vertical-align:middle;">
+      <img src="https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/partners/aws.svg" alt="AWS Logo" width="300" height="100">
+    </td>
+    <td style="width:80%; vertical-align:middle;">
+      <a href="https://aws.amazon.com/" target="_blank" style="text-decoration:none;"> 全球云计算领导者 </a>
+    </td>
+  </tr>
+  <tr>
+    <td style="width:30%; text-align:center; vertical-align:middle;">
+      <img src="https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/partners/zhenfund.svg" alt="ZhenFund Logo" width="300" height="100">
+    </td>
+    <td style="width:80%; vertical-align:middle;">
+      <a href="https://www.zhenfund.com/" target="_blank" style="text-decoration:none;">我们相信预测未来的最好方式是自己来创造。我们在这里等你。</a>
+    </td>
+  </tr>
+</table>
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/常见问题.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/常见问题.md
@ -0,0 +1,277 @@
+> 以下是一些常见的问题和回答
+#### Q: 我要提出问题，怎么办
+
+A: 首先，你要确认一下你的问题是否已经被解决，建议翻看以往的Issue和Discussion，如果有，先按照他们的方法来做。
+如果没有，按照以下步骤
+1. 这是一个bug还是一个讨论问题，如果是讨论问题，放在discussion，如果是bug和feature，放在issue。
+2. 如果要提出feature，提交一份对应的PR会让开发者更重视你的问题，否则你的问题很有可能被直接关闭。
+
+#### Q: ValueError: Found modules on cpu/disk. Using Exllama backend requires all the modules to be on GPU. You can deactivate exllama backend by setting disable_exllama=True in the quantization config object.
+
+A: 这是Fschat依赖源码的问题，请查看以下解决方式，通过修改'Fschat'库中的对应内容。
+
+https://github.com/lm-sys/FastChat/issues/2459
+
+https://stackoverflow.com/questions/76983305/fine-tuning-thebloke-llama-2-13b-chat-gptq-model-with-hugging-face-transformers
+
+---
+
+#### Q: AttributeError: 'ChatGLMTokenizer' object has no attribute 'tokenizer'
+
+A: 查看以下Issue
+
+https://github.com/chatchat-space/Langchain-Chatchat/issues/1835
+
+---
+
+#### Q: 使用Qwen API key 报错 multiple widgets with the same key=""
+
+A: 确保你的key是`dashscope`平台的key。并保证`dashscope`依赖满足我们的依赖版本。
+
+---
+
+#### Q：linux下向量化PDF文件时出错：`ImportError: 从文件 *.pdf 加载文档时出错：libGL.so.1: cannot open shared object file: No such file or directory`
+
+A： 这是系统缺少必要的动态库，可以手动安装：`libgl1-mesa-glx` 和 `libglib2.0-0`
+
+---
+
+#### Q: 各种Int4模型无法载入
+A: 由于各种Int4模型与Fp16模型并不相似，且量化技术可能有所不同，无法载入可能是因为fschat不支持或者缺少对应的依赖，需要查看对应仓库的issue获得更多信息。开发组没有针对Int4模型进行优化。
+
+---
+
+#### Q1: 本项目支持哪些文件格式？
+
+A1: 目前已测试支持 txt、docx、md、pdf、csv、html、json 等格式文件
+
+更多文件格式请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)。目前已知文档中若含有特殊字符，可能存在文件无法加载的问题。
+
+---
+
+#### Q2: 使用过程中 Python 包 `nltk`发生了 `Resource punkt not found.`报错，该如何解决？
+
+A2: 方法一：将 https://github.com/nltk/nltk_data/raw/gh-pages/packages/tokenizers/punkt.zip （位于 `packages/tokenizers` 下）下载并解压，放到  `nltk_data/tokenizers` 存储路径下。
+
+`nltk_data` 存储路径可以通过 `nltk.data.path` 查询。
+
+方法二：执行python代码
+
+```
+import nltk
+nltk.download()
+```
+
+---
+
+#### Q3: 使用过程中 Python 包 `nltk`发生了 `Resource averaged_perceptron_tagger not found.`报错，该如何解决？
+
+A3: 
+
+方法一：将 https://github.com/nltk/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip 下载，解压放到 `nltk_data/taggers` 存储路径下。
+
+`nltk_data` 存储路径可以通过 `nltk.data.path` 查询。
+
+方法二：执行python代码
+
+```
+import nltk
+nltk.download()
+```
+
+---
+
+#### Q4: 本项目可否在 colab 中运行？
+
+A4: 可以尝试使用 chatglm-6b-int4 模型在 colab 中运行，需要注意的是，如需在 colab 中运行 Web UI，需将 `webui.py`中 `demo.queue(concurrency_count=3).launch( server_name='0.0.0.0', share=False, inbrowser=False)`中参数 `share`设置为 `True`。
+
+---
+
+#### Q5: 在 Anaconda 中使用 pip 安装包无效如何解决？
+
+A5: 此问题是系统环境问题，详细见  [在Anaconda中使用pip安装包无效问题](在Anaconda中使用pip安装包无效问题.md)
+
+---
+
+#### Q6: 本项目中所需模型如何下载至本地？
+
+A6: 本项目中使用的模型均为 `huggingface.co` 中可下载的开源模型，以默认选择的 `chatglm-6b`和 `text2vec-large-chinese`模型为例，下载模型可执行如下代码：
+
+```shell
+# 安装 git lfs
+$ git lfs install
+
+# 下载 LLM 模型
+$ git clone https://huggingface.co/THUDM/chatglm-6b /your_path/chatglm-6b
+
+# 下载 Embedding 模型
+$ git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese /your_path/text2vec
+
+# 模型需要更新时，可打开模型所在文件夹后拉取最新模型文件/代码
+$ git pull
+```
+
+---
+
+#### Q7: `huggingface.co`中模型下载速度较慢怎么办？
+
+A7: 可使用本项目用到的模型权重文件百度网盘地址：
+
+- ernie-3.0-base-zh.zip 链接: https://pan.baidu.com/s/1CIvKnD3qzE-orFouA8qvNQ?pwd=4wih
+- ernie-3.0-nano-zh.zip 链接: https://pan.baidu.com/s/1Fh8fgzVdavf5P1omAJJ-Zw?pwd=q6s5
+- text2vec-large-chinese.zip 链接: https://pan.baidu.com/s/1sMyPzBIXdEzHygftEoyBuA?pwd=4xs7
+- chatglm-6b-int4-qe.zip 链接: https://pan.baidu.com/s/1DDKMOMHtNZccOOBGWIOYww?pwd=22ji
+- chatglm-6b-int4.zip 链接: https://pan.baidu.com/s/1pvZ6pMzovjhkA6uPcRLuJA?pwd=3gjd
+- chatglm-6b.zip 链接: https://pan.baidu.com/s/1B-MpsVVs1GHhteVBetaquw?pwd=djay
+
+---
+
+#### Q8: 老版本和新版本无法兼容怎么办？
+
+A8: 保存老版本的配置文件，删除老版本代码并下载新版本代码后，根据新版本的配置文件格式进行修改。
+
+在 ```0.2.6```后，运行环境和配置文件发生重大变化，建议重新配置环境和配置文件，并重建知识库。
+
+
+---
+
+#### Q9: 显卡内存爆了，提示 "OutOfMemoryError: CUDA out of memory"
+
+A9: 将 `VECTOR_SEARCH_TOP_K` 和 `HISTORY_LEN` 的值调低，比如 `VECTOR_SEARCH_TOP_K = 3` 和 `HISTORY_LEN = 2`，这样由 `query` 和 `context` 拼接得到的 `prompt` 会变短，从而减少显存的占用。或者使用量化模型减少显存占用。
+
+---
+
+#### Q10: 执行 `pip install -r requirements.txt` 过程中遇到 python 包，如 langchain 找不到对应版本的问题
+
+A10: 更换 pypi 源后重新安装，如阿里源、清华源等，网络条件允许时建议直接使用 pypi.org 源，具体操作命令如下：
+
+```shell
+# 使用 pypi 源
+$ pip install -r requirements.txt -i https://pypi.python.org/simple
+```
+
+或
+
+```shell
+# 使用阿里源
+$ pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/
+```
+
+或
+
+```shell
+# 使用清华源
+$ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
+```
+
+---
+
+#### Q11: 启动 api.py 时 upload_file 接口抛出 `partially initialized module 'charset_normalizer' has no attribute 'md__mypyc' (most likely due to a circular import)`
+
+A11: 这是由于 charset_normalizer 模块版本过高导致的，需要降低 charset_normalizer 的版本，测试在 charset_normalizer==2.1.0 上可用。
+
+---
+
+#### Q12: 调用api中的 `bing_search_chat` 接口时，报出 `Failed to establish a new connection: [Errno 110] Connection timed out`
+
+A12: 这是因为服务器加了防火墙，需要联系管理员加白名单，如果公司的服务器的话，就别想了GG--!
+
+---
+
+#### Q13: 加载 chatglm-6b-int8 或 chatglm-6b-int4 抛出 `RuntimeError: Only Tensors of floating point and complex dtype can require gradients`
+
+A13: 疑为 chatglm 的 quantization 的问题或 torch 版本差异问题，针对已经变为 Parameter 的 torch.zeros 矩阵也执行 Parameter 操作，从而抛出 `RuntimeError: Only Tensors of floating point and complex dtype can require gradients`。解决办法是在 chatglm 项目的原始文件中的 quantization.py 文件 374 行改为：
+
+```
+    try:
+        self.weight =Parameter(self.weight.to(kwargs["device"]), requires_grad=False)
+    except Exception as e:
+        pass
+```
+
+如果上述方式不起作用，则在 .cache/huggingface/modules/ 目录下针对 chatglm 项目的原始文件中的 quantization.py 文件执行上述操作，若软链接不止一个，按照错误提示选择正确的路径。
+
+注：虽然模型可以顺利加载但在cpu上仍存在推理失败的可能：即针对每个问题，模型一直输出gugugugu。
+
+    因此，最好不要试图用cpu加载量化模型，原因可能是目前python主流量化包的量化操作是在gpu上执行的,会天然地存在gap。
+
+---
+
+#### Q14: 修改配置中路径后，加载 text2vec-large-chinese 依然提示 `WARNING: No sentence-transformers model found with name text2vec-large-chinese. Creating a new one with MEAN pooling.`
+
+A14: 尝试更换 embedding，如 text2vec-base-chinese，请在 [configs/model_config.py](../configs/model_config.py) 文件中，修改 `text2vec-base`参数为本地路径，绝对路径或者相对路径均可
+
+---
+
+#### Q15: 使用pg向量库建表报错
+
+A15: 需要手动安装对应的vector扩展(连接pg执行 CREATE EXTENSION IF NOT EXISTS vector)
+
+---
+
+#### Q16: pymilvus 连接超时
+
+A16: pymilvus 版本需要与 milvus 版本匹配，否则会超时，可参考 pymilvus==2.1.3
+
+---
+
+#### Q17: 使用vllm推理加速框架时，已经下载了模型但出现HuggingFace通信问题
+
+A17: 参照如下代码修改python环境下/site-packages/vllm/model_executor/weight_utils.py文件的prepare_hf_model_weights函数如下对应代码：
+
+```python
+
+    if not is_local:
+        # Use file lock to prevent multiple processes from
+        # downloading the same model weights at the same time.
+        model_path_temp = os.path.join(
+            os.getenv("HOME"),
+            ".cache/huggingface/hub",
+            "models--" + model_name_or_path.replace("/", "--"),
+            "snapshots/",
+        )
+        downloaded = False
+        if os.path.exists(model_path_temp):
+            temp_last_dir = os.listdir(model_path_temp)[-1]
+            model_path_temp = os.path.join(model_path_temp, temp_last_dir)
+            base_pattern = os.path.join(model_path_temp, "pytorch_model*.bin")
+            files = glob.glob(base_pattern)
+            if len(files) > 0:
+                downloaded = True
+
+        if downloaded:
+           hf_folder = model_path_temp
+        else:
+            with get_lock(model_name_or_path, cache_dir):
+                hf_folder = snapshot_download(model_name_or_path,
+                                            allow_patterns=allow_patterns,
+                                            cache_dir=cache_dir,
+                                            tqdm_class=Disabledtqdm)
+    else:
+        hf_folder = model_name_or_path
+```
+
+---
+
+#### Q18: `/xxx/base_model_worker.py` 报 `assert r.status_code == 200` 错误
+
+A：这个错误是本地模型进程注册到 fastchat controller 失败了。一般有两种原因：1、开了系统全局代理，关闭即可。2、DEFAULT_BIND_HOST 设为'0.0.0.0'，改成'127.0.0.1' 或 本机实际 IP 即可。或者更新到最新版本代码也可以解决。
+
+
+#### Q19: 使用vllm后端加速，无返回且不报错。
+
+A: fschat==0.2.33 的 vllm_worker 脚本代码有 bug，如需使用，需修改源码 fastchat.serve.vllm_worker，将 103 行中 sampling_params = SamplingParams 的参数 stop=list(stop) 修改为 stop=[i for i in stop if i!=""]
+
+
+#### Q20: chatglm3-6b对话中出现"<|user|>"标签，且自问自答。
+
+A20: chatglm3官方目前已经修复了chatglm3-6b的问题，若使用的模型为chatglm3-6b，仅需更新chatglm3-6b模型代码即可;请前往 Huggingface 下载最新的权重。
+并更新fschat版本到 0.2.34以上。
+
+#### Q21: 为什么启动的时候一直出现
+```
+"device not in ['cuda', 'mps', 'cpu','xpu'], device = auto"
+```
+的警告
+
+A21: 这是因为你没有在对应的启动选项设定设备，请在`model_config.py`中设定 DEVICE，不过，就算不设定，auto也能正常使用
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/开发环境部署.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/开发环境部署.md
@ -0,0 +1,281 @@
+## 软件要求
+
+要顺利运行本代码，请按照以下系统要求进行配置
+
+**已经测试过的系统**
+
+ Linux Ubuntu 22.04.5 kernel version 6.7
+
+其他系统可能出现系统兼容性问题。
+
+**最低要求**
+
+该要求仅针对标准模式，轻量模式使用在线模型，不需要安装torch等库，也不需要显卡即可运行。
+
+ Python 版本: >= 3.8(很不稳定), < 3.12
+ CUDA 版本: >= 12.1 
+
+**推荐要求**
+
+开发者在以下环境下进行代码调试，在该环境下能够避免最多环境问题。
+
+ Python 版本 == 3.11.7
+ CUDA 版本: == 12.1
+
+## 硬件要求
+
+本框架使用 `fschat`驱动，统一使用 `huggingface`进行推理，其他推理方式(如 `llama-cpp`，`TensorRT加速引擎`)建议通过推理引擎以 API 形式接入我们的框架。
+
+同时，我们没有对 `Int4` 模型进行适配，不保证 `Int4` 模型能够正常运行。因此，量化版本暂时需要由开发者自行适配，我们可能在未来放出。
+
+如果想要顺利在GPU运行本地模型的 **FP16** 版本，你至少需要以下的硬件配置，来保证在我们框架下能够实现 **稳定连续对话** 
+
+ ChatGLM3-6B & LLaMA-7B-Chat 等 7B模型
+  + 最低显存要求: 14GB
+  + 推荐显卡: RTX 4080
+ Qwen-14B-Chat 等 14B模型
+  + 最低显存要求: 30GB
+  + 推荐显卡: V100
+ Yi-34B-Chat 等 34B模型
+  + 最低显存要求: 69GB  
+  + 推荐显卡: A100
+ Qwen-72B-Chat 等 72B模型
+  + 最低显存要求: 145GB
+  + 推荐显卡：多卡 A100 以上
+
+一种简单的估算方式为：
+```
+FP16: 显存占用(GB) = 模型量级 x 2
+Int4: 显存占用(GB) = 模型量级 x 0.75
+```
+以上数据仅为估算，实际情况以 **nvidia-smi** 占用为准。
+请注意，如果使用最低配置，仅能保证代码能够运行，但运行速度较慢，体验不佳。
+
+同时，Embedding 模型将会占用 1-2G 的显存，历史记录最多会占用 数GB 的显存，因此，需要多冗余一些显存。
+
+内存最低要求: 内存要求至少应该比模型运行的显存大。
+
+例如，运行ChatGLM3-6B `FP16` 模型，显存占用13G，推荐使用16G以上内存。
+
+### 部分测试用机配置参考，在以下机器下开发组成员已经进行原生模拟测试（创建新环境并根据要求下载后运行），确保能流畅运行全部功能的代码框架。
+ 服务器
+```
+处理器: Intel® Xeon® Platinum 8558P Processor (260M Cache, 2.7 GHz)
+内存: 4 TB
+显卡组:  NVIDIA H800 SXM5 80GB x 8
+硬盘: 6 PB 
+操作系统: Ubuntu 22.04 LTS,Linux kernel 5.15.0-60-generic
+显卡驱动版本: 535.129.03
+Cuda版本: 12.1 
+Python版本: 3.11.7
+网络IP地址：美国，洛杉矶
+```
+ 个人PC
+```
+处理器: Intel® Core™ i9 processor 14900K 
+内存: 256 GB DDR5
+显卡组:  NVIDIA RTX4090 X 1 / NVIDIA RTXA6000 X 1
+硬盘: 1 TB
+操作系统: Ubuntu 22.04 LTS / Arch Linux, Linux Kernel 6.6.7
+显卡驱动版本: 545.29.06
+Cuda版本: 12.3 Update 1
+Python版本: 3.11.7
+网络IP地址：中国，上海 
+```
+
+## VPN
+
+如果您位于中国(含港，澳，台) 需要调用 OpenAI 或者 其他境外模型的 API，需要使用 VPN 工具或访问镜像站。
+
+从 Huggingface 下载模型或者从本仓库拉取最新的代码时，需要开发者自行设置代理。本项目不涉及任何代理工具设置和使用，也不解决任何关于代理的问题。
+
+## Docker 部署
+
+开发组为开发者们提供了一键部署的 docker 镜像文件懒人包。开发者们可以在 AutoDL 平台和 Docker 平台一键部署。
+
+🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/chatchat-space/Langchain-Chatchat/Langchain-Chatchat) ，已经更新到`V13`版本,对应`0.2.9`
+
+🐳 Docker 镜像：`registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.7`
+
+💻 一行命令运行 Docker 🌲：
+
+```shell
+docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.7
+```
+
+- 该版本镜像大小 `43.1GB`，使用 `v0.2.6`，以 `nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04` 为基础镜像
+- 该版本为正常版本，非轻量化版本
+- 该版本内置两个 Embedding 模型：`m3e-large`，`text2vec-bge-large-chinese`，默认启用后者，内置 `chatglm2-6b-32k`
+- 该版本目标为方便一键部署使用，请确保您已经在 Linux 发行版上安装了 NVIDIA 驱动程序
+- 请注意，您不需要在主机系统上安装 CUDA 工具包，但需要安装 `NVIDIA Driver` 以及 `NVIDIA Container Toolkit`，请参考[安装指南](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+- 首次拉取和启动均需要一定时间，首次启动时请参照下图使用 `docker logs -f <container id>` 查看日志
+- 如遇到启动过程卡在 `Waiting..` 步骤，建议使用 `docker exec -it <container id> bash` 进入 `/logs/` 目录查看对应阶段日志
+
+## 常规模式本地部署方案
+
+```shell
+# 首先，确保你的机器安装了 Python 3.8 - 3.11 版本
+$ python --version
+Python 3.8.13
+
+# 如果低于这个版本，可使用conda安装环境
+$ conda create -p /your_path/env_name python=3.8
+
+# 激活环境
+$ source activate /your_path/env_name
+
+# 或，conda安装，不指定路径, 注意以下，都将/your_path/env_name替换为env_name
+$ conda create -n env_name python=3.8
+$ conda activate env_name # Activate the environment
+
+# 更新py库
+$ pip3 install --upgrade pip
+
+# 关闭环境
+$ source deactivate /your_path/env_name
+
+# 删除环境
+$ conda env remove -p  /your_path/env_name
+```
+接着，开始安装项目的依赖
+
+```shell
+# 拉取仓库
+$ git clone --recursive https://github.com/chatchat-space/Langchain-Chatchat.git
+
+# 进入目录
+$ cd Langchain-Chatchat
+
+# 安装全部依赖
+$ pip install -r requirements.txt
+
+# 默认依赖包括基本运行环境（FAISS向量库）。以下是可选依赖：
+- 如果要使用 milvus/pg_vector 等向量库，请将 requirements.txt 中相应依赖取消注释再安装。
+- 如果要开启 OCR GPU 加速，请安装 rapidocr_paddle[gpu]
+- 如果要使用在线 API 模型，请安装对应的 SDK
+
+```
+
+此外，为方便用户 API 与 webui 分离运行，可单独根据运行需求安装依赖包。
+
+- 如果只需运行 API，可执行：
+    ```shell
+    $ pip install -r requirements_api.txt
+    
+    # 默认依赖包括基本运行环境（FAISS向量库）。如果要使用 milvus/pg_vector 等向量库，请将 requirements.txt 中相应依赖取消注释再安装。
+    ```
+
+- 如果只需运行 WebUI，可执行：
+    ```shell
+    $ pip install -r requirements_webui.txt
+    ```
+
+注：使用 `langchain.document_loaders.UnstructuredFileLoader`进行 `.docx` 等格式非结构化文件接入时，可能需要依据文档进行其他依赖包的安装，请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)。
+
+
+需要注意的是，对于以下依赖，我们建议源码安装依赖或者定期检查是否为最新版本，我们的框架可能会大量使用这些依赖的最新特性。
+ transformers
+ fastchat
+ fastapi
+ streamlit 以及其组件
+ langchain 以及其组件
+ xformers 
+
+## 模型下载
+
+如需在本地或离线环境下运行本项目，需要首先将项目所需的模型下载至本地，通常开源 LLM 与 Embedding 模型可以从 [HuggingFace](https://huggingface.co/models) 下载。
+
+以本项目中默认使用的 LLM 模型 [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b) 与 Embedding 模型 [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base) 为例：
+
+下载模型需要先[安装Git LFS](https://docs.github.com/zh/repositories/working-with-files/managing-large-files/installing-git-large-file-storage)，然后运行
+
+```Shell
+$ git lfs install
+$ git clone https://huggingface.co/THUDM/chatglm2-6b
+$ git clone https://huggingface.co/moka-ai/m3e-base
+```
+
+## 初始化知识库
+
+当前项目的知识库信息存储在数据库中，在正式运行项目之前请先初始化数据库（我们强烈建议您在执行操作前备份您的知识文件）。
+- 如果您已经有创建过知识库，可以先执行以下命令创建或更新数据库表：
+  ```shell
+  $ python init_database.py --create-tables
+  ```
+  如果可以正常运行，则无需再重建知识库。
+
+- 如果您是第一次运行本项目，知识库尚未建立，或者之前使用的是低于最新master分支版本的框架，或者配置文件中的知识库类型、嵌入模型发生变化，或者之前的向量库没有开启 `normalize_L2`，需要以下命令初始化或重建知识库：
+
+  ```shell
+  $ python init_database.py --recreate-vs
+  ```
+  
+## 一键启动
+启动前，确保已经按照[参数配置](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE)正确配置各config模块。
+
+一键启动脚本 startup.py， 一键启动所有 Fastchat 服务、API 服务、WebUI 服务，示例代码：
+
+```shell
+$ python startup.py -a
+```
+
+并可使用 `Ctrl + C` 直接关闭所有运行服务。如果一次结束不了，可以多按几次。
+
+可选参数包括 `-a (或--all-webui)`, `--all-api`, `--llm-api`, `-c (或--controller)`, `--openai-api`,
+`-m (或--model-worker)`, `--api`, `--webui`，其中：
+
+- `--all-webui` 为一键启动 WebUI 所有依赖服务；
+- `--all-api` 为一键启动 API 所有依赖服务；
+- `--llm-api` 为一键启动 Fastchat 所有依赖的 LLM 服务；
+- `--openai-api` 为仅启动 FastChat 的 controller 和 openai-api-server 服务；
+- 其他为单独服务启动选项。
+
+若想指定非默认模型，需要用 `--model-name` 选项，示例：
+
+```shell
+$ python startup.py --all-webui --model-name Qwen-7B-Chat
+```
+
+更多信息可通过 `python startup.py -h` 查看。
+
+## 多卡加载
+项目支持多卡加载，需在 startup.py 中的 create_model_worker_app 函数中，修改如下三个参数:
+
+```python
+gpus=None, 
+num_gpus= 1, 
+max_gpu_memory="20GiB"
+```
+
+其中，`gpus` 控制使用的显卡的ID，例如 "0,1";
+
+`num_gpus` 控制使用的卡数;
+
+`max_gpu_memory` 控制每个卡使用的显存容量。
+
+注1：server_config.py的FSCHAT_MODEL_WORKERS字典中也增加了相关配置，如有需要也可通过修改FSCHAT_MODEL_WORKERS字典中对应参数实现多卡加载，且需注意server_config.py的配置会覆盖create_model_worker_app 函数的配置。
+
+注2：少数情况下，gpus参数会不生效，此时需要通过设置环境变量CUDA_VISIBLE_DEVICES来指定torch可见的gpu,示例代码：
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1 python startup.py -a
+```
+
+## 最轻模式本地部署方案
+
+该模式的配置方式与常规模式相同，但无需安装 `torch` 等重依赖，通过在线API实现 LLM 和 Embeddings 相关功能，适合没有显卡的电脑使用。
+
+```shell
+$ pip install -r requirements_lite.txt
+$ python startup.py -a --lite
+```
+
+该模式支持的在线 Embeddings 包括：
+- [智谱AI](http://open.bigmodel.cn)
+- [MiniMax](https://api.minimax.chat)
+- [百度千帆](https://cloud.baidu.com/product/wenxinworkshop?track=dingbutonglan)
+- [阿里云通义千问](https://dashscope.aliyun.com/)
+
+在 model_config.py 中 将 LLM_MODELS 和 EMBEDDING_MODEL 设置为可用的在线 API 名称即可。
+
+注意：在对话过程中并不要求 LLM 模型与 Embeddings 模型一致，你可以在知识库管理页面中使用 zhipu-api 作为嵌入模型，在知识库对话页面使用其它模型。
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/支持列表.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/支持列表.md
@ -0,0 +1,176 @@
+## LLM 模型支持列表
+> 本地模型
+
+本地 LLM 模型接入基于 [FastChat](https://github.com/lm-sys/FastChat) 实现，支持模型如下：
+
+- [ChatGLM 全系列对话模型](https://huggingface.co/THUDM/)
+- [Orion 全系列对话模型](https://huggingface.co/OrionStarAI/)，必须安装flash-attn 才能使用
+- [Qwen 全系列对话模型](https://huggingface.co/Qwen/)
+- [internlm 全系列对话模型](https://huggingface.co/internlm)
+- [Baichuan 全系列对话模型](https://huggingface.co/baichuan-inc)，必须降级 transformers 才能使用
+- [llama 全系列对话模型](https://huggingface.co/meta-llama)
+- [Vicuna 全系列对话模型](https://huggingface.co/lmsys)
+- [mistral 全系列对话模型](https://huggingface.co/mistralai)
+
+- [vivo-ai/BlueLM-7B-Chat](https://huggingface.co/vivo-ai/BlueLM-7B-Chat)
+- [01-ai/Yi-34B-Chat](https://huggingface.co/01-ai/Yi-34B-Chat)
+- [BlinkDL/RWKV-4-Raven](https://huggingface.co/BlinkDL/rwkv-4-raven)
+- [camel-ai/CAMEL-13B-Combined-Data](https://huggingface.co/camel-ai/CAMEL-13B-Combined-Data)
+- [databricks/dolly-v2-12b](https://huggingface.co/databricks/dolly-v2-12b)
+- [FreedomIntelligence/phoenix-inst-chat-7b](https://huggingface.co/FreedomIntelligence/phoenix-inst-chat-7b)
+- [h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b](https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b)
+- [lcw99/polyglot-ko-12.8b-chang-instruct-chat](https://huggingface.co/lcw99/polyglot-ko-12.8b-chang-instruct-chat)
+- [lmsys/fastchat-t5-3b-v1.0](https://huggingface.co/lmsys/fastchat-t5)
+- [mosaicml/mpt-7b-chat](https://huggingface.co/mosaicml/mpt-7b-chat)
+- [Neutralzz/BiLLa-7B-SFT](https://huggingface.co/Neutralzz/BiLLa-7B-SFT)
+- [nomic-ai/gpt4all-13b-snoozy](https://huggingface.co/nomic-ai/gpt4all-13b-snoozy)
+- [NousResearch/Nous-Hermes-13b](https://huggingface.co/NousResearch/Nous-Hermes-13b)
+- [openaccess-ai-collective/manticore-13b-chat-pyg](https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg)
+- [OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5](https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5)
+- [project-baize/baize-v2-7b](https://huggingface.co/project-baize/baize-v2-7b)
+- [Salesforce/codet5p-6b](https://huggingface.co/Salesforce/codet5p-6b)
+- [StabilityAI/stablelm-tuned-alpha-7b](https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b)
+- [tiiuae/falcon-40b](https://huggingface.co/tiiuae/falcon-40b)
+- [timdettmers/guanaco-33b-merged](https://huggingface.co/timdettmers/guanaco-33b-merged)
+- [togethercomputer/RedPajama-INCITE-7B-Chat](https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Chat)
+- [WizardLM/WizardLM-13B-V1.0](https://huggingface.co/WizardLM/WizardLM-13B-V1.0)
+- [WizardLM/WizardCoder-15B-V1.0](https://huggingface.co/WizardLM/WizardCoder-15B-V1.0)
+- [HuggingFaceH4/starchat-beta](https://huggingface.co/HuggingFaceH4/starchat-beta)
+- [FlagAlpha/Llama2-Chinese-13b-Chat](https://huggingface.co/FlagAlpha/Llama2-Chinese-13b-Chat) and others
+- [BAAI/AquilaChat-7B](https://huggingface.co/BAAI/AquilaChat-7B)
+- [all models of OpenOrca](https://huggingface.co/Open-Orca)
+- [Spicyboros](https://huggingface.co/jondurbin/spicyboros-7b-2.2?not-for-all-audiences=true) 
+- [airoboros 2.2](https://huggingface.co/jondurbin/airoboros-l2-13b-2.2)
+- [VMware&#39;s OpenLLaMa OpenInstruct](https://huggingface.co/VMware/open-llama-7b-open-instruct)
+- 任何 [EleutherAI](https://huggingface.co/EleutherAI) 的 pythia 模型，如 [pythia-6.9b](https://huggingface.co/EleutherAI/pythia-6.9b)
+- 在以上模型基础上训练的任何 [Peft](https://github.com/huggingface/peft) 适配器。为了激活，模型路径中必须有 `peft` 。注意：如果加载多个peft模型，你可以通过在任何模型工作器中设置环境变量 `PEFT_SHARE_BASE_WEIGHTS=true` 来使它们共享基础模型的权重。
+
+以上模型支持列表可能随 [FastChat](https://github.com/lm-sys/FastChat) 更新而持续更新，可参考 [FastChat 已支持模型列表](https://github.com/lm-sys/FastChat/blob/main/docs/model_support.md)。
+
+> 联网模型
+
+支持的联网模型
+- [智谱AI](http://open.bigmodel.cn)   **临时解决方案，不支持流式输出，敬请期待0.3.x**
+- [阿里云通义千问](https://dashscope.aliyun.com/)
+- [百川](https://www.baichuan-ai.com/)
+- [ChatGPT](https://api.openai.com)
+- [Gemini](https://makersuite.google.com/app/apikey)
+- [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
+- [MiniMax](https://api.minimax.chat)
+- [讯飞星火](https://xinghuo.xfyun.cn)
+- [百度千帆](https://cloud.baidu.com/product/wenxinworkshop?track=dingbutonglan)
+- [字节火山方舟](https://www.volcengine.com)
+
+## Embedding 模型支持列表
+
+> 本地模型
+
+本项目支持调用 [HuggingFace](https://huggingface.co/models?pipeline_tag=sentence-similarity) 中的 Embedding 模型，已支持的 Embedding 模型如下：
+
+
+MokaAI系列嵌入模型
+
+- [moka-ai/m3e-small](https://huggingface.co/moka-ai/m3e-small)
+- [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base)
+- [moka-ai/m3e-large](https://huggingface.co/moka-ai/m3e-large)
+
+BAAI系列嵌入模型
+- [BAAI/bge-small-zh](https://huggingface.co/BAAI/bge-small-zh)
+- [BAAI/bge-base-zh](https://huggingface.co/BAAI/bge-base-zh)
+- [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh)
+- [BAAI/bge-small-zh-v1.5](https://huggingface.co/BAAI/bge-small-zh-v1.5)
+- [BAAI/bge-base-zh-v1.5](https://huggingface.co/BAAI/bge-base-zh-v1.5)
+- [BAAI/bge-large-zh-v1.5](https://huggingface.co/BAAI/bge-large-zh-v1.5)
+- [BAAI/bge-large-zh-noinstruct](https://huggingface.co/BAAI/bge-large-zh-noinstruct)
+- [BAAI/bge-reranker-large](https://huggingface.co/BAAI/bge-reranker-large)
+- [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base)
+
+
+text2vec系列嵌入模型
+- [shibing624/text2vec-base-chinese-sentence](https://huggingface.co/shibing624/text2vec-base-chinese-sentence)
+- [shibing624/text2vec-base-chinese-paraphrase](https://huggingface.co/shibing624/text2vec-base-chinese-paraphrase)
+- [shibing624/text2vec-base-multilingual](https://huggingface.co/shibing624/text2vec-base-multilingual)
+- [shibing624/text2vec-base-chinese](https://huggingface.co/shibing624/text2vec-base-chinese)
+- [shibing624/text2vec-bge-large-chinese](https://huggingface.co/shibing624/text2vec-bge-large-chinese)
+- [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese)
+
+其他模型
+- [sensenova/piccolo-base-zh](https://huggingface.co/sensenova/piccolo-base-zh)
+- [sensenova/piccolo-large-zh](https://huggingface.co/sensenova/piccolo-large-zh)
+- [nghuyong/ernie-3.0-nano-zh](https://huggingface.co/nghuyong/ernie-3.0-nano-zh)
+- [nghuyong/ernie-3.0-base-zh](https://huggingface.co/nghuyong/ernie-3.0-base-zh)
+
+达摩院系列嵌入模型
+- [damo/nlp_gte_sentence-embedding_chinese-large](https://modelscope.cn/models/damo/nlp_gte_sentence-embedding_chinese-large)
+
+> 联网模型
+
+除本地模型外，本项目也支持直接接入 OpenAI的在线嵌入模型。
+支持的联网模型
+- [OpenAI/text-embedding-ada-002](https://platform.openai.com/docs/guides/embeddings)
+- [智谱AI](http://open.bigmodel.cn)
+- [MiniMax](https://api.minimax.chat)
+- [百度千帆](https://cloud.baidu.com/product/wenxinworkshop?track=dingbutonglan)
+- [阿里云通义千问](https://dashscope.aliyun.com/)
+
+## 分词器支持列表
+
+> Langchain 中的分词器
+
+本项目支持调用 [Langchain](https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.text_splitter) 的 Text Splitter 分词器以及基于此改进的自定义分词器，已支持的 Text Splitter 类型如下：
+- CharacterTextSplitter
+- LatexTextSplitter
+- MarkdownHeaderTextSplitter
+- MarkdownTextSplitter
+- NLTKTextSplitter
+- PythonCodeTextSplitter
+- RecursiveCharacterTextSplitter
+- SentenceTransformersTokenTextSplitter
+- SpacyTextSplitter
+
+> 自定义分词器
+
+已经支持的定制分词器如下：
+
+- [AliTextSplitter](text_splitter/ali_text_splitter.py)
+- [ChineseRecursiveTextSplitter](text_splitter/chinese_recursive_text_splitter.py)
+- [ChineseTextSplitter](text_splitter/chinese_text_splitter.py)
+
+
+## 向量数据库支持列表
+
+> 本地向量数据库
+
+目前支持的本地向量数据库列表如下：
+
+- [FAISS](https://github.com/facebookresearch/faiss) 
+- [Milvus](https://github.com/milvus-io/milvus)
+- [PGVector](https://github.com/pgvector/pgvector)
+
+> 联网向量数据库
+
+- [Zilliz](https://zilliz.com)
+
+## 工具支持列表
+> Langchain工具
+
+- Shell 工具，用于模拟当前的Linux Shell环境
+- Youtube 工具，用于搜索Youtube的相关视频链接
+- Wolfram 工具，用Wolfram来实现数学计算等
+
+其他Langchain自带的工具也可以按照上述三个工具的方式来自己实现
+
+> 本地工具
+
+- 翻译工具，实现对输入的任意语言翻译。 
+- 数学工具，使用LLMMathChain 实现数学计算。
+- 高级知识库工具，智能选择调用多个或者单个知识库并查询内容。
+- 进阶知识库工具，智能选择调用一个最相近的知识库并查询内容。
+- 基础知识库工具，选择指定的一个知识库并回答。
+
+> 联网工具
+
+-  天气工具，使用自定义的LLMWetherChain实现天气查询，调用和风天气API。
+-  搜索工具，使用我们的搜索API来实现搜索并概括内容。
+
+- 我们期待开发者共享更多的工具，帮助项目生态完善
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/最佳实践.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/最佳实践.md
@ -0,0 +1,889 @@
+## 推荐的模型组合
+
+ 在默认的配置文件中，我们提供了以下模型组合
+```
+LLM: Chatglm2-6b
+Embedding Models: m3e-base
+TextSplitter: ChineseRecursiveTextSplitter
+Kb_dataset: faiss
+```
+
+ 我们推荐开发者根据自己的业务需求进行模型微调，如果不需要微调且配置充足，可选择以下性能较好的配置
+```
+model_config.py
+LLM: Qwen-14B-Chat 或 Baichuan2-13B-Chat
+Embedding Models: piccolo-large-zh 或 bge-large-zh-v1.5 
+HISTORY_LEN = 20
+TEMPERATURE = 0.1
+```  
+使用该模型将需要更高的硬件要求
+```
+1张 RTX A6000 或者 A40 等 48GB 显存以上的显卡。推荐 1 x A100 以上。
+(使用多张显卡拼接也能运行，但是速度非常慢，2张4090拼接运行大概为一秒一个字的速度)
+
+64GB 内存用于加载模型而不被Kill
+
+服务器级的CPU，推荐 Xeon(R) Platinum 8358P 以上
+```
+
+ 如果开发者知识库较大，有大量文档，大文件，我们推荐开发者使用 ```pg``` 向量数据库
+ 如果开发者的知识库具有一定的关键词特征，例如：
+    + 问答对文件(以Q + A 为一个组合的json文件)
+    + Markdown文件
+    + 并排的pdf文件
+    + 具有多个表格的pdf文件
+  
+  我们推荐开发者自行开发分词器，以达到更好的效果。
+
+ 如果开发者想使用更全面的 Agent 功能，我们推荐开发者使用以下配置
+```
+LLM: Qwen-14B-Chat, AgentLM-70B 或 GPT-4
+Tools 的工具控制在10个之内
+```
+
+## 微调模型加载实操
+
+### 非p-tuning类PEFT加载
+本项目基于 FastChat 加载 LLM 服务，故需以 FastChat 加载 PEFT 路径，针对chatglm、falcon、codet5p以外的模型，以及p-tuning以外的peft方法，需对peft文件进行修改，步骤如下：
+
+1. 将config.json文件修改为adapter_config.json;
+2. 保证文件夹包含pytorch_model.bin文件；
+3. 修改文件夹名称，保证文件夹包含'peft'一词；
+4. 将peft文件夹移入项目目录下；
+5. 确保adapter_config.json文件中base_model_name_or_path指向基础模型；
+6. 将peft路径添加到model_config.py的llm_dict中，键为模型名，值为peft路径，注意使用相对路径，如"peft"；
+7. 开启 `PEFT_SHARE_BASE_WEIGHTS=true`环境变量，再执行python startup.py -a
+
+针对p-tuning和chatglm模型，需要对fastchat进行较大幅度的修改。
+
+### p-tuning加载
+
+P-tuning虽然是一种peft方法，但并不能与huggingface的peft python包兼容，而fastchat在多处以字符串匹配的方式进行硬编码加载模型，因此导致fastchat和chatchat不能兼容p-tuning，经langchain-chatchat开发组多次尝试，给出如下指南进行p-tuning加载。
+
+#### 1. peft文件夹修改
+
+1. 将config.json文件修改为adapter_config.json;
+2. 保证文件夹包含pytorch_model.bin文件；
+3. 修改文件夹名称，保证文件夹包含'peft'一词；
+4. 在adapter_config.json文件中增加如下字段：
+
+   ```json
+       "base_model_name_or_path": "/root/model/chatglm2-6b/",
+       "task_type": "CAUSAL_LM",
+       "peft_type": "PREFIX_TUNING",
+       "inference_mode": true,
+       "revision": "main",
+       "num_virtual_tokens": 16
+   ```
+
+   **其中,"base_model_name_or_path"为基础模型的存在位置**；
+5. 将文件夹移入项目文件夹中，如Langchain-Chatchat项目文件夹目录下；
+
+#### 2. fastchat包代码修改
+
+##### 2.1 fastchat.model.model_adapter文件修改
+
+1. 将fastchat.model.model_adapter.py文件的load_model函数修改为：
+
+   ```python
+   def load_model(
+       model_path: str,
+       device: str = "cuda",
+       num_gpus: int = 1,
+       max_gpu_memory: Optional[str] = None,
+       dtype: Optional[torch.dtype] = None,
+       load_8bit: bool = False,
+       cpu_offloading: bool = False,
+       gptq_config: Optional[GptqConfig] = None,
+       awq_config: Optional[AWQConfig] = None,
+       revision: str = "main",
+       debug: bool = False,
+       load_kwargs = {}
+   ):
+       """Load a model from Hugging Face."""
+       # get model adapter
+       adapter = get_model_adapter(model_path)
+       kwargs = load_kwargs
+       # Handle device mapping
+       cpu_offloading = raise_warning_for_incompatible_cpu_offloading_configuration(
+           device, load_8bit, cpu_offloading
+       )
+       if device == "cpu":
+           kwargs["torch_dtype"]= torch.float32
+           if CPU_ISA in ["avx512_bf16", "amx"]:
+               try:
+                   import intel_extension_for_pytorch as ipex
+
+                   kwargs ["torch_dtype"]= torch.bfloat16
+               except ImportError:
+                   warnings.warn(
+                       "Intel Extension for PyTorch is not installed, it can be installed to accelerate cpu inference"
+                   )
+       elif device == "cuda":
+           kwargs["torch_dtype"] = torch.float16
+           if num_gpus != 1:
+               kwargs["device_map"] = "auto"
+               if max_gpu_memory is None:
+                   kwargs[
+                       "device_map"
+                   ] = "sequential"  # This is important for not the same VRAM sizes
+                   available_gpu_memory = get_gpu_memory(num_gpus)
+                   kwargs["max_memory"] = {
+                       i: str(int(available_gpu_memory[i] * 0.85)) + "GiB"
+                       for i in range(num_gpus)
+                   }
+               else:
+                   kwargs["max_memory"] = {i: max_gpu_memory for i in range(num_gpus)}
+       elif device == "mps":
+           kwargs["torch_dtype"] = torch.float16
+           # Avoid bugs in mps backend by not using in-place operations.
+           replace_llama_attn_with_non_inplace_operations()
+       elif device == "xpu":
+           kwargs["torch_dtype"] = torch.bfloat16
+           # Try to load ipex, while it looks unused, it links into torch for xpu support
+           try:
+               import intel_extension_for_pytorch as ipex
+           except ImportError:
+               warnings.warn(
+                   "Intel Extension for PyTorch is not installed, but is required for xpu inference."
+               )
+       elif device == "npu":
+           kwargs["torch_dtype"]= torch.float16
+           # Try to load ipex, while it looks unused, it links into torch for xpu support
+           try:
+               import torch_npu
+           except ImportError:
+               warnings.warn("Ascend Extension for PyTorch is not installed.")
+       else:
+           raise ValueError(f"Invalid device: {device}")
+
+       if cpu_offloading:
+           # raises an error on incompatible platforms
+           from transformers import BitsAndBytesConfig
+
+           if "max_memory" in kwargs:
+               kwargs["max_memory"]["cpu"] = (
+                   str(math.floor(psutil.virtual_memory().available / 2**20)) + "Mib"
+               )
+           kwargs["quantization_config"] = BitsAndBytesConfig(
+               load_in_8bit_fp32_cpu_offload=cpu_offloading
+           )
+           kwargs["load_in_8bit"] = load_8bit
+       elif load_8bit:
+           if num_gpus != 1:
+               warnings.warn(
+                   "8-bit quantization is not supported for multi-gpu inference."
+               )
+           else:
+               model, tokenizer = adapter.load_compress_model(
+                   model_path=model_path,
+                   device=device,
+                   torch_dtype=kwargs["torch_dtype"],
+                   revision=revision,
+               )
+               if debug:
+                   print(model)
+               return model, tokenizer
+       elif awq_config and awq_config.wbits < 16:
+           assert (
+               awq_config.wbits == 4
+           ), "Currently we only support 4-bit inference for AWQ."
+           model, tokenizer = load_awq_quantized(model_path, awq_config, device)
+           if num_gpus != 1:
+               device_map = accelerate.infer_auto_device_map(
+                   model,
+                   max_memory=kwargs["max_memory"],
+                   no_split_module_classes=[
+                       "OPTDecoderLayer",
+                       "LlamaDecoderLayer",
+                       "BloomBlock",
+                       "MPTBlock",
+                       "DecoderLayer",
+                   ],
+               )
+               model = accelerate.dispatch_model(
+                   model, device_map=device_map, offload_buffers=True
+               )
+           else:
+               model.to(device)
+           return model, tokenizer
+       elif gptq_config and gptq_config.wbits < 16:
+           model, tokenizer = load_gptq_quantized(model_path, gptq_config)
+           if num_gpus != 1:
+               device_map = accelerate.infer_auto_device_map(
+                   model,
+                   max_memory=kwargs["max_memory"],
+                   no_split_module_classes=["LlamaDecoderLayer"],
+               )
+               model = accelerate.dispatch_model(
+                   model, device_map=device_map, offload_buffers=True
+               )
+           else:
+               model.to(device)
+           return model, tokenizer
+       kwargs["revision"] = revision
+
+       if dtype is not None:  # Overwrite dtype if it is provided in the arguments.
+           kwargs["torch_dtype"] = dtype
+
+       # Load model
+       model, tokenizer = adapter.load_model(model_path, kwargs)
+
+       if (
+           device == "cpu"
+           and kwargs["torch_dtype"] is torch.bfloat16
+           and CPU_ISA is not None
+       ):
+           model = ipex.optimize(model, dtype=kwargs["torch_dtype"])
+
+       if (device == "cuda" and num_gpus == 1 and not cpu_offloading) or device in (
+           "mps",
+           "xpu",
+           "npu",
+       ):
+           model.to(device)
+
+       if device == "xpu":
+           model = torch.xpu.optimize(model, dtype=kwargs["torch_dtype"], inplace=True)
+
+       if debug:
+           print(model)
+
+       return model, tokenizer
+   ```
+2. 将fastchat.model.model_adapter.py的get_generate_stream_function函数修改为：
+
+   ```python
+   def get_generate_stream_function(model: torch.nn.Module, model_path: str):
+       """Get the generate_stream function for inference."""
+       from fastchat.serve.inference import generate_stream
+
+       model_type = str(type(model)).lower()
+
+       is_chatglm = "chatglm" in model_type 
+       is_falcon = "rwforcausallm" in model_type
+       is_codet5p = "codet5p" in model_type 
+       is_peft = "peft" in model_type
+
+       if is_chatglm:
+           return generate_stream_chatglm
+       elif is_falcon:
+           return generate_stream_falcon
+       elif is_codet5p:
+           return generate_stream_codet5p
+       elif peft_share_base_weights and is_peft:
+           # Return a curried stream function that loads the right adapter
+           # according to the model_name available in this context.  This ensures
+           # the right weights are available.
+           @torch.inference_mode()
+           def generate_stream_peft(
+               model,
+               tokenizer,
+               params: Dict,
+               device: str,
+               context_len: int,
+               stream_interval: int = 2,
+               judge_sent_end: bool = False,
+           ):
+
+               model.set_adapter(model_path)
+               if "chatglm" in str(type(model.base_model)).lower():
+                   model.disable_adapter()
+                   prefix_state_dict = torch.load(os.path.join(model_path, "pytorch_model.bin"))
+                   new_prefix_state_dict = {}
+
+                   for k, v in prefix_state_dict.items():
+                       if k.startswith("transformer.prefix_encoder."):
+                           new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
+                       elif k.startswith("transformer.prompt_encoder."):
+                           new_prefix_state_dict[k[len("transformer.prompt_encoder."):]] = v
+                   model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
+                   for x in generate_stream_chatglm(
+                       model,
+                       tokenizer,
+                       params,
+                       device,
+                       context_len,
+                       stream_interval,
+                       judge_sent_end,
+                   ):
+                       yield x
+               elif "rwforcausallm" in str(type(model.base_model)).lower():
+
+                   for x in generate_stream_falcon(
+                       model,
+                       tokenizer,
+                       params,
+                       device,
+                       context_len,
+                       stream_interval,
+                       judge_sent_end,
+                   ):
+                       yield x   
+               elif "codet5p" in str(type(model.base_model)).lower():
+
+                   for x in generate_stream_codet5p(
+                       model,
+                       tokenizer,
+                       params,
+                       device,
+                       context_len,
+                       stream_interval,
+                       judge_sent_end,
+                   ):
+                       yield x   
+               else:
+
+                   for x in generate_stream(
+                       model,
+                       tokenizer,
+                       params,
+                       device,
+                       context_len,
+                       stream_interval,
+                       judge_sent_end,
+                   ):
+                       yield x
+
+           return generate_stream_peft
+       else:
+           return generate_stream
+   ```
+3. 将fastchat.model.model_adapter.py的PeftModelAdapter类的load_model方法修改为：
+
+   ```python
+       def load_model(self, model_path: str, from_pretrained_kwargs: dict):
+           """Loads the base model then the (peft) adapter weights"""
+           from peft import PeftConfig, PeftModel
+
+           config = PeftConfig.from_pretrained(model_path)
+           base_model_path = config.base_model_name_or_path
+           if "peft" in base_model_path:
+               raise ValueError(
+                   f"PeftModelAdapter cannot load a base model with 'peft' in the name: {config.base_model_name_or_path}"
+               )
+
+           # Basic proof of concept for loading peft adapters that share the base
+           # weights.  This is pretty messy because Peft re-writes the underlying
+           # base model and internally stores a map of adapter layers.
+           # So, to make this work we:
+           #  1. Cache the first peft model loaded for a given base models.
+           #  2. Call `load_model` for any follow on Peft models.
+           #  3. Make sure we load the adapters by the model_path.  Why? This is
+           #  what's accessible during inference time.
+           #  4. In get_generate_stream_function, make sure we load the right
+           #  adapter before doing inference.  This *should* be safe when calls
+           #  are blocked the same semaphore.
+           if peft_share_base_weights:
+               if base_model_path in peft_model_cache:
+                   model, tokenizer = peft_model_cache[base_model_path]
+                   # Super important: make sure we use model_path as the
+                   # `adapter_name`.
+                   model.load_adapter(model_path, adapter_name=model_path)
+               else:
+                   base_adapter = get_model_adapter(base_model_path)
+                   base_model, tokenizer = base_adapter.load_model(
+                       base_model_path, from_pretrained_kwargs
+                   )
+                   # Super important: make sure we use model_path as the
+                   # `adapter_name`.
+                   from peft import get_peft_model
+                   model = get_peft_model(base_model,config,adapter_name=model_path)
+                   peft_model_cache[base_model_path] = (model, tokenizer)
+               return model, tokenizer
+
+           # In the normal case, load up the base model weights again.
+           base_adapter = get_model_adapter(base_model_path)
+           base_model, tokenizer = base_adapter.load_model(
+               base_model_path, from_pretrained_kwargs
+           )
+           from peft import get_peft_model
+           model = get_peft_model(base_model,config,adapter_name=model_path)
+           return model, tokenizer
+
+   ```
+4. 将fastchat.model.model_adapter.py的ChatglmAdapter类的load_model方法修改为：
+
+   ```python
+       def load_model(self, model_path: str, from_pretrained_kwargs: dict):
+           revision = from_pretrained_kwargs.get("revision", "main")
+           tokenizer = AutoTokenizer.from_pretrained(
+               model_path, trust_remote_code=True, revision=revision
+           )
+           config = AutoConfig.from_pretrained(model_path, trust_remote_code=True,**from_pretrained_kwargs)
+           model = AutoModel.from_pretrained(
+               model_path, trust_remote_code=True, config=config
+           )
+           return model, tokenizer
+   ```
+
+##### 2.2 fastchat.serve.model_worker文件修改
+
+1. 将fastchat.serve.model_worker文件的ModelWorker的__init__方法修改如下：
+
+   ```python
+   class ModelWorker(BaseModelWorker):
+       def __init__(
+           self,
+           controller_addr: str,
+           worker_addr: str,
+           worker_id: str,
+           model_path: str,
+           model_names: List[str],
+           limit_worker_concurrency: int,
+           no_register: bool,
+           device: str,
+           num_gpus: int,
+           max_gpu_memory: str,
+           dtype: Optional[torch.dtype] = None,
+           load_8bit: bool = False,
+           cpu_offloading: bool = False,
+           gptq_config: Optional[GptqConfig] = None,
+           awq_config: Optional[AWQConfig] = None,
+           stream_interval: int = 2,
+           conv_template: Optional[str] = None,
+           embed_in_truncate: bool = False,
+           seed: Optional[int] = None,
+           load_kwargs = {}, #修改点
+           **kwargs,
+       ):
+           super().__init__(
+               controller_addr,
+               worker_addr,
+               worker_id,
+               model_path,
+               model_names,
+               limit_worker_concurrency,
+               conv_template=conv_template,
+           )
+
+           logger.info(f"Loading the model {self.model_names} on worker {worker_id} ...")
+           self.model, self.tokenizer = load_model(
+               model_path,
+               device=device,
+               num_gpus=num_gpus,
+               max_gpu_memory=max_gpu_memory,
+               dtype=dtype,
+               load_8bit=load_8bit,
+               cpu_offloading=cpu_offloading,
+               gptq_config=gptq_config,
+               awq_config=awq_config,
+               load_kwargs=load_kwargs #修改点
+           )
+           self.device = device
+           if self.tokenizer.pad_token == None:
+               self.tokenizer.pad_token = self.tokenizer.eos_token
+           self.context_len = get_context_length(self.model.config)
+           print("**"*100)
+           self.generate_stream_func = get_generate_stream_function(self.model, model_path)
+           print(f"self.generate_stream_func{self.generate_stream_func}")
+           print("*"*100)
+           self.stream_interval = stream_interval
+           self.embed_in_truncate = embed_in_truncate
+           self.seed = seed
+
+           if not no_register:
+               self.init_heart_beat()
+   ```
+2. 在fastchat.serve.model_worker文件的create_model_worker增加如下args参数：
+
+   ```python
+   parser.add_argument("--load_kwargs",type=dict,default={})
+   ```
+
+    并将如下语句：
+
+```python
+    worker = ModelWorker(
+        args.controller_address,
+        args.worker_address,
+        worker_id,
+        args.model_path,
+        args.model_names,
+        args.limit_worker_concurrency,
+        no_register=args.no_register,
+        device=args.device,
+        num_gpus=args.num_gpus,
+        max_gpu_memory=args.max_gpu_memory,
+        dtype=str_to_torch_dtype(args.dtype),
+        load_8bit=args.load_8bit,
+        cpu_offloading=args.cpu_offloading,
+        gptq_config=gptq_config,
+        awq_config=awq_config,
+        stream_interval=args.stream_interval,
+        conv_template=args.conv_template,
+        embed_in_truncate=args.embed_in_truncate,
+        seed=args.seed,
+    )
+```
+
+修改为：
+
+```python
+    worker = ModelWorker(
+        args.controller_address,
+        args.worker_address,
+        worker_id,
+        args.model_path,
+        args.model_names,
+        args.limit_worker_concurrency,
+        no_register=args.no_register,
+        device=args.device,
+        num_gpus=args.num_gpus,
+        max_gpu_memory=args.max_gpu_memory,
+        dtype=str_to_torch_dtype(args.dtype),
+        load_8bit=args.load_8bit,
+        cpu_offloading=args.cpu_offloading,
+        gptq_config=gptq_config,
+        awq_config=awq_config,
+        stream_interval=args.stream_interval,
+        conv_template=args.conv_template,
+        embed_in_truncate=args.embed_in_truncate,
+        seed=args.seed,
+        load_kwargs=args.load_kwargs
+    )
+```
+
+至此，我们完成了fastchat加载p-tuning的所有修改。在调用fastchat加载p-tuning时，需要设置环境变量 `PEFT_SHARE_BASE_WEIGHTS=true`，并以字典的形式通过--load_kwargs参数传入训练p-tuning时的pre_seq_len值。例如，将2.2.2步骤中的 `parser.add_argument("--load_kwargs",type=dict,default={})`修改为：
+
+`parser.add_argument("--load_kwargs",type=dict,default={"pre_seq_len":16})`
+
+#### 3 langchain-chatchat代码修改：
+
+1. 在configs/serve_config.py中的FSCHAT_MODEL_WORKERS字典中增加如下字段：
+
+   ```
+   "load_kwargs": {"pre_seq_len": 16} #值修改为adapter_config.json中的pre_seq_len值
+   ```
+2. 将startup.py中的create_model_worker_app修改为：
+
+   ```python
+   def create_model_worker_app(log_level: str = "INFO", **kwargs) -> FastAPI:
+       """
+       kwargs包含的字段如下：
+       host:
+       port:
+       model_names:[`model_name`]
+       controller_address:
+       worker_address:
+
+
+       对于online_api:
+           online_api:True
+           worker_class: `provider`
+       对于离线模型：
+           model_path: `model_name_or_path`,huggingface的repo-id或本地路径
+           device:`LLM_DEVICE`
+       """
+       import fastchat.constants
+       fastchat.constants.LOGDIR = LOG_PATH
+       from fastchat.serve.model_worker import worker_id, logger
+       import argparse
+       logger.setLevel(log_level)
+
+       parser = argparse.ArgumentParser()
+       args = parser.parse_args([])
+
+       for k, v in kwargs.items():
+           setattr(args, k, v)
+
+       # 在线模型API
+       if worker_class := kwargs.get("worker_class"):
+           from fastchat.serve.model_worker import app
+           worker = worker_class(model_names=args.model_names,
+                                 controller_addr=args.controller_address,
+                                 worker_addr=args.worker_address)
+           sys.modules["fastchat.serve.model_worker"].worker = worker
+       # 本地模型
+       else:
+           from configs.model_config import VLLM_MODEL_DICT
+           if kwargs["model_names"][0] in VLLM_MODEL_DICT and args.infer_turbo == "vllm":
+               import fastchat.serve.vllm_worker
+               from fastchat.serve.vllm_worker import VLLMWorker,app
+               from vllm import AsyncLLMEngine
+               from vllm.engine.arg_utils import AsyncEngineArgs,EngineArgs
+               args.tokenizer = args.model_path # 如果tokenizer与model_path不一致在此处添加
+               args.tokenizer_mode = 'auto'
+               args.trust_remote_code= True
+               args.download_dir= None
+               args.load_format = 'auto'
+               args.dtype = 'auto'
+               args.seed = 0
+               args.worker_use_ray = False
+               args.pipeline_parallel_size = 1
+               args.tensor_parallel_size = 1
+               args.block_size = 16
+               args.swap_space = 4  # GiB
+               args.gpu_memory_utilization = 0.90
+               args.max_num_batched_tokens = 2560
+               args.max_num_seqs = 256
+               args.disable_log_stats = False
+               args.conv_template = None
+               args.limit_worker_concurrency = 5
+               args.no_register = False
+               args.num_gpus = 1 # vllm worker的切分是tensor并行，这里填写显卡的数量
+               args.engine_use_ray = False
+               args.disable_log_requests = False
+               if args.model_path:
+                   args.model = args.model_path
+               if args.num_gpus > 1:
+                   args.tensor_parallel_size = args.num_gpus
+
+               for k, v in kwargs.items():
+                   setattr(args, k, v)
+
+               engine_args = AsyncEngineArgs.from_cli_args(args)
+               engine = AsyncLLMEngine.from_engine_args(engine_args)
+
+               worker = VLLMWorker(
+                           controller_addr = args.controller_address,
+                           worker_addr = args.worker_address,
+                           worker_id = worker_id,
+                           model_path = args.model_path,
+                           model_names = args.model_names,
+                           limit_worker_concurrency = args.limit_worker_concurrency,
+                           no_register = args.no_register,
+                           llm_engine =  engine,
+                           conv_template = args.conv_template,
+                           )
+               sys.modules["fastchat.serve.vllm_worker"].engine = engine
+               sys.modules["fastchat.serve.vllm_worker"].worker = worker
+
+           else:
+               from fastchat.serve.model_worker import app, GptqConfig, AWQConfig, ModelWorker
+               args.gpus = "0" # GPU的编号,如果有多个GPU，可以设置为"0,1,2,3"
+               args.max_gpu_memory = "20GiB"
+               args.num_gpus = 1  # model worker的切分是model并行，这里填写显卡的数量
+
+               args.load_8bit = False
+               args.cpu_offloading = None
+               args.gptq_ckpt = None
+               args.gptq_wbits = 16
+               args.gptq_groupsize = -1
+               args.gptq_act_order = False
+               args.awq_ckpt = None
+               args.awq_wbits = 16
+               args.awq_groupsize = -1
+               args.model_names = []
+               args.conv_template = None
+               args.limit_worker_concurrency = 5
+               args.stream_interval = 2
+               args.no_register = False
+               args.embed_in_truncate = False
+               args.load_kwargs = {"pre_seq_len": 16} # 改*************************
+               for k, v in kwargs.items():
+                   setattr(args, k, v)
+               if args.gpus:
+                   if args.num_gpus is None:
+                       args.num_gpus = len(args.gpus.split(','))
+                   if len(args.gpus.split(",")) < args.num_gpus:
+                       raise ValueError(
+                           f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!"
+                       )
+                   os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
+               gptq_config = GptqConfig(
+                   ckpt=args.gptq_ckpt or args.model_path,
+                   wbits=args.gptq_wbits,
+                   groupsize=args.gptq_groupsize,
+                   act_order=args.gptq_act_order,
+               )
+               awq_config = AWQConfig(
+                   ckpt=args.awq_ckpt or args.model_path,
+                   wbits=args.awq_wbits,
+                   groupsize=args.awq_groupsize,
+               )
+
+               worker = ModelWorker(
+                   controller_addr=args.controller_address,
+                   worker_addr=args.worker_address,
+                   worker_id=worker_id,
+                   model_path=args.model_path,
+                   model_names=args.model_names,
+                   limit_worker_concurrency=args.limit_worker_concurrency,
+                   no_register=args.no_register,
+                   device=args.device,
+                   num_gpus=args.num_gpus,
+                   max_gpu_memory=args.max_gpu_memory,
+                   load_8bit=args.load_8bit,
+                   cpu_offloading=args.cpu_offloading,
+                   gptq_config=gptq_config,
+                   awq_config=awq_config,
+                   stream_interval=args.stream_interval,
+                   conv_template=args.conv_template,
+                   embed_in_truncate=args.embed_in_truncate,
+                   load_kwargs=args.load_kwargs #改*************************
+               )
+               sys.modules["fastchat.serve.model_worker"].args = args
+               sys.modules["fastchat.serve.model_worker"].gptq_config = gptq_config
+
+               sys.modules["fastchat.serve.model_worker"].worker = worker
+
+       MakeFastAPIOffline(app)
+       app.title = f"FastChat LLM Server ({args.model_names[0]})"
+       app._worker = worker
+       return app
+   ```
+
+至此，我们完成了langchain-chatchat加载p-tuning的全部操作，将p-tuning的路径添加到model_config的llm_dict，如：
+```
+chatglm2-6b: 'p-tuning-peft'
+```
+
+即可以如下方式加载p-tuning：
+
+```shell
+PEFT_SHARE_BASE_WEIGHTS=true python startup.py -a
+
+```
+
+
+## 预处理知识库文件
+
+在载入知识库文件的时候，直接上传文档虽然能实现基础的问答，但是，其效果并不能发挥到最佳水平。因此，我们建议开发者对知识库文件做出以下的预处理。
+以下方式的预处理如果执行了，有概率提升模型的召回率。
+
+### 1. 使用``` TXT / Markdown ``` 等格式化文件，并按照要点排版
+例如，以下段落应该被处理成如下内容后再嵌入知识库，会有更好的效果。
+```
+原文: PDF类型
+查特查特团队荣获AGI Playground Hackathon黑客松“生产力工具的新想象”赛道季军
+2023年10月16日, Founder Park在近日结束的AGI Playground Hackathon黑客松比赛中，查特查特团队展现出色的实力，荣获了“生产力工具的新想象”赛道季军。本次比赛由Founder Park主办，并由智谱、Dify、Zilliz、声网、AWS云服务等企业协办。
+比赛吸引了120多支参赛团队，最终有36支队伍进入决赛，其中34支队伍成功完成了路演。比赛规定，所有参赛选手必须在短短的48小时内完成一个应用产品开发，同时要求使用智谱大模型及Zilliz向量数据库进行开发。
+查特查特团队的现场参赛人员由两名项目成员组成：
+来自A大学的小明负责了Agent旅游助手的开发、场地协调以及团队住宿和行程的安排；在保证团队完赛上做出了主要贡献。作为队长，小明坚持自信，创新，沉着的精神，不断提出改进方案并抓紧落实，遇到相关问题积极请教老师，提高了团队开发效率。
+作为核心开发者的B公司小蓝，他则主管Agent智能知识库查询开发、Agent底层框架设计、相关API调整和UI调整。在最后，他代表团队在规定的时间内呈现了产品的特点和优势，并完美的展示了产品demo。为团队最终产品能够得到奖项做出了重要贡献。
+```
+修改后的Markdown文件，具有更高的召回率
+```
+# 查特查特团队荣获AGI Playground Hackathon黑客松“生产力工具的新想象”赛道季军。
+
+## 报道简介
+2023年10月16日, Founder Park在近日结束的AGI Playground Hackathon黑客松比赛中，查特查特团队展现出色的实力，荣获了“生产力工具的新想象”赛道季军。本次比赛由Founder Park主办，并由智谱、Dify、Zilliz、声网、AWS云服务等企业协办。
+
+## 比赛介绍
+
+比赛吸引了120多支参赛团队，最终有36支队伍进入决赛，其中34支队伍成功完成了路演。比赛规定，所有参赛选手必须在短短的48小时内完成一个应用产品开发，同时要求使用智谱大模型及Zilliz向量数据库进行开发。
+
+## 获奖队员简介
+
+ 小明，A大学
+  + 负责Agent旅游助手的开发、场地协调以及团队住宿和行程的安排
+  + 在保证团队完赛上做出了主要贡献。作为队长，小明坚持自信，创新，沉着的精神，不断提出改进方案并抓紧落实，遇到相关问题积极请教老师，提高了团队开发效率。
+
+ 小蓝，B公司
+  + 主管Agent智能知识库查询开发、Agent底层框架设计、相关API调整和UI调整。
+  + 代表团队在规定的时间内呈现了产品的特点和优势，并完美的展示了产品demo。
+```
+
+### 2. 减少文件中冲突的内容，分门别类存放数据
+
+就像人类寻找相关点一样，如果在多份文件中存在相似的内容，可能会导致模型无法准确的搜索到相关内容。
+因此，需要减少文件中相似的内容，或将其分在不同的知识库中。
+例如，以下两个句子中，如果搜索外籍教师，则具有歧义，非常容易搜索到错误答案。
+
+```
+文件一：
+在大数据专业中，我们已经拥有超过1/3的外籍博士和教师。
+
+文件二：
+
+本专业具有40%的外籍教师比例，
+本专业有博士生10人，研究生12人。
+```
+
+### 3. 减少具有歧义的句子
+知识库中应该减少具有歧义的句子和段落，或者汉语的高级用法，例如
+```
+1. 他说他会杀了那个人。
+2. 你说啥子？
+3. 我喜欢你的头发。
+4. 地板真的滑，我差点没摔倒。
+```
+在相似度模型对比的时候，仅仅能搜索句子的表面意思，因此，使用有歧义的句子和段落可能导致搜索错误。
+
+### 4. 减少单个文件的大小，减少文件中的特殊符号
+ 上传知识库的单个文件不建议超过5MB，以免出现向量化中断卡死等情况。同时，上传大文件不要使用faiss数据库。
+ 减少上传文件中的中文符号，特殊符号，无意义空格等。
+
+## 自定义的关键词调整Embedding模型
+
+1. 首先准备一个关键字的文本文件，每一行是一个关键字。例如：
+```
+文件key_words.txt：
+iphone13pro
+中石油
+```
+2. 配置kb_config.py
+```
+EMBEDDING_KEYWORD_FILE = "embedding_keywords.txt"
+```  
+3. 运行```embeddings/add_embedding_keywords.py```
+```
+输入的文本（这里只是一个没分隔的一串字符）：iphone13pro
+生成的token id序列：[101, 21128, 102]
+token到token id的映射：
+[CLS]->101
+iphone13pro->21128
+[SEP]->102
+
+输入的文本：中石油
+生成的token id序列：[101, 21129, 102]
+token到token id的映射：
+[CLS]->101
+中石油->21129
+[SEP]->102
+```
+这样，你就获得了一个新的带有关键词调整的Embedding模型。
+## 实际使用效果
+在这里，我们放置了一些成功调用的效果图，方便开发者进行查看自己是否成功运行了框架。
+
+### 检查是否成功上传/管理自己的知识库
+
+在WebUI界面上传知识库，则必须保证知识库进行向量化，成功之后，文件会被切分并在向量位置打钩。
+下图展示了成功上传知识库的画面
+
+![成功上传知识库](https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/init_knowledge_base.jpg)
+
+请确保所有知识库都已经进行了向量化。
+
+### 检查是否成功开启LLM对话
+
+若打开webui后，在该模式下能成功跟大模型对话即成功调用。
+
+下图为成功调用LLM的效果图:
+
+![LLM对话](https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/LLM_success.png)
+
+### 检查是否成功调用知识库/搜索
+若成功调用知识库，则你应该能看到，在大模型回答的下方有一个```知识库匹配结果```的展开框，并且内部显示了相关的匹配结果。
+如果没有搜索到相关内容，则会提示```根据已知信息无法回答问题```,并且下拉框中没有任何内容。
+
+下图为成功调用知识库效果图：
+
+![成功调用知识库](https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/knowledge_base_success.jpg)
+
+在这个案例中，第一次用户的提问无法在知识库中寻找到合适的答案，因此，大模型回答了```根据已知信息无法回答问题```。
+
+第二次用户的提问能在知识库中寻找到合适的答案，因此，大模型给出了一个正确的回答。
+
+__注意__: 知识库的搜索情况取决于嵌入模型的准度，分词器的设置，知识库的排版和大模型的参数量，提示词设定等多个因素。因此，需要开发者进行深度的优化和调试。
+
+### 检查是否成功调用Agent工具
+
+若成功调用Agent工具，则你应该看到大模型完整的思维过程，这会在```思考过程```下拉框中显示出来。如果成功调用Agent工具，则你应该看到Markdown引用效果的工具使用情况。
+在Agent对话模式中，```思考过程```中显示的是大模型的思考过程，而下拉框之前的内容为大模型的```Final Answer```，缺乏中间的运算过程。
+
+下图展现了一个成功调用Agent工具的效果图:
+
+![成功调用单个Agent工具](https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/agent_success.png)
+
+本框架支持模型连续调用多个Agent工具，下图展示了一个提问中大模型连续调用多个Agent工具的效果图:
+
+![连续调用多个Agent工具](https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/agent_continue.png)
+
+在这个案例中，```3900```是大模型的最终答案，其余都是思考过程。
+
+
+
+
+
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/自定义.md
+++ b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/自定义.md
@ -0,0 +1,159 @@
+## 使用自定义的分词器
+1. 在```text_splitter```文件夹下新建一个文件，文件名为您的分词器名字，比如`my_splitter.py`，然后在`__init__.py`中导入您的分词器，如下所示：
+```python
+from .my_splitter import MySplitter
+```
+
+2. 修改```configs/model_config.py```文件，将您的分词器名字添加到```text_splitter_dict```中，如下所示：
+```python
+MySplitter: {
+        "source": "huggingface",  # 选择tiktoken则使用openai的方法
+        "tokenizer_name_or_path": "your tokenizer", #如果选择huggingface则使用huggingface的方法，部分tokenizer需要从Huggingface下载
+    }
+TEXT_SPLITTER = "MySplitter"
+```
+
+完成上述步骤后，就能使用自己的分词器了。
+
+## 使用自定义的 Agent 工具
+
+1. 创建自己的Agent工具
+
+ 开发者在```server/agent```文件夹中创建一个自己的文件，并将其添加到```tools_select.py```中。这样就完成了Tools的设定。
+
+ 当您创建了一个```custom_agent.py```文件，其中包含一个```work```函数，那么您需要在```tools_select.py```中添加如下代码：
+```python
+from custom_agent import work
+Tool.from_function(
+    func=work,
+    name="该函数的名字",
+    description=""
+    )
+```
+ 请注意，如果你确定在某一个工程中不会使用到某个工具，可以将其从Tools中移除，降低模型分类错误导致使用错误工具的风险。
+
+2. 修改 ```custom_template.py``` 文件
+
+开发者需要根据自己选择的大模型设定适合该模型的Agent Prompt和自定义返回格式。
+````
+"""
+Answer the following questions as best you can. You have access to the following tools:
+{tools}
+Use the following format:
+Question: the input question you must answer
+Thought: you should always think about what to do
+Action: the action to take, should be one of [{tool_names}]
+Action Input: the input to the action
+Observation: the result of the action
+... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
+Thought: I now know the final answer
+Final Answer: the final answer to the original input question
+Begin!
+history:
+{history}
+Question: {input}
+Thought: {agent_scratchpad}
+"""
+````
+除了使用 `Zero React` 的提示词方案，开发者可以自行对提示词进行修改，或者使用 Langchain 提供的其他的Agent结构。例如，如果您使用的模型为`ChatGLM3-6B`模型，我们提供了一个可以正常运行`ChatGLM3-6B`的Agent提示词，该提示词与 Langchain 的 `struct Agent`相似，其内容如下：
+````
+
+"ChatGLM3":
+"""
+You can answer using the tools, or answer directly using your knowledge without using the tools.Respond to the human as helpfully and accurately as possible.
+You have access to the following tools:
+{tools}
+Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
+Valid "action" values: "Final Answer" or  [{tool_names}]
+Provide only ONE action per $JSON_BLOB, as shown:
+
+```
+{{{{
+  "action": $TOOL_NAME,
+  "action_input": $INPUT
+}}}}
+```
+Follow this format:
+
+Question: input question to answer
+Thought: consider previous and subsequent steps
+Action:
+```
+$JSON_BLOB
+```
+Observation: action result
+... (repeat Thought/Action/Observation N times)
+Thought: I know what to respond
+Action:
+```
+{{{{
+  "action": "Final Answer",
+  "action_input": "Final response to human"
+}}}}
+Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.
+
+history: {history}
+
+Question: {input}
+
+Thought: {agent_scratchpad}
+""",
+````
+
+3. 让不支持 Langchain 调用方式但具备 Agent 能力的模型展现能力
+
+以**ChatGLM3-6B**为代表的模型，虽然具有 Function Call 能力，但其对齐格式与 Langchain 提供默认Agent格式并不符合，因此无法使用 Langchain 自身能力实现 Function Call。在我们的框架中，您可以在 ```server/Agent/custom_agent/``` 文件夹中自行复现更多模型的 Agent 能力实现。
+
+在完成上述步骤之后，您还需要到```server/chat/agent_chat/```中导入您的模块来实现特殊判定。
+
+同时，你应该在调用工具的时候使用自定义的模板，我们以`GLM`系列模型进行演示，如果您在使用`GLM`模型进行工具调用，你应该使用`model_config.py`中的`ChatGLM3`模板。
+
+4. 局限性
+
+- 由于 React Agent 的脆弱性，temperature 参数的设置对于模型的效果有很大的影响。我们建议开发者在使用自定义 Agent 时，对于不同的模型，将其设置成0.1以下，以达到更好的效果。
+- 目前，官方仅对 **ChatGLM3-6B** 一种模型进行了 非 Langchain 对齐格式下的能力激活，我们欢迎开发者自行探索其他模型，并提交对应的 PR，让框架支持更多的 Agent 模型。
+- 在`0.2.x`版本中，我们没有对`Plan`进行优化，因此，连续调用工具的能力较差，我们会在`0.3.x`中优化这一问题。此外，经过测试，本地模型在工具调用上的表现不如在线模型，我们更推荐使用 `gpt4-1106-Preview` 来完成工具调用的任务。
+
+## 使用自定义的微调模型
+
+- 本项目基于 FastChat 加载 LLM 服务，故需以 FastChat 加载 PEFT 路径。
+- 开发者需要保证路径名称里必须有 peft 这个词。
+- 配置文件的名字为 ```adapter_config.json```
+- peft 路径下包含.bin 格式的 PEFT 权重， peft路径在startup.py中 ```create_model_worker_app``` 函数的 ```args.model_names``` 中指定
+```python
+    args.model_names = ["/home/ubuntu/your_peft_folder/peft"]
+
+```
+- 执行代码之前，应该设定环境变量
+```
+PEFT_SHARE_BASE_WEIGHTS=true 
+```
+
+注：如果上述方式启动失败，则需要以标准的 FastChat 服务启动方式分步启动，PEFT加载详细步骤参考以下ISSUE
+
+[加载lora微调后模型失效](https://github.com/chatchat-space/Langchain-Chatchat/issues/1130#issuecomment-1685291822)
+
+在```最佳实践```章节中，我们为开发者做了更详细的模型载入文档。
+
+__该功能可能还具有一定的Bug，需要开发者仔细适配。__
+
+
+## 使用自定义的嵌入模型
+
+- 使用自定义的嵌入模型，开发者需要将其合并到原始的嵌入模型中，之后仅需将其路径添加到```configs/model_config.py```中并选择自己的模型启动即可。
+- 如果想自己在Embedding模型中支持 自定义的关键字，需要在 ```embeddings/embedding_keywords.txt```中设定好自己的关键字
+- 运行 ```embeddings/add_embedding_keywords.py```
+- 将生成的新Embedding模型地址放入```configs/model_config.py```中并选择，
+```
+"custom-embedding": "your path",
+```
+并设置
+```
+EMBEDDING_MODEL = "custom-embedding"  
+```
+即可调用加入关键字的embedding模型。
+在```最佳实践```章节中，我们为某几个关键词定制了一个Embed模型。
+
+## 日志功能
+
+- 日志功能记录了大模型的心跳和网络端口传输记录，开发者可以通过日志功能查看模型的运行情况。
--- a/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.faiss
+++ b/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.faiss
--- a/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.pkl
+++ b/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.pkl
--- a/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh/index.faiss
+++ b/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh/index.faiss
--- a/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh/index.pkl
+++ b/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh/index.pkl
--- a/chatchat/chatchat/data/knowledge_base/samples/vector_store/m3e-base/index.faiss
+++ b/chatchat/chatchat/data/knowledge_base/samples/vector_store/m3e-base/index.faiss
--- a/chatchat/chatchat/data/knowledge_base/samples/vector_store/m3e-base/index.pkl
+++ b/chatchat/chatchat/data/knowledge_base/samples/vector_store/m3e-base/index.pkl
--- a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南第1部分：通用要求（征求意见稿）.doc
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南第1部分：通用要求（征求意见稿）.doc
--- a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南第1部分：通用要求（征求意见稿）.docx
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南第1部分：通用要求（征求意见稿）.docx
--- a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南第6部分：构筑物、系统和部件的实际状态-（征求意见稿）.docx
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南第6部分：构筑物、系统和部件的实际状态-（征求意见稿）.docx
--- a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价第12部分：设计（征求意见稿20230317）.doc
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价第12部分：设计（征求意见稿20230317）.doc
--- a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价第12部分：设计（征求意见稿20230317）.docx
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价第12部分：设计（征求意见稿20230317）.docx
--- a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价指南第7部分：经验反馈（征求意见稿）
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价指南第7部分：经验反馈（征求意见稿）
--- a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查第12部分：设计）.doc
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查第12部分：设计）.doc
--- a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查第12部分：设计）.docx
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查第12部分：设计）.docx
--- a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查第1部分：通用要求）.doc
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查第1部分：通用要求）.doc
--- a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查第1部分：通用要求）.docx
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查第1部分：通用要求）.docx
--- a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查指南第6部分：构筑物、系统和部件的实际状态）(3).doc
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查指南第6部分：构筑物、系统和部件的实际状态）(3).doc
--- a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查指南第6部分：构筑物、系统和部件的实际状态）(3).docx
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查指南第6部分：构筑物、系统和部件的实际状态）(3).docx
--- a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全评价指南第7部分：经验反馈）20230324.docx
+++ b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全评价指南第7部分：经验反馈）20230324.docx
--- a/chatchat/chatchat/data/knowledge_base/standard/vector_store/bge-large-zh/index.faiss
+++ b/chatchat/chatchat/data/knowledge_base/standard/vector_store/bge-large-zh/index.faiss
--- a/chatchat/chatchat/data/knowledge_base/standard/vector_store/bge-large-zh/index.pkl
+++ b/chatchat/chatchat/data/knowledge_base/standard/vector_store/bge-large-zh/index.pkl
--- a/chatchat/chatchat/data/knowledge_base/standard/vector_store/index.faiss
+++ b/chatchat/chatchat/data/knowledge_base/standard/vector_store/index.faiss
--- a/chatchat/chatchat/data/knowledge_base/standard/vector_store/index.pkl
+++ b/chatchat/chatchat/data/knowledge_base/standard/vector_store/index.pkl
--- a/chatchat/chatchat/data/knowledge_base/standard/vector_store/m3e-base/index.faiss
+++ b/chatchat/chatchat/data/knowledge_base/standard/vector_store/m3e-base/index.faiss
--- a/Show More
+++ b/Show More