diff --git a/.gitignore b/.gitignore
index 90a14f46..15cba9e6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,13 +1,13 @@
 *.log
 *.log.*
 *.bak
-/chatchat/data/*
-!/chatchat/data/knowledge_base/samples
-/chatchat/data/knowledge_base/samples/vector_store
-!/chatchat/data/nltk_data
+/chatchat/chatchat/data/*
+!/chatchat/chatchat/data/knowledge_base/samples
+/chatchat/chatchat/data/knowledge_base/samples/vector_store
+!/chatchat/chatchat/data/nltk_data
 
-/chatchat/configs/*.py
-/chatchat/configs/loom.yaml
+/chatchat/chatchat/configs/*.py
+/chatchat/chatchat/configs/loom.yaml
 .vscode/
 
 # below are standard python ignore files
diff --git a/chatchat/chatchat/configs/basic_config.py b/chatchat/chatchat/configs/basic_config.py
deleted file mode 100644
index 61ec7c70..00000000
--- a/chatchat/chatchat/configs/basic_config.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import logging
-import os
-from pathlib import Path
-
-import langchain
-
-
-# 是否显示详细日志
-log_verbose = True
-langchain.verbose = log_verbose
-
-# 通常情况下不需要更改以下内容
-
-# 用户数据根目录
-DATA_PATH = str(Path(__file__).absolute().parent.parent / "data")
-if not os.path.exists(DATA_PATH):
-    os.mkdir(DATA_PATH)
-
-# nltk 模型存储路径
-NLTK_DATA_PATH = os.path.join(DATA_PATH, "nltk_data")
-import nltk
-nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
-
-# 日志格式
-LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
-logging.basicConfig(format=LOG_FORMAT)
-
-
-# 日志存储路径
-LOG_PATH = os.path.join(DATA_PATH, "logs")
-if not os.path.exists(LOG_PATH):
-    os.mkdir(LOG_PATH)
-
-# 模型生成内容（图片、视频、音频等）保存位置
-MEDIA_PATH = os.path.join(DATA_PATH, "media")
-if not os.path.exists(MEDIA_PATH):
-    os.mkdir(MEDIA_PATH)
-    os.mkdir(os.path.join(MEDIA_PATH, "image"))
-    os.mkdir(os.path.join(MEDIA_PATH, "audio"))
-    os.mkdir(os.path.join(MEDIA_PATH, "video"))
-
-# 临时文件目录，主要用于文件对话
-BASE_TEMP_DIR = os.path.join(DATA_PATH, "temp")
-if not os.path.exists(BASE_TEMP_DIR):
-    os.mkdir(BASE_TEMP_DIR)
diff --git a/chatchat/chatchat/configs/kb_config.py b/chatchat/chatchat/configs/kb_config.py
deleted file mode 100644
index a330e80f..00000000
--- a/chatchat/chatchat/configs/kb_config.py
+++ /dev/null
@@ -1,148 +0,0 @@
-import os
-
-from configs.basic_config import DATA_PATH
-
-
-# 默认使用的知识库
-DEFAULT_KNOWLEDGE_BASE = "samples"
-
-# 默认向量库/全文检索引擎类型。可选：faiss, milvus(离线) & zilliz(在线), pgvector,全文检索引擎es
-DEFAULT_VS_TYPE = "faiss"
-
-# 缓存向量库数量（针对FAISS）
-CACHED_VS_NUM = 1
-
-# 缓存临时向量库数量（针对FAISS），用于文件对话
-CACHED_MEMO_VS_NUM = 10
-
-# 知识库中单段文本长度(不适用MarkdownHeaderTextSplitter)
-CHUNK_SIZE = 250
-
-# 知识库中相邻文本重合长度(不适用MarkdownHeaderTextSplitter)
-OVERLAP_SIZE = 50
-
-# 知识库匹配向量数量
-VECTOR_SEARCH_TOP_K = 3
-
-# 知识库匹配相关度阈值，取值范围在0-1之间，SCORE越小，相关度越高，取到1相当于不筛选，建议设置在0.5左右
-SCORE_THRESHOLD = 1
-
-# 默认搜索引擎。可选：bing, duckduckgo, metaphor
-DEFAULT_SEARCH_ENGINE = "metaphor"
-
-# 搜索引擎匹配结题数量
-SEARCH_ENGINE_TOP_K = 3
-
-
-# Bing 搜索必备变量
-# 使用 Bing 搜索需要使用 Bing Subscription Key,需要在azure port中申请试用bing search
-# 具体申请方式请见
-# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource
-# 使用python创建bing api 搜索实例详见:
-# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python
-BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
-# 注意不是bing Webmaster Tools的api key，
-
-# 此外，如果是在服务器上，报Failed to establish a new connection: [Errno 110] Connection timed out
-# 是因为服务器加了防火墙，需要联系管理员加白名单，如果公司的服务器的话，就别想了GG
-BING_SUBSCRIPTION_KEY = "b31d23d7b96742ab959f4cc07a605f72"
-
-# metaphor搜索需要KEY
-METAPHOR_API_KEY = "f8c9f98f-141a-4a55-9be7-ae675ccacd7a"
-
-# 心知天气 API KEY，用于天气Agent。申请：https://www.seniverse.com/
-SENIVERSE_API_KEY = ""
-
-# 是否开启中文标题加强，以及标题增强的相关配置
-# 通过增加标题判断，判断哪些文本为标题，并在metadata中进行标记；
-# 然后将文本与往上一级的标题进行拼合，实现文本信息的增强。
-ZH_TITLE_ENHANCE = False
-
-# PDF OCR 控制：只对宽高超过页面一定比例（图片宽/页面宽，图片高/页面高）的图片进行 OCR。
-# 这样可以避免 PDF 中一些小图片的干扰，提高非扫描版 PDF 处理速度
-PDF_OCR_THRESHOLD = (0.6, 0.6)
-
-# 每个知识库的初始化介绍，用于在初始化知识库时显示和Agent调用，没写则没有介绍，不会被Agent调用。
-KB_INFO = {
-    "samples": "关于本项目issue的解答",
-}
-
-
-# 通常情况下不需要更改以下内容
-
-# 知识库默认存储路径
-KB_ROOT_PATH = os.path.join(DATA_PATH, "knowledge_base")
-if not os.path.exists(KB_ROOT_PATH):
-    os.mkdir(KB_ROOT_PATH)
-
-# 数据库默认存储路径。
-# 如果使用sqlite，可以直接修改DB_ROOT_PATH；如果使用其它数据库，请直接修改SQLALCHEMY_DATABASE_URI。
-DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db")
-SQLALCHEMY_DATABASE_URI = f"sqlite:///{DB_ROOT_PATH}"
-
-# 可选向量库类型及对应配置
-kbs_config = {
-    "faiss": {
-    },
-    "milvus": {
-        "host": "127.0.0.1",
-        "port": "19530",
-        "user": "",
-        "password": "",
-        "secure": False,
-    },
-    "zilliz": {
-        "host": "in01-a7ce524e41e3935.ali-cn-hangzhou.vectordb.zilliz.com.cn",
-        "port": "19530",
-        "user": "",
-        "password": "",
-        "secure": True,
-        },
-    "pg": {
-        "connection_uri": "postgresql://postgres:postgres@127.0.0.1:5432/langchain_chatchat",
-    },
-
-    "es": {
-        "host": "127.0.0.1",
-        "port": "9200",
-        "index_name": "test_index",
-        "user": "",
-        "password": ""
-    },
-    "milvus_kwargs":{
-        "search_params":{"metric_type": "L2"}, #在此处增加search_params
-        "index_params":{"metric_type": "L2","index_type": "HNSW"} # 在此处增加index_params
-    },
-    "chromadb": {}
-}
-
-# TextSplitter配置项，如果你不明白其中的含义，就不要修改。
-text_splitter_dict = {
-    "ChineseRecursiveTextSplitter": {
-        "source": "",  ## 选择tiktoken则使用openai的方法
-        "tokenizer_name_or_path": "",
-    },
-    "SpacyTextSplitter": {
-        "source": "",
-        "tokenizer_name_or_path": "",
-    },
-    "RecursiveCharacterTextSplitter": {
-        "source": "tiktoken",
-        "tokenizer_name_or_path": "cl100k_base",
-    },
-    "MarkdownHeaderTextSplitter": {
-        "headers_to_split_on":
-            [
-                ("#", "head1"),
-                ("##", "head2"),
-                ("###", "head3"),
-                ("####", "head4"),
-            ]
-    },
-}
-
-# TEXT_SPLITTER 名称
-TEXT_SPLITTER_NAME = "ChineseRecursiveTextSplitter"
-
-# Embedding模型定制词语的词表文件
-EMBEDDING_KEYWORD_FILE = "embedding_keywords.txt"
diff --git a/chatchat/chatchat/configs/model_config.py b/chatchat/chatchat/configs/model_config.py
deleted file mode 100644
index b0f3c454..00000000
--- a/chatchat/chatchat/configs/model_config.py
+++ /dev/null
@@ -1,170 +0,0 @@
-import os
-
-
-# 默认选用的 LLM 名称
-DEFAULT_LLM_MODEL = "qwen"
-
-# 默认选用的 Embedding 名称
-DEFAULT_EMBEDDING_MODEL = "bge"
-
-
-# AgentLM模型的名称 (可以不指定，指定之后就锁定进入Agent之后的Chain的模型，不指定就是LLM_MODELS[0])
-Agent_MODEL = None
-
-# 历史对话轮数
-HISTORY_LEN = 3
-
-# 大模型最长支持的长度，如果不填写，则使用模型默认的最大长度，如果填写，则为用户设定的最大长度
-MAX_TOKENS = None
-
-# LLM通用对话参数
-TEMPERATURE = 0.7
-# TOP_P = 0.95 # ChatOpenAI暂不支持该参数
-
-SUPPORT_AGENT_MODELS = [
-    "chatglm3-6b",
-    "openai-api",
-    "Qwen-14B-Chat",
-    "Qwen-7B-Chat",
-    "qwen",
-]
-
-
-LLM_MODEL_CONFIG = {
-    # 意图识别不需要输出，模型后台知道就行
-    "preprocess_model": {
-        DEFAULT_LLM_MODEL: {
-            "temperature": 0.05,
-            "max_tokens": 4096,
-            "history_len": 100,
-            "prompt_name": "default",
-            "callbacks": False
-        },
-    },
-    "llm_model": {
-        DEFAULT_LLM_MODEL: {
-            "temperature": 0.9,
-            "max_tokens": 4096,
-            "history_len": 10,
-            "prompt_name": "default",
-            "callbacks": True
-        },
-    },
-    "action_model": {
-        DEFAULT_LLM_MODEL: {
-            "temperature": 0.01,
-            "max_tokens": 4096,
-            "callbacks": True
-        },
-    },
-    "postprocess_model": {
-        DEFAULT_LLM_MODEL: {
-            "temperature": 0.01,
-            "max_tokens": 4096,
-            "prompt_name": "default",
-            "callbacks": True
-        }
-    },
-    "image_model": {
-        "sd-turbo": {
-            "size": "256*256",
-        }
-    },
-    "multimodal_model": {
-        "qwen-vl": {}
-    },
-}
-
-# 可以通过 loom/xinference/oneapi/fastchat 启动模型服务，然后将其 URL 和 KEY 配置过来即可。
-#   - platform_name 可以任意填写，不要重复即可
-#   - platform_type 可选：openai, xinference, oneapi, fastchat。以后可能根据平台类型做一些功能区分
-#   - 将框架部署的模型填写到对应列表即可。不同框架可以加载同名模型，项目会自动做负载均衡。
-
-MODEL_PLATFORMS = [
-    # {
-    #     "platform_name": "openai-api",
-    #     "platform_type": "openai",
-    #     "api_base_url": "https://api.openai.com/v1",
-    #     "api_key": "sk-yBuaCpqEVUBarBP9700e7224A2D743AeA329334d19C0A336",
-    #     "api_proxy": "https://qujhzynu.cloud.sealos.io/v1",
-    #     "api_concurrencies": 5,
-    #     "llm_models": [
-    #         "gpt-3.5-turbo",
-    #     ],
-    #     "embed_models": [],
-    #     "image_models": [],
-    #     "multimodal_models": [],
-    # },
-
-    {
-        "platform_name": "xinference",
-        "platform_type": "xinference",
-        "api_base_url": "http://127.0.0.1:9997/v1",
-        "api_key": "EMPTY",
-        "api_concurrencies": 5,
-        # 注意：这里填写的是 xinference 部署的模型 UID，而非模型名称
-        "llm_models": [
-            "qwen",
-            "glm3",
-        ],
-        "embed_models": [
-            "bge",
-        ],
-        "image_models": [
-            "sd-turbo",
-        ],
-        "multimodal_models": [
-            "qwen-vl",
-        ],
-    },
-
-    {
-        "platform_name": "oneapi",
-        "platform_type": "oneapi",
-        "api_base_url": "http://127.0.0.1:3000/v1",
-        "api_key": "sk-Mlft68FXoTYqLfQr06F0E2D77e6e4220B6F420999d25383f",
-        "api_concurrencies": 5,
-        "llm_models": [
-            # 智谱 API
-            "chatglm_pro",
-            "chatglm_turbo",
-            "chatglm_std",
-            "chatglm_lite",
-            # 千问 API
-            "qwen-turbo",
-            "qwen-plus",
-            "qwen-max",
-            "qwen-max-longcontext",
-            # 千帆 API
-            "ERNIE-Bot",
-            "ERNIE-Bot-turbo",
-            "ERNIE-Bot-4",
-            # 星火 API
-            "SparkDesk",
-        ],
-        "embed_models": [
-            # 千问 API
-            "text-embedding-v1",
-            # 千帆 API
-            "Embedding-V1",
-        ],
-        "image_models": [],
-        "multimodal_models": [],
-    },
-
-    # {
-    #     "platform_name": "loom",
-    #     "platform_type": "loom",
-    #     "api_base_url": "http://127.0.0.1:7860/v1",
-    #     "api_key": "88296d2f9bbd9ab222c1086e39f5fbb2.FbC0YSrAMcaEF2gB",
-    #     "api_concurrencies": 5,
-    #     "llm_models": [
-    #         "chatglm3-6b",
-    #     ],
-    #     "embed_models": [],
-    #     "image_models": [],
-    #     "multimodal_models": [],
-    # },
-]
-
-LOOM_CONFIG = os.path.join(os.path.dirname(os.path.abspath(__file__)), "loom.yaml")
diff --git a/chatchat/chatchat/configs/prompt_config.py b/chatchat/chatchat/configs/prompt_config.py
deleted file mode 100644
index 58ce1e0c..00000000
--- a/chatchat/chatchat/configs/prompt_config.py
+++ /dev/null
@@ -1,209 +0,0 @@
-PROMPT_TEMPLATES = {
-    "preprocess_model": {
-        "default":
-            '你只要回复0 和 1 ，代表不需要使用工具。以下几种问题不需要使用工具:'
-            '1. 需要联网查询的内容\n'
-            '2. 需要计算的内容\n'
-            '3. 需要查询实时性的内容\n'
-            '如果我的输入满足这几种情况，返回1。其他输入，请你回复0，你只要返回一个数字\n'
-            '这是我的问题:'
-    },
-    "llm_model": {
-        "default":
-            '{{input}}',
-        "with_history":
-            'The following is a friendly conversation between a human and an AI. '
-            'The AI is talkative and provides lots of specific details from its context. '
-            'If the AI does not know the answer to a question, it truthfully says it does not know.\n\n'
-            'Current conversation:\n'
-            '{history}\n'
-            'Human: {input}\n'
-            'AI:',
-    },
-    "action_model": {
-        "GPT-4":
-            'Answer the following questions as best you can. You have access to the following tools:\n'
-            'The way you use the tools is by specifying a json blob.\n'
-            'Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n'
-            'The only values that should be in the "action" field are: {tool_names}\n'
-            'The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\n'
-            '```\n\n'
-            '{{{{\n'
-            '  "action": $TOOL_NAME,\n'
-            '  "action_input": $INPUT\n'
-            '}}}}\n'
-            '```\n\n'
-            'ALWAYS use the following format:\n'
-            'Question: the input question you must answer\n'
-            'Thought: you should always think about what to do\n'
-            'Action:\n'
-            '```\n\n'
-            '$JSON_BLOB'
-            '```\n\n'
-            'Observation: the result of the action\n'
-            '... (this Thought/Action/Observation can repeat N times)\n'
-            'Thought: I now know the final answer\n'
-            'Final Answer: the final answer to the original input question\n'
-            'Begin! Reminder to always use the exact characters `Final Answer` when responding.\n'
-            'Question:{input}\n'
-            'Thought:{agent_scratchpad}\n',
-
-        "ChatGLM3":
-            'You can answer using the tools.Respond to the human as helpfully and accurately as possible.\n'
-            'You have access to the following tools:\n'
-            '{tools}\n'
-            'Use a json blob to specify a tool by providing an action key (tool name)\n'
-            'and an action_input key (tool input).\n'
-            'Valid "action" values: "Final Answer" or  [{tool_names}]\n'
-            'Provide only ONE action per $JSON_BLOB, as shown:\n\n'
-            '```\n'
-            '{{{{\n'
-            '  "action": $TOOL_NAME,\n'
-            '  "action_input": $INPUT\n'
-            '}}}}\n'
-            '```\n\n'
-            'Follow this format:\n\n'
-            'Question: input question to answer\n'
-            'Thought: consider previous and subsequent steps\n'
-            'Action:\n'
-            '```\n'
-            '$JSON_BLOB\n'
-            '```\n'
-            'Observation: action result\n'
-            '... (repeat Thought/Action/Observation N times)\n'
-            'Thought: I know what to respond\n'
-            'Action:\n'
-            '```\n'
-            '{{{{\n'
-            '  "action": "Final Answer",\n'
-            '  "action_input": "Final response to human"\n'
-            '}}}}\n'
-            'Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary.\n'
-            'Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.\n'
-            'Question: {input}\n\n'
-            '{agent_scratchpad}\n',
-        "qwen":
-            'Answer the following questions as best you can. You have access to the following APIs:\n\n'
-            '{tools}\n\n'
-            'Use the following format:\n\n'
-            'Question: the input question you must answer\n'
-            'Thought: you should always think about what to do\n'
-            'Action: the action to take, should be one of [{tool_names}]\n'
-            'Action Input: the input to the action\n'
-            'Observation: the result of the action\n'
-            '... (this Thought/Action/Action Input/Observation can be repeated zero or more times)\n'
-            'Thought: I now know the final answer\n'
-            'Final Answer: the final answer to the original input question\n\n'
-            'Format the Action Input as a JSON object.\n\n'
-            'Begin!\n\n'
-            'Question: {input}\n\n'
-            '{agent_scratchpad}\n\n',
-        "structured-chat-agent":
-            'Respond to the human as helpfully and accurately as possible. You have access to the following tools:\n\n'
-            '{tools}\n\n'
-            'Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).\n\n'
-            'Valid "action" values: "Final Answer" or {tool_names}\n\n'
-            'Provide only ONE action per $JSON_BLOB, as shown:\n\n'
-            '```\n{{\n  "action": $TOOL_NAME,\n  "action_input": $INPUT\n}}\n```\n\n'
-            'Follow this format:\n\n'
-            'Question: input question to answer\n'
-            'Thought: consider previous and subsequent steps\n'
-            'Action:\n```\n$JSON_BLOB\n```\n'
-            'Observation: action result\n'
-            '... (repeat Thought/Action/Observation N times)\n'
-            'Thought: I know what to respond\n'
-            'Action:\n```\n{{\n  "action": "Final Answer",\n  "action_input": "Final response to human"\n}}\n\n'
-            'Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation\n'
-            '{input}\n\n'
-            '{agent_scratchpad}\n\n'
-            # '(reminder to respond in a JSON blob no matter what)'
-    },
-    "postprocess_model": {
-        "default": "{{input}}",
-    }
-}
-
-TOOL_CONFIG = {
-    "search_local_knowledgebase": {
-        "use": False,
-        "top_k": 3,
-        "score_threshold": 1,
-        "conclude_prompt": {
-            "with_result":
-                '<指令>根据已知信息，简洁和专业的来回答问题。如果无法从中得到答案，请说 "根据已知信息无法回答该问题"，'
-                '不允许在答案中添加编造成分，答案请使用中文。 </指令>\n'
-                '<已知信息>{{ context }}</已知信息>\n'
-                '<问题>{{ question }}</问题>\n',
-            "without_result":
-                '请你根据我的提问回答我的问题:\n'
-                '{{ question }}\n'
-                '请注意，你必须在回答结束后强调，你的回答是根据你的经验回答而不是参考资料回答的。\n',
-        }
-    },
-    "search_internet": {
-        "use": False,
-        "search_engine_name": "bing",
-        "search_engine_config":
-            {
-                "bing": {
-                    "result_len": 3,
-                    "bing_search_url": "https://api.bing.microsoft.com/v7.0/search",
-                    "bing_key": "680a39347d7242c5bd2d7a9576a125b7",
-                },
-                "metaphor": {
-                    "result_len": 3,
-                    "metaphor_api_key": "",
-                    "split_result": False,
-                    "chunk_size": 500,
-                    "chunk_overlap": 0,
-                },
-                "duckduckgo": {
-                    "result_len": 3
-                }
-            },
-        "top_k": 10,
-        "verbose": "Origin",
-        "conclude_prompt":
-            "<指令>这是搜索到的互联网信息，请你根据这些信息进行提取并有调理，简洁的回答问题。如果无法从中得到答案，请说 “无法搜索到能回答问题的内容”。 "
-            "</指令>\n<已知信息>{{ context }}</已知信息>\n"
-            "<问题>\n"
-            "{{ question }}\n"
-            "</问题>\n"
-    },
-    "arxiv": {
-        "use": False,
-    },
-    "shell": {
-        "use": False,
-    },
-    "weather_check": {
-        "use": False,
-        "api-key": "S8vrB4U_-c5mvAMiK",
-    },
-    "search_youtube": {
-        "use": False,
-    },
-    "wolfram": {
-        "use": False,
-    },
-    "calculate": {
-        "use": False,
-    },
-    "vqa_processor": {
-        "use": False,
-        "model_path": "your model path",
-        "tokenizer_path": "your tokenizer path",
-        "device": "cuda:1"
-    },
-    "aqa_processor": {
-        "use": False,
-        "model_path": "your model path",
-        "tokenizer_path": "yout tokenizer path",
-        "device": "cuda:2"
-    },
-
-    "text2images": {
-        "use": False,
-    },
-
-}
diff --git a/chatchat/chatchat/configs/server_config.py b/chatchat/chatchat/configs/server_config.py
deleted file mode 100644
index 40485250..00000000
--- a/chatchat/chatchat/configs/server_config.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import sys
-
-
-# httpx 请求默认超时时间（秒）。如果加载模型或对话较慢，出现超时错误，可以适当加大该值。
-HTTPX_DEFAULT_TIMEOUT = 300.0
-
-# API 是否开启跨域，默认为False，如果需要开启，请设置为True
-# is open cross domain
-OPEN_CROSS_DOMAIN = True
-
-# 各服务器默认绑定host。如改为"0.0.0.0"需要修改下方所有XX_SERVER的host
-DEFAULT_BIND_HOST = "127.0.0.1" if sys.platform != "win32" else "127.0.0.1"
-
-
-# webui.py server
-WEBUI_SERVER = {
-    "host": DEFAULT_BIND_HOST,
-    "port": 8501,
-}
-
-# api.py server
-API_SERVER = {
-    "host": DEFAULT_BIND_HOST,
-    "port": 7861,
-}
diff --git a/chatchat/chatchat/data/knowledge_base/info.db b/chatchat/chatchat/data/knowledge_base/info.db
deleted file mode 100644
index 95597ecf..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/info.db and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/samples/content/README.md b/chatchat/chatchat/data/knowledge_base/samples/content/README.md
deleted file mode 100644
index 67aa6ef8..00000000
--- a/chatchat/chatchat/data/knowledge_base/samples/content/README.md
+++ /dev/null
@@ -1,159 +0,0 @@
-![](img/logo-long-chatchat-trans-v2.png)
-
-
-🌍 [READ THIS IN ENGLISH](README_en.md)
-
-📃 **LangChain-Chatchat** (原 Langchain-ChatGLM)
-
-基于 ChatGLM 等大语言模型与 Langchain 等应用框架实现，开源、可离线部署的检索增强生成(RAG)大模型知识库项目。
-
----
-
-## 目录
-
-* [介绍](README.md#介绍)
-* [解决的痛点](README.md#解决的痛点)
-* [快速上手](README.md#快速上手)
-  * [1. 环境配置](README.md#1-环境配置)
-  * [2. 模型下载](README.md#2-模型下载)
-  * [3. 初始化知识库和配置文件](README.md#3-初始化知识库和配置文件)
-  * [4. 一键启动](README.md#4-一键启动)
-  * [5. 启动界面示例](README.md#5-启动界面示例)
-* [联系我们](README.md#联系我们)
-
-
-## 介绍
-
-🤖️ 一种利用 [langchain](https://github.com/hwchase17/langchain) 思想实现的基于本地知识库的问答应用，目标期望建立一套对中文场景与开源模型支持友好、可离线运行的知识库问答解决方案。
-
-💡 受 [GanymedeNil](https://github.com/GanymedeNil) 的项目 [document.ai](https://github.com/GanymedeNil/document.ai) 和 [AlexZhangji](https://github.com/AlexZhangji) 创建的 [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) 启发，建立了全流程可使用开源模型实现的本地知识库问答应用。本项目的最新版本中通过使用 [FastChat](https://github.com/lm-sys/FastChat) 接入 Vicuna, Alpaca, LLaMA, Koala, RWKV 等模型，依托于 [langchain](https://github.com/langchain-ai/langchain) 框架支持通过基于 [FastAPI](https://github.com/tiangolo/fastapi) 提供的 API 调用服务，或使用基于 [Streamlit](https://github.com/streamlit/streamlit) 的 WebUI 进行操作。
-
-✅ 依托于本项目支持的开源 LLM 与 Embedding 模型，本项目可实现全部使用**开源**模型**离线私有部署**。与此同时，本项目也支持 OpenAI GPT API 的调用，并将在后续持续扩充对各类模型及模型 API 的接入。
-
-⛓️ 本项目实现原理如下图所示，过程包括加载文件 -> 读取文本 -> 文本分割 -> 文本向量化 -> 问句向量化 -> 在文本向量中匹配出与问句向量最相似的 `top k`个 -> 匹配出的文本作为上下文和问题一起添加到 `prompt`中 -> 提交给 `LLM`生成回答。
-
-📺 [原理介绍视频](https://www.bilibili.com/video/BV13M4y1e7cN/?share_source=copy_web&vd_source=e6c5aafe684f30fbe41925d61ca6d514)
-
-![实现原理图](img/langchain+chatglm.png)
-
-从文档处理角度来看，实现流程如下：
-
-![实现原理图2](img/langchain+chatglm2.png)
-
-🚩 本项目未涉及微调、训练过程，但可利用微调或训练对本项目效果进行优化。
-
-🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/chatchat-space/Langchain-Chatchat/Langchain-Chatchat) 中 `v11` 版本所使用代码已更新至本项目 `v0.2.7` 版本。
-
-🐳 [Docker 镜像](registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.6) 已经更新到 ```0.2.7``` 版本。
-
-🌲 一行命令运行 Docker ：
-
-```shell
-docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.7
-```
-
-🧩 本项目有一个非常完整的[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/) ， README只是一个简单的介绍，__仅仅是入门教程，能够基础运行__。 如果你想要更深入的了解本项目，或者想对本项目做出贡献。请移步 [Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)  界面
-
-## 解决的痛点
-
-该项目是一个可以实现 __完全本地化__推理的知识库增强方案, 重点解决数据安全保护，私域化部署的企业痛点。
-本开源方案采用```Apache License```，可以免费商用，无需付费。
-
-我们支持市面上主流的本地大预言模型和Embedding模型，支持开源的本地向量数据库。
-支持列表详见[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
-
-
-## 快速上手
-
-### 1. 环境配置
-
-+ 首先，确保你的机器安装了 Python 3.8 - 3.10
-```
-$ python --version
-Python 3.10.12
-```
-接着，创建一个虚拟环境，并在虚拟环境内安装项目的依赖
-```shell
-
-# 拉取仓库
-$ git clone https://github.com/chatchat-space/Langchain-Chatchat.git
-
-# 进入目录
-$ cd Langchain-Chatchat
-
-# 安装全部依赖
-$ pip install -r requirements.txt 
-$ pip install -r requirements_api.txt
-$ pip install -r requirements_webui.txt  
-
-# 默认依赖包括基本运行环境（FAISS向量库）。如果要使用 milvus/pg_vector 等向量库，请将 requirements.txt 中相应依赖取消注释再安装。
-```
-### 2， 模型下载
-
-如需在本地或离线环境下运行本项目，需要首先将项目所需的模型下载至本地，通常开源 LLM 与 Embedding 模型可以从 [HuggingFace](https://huggingface.co/models) 下载。
-
-以本项目中默认使用的 LLM 模型 [THUDM/ChatGLM2-6B](https://huggingface.co/THUDM/chatglm2-6b) 与 Embedding 模型 [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base) 为例：
-
-下载模型需要先[安装 Git LFS](https://docs.github.com/zh/repositories/working-with-files/managing-large-files/installing-git-large-file-storage)，然后运行
-
-```Shell
-$ git lfs install
-$ git clone https://huggingface.co/THUDM/chatglm2-6b
-$ git clone https://huggingface.co/moka-ai/m3e-base
-```
-### 3. 初始化知识库和配置文件
-
-按照下列方式初始化自己的知识库和简单的复制配置文件
-```shell
-$ python copy_config_example.py
-$ python init_database.py --recreate-vs
- ```
-### 4. 一键启动
-
-按照以下命令启动项目
-```shell
-$ python startup.py -a
-```
-### 5. 启动界面示例
-
-如果正常启动，你将能看到以下界面
-
-1. FastAPI Docs 界面
-
-![](img/fastapi_docs_026.png)
-
-2. Web UI 启动界面示例：
-
-- Web UI 对话界面：
-
-![img](img/LLM_success.png)
-
-- Web UI 知识库管理页面：
-
-![](img/init_knowledge_base.jpg)
-
-
-### 注意
-
-以上方式只是为了快速上手，如果需要更多的功能和自定义启动方式 ，请参考[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
-
-
----
-## 项目里程碑
-
-
----
-## 联系我们
-### Telegram
-[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white "langchain-chatglm")](https://t.me/+RjliQ3jnJ1YyN2E9)
-
-### 项目交流群
-<img src="img/qr_code_76.jpg" alt="二维码" width="300" />
-
-🎉 Langchain-Chatchat 项目微信交流群，如果你也对本项目感兴趣，欢迎加入群聊参与讨论交流。
-
-### 公众号
-
-<img src="img/official_wechat_mp_account.png" alt="二维码" width="300" />
-
-🎉 Langchain-Chatchat 项目官方公众号，欢迎扫码关注。
diff --git a/chatchat/chatchat/data/knowledge_base/samples/content/webui2.py b/chatchat/chatchat/data/knowledge_base/samples/content/webui2.py
deleted file mode 100644
index b2637adc..00000000
--- a/chatchat/chatchat/data/knowledge_base/samples/content/webui2.py
+++ /dev/null
@@ -1,237 +0,0 @@
-from nicegui import ui, Client, app, run
-from nicegui.events import ValueChangeEventArguments
-from configs import (VERSION, LLM_MODELS, TEMPERATURE, HISTORY_LEN,
-                     VECTOR_SEARCH_TOP_K, SEARCH_ENGINE_TOP_K)
-from webui_pages.utils import AsyncApiRequest
-import asyncio
-from typing import Any, List, Dict, Any
-
-
-app.add_static_files("/image", "img")
-
-
-class Session:
-    def __init__(self) -> None:
-        user = app.storage.user
-        for k, v in self._attrs().items():
-            user.setdefault(k, v)
-
-    def _attrs(self) -> Dict[str, Any]:
-        return {
-            "messages": [],
-            "query": "",
-            "thinking": False,
-            "cur_kb": "",
-            "cur_temperature": TEMPERATURE,
-            "chat_list": [],
-            "cur_chat": "",
-        }
-
-    @property
-    def user(self):
-        return app.storage.user
-
-    def __getattr__(self, attr: str) -> Any:
-        if attr in self._attrs():
-            return self.user[attr]
-        else:
-            raise AttributeError(attr)
-
-    def __setattr__(self, attr: str, val: Any) -> None:
-        if attr in self._attrs():
-            self.user[attr] = val
-        else:
-            raise AttributeError(attr)
-
-
-def make_header(left_drawer, right_drawer):
-    with ui.header().classes("bg-black p-2") as header:
-        with ui.link():
-            ui.icon("menu", size="md").on("click", lambda: left_drawer.toggle())
-        ui.image("img/logo-long-chatchat-trans-v2.png").props("fit=scale-down").classes("h-8 w-48 float-left")
-        left_header = ui.row().props('id="left-header"')
-        ui.element("q-space")
-        right_header = ui.row().props('id="right-header"')
-        ui.label(f"(Version: {VERSION})").classes("text-grey text-xs pt-4")
-        with ui.link():
-            ui.icon("menu", size="md").on("click", lambda: right_drawer.toggle())
-        return left_header, right_header
-
-
-def make_left_drawer(links: List, current: str):
-    with ui.left_drawer(bordered=True, elevated=True) as drawer:
-        return drawer
-
-
-@ui.refreshable
-async def output_messages():
-    session = Session()
-
-    for msg in session.messages:
-        is_user = msg["role"] == "user"
-        if is_user:
-            name = "User"
-            avatar = "/image/user_avatar.png"
-        else:
-            name = "AI"
-            avatar = "/image/chatchat_icon_blue_square_v2.png"
-        ele = ui.chat_message([], sent=False, name=None, avatar=avatar)
-        with ele.add_slot("default"):
-            ui.markdown(msg["content"])
-    
-    ui.query("img.q-message-avatar").classes("self-start")
-    (ui.query("div.q-message-text--received")
-     .classes("bg-green-100")
-     .style("border-radius: 5px;"))
-    # (ui.query("div.q-message-text--received")
-    #  .run_method("remove_classes", ["q-message-text--received"]))
-    # await ui.run_javascript("window.sc")
-
-
-@ui.page("/", title="Langchain-Chatchat WebUI")
-async def index(client: Client):
-    ui.add_head_html('''<style>
-                     p > code {color: green;padding: 2px;}
-                     pre:has(code) {background-color: #eee; padding: 10px;} !important
-                     </style>''')
-
-    async def send():
-        question = query.value.strip()
-        query.value = ""
-
-        if not question:
-            return
-
-        if question == "/clear":
-            session.messages = []
-            output_messages.refresh()
-            return
-        
-        session.thinking = True
-        session.messages.append({"role": "user", "content": question})
-        session.messages.append({"role": "assistant", "content": "Thinking..."})
-        output_messages.refresh()
-        await asyncio.sleep(0.1)
-
-        text = ""
-        async for chunk in api.chat_chat(question,
-                                   stream=True,
-                                   conversation_id=None,
-                                   model=cur_llm_model.value,
-                                   temperature=temperature.value):
-            text += chunk.get("text", "")
-            tail = " ▌"
-            if text.count("```") % 2 == 1:
-                if text[-1] != "`":
-                    tail += "\n```\n"
-                elif text[-2:] == "``":
-                    tail += "`\n"
-                elif text[-1:] == "`":
-                    tail += "``\n"
-            session.messages[-1]["content"] = text + tail
-            output_messages.refresh()
-            await asyncio.sleep(0.1)
-
-        session.messages[-1]["content"] = text
-        output_messages.refresh()
-        await asyncio.sleep(0.1)
-        session.thinking = False
-
-    session = Session()
-    api = AsyncApiRequest()
-
-    left_drawer = make_left_drawer([], "")
-
-    with ui.right_drawer(bordered=True, elevated=True) as right_drawer:
-        ui.markdown("### 灵感大全")
-        user_name = ui.input("用户名称", value="用户")
-        system_message = (ui.input("AI系统消息",
-                                   value="你是一个聪明的人工智能助手，可以回答用户提出的问题。")
-                            .props("autogrow"))
-        chat_image = ui.upload(label="上传图片").classes("w-full mt-5")
-        chat_file = ui.upload(label="上传文件").classes("w-full mt-5")
-
-    left_header, right_header = make_header(left_drawer, right_drawer)
-
-    with left_header:
-        chat_session = (ui.radio(["会话1", "会话2"], value="会话1")
-                        .props("inline")
-                        .classes("p-0"))
-
-    with left_drawer:
-        ui.markdown("### 配置项")
-
-        def on_chat_mode_change(e: ValueChangeEventArguments):
-            if e.value == "Agent对话":
-                session.cur_temperature = temperature.value
-                temperature.set_value(0.01)
-            else:
-                temperature.set_value(session.cur_temperature)
-
-        chat_mode = ui.select(["LLM 对话", "知识库问答", "搜索引擎问答", "Agent对话"],
-                            label="对话模式",
-                            value="LLM 对话",
-                            on_change=on_chat_mode_change,
-                            )
-        ui.separator()
-
-        with ui.expansion("模型配置", icon="psychology", value=True):
-            running_models = await api.list_running_models()
-            config_models = await api.list_config_models()
-            models = {x: f"{x}(running)" for x in running_models}
-            for v in config_models.values():
-                for m in v:
-                    if m not in running_models:
-                        models.update({m: m})
-            cur_llm_model = ui.select(models, label="LLM模型", value=LLM_MODELS[0], with_input=True, clearable=True)
-            temperature = ui.number("Temperature", value=TEMPERATURE, min=0, max=1, step=0.01)
-            history_len = ui.number("历史对话轮数", value=HISTORY_LEN, min=0, max=10)
-
-        with (ui.expansion("知识库配置", icon="book", value=True)
-              .bind_visibility_from(chat_mode, "value", value="知识库问答")):
-            def on_kb_change(e: ValueChangeEventArguments):
-                session.cur_kb = e.value
-
-            kb_names = await api.list_knowledge_bases()
-            kb_name = ui.select(kb_names,
-                                label="知识库",
-                                value=session.cur_kb or kb_names[0],
-                                on_change=on_kb_change,
-                                )
-            vector_top_k = ui.number("Top K", value=VECTOR_SEARCH_TOP_K, min=1, max=10)
-
-        with (ui.expansion("搜索引擎配置", icon="travel_explore", value=True)
-              .bind_visibility_from(chat_mode, "value", value="搜索引擎问答")):
-            search_engine = ui.select(["Bing", "Duckduckgo"], value="Bing")
-            search_top_k = ui.number("Top K", value=SEARCH_ENGINE_TOP_K, min=1, max=10)
-
-    await client.connected()
-    with ui.column():
-        await output_messages()
-
-    with ui.row().classes("absolute bottom-2 left-20 right-20"):
-        # command = ui.select(["/clear", "/upload"]).classes("w-1/4")
-        query = (ui.input(autocomplete=["/clear", "/upload"],
-                          placeholder="input your question here.")
-                          .classes("flex-grow")
-                          .props('autogrow outlined autofocus counter dense clearable')
-                          .bind_value(session, "query")
-                          .on("keydown.enter.prevent", send)
-        )
-        with query.add_slot("after"):
-            ui.button(icon="send", on_click=send).classes("self-center").props("small dense p-0 m-0")
-        # query._props["autofocus"] = True
-        # query._props["autogrow"] = True
-        # query._props["placeholder"] = "input your question here."
-        # query._props[":list"] = '["/clear", "/upload"]'
-        # query._props["shadow-text"] = ["/clear", "/upload"]
-        # ui.input(autocomplete=["/clear", "/upload"])
-
-
-
-# TODO: 
-# 右侧栏上下文：system_message, picture, file, 知识库文档预览
-
-
-if __name__ in {"__main__", "__mp_main__"}:
-    ui.run(port=5000, storage_secret="111111", reload=True)
diff --git a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/Home.md b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/Home.md
deleted file mode 100644
index 71a324cb..00000000
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/Home.md
+++ /dev/null
@@ -1,74 +0,0 @@
-
-![](https://github.com/chatchat-space/Langchain-Chatchat/blob/master/img/logo-long-chatchat-trans-v2.png)
-
-> 欢迎来到 Langchain‐Chatchat 的 Wiki , 在这里开启 Langchain 与大模型的邂逅!
-
-
-## 项目简介
-
-📃 **LangChain-Chatchat** (原 Langchain-ChatGLM):  基于 Langchain 与 ChatGLM 等大语言模型的本地知识库问答应用实现。
-
-🤖️ 一种利用 [langchain](https://github.com/hwchase17/langchain) 思想实现的基于本地知识库的问答应用，目标期望建立一套对中文场景与开源模型支持友好、可离线运行的知识库问答解决方案。
-
-💡 受 [GanymedeNil](https://github.com/GanymedeNil) 的项目 [document.ai](https://github.com/GanymedeNil/document.ai) 和 [AlexZhangji](https://github.com/AlexZhangji) 创建的 [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) 启发，建立了全流程可使用开源模型实现的本地知识库问答应用。本项目的最新版本中通过使用 [FastChat](https://github.com/lm-sys/FastChat) 接入 Vicuna, Alpaca, LLaMA, Koala, RWKV 等模型，依托于 [langchain](https://github.com/langchain-ai/langchain) 框架支持通过基于 [FastAPI](https://github.com/tiangolo/fastapi) 提供的 API 调用服务，或使用基于 [Streamlit](https://github.com/streamlit/streamlit) 的 WebUI 进行操作。
-
-✅ 依托于本项目支持的开源 LLM 与 Embedding 模型，本项目可实现全部使用**开源**模型**离线私有部署**。与此同时，本项目也支持 OpenAI GPT API 的调用，并将在后续持续扩充对各类模型及模型 API 的接入。
-
-⛓️ 本项目实现原理如下图所示，过程包括加载文件 -> 读取文本 -> 文本分割 -> 文本向量化 -> 问句向量化 -> 在文本向量中匹配出与问句向量最相似的 `top k`个 -> 匹配出的文本作为上下文和问题一起添加到 `prompt`中 -> 提交给 `LLM`生成回答。
-
-
-## 算法流程
-
-大家可以前往Bilibili平台查看原理介绍视频：
-
-📺 [原理介绍视频](https://www.bilibili.com/video/BV13M4y1e7cN/?share_source=copy_web&vd_source=e6c5aafe684f30fbe41925d61ca6d514)
-
-开发组也为大家绘制了一张实现原理图，效果如下：
-
-![实现原理图](https://github.com/chatchat-space/Langchain-Chatchat/blob/master/img/langchain+chatglm.png)
-
-从文档处理角度来看，实现流程如下：
-
-![实现原理图2](https://github.com/chatchat-space/Langchain-Chatchat/blob/master/img/langchain+chatglm2.png)
-
-
-## 技术路线图（截止0.2.10）
-
-- [X] Langchain 应用
-  - [X] 本地数据接入
-    - [X] 接入非结构化文档
-      - [X] .txt, .rtf, .epub, .srt
-      - [X] .eml, .msg
-      - [X] .html, .xml, .toml, .mhtml
-      - [X] .json, .jsonl
-      - [X] .md, .rst
-      - [X] .docx, .doc, .pptx, .ppt, .odt
-      - [X] .enex
-      - [X] .pdf
-      - [X] .jpg, .jpeg, .png, .bmp
-      - [X] .py, .ipynb
-    - [X] 结构化数据接入
-      - [X] .csv, .tsv
-      - [X] .xlsx, .xls, .xlsd
-    - [X] 分词及召回
-      - [X] 接入不同类型 TextSplitter
-      - [X] 优化依据中文标点符号设计的 ChineseTextSplitter
-  - [X] 搜索引擎接入
-    - [X] Bing 搜索
-    - [X] DuckDuckGo 搜索
-    - [X] Metaphor 搜索
-  - [X] Agent 实现
-    - [X] 基础React形式的Agent实现，包括调用计算器等
-    - [X] Langchain 自带的Agent实现和调用
-    - [X] 智能调用不同的数据库和联网知识
-- [X] LLM 模型接入
-  - [X] 支持通过调用 [FastChat](https://github.com/lm-sys/fastchat) api 调用 llm
-  - [X] 支持 ChatGLM API 等 LLM API 的接入
-  - [X] 支持 Langchain 框架支持的LLM API 接入
-- [X] Embedding 模型接入
-  - [X] 支持调用 HuggingFace 中各开源 Embedding 模型
-  - [X] 支持 OpenAI Embedding API 等 Embedding API 的接入
-  - [X] 支持 智谱AI、百度千帆、千问、MiniMax 等在线 Embedding API 的接入
-- [X] 基于 FastAPI 的 API 方式调用
-- [X] Web UI
-  - [X] 基于 Streamlit 的 Web UI
\ No newline at end of file
diff --git a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/_Sidebar.md b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/_Sidebar.md
deleted file mode 100644
index 6d7fb6f6..00000000
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/_Sidebar.md
+++ /dev/null
@@ -1,58 +0,0 @@
-__导航栏，一切从这里出发__
-## [Home](https://github.com/chatchat-space/Langchain-Chatchat/wiki)
-## [支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
-* [LLM 模型支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#llm-%E6%A8%A1%E5%9E%8B%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
-* [Embedding 模型支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#embedding-%E6%A8%A1%E5%9E%8B%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
-* [分词器支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#%E5%88%86%E8%AF%8D%E5%99%A8%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
-* [向量数据库支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#%E5%90%91%E9%87%8F%E6%95%B0%E6%8D%AE%E5%BA%93%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
-* [工具支持列表](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8#%E5%B7%A5%E5%85%B7%E6%94%AF%E6%8C%81%E5%88%97%E8%A1%A8)
-
-## [开发环境部署](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2)
-
-### 前期准备
-  * [软件要求](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E8%BD%AF%E4%BB%B6%E8%A6%81%E6%B1%82)
-  * [硬件要求](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E7%A1%AC%E4%BB%B6%E8%A6%81%E6%B1%82)
-  * [VPN](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#vpn)
-
-### 部署代码
-  * [Docker 部署](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#docker-%E9%83%A8%E7%BD%B2)
-  * [最轻模式部署方案](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E6%9C%80%E8%BD%BB%E6%A8%A1%E5%BC%8F%E6%9C%AC%E5%9C%B0%E9%83%A8%E7%BD%B2%E6%96%B9%E6%A1%88)
-  * [常规模式本地部署方案](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E5%B8%B8%E8%A7%84%E6%A8%A1%E5%BC%8F%E6%9C%AC%E5%9C%B0%E9%83%A8%E7%BD%B2%E6%96%B9%E6%A1%88)
-    + [环境安装](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E6%9C%AC%E5%9C%B0%E9%83%A8%E7%BD%B2%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85)
-    + [模型下载](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E6%A8%A1%E5%9E%8B%E4%B8%8B%E8%BD%BD)
-    + [初始化知识库](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E5%88%9D%E5%A7%8B%E5%8C%96%E7%9F%A5%E8%AF%86%E5%BA%93)
-    + [一键启动](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E4%B8%80%E9%94%AE%E5%90%AF%E5%8A%A8)
-    + [多卡加载](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E9%83%A8%E7%BD%B2#%E5%A4%9A%E5%8D%A1%E5%8A%A0%E8%BD%BD)
-
-## [参数配置](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE)
-
-* [基础配置项 basic_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E5%9F%BA%E7%A1%80%E9%85%8D%E7%BD%AE%E9%A1%B9-basic_configpy)
-* [模型配置项 model_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E6%A8%A1%E5%9E%8B%E9%85%8D%E7%BD%AE%E9%A1%B9-model_configpy)
-* [提示词配置项 prompt_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E6%8F%90%E7%A4%BA%E8%AF%8D%E9%85%8D%E7%BD%AE%E9%A1%B9-prompt_configpy)
-* [数据库配置 kb_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E6%95%B0%E6%8D%AE%E5%BA%93%E9%85%8D%E7%BD%AE-kb_configpy)
-* [服务和端口配置项 server_config.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E6%9C%8D%E5%8A%A1%E5%92%8C%E7%AB%AF%E5%8F%A3%E9%85%8D%E7%BD%AE%E9%A1%B9-server_configpy)
-* [覆盖配置文件 或者配置 startup.py](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE#%E8%A6%86%E7%9B%96%E9%85%8D%E7%BD%AE%E6%96%87%E4%BB%B6-%E6%88%96%E8%80%85%E9%85%8D%E7%BD%AE-startuppy)
-
-## [自定义](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89)
-
-* [使用自定义的分词器](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E5%88%86%E8%AF%8D%E5%99%A8)
-* [使用自定义的 Agent 工具](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84-agent-%E5%B7%A5%E5%85%B7)
-* [使用自定义的微调模型](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E5%BE%AE%E8%B0%83%E6%A8%A1%E5%9E%8B)
-* [使用自定义的嵌入模型](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E8%87%AA%E5%AE%9A%E4%B9%89#%E4%BD%BF%E7%94%A8%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E5%B5%8C%E5%85%A5%E6%A8%A1%E5%9E%8B)
-* [日志功能](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E6%97%A5%E5%BF%97%E5%8A%9F%E8%83%BD)
-
-## [最佳实践](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5)
-* [推荐的模型组合](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E6%8E%A8%E8%8D%90%E7%9A%84%E6%A8%A1%E5%9E%8B%E7%BB%84%E5%90%88)
-* [微调模型加载实操](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E5%BE%AE%E8%B0%83%E6%A8%A1%E5%9E%8B%E5%8A%A0%E8%BD%BD%E5%AE%9E%E6%93%8D)
-* [预处理知识库文件](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E9%A2%84%E5%A4%84%E7%90%86%E7%9F%A5%E8%AF%86%E5%BA%93%E6%96%87%E4%BB%B6)
-* [自定义的关键词调整Embedding模型](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E8%87%AA%E5%AE%9A%E4%B9%89%E7%9A%84%E5%85%B3%E9%94%AE%E8%AF%8D%E8%B0%83%E6%95%B4embedding%E6%A8%A1%E5%9E%8B)
-* [实际使用效果](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5#%E5%AE%9E%E9%99%85%E4%BD%BF%E7%94%A8%E6%95%88%E6%9E%9C)
-
-## [做出贡献](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%81%9A%E5%87%BA%E8%B4%A1%E7%8C%AE)
-
-* [Issue 规范](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%81%9A%E5%87%BA%E8%B4%A1%E7%8C%AE#issue-%E8%A7%84%E8%8C%83)
-* [PR 规范](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%81%9A%E5%87%BA%E8%B4%A1%E7%8C%AE#pr-%E8%A7%84%E8%8C%83)
-
-## [合作伙伴](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%90%88%E4%BD%9C%E4%BC%99%E4%BC%B4)
-
-## [常见问题](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)
\ No newline at end of file
diff --git a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/做出贡献.md b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/做出贡献.md
deleted file mode 100644
index 50d73b7f..00000000
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/做出贡献.md
+++ /dev/null
@@ -1,51 +0,0 @@
-## Issue 规范
-> 什么样的 issue 是不会被回复的
-
-1. 在提出issue前，请查看您提出的问题是否已经在 issue 列表或者 discussion 内出现，提出重复的问题将 **被关闭** 。
-2. 非项目推荐配置的任何关于环境配置问题的 issue 通常将  **不会由官方回复**，请您在微信沟通群内咨询。
-3. 与项目无关的 issue 将  **不会被回复** 。
-4. 超过30天没有更新动态的 issue 将  **被关闭** 。
-5. 语言非中文和英语的 issue 将  **被关闭** 。
-6. 没有尝试过解决方案的 issue 将  **被关闭** 。
-7. 没有提出任何贡献（例如PR，论文）的 feature / enhancement 将会 **被关闭** 。您可以在 discussion 中的 **希望开发的功能** 讨论区中留言，我们开发组会进行回复。
-8. 不按照 Issue 规范提出的 issue 可能将 **被关闭** 。
-
-> 如何提 issue
-
-1. 简要阐述你的问题
-2. 配上报错日志(运行报错时)或者运行不理想的效果图(原本期望和现实的对比)
-3. 配上对应的配置文件以及你的环境信息
-4. 你尝试过的解决方法。（非常重要）
-5. 按照模板提出Issue
-
-## PR 规范 
-
-> 什么样的 PR 是不会被接受的
-1. 并非紧急bug修复、却直接提交到```master```的PR。
-2. 仅仅修改```Readme.md```和```配置文件```的。
-3. 跟项目组已经开发的内容冲突的(dev版本)，将可能被拒绝。
-
-首先请注意所有的PR需要以dev分支为基准，master分支仅用来发行与紧急bug修复。
-
-> 提出新的通用自定义分词器
-
-1. 将您的分词器所在的代码文件放在```text_splitter```文件夹下，文件名为您的分词器名字`my_splitter.py`，然后在`__init__.py`中导入您的分词器。
-2. 发起PR，并说明您的分词器面向的场景或者改进之处。我们非常期待您能举例一个具体的应用场景。
-
-> 提出新的 Agent 工具
-
-1. 将您的Agent工具所在的代码放在 ```server/agent```文件夹下，文件名为您的工具名字`my_tools.py`，然后在`tools.py`中导入您的工具。
-2. 发起PR，说明您的工具面向的场景或改进之处，并说明如何进行测试和调用。我们非常期待您能举例一个具体的应用场景。
-
-> 提出新的自定义模型
-
-1. 将您的模型贡献到huggingface平台上，并开放给开发人员下载。
-2. 发起PR，说明您的工具面向的场景或改进之处，并说明如何进行测试和调用。我们非常期待您能举例一个具体的应用场景。
-3. 由开发人员测试通过后，将您的模型添加到合作模型名单中。
-
-
-> 修复 Bug & 增加其他新功能
-
-1. 一个 PR 中必须 **只有一个或者一类功能增加，或者修复一个bug** ，多个功能混合的 PR 将 **不会被接受** 。
-2. 说明您增加的功能或者改进之处，并说明如何进行测试和调用。我们非常期待您能举例一个具体的应用场景。
-
diff --git a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/参数配置.md b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/参数配置.md
deleted file mode 100644
index 23408613..00000000
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/参数配置.md
+++ /dev/null
@@ -1,156 +0,0 @@
-在开始参数配置之前，先执行以下脚本
-```shell
-python copy_config_example.py
-```
-该脚本将会将所有```config```目录下的配置文件样例复制一份到```config```目录下，方便开发者进行配置。
-接着，开发者可以根据自己的需求，对配置文件进行修改。
-
-## 基础配置项 basic_config.py
-该配置项负责记录日志的格式和储存路径，通常不需要修改。
-
-## 模型配置项 model_config.py
-本文件包含本地LLM模型、本地Embeddings模型、在线LLM模型API的相关配置。
-
-- 本地模型路径配置。建议将所有下载的模型放到一个统一的目录下，然后将`MODEL_ROOT_PATH`指定为该目录，只要模型目录名称符合下列情况之一（以text2vec为例），即可自动识别加载：
-  - text2vec，即MODEL_PATH中的键
-  - GanymedeNil/text2vec-large-chinese，即MODEL_PATH中的值
-  - text2vec-large-chinese，即MODEL_PATH中的值的简写形式
-
-- 在线模型API配置。在`ONLINE_LLM_MODEL`已经预先写好了所有支持的在线API服务，通常只需要把申请的API_KEY等填入即可。
-有些在线API服务需要安装额外的依赖：
-  - zhipu-api: zhipuai
-  - fangzhou-api: volcengine>=1.0.106
-  - qianfan-api: qianfan
-  - qwen-api: dashscope
-
-- HISTORY_LEN。历史对话轮数通常不建议设置超过10，因为这可能导致以下问题
-  1. 显存占用过高：尤其是部分模型，本身就已经要占用满显存的情况下，保留太多历史，一次传入token太多，可能会爆显存。
-  2. 速度处理很慢：还是因为一次传入了太多token，导致速度很慢。
-
-- TEMPERATURE。通常不建议设置过高。
-在Agent对话模式和知识库问答中，我们强烈建议将其设置成0或者接近于0。
-
-- Agent_MODEL = None
-我们支持用户使用“模型接力赛”的用法，即：
-选择的大模型仅能调用工具，但是在工具中表现较差，则这个工具作为 “模型调用工具”
-如果用户设置了Agent_MODEL，则在 Agent 中，使用Agent_MODEL来执行任务，否则，使用LLM_MODEL
-
-
-## 提示词配置项 prompt_config.py
-
-提示词配置分为三个板块，分别对应三种聊天类型。
-- llm_chat: 基础的对话提示词， 通常来说，直接是用户输入的内容，没有系统提示词。
-- knowledge_base_chat: 与知识库对话的提示词，在模板中，我们为开发者设计了一个系统提示词，开发者可以自行更改。
-- agent_chat: 与Agent对话的提示词，同样，我们为开发者设计了一个系统提示词，开发者可以自行更改。
-
-prompt模板使用Jinja2语法，简单点就是用双大括号代替f-string的单大括号
-请注意，本配置文件支持热加载，修改prompt模板后无需重启服务。
-
-## 数据库配置 kb_config.py
-请确认本地分词器路径是否已经填写，如：
-
-```
-text_splitter_dict = {
-   "ChineseRecursiveTextSplitter": {
-       "source":"huggingface",  # 选择tiktoken则使用openai的方法,不填写则默认为字符长度切割方法。
-       "tokenizer_name_or_path":"", # 空格不填则默认使用大模型的分词器。 
-    }
-}
-```
-设置好的分词器需要在```TEXT_SPLITTER_NAME```中指定并应用。
-
-在这里，通常使用```huggingface```的方法，并且，我们推荐使用大模型自带的分词器来完成任务。
-
-请注意，使用```gpt2```分词器将要访问huggingface官网下载权重。
-
-我们还支持使用```tiktoken``` 和传统的 按照长度分词的方式，开发者可以自行配置。
-
-如果希望调用自己的分词器，请参考[最佳实践]部分。
-
-```kbs_config```设置了使用的向量数据库，目前可以选择
-- ```faiss```: 使用faiss数据库，需要安装faiss-gpu
-- ```milvus```: 使用milvus数据库，需要安装milvus并进行端口配置
-- ```pg```: 使用pg数据库，需要配置connection_uri
-
-## 服务和端口配置项 server_config.py
-
-通常，这个页面并不需要进行大量的修改，仅需确保对应的端口打开，并不互相冲突即可。
-
-如果你是Linux系统推荐设置
-
-```
-DEFAULT_BIND_HOST ="0.0.0.0"
-```
-如果使用联网模型，则需要关注联网模型的端口。
-
-这些模型必须是在model_config.MODEL_PATH或ONLINE_MODEL中正确配置的。
-
-#在启动startup.py时，可以通过`--model-worker --model-name xxxx`指定模型，不指定则为LLM_MODEL
-
-
-## 覆盖配置文件 或者配置 startup.py
-
-在 ```server_config.py```中有以下配置文件被注释了
-
-```
-"gpus": None, # 使用的GPU，以str的格式指定，如"0,1"，如失效请使用CUDA_VISIBLE_DEVICES="0,1"等形式指定
-"num_gpus": 1, # 使用GPU的数量
-"max_gpu_memory":"20GiB", # 每个GPU占用的最大显存
-
- 以下为model_worker非常用参数，可根据需要配置
-"load_8bit": False, # 开启8bit量化
-"cpu_offloading": None,
-"gptq_ckpt": None,
-"gptq_wbits": 16,
-"gptq_groupsize": -1,
-"gptq_act_order": False,
-"awq_ckpt": None,
-"awq_wbits": 16,
-"awq_groupsize": -1,
-"model_names": [LLM_MODEL],
-"conv_template": None,
-"limit_worker_concurrency": 5,
-"stream_interval": 2,
-"no_register": False,
-"embed_in_truncate": False,
-
- 以下为vllm_worker配置参数,注意使用vllm必须有gpu，仅在Linux测试通过
-
- tokenizer = model_path # 如果tokenizer与model_path不一致在此处添加
- 'tokenizer_mode':'auto',
- 'trust_remote_code':True,
- 'download_dir':None,
- 'load_format':'auto',
- 'dtype':'auto',
- 'seed':0,
- 'worker_use_ray':False,
- 'pipeline_parallel_size':1,
- 'tensor_parallel_size':1,
- 'block_size':16,
- 'swap_space':4 , # GiB
- 'gpu_memory_utilization':0.90,
- 'max_num_batched_tokens':2560,
- 'max_num_seqs':256,
- 'disable_log_stats':False,
- 'conv_template':None,
- 'limit_worker_concurrency':5,
- 'no_register':False,
- 'num_gpus': 1,
- 'engine_use_ray': False,
- 'disable_log_requests': False
-```
-
-在这些参数中，如果没有设置，则使用```startup.py```中的默认值，如果设置了，则使用设置的值。
-因此，强烈建议开发者不要在```startup.py```中进行配置，而应该在```server_config.py```中进行配置。避免配置文件覆盖。
-
-## 选择使用的模型
-在```model_config.py```完成模型配置后，还不能直接使用，需要在该文件下配置本地模型的运行方式或在线模型的API，例如
-```
-    "agentlm-7b": { # 使用default中的IP和端口
-       "device": "cuda",
-    },
-    "zhipu-api": { # 请为每个要运行的在线API设置不同的端口
-        "port": 21001,
-    },
-```
-本地模型使用default中的IP和端口，在线模型可以自己选择端口
\ No newline at end of file
diff --git a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/合作伙伴.md b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/合作伙伴.md
deleted file mode 100644
index e84b59c8..00000000
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/合作伙伴.md
+++ /dev/null
@@ -1,37 +0,0 @@
-## 合作伙伴名单
-🎉 Langchain-Chatchat 项目合作伙伴，感谢以下合作伙伴对本项目的支持。
-
-<table style="width:100%; border-collapse:collapse;">
-  <tr>
-    <td style="width:30%; text-align:center; vertical-align:middle;">
-      <img src="https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/partners/chatglm.svg" alt="ChatGLM Logo" width="300" height="100">
-    </td>
-    <td style="width:80%; vertical-align:middle;">
-      <a href="https://chatglm.cn/" target="_blank" style="text-decoration:none;">ChatGLM: 国内最早的开源中文大模型之一</a>
-    </td>
-  </tr>
-  <tr>
-    <td style="width:30%; text-align:center; vertical-align:middle;">
-      <img src="https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/partners/autodl.svg" alt="AutoDL Logo" width="300" height="100">
-    </td>
-    <td style="width:80%; vertical-align:middle;">
-      <a href="https://www.autodl.com/" target="_blank" style="text-decoration:none;"> AutoDL 提供弹性、好用、省钱的云GPU租用服务。缺显卡就上 AutoDL.com </a>
-    </td>
-  </tr>
-  <tr>
-    <td style="width:30%; text-align:center; vertical-align:middle;">
-      <img src="https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/partners/aws.svg" alt="AWS Logo" width="300" height="100">
-    </td>
-    <td style="width:80%; vertical-align:middle;">
-      <a href="https://aws.amazon.com/" target="_blank" style="text-decoration:none;"> 全球云计算领导者 </a>
-    </td>
-  </tr>
-  <tr>
-    <td style="width:30%; text-align:center; vertical-align:middle;">
-      <img src="https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/partners/zhenfund.svg" alt="ZhenFund Logo" width="300" height="100">
-    </td>
-    <td style="width:80%; vertical-align:middle;">
-      <a href="https://www.zhenfund.com/" target="_blank" style="text-decoration:none;">我们相信预测未来的最好方式是自己来创造。我们在这里等你。</a>
-    </td>
-  </tr>
-</table>
diff --git a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/常见问题.md b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/常见问题.md
deleted file mode 100644
index 9203d521..00000000
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/常见问题.md
+++ /dev/null
@@ -1,277 +0,0 @@
-> 以下是一些常见的问题和回答
-#### Q: 我要提出问题，怎么办
-
-A: 首先，你要观察一下你的问题是否已经被解决，建议翻看以往的Issue和Discussion，如果有，先按照他们的方法来做。
-如果没有，按照以下步骤
-1. 这是一个bug还是一个讨论问题，如果是讨论问题，放在discussion，如果是bug和feature，放在issue。
-2. 如果要提出feature，提交一份对应的PR会让开发者更重视你的问题，否则你的问题很有可能被直接关闭。
-
-#### Q: ValueError: Found modules on cpu/disk. Using Exllama backend requires all the modules to be on GPU. You can deactivate exllama backend by setting disable_exllama=True in the quantization config object.
-
-A: 这是Fschat依赖源码的问题，请查看以下解决方式，通过修改'Fschat'库中的对应内容。
-
-https://github.com/lm-sys/FastChat/issues/2459
-
-https://stackoverflow.com/questions/76983305/fine-tuning-thebloke-llama-2-13b-chat-gptq-model-with-hugging-face-transformers
-
----
-
-#### Q: AttributeError: 'ChatGLMTokenizer' object has no attribute 'tokenizer'
-
-A: 查看以下Issue
-
-https://github.com/chatchat-space/Langchain-Chatchat/issues/1835
-
----
-
-#### Q: 使用Qwen API key 报错 `multiple widgets with the same key=""`
-
-A: 确保你的key是`dashscope`平台的key。并保证`dashscope`依赖满足我们的依赖版本。
-
----
-
-#### Q：linux下向量化PDF文件时出错：`ImportError: 从文件 *.pdf 加载文档时出错：libGL.so.1: cannot open shared object file: No such file or directory`
-
-A： 这是系统缺少必要的动态库，可以手动安装：`libgl1-mesa-glx` 和 `libglib2.0-0`
-
----
-
-#### Q: 各种Int4模型无法载入
-A. 由于各种Int4模型与Fp16模型并不相似，且量化技术可能有所不同，无法载入可能是因为fschat不支持或者缺少对应的依赖，需要查看对应仓库的issue获得更多信息。开发组没有针对Int4模型进行优化。
-
----
-
-#### Q1: 本项目支持哪些文件格式？
-
-A1: 目前已测试支持 txt、docx、md、pdf、csv、html、json 等格式文件
-
-更多文件格式请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)。目前已知文档中若含有特殊字符，可能存在文件无法加载的问题。
-
----
-
-#### Q2: 使用过程中 Python 包 `nltk`发生了 `Resource punkt not found.`报错，该如何解决？
-
-A2: 方法一：将 https://github.com/nltk/nltk_data/raw/gh-pages/packages/tokenizers/punkt.zip 下载，把其中的 `packages/tokenizers` 解压，放到 `nltk_data/tokenizers` 存储路径下。
-
-`nltk_data` 存储路径可以通过 `nltk.data.path` 查询。
-
-方法二：执行python代码
-
-```
-import nltk
-nltk.download()
-```
-
----
-
-#### Q3: 使用过程中 Python 包 `nltk`发生了 `Resource averaged_perceptron_tagger not found.`报错，该如何解决？
-
-A3: 
-
-方法一：将 https://github.com/nltk/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip 下载，解压放到 `nltk_data/taggers` 存储路径下。
-
-`nltk_data` 存储路径可以通过 `nltk.data.path` 查询。
-
-方法二：执行python代码
-
-```
-import nltk
-nltk.download()
-```
-
----
-
-#### Q4: 本项目可否在 colab 中运行？
-
-A4: 可以尝试使用 chatglm-6b-int4 模型在 colab 中运行，需要注意的是，如需在 colab 中运行 Web UI，需将 `webui.py`中 `demo.queue(concurrency_count=3).launch( server_name='0.0.0.0', share=False, inbrowser=False)`中参数 `share`设置为 `True`。
-
----
-
-#### Q5: 在 Anaconda 中使用 pip 安装包无效如何解决？
-
-A5: 此问题是系统环境问题，详细见  [在Anaconda中使用pip安装包无效问题](在Anaconda中使用pip安装包无效问题.md)
-
----
-
-#### Q6: 本项目中所需模型如何下载至本地？
-
-A6: 本项目中使用的模型均为 `huggingface.co` 中可下载的开源模型，以默认选择的 `chatglm-6b`和 `text2vec-large-chinese`模型为例，下载模型可执行如下代码：
-
-```shell
-# 安装 git lfs
-$ git lfs install
-
-# 下载 LLM 模型
-$ git clone https://huggingface.co/THUDM/chatglm-6b /your_path/chatglm-6b
-
-# 下载 Embedding 模型
-$ git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese /your_path/text2vec
-
-# 模型需要更新时，可打开模型所在文件夹后拉取最新模型文件/代码
-$ git pull
-```
-
----
-
-#### Q7: `huggingface.co`中模型下载速度较慢怎么办？
-
-A7: 可使用本项目用到的模型权重文件百度网盘地址：
-
-- ernie-3.0-base-zh.zip 链接: https://pan.baidu.com/s/1CIvKnD3qzE-orFouA8qvNQ?pwd=4wih
-- ernie-3.0-nano-zh.zip 链接: https://pan.baidu.com/s/1Fh8fgzVdavf5P1omAJJ-Zw?pwd=q6s5
-- text2vec-large-chinese.zip 链接: https://pan.baidu.com/s/1sMyPzBIXdEzHygftEoyBuA?pwd=4xs7
-- chatglm-6b-int4-qe.zip 链接: https://pan.baidu.com/s/1DDKMOMHtNZccOOBGWIOYww?pwd=22ji
-- chatglm-6b-int4.zip 链接: https://pan.baidu.com/s/1pvZ6pMzovjhkA6uPcRLuJA?pwd=3gjd
-- chatglm-6b.zip 链接: https://pan.baidu.com/s/1B-MpsVVs1GHhteVBetaquw?pwd=djay
-
----
-
-#### Q8: 老版本和新版本无法兼容怎么办？
-
-A8: 保存老版本的配置文件，删除老版本代码并下载新版本代码后，根据新版本的配置文件格式进行修改。
-
-在 ```0.2.6```后，运行环境和配置文件发生重大变化，建议重新配置环境和配置文件，并重建知识库。
-
-
----
-
-#### Q9: 显卡内存爆了，提示 "OutOfMemoryError: CUDA out of memory"
-
-A9: 将 `VECTOR_SEARCH_TOP_K` 和 `HISTORY_LEN` 的值调低，比如 `VECTOR_SEARCH_TOP_K = 3` 和 `HISTORY_LEN = 2`，这样由 `query` 和 `context` 拼接得到的 `prompt` 会变短，从而减少显存的占用。或者使用量化模型减少显存占用。
-
----
-
-#### Q10: 执行 `pip install -r requirements.txt` 过程中遇到 python 包，如 langchain 找不到对应版本的问题
-
-A10: 更换 pypi 源后重新安装，如阿里源、清华源等，网络条件允许时建议直接使用 pypi.org 源，具体操作命令如下：
-
-```shell
-# 使用 pypi 源
-$ pip install -r requirements.txt -i https://pypi.python.org/simple
-```
-
-或
-
-```shell
-# 使用阿里源
-$ pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/
-```
-
-或
-
-```shell
-# 使用清华源
-$ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
-```
-
----
-
-#### Q11: 启动 api.py 时 upload_file 接口抛出 `partially initialized module 'charset_normalizer' has no attribute 'md__mypyc' (most likely due to a circular import)`
-
-A11: 这是由于 charset_normalizer 模块版本过高导致的，需要降低 charset_normalizer 的版本，测试在 charset_normalizer==2.1.0 上可用。
-
----
-
-#### Q12: 调用api中的 `bing_search_chat` 接口时，报出 `Failed to establish a new connection: [Errno 110] Connection timed out`
-
-A12: 这是因为服务器加了防火墙，需要联系管理员加白名单，如果公司的服务器的话，就别想了GG--!
-
----
-
-#### Q13: 加载 chatglm-6b-int8 或 chatglm-6b-int4 抛出 `RuntimeError: Only Tensors of floating point and complex dtype can require gradients`
-
-A13: 疑为 chatglm 的 quantization 的问题或 torch 版本差异问题，针对已经变为 Parameter 的 torch.zeros 矩阵也执行 Parameter 操作，从而抛出 `RuntimeError: Only Tensors of floating point and complex dtype can require gradients`。解决办法是在 chatglm 项目的原始文件中的 quantization.py 文件 374 行改为：
-
-```
-    try:
-        self.weight =Parameter(self.weight.to(kwargs["device"]), requires_grad=False)
-    except Exception as e:
-        pass
-```
-
-    如果上述方式不起作用，则在.cache/huggingface/modules/目录下针对chatglm项目的原始文件中的quantization.py文件执行上述操作，若软链接不止一个，按照错误提示选择正确的路径。
-
-注：虽然模型可以顺利加载但在cpu上仍存在推理失败的可能：即针对每个问题，模型一直输出gugugugu。
-
-    因此，最好不要试图用cpu加载量化模型，原因可能是目前python主流量化包的量化操作是在gpu上执行的,会天然地存在gap。
-
----
-
-#### Q14: 修改配置中路径后，加载 text2vec-large-chinese 依然提示 `WARNING: No sentence-transformers model found with name text2vec-large-chinese. Creating a new one with MEAN pooling.`
-
-A14: 尝试更换 embedding，如 text2vec-base-chinese，请在 [configs/model_config.py](../configs/model_config.py) 文件中，修改 `text2vec-base`参数为本地路径，绝对路径或者相对路径均可
-
----
-
-#### Q15: 使用pg向量库建表报错
-
-A15: 需要手动安装对应的vector扩展(连接pg执行 CREATE EXTENSION IF NOT EXISTS vector)
-
----
-
-#### Q16: pymilvus 连接超时
-
-A16: pymilvus 版本需要和 milvus 版本相匹配，否则会超时，可参考 pymilvus==2.1.3
-
----
-
-#### Q17: 使用vllm推理加速框架时，已经下载了模型但出现HuggingFace通信问题
-
-A17: 参照如下代码修改python环境下/site-packages/vllm/model_executor/weight_utils.py文件的prepare_hf_model_weights函数如下对应代码：
-
-```python
-
-    if not is_local:
-        # Use file lock to prevent multiple processes from
-        # downloading the same model weights at the same time.
-        model_path_temp = os.path.join(
-            os.getenv("HOME"),
-            ".cache/huggingface/hub",
-            "models--" + model_name_or_path.replace("/", "--"),
-            "snapshots/",
-        )
-        downloaded = False
-        if os.path.exists(model_path_temp):
-            temp_last_dir = os.listdir(model_path_temp)[-1]
-            model_path_temp = os.path.join(model_path_temp, temp_last_dir)
-            base_pattern = os.path.join(model_path_temp, "pytorch_model*.bin")
-            files = glob.glob(base_pattern)
-            if len(files) > 0:
-                downloaded = True
-
-        if downloaded:
-           hf_folder = model_path_temp
-        else:
-            with get_lock(model_name_or_path, cache_dir):
-                hf_folder = snapshot_download(model_name_or_path,
-                                            allow_patterns=allow_patterns,
-                                            cache_dir=cache_dir,
-                                            tqdm_class=Disabledtqdm)
-    else:
-        hf_folder = model_name_or_path
-```
-
----
-
-#### Q18: `/xxx/base_model_worker.py` 报 `assert r.status_code == 200` 错误
-
-A：这个错误是本地模型进程注册到 fastchat controller 失败了。一般有两种原因：1、开了系统全局代理，关闭即可。2、DEFAULT_BIND_HOST 设为'0.0.0.0'，改成'127.0.0.1' 或 本机实际 IP 即可。或者更新到最新版本代码也可以解决。
-
-
-#### Q19: 使用vllm后端加速，无返回且不报错。
-
-A: fschat=0.2.33的vllm_worker脚本代码有bug, 如需使用，需源码修改fastchat.server.vllm_worker，将103行中sampling_params = SamplingParams的参数stop=list(stop)修改为stop= [i for i in stop if i!=""]
-
-
-#### Q20: chatglm3-6b对话中出现"<|user|>"标签，且自问自答。
-
-A20: chatglm3官方目前已经修复了chatglm3-6b的问题，若使用的模型为chatglm3-6b，仅需更新chatglm3-6b模型代码即可;请前往 Huggingface 下载最新的权重。
-并更新fschat版本到 0.2.34以上。
-
-#### Q21: 为什么启动的时候一直出现
-```
-"device not in ['cuda', 'mps', 'cpu','xpu'], device = auto"
-```
-的警告
-
-A21: 这是因为你没有在对应的启动选项设定设备，请在`model_config.py`中设定 DEVICE，不过，就算不设定，auto也能正常使用
\ No newline at end of file
diff --git a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/开发环境部署.md b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/开发环境部署.md
deleted file mode 100644
index 9e996fdd..00000000
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/开发环境部署.md
+++ /dev/null
@@ -1,281 +0,0 @@
-## 软件要求
-
-要顺利运行本代码，请按照以下系统要求进行配置
-
-**已经测试过的系统**
-
-+ Linux Ubuntu 22.04.5 kernel version 6.7
-
-其他系统可能出现系统兼容性问题。
-
-**最低要求**
-
-该要求仅针对标准模式，轻量模式使用在线模型，不需要安装torch等库，也不需要显卡即可运行。
-
-+ Python 版本: >= 3.8(很不稳定), < 3.12
-+ CUDA 版本: >= 12.1 
-
-**推荐要求**
-
-开发者在以下环境下进行代码调试，在该环境下能够避免最多环境问题。
-
-+ Python 版本 == 3.11.7
-+ CUDA 版本: == 12.1
-
-## 硬件要求
-
-本框架使用 `fschat`驱动，统一使用 `huggingface`进行推理，其他推理方式(如 `llama-cpp`，`TensorRT加速引擎`)建议通过推理引擎以 API 形式接入我们的框架。
-
-同时, 我们没有对 `Int4` 模型进行适配，不保证`Int4`模型能够正常运行。因此，量化版本暂时需要由开发者自行适配, 我们可能在未来开放相关支持。
-
-如果想要顺利在GPU运行本地模型的 **FP16** 版本，你至少需要以下的硬件配置，来保证在我们框架下能够实现 **稳定连续对话** 
-
-+ ChatGLM3-6B & LLaMA-7B-Chat 等 7B模型
-  + 最低显存要求: 14GB
-  + 推荐显卡: RTX 4080
-+ Qwen-14B-Chat 等 14B模型
-  + 最低显存要求: 30GB
-  + 推荐显卡: V100
-+ Yi-34B-Chat 等 34B模型
-  + 最低显存要求: 69GB  
-  + 推荐显卡: A100
-+ Qwen-72B-Chat 等 72B模型
-  + 最低显存要求: 145GB
-  + 推荐显卡：多卡 A100 以上
-
-一种简单的估算方式为：
-```
-FP16: 显存占用(GB) = 模型量级 x 2
-Int4: 显存占用(GB) = 模型量级 x 0.75
-```
-以上数据仅为估算，实际情况以 **nvidia-smi** 占用为准。
-请注意，如果使用最低配置，仅能保证代码能够运行，但运行速度较慢，体验不佳。
-
-同时，Embedding 模型将会占用 1-2G 的显存，历史记录最多会占用 数GB 的显存，因此，需要多冗余一些显存。
-
-内存最低要求: 内存要求至少应该比模型运行的显存大。
-
-例如，运行ChatGLM3-6B `FP16` 模型，显存占用13G，推荐使用16G以上内存。
-
-### 部分测试用机配置参考，在以下机器下开发组成员已经进行原生模拟测试（创建新环境并根据要求下载后运行），确保能流畅运行全部功能的代码框架。
-+ 服务器
-```
-处理器: Intel® Xeon® Platinum 8558P Processor (260M Cache, 2.7 GHz)
-内存: 4 TB
-显卡组:  NVIDIA H800 SXM5 80GB x 8
-硬盘: 6 PB 
-操作系统: Ubuntu 22.04 LTS,Linux kernel 5.15.0-60-generic
-显卡驱动版本: 535.129.03
-Cuda版本: 12.1 
-Python版本: 3.11.7
-网络IP地址：美国，洛杉矶
-```
-+ 个人PC
-```
-处理器: Intel® Core™ i9 processor 14900K 
-内存: 256 GB DDR5
-显卡组:  NVIDIA RTX4090 X 1 / NVIDIA RTXA6000 X 1
-硬盘: 1 TB
-操作系统: Ubuntu 22.04 LTS / Arch Linux, Linux Kernel 6.6.7
-显卡驱动版本: 545.29.06
-Cuda版本: 12.3 Update 1
-Python版本: 3.11.7
-网络IP地址：中国，上海 
-```
-
-## VPN
-
-如果您位于中国(含港，澳，台) 需要调用 OpenAI 或者 其他境外模型的 API，需要使用 VPN 工具或访问镜像站。
-
-从 Huggingface 下载模型或者从本仓库拉取最新的代码时，需要开发者自行设置代理。本项目不涉及任何代理工具设置和使用，也不解决任何关于代理的问题。
-
-## Docker 部署
-
-开发组为开发者们提供了一键部署的 docker 镜像文件懒人包。开发者们可以在 AutoDL 平台和 Docker 平台一键部署。
-
-🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/chatchat-space/Langchain-Chatchat/Langchain-Chatchat) ，已经更新到`V13`版本,对应`0.2.9`
-
-🐳 [Docker 镜像](registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.7)
-
-💻 一行命令运行 Docker 🌲：
-
-```shell
-docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.7
-```
-
-- 该版本镜像大小 `43.1GB`，使用 `v0.2.6`，以 `nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04` 为基础镜像
-- 该版本为正常版本，非轻量化版本
-- 该版本内置两个 Embedding 模型：`m3e-large`，`text2vec-bge-large-chinese`，默认启用后者，内置 `chatglm2-6b-32k`
-- 该版本目标为方便一键部署使用，请确保您已经在 Linux 发行版上安装了 NVIDIA 驱动程序
-- 请注意，您不需要在主机系统上安装 CUDA 工具包，但需要安装 `NVIDIA Driver` 以及 `NVIDIA Container Toolkit`，请参考[安装指南](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
-- 首次拉取和启动均需要一定时间，首次启动时请参照下图使用 `docker logs -f <container id>` 查看日志
-- 如遇到启动过程卡在 `Waiting..` 步骤，建议使用 `docker exec -it <container id> bash` 进入 `/logs/` 目录查看对应阶段日志
-
-## 常规模式本地部署方案
-
-```shell
-# 首先，确信你的机器安装了 Python 3.8 - 3.10 版本
-$ python --version
-Python 3.8.13
-
-# 如果低于这个版本，可使用conda安装环境
-$ conda create -p /your_path/env_name python=3.8
-
-# 激活环境
-$ source activate /your_path/env_name
-
-# 或，conda安装，不指定路径, 注意以下，都将/your_path/env_name替换为env_name
-$ conda create -n env_name python=3.8
-$ conda activate env_name # Activate the environment
-
-# 更新py库
-$ pip3 install --upgrade pip
-
-# 关闭环境
-$ source deactivate /your_path/env_name
-
-# 删除环境
-$ conda env remove -p  /your_path/env_name
-```
-接着，开始安装项目的依赖
-
-```shell
-# 拉取仓库
-$ git clone --recursive https://github.com/chatchat-space/Langchain-Chatchat.git
-
-# 进入目录
-$ cd Langchain-Chatchat
-
-# 安装全部依赖
-$ pip install -r requirements.txt
-
-# 默认依赖包括基本运行环境（FAISS向量库）。以下是可选依赖：
-- 如果要使用 milvus/pg_vector 等向量库，请将 requirements.txt 中相应依赖取消注释再安装。
-- 如果要开启 OCR GPU 加速，请安装 rapidocr_paddle[gpu]
-- 如果要使用在线 API 模型，请安装对应的 SDK
-
-```
-
-此外，为方便用户 API 与 webui 分离运行，可单独根据运行需求安装依赖包。
-
-- 如果只需运行 API，可执行：
-    ```shell
-    $ pip install -r requirements_api.txt
-    
-    # 默认依赖包括基本运行环境（FAISS向量库）。如果要使用 milvus/pg_vector 等向量库，请将 requirements.txt 中相应依赖取消注释再安装。
-    ```
-
-- 如果只需运行 WebUI，可执行：
-    ```shell
-    $ pip install -r requirements_webui.txt
-    ```
-
-注：使用 `langchain.document_loaders.UnstructuredFileLoader`进行 `.docx` 等格式非结构化文件接入时，可能需要依据文档进行其他依赖包的安装，请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)。
-
-
-需要注意的是，对于以下依赖，我们建议源码安装依赖或者定期检查是否为最新版本，我们的框架可能会大量使用这些依赖的最新特性。
-+ transformers
-+ fastchat
-+ fastapi
-+ streamlit 以及其组件
-+ langchain 以及其组件
-+ xformers 
-
-## 模型下载
-
-如需在本地或离线环境下运行本项目，需要首先将项目所需的模型下载至本地，通常开源 LLM 与 Embedding 模型可以从 [HuggingFace](https://huggingface.co/models) 下载。
-
-以本项目中默认使用的 LLM 模型 [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b) 与 Embedding 模型 [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base) 为例：
-
-下载模型需要先[安装Git LFS](https://docs.github.com/zh/repositories/working-with-files/managing-large-files/installing-git-large-file-storage)，然后运行
-
-```Shell
-$ git lfs install
-$ git clone https://huggingface.co/THUDM/chatglm2-6b
-$ git clone https://huggingface.co/moka-ai/m3e-base
-```
-
-## 初始化知识库
-
-当前项目的知识库信息存储在数据库中，在正式运行项目之前请先初始化数据库（我们强烈建议您在执行操作前备份您的知识文件）。
-- 如果您已经有创建过知识库，可以先执行以下命令创建或更新数据库表：
-  ```shell
-  $ python init_database.py --create-tables
-  ```
-  如果可以正常运行，则无需再重建知识库。
-
-- 如果您是第一次运行本项目，知识库尚未建立，或者之前使用的是低于最新master分支版本的框架，或者配置文件中的知识库类型、嵌入模型发生变化，或者之前的向量库没有开启 `normalize_L2`，需要以下命令初始化或重建知识库：
-
-  ```shell
-  $ python init_database.py --recreate-vs
-  ```
-  
-## 一键启动
-启动前，确保已经按照[参数配置](https://github.com/chatchat-space/Langchain-Chatchat/wiki/%E5%8F%82%E6%95%B0%E9%85%8D%E7%BD%AE)正确配置各config模块。
-
-一键启动脚本 startup.py， 一键启动所有 Fastchat 服务、API 服务、WebUI 服务，示例代码：
-
-```shell
-$ python startup.py -a
-```
-
-并可使用 `Ctrl + C` 直接关闭所有运行服务。如果一次结束不了，可以多按几次。
-
-可选参数包括 `-a (或--all-webui)`, `--all-api`, `--llm-api`, `-c (或--controller)`, `--openai-api`,
-`-m (或--model-worker)`, `--api`, `--webui`，其中：
-
-- `--all-webui` 为一键启动 WebUI 所有依赖服务；
-- `--all-api` 为一键启动 API 所有依赖服务；
-- `--llm-api` 为一键启动 Fastchat 所有依赖的 LLM 服务；
-- `--openai-api` 为仅启动 FastChat 的 controller 和 openai-api-server 服务；
-- 其他为单独服务启动选项。
-
-若想指定非默认模型，需要用 `--model-name` 选项，示例：
-
-```shell
-$ python startup.py --all-webui --model-name Qwen-7B-Chat
-```
-
-更多信息可通过 `python startup.py -h` 查看。
-
-## 多卡加载
-项目支持多卡加载，需在 startup.py 中的 create_model_worker_app 函数中，修改如下三个参数:
-
-```python
-gpus=None, 
-num_gpus= 1, 
-max_gpu_memory="20GiB"
-```
-
-其中，`gpus` 控制使用的显卡的ID，例如 "0,1";
-
-`num_gpus` 控制使用的卡数;
-
-`max_gpu_memory` 控制每个卡使用的显存容量。
-
-注1：server_config.py的FSCHAT_MODEL_WORKERS字典中也增加了相关配置，如有需要也可通过修改FSCHAT_MODEL_WORKERS字典中对应参数实现多卡加载，且需注意server_config.py的配置会覆盖create_model_worker_app 函数的配置。
-
-注2：少数情况下，gpus参数会不生效，此时需要通过设置环境变量CUDA_VISIBLE_DEVICES来指定torch可见的gpu,示例代码：
-
-```shell
-CUDA_VISIBLE_DEVICES=0,1 python startup.py -a
-```
-
-## 最轻模式本地部署方案
-
-该模式的配置方式与常规模式相同，但无需安装 `torch` 等重依赖，通过在线API实现 LLM 和 Embeddings 相关功能，适合没有显卡的电脑使用。
-
-```shell
-$ pip install -r requirements_lite.txt
-$ python startup.py -a --lite
-```
-
-该模式支持的在线 Embeddings 包括：
-- [智谱AI](http://open.bigmodel.cn)
-- [MiniMax](https://api.minimax.chat)
-- [百度千帆](https://cloud.baidu.com/product/wenxinworkshop?track=dingbutonglan)
-- [阿里云通义千问](https://dashscope.aliyun.com/)
-
-在 model_config.py 中 将 LLM_MODELS 和 EMBEDDING_MODEL 设置为可用的在线 API 名称即可。
-
-注意：在对话过程中并不要求 LLM 模型与 Embeddings 模型一致，你可以在知识库管理页面中使用 zhipu-api 作为嵌入模型，在知识库对话页面使用其它模型。
diff --git a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/支持列表.md b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/支持列表.md
deleted file mode 100644
index 8937ed79..00000000
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/支持列表.md
+++ /dev/null
@@ -1,176 +0,0 @@
-## LLM 模型支持列表
-> 本地模型
-
-本地 LLM 模型接入基于 [FastChat](https://github.com/lm-sys/FastChat) 实现，支持模型如下：
-
-- [ChatGLM 全系列对话模型](https://huggingface.co/THUDM/)
-- [Orion 全系列对话模型](https://huggingface.co/OrionStarAI/)，必须安装flash-attn 才能使用
-- [Qwen 全系列对话模型](https://huggingface.co/Qwen/)
-- [internlm 全系列对话模型](https://huggingface.co/internlm)
-- [Baichuan 全系列对话模型](https://huggingface.co/baichuan-inc)，必须降级 transformers 才能使用
-- [llama 全系列对话模型](https://huggingface.co/meta-llama)
-- [Vicuna 全系列对话模型](https://huggingface.co/lmsys)
-- [mistral 全系列对话模型](https://huggingface.co/mistralai)
-
-- [vivo-ai/BlueLM-7B-Chat](https://huggingface.co/vivo-ai/BlueLM-7B-Chat)
-- [01-ai/Yi-34B-Chat](https://huggingface.co/01-ai/Yi-34B-Chat)
-- [BlinkDL/RWKV-4-Raven](https://huggingface.co/BlinkDL/rwkv-4-raven)
-- [camel-ai/CAMEL-13B-Combined-Data](https://huggingface.co/camel-ai/CAMEL-13B-Combined-Data)
-- [databricks/dolly-v2-12b](https://huggingface.co/databricks/dolly-v2-12b)
-- [FreedomIntelligence/phoenix-inst-chat-7b](https://huggingface.co/FreedomIntelligence/phoenix-inst-chat-7b)
-- [h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b](https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b)
-- [lcw99/polyglot-ko-12.8b-chang-instruct-chat](https://huggingface.co/lcw99/polyglot-ko-12.8b-chang-instruct-chat)
-- [lmsys/fastchat-t5-3b-v1.0](https://huggingface.co/lmsys/fastchat-t5)
-- [mosaicml/mpt-7b-chat](https://huggingface.co/mosaicml/mpt-7b-chat)
-- [Neutralzz/BiLLa-7B-SFT](https://huggingface.co/Neutralzz/BiLLa-7B-SFT)
-- [nomic-ai/gpt4all-13b-snoozy](https://huggingface.co/nomic-ai/gpt4all-13b-snoozy)
-- [NousResearch/Nous-Hermes-13b](https://huggingface.co/NousResearch/Nous-Hermes-13b)
-- [openaccess-ai-collective/manticore-13b-chat-pyg](https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg)
-- [OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5](https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5)
-- [project-baize/baize-v2-7b](https://huggingface.co/project-baize/baize-v2-7b)
-- [Salesforce/codet5p-6b](https://huggingface.co/Salesforce/codet5p-6b)
-- [StabilityAI/stablelm-tuned-alpha-7b](https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b)
-- [tiiuae/falcon-40b](https://huggingface.co/tiiuae/falcon-40b)
-- [timdettmers/guanaco-33b-merged](https://huggingface.co/timdettmers/guanaco-33b-merged)
-- [togethercomputer/RedPajama-INCITE-7B-Chat](https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Chat)
-- [WizardLM/WizardLM-13B-V1.0](https://huggingface.co/WizardLM/WizardLM-13B-V1.0)
-- [WizardLM/WizardCoder-15B-V1.0](https://huggingface.co/WizardLM/WizardCoder-15B-V1.0)
-- [HuggingFaceH4/starchat-beta](https://huggingface.co/HuggingFaceH4/starchat-beta)
-- [FlagAlpha/Llama2-Chinese-13b-Chat](https://huggingface.co/FlagAlpha/Llama2-Chinese-13b-Chat) and others
-- [BAAI/AquilaChat-7B](https://huggingface.co/BAAI/AquilaChat-7B)
-- [all models of OpenOrca](https://huggingface.co/Open-Orca)
-- [Spicyboros](https://huggingface.co/jondurbin/spicyboros-7b-2.2?not-for-all-audiences=true) 
-- [airoboros 2.2](https://huggingface.co/jondurbin/airoboros-l2-13b-2.2)
-- [VMware&#39;s OpenLLaMa OpenInstruct](https://huggingface.co/VMware/open-llama-7b-open-instruct)
-- 任何 [EleutherAI](https://huggingface.co/EleutherAI) 的 pythia 模型，如 [pythia-6.9b](https://huggingface.co/EleutherAI/pythia-6.9b)
-- 在以上模型基础上训练的任何 [Peft](https://github.com/huggingface/peft) 适配器。为了激活，模型路径中必须有 `peft` 。注意：如果加载多个peft模型，你可以通过在任何模型工作器中设置环境变量 `PEFT_SHARE_BASE_WEIGHTS=true` 来使它们共享基础模型的权重。
-
-以上模型支持列表可能随 [FastChat](https://github.com/lm-sys/FastChat) 更新而持续更新，可参考 [FastChat 已支持模型列表](https://github.com/lm-sys/FastChat/blob/main/docs/model_support.md)。
-
-> 联网模型
-
-支持的联网模型
-- [智谱AI](http://open.bigmodel.cn)   **临时解决方案，不支持流式输出，敬请期待0.3.x**
-- [阿里云通义千问](https://dashscope.aliyun.com/)
-- [百川](https://www.baichuan-ai.com/)
-- [ChatGPT](https://api.openai.com)
-- [Gemini](https://makersuite.google.com/app/apikey)
-- [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
-- [MiniMax](https://api.minimax.chat)
-- [讯飞星火](https://xinghuo.xfyun.cn)
-- [百度千帆](https://cloud.baidu.com/product/wenxinworkshop?track=dingbutonglan)
-- [字节火山方舟](https://www.volcengine.com)
-
-## Embedding 模型支持列表
-
-> 本地模型
-
-本项目支持调用 [HuggingFace](https://huggingface.co/models?pipeline_tag=sentence-similarity) 中的 Embedding 模型，已支持的 Embedding 模型如下：
-
-
-MokaAI系列嵌入模型
-
-- [moka-ai/m3e-small](https://huggingface.co/moka-ai/m3e-small)
-- [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base)
-- [moka-ai/m3e-large](https://huggingface.co/moka-ai/m3e-large)
-
-BAAI系列嵌入模型
-- [BAAI/bge-small-zh](https://huggingface.co/BAAI/bge-small-zh)
-- [BAAI/bge-base-zh](https://huggingface.co/BAAI/bge-base-zh)
-- [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh)
-- [BAAI/bge-small-zh-v1.5](https://huggingface.co/BAAI/bge-small-zh-v1.5)
-- [BAAI/bge-base-zh-v1.5](https://huggingface.co/BAAI/bge-base-zh-v1.5)
-- [BAAI/bge-large-zh-v1.5](https://huggingface.co/BAAI/bge-large-zh-v1.5)
-- [BAAI/bge-large-zh-noinstruct](https://huggingface.co/BAAI/bge-large-zh-noinstruct)
-- [BAAI/bge-reranker-large](https://huggingface.co/BAAI/bge-reranker-large)
-- [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base)
-
-
-text2vec系列嵌入模型
-- [shibing624/text2vec-base-chinese-sentence](https://huggingface.co/shibing624/text2vec-base-chinese-sentence)
-- [shibing624/text2vec-base-chinese-paraphrase](https://huggingface.co/shibing624/text2vec-base-chinese-paraphrase)
-- [shibing624/text2vec-base-multilingual](https://huggingface.co/shibing624/text2vec-base-multilingual)
-- [shibing624/text2vec-base-chinese](https://huggingface.co/shibing624/text2vec-base-chinese)
-- [shibing624/text2vec-bge-large-chinese](https://huggingface.co/shibing624/text2vec-bge-large-chinese)
-- [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese)
-
-其他模型
-- [sensenova/piccolo-base-zh](https://huggingface.co/sensenova/piccolo-base-zh)
-- [sensenova/piccolo-large-zh](https://huggingface.co/sensenova/piccolo-large-zh)
-- [nghuyong/ernie-3.0-nano-zh](https://huggingface.co/nghuyong/ernie-3.0-nano-zh)
-- [nghuyong/ernie-3.0-base-zh](https://huggingface.co/nghuyong/ernie-3.0-base-zh)
-
-达摩院系列嵌入模型
-- [damo/nlp_gte_sentence-embedding_chinese-large](https://modelscope.cn/models/damo/nlp_gte_sentence-embedding_chinese-large)
-
-> 联网模型
-
-除本地模型外，本项目也支持直接接入 OpenAI的在线嵌入模型。
-支持的联网模型
-- [OpenAI/text-embedding-ada-002](https://platform.openai.com/docs/guides/embeddings)
-- [智谱AI](http://open.bigmodel.cn)
-- [MiniMax](https://api.minimax.chat)
-- [百度千帆](https://cloud.baidu.com/product/wenxinworkshop?track=dingbutonglan)
-- [阿里云通义千问](https://dashscope.aliyun.com/)
-
-## 分词器支持列表
-
-> Langchain 中的分词器
-
-本项目支持调用 [Langchain](https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.text_splitter) 的 Text Splitter 分词器以及基于此改进的自定义分词器，已支持的 Text Splitter 类型如下：
-- CharacterTextSplitter
-- LatexTextSplitter
-- MarkdownHeaderTextSplitter
-- MarkdownTextSplitter
-- NLTKTextSplitter
-- PythonCodeTextSplitter
-- RecursiveCharacterTextSplitter
-- SentenceTransformersTokenTextSplitter
-- SpacyTextSplitter
-
-> 自定义分词器
-
-已经支持的定制分词器如下：
-
-- [AliTextSplitter](text_splitter/ali_text_splitter.py)
-- [ChineseRecursiveTextSplitter](text_splitter/chinese_recursive_text_splitter.py)
-- [ChineseTextSplitter](text_splitter/chinese_text_splitter.py)
-
-
-## 向量数据库支持列表
-
-> 本地向量数据库
-
-目前支持的本地向量数据库列表如下：
-
-- [FAISS](https://github.com/facebookresearch/faiss) 
-- [Milvus](https://github.com/milvus-io/milvus)
-- [PGVector](https://github.com/pgvector/pgvector)
-
-> 联网向量数据库
-
-- [Zilliz](https://zilliz.com)
-
-## 工具支持列表
-> Langchain工具
-
-- Shell 工具，用于模拟当前的Linux Shell环境
-- Youtube 工具，用于搜索Youtube的相关视频链接
-- Wolfram 工具，用Wolfram来实现数学计算等
-
-其他Langchain自带的工具也可以按照上述三个工具的方式来自己实现
-
-> 本地工具
-
-- 翻译工具，实现对输入的任意语言翻译。 
-- 数学工具，使用LLMMathChain 实现数学计算。
-- 高级知识库工具，智能选择调用多个或者单个知识库并查询内容。
-- 进阶知识库工具，智能选择调用一个最相近的知识库并查询内容。
-- 基础知识库工具，选择指定的一个知识库并回答。
-
-> 联网工具
-
--  天气工具，使用自定义的LLMWeatherChain实现天气查询，调用和风天气API。
--  搜索工具，使用我们的搜索API来实现搜索并概括内容。
-
-- 我们期待开发者共享更多的工具，帮助项目生态完善
diff --git a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/最佳实践.md b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/最佳实践.md
deleted file mode 100644
index 91e85a03..00000000
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/最佳实践.md
+++ /dev/null
@@ -1,889 +0,0 @@
-## 推荐的模型组合
-
-+ 在默认的配置文件中，我们提供了以下模型组合
-```
-LLM: Chatglm2-6b
-Embedding Models: m3e-base
-TextSplitter: ChineseRecursiveTextSplitter
-Kb_dataset: faiss
-```
-
-+ 我们推荐开发者根据自己的业务需求进行模型微调，如果不需要微调且配置充足，可选择以下性能较好的配置
-```
-model_config.py
-LLM: Qwen-14B-Chat 或 Baichuan2-13B-Chat
-Embedding Models: piccolo-large-zh 或 bge-large-zh-v1.5 
-HISTORY_LEN = 20
-TEMPERATURE = 0.1
-```  
-使用该模型组合对硬件有更高的要求
-```
-1张 RTX A6000 或者 A40 等 48GB 显存以上的显卡。推荐 1 x A100 以上。
-(使用多张显卡拼接也能运行，但是速度非常慢，2张4090拼接运行大概为一秒一个字的速度)
-
-64GB 内存用于加载模型而不被Kill
-
-服务器级的CPU，推荐 Xeon(R) Platinum 8358P 以上
-```
-
-+ 如果开发者知识库较大，有大量文档，大文件，我们推荐开发者使用 ```pg``` 向量数据库
-+ 如果开发者的知识库具有一定的关键词特征，例如：
-    + 问答对文件(以Q + A 为一个组合的json文件)
-    + Markdown文件
-    + 并排的pdf文件
-    + 具有多个表格的pdf文件
-  
-  我们推荐开发者自行开发分词器，以达到更好的效果。
-
-+ 如果开发者想使用更全面的 Agent 功能，我们推荐开发者使用以下配置
-```
-LLM: Qwen-14B-Chat, AgentLM-70B 或 GPT-4
-Tools 的工具控制在10个之内
-```
-
-## 微调模型加载实操
-
-### 非p-tuning类PEFT加载
-本项目基于 FastChat 加载 LLM 服务，故需以 FastChat 加载 PEFT 路径。针对chatglm、falcon、codet5p以外的模型，以及p-tuning以外的peft方法，需对peft文件进行修改，步骤如下：
-
-1. 将config.json文件重命名为adapter_config.json;
-2. 保证文件夹包含pytorch_model.bin文件；
-3. 修改文件夹名称，保证文件夹包含'peft'一词；
-4. 将peft文件夹移入项目目录下；
-5. 确保adapter_config.json文件中base_model_name_or_path指向基础模型；
-6. 将peft路径添加到model_config.py的llm_dict中，键为模型名，值为peft路径，注意使用相对路径，如"peft"；
-7. 开启 `PEFT_SHARE_BASE_WEIGHTS=true`环境变量，再执行python startup.py -a
-
-针对p-tuning和chatglm模型，需要对fastchat进行较大幅度的修改。
-
-### p-tuning加载
-
-P-tuning虽然是一种peft方法，但并不能与huggingface的peft python包兼容，而fastchat在多处以字符串匹配的方式进行硬编码加载模型，因此导致fastchat和chatchat不能兼容p-tuning，经langchain-chatchat开发组多次尝试，给出如下指南进行p-tuning加载。
-
-#### 1. peft文件夹修改
-
-1. 将config.json文件重命名为adapter_config.json;
-2. 保证文件夹包含pytorch_model.bin文件；
-3. 修改文件夹名称，保证文件夹包含'peft'一词；
-4. 在adapter_config.json文件中增加如下字段：
-
-   ```json
-       "base_model_name_or_path": "/root/model/chatglm2-6b/",
-       "task_type": "CAUSAL_LM",
-       "peft_type": "PREFIX_TUNING",
-       "inference_mode": true,
-       "revision": "main",
-       "num_virtual_tokens": 16
-   ```
-
-   **其中,"base_model_name_or_path"为基础模型的存在位置**；
-5. 将文件夹移入项目文件夹中，如Langchain-Chatchat项目文件夹目录下；
-
-#### 2. fastchat包代码修改
-
-##### 2.1 fastchat.model.model_adapter文件修改
-
-1. 将fastchat.model.model_adapter.py文件的load_model函数修改为：
-
-   ```python
-   def load_model(
-       model_path: str,
-       device: str = "cuda",
-       num_gpus: int = 1,
-       max_gpu_memory: Optional[str] = None,
-       dtype: Optional[torch.dtype] = None,
-       load_8bit: bool = False,
-       cpu_offloading: bool = False,
-       gptq_config: Optional[GptqConfig] = None,
-       awq_config: Optional[AWQConfig] = None,
-       revision: str = "main",
-       debug: bool = False,
-       load_kwargs = {}
-   ):
-       """Load a model from Hugging Face."""
-       # get model adapter
-       adapter = get_model_adapter(model_path)
-       kwargs = load_kwargs
-       # Handle device mapping
-       cpu_offloading = raise_warning_for_incompatible_cpu_offloading_configuration(
-           device, load_8bit, cpu_offloading
-       )
-       if device == "cpu":
-           kwargs["torch_dtype"]= torch.float32
-           if CPU_ISA in ["avx512_bf16", "amx"]:
-               try:
-                   import intel_extension_for_pytorch as ipex
-
-                   kwargs ["torch_dtype"]= torch.bfloat16
-               except ImportError:
-                   warnings.warn(
-                       "Intel Extension for PyTorch is not installed, it can be installed to accelerate cpu inference"
-                   )
-       elif device == "cuda":
-           kwargs["torch_dtype"] = torch.float16
-           if num_gpus != 1:
-               kwargs["device_map"] = "auto"
-               if max_gpu_memory is None:
-                   kwargs[
-                       "device_map"
-                   ] = "sequential"  # This is important for not the same VRAM sizes
-                   available_gpu_memory = get_gpu_memory(num_gpus)
-                   kwargs["max_memory"] = {
-                       i: str(int(available_gpu_memory[i] * 0.85)) + "GiB"
-                       for i in range(num_gpus)
-                   }
-               else:
-                   kwargs["max_memory"] = {i: max_gpu_memory for i in range(num_gpus)}
-       elif device == "mps":
-           kwargs["torch_dtype"] = torch.float16
-           # Avoid bugs in mps backend by not using in-place operations.
-           replace_llama_attn_with_non_inplace_operations()
-       elif device == "xpu":
-           kwargs["torch_dtype"] = torch.bfloat16
-           # Try to load ipex, while it looks unused, it links into torch for xpu support
-           try:
-               import intel_extension_for_pytorch as ipex
-           except ImportError:
-               warnings.warn(
-                   "Intel Extension for PyTorch is not installed, but is required for xpu inference."
-               )
-       elif device == "npu":
-           kwargs["torch_dtype"]= torch.float16
-           # Try to load ipex, while it looks unused, it links into torch for xpu support
-           try:
-               import torch_npu
-           except ImportError:
-               warnings.warn("Ascend Extension for PyTorch is not installed.")
-       else:
-           raise ValueError(f"Invalid device: {device}")
-
-       if cpu_offloading:
-           # raises an error on incompatible platforms
-           from transformers import BitsAndBytesConfig
-
-           if "max_memory" in kwargs:
-               kwargs["max_memory"]["cpu"] = (
-                   str(math.floor(psutil.virtual_memory().available / 2**20)) + "Mib"
-               )
-           kwargs["quantization_config"] = BitsAndBytesConfig(
-               load_in_8bit_fp32_cpu_offload=cpu_offloading
-           )
-           kwargs["load_in_8bit"] = load_8bit
-       elif load_8bit:
-           if num_gpus != 1:
-               warnings.warn(
-                   "8-bit quantization is not supported for multi-gpu inference."
-               )
-           else:
-               model, tokenizer = adapter.load_compress_model(
-                   model_path=model_path,
-                   device=device,
-                   torch_dtype=kwargs["torch_dtype"],
-                   revision=revision,
-               )
-               if debug:
-                   print(model)
-               return model, tokenizer
-       elif awq_config and awq_config.wbits < 16:
-           assert (
-               awq_config.wbits == 4
-           ), "Currently we only support 4-bit inference for AWQ."
-           model, tokenizer = load_awq_quantized(model_path, awq_config, device)
-           if num_gpus != 1:
-               device_map = accelerate.infer_auto_device_map(
-                   model,
-                   max_memory=kwargs["max_memory"],
-                   no_split_module_classes=[
-                       "OPTDecoderLayer",
-                       "LlamaDecoderLayer",
-                       "BloomBlock",
-                       "MPTBlock",
-                       "DecoderLayer",
-                   ],
-               )
-               model = accelerate.dispatch_model(
-                   model, device_map=device_map, offload_buffers=True
-               )
-           else:
-               model.to(device)
-           return model, tokenizer
-       elif gptq_config and gptq_config.wbits < 16:
-           model, tokenizer = load_gptq_quantized(model_path, gptq_config)
-           if num_gpus != 1:
-               device_map = accelerate.infer_auto_device_map(
-                   model,
-                   max_memory=kwargs["max_memory"],
-                   no_split_module_classes=["LlamaDecoderLayer"],
-               )
-               model = accelerate.dispatch_model(
-                   model, device_map=device_map, offload_buffers=True
-               )
-           else:
-               model.to(device)
-           return model, tokenizer
-       kwargs["revision"] = revision
-
-       if dtype is not None:  # Overwrite dtype if it is provided in the arguments.
-           kwargs["torch_dtype"] = dtype
-
-       # Load model
-       model, tokenizer = adapter.load_model(model_path, kwargs)
-
-       if (
-           device == "cpu"
-           and kwargs["torch_dtype"] is torch.bfloat16
-           and CPU_ISA is not None
-       ):
-           model = ipex.optimize(model, dtype=kwargs["torch_dtype"])
-
-       if (device == "cuda" and num_gpus == 1 and not cpu_offloading) or device in (
-           "mps",
-           "xpu",
-           "npu",
-       ):
-           model.to(device)
-
-       if device == "xpu":
-           model = torch.xpu.optimize(model, dtype=kwargs["torch_dtype"], inplace=True)
-
-       if debug:
-           print(model)
-
-       return model, tokenizer
-   ```
-2. 将fastchat.model.model_adapter.py的get_generate_stream_function函数修改为：
-
-   ```python
-   def get_generate_stream_function(model: torch.nn.Module, model_path: str):
-       """Get the generate_stream function for inference."""
-       from fastchat.serve.inference import generate_stream
-
-       model_type = str(type(model)).lower()
-
-       is_chatglm = "chatglm" in model_type 
-       is_falcon = "rwforcausallm" in model_type
-       is_codet5p = "codet5p" in model_type 
-       is_peft = "peft" in model_type
-
-       if is_chatglm:
-           return generate_stream_chatglm
-       elif is_falcon:
-           return generate_stream_falcon
-       elif is_codet5p:
-           return generate_stream_codet5p
-       elif peft_share_base_weights and is_peft:
-           # Return a curried stream function that loads the right adapter
-           # according to the model_name available in this context.  This ensures
-           # the right weights are available.
-           @torch.inference_mode()
-           def generate_stream_peft(
-               model,
-               tokenizer,
-               params: Dict,
-               device: str,
-               context_len: int,
-               stream_interval: int = 2,
-               judge_sent_end: bool = False,
-           ):
-
-               model.set_adapter(model_path)
-               if "chatglm" in str(type(model.base_model)).lower():
-                   model.disable_adapter()
-                   prefix_state_dict = torch.load(os.path.join(model_path, "pytorch_model.bin"))
-                   new_prefix_state_dict = {}
-
-                   for k, v in prefix_state_dict.items():
-                       if k.startswith("transformer.prefix_encoder."):
-                           new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
-                       elif k.startswith("transformer.prompt_encoder."):
-                           new_prefix_state_dict[k[len("transformer.prompt_encoder."):]] = v
-                   model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
-                   for x in generate_stream_chatglm(
-                       model,
-                       tokenizer,
-                       params,
-                       device,
-                       context_len,
-                       stream_interval,
-                       judge_sent_end,
-                   ):
-                       yield x
-               elif "rwforcausallm" in str(type(model.base_model)).lower():
-
-                   for x in generate_stream_falcon(
-                       model,
-                       tokenizer,
-                       params,
-                       device,
-                       context_len,
-                       stream_interval,
-                       judge_sent_end,
-                   ):
-                       yield x   
-               elif "codet5p" in str(type(model.base_model)).lower():
-
-                   for x in generate_stream_codet5p(
-                       model,
-                       tokenizer,
-                       params,
-                       device,
-                       context_len,
-                       stream_interval,
-                       judge_sent_end,
-                   ):
-                       yield x   
-               else:
-
-                   for x in generate_stream(
-                       model,
-                       tokenizer,
-                       params,
-                       device,
-                       context_len,
-                       stream_interval,
-                       judge_sent_end,
-                   ):
-                       yield x
-
-           return generate_stream_peft
-       else:
-           return generate_stream
-   ```
-3. 将fastchat.model.model_adapter.py的PeftModelAdapter类的load_model方法修改为：
-
-   ```python
-       def load_model(self, model_path: str, from_pretrained_kwargs: dict):
-           """Loads the base model then the (peft) adapter weights"""
-           from peft import PeftConfig, PeftModel
-
-           config = PeftConfig.from_pretrained(model_path)
-           base_model_path = config.base_model_name_or_path
-           if "peft" in base_model_path:
-               raise ValueError(
-                   f"PeftModelAdapter cannot load a base model with 'peft' in the name: {config.base_model_name_or_path}"
-               )
-
-           # Basic proof of concept for loading peft adapters that share the base
-           # weights.  This is pretty messy because Peft re-writes the underlying
-           # base model and internally stores a map of adapter layers.
-           # So, to make this work we:
-           #  1. Cache the first peft model loaded for a given base models.
-           #  2. Call `load_model` for any follow on Peft models.
-           #  3. Make sure we load the adapters by the model_path.  Why? This is
-           #  what's accessible during inference time.
-           #  4. In get_generate_stream_function, make sure we load the right
-           #  adapter before doing inference.  This *should* be safe when calls
-           #  are blocked the same semaphore.
-           if peft_share_base_weights:
-               if base_model_path in peft_model_cache:
-                   model, tokenizer = peft_model_cache[base_model_path]
-                   # Super important: make sure we use model_path as the
-                   # `adapter_name`.
-                   model.load_adapter(model_path, adapter_name=model_path)
-               else:
-                   base_adapter = get_model_adapter(base_model_path)
-                   base_model, tokenizer = base_adapter.load_model(
-                       base_model_path, from_pretrained_kwargs
-                   )
-                   # Super important: make sure we use model_path as the
-                   # `adapter_name`.
-                   from peft import get_peft_model
-                   model = get_peft_model(base_model,config,adapter_name=model_path)
-                   peft_model_cache[base_model_path] = (model, tokenizer)
-               return model, tokenizer
-
-           # In the normal case, load up the base model weights again.
-           base_adapter = get_model_adapter(base_model_path)
-           base_model, tokenizer = base_adapter.load_model(
-               base_model_path, from_pretrained_kwargs
-           )
-           from peft import get_peft_model
-           model = get_peft_model(base_model,config,adapter_name=model_path)
-           return model, tokenizer
-
-   ```
-4. 将fastchat.model.model_adapter.py的ChatglmAdapter类的load_model方法修改为：
-
-   ```python
-       def load_model(self, model_path: str, from_pretrained_kwargs: dict):
-           revision = from_pretrained_kwargs.get("revision", "main")
-           tokenizer = AutoTokenizer.from_pretrained(
-               model_path, trust_remote_code=True, revision=revision
-           )
-           config = AutoConfig.from_pretrained(model_path, trust_remote_code=True,**from_pretrained_kwargs)
-           model = AutoModel.from_pretrained(
-               model_path, trust_remote_code=True, config=config
-           )
-           return model, tokenizer
-   ```
-
-##### 2.2 fastchat.serve.model_worker文件修改
-
-1. 将fastchat.serve.model_worker文件的ModelWorker的__init__方法修改如下：
-
-   ```python
-   class ModelWorker(BaseModelWorker):
-       def __init__(
-           self,
-           controller_addr: str,
-           worker_addr: str,
-           worker_id: str,
-           model_path: str,
-           model_names: List[str],
-           limit_worker_concurrency: int,
-           no_register: bool,
-           device: str,
-           num_gpus: int,
-           max_gpu_memory: str,
-           dtype: Optional[torch.dtype] = None,
-           load_8bit: bool = False,
-           cpu_offloading: bool = False,
-           gptq_config: Optional[GptqConfig] = None,
-           awq_config: Optional[AWQConfig] = None,
-           stream_interval: int = 2,
-           conv_template: Optional[str] = None,
-           embed_in_truncate: bool = False,
-           seed: Optional[int] = None,
-           load_kwargs = {}, #修改点
-           **kwargs,
-       ):
-           super().__init__(
-               controller_addr,
-               worker_addr,
-               worker_id,
-               model_path,
-               model_names,
-               limit_worker_concurrency,
-               conv_template=conv_template,
-           )
-
-           logger.info(f"Loading the model {self.model_names} on worker {worker_id} ...")
-           self.model, self.tokenizer = load_model(
-               model_path,
-               device=device,
-               num_gpus=num_gpus,
-               max_gpu_memory=max_gpu_memory,
-               dtype=dtype,
-               load_8bit=load_8bit,
-               cpu_offloading=cpu_offloading,
-               gptq_config=gptq_config,
-               awq_config=awq_config,
-               load_kwargs=load_kwargs #修改点
-           )
-           self.device = device
-           if self.tokenizer.pad_token == None:
-               self.tokenizer.pad_token = self.tokenizer.eos_token
-           self.context_len = get_context_length(self.model.config)
-           print("**"*100)
-           self.generate_stream_func = get_generate_stream_function(self.model, model_path)
-           print(f"self.generate_stream_func{self.generate_stream_func}")
-           print("*"*100)
-           self.stream_interval = stream_interval
-           self.embed_in_truncate = embed_in_truncate
-           self.seed = seed
-
-           if not no_register:
-               self.init_heart_beat()
-   ```
-2. 在fastchat.serve.model_worker文件的create_model_worker增加如下args参数：
-
-   ```python
-   parser.add_argument("--load_kwargs",type=dict,default={})
-   ```
-
-    并将如下语句：
-
-```python
-    worker = ModelWorker(
-        args.controller_address,
-        args.worker_address,
-        worker_id,
-        args.model_path,
-        args.model_names,
-        args.limit_worker_concurrency,
-        no_register=args.no_register,
-        device=args.device,
-        num_gpus=args.num_gpus,
-        max_gpu_memory=args.max_gpu_memory,
-        dtype=str_to_torch_dtype(args.dtype),
-        load_8bit=args.load_8bit,
-        cpu_offloading=args.cpu_offloading,
-        gptq_config=gptq_config,
-        awq_config=awq_config,
-        stream_interval=args.stream_interval,
-        conv_template=args.conv_template,
-        embed_in_truncate=args.embed_in_truncate,
-        seed=args.seed,
-    )
-```
-
-修改为：
-
-```python
-    worker = ModelWorker(
-        args.controller_address,
-        args.worker_address,
-        worker_id,
-        args.model_path,
-        args.model_names,
-        args.limit_worker_concurrency,
-        no_register=args.no_register,
-        device=args.device,
-        num_gpus=args.num_gpus,
-        max_gpu_memory=args.max_gpu_memory,
-        dtype=str_to_torch_dtype(args.dtype),
-        load_8bit=args.load_8bit,
-        cpu_offloading=args.cpu_offloading,
-        gptq_config=gptq_config,
-        awq_config=awq_config,
-        stream_interval=args.stream_interval,
-        conv_template=args.conv_template,
-        embed_in_truncate=args.embed_in_truncate,
-        seed=args.seed,
-        load_kwargs=args.load_kwargs
-    )
-```
-
-至此，我们完成了fastchat加载p-tuning的所有修改。在调用fastchat加载p-tuning时，需设置环境变量 `PEFT_SHARE_BASE_WEIGHTS=true`，并以字典的形式将 `--load_kwargs` 参数设置为训练p-tuning时的pre_seq_len值，例如将2.2.2步骤中的 `parser.add_argument("--load_kwargs",type=dict,default={})`修改为：
-
-`parser.add_argument("--load_kwargs",type=dict,default={"pre_seq_len":16})`
-
-#### 3 langchain-chatchat代码修改：
-
-1. 在configs/server_config.py中的FSCHAT_MODEL_WORKERS字典中增加如下字段：
-
-   ```
-   "load_kwargs": {"pre_seq_len": 16} #值修改为adapter_config.json中的pre_seq_len值
-   ```
-2. 将startup.py中的create_model_worker_app修改为：
-
-   ```python
-   def create_model_worker_app(log_level: str = "INFO", **kwargs) -> FastAPI:
-       """
-       kwargs包含的字段如下：
-       host:
-       port:
-       model_names:[`model_name`]
-       controller_address:
-       worker_address:
-
-
-       对于online_api:
-           online_api:True
-           worker_class: `provider`
-       对于离线模型：
-           model_path: `model_name_or_path`,huggingface的repo-id或本地路径
-           device:`LLM_DEVICE`
-       """
-       import fastchat.constants
-       fastchat.constants.LOGDIR = LOG_PATH
-       from fastchat.serve.model_worker import worker_id, logger
-       import argparse
-       logger.setLevel(log_level)
-
-       parser = argparse.ArgumentParser()
-       args = parser.parse_args([])
-
-       for k, v in kwargs.items():
-           setattr(args, k, v)
-
-       # 在线模型API
-       if worker_class := kwargs.get("worker_class"):
-           from fastchat.serve.model_worker import app
-           worker = worker_class(model_names=args.model_names,
-                                 controller_addr=args.controller_address,
-                                 worker_addr=args.worker_address)
-           sys.modules["fastchat.serve.model_worker"].worker = worker
-       # 本地模型
-       else:
-           from configs.model_config import VLLM_MODEL_DICT
-           if kwargs["model_names"][0] in VLLM_MODEL_DICT and args.infer_turbo == "vllm":
-               import fastchat.serve.vllm_worker
-               from fastchat.serve.vllm_worker import VLLMWorker,app
-               from vllm import AsyncLLMEngine
-               from vllm.engine.arg_utils import AsyncEngineArgs,EngineArgs
-               args.tokenizer = args.model_path # 如果tokenizer与model_path不一致在此处添加
-               args.tokenizer_mode = 'auto'
-               args.trust_remote_code= True
-               args.download_dir= None
-               args.load_format = 'auto'
-               args.dtype = 'auto'
-               args.seed = 0
-               args.worker_use_ray = False
-               args.pipeline_parallel_size = 1
-               args.tensor_parallel_size = 1
-               args.block_size = 16
-               args.swap_space = 4  # GiB
-               args.gpu_memory_utilization = 0.90
-               args.max_num_batched_tokens = 2560
-               args.max_num_seqs = 256
-               args.disable_log_stats = False
-               args.conv_template = None
-               args.limit_worker_concurrency = 5
-               args.no_register = False
-               args.num_gpus = 1 # vllm worker的切分是tensor并行，这里填写显卡的数量
-               args.engine_use_ray = False
-               args.disable_log_requests = False
-               if args.model_path:
-                   args.model = args.model_path
-               if args.num_gpus > 1:
-                   args.tensor_parallel_size = args.num_gpus
-
-               for k, v in kwargs.items():
-                   setattr(args, k, v)
-
-               engine_args = AsyncEngineArgs.from_cli_args(args)
-               engine = AsyncLLMEngine.from_engine_args(engine_args)
-
-               worker = VLLMWorker(
-                           controller_addr = args.controller_address,
-                           worker_addr = args.worker_address,
-                           worker_id = worker_id,
-                           model_path = args.model_path,
-                           model_names = args.model_names,
-                           limit_worker_concurrency = args.limit_worker_concurrency,
-                           no_register = args.no_register,
-                           llm_engine =  engine,
-                           conv_template = args.conv_template,
-                           )
-               sys.modules["fastchat.serve.vllm_worker"].engine = engine
-               sys.modules["fastchat.serve.vllm_worker"].worker = worker
-
-           else:
-               from fastchat.serve.model_worker import app, GptqConfig, AWQConfig, ModelWorker
-               args.gpus = "0" # GPU的编号,如果有多个GPU，可以设置为"0,1,2,3"
-               args.max_gpu_memory = "20GiB"
-               args.num_gpus = 1  # model worker的切分是model并行，这里填写显卡的数量
-
-               args.load_8bit = False
-               args.cpu_offloading = None
-               args.gptq_ckpt = None
-               args.gptq_wbits = 16
-               args.gptq_groupsize = -1
-               args.gptq_act_order = False
-               args.awq_ckpt = None
-               args.awq_wbits = 16
-               args.awq_groupsize = -1
-               args.model_names = []
-               args.conv_template = None
-               args.limit_worker_concurrency = 5
-               args.stream_interval = 2
-               args.no_register = False
-               args.embed_in_truncate = False
-               args.load_kwargs = {"pre_seq_len": 16} # 改*************************
-               for k, v in kwargs.items():
-                   setattr(args, k, v)
-               if args.gpus:
-                   if args.num_gpus is None:
-                       args.num_gpus = len(args.gpus.split(','))
-                   if len(args.gpus.split(",")) < args.num_gpus:
-                       raise ValueError(
-                           f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!"
-                       )
-                   os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
-               gptq_config = GptqConfig(
-                   ckpt=args.gptq_ckpt or args.model_path,
-                   wbits=args.gptq_wbits,
-                   groupsize=args.gptq_groupsize,
-                   act_order=args.gptq_act_order,
-               )
-               awq_config = AWQConfig(
-                   ckpt=args.awq_ckpt or args.model_path,
-                   wbits=args.awq_wbits,
-                   groupsize=args.awq_groupsize,
-               )
-
-               worker = ModelWorker(
-                   controller_addr=args.controller_address,
-                   worker_addr=args.worker_address,
-                   worker_id=worker_id,
-                   model_path=args.model_path,
-                   model_names=args.model_names,
-                   limit_worker_concurrency=args.limit_worker_concurrency,
-                   no_register=args.no_register,
-                   device=args.device,
-                   num_gpus=args.num_gpus,
-                   max_gpu_memory=args.max_gpu_memory,
-                   load_8bit=args.load_8bit,
-                   cpu_offloading=args.cpu_offloading,
-                   gptq_config=gptq_config,
-                   awq_config=awq_config,
-                   stream_interval=args.stream_interval,
-                   conv_template=args.conv_template,
-                   embed_in_truncate=args.embed_in_truncate,
-                   load_kwargs=args.load_kwargs #改*************************
-               )
-               sys.modules["fastchat.serve.model_worker"].args = args
-               sys.modules["fastchat.serve.model_worker"].gptq_config = gptq_config
-
-               sys.modules["fastchat.serve.model_worker"].worker = worker
-
-       MakeFastAPIOffline(app)
-       app.title = f"FastChat LLM Server ({args.model_names[0]})"
-       app._worker = worker
-       return app
-   ```
-
-至此，我们完成了langchain-chatchat加载p-tuning的全部操作，将p-tuning的路径添加到model_config的llm_dict，如
-```
-chatglm2-6b: 'p-tuning-peft'
-```
-
-即可以如下方式加载p-tuning：
-
-```shell
-PEFT_SHARE_BASE_WEIGHTS=true python startup.py -a
-
-```
-
-
-## 预处理知识库文件
-
-在载入知识库文件的时候，直接上传文档虽然能实现基础的问答，但是，其效果并不能发挥到最佳水平。因此，我们建议开发者对知识库文件做出以下的预处理。
-以下方式的预处理如果执行了，有概率提升模型的召回率。
-
-### 1. 使用``` TXT / Markdown ``` 等格式化文件，并按照要点排版
-例如，以下段落应该被处理成如下内容后再嵌入知识库，会有更好的效果。
-```
-原文: PDF类型
-查特查特团队荣获AGI Playground Hackathon黑客松“生产力工具的新想象”赛道季军
-2023年10月16日, Founder Park在近日结束的AGI Playground Hackathon黑客松比赛中，查特查特团队展现出色的实力，荣获了“生产力工具的新想象”赛道季军。本次比赛由Founder Park主办，并由智谱、Dify、Zilliz、声网、AWS云服务等企业协办。
-比赛吸引了120多支参赛团队，最终有36支队伍进入决赛，其中34支队伍成功完成了路演。比赛规定，所有参赛选手必须在短短的48小时内完成一个应用产品开发，同时要求使用智谱大模型及Zilliz向量数据库进行开发。
-查特查特团队的现场参赛人员由两名项目成员组成：
-来自A大学的小明负责了Agent旅游助手的开发、场地协调以及团队住宿和行程的安排；在保证团队完赛上做出了主要贡献。作为队长，栋宇坚持自信，创新，沉着的精神，不断提出改进方案并抓紧落实，遇到相关问题积极请教老师，提高了团队开发效率。
-作为核心开发者的B公司小蓝，他则主管Agent智能知识库查询开发、Agent底层框架设计、相关API调整和UI调整。在最后，他代表团队在规定的时间内呈现了产品的特点和优势，并完美的展示了产品demo。为团队最终产品能够得到奖项做出了重要贡献。
-```
-修改后的Markdown文件，具有更高的召回率
-```
-# 查特查特团队荣获AGI Playground Hackathon黑客松“生产力工具的新想象”赛道季军。
-
-## 报道简介
-2023年10月16日, Founder Park在近日结束的AGI Playground Hackathon黑客松比赛中，查特查特团队展现出色的实力，荣获了“生产力工具的新想象”赛道季军。本次比赛由Founder Park主办，并由智谱、Dify、Zilliz、声网、AWS云服务等企业协办。
-
-## 比赛介绍
-
-比赛吸引了120多支参赛团队，最终有36支队伍进入决赛，其中34支队伍成功完成了路演。比赛规定，所有参赛选手必须在短短的48小时内完成一个应用产品开发，同时要求使用智谱大模型及Zilliz向量数据库进行开发。
-
-## 获奖队员简介
-
-+ 小明，A大学
-  + 负责Agent旅游助手的开发、场地协调以及团队住宿和行程的安排
-  + 在保证团队完赛上做出了主要贡献。作为队长，栋宇坚持自信，创新，沉着的精神，不断提出改进方案并抓紧落实，遇到相关问题积极请教老师，提高了团队开发效率。
-
-+ 小蓝，B公司
-  + 主管Agent智能知识库查询开发、Agent底层框架设计、相关API调整和UI调整。
-  + 代表团队在规定的时间内呈现了产品的特点和优势，并完美的展示了产品demo。
-```
-
-### 2. 减少文件中冲突的内容，分门别类存放数据
-
-就像人类寻找相关点一样，如果在多份文件中存在相似的内容，可能会导致模型无法准确的搜索到相关内容。
-因此，需要减少文件中相似的内容，或将其分在不同的知识库中。
-例如，以下两个句子中，如果搜索外籍教师，则具有歧义，非常容易搜索到错误答案。
-
-```
-文件一：
-在大数据专业中，我们已经拥有超过1/3的外籍博士和教师。
-
-文件二：
-
-本专业具有40%的外籍教师比例，
-本专业有博士生10人，研究生12人。
-```
-
-### 3. 减少具有歧义的句子
-知识库中应该减少具有歧义的句子和段落，或者汉语的高级用法，例如
-```
-1. 他说他会杀了那个人。
-2. 你说啥子？
-3. 我喜欢你的头发。
-4. 地板真的滑，我差点没摔倒。
-```
-在相似度模型对比的时候，仅仅能搜索句子的表面意思，因此，使用有歧义的句子和段落可能导致搜索错误。
-
-### 4. 减少单个文件的大小，减少文件中的特殊符号
-+ 上传知识库的单个文件不建议超过5MB，以免出现向量化中断卡死等情况。同时，上传大文件不要使用faiss数据库。
-+ 减少上传文件中的中文符号，特殊符号，无意义空格等。
-
-## 自定义的关键词调整Embedding模型
-
-1.首先准备一个关键字的文本文件，每一行是一个关键字。例如：
-```
-文件embedding_keywords.txt：
-iphone13pro
-中石油
-```
-2. 配置kb_config.py
-```
-EMBEDDING_KEYWORD_FILE = "embedding_keywords.txt"
-```  
-3. 运行```embeddings/add_embedding_keywords.py```
-```
-输入的文本（这里只是一个没分隔的一串字符）：iphone13pro
-生成的token id序列：[101, 21128, 102]
-token到token id的映射：
-[CLS]->101
-iphone13pro->21128
-[SEP]->102
-
-输入的文本：中石油
-生成的token id序列：[101, 21129, 102]
-token到token id的映射：
-[CLS]->101
-中石油->21129
-[SEP]->102
-```
-这样，你就获得了一个新的带有关键词调整的Embedding模型
-## 实际使用效果
-在这里，我们放置了一些成功调用的效果图，方便开发者进行查看自己是否成功运行了框架。
-
-### 检查是否成功上传/管理自己的知识库
-
-在WebUI界面上传知识库，则必须保证知识库进行向量化，成功之后，文件会被切分并在向量位置打钩。
-下图展示了成功上传知识库的画面
-
-![成功上传知识库](https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/init_knowledge_base.jpg)
-
-请确保所有知识库都已经进行了向量化。
-
-### 检查是否成功开启LLM对话
-
-若打开webui后，在该模式下能成功跟大模型对话即成功调用。
-
-下图为成功调用LLM的效果图:
-
-![LLM对话](https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/LLM_success.png)
-
-### 检查是否成功调用知识库/搜索
-若成功调用知识库，则你应该能看到，在大模型回答的下方有一个```知识库匹配结果```的展开框，并且内部显示了相关的匹配结果。
-如果没有搜索到相关内容，则会提示```根据已知信息无法回答问题```,并且下拉框中没有任何内容。
-
-下图为成功调用知识库效果图：
-
-![成功调用知识库](https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/knowledge_base_success.jpg)
-
-在这个案例中，第一次用户的提问无法在知识库中寻找到合适的答案，因此，大模型回答了```根据已知信息无法回答问题```。
-
-第二次用户的提问能在知识库中寻找到合适的答案，因此，大模型给出了一个正确的回答。
-
-__注意__: 知识库的搜索情况取决于嵌入模型的准度，分词器的设置，知识库的排版和大模型的数量，提示词设定等多个因素。因此，需要开发者进行深度的优化和调试。
-
-### 检查是否成功调用Agent工具
-
-若成功调用Agent工具，则你应该看到大模型完整的思维过程，这会在```思考过程```下拉框中显示出来。如果成功调用Agent工具，则你应该看到Markdown引用效果的工具使用情况。
-在Agent对话模式中，```思考过程```中显示的是大模型的思考过程，而下拉框之前的内容为大模型的```Final Answer```，缺乏中间的运算过程。
-
-下图展现了一个成功调用Agent工具的效果图:
-
-![成功调用单个Agent工具](https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/agent_success.png)
-
-本框架支持模型连续调用多个Agent工具，下图展示了在一个提问中大模型连续调用多个Agent工具的效果图:
-
-![连续调用多个Agent工具](https://github.com/chatchat-space/Langchain-Chatchat/blob/dev/img/agent_continue.png)
-
-在这个案例中，```3900```是大模型的最终答案，其余都是思考过程。
-
-
-
-
-
diff --git a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/自定义.md b/chatchat/chatchat/data/knowledge_base/samples/content/wiki/自定义.md
deleted file mode 100644
index 5e339f46..00000000
--- a/chatchat/chatchat/data/knowledge_base/samples/content/wiki/自定义.md
+++ /dev/null
@@ -1,159 +0,0 @@
-## 使用自定义的分词器
-1. 在```text_splitter```文件夹下新建一个文件，文件名为您的分词器名字，比如`my_splitter.py`，然后在`__init__.py`中导入您的分词器，如下所示：
-```python
-from .my_splitter import MySplitter
-```
-
-2. 修改```config/model_config.py```文件，将您的分词器名字添加到```text_splitter_dict```中，如下所示：
-```python
-MySplitter: {
-        "source": "huggingface",  # 选择tiktoken则使用openai的方法
-        "tokenizer_name_or_path": "your tokenizer", #如果选择huggingface则使用huggingface的方法，部分tokenizer需要从Huggingface下载
-    }
-TEXT_SPLITTER = "MySplitter"
-```
-
-完成上述步骤后，就能使用自己的分词器了。
-
-## 使用自定义的 Agent 工具
-
-1. 创建自己的Agent工具
-
-+ 开发者在```server/agent```文件中创建一个自己的文件，并将其添加到```tools_select.py```中。这样就完成了Tools的设定。
-
-+ 当您创建了一个```custom_agent.py```文件，其中包含一个```work```函数，那么您需要在```tools_select.py```中添加如下代码：
-```python
-from custom_agent import work
-Tool.from_function(
-    func=work,
-    name="该函数的名字",
-    description=""
-    )
-```
-+ 请注意，如果你确定在某一个工程中不会使用到某个工具，可以将其从Tools中移除，降低模型分类错误导致使用错误工具的风险。
-
-2. 修改 ```custom_template.py``` 文件
-
-开发者需要根据自己选择的大模型设定适合该模型的Agent Prompt和自定义返回格式。
-````
-"""
-Answer the following questions as best you can. You have access to the following tools:
-{tools}
-Use the following format:
-Question: the input question you must answer
-Thought: you should always think about what to do
-Action: the action to take, should be one of [{tool_names}]
-Action Input: the input to the action
-Observation: the result of the action
-... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
-Thought: I now know the final answer
-Final Answer: the final answer to the original input question
-Begin!
-history:
-{history}
-Question: {input}
-Thought: {agent_scratchpad}
-"""
-````
-除了使用 `Zero React` 的提示词方案，开发者可以自行对提示词进行修改，或者使用 Langchain 提供的其他的Agent结构。例如，如果您使用的模型为`ChatGLM3-6B`模型，我们提供了一个可以正常运行`ChatGLM3-6B`的Agent提示词，该提示词与 Langchain 的 `struct Agent`相似，其内容如下：
-````
-
-"ChatGLM3":
-"""
-You can answer using the tools, or answer directly using your knowledge without using the tools.Respond to the human as helpfully and accurately as possible.
-You have access to the following tools:
-{tools}
-Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
-Valid "action" values: "Final Answer" or  [{tool_names}]
-Provide only ONE action per $JSON_BLOB, as shown:
-
-```
-{{{{
-  "action": $TOOL_NAME,
-  "action_input": $INPUT
-}}}}
-```
-Follow this format:
-
-Question: input question to answer
-Thought: consider previous and subsequent steps
-Action:
-```
-$JSON_BLOB
-```
-Observation: action result
-... (repeat Thought/Action/Observation N times)
-Thought: I know what to respond
-Action:
-```
-{{{{
-  "action": "Final Answer",
-  "action_input": "Final response to human"
-}}}}
-Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.
-
-history: {history}
-
-Question: {input}
-
-Thought: {agent_scratchpad}
-""",
-````
-
-3. 让不支持 Langchain 调用方式的但具备 Agent 能力的模型展现能力
-
-以**ChatGLM3-6B**为代表的模型，虽然具有 Function Call 能力，但其对齐格式与 Langchain 提供默认Agent格式并不符合，因此无法使用 Langchain 自身能力实现 Function Call。在我们的框架中，您可以在 ```server/Agent/custom_agent/``` 文件夹中自行复现更多模型的 Agent 能力实现。
-
-在完成上述步骤之后，您还需要到```server/chat/agent_chat/```中导入您的模块来实现特殊判定。
-
-同时，你应该在调用工具的时候使用自定义的模板，我们以`GLM`系列模型进行演示，如果您在使用`GLM`模型进行工具调用，你应该使用`model_config.py`中的`ChatGLM3`模板。
-
-4. 局限性
-
-- 由于 React Agent 的脆弱性，temperature 参数的设置对于模型的效果有很大的影响。我们建议开发者在使用自定义 Agent 时，对于不同的模型，将其设置成0.1以下，以达到更好的效果。
-- 目前，官方仅对 **ChatGLM3-6B** 一种模型进行了 非 Langchain 对齐格式下的能力激活，我们欢迎开发者自行探索其他模型，并提交对应的 PR，让框架支持更多的 Agent 模型。
-- 在`0.2.x`版本中，我们没有对`Plan`进行优化，因此，连续调用工具的能力较差，我们会在`0.3.x`中优化这一问题。此外，经过测试，本地模型在工具调用上的表现不如在线模型，我们更推荐使用 `gpt4-1106-Preview` 来完成工具调用的任务。
-
-## 使用自定义的微调模型
-
-- 本项目基于 FastChat 加载 LLM 服务，故需以 FastChat 加载 PEFT 路径。
-- 开发者需要保证路径名称里必须有 peft 这个词。
-- 配置文件的名字为 ```adapter_config.json```
-- peft 路径下包含.bin 格式的 PEFT 权重， peft路径在startup.py中 ```create_model_worker_app``` 函数的 ```args.model_names``` 中指定
-```python
-    args.model_names = ["/home/ubuntu/your_peft_folder/peft"]
-
-```
-- 执行代码之前，应该设定环境变量
-```
-PEFT_SHARE_BASE_WEIGHTS=true 
-```
-
-注：如果上述方式启动失败，则需要以标准的 FastChat 服务启动方式分步启动，PEFT加载详细步骤参考以下ISSUE
-
-[加载lora微调后模型失效](https://github.com/chatchat-space/Langchain-Chatchat/issues/1130#issuecomment-1685291822)
-
-在```最佳实践```章节中，我们为开发者做了更详细的模型载入文档。
-
-__该功能可能还具有一定的Bug，需要开发者仔细适配。__
-
-
-## 使用自定义的嵌入模型
-
-- 使用自定义的嵌入模型，开发者需要将其合并到原始的嵌入模型中，之后仅需将其路径添加到```config/model_config.py```中并选择自己的模型启动即可。
-- 如果想自己在Embedding模型中支持 自定义的关键字，需要在 ```embeddings/embedding_keywords.txt```中设定好自己的关键字
-- 运行 ```embeddings/add_embedding_keywords.py```
-- 将生成的新Embedding模型地址放入```configs/model_config.py```中并选择，
-```
-"custom-embedding": "your path",
-```
-并设置
-```
-EMBEDDING_MODEL = "custom-embedding"  
-```
-即可调用加入关键字的embedding模型。
-在```最佳实践```章节中，我们为某几个关键词定制了一个Embed模型。
-
-## 日志功能
-
-- 日志功能记录了大模型的心跳和网络端口传输记录，开发者可以通过日志功能查看模型的运行情况。
\ No newline at end of file
diff --git a/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.faiss b/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.faiss
deleted file mode 100644
index b27d5816..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.faiss and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.pkl b/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.pkl
deleted file mode 100644
index 456365e3..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh-v1.5/index.pkl and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh/index.faiss b/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh/index.faiss
deleted file mode 100644
index 8f0030a0..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh/index.faiss and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh/index.pkl b/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh/index.pkl
deleted file mode 100644
index 0c47af4b..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/samples/vector_store/bge-large-zh/index.pkl and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/samples/vector_store/m3e-base/index.faiss b/chatchat/chatchat/data/knowledge_base/samples/vector_store/m3e-base/index.faiss
deleted file mode 100644
index 55c68229..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/samples/vector_store/m3e-base/index.faiss and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/samples/vector_store/m3e-base/index.pkl b/chatchat/chatchat/data/knowledge_base/samples/vector_store/m3e-base/index.pkl
deleted file mode 100644
index 84670f15..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/samples/vector_store/m3e-base/index.pkl and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南 第1部分：通用要求（征求意见稿）.doc b/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南 第1部分：通用要求（征求意见稿）.doc
deleted file mode 100644
index f4dc835f..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南 第1部分：通用要求（征求意见稿）.doc and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南 第1部分：通用要求（征求意见稿）.docx b/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南 第1部分：通用要求（征求意见稿）.docx
deleted file mode 100644
index 6fb15e3d..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南 第1部分：通用要求（征求意见稿）.docx and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南 第6部分：构筑物、系统和部件的实际状态-（征求意见稿）.docx b/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南 第6部分：构筑物、系统和部件的实际状态-（征求意见稿）.docx
deleted file mode 100644
index 5b048db1..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全审查指南 第6部分：构筑物、系统和部件的实际状态-（征求意见稿）.docx and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价 第12部分：设计（征求意见稿20230317）.doc b/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价 第12部分：设计（征求意见稿20230317）.doc
deleted file mode 100644
index b0ae6bd9..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价 第12部分：设计（征求意见稿20230317）.doc and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价 第12部分：设计（征求意见稿20230317）.docx b/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价 第12部分：设计（征求意见稿20230317）.docx
deleted file mode 100644
index b0b61095..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价 第12部分：设计（征求意见稿20230317）.docx and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价指南 第7部分：经验反馈（征求意见稿） 20230324.doc b/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价指南 第7部分：经验反馈（征求意见稿） 20230324.doc
deleted file mode 100644
index 654dc3da..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/核电厂定期安全评价指南 第7部分：经验反馈（征求意见稿） 20230324.doc and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查 第12部分：设计）.doc b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查 第12部分：设计）.doc
deleted file mode 100644
index 6dc86fe3..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查 第12部分：设计）.doc and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查 第12部分：设计）.docx b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查 第12部分：设计）.docx
deleted file mode 100644
index 2bb6c721..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查 第12部分：设计）.docx and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查 第1部分：通用要求）.doc b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查 第1部分：通用要求）.doc
deleted file mode 100644
index bbc8027b..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查 第1部分：通用要求）.doc and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查 第1部分：通用要求）.docx b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查 第1部分：通用要求）.docx
deleted file mode 100644
index 0320067a..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查 第1部分：通用要求）.docx and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查指南 第6部分：构筑物、系统和部件的实际状态）(3).doc b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查指南 第6部分：构筑物、系统和部件的实际状态）(3).doc
deleted file mode 100644
index 82f9eeeb..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查指南 第6部分：构筑物、系统和部件的实际状态）(3).doc and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查指南 第6部分：构筑物、系统和部件的实际状态）(3).docx b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查指南 第6部分：构筑物、系统和部件的实际状态）(3).docx
deleted file mode 100644
index d90ff3d4..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全审查指南 第6部分：构筑物、系统和部件的实际状态）(3).docx and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全评价指南 第7部分：经验反馈）20230324.docx b/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全评价指南 第7部分：经验反馈）20230324.docx
deleted file mode 100644
index 839f0279..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/content/行标编制说明（核电厂定期安全评价指南 第7部分：经验反馈）20230324.docx and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/vector_store/bge-large-zh/index.faiss b/chatchat/chatchat/data/knowledge_base/standard/vector_store/bge-large-zh/index.faiss
deleted file mode 100644
index 878c010b..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/vector_store/bge-large-zh/index.faiss and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/vector_store/bge-large-zh/index.pkl b/chatchat/chatchat/data/knowledge_base/standard/vector_store/bge-large-zh/index.pkl
deleted file mode 100644
index 7cbaf484..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/vector_store/bge-large-zh/index.pkl and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/vector_store/index.faiss b/chatchat/chatchat/data/knowledge_base/standard/vector_store/index.faiss
deleted file mode 100644
index 7e3f6d42..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/vector_store/index.faiss and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/vector_store/index.pkl b/chatchat/chatchat/data/knowledge_base/standard/vector_store/index.pkl
deleted file mode 100644
index 3c18642a..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/vector_store/index.pkl and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/vector_store/m3e-base/index.faiss b/chatchat/chatchat/data/knowledge_base/standard/vector_store/m3e-base/index.faiss
deleted file mode 100644
index c42bfdf0..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/vector_store/m3e-base/index.faiss and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/standard/vector_store/m3e-base/index.pkl b/chatchat/chatchat/data/knowledge_base/standard/vector_store/m3e-base/index.pkl
deleted file mode 100644
index cd593ce2..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/standard/vector_store/m3e-base/index.pkl and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/test/content/1.pdf b/chatchat/chatchat/data/knowledge_base/test/content/1.pdf
deleted file mode 100644
index 6766487d..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/test/content/1.pdf and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/test/content/tests/README.md b/chatchat/chatchat/data/knowledge_base/test/content/tests/README.md
deleted file mode 100644
index 67aa6ef8..00000000
--- a/chatchat/chatchat/data/knowledge_base/test/content/tests/README.md
+++ /dev/null
@@ -1,159 +0,0 @@
-![](img/logo-long-chatchat-trans-v2.png)
-
-
-🌍 [READ THIS IN ENGLISH](README_en.md)
-
-📃 **LangChain-Chatchat** (原 Langchain-ChatGLM)
-
-基于 ChatGLM 等大语言模型与 Langchain 等应用框架实现，开源、可离线部署的检索增强生成(RAG)大模型知识库项目。
-
----
-
-## 目录
-
-* [介绍](README.md#介绍)
-* [解决的痛点](README.md#解决的痛点)
-* [快速上手](README.md#快速上手)
-  * [1. 环境配置](README.md#1-环境配置)
-  * [2. 模型下载](README.md#2-模型下载)
-  * [3. 初始化知识库和配置文件](README.md#3-初始化知识库和配置文件)
-  * [4. 一键启动](README.md#4-一键启动)
-  * [5. 启动界面示例](README.md#5-启动界面示例)
-* [联系我们](README.md#联系我们)
-
-
-## 介绍
-
-🤖️ 一种利用 [langchain](https://github.com/hwchase17/langchain) 思想实现的基于本地知识库的问答应用，目标期望建立一套对中文场景与开源模型支持友好、可离线运行的知识库问答解决方案。
-
-💡 受 [GanymedeNil](https://github.com/GanymedeNil) 的项目 [document.ai](https://github.com/GanymedeNil/document.ai) 和 [AlexZhangji](https://github.com/AlexZhangji) 创建的 [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) 启发，建立了全流程可使用开源模型实现的本地知识库问答应用。本项目的最新版本中通过使用 [FastChat](https://github.com/lm-sys/FastChat) 接入 Vicuna, Alpaca, LLaMA, Koala, RWKV 等模型，依托于 [langchain](https://github.com/langchain-ai/langchain) 框架支持通过基于 [FastAPI](https://github.com/tiangolo/fastapi) 提供的 API 调用服务，或使用基于 [Streamlit](https://github.com/streamlit/streamlit) 的 WebUI 进行操作。
-
-✅ 依托于本项目支持的开源 LLM 与 Embedding 模型，本项目可实现全部使用**开源**模型**离线私有部署**。与此同时，本项目也支持 OpenAI GPT API 的调用，并将在后续持续扩充对各类模型及模型 API 的接入。
-
-⛓️ 本项目实现原理如下图所示，过程包括加载文件 -> 读取文本 -> 文本分割 -> 文本向量化 -> 问句向量化 -> 在文本向量中匹配出与问句向量最相似的 `top k`个 -> 匹配出的文本作为上下文和问题一起添加到 `prompt`中 -> 提交给 `LLM`生成回答。
-
-📺 [原理介绍视频](https://www.bilibili.com/video/BV13M4y1e7cN/?share_source=copy_web&vd_source=e6c5aafe684f30fbe41925d61ca6d514)
-
-![实现原理图](img/langchain+chatglm.png)
-
-从文档处理角度来看，实现流程如下：
-
-![实现原理图2](img/langchain+chatglm2.png)
-
-🚩 本项目未涉及微调、训练过程，但可利用微调或训练对本项目效果进行优化。
-
-🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/chatchat-space/Langchain-Chatchat/Langchain-Chatchat) 中 `v11` 版本所使用代码已更新至本项目 `v0.2.7` 版本。
-
-🐳 [Docker 镜像](registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.7) 已经更新到 ```0.2.7``` 版本。
-
-🌲 一行命令运行 Docker ：
-
-```shell
-docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.7
-```
-
-🧩 本项目有一个非常完整的[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/) ， README只是一个简单的介绍，__仅仅是入门教程，能够基础运行__。 如果你想要更深入的了解本项目，或者想对本项目做出贡献。请移步 [Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)  界面
-
-## 解决的痛点
-
-该项目是一个可以实现 __完全本地化__推理的知识库增强方案, 重点解决数据安全保护，私域化部署的企业痛点。
-本开源方案采用```Apache License```，可以免费商用，无需付费。
-
-我们支持市面上主流的本地大语言模型和Embedding模型，支持开源的本地向量数据库。
-支持列表详见[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
-
-
-## 快速上手
-
-### 1. 环境配置
-
-+ 首先，确保你的机器安装了 Python 3.8 - 3.10
-```
-$ python --version
-Python 3.10.12
-```
-接着，创建一个虚拟环境，并在虚拟环境内安装项目的依赖
-```shell
-
-# 拉取仓库
-$ git clone https://github.com/chatchat-space/Langchain-Chatchat.git
-
-# 进入目录
-$ cd Langchain-Chatchat
-
-# 安装全部依赖
-$ pip install -r requirements.txt 
-$ pip install -r requirements_api.txt
-$ pip install -r requirements_webui.txt  
-
-# 默认依赖包括基本运行环境（FAISS向量库）。如果要使用 milvus/pg_vector 等向量库，请将 requirements.txt 中相应依赖取消注释再安装。
-```
-### 2. 模型下载
-
-如需在本地或离线环境下运行本项目，需要首先将项目所需的模型下载至本地，通常开源 LLM 与 Embedding 模型可以从 [HuggingFace](https://huggingface.co/models) 下载。
-
-以本项目中默认使用的 LLM 模型 [THUDM/ChatGLM2-6B](https://huggingface.co/THUDM/chatglm2-6b) 与 Embedding 模型 [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base) 为例：
-
-下载模型需要先[安装 Git LFS](https://docs.github.com/zh/repositories/working-with-files/managing-large-files/installing-git-large-file-storage)，然后运行
-
-```Shell
-$ git lfs install
-$ git clone https://huggingface.co/THUDM/chatglm2-6b
-$ git clone https://huggingface.co/moka-ai/m3e-base
-```
-### 3. 初始化知识库和配置文件
-
-按照下列方式初始化自己的知识库和简单的复制配置文件
-```shell
-$ python copy_config_example.py
-$ python init_database.py --recreate-vs
- ```
-### 4. 一键启动
-
-按照以下命令启动项目
-```shell
-$ python startup.py -a
-```
-### 5. 启动界面示例
-
-如果正常启动，你将能看到以下界面
-
-1. FastAPI Docs 界面
-
-![](img/fastapi_docs_026.png)
-
-2. Web UI 启动界面示例：
-
-- Web UI 对话界面：
-
-![img](img/LLM_success.png)
-
-- Web UI 知识库管理页面：
-
-![](img/init_knowledge_base.jpg)
-
-
-### 注意
-
-以上方式只是为了快速上手，如果需要更多的功能和自定义启动方式 ，请参考[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
-
-
----
-## 项目里程碑
-
-
----
-## 联系我们
-### Telegram
-[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white "langchain-chatglm")](https://t.me/+RjliQ3jnJ1YyN2E9)
-
-### 项目交流群
-<img src="img/qr_code_76.jpg" alt="二维码" width="300" />
-
-🎉 Langchain-Chatchat 项目微信交流群，如果你也对本项目感兴趣，欢迎加入群聊参与讨论交流。
-
-### 公众号
-
-<img src="img/official_wechat_mp_account.png" alt="二维码" width="300" />
-
-🎉 Langchain-Chatchat 项目官方公众号，欢迎扫码关注。
diff --git a/chatchat/chatchat/data/knowledge_base/test/vector_store/bge-large-zh-v1.5/index.faiss b/chatchat/chatchat/data/knowledge_base/test/vector_store/bge-large-zh-v1.5/index.faiss
deleted file mode 100644
index 700502e6..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/test/vector_store/bge-large-zh-v1.5/index.faiss and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/test/vector_store/bge-large-zh-v1.5/index.pkl b/chatchat/chatchat/data/knowledge_base/test/vector_store/bge-large-zh-v1.5/index.pkl
deleted file mode 100644
index 1f7ca5b7..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/test/vector_store/bge-large-zh-v1.5/index.pkl and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/test/vector_store/bge/index.faiss b/chatchat/chatchat/data/knowledge_base/test/vector_store/bge/index.faiss
deleted file mode 100644
index ba31968e..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/test/vector_store/bge/index.faiss and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/test/vector_store/bge/index.pkl b/chatchat/chatchat/data/knowledge_base/test/vector_store/bge/index.pkl
deleted file mode 100644
index b731580d..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/test/vector_store/bge/index.pkl and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/test/vector_store/qwen-api/index.faiss b/chatchat/chatchat/data/knowledge_base/test/vector_store/qwen-api/index.faiss
deleted file mode 100644
index bb938827..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/test/vector_store/qwen-api/index.faiss and /dev/null differ
diff --git a/chatchat/chatchat/data/knowledge_base/test/vector_store/qwen-api/index.pkl b/chatchat/chatchat/data/knowledge_base/test/vector_store/qwen-api/index.pkl
deleted file mode 100644
index 3d9d7560..00000000
Binary files a/chatchat/chatchat/data/knowledge_base/test/vector_store/qwen-api/index.pkl and /dev/null differ