Fix some bugs (#1965)

* Fix some bugs

* Fix a portion of the earlier bugs
zR authored on 2023-11-03 18:56:27 +08:00; committed by GitHub
parent fa906b33a8
commit 57612d5232
20 changed files with 1881 additions and 1908 deletions


@ -148,3 +148,5 @@ $ python startup.py -a
### WeChat Official Account
![](img/official_wechat_mp_account.png)
🎉 The official WeChat account of the Langchain-Chatchat project. Scan the QR code to follow.
<u>[Langchain-Chatchat](https://github.com/chatchat-space/Langchain-Chatchat)</u>: an open-source, offline-deployable retrieval-augmented generation (RAG) knowledge base project built on large language models such as ChatGLM and application frameworks such as Langchain.


@ -6,8 +6,6 @@ import langchain
log_verbose = False
langchain.verbose = False
# Whether to save chat history
SAVE_CHAT_HISTORY = False
# Normally there is no need to change anything below

Binary file not shown (image; 188 KiB before this change).


@ -1,6 +1,6 @@
langchain>=0.0.324
langchain>=0.0.326
langchain-experimental>=0.0.30
fschat[model_worker]==0.2.31
fschat[model_worker]==0.2.32
xformers>=0.0.22.post4
openai>=0.28.1
sentence_transformers
@ -48,7 +48,7 @@ pandas~=2.0.3
streamlit>=1.26.0
streamlit-option-menu>=0.3.6
streamlit-antd-components>=0.1.11
streamlit-chatbox==1.1.11
streamlit-chatbox>=1.1.11
streamlit-aggrid>=0.3.4.post3
httpx[brotli,http2,socks]>=0.25.0
watchdog


@ -1,10 +1,10 @@
langchain>=0.0.324
langchain>=0.0.326
langchain-experimental>=0.0.30
fschat[model_worker]==0.2.31
fschat[model_worker]==0.2.32
xformers>=0.0.22.post4
openai>=0.28.1
sentence_transformers>=2.2.2
transformers>=4.34
transformers>=4.35
torch>=2.1
torchvision
torchaudio


@ -1,12 +1,12 @@
langchain>=0.0.324
fschat>=0.2.31
langchain>=0.0.326
fschat>=0.2.32
openai
# sentence_transformers
# transformers>=4.33.0
# transformers>=4.35.0
# torch>=2.0.1
# torchvision
# torchaudio
fastapi>=0.103.1
fastapi>=0.104.1
python-multipart
nltk~=3.8.1
uvicorn~=0.23.1


@ -71,15 +71,16 @@ bigdata,what are the job prospects in big data
These are the databases you can access. The text before each colon is the database's name and the text after it describes its purpose; use those descriptions to guide your thinking.
{database_names}
Your answer must follow the format below. Note that markers such as ```text must be included in the output; they are what I use to extract the answer.
Do not output Chinese commas, and do not output quotation marks.
Question: ${{the user's question}}
```text
${{knowledge base name,query,with no symbols other than ,}}
${{knowledge base name,query,with no symbols other than ,; for example, do not output Chinese commas or quotation marks}}
```output
The result of the database query
@ -165,7 +166,11 @@ class LLMKnowledgeChain(LLMChain):
lines = cleaned_input_str.split("\n")
# Split each line on a comma to form a list of (database, query) tuples
try:
    queries = [(line.split(",")[0].strip(), line.split(",")[1].strip()) for line in lines]
except:
    queries = [(line.split(",")[0].strip(), line.split(",")[1].strip()) for line in lines]
print(queries)
run_manager.on_text("知识库查询询内容:\n\n" + str(queries) + " \n\n", color="blue", verbose=self.verbose)
output = self._evaluate_expression(queries)
run_manager.on_text("\nAnswer: ", verbose=self.verbose)
@ -193,7 +198,10 @@ class LLMKnowledgeChain(LLMChain):
cleaned_input_str = (
    expression.replace("\"", "").replace("“", "").replace("”", "").replace("```", "").strip())
lines = cleaned_input_str.split("\n")
try:
    queries = [(line.split(",")[0].strip(), line.split(",")[1].strip()) for line in lines]
except:
    queries = [(line.split(",")[0].strip(), line.split(",")[1].strip()) for line in lines]
await run_manager.on_text("知识库查询询内容:\n\n" + str(queries) + " \n\n", color="blue",
verbose=self.verbose)
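
For readers following the fix: the new try/except tolerates model output that uses the full-width comma instead of the ASCII comma the prompt explicitly asks for. A minimal standalone sketch of that fallback (the helper name and sample strings are illustrative assumptions, not code from this commit):

```python
# Illustrative sketch of the fallback parsing added above; not the project's code.
def parse_queries(cleaned_input_str: str) -> list:
    lines = [line for line in cleaned_input_str.split("\n") if line.strip()]
    try:
        # Preferred format: "knowledge base name,query" with an ASCII comma.
        return [(line.split(",")[0].strip(), line.split(",")[1].strip()) for line in lines]
    except IndexError:
        # Models sometimes emit the full-width comma "," despite the prompt; retry with it.
        return [(line.split(",")[0].strip(), line.split(",")[1].strip()) for line in lines]

print(parse_queries("samples,what are the job prospects in big data"))
# -> [('samples', 'what are the job prospects in big data')]
```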


@ -20,21 +20,25 @@ tools = [
        func=weathercheck,
        name="weather query tool",
        description="Use this tool to query the weather across China for the next 24 hours; no internet access is required",
        args_schema=WhetherSchema,
    ),
    Tool.from_function(
        func=shell,
        name="shell tool",
        description="Use the command line tool to produce output",
        args_schema=ShellInput,
    ),
    Tool.from_function(
        func=knowledge_search_more,
        name="knowledge base query tool",
        description="Consult the knowledge base first to obtain answers",
        args_schema=KnowledgeSearchInput,
    ),
    Tool.from_function(
        func=search_internet,
        name="internet search tool",
        description="If you cannot access the internet directly, this tool can help you query Bing to answer questions",
        args_schema=SearchInternetInput,
    ),
    Tool.from_function(
        func=wolfram,
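
The `args_schema` arguments added above bind a typed input schema to each tool. A hedged sketch of what one such schema could look like, in the spirit of the schemas referenced here (the field name and the stand-in weather function are invented for illustration; the real WhetherSchema lives elsewhere in the repo):

```python
# Hypothetical args_schema in the style used above; names are assumptions.
from pydantic import BaseModel, Field
from langchain.tools import Tool

class WeatherInput(BaseModel):
    location: str = Field(description="City or district to query, e.g. Shanghai")

def fake_weathercheck(location: str) -> str:
    # Stand-in for the repo's weathercheck; returns a canned string.
    return f"24-hour forecast for {location}: ..."

weather_tool = Tool.from_function(
    func=fake_weathercheck,
    name="weather query tool",
    description="Use this tool to query the weather across China for the next 24 hours; no internet access is required",
    args_schema=WeatherInput,
)
print(weather_tool.run("Shanghai"))
```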


@ -76,7 +76,7 @@ async def agent_chat(query: str = Body(..., description="user input", examples=
agent = LLMSingleActionAgent(
    llm_chain=llm_chain,
    output_parser=output_parser,
    stop=["\nObservation:", "Observation:", "<|im_end|>", "<|observation|>"],
    stop=["\nObservation:", "Observation:", "<|im_end|>", "<|observation|>"],  # used with the Qwen model
    allowed_tools=tool_names,
)
# Convert history into the agent's memory
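
As a note on the extended stop list: a ReAct-style agent has to stop generating at the Observation marker so that the executor, not the model, supplies the tool result; per the comment above, `<|im_end|>` covers Qwen's chat format. A tiny illustration (the sample completion is invented):

```python
# Why the stop strings matter for a ReAct-style agent; the completion is made up.
completion = (
    "Thought: I should consult the knowledge base\n"
    "Action: knowledge base query tool\n"
    "Action Input: job prospects in big data\n"
    "Observation: <the model would hallucinate a tool result here>"
)
for stop in ["\nObservation:", "Observation:", "<|im_end|>", "<|observation|>"]:
    completion = completion.split(stop)[0]
print(completion)  # generation is cut before the fabricated Observation,
                   # so the executor can run the real tool and append its output
```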


@ -11,7 +11,7 @@ from langchain.prompts.chat import ChatPromptTemplate
from typing import List, Optional
from server.chat.utils import History
from server.utils import get_prompt_template
from server.db.repository.chat_history_repository import add_chat_history_to_db, update_chat_history
from server.db.repository import add_chat_history_to_db, update_chat_history
async def chat(query: str = Body(..., description="user input", examples=["恼羞成怒"]),


@ -0,0 +1,3 @@
from .chat_history_repository import *
from .knowledge_base_repository import *
from .knowledge_file_repository import *
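
This new `__init__.py` is what lets the import in the chat module above shrink from the full module path to the package root. A toy, self-contained demonstration of the wildcard re-export pattern (all paths and names below are invented for the demo):

```python
# Build a tiny package on disk that mirrors the "from .module import *" trick,
# then import a name from the package root instead of the submodule.
import os, sys, tempfile

root = tempfile.mkdtemp()
os.makedirs(os.path.join(root, "repository"))
with open(os.path.join(root, "repository", "chat_history_repository.py"), "w") as f:
    f.write("def add_chat_history_to_db():\n    return 'saved'\n")
with open(os.path.join(root, "repository", "__init__.py"), "w") as f:
    f.write("from .chat_history_repository import *\n")

sys.path.insert(0, root)
from repository import add_chat_history_to_db  # the shortened import now resolves
print(add_chat_history_to_db())  # -> saved
```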


@ -368,7 +368,7 @@ def recreate_vector_store(
"code": 200,
"msg": f"({i + 1} / {len(files)}): {file_name}",
"total": len(files),
"finished": i,
"finished": i + 1,
"doc": file_name,
}, ensure_ascii=False)
kb.add_doc(kb_file, not_refresh_vs_cache=True)
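
The `finished` change fixes an off-by-one in the streamed progress payload: `msg` already reports a 1-based `(i + 1 / n)` counter, so `finished` should count the current file as well. A minimal reproduction of the reporting loop (file names invented):

```python
import json

# Minimal reproduction of the progress stream above, with invented file names.
files = ["a.md", "b.pdf", "c.txt"]
for i, file_name in enumerate(files):
    print(json.dumps({
        "code": 200,
        "msg": f"({i + 1} / {len(files)}): {file_name}",
        "total": len(files),
        "finished": i + 1,  # previously `i`, which left the run reporting 2 of 3
        "doc": file_name,
    }, ensure_ascii=False))
```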


@ -15,7 +15,6 @@ import httpx
from typing import Literal, Optional, Callable, Generator, Dict, Any, Awaitable, Union
async def wrap_done(fn: Awaitable, event: asyncio.Event):
"""Wrap an awaitable with a event to signal when it's done or an exception is raised."""
try:
@ -41,47 +40,7 @@ def get_ChatOpenAI(
) -> ChatOpenAI:
## The following models are natively supported by Langchain and do not go through the Fschat wrapper
config_models = list_config_llm_models()
if model_name in config_models.get("langchain", {}):
    config = config_models["langchain"][model_name]
    if model_name == "Azure-OpenAI":
        model = AzureChatOpenAI(
            streaming=streaming,
            verbose=verbose,
            callbacks=callbacks,
            deployment_name=config.get("deployment_name"),
            model_version=config.get("model_version"),
            openai_api_type=config.get("openai_api_type"),
            openai_api_base=config.get("api_base_url"),
            openai_api_version=config.get("api_version"),
            openai_api_key=config.get("api_key"),
            openai_proxy=config.get("openai_proxy"),
            temperature=temperature,
            max_tokens=max_tokens,
        )
    elif model_name == "OpenAI":
        model = ChatOpenAI(
            streaming=streaming,
            verbose=verbose,
            callbacks=callbacks,
            model_name=config.get("model_name"),
            openai_api_base=config.get("api_base_url"),
            openai_api_key=config.get("api_key"),
            openai_proxy=config.get("openai_proxy"),
            temperature=temperature,
            max_tokens=max_tokens,
        )
    elif model_name == "Anthropic":
        model = ChatAnthropic(
            streaming=streaming,
            verbose=verbose,
            callbacks=callbacks,
            model_name=config.get("model_name"),
            anthropic_api_key=config.get("api_key"),
        )
    ## TODO: support other models natively supported by Langchain
else:
    ## Models not natively supported by Langchain go through the Fschat wrapper
    config = get_model_worker_config(model_name)
    model = ChatOpenAI(
@ -437,7 +396,6 @@ def get_model_worker_config(model_name: str = None) -> dict:
config.update(ONLINE_LLM_MODEL.get(model_name, {}).copy())
config.update(FSCHAT_MODEL_WORKERS.get(model_name, {}).copy())
if model_name in ONLINE_LLM_MODEL:
    config["online_api"] = True
    if provider := config.get("provider"):
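
Taken together, the deletion above removes the special-cased Langchain-native constructors, so every model is configured via get_model_worker_config and an OpenAI-compatible client. A hedged sketch of that unified construction (the endpoint, key, and model name below are placeholder assumptions, not values from this commit):

```python
# Sketch of the unified path: local and online models alike are reached through
# an OpenAI-compatible endpoint served by fschat.
from langchain.chat_models import ChatOpenAI

model = ChatOpenAI(
    streaming=True,
    verbose=False,
    openai_api_base="http://127.0.0.1:20000/v1",  # assumed fschat openai_api server address
    openai_api_key="EMPTY",                       # fschat does not check the key
    model_name="chatglm3-6b",                     # assumed worker model name
    temperature=0.7,
    max_tokens=1024,
)
```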