Fix some bugs (#1965)

* Fix some bugs

* Clean up a portion of the earlier bugs
zR 2023-11-03 18:56:27 +08:00 committed by GitHub
parent fa906b33a8
commit 57612d5232
20 changed files with 1881 additions and 1908 deletions


@@ -148,3 +148,5 @@ $ python startup.py -a
 ### Official WeChat Account
 ![](img/official_wechat_mp_account.png)
 🎉 Scan the QR code to follow the official WeChat account of the Langchain-Chatchat project.
+
+<u>[Langchain-Chatchat](https://github.com/chatchat-space/Langchain-Chatchat)</u>: an open-source, offline-deployable retrieval-augmented generation (RAG) LLM knowledge base project built on large language models such as ChatGLM and application frameworks such as Langchain.


@@ -6,8 +6,6 @@ import langchain
 log_verbose = False
 langchain.verbose = False
-# Whether to save chat history
-SAVE_CHAT_HISTORY = False
 # Normally there is no need to change anything below

Binary image file changed (previously 188 KiB); contents not shown.


@@ -1,6 +1,6 @@
-langchain>=0.0.324
+langchain>=0.0.326
 langchain-experimental>=0.0.30
-fschat[model_worker]==0.2.31
+fschat[model_worker]==0.2.32
 xformers>=0.0.22.post4
 openai>=0.28.1
 sentence_transformers
@@ -48,7 +48,7 @@ pandas~=2.0.3
 streamlit>=1.26.0
 streamlit-option-menu>=0.3.6
 streamlit-antd-components>=0.1.11
-streamlit-chatbox==1.1.11
+streamlit-chatbox>=1.1.11
 streamlit-aggrid>=0.3.4.post3
 httpx[brotli,http2,socks]>=0.25.0
 watchdog


@@ -1,10 +1,10 @@
-langchain>=0.0.324
+langchain>=0.0.326
 langchain-experimental>=0.0.30
-fschat[model_worker]==0.2.31
+fschat[model_worker]==0.2.32
 xformers>=0.0.22.post4
 openai>=0.28.1
 sentence_transformers>=2.2.2
-transformers>=4.34
+transformers>=4.35
 torch>=2.1
 torchvision
 torchaudio


@@ -1,12 +1,12 @@
-langchain>=0.0.324
-fschat>=0.2.31
+langchain>=0.0.326
+fschat>=0.2.32
 openai
 # sentence_transformers
-# transformers>=4.33.0
+# transformers>=4.35.0
 # torch>=2.0.1
 # torchvision
 # torchaudio
-fastapi>=0.103.1
+fastapi>=0.104.1
 python-multipart
 nltk~=3.8.1
 uvicorn~=0.23.1


@@ -71,15 +71,16 @@ bigdata,大数据的就业情况如何
 这些数据库是你能访问的,冒号之前是他们的名字,冒号之后是他们的功能,你应该参考他们的功能来帮助你思考
 {database_names}
 你的回答格式应该按照下面的内容,请注意,```text 等标记都必须输出,这是我用来提取答案的标记。
-不要输出中文的逗号,不要输出引号。
 Question: ${{用户的问题}}
 ```text
-${{知识库名称,查询问题,不要带有任何除了,之外的符号}}
+${{知识库名称,查询问题,不要带有任何除了,之外的符号,比如不要输出中文的逗号,不要输出引号}}
 ```output
 数据库查询的结果
@@ -165,7 +166,11 @@ class LLMKnowledgeChain(LLMChain):
         lines = cleaned_input_str.split("\n")
         # Split each line on the comma to form a list of (database, query) tuples
-        queries = [(line.split(",")[0].strip(), line.split(",")[1].strip()) for line in lines]
+        try:
+            queries = [(line.split(",")[0].strip(), line.split(",")[1].strip()) for line in lines]
+        except:
+            queries = [(line.split(",")[0].strip(), line.split(",")[1].strip()) for line in lines]
+        print(queries)
         run_manager.on_text("知识库查询询内容:\n\n" + str(queries) + " \n\n", color="blue", verbose=self.verbose)
         output = self._evaluate_expression(queries)
         run_manager.on_text("\nAnswer: ", verbose=self.verbose)
@@ -193,7 +198,10 @@ class LLMKnowledgeChain(LLMChain):
         cleaned_input_str = (
             expression.replace("\"", "").replace("“", "").replace("”", "").replace("```", "").strip())
         lines = cleaned_input_str.split("\n")
-        queries = [(line.split(",")[0].strip(), line.split(",")[1].strip()) for line in lines]
+        try:
+            queries = [(line.split(",")[0].strip(), line.split(",")[1].strip()) for line in lines]
+        except:
+            queries = [(line.split(",")[0].strip(), line.split(",")[1].strip()) for line in lines]
         await run_manager.on_text("知识库查询询内容:\n\n" + str(queries) + " \n\n", color="blue",
                                   verbose=self.verbose)
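The try/except added above tolerates model output that uses the full-width Chinese comma "," instead of the ASCII comma the prompt asks for: indexing [1] raises when the ASCII split fails, and the fallback splits on the full-width character. A standalone sketch of that behavior (a hypothetical helper, not code from this commit):

```python
def parse_queries(text: str):
    # Split "knowledge_base,query" lines; indexing [1] raises IndexError
    # when a line has no ASCII comma, so retry with the full-width comma.
    lines = [l for l in text.strip().split("\n") if l.strip()]
    try:
        return [(l.split(",")[0].strip(), l.split(",")[1].strip()) for l in lines]
    except IndexError:
        return [(l.split(",")[0].strip(), l.split(",")[1].strip()) for l in lines]

print(parse_queries("samples,如何提问"))   # [('samples', '如何提问')]
print(parse_queries("samples,如何提问"))   # full-width comma parses via the fallback
```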


@@ -20,21 +20,25 @@ tools = [
     Tool.from_function(
         func=weathercheck,
         name="天气查询工具",
         description="无需访问互联网,使用这个工具查询中国各地未来24小时的天气",
+        args_schema=WhetherSchema,
     ),
     Tool.from_function(
         func=shell,
         name="shell工具",
         description="使用命令行工具输出",
+        args_schema=ShellInput,
     ),
     Tool.from_function(
         func=knowledge_search_more,
         name="知识库查询工具",
         description="优先访问知识库来获取答案",
+        args_schema=KnowledgeSearchInput,
     ),
     Tool.from_function(
         func=search_internet,
         name="互联网查询工具",
         description="如果你无法访问互联网,这个工具可以帮助你访问Bing互联网来解答问题",
+        args_schema=SearchInternetInput,
     ),
     Tool.from_function(
         func=wolfram,
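The args_schema arguments added above reference pydantic models defined elsewhere in the repo; they give the agent a typed description of each tool's input. A hedged sketch of what one such schema might look like (the field name and description here are assumptions, not the repo's actual definitions):

```python
from pydantic import BaseModel, Field

class KnowledgeSearchInput(BaseModel):
    query: str = Field(description="the question to send to the knowledge base")
```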


@@ -76,7 +76,7 @@ async def agent_chat(query: str = Body(..., description="用户输入", examples
     agent = LLMSingleActionAgent(
         llm_chain=llm_chain,
         output_parser=output_parser,
-        stop=["\nObservation:", "Observation:", "<|im_end|>", "<|observation|>"],
+        stop=["\nObservation:", "Observation:", "<|im_end|>", "<|observation|>"],  # these are used by the Qwen model
         allowed_tools=tool_names,
     )
     # Convert history into the agent's memory
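The stop list cuts generation before the model starts inventing its own Observation; per the new comment, the "<|im_end|>" and "<|observation|>" tokens are there for Qwen-style chat models. A minimal illustration of how stop strings truncate raw output (a sketch, not this repo's code):

```python
def truncate_at_stop(text: str,
                     stops=("\nObservation:", "Observation:", "<|im_end|>", "<|observation|>")) -> str:
    # Keep only the text before the earliest stop marker that appears.
    for s in stops:
        i = text.find(s)
        if i != -1:
            text = text[:i]
    return text

print(truncate_at_stop("Thought: done<|im_end|>leftover"))  # "Thought: done"
```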


@@ -11,7 +11,7 @@ from langchain.prompts.chat import ChatPromptTemplate
 from typing import List, Optional
 from server.chat.utils import History
 from server.utils import get_prompt_template
-from server.db.repository.chat_history_repository import add_chat_history_to_db, update_chat_history
+from server.db.repository import add_chat_history_to_db, update_chat_history

 async def chat(query: str = Body(..., description="用户输入", examples=["恼羞成怒"]),


@@ -0,0 +1,3 @@
+from .chat_history_repository import *
+from .knowledge_base_repository import *
+from .knowledge_file_repository import *
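With this new package-level __init__ re-exporting the repository submodules, callers can import helpers from the package root, which is exactly what the chat.py change above relies on:

```python
# Both imports now resolve to the same function:
from server.db.repository import add_chat_history_to_db
from server.db.repository.chat_history_repository import add_chat_history_to_db
```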


@@ -368,7 +368,7 @@ def recreate_vector_store(
             "code": 200,
             "msg": f"({i + 1} / {len(files)}): {file_name}",
             "total": len(files),
-            "finished": i,
+            "finished": i + 1,
             "doc": file_name,
         }, ensure_ascii=False)
         kb.add_doc(kb_file, not_refresh_vs_cache=True)
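The counter fix is a classic off-by-one: enumerate() is zero-based, so "finished": i reported 0 after the first file completed. With i + 1 the reported count matches the number of files actually processed:

```python
files = ["a.md", "b.md", "c.md"]
for i, file_name in enumerate(files):
    # After processing file i, exactly i + 1 files are finished.
    print({"finished": i + 1, "total": len(files), "doc": file_name})
```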


@@ -15,7 +15,6 @@ import httpx
 from typing import Literal, Optional, Callable, Generator, Dict, Any, Awaitable, Union
-
 async def wrap_done(fn: Awaitable, event: asyncio.Event):
     """Wrap an awaitable with a event to signal when it's done or an exception is raised."""
     try:
@@ -41,61 +40,21 @@ def get_ChatOpenAI(
 ) -> ChatOpenAI:
     ## The following models are natively supported by Langchain and do not go through the Fschat wrapper
     config_models = list_config_llm_models()
-    if model_name in config_models.get("langchain", {}):
-        config = config_models["langchain"][model_name]
-        if model_name == "Azure-OpenAI":
-            model = AzureChatOpenAI(
-                streaming=streaming,
-                verbose=verbose,
-                callbacks=callbacks,
-                deployment_name=config.get("deployment_name"),
-                model_version=config.get("model_version"),
-                openai_api_type=config.get("openai_api_type"),
-                openai_api_base=config.get("api_base_url"),
-                openai_api_version=config.get("api_version"),
-                openai_api_key=config.get("api_key"),
-                openai_proxy=config.get("openai_proxy"),
-                temperature=temperature,
-                max_tokens=max_tokens,
-            )
-        elif model_name == "OpenAI":
-            model = ChatOpenAI(
-                streaming=streaming,
-                verbose=verbose,
-                callbacks=callbacks,
-                model_name=config.get("model_name"),
-                openai_api_base=config.get("api_base_url"),
-                openai_api_key=config.get("api_key"),
-                openai_proxy=config.get("openai_proxy"),
-                temperature=temperature,
-                max_tokens=max_tokens,
-            )
-        elif model_name == "Anthropic":
-            model = ChatAnthropic(
-                streaming=streaming,
-                verbose=verbose,
-                callbacks=callbacks,
-                model_name=config.get("model_name"),
-                anthropic_api_key=config.get("api_key"),
-            )
-        ## TODO: support the other models natively supported by Langchain
-    else:
-        ## Models not natively supported by Langchain go through the Fschat wrapper
-        config = get_model_worker_config(model_name)
-        model = ChatOpenAI(
-            streaming=streaming,
-            verbose=verbose,
-            callbacks=callbacks,
-            openai_api_key=config.get("api_key", "EMPTY"),
-            openai_api_base=config.get("api_base_url", fschat_openai_api_address()),
-            model_name=model_name,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            openai_proxy=config.get("openai_proxy"),
-            **kwargs
-        )
+    ## Models not natively supported by Langchain go through the Fschat wrapper
+    config = get_model_worker_config(model_name)
+    model = ChatOpenAI(
+        streaming=streaming,
+        verbose=verbose,
+        callbacks=callbacks,
+        openai_api_key=config.get("api_key", "EMPTY"),
+        openai_api_base=config.get("api_base_url", fschat_openai_api_address()),
+        model_name=model_name,
+        temperature=temperature,
+        max_tokens=max_tokens,
+        openai_proxy=config.get("openai_proxy"),
+        **kwargs
+    )
     return model
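After this refactor, every configured model reaches the LLM through Fschat's OpenAI-compatible endpoint rather than through per-provider Langchain classes. A hedged usage sketch (the model name is illustrative, and the parameters not shown in this hunk are assumed to have defaults):

```python
model = get_ChatOpenAI(model_name="chatglm3-6b", temperature=0.7, max_tokens=1024)
```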
@@ -249,9 +208,9 @@ def torch_gc():
                 empty_cache()
             except Exception as e:
                 msg = ("如果您使用的是 macOS,建议将 pytorch 版本升级至 2.0.0 或更高版本,"
                        "以支持及时清理 torch 产生的内存占用。")
                 logger.error(f'{e.__class__.__name__}: {msg}',
                              exc_info=e if log_verbose else None)
     except Exception:
         ...
@@ -437,7 +396,6 @@ def get_model_worker_config(model_name: str = None) -> dict:
     config.update(ONLINE_LLM_MODEL.get(model_name, {}).copy())
     config.update(FSCHAT_MODEL_WORKERS.get(model_name, {}).copy())
-
     if model_name in ONLINE_LLM_MODEL:
         config["online_api"] = True
         if provider := config.get("provider"):
@@ -474,7 +432,7 @@ def fschat_controller_address() -> str:
 def fschat_model_worker_address(model_name: str = LLM_MODEL) -> str:
-    if model := get_model_worker_config(model_name):  # TODO: depends fastchat
+    if model := get_model_worker_config(model_name):
         host = model["host"]
         if host == "0.0.0.0":
             host = "127.0.0.1"
@@ -624,7 +582,7 @@ def run_in_thread_pool(
         thread = pool.submit(func, **kwargs)
         tasks.append(thread)
     for obj in as_completed(tasks):  # TODO: Ctrl+C cannot stop this
         yield obj.result()
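The context of this last hunk shows the helper's core pattern: submit one task per parameter set, then yield results as they complete. A self-contained version of that pattern (the signature is an assumption; only the submit/as_completed loop is taken from the hunk):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Callable, Dict, Generator, List

def run_in_thread_pool(func: Callable, params: List[Dict[str, Any]]) -> Generator:
    # Run func once per kwargs dict and yield results in completion order.
    with ThreadPoolExecutor() as pool:
        tasks = [pool.submit(func, **kwargs) for kwargs in params]
        for obj in as_completed(tasks):  # the source notes Ctrl+C cannot interrupt this loop
            yield obj.result()
```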