diff --git a/README.md b/README.md
index e69818c3..2e97dc65 100644
--- a/README.md
+++ b/README.md
@@ -193,7 +193,7 @@ $ python startup.py -a
[](https://t.me/+RjliQ3jnJ1YyN2E9)
### 项目交流群
-
+
🎉 Langchain-Chatchat 项目微信交流群,如果你也对本项目感兴趣,欢迎加入群聊参与讨论交流。
diff --git a/img/qr_code_90.jpg b/img/qr_code_90.jpg
new file mode 100644
index 00000000..174d150f
Binary files /dev/null and b/img/qr_code_90.jpg differ
diff --git a/img/qr_code_91.jpg b/img/qr_code_91.jpg
new file mode 100644
index 00000000..54107922
Binary files /dev/null and b/img/qr_code_91.jpg differ
diff --git a/img/qr_code_92.jpg b/img/qr_code_92.jpg
new file mode 100644
index 00000000..4010c44a
Binary files /dev/null and b/img/qr_code_92.jpg differ
diff --git a/img/qr_code_93.jpg b/img/qr_code_93.jpg
new file mode 100644
index 00000000..d83ac08d
Binary files /dev/null and b/img/qr_code_93.jpg differ
diff --git a/img/qr_code_94.jpg b/img/qr_code_94.jpg
new file mode 100644
index 00000000..00e14b81
Binary files /dev/null and b/img/qr_code_94.jpg differ
diff --git a/img/qr_code_95.jpg b/img/qr_code_95.jpg
new file mode 100644
index 00000000..11742e96
Binary files /dev/null and b/img/qr_code_95.jpg differ
diff --git a/img/qr_code_96.jpg b/img/qr_code_96.jpg
new file mode 100644
index 00000000..cc0777df
Binary files /dev/null and b/img/qr_code_96.jpg differ
diff --git a/img/qrcode_90_2.jpg b/img/qrcode_90_2.jpg
new file mode 100644
index 00000000..1666a074
Binary files /dev/null and b/img/qrcode_90_2.jpg differ
diff --git a/server/knowledge_base/kb_doc_api.py b/server/knowledge_base/kb_doc_api.py
index b5246da0..8b200254 100644
--- a/server/knowledge_base/kb_doc_api.py
+++ b/server/knowledge_base/kb_doc_api.py
@@ -38,6 +38,9 @@ def search_docs(
data = [DocumentWithVSId(**x[0].dict(), score=x[1], id=x[0].metadata.get("id")) for x in docs]
elif file_name or metadata:
data = kb.list_docs(file_name=file_name, metadata=metadata)
+ for d in data:
+ if "vector" in d.metadata:
+ del d.metadata["vector"]
return data
diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py
index fc7edac3..0d1e450f 100644
--- a/server/knowledge_base/utils.py
+++ b/server/knowledge_base/utils.py
@@ -16,7 +16,7 @@ import langchain_community.document_loaders
from langchain.docstore.document import Document
from langchain.text_splitter import TextSplitter
from pathlib import Path
-from server.utils import run_in_thread_pool
+from server.utils import run_in_thread_pool, run_in_process_pool
import json
from typing import List, Union, Dict, Tuple, Generator
import chardet
@@ -353,6 +353,16 @@ class KnowledgeFile:
return os.path.getsize(self.filepath)
+def files2docs_in_thread_file2docs(*, file: KnowledgeFile, **kwargs) -> Tuple[bool, Tuple[str, str, List[Document]]]:
+ try:
+ return True, (file.kb_name, file.filename, file.file2text(**kwargs))
+ except Exception as e:
+ msg = f"从文件 {file.kb_name}/{file.filename} 加载文档时出错:{e}"
+ logger.error(f'{e.__class__.__name__}: {msg}',
+ exc_info=e if log_verbose else None)
+ return False, (file.kb_name, file.filename, msg)
+
+
def files2docs_in_thread(
files: List[Union[KnowledgeFile, Tuple[str, str], Dict]],
chunk_size: int = CHUNK_SIZE,
@@ -365,14 +375,6 @@ def files2docs_in_thread(
生成器返回值为 status, (kb_name, file_name, docs | error)
'''
- def file2docs(*, file: KnowledgeFile, **kwargs) -> Tuple[bool, Tuple[str, str, List[Document]]]:
- try:
- return True, (file.kb_name, file.filename, file.file2text(**kwargs))
- except Exception as e:
- msg = f"从文件 {file.kb_name}/{file.filename} 加载文档时出错:{e}"
- logger.error(f'{e.__class__.__name__}: {msg}',
- exc_info=e if log_verbose else None)
- return False, (file.kb_name, file.filename, msg)
kwargs_list = []
for i, file in enumerate(files):
@@ -395,7 +397,7 @@ def files2docs_in_thread(
except Exception as e:
yield False, (kb_name, filename, str(e))
- for result in run_in_thread_pool(func=file2docs, params=kwargs_list):
+ for result in run_in_process_pool(func=files2docs_in_thread_file2docs, params=kwargs_list):
yield result
diff --git a/server/utils.py b/server/utils.py
index c3541504..969a7c6b 100644
--- a/server/utils.py
+++ b/server/utils.py
@@ -2,8 +2,9 @@ from fastapi import FastAPI
from pathlib import Path
import asyncio
import os
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from langchain.embeddings.base import Embeddings
+import sys
+import multiprocessing as mp
+from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.llms import OpenAI
import httpx
@@ -572,11 +573,36 @@ def run_in_thread_pool(
tasks = []
with ThreadPoolExecutor() as pool:
for kwargs in params:
- thread = pool.submit(func, **kwargs)
- tasks.append(thread)
+ tasks.append(pool.submit(func, **kwargs))
for obj in as_completed(tasks):
- yield obj.result()
+ try:
+ yield obj.result()
+ except Exception as e:
+ logger.error(f"error in sub thread: {e}", exc_info=True)
+
+
+def run_in_process_pool(
+ func: Callable,
+ params: List[Dict] = [],
+) -> Generator:
+ '''
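+ Run tasks in batch in a process pool, yielding each task's result as it completes.
+ func, its arguments and its return value must be picklable; pass all task arguments as keywords.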
+ '''
+ tasks = []
+ max_workers = None
+ if sys.platform.startswith("win"):
+ max_workers = min(mp.cpu_count(), 60) # max_workers should not exceed 60 on windows
+ with ProcessPoolExecutor(max_workers=max_workers) as pool:
+ for kwargs in params:
+ tasks.append(pool.submit(func, **kwargs))
+
+ for obj in as_completed(tasks):
+ try:
+ yield obj.result()
+ except Exception as e:
+ logger.error(f"error in sub process: {e}", exc_info=True)
def get_httpx_client(