mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-01-19 21:37:20 +08:00
commit
b7a249f46a
@ -5,6 +5,19 @@ from server.knowledge_base.utils import KnowledgeFile
|
||||
from typing import List, Dict
|
||||
|
||||
|
||||
@with_session
def list_file_num_docs_id_by_kb_name_and_file_name(session,
                                                   kb_name: str,
                                                   file_name: str,
                                                   ) -> List[int]:
    '''
    List the ids of all Documents recorded for one file of a knowledge base.

    Selects FileDocModel.doc_id for the rows whose kb_name and file_name
    equal the given values, then converts every id with int().
    Return form: [int, ...]

    NOTE(review): the int() conversion presumably targets vector stores with
    numeric primary keys (the Milvus service imports this helper) - confirm
    before using it with stores whose ids are not numeric.
    '''
    id_query = session.query(FileDocModel.doc_id)
    rows = id_query.filter_by(kb_name=kb_name, file_name=file_name).all()
    # Each result row carries a single column; the doc_id is at index 0.
    return [int(row[0]) for row in rows]
|
||||
|
||||
|
||||
@with_session
|
||||
def list_docs_from_db(session,
|
||||
kb_name: str,
|
||||
@ -19,16 +32,16 @@ def list_docs_from_db(session,
|
||||
if file_name:
|
||||
docs = docs.filter(FileDocModel.file_name.ilike(file_name))
|
||||
for k, v in metadata.items():
|
||||
docs = docs.filter(FileDocModel.meta_data[k].as_string()==str(v))
|
||||
docs = docs.filter(FileDocModel.meta_data[k].as_string() == str(v))
|
||||
|
||||
return [{"id": x.doc_id, "metadata": x.metadata} for x in docs.all()]
|
||||
|
||||
|
||||
@with_session
|
||||
def delete_docs_from_db(session,
|
||||
kb_name: str,
|
||||
file_name: str = None,
|
||||
) -> List[Dict]:
|
||||
kb_name: str,
|
||||
file_name: str = None,
|
||||
) -> List[Dict]:
|
||||
'''
|
||||
删除某知识库某文件对应的所有Document,并返回被删除的Document。
|
||||
返回形式:[{"id": str, "metadata": dict}, ...]
|
||||
@ -51,7 +64,7 @@ def add_docs_to_db(session,
|
||||
将某知识库某文件对应的所有Document信息添加到数据库。
|
||||
doc_infos形式:[{"id": str, "metadata": dict}, ...]
|
||||
'''
|
||||
#! 这里会出现doc_infos为None的情况,需要进一步排查
|
||||
# ! 这里会出现doc_infos为None的情况,需要进一步排查
|
||||
if doc_infos is None:
|
||||
print("输入的server.db.repository.knowledge_file_repository.add_docs_to_db的doc_infos参数为None")
|
||||
return False
|
||||
@ -80,18 +93,18 @@ def list_files_from_db(session, kb_name):
|
||||
|
||||
@with_session
|
||||
def add_file_to_db(session,
|
||||
kb_file: KnowledgeFile,
|
||||
docs_count: int = 0,
|
||||
custom_docs: bool = False,
|
||||
doc_infos: List[Dict] = [], # 形式:[{"id": str, "metadata": dict}, ...]
|
||||
):
|
||||
kb_file: KnowledgeFile,
|
||||
docs_count: int = 0,
|
||||
custom_docs: bool = False,
|
||||
doc_infos: List[Dict] = [], # 形式:[{"id": str, "metadata": dict}, ...]
|
||||
):
|
||||
kb = session.query(KnowledgeBaseModel).filter_by(kb_name=kb_file.kb_name).first()
|
||||
if kb:
|
||||
# 如果已经存在该文件,则更新文件信息与版本号
|
||||
existing_file: KnowledgeFileModel = (session.query(KnowledgeFileModel)
|
||||
.filter(KnowledgeFileModel.kb_name.ilike(kb_file.kb_name),
|
||||
KnowledgeFileModel.file_name.ilike(kb_file.filename))
|
||||
.first())
|
||||
.first())
|
||||
mtime = kb_file.get_mtime()
|
||||
size = kb_file.get_size()
|
||||
|
||||
@ -111,7 +124,7 @@ def add_file_to_db(session,
|
||||
text_splitter_name=kb_file.text_splitter_name or "SpacyTextSplitter",
|
||||
file_mtime=mtime,
|
||||
file_size=size,
|
||||
docs_count = docs_count,
|
||||
docs_count=docs_count,
|
||||
custom_docs=custom_docs,
|
||||
)
|
||||
kb.file_count += 1
|
||||
@ -124,8 +137,8 @@ def add_file_to_db(session,
|
||||
def delete_file_from_db(session, kb_file: KnowledgeFile):
|
||||
existing_file = (session.query(KnowledgeFileModel)
|
||||
.filter(KnowledgeFileModel.file_name.ilike(kb_file.filename),
|
||||
KnowledgeFileModel.kb_name.ilike(kb_file.kb_name))
|
||||
.first())
|
||||
KnowledgeFileModel.kb_name.ilike(kb_file.kb_name))
|
||||
.first())
|
||||
if existing_file:
|
||||
session.delete(existing_file)
|
||||
delete_docs_from_db(kb_name=kb_file.kb_name, file_name=kb_file.filename)
|
||||
@ -140,8 +153,10 @@ def delete_file_from_db(session, kb_file: KnowledgeFile):
|
||||
|
||||
@with_session
|
||||
def delete_files_from_db(session, knowledge_base_name: str):
|
||||
session.query(KnowledgeFileModel).filter(KnowledgeFileModel.kb_name.ilike(knowledge_base_name)).delete(synchronize_session=False)
|
||||
session.query(FileDocModel).filter(FileDocModel.kb_name.ilike(knowledge_base_name)).delete(synchronize_session=False)
|
||||
session.query(KnowledgeFileModel).filter(KnowledgeFileModel.kb_name.ilike(knowledge_base_name)).delete(
|
||||
synchronize_session=False)
|
||||
session.query(FileDocModel).filter(FileDocModel.kb_name.ilike(knowledge_base_name)).delete(
|
||||
synchronize_session=False)
|
||||
kb = session.query(KnowledgeBaseModel).filter(KnowledgeBaseModel.kb_name.ilike(knowledge_base_name)).first()
|
||||
if kb:
|
||||
kb.file_count = 0
|
||||
@ -154,8 +169,8 @@ def delete_files_from_db(session, knowledge_base_name: str):
|
||||
def file_exists_in_db(session, kb_file: KnowledgeFile):
    '''
    Tell whether a KnowledgeFileModel row is stored for the given file.

    The row is looked up by kb_file.filename and kb_file.kb_name, both
    compared with ilike. Returns True when a row is found, otherwise False.
    '''
    name_matches = KnowledgeFileModel.file_name.ilike(kb_file.filename)
    kb_matches = KnowledgeFileModel.kb_name.ilike(kb_file.kb_name)
    record = session.query(KnowledgeFileModel).filter(name_matches, kb_matches).first()
    # first() yields None when nothing matched, which maps to False here.
    return bool(record)
|
||||
|
||||
|
||||
|
||||
@ -315,6 +315,7 @@ class KBServiceFactory:
|
||||
from server.knowledge_base.kb_service.zilliz_kb_service import ZillizKBService
|
||||
return ZillizKBService(kb_name, embed_model=embed_model)
|
||||
elif SupportedVSType.DEFAULT == vector_store_type:
|
||||
from server.knowledge_base.kb_service.milvus_kb_service import MilvusKBService
|
||||
return MilvusKBService(kb_name,
|
||||
embed_model=embed_model) # other milvus parameters are set in model_config.kbs_config
|
||||
elif SupportedVSType.ES == vector_store_type:
|
||||
|
||||
@ -5,6 +5,7 @@ from langchain.vectorstores.milvus import Milvus
|
||||
import os
|
||||
|
||||
from configs import kbs_config
|
||||
from server.db.repository import list_file_num_docs_id_by_kb_name_and_file_name
|
||||
|
||||
from server.knowledge_base.kb_service.base import KBService, SupportedVSType, EmbeddingsFunAdapter, \
|
||||
score_threshold_process
|
||||
@ -23,7 +24,7 @@ class MilvusKBService(KBService):
|
||||
result = []
|
||||
if self.milvus.col:
|
||||
# ids = [int(id) for id in ids] # for milvus if needed #pr 2725
|
||||
data_list = self.milvus.col.query(expr=f'pk in {ids}', output_fields=["*"])
|
||||
data_list = self.milvus.col.query(expr=f'pk in {[int(_id) for _id in ids]}', output_fields=["*"])
|
||||
for data in data_list:
|
||||
text = data.pop("text")
|
||||
result.append(Document(page_content=text, metadata=data))
|
||||
@ -84,12 +85,9 @@ class MilvusKBService(KBService):
|
||||
return doc_infos
|
||||
|
||||
def do_delete_doc(self, kb_file: KnowledgeFile, **kwargs):
    '''
    Delete the entities of kb_file from the Milvus collection.

    The primary keys to delete are the document ids returned by
    list_file_num_docs_id_by_kb_name_and_file_name for the file's
    kb_name and filename. Nothing is deleted when self.milvus.col is
    falsy. Extra keyword arguments are accepted and not used.
    '''
    doc_pks = list_file_num_docs_id_by_kb_name_and_file_name(kb_file.kb_name, kb_file.filename)
    collection = self.milvus.col
    if not collection:
        return
    # The id list is rendered into the expression as a Python list literal.
    collection.delete(expr=f'pk in {doc_pks}')
|
||||
|
||||
def do_clear_vs(self):
|
||||
if self.milvus.col:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user