修复milvus相关bug

This commit is contained in:
zqt 2024-01-30 21:28:42 +08:00
parent 845adfbc0b
commit cbfbbe5e6b
3 changed files with 38 additions and 24 deletions

View File

@ -5,6 +5,19 @@ from server.knowledge_base.utils import KnowledgeFile
from typing import List, Dict
@with_session
def list_file_num_docs_id_by_kb_name_and_file_name(
        session,
        kb_name: str,
        file_name: str,
) -> List[int]:
    """
    List the ids of all Documents recorded for one file of one knowledge base.

    Selects FileDocModel.doc_id for the rows whose kb_name and file_name
    match the arguments and converts every stored id with int().

    session: database session used for the query (presumably supplied by
        the with_session decorator -- confirm against its definition).
    kb_name: name of the knowledge base to look in.
    file_name: name of the file whose document ids are wanted.

    Returns a list of the form [int, ...]. int() raises if a stored id
    cannot be converted to an integer.
    """
    matching_rows = (
        session.query(FileDocModel.doc_id)
        .filter_by(kb_name=kb_name, file_name=file_name)
        .all()
    )
    numeric_ids = []
    for row in matching_rows:
        # Only one column is selected, so the id is the first element of each row.
        numeric_ids.append(int(row[0]))
    return numeric_ids
@with_session
def list_docs_from_db(session,
kb_name: str,
@ -140,8 +153,10 @@ def delete_file_from_db(session, kb_file: KnowledgeFile):
@with_session
def delete_files_from_db(session, knowledge_base_name: str):
session.query(KnowledgeFileModel).filter(KnowledgeFileModel.kb_name.ilike(knowledge_base_name)).delete(synchronize_session=False)
session.query(FileDocModel).filter(FileDocModel.kb_name.ilike(knowledge_base_name)).delete(synchronize_session=False)
session.query(KnowledgeFileModel).filter(KnowledgeFileModel.kb_name.ilike(knowledge_base_name)).delete(
synchronize_session=False)
session.query(FileDocModel).filter(FileDocModel.kb_name.ilike(knowledge_base_name)).delete(
synchronize_session=False)
kb = session.query(KnowledgeBaseModel).filter(KnowledgeBaseModel.kb_name.ilike(knowledge_base_name)).first()
if kb:
kb.file_count = 0

View File

@ -315,6 +315,7 @@ class KBServiceFactory:
from server.knowledge_base.kb_service.zilliz_kb_service import ZillizKBService
return ZillizKBService(kb_name, embed_model=embed_model)
elif SupportedVSType.DEFAULT == vector_store_type:
from server.knowledge_base.kb_service.milvus_kb_service import MilvusKBService
return MilvusKBService(kb_name,
embed_model=embed_model) # other milvus parameters are set in model_config.kbs_config
elif SupportedVSType.ES == vector_store_type:

View File

@ -5,6 +5,7 @@ from langchain.vectorstores.milvus import Milvus
import os
from configs import kbs_config
from server.db.repository import list_file_num_docs_id_by_kb_name_and_file_name
from server.knowledge_base.kb_service.base import KBService, SupportedVSType, EmbeddingsFunAdapter, \
score_threshold_process
@ -23,7 +24,7 @@ class MilvusKBService(KBService):
result = []
if self.milvus.col:
# ids = [int(id) for id in ids] # for milvus if needed #pr 2725
data_list = self.milvus.col.query(expr=f'pk in {ids}', output_fields=["*"])
data_list = self.milvus.col.query(expr=f'pk in {[int(_id) for _id in ids]}', output_fields=["*"])
for data in data_list:
text = data.pop("text")
result.append(Document(page_content=text, metadata=data))
@ -84,12 +85,9 @@ class MilvusKBService(KBService):
return doc_infos
def do_delete_doc(self, kb_file: KnowledgeFile, **kwargs):
# Delete the entries associated with kb_file from the Milvus collection.
# kwargs is accepted but not read by any statement shown here.
# NOTE(review): this listing has lost its indentation and looks like it
# interleaves the removed and the added lines of a diff hunk -- confirm
# which statements are current before relying on the comments below.
# Fetch the integer document ids recorded for this knowledge base / file name.
id_list = list_file_num_docs_id_by_kb_name_and_file_name(kb_file.kb_name, kb_file.filename)
# Proceed only when self.milvus.col is truthy.
if self.milvus.col:
# Double every backslash in the file path (presumably so the path can be
# embedded in the quoted filter expression below -- verify).
filepath = kb_file.filepath.replace('\\', '\\\\')
# filename is assigned here but not read by any later statement in this method.
filename = os.path.basename(filepath)
# Query the collection for rows whose source field equals the path and
# collect their "pk" values.
delete_list = [item.get("pk") for item in
self.milvus.col.query(expr=f'source == "{filepath}"', output_fields=["pk"])]
# Delete the rows whose pk is in the list found by the source query.
self.milvus.col.delete(expr=f'pk in {delete_list}')
# Delete the rows whose pk is in the id list fetched at the top of the method.
self.milvus.col.delete(expr=f'pk in {id_list}')
def do_clear_vs(self):
if self.milvus.col: