mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-02-06 23:15:53 +08:00
commit
b7a249f46a
@ -5,6 +5,19 @@ from server.knowledge_base.utils import KnowledgeFile
|
|||||||
from typing import List, Dict
|
from typing import List, Dict
|
||||||
|
|
||||||
|
|
||||||
|
@with_session
def list_file_num_docs_id_by_kb_name_and_file_name(session,
                                                   kb_name: str,
                                                   file_name: str,
                                                   ) -> List[int]:
    '''
    List the ids of all Documents recorded for one file of one knowledge base.

    Rows are selected from FileDocModel by exact equality on kb_name and
    file_name (filter_by), and every stored doc_id is converted with int().
    Return form: [int, ...]

    NOTE(review): int() raises ValueError if a stored doc_id is not an
    integer literal, so this presumably targets vector stores with numeric
    primary keys only - confirm against callers.
    '''
    rows = session.query(FileDocModel.doc_id).filter_by(kb_name=kb_name, file_name=file_name).all()
    # Each row is a one-element tuple holding only the selected doc_id column.
    return [int(row[0]) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
@with_session
|
@with_session
|
||||||
def list_docs_from_db(session,
|
def list_docs_from_db(session,
|
||||||
kb_name: str,
|
kb_name: str,
|
||||||
@ -19,16 +32,16 @@ def list_docs_from_db(session,
|
|||||||
if file_name:
|
if file_name:
|
||||||
docs = docs.filter(FileDocModel.file_name.ilike(file_name))
|
docs = docs.filter(FileDocModel.file_name.ilike(file_name))
|
||||||
for k, v in metadata.items():
|
for k, v in metadata.items():
|
||||||
docs = docs.filter(FileDocModel.meta_data[k].as_string()==str(v))
|
docs = docs.filter(FileDocModel.meta_data[k].as_string() == str(v))
|
||||||
|
|
||||||
return [{"id": x.doc_id, "metadata": x.metadata} for x in docs.all()]
|
return [{"id": x.doc_id, "metadata": x.metadata} for x in docs.all()]
|
||||||
|
|
||||||
|
|
||||||
@with_session
|
@with_session
|
||||||
def delete_docs_from_db(session,
|
def delete_docs_from_db(session,
|
||||||
kb_name: str,
|
kb_name: str,
|
||||||
file_name: str = None,
|
file_name: str = None,
|
||||||
) -> List[Dict]:
|
) -> List[Dict]:
|
||||||
'''
|
'''
|
||||||
删除某知识库某文件对应的所有Document,并返回被删除的Document。
|
删除某知识库某文件对应的所有Document,并返回被删除的Document。
|
||||||
返回形式:[{"id": str, "metadata": dict}, ...]
|
返回形式:[{"id": str, "metadata": dict}, ...]
|
||||||
@ -51,7 +64,7 @@ def add_docs_to_db(session,
|
|||||||
将某知识库某文件对应的所有Document信息添加到数据库。
|
将某知识库某文件对应的所有Document信息添加到数据库。
|
||||||
doc_infos形式:[{"id": str, "metadata": dict}, ...]
|
doc_infos形式:[{"id": str, "metadata": dict}, ...]
|
||||||
'''
|
'''
|
||||||
#! 这里会出现doc_infos为None的情况,需要进一步排查
|
# ! 这里会出现doc_infos为None的情况,需要进一步排查
|
||||||
if doc_infos is None:
|
if doc_infos is None:
|
||||||
print("输入的server.db.repository.knowledge_file_repository.add_docs_to_db的doc_infos参数为None")
|
print("输入的server.db.repository.knowledge_file_repository.add_docs_to_db的doc_infos参数为None")
|
||||||
return False
|
return False
|
||||||
@ -80,18 +93,18 @@ def list_files_from_db(session, kb_name):
|
|||||||
|
|
||||||
@with_session
|
@with_session
|
||||||
def add_file_to_db(session,
|
def add_file_to_db(session,
|
||||||
kb_file: KnowledgeFile,
|
kb_file: KnowledgeFile,
|
||||||
docs_count: int = 0,
|
docs_count: int = 0,
|
||||||
custom_docs: bool = False,
|
custom_docs: bool = False,
|
||||||
doc_infos: List[Dict] = [], # 形式:[{"id": str, "metadata": dict}, ...]
|
doc_infos: List[Dict] = [], # 形式:[{"id": str, "metadata": dict}, ...]
|
||||||
):
|
):
|
||||||
kb = session.query(KnowledgeBaseModel).filter_by(kb_name=kb_file.kb_name).first()
|
kb = session.query(KnowledgeBaseModel).filter_by(kb_name=kb_file.kb_name).first()
|
||||||
if kb:
|
if kb:
|
||||||
# 如果已经存在该文件,则更新文件信息与版本号
|
# 如果已经存在该文件,则更新文件信息与版本号
|
||||||
existing_file: KnowledgeFileModel = (session.query(KnowledgeFileModel)
|
existing_file: KnowledgeFileModel = (session.query(KnowledgeFileModel)
|
||||||
.filter(KnowledgeFileModel.kb_name.ilike(kb_file.kb_name),
|
.filter(KnowledgeFileModel.kb_name.ilike(kb_file.kb_name),
|
||||||
KnowledgeFileModel.file_name.ilike(kb_file.filename))
|
KnowledgeFileModel.file_name.ilike(kb_file.filename))
|
||||||
.first())
|
.first())
|
||||||
mtime = kb_file.get_mtime()
|
mtime = kb_file.get_mtime()
|
||||||
size = kb_file.get_size()
|
size = kb_file.get_size()
|
||||||
|
|
||||||
@ -111,7 +124,7 @@ def add_file_to_db(session,
|
|||||||
text_splitter_name=kb_file.text_splitter_name or "SpacyTextSplitter",
|
text_splitter_name=kb_file.text_splitter_name or "SpacyTextSplitter",
|
||||||
file_mtime=mtime,
|
file_mtime=mtime,
|
||||||
file_size=size,
|
file_size=size,
|
||||||
docs_count = docs_count,
|
docs_count=docs_count,
|
||||||
custom_docs=custom_docs,
|
custom_docs=custom_docs,
|
||||||
)
|
)
|
||||||
kb.file_count += 1
|
kb.file_count += 1
|
||||||
@ -124,8 +137,8 @@ def add_file_to_db(session,
|
|||||||
def delete_file_from_db(session, kb_file: KnowledgeFile):
|
def delete_file_from_db(session, kb_file: KnowledgeFile):
|
||||||
existing_file = (session.query(KnowledgeFileModel)
|
existing_file = (session.query(KnowledgeFileModel)
|
||||||
.filter(KnowledgeFileModel.file_name.ilike(kb_file.filename),
|
.filter(KnowledgeFileModel.file_name.ilike(kb_file.filename),
|
||||||
KnowledgeFileModel.kb_name.ilike(kb_file.kb_name))
|
KnowledgeFileModel.kb_name.ilike(kb_file.kb_name))
|
||||||
.first())
|
.first())
|
||||||
if existing_file:
|
if existing_file:
|
||||||
session.delete(existing_file)
|
session.delete(existing_file)
|
||||||
delete_docs_from_db(kb_name=kb_file.kb_name, file_name=kb_file.filename)
|
delete_docs_from_db(kb_name=kb_file.kb_name, file_name=kb_file.filename)
|
||||||
@ -140,8 +153,10 @@ def delete_file_from_db(session, kb_file: KnowledgeFile):
|
|||||||
|
|
||||||
@with_session
|
@with_session
|
||||||
def delete_files_from_db(session, knowledge_base_name: str):
|
def delete_files_from_db(session, knowledge_base_name: str):
|
||||||
session.query(KnowledgeFileModel).filter(KnowledgeFileModel.kb_name.ilike(knowledge_base_name)).delete(synchronize_session=False)
|
session.query(KnowledgeFileModel).filter(KnowledgeFileModel.kb_name.ilike(knowledge_base_name)).delete(
|
||||||
session.query(FileDocModel).filter(FileDocModel.kb_name.ilike(knowledge_base_name)).delete(synchronize_session=False)
|
synchronize_session=False)
|
||||||
|
session.query(FileDocModel).filter(FileDocModel.kb_name.ilike(knowledge_base_name)).delete(
|
||||||
|
synchronize_session=False)
|
||||||
kb = session.query(KnowledgeBaseModel).filter(KnowledgeBaseModel.kb_name.ilike(knowledge_base_name)).first()
|
kb = session.query(KnowledgeBaseModel).filter(KnowledgeBaseModel.kb_name.ilike(knowledge_base_name)).first()
|
||||||
if kb:
|
if kb:
|
||||||
kb.file_count = 0
|
kb.file_count = 0
|
||||||
@ -154,8 +169,8 @@ def delete_files_from_db(session, knowledge_base_name: str):
|
|||||||
def file_exists_in_db(session, kb_file: KnowledgeFile):
    '''
    Tell whether a KnowledgeFileModel row exists for the given kb_file.

    The row is looked up by kb_file.filename and kb_file.kb_name, both
    compared with ilike. Returns True when a row is found, False otherwise.
    '''
    query = session.query(KnowledgeFileModel).filter(
        KnowledgeFileModel.file_name.ilike(kb_file.filename),
        KnowledgeFileModel.kb_name.ilike(kb_file.kb_name),
    )
    matched = query.first()
    if matched:
        return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -315,6 +315,7 @@ class KBServiceFactory:
|
|||||||
from server.knowledge_base.kb_service.zilliz_kb_service import ZillizKBService
|
from server.knowledge_base.kb_service.zilliz_kb_service import ZillizKBService
|
||||||
return ZillizKBService(kb_name, embed_model=embed_model)
|
return ZillizKBService(kb_name, embed_model=embed_model)
|
||||||
elif SupportedVSType.DEFAULT == vector_store_type:
|
elif SupportedVSType.DEFAULT == vector_store_type:
|
||||||
|
from server.knowledge_base.kb_service.milvus_kb_service import MilvusKBService
|
||||||
return MilvusKBService(kb_name,
|
return MilvusKBService(kb_name,
|
||||||
embed_model=embed_model) # other milvus parameters are set in model_config.kbs_config
|
embed_model=embed_model) # other milvus parameters are set in model_config.kbs_config
|
||||||
elif SupportedVSType.ES == vector_store_type:
|
elif SupportedVSType.ES == vector_store_type:
|
||||||
|
|||||||
@ -5,6 +5,7 @@ from langchain.vectorstores.milvus import Milvus
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from configs import kbs_config
|
from configs import kbs_config
|
||||||
|
from server.db.repository import list_file_num_docs_id_by_kb_name_and_file_name
|
||||||
|
|
||||||
from server.knowledge_base.kb_service.base import KBService, SupportedVSType, EmbeddingsFunAdapter, \
|
from server.knowledge_base.kb_service.base import KBService, SupportedVSType, EmbeddingsFunAdapter, \
|
||||||
score_threshold_process
|
score_threshold_process
|
||||||
@ -23,7 +24,7 @@ class MilvusKBService(KBService):
|
|||||||
result = []
|
result = []
|
||||||
if self.milvus.col:
|
if self.milvus.col:
|
||||||
# ids = [int(id) for id in ids] # for milvus if needed #pr 2725
|
# ids = [int(id) for id in ids] # for milvus if needed #pr 2725
|
||||||
data_list = self.milvus.col.query(expr=f'pk in {ids}', output_fields=["*"])
|
data_list = self.milvus.col.query(expr=f'pk in {[int(_id) for _id in ids]}', output_fields=["*"])
|
||||||
for data in data_list:
|
for data in data_list:
|
||||||
text = data.pop("text")
|
text = data.pop("text")
|
||||||
result.append(Document(page_content=text, metadata=data))
|
result.append(Document(page_content=text, metadata=data))
|
||||||
@ -84,12 +85,9 @@ class MilvusKBService(KBService):
|
|||||||
return doc_infos
|
return doc_infos
|
||||||
|
|
||||||
def do_delete_doc(self, kb_file: KnowledgeFile, **kwargs):
    '''
    Delete from the Milvus collection the entities recorded for kb_file.

    The primary keys to delete are the ids returned by
    list_file_num_docs_id_by_kb_name_and_file_name for kb_file.kb_name and
    kb_file.filename. Nothing is done when the collection is not available
    or when no ids are recorded for the file.

    **kwargs is accepted but not used here.
    '''
    # Check the collection first so no database query runs when there is
    # nothing to delete from.
    if not self.milvus.col:
        return
    id_list = list_file_num_docs_id_by_kb_name_and_file_name(kb_file.kb_name, kb_file.filename)
    # Skip the call for an empty list instead of sending a `pk in []` expression.
    if id_list:
        self.milvus.col.delete(expr=f'pk in {id_list}')
|
|
||||||
|
|
||||||
def do_clear_vs(self):
|
def do_clear_vs(self):
|
||||||
if self.milvus.col:
|
if self.milvus.col:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user