From 253d4b9b9af0908c3ffc191bbb96496dbbcfcdeb Mon Sep 17 00:00:00 2001
From: iimm <45782636+liudichen@users.noreply.github.com>
Date: Wed, 20 Mar 2024 08:41:41 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=BD=BF=E7=94=A8pgvector?=
 =?UTF-8?q?=E6=97=B6get=5Fdoc=5Fby=5Fids=E6=96=B9=E6=B3=95=E6=8A=A5?=
 =?UTF-8?q?=E9=94=99=E7=9A=84bug=E5=8F=8A=E7=9F=A5=E8=AF=86=E5=BA=93?=
 =?UTF-8?q?=E6=96=87=E4=BB=B6=E5=88=A0=E9=99=A4=E5=90=8E=E5=90=91=E9=87=8F?=
 =?UTF-8?q?=E4=BB=8D=E7=84=B6=E5=AD=98=E5=9C=A8=E7=9A=84bug=20(#3407)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 修复使用pgvector无法查询到document

* 修复使用pg/es向量库无法知识库删除文档后向量库并未删除对应记录的bug
---
 server/knowledge_base/kb_service/base.py          | 14 ++++++++++++++
 server/knowledge_base/kb_service/es_kb_service.py |  2 +-
 server/knowledge_base/kb_service/pg_kb_service.py |  7 +++----
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/server/knowledge_base/kb_service/base.py b/server/knowledge_base/kb_service/base.py
index a6a42598..c83753d5 100644
--- a/server/knowledge_base/kb_service/base.py
+++ b/server/knowledge_base/kb_service/base.py
@@ -222,6 +222,20 @@ class KBService(ABC):
                 pass
         return docs
 
+    def get_relative_source_path(self, filepath: str) -> str:
+        '''
+        Return filepath as a POSIX-style path relative to self.doc_path, so that lookups stay consistent.
+        '''
+        relative_path = Path(filepath)  # wrap first: a relative str input has no as_posix() of its own
+        if os.path.isabs(filepath):
+            try:
+                relative_path = relative_path.relative_to(self.doc_path)
+            except ValueError as e:  # filepath is not located under self.doc_path; keep it as given
+                print(f"cannot convert absolute path ({filepath}) to relative path. error is : {e}")
+
+        relative_path = relative_path.as_posix().strip("/")
+        return relative_path
+
     @abstractmethod
     def do_create_kb(self):
         """
diff --git a/server/knowledge_base/kb_service/es_kb_service.py b/server/knowledge_base/kb_service/es_kb_service.py
index afdfd70c..17e0d767 100644
--- a/server/knowledge_base/kb_service/es_kb_service.py
+++ b/server/knowledge_base/kb_service/es_kb_service.py
@@ -184,7 +184,7 @@ class ESKBService(KBService):
             query = {
                 "query": {
                     "term": {
-                        "metadata.source.keyword": kb_file.filepath
+                        "metadata.source.keyword": self.get_relative_source_path(kb_file.filepath)
                     }
                 }
             }
diff --git a/server/knowledge_base/kb_service/pg_kb_service.py b/server/knowledge_base/kb_service/pg_kb_service.py
index 46efe7d8..fc17c93d 100644
--- a/server/knowledge_base/kb_service/pg_kb_service.py
+++ b/server/knowledge_base/kb_service/pg_kb_service.py
@@ -28,9 +28,9 @@ class PGKBService(KBService):
 
     def get_doc_by_ids(self, ids: List[str]) -> List[Document]:
         with Session(PGKBService.engine) as session:
-            stmt = text("SELECT document, cmetadata FROM langchain_pg_embedding WHERE collection_id in :ids")
+            stmt = text("SELECT document, cmetadata FROM langchain_pg_embedding WHERE custom_id = ANY(:ids)")
             results = [Document(page_content=row[0], metadata=row[1]) for row in
-                       session.execute(stmt, {'ids': ids}).fetchall()]
+                        session.execute(stmt, {'ids': ids}).fetchall()]
             return results
     def del_doc_by_ids(self, ids: List[str]) -> bool:
         return super().del_doc_by_ids(ids)
@@ -71,11 +71,10 @@ class PGKBService(KBService):
 
     def do_delete_doc(self, kb_file: KnowledgeFile, **kwargs):
         with Session(PGKBService.engine) as session:
-            filepath = kb_file.filepath.replace('\\', '\\\\')
             session.execute(
                 text(
                     ''' DELETE FROM langchain_pg_embedding WHERE cmetadata::jsonb @> '{"source": "filepath"}'::jsonb;'''.replace(
-                        "filepath", filepath)))
+                        "filepath", self.get_relative_source_path(kb_file.filepath))))
             session.commit()
 
     def do_clear_vs(self):