diff --git a/server/knowledge_base/kb_service/base.py b/server/knowledge_base/kb_service/base.py
index a6a42598..c83753d5 100644
--- a/server/knowledge_base/kb_service/base.py
+++ b/server/knowledge_base/kb_service/base.py
@@ -222,6 +222,30 @@ class KBService(ABC):
                 pass
         return docs
 
+    def get_relative_source_path(self, filepath: str) -> str:
+        """
+        Convert a file path to the relative, POSIX-style form used when querying by source.
+
+        An absolute path is made relative to ``self.doc_path``; if that fails the
+        path is kept as given (best effort) and a message is printed. The result
+        uses forward slashes and has leading/trailing "/" stripped, so the same
+        file yields the same string at query time.
+
+        :param filepath: absolute or relative path of a knowledge-base file.
+        :return: the normalized path string.
+        """
+        relative_path = Path(filepath)
+        if os.path.isabs(filepath):
+            try:
+                relative_path = relative_path.relative_to(self.doc_path)
+            except Exception as e:
+                # Deliberately best effort: fall back to the path as given instead of failing the caller.
+                print(f"cannot convert absolute path ({filepath}) to relative path. error is : {e}")
+
+        # relative_path is always a Path here, so relative inputs and failed
+        # conversions are normalized the same way as successful ones.
+        return relative_path.as_posix().strip("/")
+
     @abstractmethod
     def do_create_kb(self):
         """
diff --git a/server/knowledge_base/kb_service/es_kb_service.py b/server/knowledge_base/kb_service/es_kb_service.py
index afdfd70c..17e0d767 100644
--- a/server/knowledge_base/kb_service/es_kb_service.py
+++ b/server/knowledge_base/kb_service/es_kb_service.py
@@ -184,7 +184,7 @@ class ESKBService(KBService):
             query = {
                 "query": {
                     "term": {
-                        "metadata.source.keyword": kb_file.filepath
+                        "metadata.source.keyword": self.get_relative_source_path(kb_file.filepath)
                     }
                 }
             }
diff --git a/server/knowledge_base/kb_service/pg_kb_service.py b/server/knowledge_base/kb_service/pg_kb_service.py
index 46efe7d8..fc17c93d 100644
--- a/server/knowledge_base/kb_service/pg_kb_service.py
+++ b/server/knowledge_base/kb_service/pg_kb_service.py
@@ -28,9 +28,11 @@ class PGKBService(KBService):
 
     def get_doc_by_ids(self, ids: List[str]) -> List[Document]:
         with Session(PGKBService.engine) as session:
-            stmt = text("SELECT document, cmetadata FROM langchain_pg_embedding WHERE collection_id in :ids")
+            # Match rows on custom_id; `= ANY(:ids)` takes the whole list as one bound parameter
+            # (presumably adapted to a SQL array by the driver - TODO confirm for non-psycopg2 drivers).
+            stmt = text("SELECT document, cmetadata FROM langchain_pg_embedding WHERE custom_id = ANY(:ids)")
             results = [Document(page_content=row[0], metadata=row[1]) for row in
-                       session.execute(stmt, {'ids': ids}).fetchall()]
+                        session.execute(stmt, {'ids': ids}).fetchall()]
             return results
     def del_doc_by_ids(self, ids: List[str]) -> bool:
         return super().del_doc_by_ids(ids)
@@ -71,11 +73,12 @@ class PGKBService(KBService):
 
     def do_delete_doc(self, kb_file: KnowledgeFile, **kwargs):
         with Session(PGKBService.engine) as session:
-            filepath = kb_file.filepath.replace('\\', '\\\\')
+            # Bind the path as a parameter instead of splicing it into the SQL text, so quotes or
+            # backslashes in a file name can neither break the statement nor inject into it.
             session.execute(
                 text(
-                    ''' DELETE FROM langchain_pg_embedding WHERE cmetadata::jsonb @> '{"source": "filepath"}'::jsonb;'''.replace(
-                        "filepath", filepath)))
+                    ''' DELETE FROM langchain_pg_embedding WHERE cmetadata::jsonb @> jsonb_build_object('source', CAST(:source AS text));'''),
+                {"source": self.get_relative_source_path(kb_file.filepath)})
             session.commit()
 
     def do_clear_vs(self):