Merge pull request #2919 from fengyaojieTTT/bugfix/es-document-query

修复es 知识库查询bug (#2848)
This commit is contained in:
zR 2024-02-06 12:25:56 +08:00 committed by GitHub
commit f04a2bfcc7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -155,6 +155,20 @@ class ESKBService(KBService):
k=top_k)
return docs
def get_doc_by_ids(self, ids: List[str]) -> List[Document]:
    """Fetch documents from the Elasticsearch index by their IDs.

    Each ID is looked up individually in ``self.index_name``. The stored
    ``context`` field becomes the page content and the stored ``metadata``
    field becomes the document metadata. Retrieval is best-effort: an ID
    that cannot be fetched or converted is logged and skipped, so the
    returned list may be shorter than ``ids``.

    Args:
        ids: Elasticsearch document IDs to retrieve.

    Returns:
        The documents that were retrieved successfully, in the same
        relative order as ``ids``.
    """
    results = []
    for doc_id in ids:
        try:
            response = self.es_client_python.get(index=self.index_name, id=doc_id)
            source = response["_source"]
            # The document body is read from "context" (not "text"); both
            # fields fall back to empty values when absent from the source.
            text = source.get("context", "")
            metadata = source.get("metadata", {})
            results.append(Document(page_content=text, metadata=metadata))
        except Exception as e:
            # Keep going so one bad ID does not discard the rest, but name
            # the failing ID so the log entry can be acted upon.
            logger.error(
                f"Error retrieving document from Elasticsearch! id={doc_id}, error: {e}"
            )
    return results
def del_doc_by_ids(self, ids: List[str]) -> bool:
for doc_id in ids:
try:
@ -200,17 +214,21 @@ class ESKBService(KBService):
# 获取 id 和 source , 格式:[{"id": str, "metadata": dict}, ...]
print("写入数据成功.")
print("*"*100)
if self.es_client_python.indices.exists(index=self.index_name):
file_path = docs[0].metadata.get("source")
query = {
"query": {
"term": {
"metadata.source.keyword": file_path
},
"term": {
"_index": self.index_name
}
}
}
search_results = self.es_client_python.search(body=query)
# NOTE: set size explicitly; Elasticsearch returns only 10 hits by default.
search_results = self.es_client_python.search(body=query, size=50)
if len(search_results["hits"]["hits"]) == 0:
raise ValueError("召回元素个数为0")
info_docs = [{"id":hit["_id"], "metadata": hit["_source"]["metadata"]} for hit in search_results["hits"]["hits"]]