mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-02-07 15:38:27 +08:00
Add document normalization in Chroma. (#3640)
This commit is contained in:
parent
6d128f24f0
commit
2f2221ca47
@ -82,9 +82,12 @@ class ChromaKBService(KBService):
|
|||||||
|
|
||||||
def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]:
|
def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]:
|
||||||
doc_infos = []
|
doc_infos = []
|
||||||
data = self._docs_to_embeddings(docs)
|
embed_func = EmbeddingsFunAdapter(self.embed_model)
|
||||||
ids = [str(uuid.uuid1()) for _ in range(len(data["texts"]))]
|
texts = [doc.page_content for doc in docs]
|
||||||
for _id, text, embedding, metadata in zip(ids, data["texts"], data["embeddings"], data["metadatas"]):
|
metadatas = [doc.metadata for doc in docs]
|
||||||
|
embeddings = embed_func.embed_documents(texts=texts)
|
||||||
|
ids = [str(uuid.uuid1()) for _ in range(len(texts))]
|
||||||
|
for _id, text, embedding, metadata in zip(ids, texts, embeddings, metadatas):
|
||||||
self.collection.add(ids=_id, embeddings=embedding, metadatas=metadata, documents=text)
|
self.collection.add(ids=_id, embeddings=embedding, metadatas=metadata, documents=text)
|
||||||
doc_infos.append({"id": _id, "metadata": metadata})
|
doc_infos.append({"id": _id, "metadata": metadata})
|
||||||
return doc_infos
|
return doc_infos
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user