Add document normalization in Chroma. (#3640)

This commit is contained in:
你的代码TT 2024-04-15 21:37:32 +08:00 committed by GitHub
parent 6d128f24f0
commit 2f2221ca47
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -82,9 +82,12 @@ class ChromaKBService(KBService):
# Diff hunk for do_add_doc, rendered without its +/- markers or indentation:
# both the old and the new lines of the method body appear below, back to back.
# NOTE(review): the hunk header counts (-82,9 +82,12) suggest the three lines
# following `doc_infos = []` are the removed side and the six lines after them
# are the added side -- confirm against the repository before relying on this.
def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]:
# Accumulates one {"id", "metadata"} entry per stored document.
doc_infos = []
# Presumably the removed side: texts, embeddings and metadatas are read from
# the dict returned by self._docs_to_embeddings(docs).
data = self._docs_to_embeddings(docs)
ids = [str(uuid.uuid1()) for _ in range(len(data["texts"]))]
for _id, text, embedding, metadata in zip(ids, data["texts"], data["embeddings"], data["metadatas"]):
# Presumably the added side: texts and metadata are taken directly from each
# Document, and embeddings are produced by an EmbeddingsFunAdapter built from
# self.embed_model.
embed_func = EmbeddingsFunAdapter(self.embed_model)
texts = [doc.page_content for doc in docs]
metadatas = [doc.metadata for doc in docs]
embeddings = embed_func.embed_documents(texts=texts)
# One uuid1-based string id is generated per text.
ids = [str(uuid.uuid1()) for _ in range(len(texts))]
for _id, text, embedding, metadata in zip(ids, texts, embeddings, metadatas):
# Loop body (shared by both sides): each document is added to the collection
# with its own call, then its id and metadata are recorded for the caller.
self.collection.add(ids=_id, embeddings=embedding, metadatas=metadata, documents=text)
doc_infos.append({"id": _id, "metadata": metadata})
return doc_infos