mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-02-07 15:38:27 +08:00
更新了chromadb的打印的符号
This commit is contained in:
parent
ffbfcd41f2
commit
042a70c09a
@ -3,7 +3,7 @@ import os
|
|||||||
# 默认使用的知识库
|
# 默认使用的知识库
|
||||||
DEFAULT_KNOWLEDGE_BASE = "samples"
|
DEFAULT_KNOWLEDGE_BASE = "samples"
|
||||||
|
|
||||||
# 默认向量库/全文检索引擎类型。可选:faiss, milvus(离线) & zilliz(在线), pgvector, 全文检索引擎es, chromadb
|
# 默认向量库/全文检索引擎类型。可选:faiss, milvus(离线) & zilliz(在线), pgvector, chromadb, 全文检索引擎es
|
||||||
DEFAULT_VS_TYPE = "faiss"
|
DEFAULT_VS_TYPE = "faiss"
|
||||||
|
|
||||||
# 缓存向量库数量(针对FAISS)
|
# 缓存向量库数量(针对FAISS)
|
||||||
|
|||||||
@ -44,10 +44,11 @@ llama-index==0.9.35
|
|||||||
# dashscope==1.13.6 # qwen
|
# dashscope==1.13.6 # qwen
|
||||||
# volcengine==1.0.119 # fangzhou
|
# volcengine==1.0.119 # fangzhou
|
||||||
# uncomment libs if you want to use corresponding vector store
|
# uncomment libs if you want to use corresponding vector store
|
||||||
# pymilvus==2.3.4
|
# pymilvus==2.3.6
|
||||||
# psycopg2==2.9.9
|
# psycopg2==2.9.9
|
||||||
# pgvector==0.2.4
|
# pgvector>=0.2.4
|
||||||
# chromadb==0.4.13
|
# chromadb==0.4.13
|
||||||
|
|
||||||
#flash-attn==2.4.2 # For Orion-14B-Chat and Qwen-14B-Chat
|
#flash-attn==2.4.2 # For Orion-14B-Chat and Qwen-14B-Chat
|
||||||
#autoawq==0.1.8 # For Int4
|
#autoawq==0.1.8 # For Int4
|
||||||
#rapidocr_paddle[gpu]==1.3.11 # gpu acceleration for ocr of pdf and image files
|
#rapidocr_paddle[gpu]==1.3.11 # gpu acceleration for ocr of pdf and image files
|
||||||
|
|||||||
@ -50,9 +50,11 @@ pyjwt==2.8.0
|
|||||||
# metaphor-python~=0.1.23
|
# metaphor-python~=0.1.23
|
||||||
|
|
||||||
# volcengine>=1.0.119
|
# volcengine>=1.0.119
|
||||||
# pymilvus>=2.3.4
|
# pymilvus==2.3.6
|
||||||
# psycopg2==2.9.9
|
# psycopg2==2.9.9
|
||||||
# pgvector>=0.2.4
|
# pgvector>=0.2.4
|
||||||
|
# chromadb==0.4.13
|
||||||
|
|
||||||
#flash-attn==2.4.2 # For Orion-14B-Chat and Qwen-14B-Chat
|
#flash-attn==2.4.2 # For Orion-14B-Chat and Qwen-14B-Chat
|
||||||
#autoawq==0.1.8 # For Int4
|
#autoawq==0.1.8 # For Int4
|
||||||
#rapidocr_paddle[gpu]==1.3.11 # gpu acceleration for ocr of pdf and image files
|
#rapidocr_paddle[gpu]==1.3.11 # gpu acceleration for ocr of pdf and image files
|
||||||
@ -33,6 +33,7 @@ watchdog~=3.0.0
|
|||||||
# psycopg2==2.9.9
|
# psycopg2==2.9.9
|
||||||
# pgvector>=0.2.4
|
# pgvector>=0.2.4
|
||||||
# chromadb==0.4.13
|
# chromadb==0.4.13
|
||||||
|
|
||||||
# jq==1.6.0
|
# jq==1.6.0
|
||||||
# beautifulsoup4~=4.12.2
|
# beautifulsoup4~=4.12.2
|
||||||
# pysrt~=1.1.2
|
# pysrt~=1.1.2
|
||||||
|
|||||||
@ -23,13 +23,13 @@ def _get_result_to_documents(get_result: GetResult) -> List[Document]:
|
|||||||
|
|
||||||
return document_list
|
return document_list
|
||||||
|
|
||||||
|
|
||||||
def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]:
|
def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]:
|
||||||
"""
|
"""
|
||||||
from langchain_community.vectorstores.chroma import Chroma
|
from langchain_community.vectorstores.chroma import Chroma
|
||||||
"""
|
"""
|
||||||
return [
|
return [
|
||||||
# TODO: Chroma can do batch querying,
|
# TODO: Chroma can do batch querying,
|
||||||
# we shouldn't hard code to the 1st result
|
|
||||||
(Document(page_content=result[0], metadata=result[1] or {}), result[2])
|
(Document(page_content=result[0], metadata=result[1] or {}), result[2])
|
||||||
for result in zip(
|
for result in zip(
|
||||||
results["documents"][0],
|
results["documents"][0],
|
||||||
@ -40,14 +40,12 @@ def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]:
|
|||||||
|
|
||||||
|
|
||||||
class ChromaKBService(KBService):
|
class ChromaKBService(KBService):
|
||||||
|
|
||||||
vs_path: str
|
vs_path: str
|
||||||
kb_path: str
|
kb_path: str
|
||||||
|
|
||||||
client = None
|
client = None
|
||||||
collection = None
|
collection = None
|
||||||
|
|
||||||
|
|
||||||
def vs_type(self) -> str:
|
def vs_type(self) -> str:
|
||||||
return SupportedVSType.CHROMADB
|
return SupportedVSType.CHROMADB
|
||||||
|
|
||||||
@ -75,16 +73,16 @@ class ChromaKBService(KBService):
|
|||||||
if not str(e) == f"Collection {self.kb_name} does not exist.":
|
if not str(e) == f"Collection {self.kb_name} does not exist.":
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
def do_search(self, query: str, top_k: int, score_threshold: float = SCORE_THRESHOLD) -> List[Tuple[Document, float]]:
|
def do_search(self, query: str, top_k: int, score_threshold: float = SCORE_THRESHOLD) -> List[
|
||||||
|
Tuple[Document, float]]:
|
||||||
embed_func = EmbeddingsFunAdapter(self.embed_model)
|
embed_func = EmbeddingsFunAdapter(self.embed_model)
|
||||||
embeddings = embed_func.embed_query(query)
|
embeddings = embed_func.embed_query(query)
|
||||||
query_result: QueryResult = self.collection.query(query_embeddings=embeddings, n_results=top_k)
|
query_result: QueryResult = self.collection.query(query_embeddings=embeddings, n_results=top_k)
|
||||||
return _results_to_docs_and_scores(query_result)
|
return _results_to_docs_and_scores(query_result)
|
||||||
|
|
||||||
def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]:
|
def do_add_doc(self, docs: List[Document], **kwargs) -> List[Dict]:
|
||||||
doc_infos = []
|
doc_infos = []
|
||||||
data = self._docs_to_embeddings(docs)
|
data = self._docs_to_embeddings(docs)
|
||||||
print(data)
|
|
||||||
ids = [str(uuid.uuid1()) for _ in range(len(data["texts"]))]
|
ids = [str(uuid.uuid1()) for _ in range(len(data["texts"]))]
|
||||||
for _id, text, embedding, metadata in zip(ids, data["texts"], data["embeddings"], data["metadatas"]):
|
for _id, text, embedding, metadata in zip(ids, data["texts"], data["embeddings"], data["metadatas"]):
|
||||||
self.collection.add(ids=_id, embeddings=embedding, metadatas=metadata, documents=text)
|
self.collection.add(ids=_id, embeddings=embedding, metadatas=metadata, documents=text)
|
||||||
|
|||||||
@ -2,6 +2,7 @@ from typing import List, Dict, Optional
|
|||||||
|
|
||||||
from langchain.schema import Document
|
from langchain.schema import Document
|
||||||
from langchain.vectorstores.milvus import Milvus
|
from langchain.vectorstores.milvus import Milvus
|
||||||
|
import os
|
||||||
|
|
||||||
from configs import kbs_config
|
from configs import kbs_config
|
||||||
|
|
||||||
@ -85,6 +86,7 @@ class MilvusKBService(KBService):
|
|||||||
def do_delete_doc(self, kb_file: KnowledgeFile, **kwargs):
|
def do_delete_doc(self, kb_file: KnowledgeFile, **kwargs):
|
||||||
if self.milvus.col:
|
if self.milvus.col:
|
||||||
filepath = kb_file.filepath.replace('\\', '\\\\')
|
filepath = kb_file.filepath.replace('\\', '\\\\')
|
||||||
|
filename = os.path.basename(filepath)
|
||||||
delete_list = [item.get("pk") for item in
|
delete_list = [item.get("pk") for item in
|
||||||
self.milvus.col.query(expr=f'source == "{filepath}"', output_fields=["pk"])]
|
self.milvus.col.query(expr=f'source == "{filepath}"', output_fields=["pk"])]
|
||||||
self.milvus.col.delete(expr=f'pk in {delete_list}')
|
self.milvus.col.delete(expr=f'pk in {delete_list}')
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user