mirror of
https://github.com/RYDE-WORK/Langchain-Chatchat.git
synced 2026-02-07 15:38:27 +08:00
Fix(File RAG): use jieba instead of cutword
This commit is contained in:
parent
d71c9b0a27
commit
1987063a76
@ -30,12 +30,13 @@ class EnsembleRetrieverService(BaseRetrieverService):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
# TODO: 换个不用torch的实现方式
|
# TODO: 换个不用torch的实现方式
|
||||||
from cutword.cutword import Cutter
|
# from cutword.cutword import Cutter
|
||||||
cutter = Cutter()
|
import jieba
|
||||||
|
# cutter = Cutter()
|
||||||
docs = list(vectorstore.docstore._dict.values())
|
docs = list(vectorstore.docstore._dict.values())
|
||||||
bm25_retriever = BM25Retriever.from_documents(
|
bm25_retriever = BM25Retriever.from_documents(
|
||||||
docs,
|
docs,
|
||||||
preprocess_func=cutter.cutword
|
preprocess_func=jieba.lcut_for_search,
|
||||||
)
|
)
|
||||||
bm25_retriever.k = top_k
|
bm25_retriever.k = top_k
|
||||||
ensemble_retriever = EnsembleRetriever(
|
ensemble_retriever = EnsembleRetriever(
|
||||||
|
|||||||
@ -29,7 +29,8 @@ unstructured = "~0.11.0"
|
|||||||
python-magic-bin = {version = "*", platform = "win32"}
|
python-magic-bin = {version = "*", platform = "win32"}
|
||||||
SQLAlchemy = "~2.0.25"
|
SQLAlchemy = "~2.0.25"
|
||||||
faiss-cpu = "~1.7.4"
|
faiss-cpu = "~1.7.4"
|
||||||
cutword = "0.1.0"
|
#cutword = "0.1.0"
|
||||||
|
jieba = "0.42.1"
|
||||||
rank_bm25 = "0.2.2"
|
rank_bm25 = "0.2.2"
|
||||||
# accelerate = "~0.24.1"
|
# accelerate = "~0.24.1"
|
||||||
# spacy = "~3.7.2"
|
# spacy = "~3.7.2"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user