diff --git a/libs/chatchat-server/chatchat/server/file_rag/retrievers/ensemble.py b/libs/chatchat-server/chatchat/server/file_rag/retrievers/ensemble.py index cb09b633..5d6b17a6 100644 --- a/libs/chatchat-server/chatchat/server/file_rag/retrievers/ensemble.py +++ b/libs/chatchat-server/chatchat/server/file_rag/retrievers/ensemble.py @@ -30,12 +30,13 @@ class EnsembleRetrieverService(BaseRetrieverService): } ) # TODO: 换个不用torch的实现方式 - from cutword.cutword import Cutter - cutter = Cutter() + # from cutword.cutword import Cutter + import jieba + # cutter = Cutter() docs = list(vectorstore.docstore._dict.values()) bm25_retriever = BM25Retriever.from_documents( docs, - preprocess_func=cutter.cutword + preprocess_func=jieba.lcut_for_search, ) bm25_retriever.k = top_k ensemble_retriever = EnsembleRetriever( diff --git a/libs/chatchat-server/pyproject.toml b/libs/chatchat-server/pyproject.toml index 136b8b1b..85646274 100644 --- a/libs/chatchat-server/pyproject.toml +++ b/libs/chatchat-server/pyproject.toml @@ -29,7 +29,8 @@ unstructured = "~0.11.0" python-magic-bin = {version = "*", platform = "win32"} SQLAlchemy = "~2.0.25" faiss-cpu = "~1.7.4" -cutword = "0.1.0" +#cutword = "0.1.0" +jieba = "0.42.1" rank_bm25 = "0.2.2" # accelerate = "~0.24.1" # spacy = "~3.7.2"