修改项目架构

2026-02-05 22:33:24 +08:00 · 2023-04-13 23:01:52 +08:00 · 2023-04-13 23:01:52 +08:00 · a6184b01be
commit a6184b01be
parent 5c9e931a8e
8 changed files with 181 additions and 139 deletions
--- a/README.md
+++ b/README.md
@ -16,6 +16,8 @@
 🚩 本项目未涉及微调、训练过程，但可利用微调或训练对本项目效果进行优化。
 [TOC]
 ## 更新信息
 **[2023/04/07]** 
@ -76,7 +78,7 @@ Web UI 可以实现如下功能：
 3. 添加上传文件功能，通过下拉框选择已上传的文件，点击`loading`加载文件，过程中可随时更换加载的文件
 4. 底部添加`use via API`可对接到自己系统
-或执行 [knowledge_based_chatglm.py](knowledge_based_chatglm.py) 脚本体验**命令行交互**
+或执行 [knowledge_based_chatglm.py](cli_demo.py) 脚本体验**命令行交互**
 ```commandline
 python knowledge_based_chatglm.py
 ```
--- a/README_en.md
+++ b/README_en.md
@ -68,7 +68,7 @@ pip install -r requirements.txt
 ```
 Attention: With langchain.document_loaders.UnstructuredFileLoader used to connect with local knowledge file, you may need some other dependencies as mentioned in  [langchain documentation](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)
-### 2. Run [knowledge_based_chatglm.py](knowledge_based_chatglm.py) script
+### 2. Run [knowledge_based_chatglm.py](cli_demo.py) script
 ```commandline
 python knowledge_based_chatglm.py
 ```
--- a/chains/local_doc_qa.py
+++ b/chains/local_doc_qa.py
@ -0,0 +1,104 @@
 from langchain.chains import RetrievalQA
 from langchain.prompts import PromptTemplate
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.document_loaders import UnstructuredFileLoader
 from models.chatglm_llm import ChatGLM
 import sentence_transformers
 import os
 from configs.model_config import *
 import datetime
 # return top-k text chunk from vector store
 VECTOR_SEARCH_TOP_K = 10
 # LLM input history length
 LLM_HISTORY_LEN = 3
 # Show reply with source text from input document
 REPLY_WITH_SOURCE = True
 class LocalDocQA:
    llm: object = None
    embeddings: object = None
    def init_cfg(self,
                 embedding_model: str = EMBEDDING_MODEL,
                 embedding_device=EMBEDDING_DEVICE,
                 llm_history_len: int = LLM_HISTORY_LEN,
                 llm_model: str = LLM_MODEL,
                 llm_device=LLM_DEVICE
                 ):
        self.llm = ChatGLM()
        self.llm.load_model(model_name_or_path=llm_model_dict[llm_model],
                            llm_device=llm_device)
        self.llm.history_len = llm_history_len
        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict[embedding_model], )
        self.embeddings.client = sentence_transformers.SentenceTransformer(self.embeddings.model_name,
                                                                           device=embedding_device)
    def init_knowledge_vector_store(self,
                                    filepath: str):
        if not os.path.exists(filepath):
            print("路径不存在")
            return None
        elif os.path.isfile(filepath):
            file = os.path.split(filepath)[-1]
            try:
                loader = UnstructuredFileLoader(filepath, mode="elements")
                docs = loader.load()
                print(f"{file} 已成功加载")
            except:
                print(f"{file} 未能成功加载")
                return None
        elif os.path.isdir(filepath):
            docs = []
            for file in os.listdir(filepath):
                fullfilepath = os.path.join(filepath, file)
                try:
                    loader = UnstructuredFileLoader(fullfilepath, mode="elements")
                    docs += loader.load()
                    print(f"{file} 已成功加载")
                except:
                    print(f"{file} 未能成功加载")
        vector_store = FAISS.from_documents(docs, self.embeddings)
        vs_path = f"""./vector_store/{os.path.splitext(file)}_FAISS_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}"""
        vector_store.save_local(vs_path)
        return vs_path
    def get_knowledge_based_answer(self,
                                   query,
                                   vs_path,
                                   chat_history=[],
                                   top_k=VECTOR_SEARCH_TOP_K):
        prompt_template = """基于以下已知信息，简洁和专业的来回答用户的问题。
    如果无法从中得到答案，请说 "根据已知信息无法回答该问题" 或 "没有提供足够的相关信息"，不允许在答案中添加编造成分，答案请使用中文。
    已知内容:
    {context}
    问题:
    {question}"""
        prompt = PromptTemplate(
            template=prompt_template,
            input_variables=["context", "question"]
        )
        self.llm.history = chat_history
        vector_store = FAISS.load_local(vs_path, self.embeddings)
        knowledge_chain = RetrievalQA.from_llm(
            llm=self.llm,
            retriever=vector_store.as_retriever(search_kwargs={"k": top_k}),
            prompt=prompt
        )
        knowledge_chain.combine_documents_chain.document_prompt = PromptTemplate(
            input_variables=["page_content"], template="{page_content}"
        )
        knowledge_chain.return_source_documents = True
        result = knowledge_chain({"query": query})
        self.llm.history[-1][0] = query
        return result, self.llm.history
--- a/cli_demo.py
+++ b/cli_demo.py
@ -0,0 +1,33 @@
 from configs.model_config import *
 import datetime
 from chains.local_doc_qa import LocalDocQA
 # return top-k text chunk from vector store
 VECTOR_SEARCH_TOP_K = 10
 # LLM input history length
 LLM_HISTORY_LEN = 3
 # Show reply with source text from input document
 REPLY_WITH_SOURCE = True
 if __name__ == "__main__":
    local_doc_qa = LocalDocQA()
    local_doc_qa.init_cfg(llm_model=LLM_MODEL,
                          embedding_model=EMBEDDING_MODEL,
                          embedding_device=EMBEDDING_DEVICE,
                          llm_history_len=LLM_HISTORY_LEN)
    vs_path = None
    while not vs_path:
        filepath = input("Input your local knowledge file path 请输入本地知识文件路径：")
        vs_path = local_doc_qa.init_knowledge_vector_store(filepath)
    history = []
    while True:
        query = input("Input your question 请输入问题：")
        resp, history = local_doc_qa.get_knowledge_based_answer(query=query,
                                                                vs_path=vs_path,
                                                                chat_history=history)
        if REPLY_WITH_SOURCE:
            print(resp)
        else:
            print(resp["result"])
--- a/configs/model_config.py
+++ b/configs/model_config.py
@ -0,0 +1,31 @@
 import torch.cuda
 import torch.backends
 embedding_model_dict = {
    "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
    "ernie-base": "nghuyong/ernie-3.0-base-zh",
    "text2vec": "GanymedeNil/text2vec-large-chinese",
    "local": "/Users/liuqian/Downloads/ChatGLM-6B/text2vec-large-chinese"
 }
 # Embedding model name
 EMBEDDING_MODEL = "local"#"text2vec"
 # Embedding running device
 EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
 # supported LLM models
 llm_model_dict = {
    "chatglm-6b-int4-qe": "THUDM/chatglm-6b-int4-qe",
    "chatglm-6b-int4": "THUDM/chatglm-6b-int4",
    "chatglm-6b": "THUDM/chatglm-6b",
    "local": "/Users/liuqian/Downloads/ChatGLM-6B/chatglm-6b"
 }
 # LLM model name
 LLM_MODEL = "local"#"chatglm-6b"
 # LLM running device
 LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
--- a/knowledge_based_chatglm.py
+++ b/knowledge_based_chatglm.py
@ -1,124 +0,0 @@
 from langchain.chains import RetrievalQA
 from langchain.prompts import PromptTemplate
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.document_loaders import UnstructuredFileLoader
 from chatglm_llm import ChatGLM
 import sentence_transformers
 import torch
 import os
 import readline
 # Global Parameters
 EMBEDDING_MODEL = "text2vec"
 VECTOR_SEARCH_TOP_K = 6
 LLM_MODEL = "chatglm-6b"
 LLM_HISTORY_LEN = 3
 DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
 # Show reply with source text from input document
 REPLY_WITH_SOURCE = True
 embedding_model_dict = {
    "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
    "ernie-base": "nghuyong/ernie-3.0-base-zh",
    "text2vec": "GanymedeNil/text2vec-large-chinese",
 }
 llm_model_dict = {
    "chatglm-6b-int4-qe": "THUDM/chatglm-6b-int4-qe",
    "chatglm-6b-int4": "THUDM/chatglm-6b-int4",
    "chatglm-6b": "THUDM/chatglm-6b",
 }
 def init_cfg(LLM_MODEL, EMBEDDING_MODEL, LLM_HISTORY_LEN, V_SEARCH_TOP_K=6):
    global chatglm, embeddings, VECTOR_SEARCH_TOP_K
    VECTOR_SEARCH_TOP_K = V_SEARCH_TOP_K
    chatglm = ChatGLM()
    chatglm.load_model(model_name_or_path=llm_model_dict[LLM_MODEL])
    chatglm.history_len = LLM_HISTORY_LEN
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict[EMBEDDING_MODEL],)
    embeddings.client = sentence_transformers.SentenceTransformer(embeddings.model_name,
                                                                  device=DEVICE)
 def init_knowledge_vector_store(filepath:str):
    if not os.path.exists(filepath):
        print("路径不存在")
        return None
    elif os.path.isfile(filepath):
        file = os.path.split(filepath)[-1]
        try:
            loader = UnstructuredFileLoader(filepath, mode="elements")
            docs = loader.load()
            print(f"{file} 已成功加载")
        except:
            print(f"{file} 未能成功加载")
            return None
    elif os.path.isdir(filepath):
        docs = []
        for file in os.listdir(filepath):
            fullfilepath = os.path.join(filepath, file)
            try:
                loader = UnstructuredFileLoader(fullfilepath, mode="elements")
                docs += loader.load()
                print(f"{file} 已成功加载")
            except:
                print(f"{file} 未能成功加载")
    vector_store = FAISS.from_documents(docs, embeddings)
    return vector_store
 def get_knowledge_based_answer(query, vector_store, chat_history=[]):
    global chatglm, embeddings
    prompt_template = """基于以下已知信息，简洁和专业的来回答用户的问题。
 如果无法从中得到答案，请说 "根据已知信息无法回答该问题" 或 "没有提供足够的相关信息"，不允许在答案中添加编造成分，答案请使用中文。
 已知内容:
 {context}
 问题:
 {question}"""
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"]
    )
    chatglm.history = chat_history
    knowledge_chain = RetrievalQA.from_llm(
        llm=chatglm,
        retriever=vector_store.as_retriever(search_kwargs={"k": VECTOR_SEARCH_TOP_K}),
        prompt=prompt
    )
    knowledge_chain.combine_documents_chain.document_prompt = PromptTemplate(
            input_variables=["page_content"], template="{page_content}"
        )
    knowledge_chain.return_source_documents = True
    result = knowledge_chain({"query": query})
    chatglm.history[-1][0] = query
    return result, chatglm.history
 if __name__ == "__main__":
    init_cfg(LLM_MODEL, EMBEDDING_MODEL, LLM_HISTORY_LEN)
    vector_store = None
    while not vector_store:
        filepath = input("Input your local knowledge file path 请输入本地知识文件路径：")
        vector_store = init_knowledge_vector_store(filepath)
    history = []
    while True:
        query = input("Input your question 请输入问题：")
        resp, history = get_knowledge_based_answer(query=query,
                                                   vector_store=vector_store,
                                                   chat_history=history)
        if REPLY_WITH_SOURCE:
            print(resp)
        else:
            print(resp["result"])
--- a/models/chatglm_llm.py
+++ b/models/chatglm_llm.py
@ -3,8 +3,9 @@ from typing import Optional, List
 from langchain.llms.utils import enforce_stop_tokens
 from transformers import AutoTokenizer, AutoModel
 import torch
 from configs.model_config import LLM_DEVICE
-DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
+DEVICE = LLM_DEVICE
 DEVICE_ID = "0" if torch.cuda.is_available() else None
 CUDA_DEVICE = f"{DEVICE}:{DEVICE_ID}" if DEVICE_ID else DEVICE
@ -48,12 +49,14 @@ class ChatGLM(LLM):
        self.history = self.history+[[None, response]]
        return response
-    def load_model(self, model_name_or_path: str = "THUDM/chatglm-6b"):
+    def load_model(self,
                   model_name_or_path: str = "THUDM/chatglm-6b",
                   llm_device=LLM_DEVICE):
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name_or_path,
            trust_remote_code=True
        )
-        if torch.cuda.is_available():
+        if torch.cuda.is_available() and llm_device.lower().startswith("cuda"):
            self.model = (
                AutoModel.from_pretrained(
                    model_name_or_path,
@ -61,19 +64,12 @@ class ChatGLM(LLM):
                .half()
                .cuda()
            )
        elif torch.backends.mps.is_available():
            self.model = (
                AutoModel.from_pretrained(
                    model_name_or_path,
                    trust_remote_code=True)
                .float()
                .to('mps')
            )
        else:
            self.model = (
                AutoModel.from_pretrained(
                    model_name_or_path,
                    trust_remote_code=True)
                .float()
                .to(llm_device)
            )
        self.model = self.model.eval()
--- a/webui.py
+++ b/webui.py
@ -1,7 +1,7 @@
 import gradio as gr
 import os
 import shutil
-import knowledge_based_chatglm as kb
+import cli_demo as kb
 def get_file_list():
@ -108,7 +108,7 @@ with gr.Blocks(css="""
                                             value=file_list[0] if len(file_list) > 0 else None)
                with gr.Tab("upload"):
                    file = gr.File(label="content file",
-                                   file_types=['.txt', '.md', '.docx']
+                                   file_types=['.txt', '.md', '.docx', '.pdf']
                                   ).style(height=100)
                    # 将上传的文件保存到content文件夹下,并更新下拉框
                    file.upload(upload_file,