diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..058eb02d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,166 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. Here the entire
+# .idea folder is ignored.
+.idea/
+
+# Other files
+output/*
+log/*
+.chroma
+vector_store/*
\ No newline at end of file
diff --git a/README.md b/README.md
index b9819684..d2e0edbb 100644
--- a/README.md
+++ b/README.md
@@ -18,16 +18,14 @@
## 更新信息
-**[2023/04/07]**
-1. 解决加载 ChatGLM 模型时发生显存占用为双倍的问题 (感谢 [@suc16](https://github.com/suc16) 和 [@myml](https://github.com/myml)) ;
-2. 新增清理显存机制;
-3. 新增`nghuyong/ernie-3.0-nano-zh`和`nghuyong/ernie-3.0-base-zh`作为 Embedding 模型备选项,相比`GanymedeNil/text2vec-large-chinese`占用显存资源更少 (感谢 [@lastrei](https://github.com/lastrei))。
+**[2023/04/15]**
+1. 重构项目结构,在根目录下保留命令行 Demo [cli_demo.py](cli_demo.py) 和 Web UI Demo [webui.py](webui.py);
+2. 对 Web UI 进行改进,修改为运行 Web UI 后首先按照 [configs/model_config.py](configs/model_config.py) 默认选项加载模型,并增加报错提示信息等;
+3. 对常见问题进行补充说明。
-**[2023/04/09]**
-1. 使用`langchain`中的`RetrievalQA`替代之前选用的`ChatVectorDBChain`,替换后可以有效减少提问 2-3 次后因显存不足而停止运行的问题;
-2. 在`knowledge_based_chatglm.py`中增加`EMBEDDING_MODEL`、`VECTOR_SEARCH_TOP_K`、`LLM_MODEL`、`LLM_HISTORY_LEN`、`REPLY_WITH_SOURCE`参数值设置;
-3. 增加 GPU 显存需求更小的`chatglm-6b-int4`、`chatglm-6b-int4-qe`作为 LLM 模型备选项;
-4. 更正`README.md`中的代码错误(感谢 [@calcitem](https://github.com/calcitem))。
+**[2023/04/12]**
+1. 替换 Web UI 中的样例文件,避免在 Ubuntu 中出现因文件编码无法读取的问题;
+2. 替换`knowledge_based_chatglm.py`中的 prompt 模版,避免出现因 prompt 模版包含中英双语导致 chatglm 返回内容错乱的问题。
**[2023/04/11]**
1. 加入 Web UI V0.1 版本(感谢 [@liangtongt](https://github.com/liangtongt));
@@ -35,15 +33,22 @@
3. 增加 LLM 和 Embedding 模型运行设备是否可用`cuda`、`mps`、`cpu`的自动判断。
4. 在`knowledge_based_chatglm.py`中增加对`filepath`的判断,在之前支持单个文件导入的基础上,现支持单个文件夹路径作为输入,输入后将会遍历文件夹中各个文件,并在命令行中显示每个文件是否成功加载。
-**[2023/04/12]**
-1. 替换 Web UI 中的样例文件,避免出现 Ubuntu 中出现因文件编码无法读取的问题;
-2. 替换`knowledge_based_chatglm.py`中的 prompt 模版,避免出现因 prompt 模版包含中英双语导致 chatglm 返回内容错乱的问题。
+**[2023/04/09]**
+1. 使用`langchain`中的`RetrievalQA`替代之前选用的`ChatVectorDBChain`,替换后可以有效减少提问 2-3 次后因显存不足而停止运行的问题;
+2. 在`knowledge_based_chatglm.py`中增加`EMBEDDING_MODEL`、`VECTOR_SEARCH_TOP_K`、`LLM_MODEL`、`LLM_HISTORY_LEN`、`REPLY_WITH_SOURCE`参数值设置;
+3. 增加 GPU 显存需求更小的`chatglm-6b-int4`、`chatglm-6b-int4-qe`作为 LLM 模型备选项;
+4. 更正`README.md`中的代码错误(感谢 [@calcitem](https://github.com/calcitem))。
+
+**[2023/04/07]**
+1. 解决加载 ChatGLM 模型时发生显存占用为双倍的问题 (感谢 [@suc16](https://github.com/suc16) 和 [@myml](https://github.com/myml)) ;
+2. 新增清理显存机制;
+3. 新增`nghuyong/ernie-3.0-nano-zh`和`nghuyong/ernie-3.0-base-zh`作为 Embedding 模型备选项,相比`GanymedeNil/text2vec-large-chinese`占用显存资源更少 (感谢 [@lastrei](https://github.com/lastrei))。
## 使用方式
### 硬件需求
- ChatGLM-6B 模型硬件需求
-
+
| **量化等级** | **最低 GPU 显存**(推理) | **最低 GPU 显存**(高效参数微调) |
| -------------- | ------------------------- | --------------------------------- |
| FP16(无量化) | 13 GB | 14 GB |
@@ -53,55 +58,81 @@
- Embedding 模型硬件需求
本项目中默认选用的 Embedding 模型 [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main) 约占用显存 3GB,也可修改为在 CPU 中运行。
+
### 软件需求
-本项目已在 python 3.8 环境下完成测试。
-### 1. 安装 python 依赖包
-```commandline
-pip install -r requirements.txt
+
+本项目已在 Python 3.8,CUDA 11.7 环境下完成测试。
+
+### 1. 安装环境
+
+- 环境检查
+
+```shell
+# 首先,确信你的机器安装了 Python 3.8 及以上版本
+$ python --version
+Python 3.8.13
+
+# 如果低于这个版本,可使用conda安装环境
+$ conda create -p /your_path/env_name python=3.8
+
+# 激活环境
+$ source activate /your_path/env_name
+
+# 关闭环境
+$ source deactivate /your_path/env_name
+
+# 删除环境
+$ conda env remove -p /your_path/env_name
+```
+
+- 项目依赖
+
+```shell
+# 拉取仓库
+$ git clone https://github.com/imClumsyPanda/langchain-ChatGLM.git
+
+# 安装依赖
+$ pip install -r requirements.txt
```
注:使用 langchain.document_loaders.UnstructuredFileLoader 进行非结构化文件接入时,可能需要依据文档进行其他依赖包的安装,请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)
-### 2. 执行脚本体验 Web UI 或命令行交互
-执行 [webui.py](webui.py) 脚本体验 **Web 交互**
-```commandline
-python webui.py
+### 2. 设置模型默认参数
+
+在开始执行 Web UI 或命令行交互前,请先检查 [configs/model_config.py](configs/model_config.py) 中的各项模型参数设计是否符合需求。
+
+### 3. 执行脚本体验 Web UI 或命令行交互
+执行 [webui.py](webui.py) 脚本体验 **Web 交互**
+```shell
+$ python webui.py
```
+注:如未将模型下载至本地,请在执行前检查 `$HOME/.cache/huggingface/` 文件夹剩余空间,至少需要 15 GB。
+
执行后效果如下图所示:

-Web UI 中提供的 API 接口如下图所示:
-
Web UI 可以实现如下功能:
-1. 自动读取`knowledge_based_chatglm.py`中`LLM`及`embedding`模型枚举,选择后点击`setting`进行模型加载,可随时切换模型进行测试
-2. 可手动调节保留对话历史长度,可根据显存大小自行调节
-3. 添加上传文件功能,通过下拉框选择已上传的文件,点击`loading`加载文件,过程中可随时更换加载的文件
-4. 底部添加`use via API`可对接到自己系统
-或执行 [knowledge_based_chatglm.py](knowledge_based_chatglm.py) 脚本体验**命令行交互**
-```commandline
-python knowledge_based_chatglm.py
+1. 运行前自动读取`configs/model_config.py`中`LLM`及`Embedding`模型枚举及默认模型设置运行模型,如需重新加载模型,可在界面重新选择后点击`重新加载模型`进行模型加载;
+2. 可手动调节保留对话历史长度,可根据显存大小自行调节
+3. 添加上传文件功能,通过下拉框选择已上传的文件,点击`加载文件`按钮,过程中可随时更换加载的文件
+
+或执行 [cli_demo.py](cli_demo.py) 脚本体验**命令行交互**
+```shell
+$ python cli_demo.py
```
### 常见问题
Q1: 本项目支持哪些文件格式?
-A1: 目前已测试支持 txt、docx、md 格式文件,更多文件格式请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)。目前已知文档中若含有特殊字符,可能存在文件无法加载的问题。
+A1: 目前已测试支持 txt、docx、md、pdf 格式文件,更多文件格式请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)。目前已知文档中若含有特殊字符,可能存在文件无法加载的问题。
-Q2: 读取特定格式文件时遇到缺少`detectron2`时如何解决?
-
-A2: 因该包安装过程中遇到问题较多,且仅部分格式文件需要,所以未加入`requirements.txt`。可以通过一下命令安装
-
-```commandline
-pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"
-```
-
-Q3: `Resource punkt not found.` 如何解决?
+Q3: 使用过程中 Python 包`nltk`发生了`Resource punkt not found.`报错,该如何解决?
A3: 将 https://github.com/nltk/nltk_data/raw/gh-pages/packages/tokenizers/punkt.zip 中的 `packages/tokenizers` 解压,放到 `nltk_data/tokenizers` 存储路径下。
`nltk_data` 存储路径可以通过 `nltk.data.path` 查询。
-Q4: `Resource averaged_perceptron_tagger not found.` 如何解决?
+Q4: 使用过程中 Python 包`nltk`发生了`Resource averaged_perceptron_tagger not found.`报错,该如何解决?
A4: 将 https://github.com/nltk/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip 下载,解压放到 `nltk_data/taggers` 存储路径下。
@@ -111,6 +142,60 @@ Q5: 本项目可否在 colab 中运行?
A5: 可以尝试使用 chatglm-6b-int4 模型在 colab 中运行,需要注意的是,如需在 colab 中运行 Web UI,需将`webui.py`中`demo.queue(concurrency_count=3).launch(
server_name='0.0.0.0', share=False, inbrowser=False)`中参数`share`设置为`True`。
+
+Q6: 在 Anaconda 中使用 pip 安装包无效如何解决?
+
+A6: 此问题是系统环境问题,详细见 [在Anaconda中使用pip安装包无效问题](docs/在Anaconda中使用pip安装包无效问题.md)
+
+Q7: 本项目中所需模型如何下载至本地?
+
+A7: 本项目中使用的模型均为`huggingface.com`中可下载的开源模型,以默认选择的`chatglm-6b`和`text2vec-large-chinese`模型为例,下载模型可执行如下代码:
+
+```shell
+# 安装 git lfs
+$ git lfs install
+
+# 下载 LLM 模型
+$ git clone https://huggingface.co/THUDM/chatglm-6b /your_path/chatglm-6b
+
+# 下载 Embedding 模型
+$ git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese /your_path/text2vec
+
+# 模型需要更新时,可打开模型所在文件夹后拉取最新模型文件/代码
+$ git pull
+```
+
+Q8: `huggingface.com`中模型下载速度较慢怎么办?
+
+A8: 可使用本项目用到的模型权重文件百度网盘地址:
+- ernie-3.0-base-zh.zip 链接: https://pan.baidu.com/s/1CIvKnD3qzE-orFouA8qvNQ?pwd=4wih
+- ernie-3.0-nano-zh.zip 链接: https://pan.baidu.com/s/1Fh8fgzVdavf5P1omAJJ-Zw?pwd=q6s5
+- text2vec-large-chinese.zip 链接: https://pan.baidu.com/s/1sMyPzBIXdEzHygftEoyBuA?pwd=4xs7
+- chatglm-6b-int4-qe.zip 链接: https://pan.baidu.com/s/1DDKMOMHtNZccOOBGWIOYww?pwd=22ji
+- chatglm-6b-int4.zip 链接: https://pan.baidu.com/s/1pvZ6pMzovjhkA6uPcRLuJA?pwd=3gjd
+- chatglm-6b.zip 链接: https://pan.baidu.com/s/1B-MpsVVs1GHhteVBetaquw?pwd=djay
+
+Q9: 下载完模型后,如何修改代码以执行本地模型?
+
+A9: 模型下载完成后,请在 [configs/model_config.py](configs/model_config.py) 文件中,对`embedding_model_dict`和`llm_model_dict`参数进行修改,如把`embedding_model_dict`从
+```python
+embedding_model_dict = {
+ "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
+ "ernie-base": "nghuyong/ernie-3.0-base-zh",
+ "text2vec": "GanymedeNil/text2vec-large-chinese"
+}
+```
+
+修改为
+
+```python
+embedding_model_dict = {
+ "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
+ "ernie-base": "nghuyong/ernie-3.0-base-zh",
+ "text2vec": "/Users/liuqian/Downloads/ChatGLM-6B/text2vec-large-chinese"
+}
+```
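+
+`llm_model_dict` 的修改方式类似(以下本地路径仅为示例,请替换为模型实际下载位置):
+
+```python
+llm_model_dict = {
+    "chatglm-6b-int4-qe": "THUDM/chatglm-6b-int4-qe",
+    "chatglm-6b-int4": "THUDM/chatglm-6b-int4",
+    "chatglm-6b": "/your_path/chatglm-6b"
+}
+```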
+
## DEMO
以问题`chatglm-6b 的局限性具体体现在哪里,如何实现改进`为例
@@ -148,15 +233,14 @@ A5: 可以尝试使用 chatglm-6b-int4 模型在 colab 中运行,需要注意
- [x] .pdf(需要安装 `detectron2`,安装命令:`pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"`)
- [x] .docx
- [x] .txt
+ - [ ] 搜索引擎与本地网页
- [ ] 增加更多 LLM 模型支持
- [x] THUDM/chatglm-6b
- [x] THUDM/chatglm-6b-int4
- [x] THUDM/chatglm-6b-int4-qe
- [ ] 增加 Web UI DEMO
- [x] 利用 gradio 实现 Web UI DEMO
- - [ ] 添加模型加载进度条
- - [ ] 添加输出内容及错误提示
- - [ ] 国际化语言切换
+ - [x] 添加输出内容及错误提示
- [ ] 引用标注
- [ ] 利用 fastapi 实现 API 部署方式,并实现调用 API 的 web ui DEMO
diff --git a/README_en.md b/README_en.md
index b9c350a6..b29bf881 100644
--- a/README_en.md
+++ b/README_en.md
@@ -1,97 +1,211 @@
-# ChatGLM Application Based on Local Knowledge
+# ChatGLM Application with Local Knowledge Implementation
## Introduction
+[](https://t.me/+RjliQ3jnJ1YyN2E9)
+
🌍 [_中文文档_](README.md)
-🤖️ A local knowledge based LLM Application with [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) and [langchain](https://github.com/hwchase17/langchain).
+🤖️ This is a ChatGLM application based on local knowledge, implemented using [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) and [langchain](https://github.com/hwchase17/langchain).
-💡 Inspired by [document.ai](https://github.com/GanymedeNil/document.ai) by [GanymedeNil](https://github.com/GanymedeNil) and [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) by [AlexZhangji](https://github.com/AlexZhangji).
+💡 Inspired by [document.ai](https://github.com/GanymedeNil/document.ai) and [Alex Zhangji](https://github.com/AlexZhangji)'s [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216), this project establishes a local knowledge question-answering application using open-source models.
-✅ In this project, [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main) is used as Embedding Model,and [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) used as LLM。Based on those models,this project can be deployed **offline** with all **open source** models。
+✅ The embeddings used in this project are [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main), and the LLM is [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B). Relying on these models, this project enables the use of **open-source** models for **offline private deployment**.
-## Webui
-
-Click on steps 1-3 according to the above figure to complete the model loading, file loading, and viewing of dialogue history
+⛓️ The implementation principle of this project is illustrated in the figure below. The process includes loading files -> reading text -> text segmentation -> text vectorization -> question vectorization -> matching the top k most similar text vectors to the question vector -> adding the matched text to `prompt` along with the question as context -> submitting to `LLM` to generate an answer.
-
-Click on the Use via API at the bottom to view the API interface. Existing applications can be docked and called through post requests
+
-### TODO
--[] Add Model Load progress bar
--[] Add output content and error prompts
--[] International language switching
--[] Reference annotation
--[] Add plugin system (can be used for basic LORA training, etc.)
+🚩 This project does not involve fine-tuning or training; however, fine-tuning or training can be employed to optimize the effectiveness of this project.
-## Update
-**[2023/04/11]**
-1. Add Webui V0.1 version and synchronize the updated content before the current day;
-2. Automatically read knowledge_ based_ Enumerate LLM and embedding models in chatglm.py, select and click 'setting' to load the model. You can switch models for testing at any time
-3. The length of the conversation history can be manually adjusted and can be adjusted according to the size of the video memory
-4. Add the upload file function, select the uploaded file from the dropdown box, click loading to load the file, and the loaded file can be changed at any time during the process
-5. Add use via API at the bottom to connect to your own system
+## Changelog
+
+**[2023/04/15]**
+
+ 1. Refactored the project structure, keeping the command line demo [cli_demo.py](cli_demo.py) and the Web UI demo [webui.py](webui.py) in the root directory;
+ 2. Improved the Web UI so that, after startup, it first loads the model according to the defaults in [configs/model_config.py](configs/model_config.py), and added error messages, etc.;
+ 3. Updated the FAQ.
+
+**[2023/04/12]**
+
+ 1. Replaced the sample files in the Web UI to avoid issues with unreadable files due to encoding problems in Ubuntu;
+ 2. Replaced the prompt template in `knowledge_based_chatglm.py` to prevent confusion in the content returned by ChatGLM, which may arise from the prompt template containing Chinese and English bilingual text.
+
+**[2023/04/11]**
+
+ 1. Added Web UI V0.1 version (thanks to [@liangtongt](https://github.com/liangtongt));
+ 2. Added Frequently Asked Questions in `README.md` (thanks to [@calcitem](https://github.com/calcitem) and [@bolongliu](https://github.com/bolongliu));
+ 3. Enhanced automatic detection for the availability of `cuda`, `mps`, and `cpu` for LLM and Embedding model running devices;
+ 4. Added a check for `filepath` in `knowledge_based_chatglm.py`. In addition to supporting single file import, it now supports a single folder path as input. After input, it will traverse each file in the folder and display a command-line message indicating the success of each file load.
+
+**[2023/04/09]**
+
+ 1. Replaced the previously selected `ChatVectorDBChain` with `RetrievalQA` in `langchain`, effectively reducing the issue of stopping due to insufficient video memory after asking 2-3 times;
+ 2. Added `EMBEDDING_MODEL`, `VECTOR_SEARCH_TOP_K`, `LLM_MODEL`, `LLM_HISTORY_LEN`, `REPLY_WITH_SOURCE` parameter value settings in `knowledge_based_chatglm.py`;
+ 3. Added `chatglm-6b-int4` and `chatglm-6b-int4-qe`, which require less GPU memory, as LLM model options;
+ 4. Corrected code errors in `README.md` (thanks to [@calcitem](https://github.com/calcitem)).
**[2023/04/07]**
-1. Fix bug which costs twice gpu memory (Thanks to [@suc16](https://github.com/suc16) and [@myml](https://github.com/myml)).
-2. Add gpu memory clear function after each call of ChatGLM.
-3. Add `nghuyong/ernie-3.0-nano-zh` and `nghuyong/ernie-3.0-base-zh` as Embedding model alternatives,costing less gpu than `GanymedeNil/text2vec-large-chinese` (Thanks to [@lastrei](https://github.com/lastrei))
-**[2023/04/09]**
-1. Using `RetrievalQA` in `langchain` to replace the previously selected `ChatVectorDBChain`, the replacement can effectively solve the problem of program stopping after 2-3 questions due to insufficient gpu memory.
-2. Add `EMBEDDING_MODEL`, `VECTOR_SEARCH_TOP_K`, `LLM_MODEL`, `LLM_HISTORY_LEN`, `REPLY_WITH_SOURCE` parameter value settings in `knowledge_based_chatglm.py`.
-3. Add `chatglm-6b-int4`, `chatglm-6b-int4-qe` with smaller GPU memory requirements as LLM model alternatives.
-4. Correct code errors in `README.md` (Thanks to [@calcitem](https://github.com/calcitem)).
+ 1. Resolved the issue of doubled video memory usage when loading the ChatGLM model (thanks to [@suc16](https://github.com/suc16) and [@myml](https://github.com/myml));
+ 2. Added a mechanism to clear video memory;
+ 3. Added `nghuyong/ernie-3.0-nano-zh` and `nghuyong/ernie-3.0-base-zh` as Embedding model options, which consume less video memory resources than `GanymedeNil/text2vec-large-chinese` (thanks to [@lastrei](https://github.com/lastrei)).
-## Usage
+## How to Use
### Hardware Requirements
-- ChatGLM Hardware Requirements
+- ChatGLM-6B Model Hardware Requirements
+
+ | **Quantization Level** | **Minimum GPU Memory** (inference) | **Minimum GPU Memory** (efficient parameter fine-tuning) |
+ | ---------------------- | ---------------------------------- | --------------------------------------------------------- |
+ | FP16 (no quantization) | 13 GB | 14 GB |
+ | INT8 | 8 GB | 9 GB |
+ | INT4 | 6 GB | 7 GB |
- | **Quantization Level** | **GPU Memory** |
- |------------------------|----------------|
- | FP16(no quantization) | 13 GB |
- | INT8 | 10 GB |
- | INT4 | 6 GB |
-- Embedding Hardware Requirements
-
- The default Embedding model in this repo is [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main), 3GB GPU Memory required when running on GPU.
+- Embedding Model Hardware Requirements
+ The default Embedding model [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main) in this project occupies approximately 3 GB of GPU memory and can also be configured to run on a CPU (see the sketch below).
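+
+ A minimal way to force CPU execution (assuming the configuration layout added in this patch) is to override the auto-detected device in [configs/model_config.py](configs/model_config.py):
+
+```python
+# configs/model_config.py: force the Embedding model onto the CPU
+EMBEDDING_DEVICE = "cpu"
+```
+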
### Software Requirements
-This repo has been tested in python 3.8 environment。
-### 1. install python packages
-```commandline
-pip install -r requirements.txt
+This repository has been tested with Python 3.8 and CUDA 11.7 environments.
+
+### 1. Setting up the environment
+
+* Environment check
+
+```shell
+# First, make sure your machine has Python 3.8 or higher installed
+$ python --version
+Python 3.8.13
+
+# If your version is lower, you can use conda to install the environment
+$ conda create -p /your_path/env_name python=3.8
+
+# Activate the environment
+$ source activate /your_path/env_name
+
+# Deactivate the environment
+$ source deactivate /your_path/env_name
+
+# Remove the environment
+$ conda env remove -p /your_path/env_name
```
-Attention: With langchain.document_loaders.UnstructuredFileLoader used to connect with local knowledge file, you may need some other dependencies as mentioned in [langchain documentation](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)
-### 2. Run [knowledge_based_chatglm.py](knowledge_based_chatglm.py) script
+* Project dependencies
+
+```shell
+
+# Clone the repository
+$ git clone https://github.com/imClumsyPanda/langchain-ChatGLM.git
+
+# Install dependencies
+$ pip install -r requirements.txt
+```
+
+Note: When using langchain.document_loaders.UnstructuredFileLoader for unstructured file integration, you may need to install other dependency packages according to the documentation. Please refer to [langchain documentation](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html).
+
+### 2. Run Scripts to Experience Web UI or Command Line Interaction
+
+Execute [webui.py](webui.py) script to experience **Web interaction**
+```shell
+$ python webui.py
+```
+Note: If the models have not been downloaded locally, check before running that the `$HOME/.cache/huggingface/` folder has at least 15 GB of free space.
+
+The resulting interface is shown below:
+
+The Web UI supports the following features:
+
+1. Automatically reads the `LLM` and `embedding` model enumerations in `configs/model_config.py`, allowing you to select and reload the model by clicking `重新加载模型`.
+2. The length of retained dialogue history can be manually adjusted according to the available video memory.
+3. Adds a file upload function. Select the uploaded file through the drop-down box, click `加载文件` to load the file, and change the loaded file at any time during the process.
+
+Alternatively, execute the [cli_demo.py](cli_demo.py) script to experience **command line interaction**:
+
```shell
$ python cli_demo.py
```
-### Known issues
-- Currently tested to support txt, docx, md format files, for more file formats please refer to [langchain documentation](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html). If the document contains special characters, the file may not be correctly loaded.
-- When running this project with macOS, it may not work properly due to incompatibility with pytorch caused by macOS version 13.3 and above.
-
### FAQ
-Q: How to solve `Resource punkt not found.`?
+Q1: What file formats does this project support?
-A: Unzip `packages/tokenizers` in https://github.com/nltk/nltk_data/raw/gh-pages/packages/tokenizers/punkt.zip and put it in the corresponding directory of `Searched in:`.
+A1: Currently, this project has been tested with txt, docx, md, and pdf file formats. For more file formats, please refer to the [langchain documentation](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html). It is known that if the document contains special characters, there might be issues with loading the file.
-Q: How to solve `Resource averaged_perceptron_tagger not found.`?
+Q2: How can I resolve the `detectron2` dependency issue when reading specific file formats?
-A: Download https://github.com/nltk/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip, decompress it and put it in the corresponding directory of `Searched in:`.
+A2: As the installation process for this package can be problematic and it is only required for some file formats, it is not included in `requirements.txt`. You can install it with the following command:
+
+```commandline
+pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"
+```
+
+Q3: How can I solve the `Resource punkt not found.` error?
+
+A3: Unzip the `packages/tokenizers` folder from https://github.com/nltk/nltk_data/raw/gh-pages/packages/tokenizers/punkt.zip, and place it in the `nltk_data/tokenizers` storage path.
+
+The `nltk_data` storage path can be found using `nltk.data.path`.
+
+Q4: How can I solve the `Resource averaged_perceptron_tagger not found.` error?
+
+A4: Download https://github.com/nltk/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip, extract it, and place it in the `nltk_data/taggers` storage path.
+
+The `nltk_data` storage path can be found using `nltk.data.path`.
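+
+If the machine has internet access, the same resources can also be fetched programmatically rather than unpacked by hand (a small sketch; the manual steps above remain the offline route):
+
+```python
+import nltk
+
+print(nltk.data.path)                        # directories where nltk looks for its data
+nltk.download("punkt")                       # resource from Q3
+nltk.download("averaged_perceptron_tagger")  # resource from Q4
+```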
+
+Q5: Can this project run in Google Colab?
+
+A5: You can try running the chatglm-6b-int4 model in Google Colab. Please note that if you want to run the Web UI in Colab, you need to set the `share` parameter in `demo.queue(concurrency_count=3).launch(server_name='0.0.0.0', share=False, inbrowser=False)` to `True`.
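+
+For reference, the modified call would then read as follows (only the `share` flag changes):
+
+```python
+demo.queue(concurrency_count=3).launch(server_name='0.0.0.0', share=True, inbrowser=False)
+```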
+
+Q6: How can I fix the problem of packages installed with pip having no effect in Anaconda?
+
+A6: This issue is related to the system environment. For more details, please refer to [Issues with installing packages using pip in Anaconda](docs/Issue-with-Installing-Packages-Using-pip-in-Anaconda.md).
+
+## DEMO
+
+Using the question "What are the limitations of ChatGLM-6B and how can they be improved?" as an example:
+
+Without using langchain to access local documents:
+
+> ChatGLM-6B is a chatbot based on GPT-3, and its limitations are mainly reflected in the following aspects:
+>
+> 1. Language model accuracy: Although GPT-3 is a large language model with relatively high accuracy, it still has some limitations. For instance, it may not be able to accurately understand certain dialects, accents, or complex sentence structures.
+> 2. Completeness of the knowledge graph: GPT-3 can generate some knowledge graphs, but its completeness and accuracy may not be sufficient. For example, it may struggle to accurately understand specific domain expertise and terminology.
+> 3. Coherence of dialogue generation: While GPT-3 can generate somewhat coherent dialogues, they may not always be natural. For instance, it may generate false, incoherent dialogue or misinterpret the user's intent.
+>
+> To improve ChatGLM-6B, consider the following aspects:
+>
+> 1. Enhance the accuracy of the language model: The accuracy of the language model can be improved by increasing the training data, using better language model architectures, and optimizing the model training process.
+> 2. Strengthen the integrity of the knowledge graph: The integrity of the knowledge graph can be enhanced by increasing its dimensions, employing better knowledge graph construction methods, and optimizing the knowledge graph's matching algorithm.
+> 3. Boost the coherence of dialogue generation: The coherence of dialogue generation can be improved by augmenting the context information of the dialogue generation model, utilizing better context information extraction methods, and optimizing the dialogue generation model's algorithm.
+> 4. Incorporate more human feedback: Human feedback can help ChatGLM-6B better understand users' intentions and needs, thereby improving the quality and accuracy of dialogue generation. More human feedback can be introduced by involving more human agents and using human feedback collectors.
+
+ChatGLM's answer after using LangChain to access the README.md file of the ChatGLM-6B project:
+>ChatGLM-6B is a deep learning-based natural language processing model that excels at answering questions. However, it also has some limitations, as follows:
+>1. Limited language understanding: ChatGLM-6B has been primarily trained on Chinese natural language, and its understanding of other languages may be limited.
+>2. Insufficient knowledge base: The training dataset of ChatGLM-6B contains only a Chinese corpus, so it may not be able to answer non-Chinese questions or queries in specific domains.
+>3. Limited data volume: ChatGLM-6B's training dataset has only a few million records, which may hinder its ability to answer very specific or complex questions.
+>
+>To improve ChatGLM-6B, consider the following aspects:
+>1. Expand language knowledge: Learn natural language processing techniques in other languages to broaden the model's language understanding capabilities.
+>2. Broaden the knowledge base: Collect more Chinese corpora or use datasets in other languages to expand the model's knowledge base.
+>3. Increase data volume: Use larger datasets to train ChatGLM-6B, which can improve the model's performance.
+>4. Introduce more evaluation metrics: Incorporate additional evaluation metrics to assess the model's performance, which can help identify the shortcomings and limitations of ChatGLM-6B.
+>5. Enhance the model architecture: Improve ChatGLM-6B's model architecture to boost its performance and capabilities. For example, employ larger neural networks or refined convolutional neural network structures.
## Roadmap
-- [x] local knowledge based application with langchain + ChatGLM-6B
-- [x] unstructured files loaded with langchain
-- [ ] more different file format loaded with langchain
-- [ ] implement web ui DEMO with gradio/streamlit
-- [ ] implement API with fastapi,and web ui DEMO with API
-
+- [x] Implement LangChain + ChatGLM-6B for local knowledge application
+- [x] Unstructured file access based on langchain
+ - [x] .md
+ - [x] .pdf (requires `detectron2`; see FAQ Q2)
+ - [x] .docx
+ - [x] .txt
+- [ ] Add support for more LLM models
+ - [x] THUDM/chatglm-6b
+ - [x] THUDM/chatglm-6b-int4
+ - [x] THUDM/chatglm-6b-int4-qe
+- [ ] Add Web UI DEMO
+ - [x] Implement Web UI DEMO using Gradio
+ - [x] Add output and error messages
+ - [ ] Citation callout
+- [ ] Use FastAPI to implement API deployment method and develop a Web UI DEMO for API calls
diff --git a/agent/__init__.py b/agent/__init__.py
new file mode 100644
index 00000000..933c16e2
--- /dev/null
+++ b/agent/__init__.py
@@ -0,0 +1 @@
+from .chatglm_with_shared_memory_openai_llm import *
\ No newline at end of file
diff --git a/chains/local_doc_qa.py b/chains/local_doc_qa.py
new file mode 100644
index 00000000..12489426
--- /dev/null
+++ b/chains/local_doc_qa.py
@@ -0,0 +1,116 @@
+from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.document_loaders import UnstructuredFileLoader
+from models.chatglm_llm import ChatGLM
+import sentence_transformers
+import os
+from configs.model_config import *
+import datetime
+from typing import List, Union
+
+# return top-k text chunk from vector store
+VECTOR_SEARCH_TOP_K = 10
+
+# LLM input history length
+LLM_HISTORY_LEN = 3
+
+# Show reply with source text from input document
+REPLY_WITH_SOURCE = True
+
+
+class LocalDocQA:
+ llm: object = None
+ embeddings: object = None
+
+ def init_cfg(self,
+ embedding_model: str = EMBEDDING_MODEL,
+ embedding_device=EMBEDDING_DEVICE,
+ llm_history_len: int = LLM_HISTORY_LEN,
+ llm_model: str = LLM_MODEL,
+ llm_device=LLM_DEVICE,
+ top_k=VECTOR_SEARCH_TOP_K,
+ ):
+ self.llm = ChatGLM()
+ self.llm.load_model(model_name_or_path=llm_model_dict[llm_model],
+ llm_device=llm_device)
+ self.llm.history_len = llm_history_len
+
+ self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict[embedding_model], )
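+        # Rebuild the underlying SentenceTransformer client so the embedding device can be chosen explicitly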
+ self.embeddings.client = sentence_transformers.SentenceTransformer(self.embeddings.model_name,
+ device=embedding_device)
+ self.top_k = top_k
+
+ def init_knowledge_vector_store(self,
+                                    filepath: Union[str, List[str]]):
+ if isinstance(filepath, str):
+ if not os.path.exists(filepath):
+ print("路径不存在")
+ return None
+ elif os.path.isfile(filepath):
+ file = os.path.split(filepath)[-1]
+ try:
+ loader = UnstructuredFileLoader(filepath, mode="elements")
+ docs = loader.load()
+ print(f"{file} 已成功加载")
+                except Exception:
+ print(f"{file} 未能成功加载")
+ return None
+ elif os.path.isdir(filepath):
+ docs = []
+ for file in os.listdir(filepath):
+ fullfilepath = os.path.join(filepath, file)
+ try:
+ loader = UnstructuredFileLoader(fullfilepath, mode="elements")
+ docs += loader.load()
+ print(f"{file} 已成功加载")
+                except Exception:
+ print(f"{file} 未能成功加载")
+ else:
+ docs = []
+ for file in filepath:
+ try:
+ loader = UnstructuredFileLoader(file, mode="elements")
+ docs += loader.load()
+ print(f"{file} 已成功加载")
+                except Exception:
+ print(f"{file} 未能成功加载")
+
+        # If nothing was loaded successfully, do not build an (empty) vector store
+        if len(docs) == 0:
+            return None
+
+        vector_store = FAISS.from_documents(docs, self.embeddings)
+        vs_path = f"""./vector_store/{os.path.splitext(file)[0]}_FAISS_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}"""
+        vector_store.save_local(vs_path)
+        return vs_path
+
+ def get_knowledge_based_answer(self,
+ query,
+ vs_path,
+ chat_history=[], ):
+ prompt_template = """基于以下已知信息,简洁和专业的来回答用户的问题。
+ 如果无法从中得到答案,请说 "根据已知信息无法回答该问题" 或 "没有提供足够的相关信息",不允许在答案中添加编造成分,答案请使用中文。
+
+ 已知内容:
+ {context}
+
+ 问题:
+ {question}"""
+ prompt = PromptTemplate(
+ template=prompt_template,
+ input_variables=["context", "question"]
+ )
+ self.llm.history = chat_history
+ vector_store = FAISS.load_local(vs_path, self.embeddings)
+ knowledge_chain = RetrievalQA.from_llm(
+ llm=self.llm,
+ retriever=vector_store.as_retriever(search_kwargs={"k": self.top_k}),
+ prompt=prompt
+ )
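+        # Pass each retrieved chunk into the context unchanged, without langchain's default document template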
+ knowledge_chain.combine_documents_chain.document_prompt = PromptTemplate(
+ input_variables=["page_content"], template="{page_content}"
+ )
+
+ knowledge_chain.return_source_documents = True
+
+ result = knowledge_chain({"query": query})
+ self.llm.history[-1][0] = query
+ return result, self.llm.history
diff --git a/cli_demo.py b/cli_demo.py
new file mode 100644
index 00000000..cda072d5
--- /dev/null
+++ b/cli_demo.py
@@ -0,0 +1,33 @@
+from configs.model_config import *
+from chains.local_doc_qa import LocalDocQA
+
+# return top-k text chunk from vector store
+VECTOR_SEARCH_TOP_K = 10
+
+# LLM input history length
+LLM_HISTORY_LEN = 3
+
+# Show reply with source text from input document
+REPLY_WITH_SOURCE = True
+
+if __name__ == "__main__":
+ local_doc_qa = LocalDocQA()
+ local_doc_qa.init_cfg(llm_model=LLM_MODEL,
+ embedding_model=EMBEDDING_MODEL,
+ embedding_device=EMBEDDING_DEVICE,
+ llm_history_len=LLM_HISTORY_LEN,
+ top_k=VECTOR_SEARCH_TOP_K)
+ vs_path = None
+ while not vs_path:
+ filepath = input("Input your local knowledge file path 请输入本地知识文件路径:")
+ vs_path = local_doc_qa.init_knowledge_vector_store(filepath)
+ history = []
+ while True:
+ query = input("Input your question 请输入问题:")
+ resp, history = local_doc_qa.get_knowledge_based_answer(query=query,
+ vs_path=vs_path,
+ chat_history=history)
+ if REPLY_WITH_SOURCE:
+ print(resp)
+ else:
+ print(resp["result"])
diff --git a/configs/model_config.py b/configs/model_config.py
new file mode 100644
index 00000000..fd309e14
--- /dev/null
+++ b/configs/model_config.py
@@ -0,0 +1,29 @@
+import torch.cuda
+import torch.backends
+
+
+embedding_model_dict = {
+ "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
+ "ernie-base": "nghuyong/ernie-3.0-base-zh",
+ "text2vec": "GanymedeNil/text2vec-large-chinese",
+}
+
+# Embedding model name
+EMBEDDING_MODEL = "text2vec"
+
+# Embedding running device
+EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
+
+# supported LLM models
+llm_model_dict = {
+ "chatglm-6b-int4-qe": "THUDM/chatglm-6b-int4-qe",
+ "chatglm-6b-int4": "THUDM/chatglm-6b-int4",
+ "chatglm-6b": "THUDM/chatglm-6b",
+}
+
+# LLM model name
+LLM_MODEL = "chatglm-6b"
+
+# LLM running device
+LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
+
diff --git a/content/langchain-ChatGLM README.md b/content/langchain-ChatGLM_README.md
similarity index 100%
rename from content/langchain-ChatGLM README.md
rename to content/langchain-ChatGLM_README.md
diff --git a/content/state_of_the_search.txt b/content/state_of_the_search.txt
new file mode 100644
index 00000000..34f4f179
--- /dev/null
+++ b/content/state_of_the_search.txt
@@ -0,0 +1,835 @@
+ChatGPT是OpenAI开发的一个大型语言模型,可以提供各种主题的信息,
+
+# 如何向 ChatGPT 提问以获得高质量答案:提示技巧工程完全指南
+
+## 介绍
+
+我很高兴欢迎您阅读我的最新书籍《The Art of Asking ChatGPT for High-Quality Answers: A complete Guide to Prompt Engineering Techniques》。本书是一本全面指南,介绍了各种提示技术,用于从ChatGPT中生成高质量的答案。
+
+我们将探讨如何使用不同的提示工程技术来实现不同的目标。ChatGPT是一款最先进的语言模型,能够生成类似人类的文本。然而,理解如何正确地向ChatGPT提问以获得我们所需的高质量输出非常重要。而这正是本书的目的。
+
+无论您是普通人、研究人员、开发人员,还是只是想在自己的领域中将ChatGPT作为个人助手的人,本书都是为您编写的。我使用简单易懂的语言,提供实用的解释,并在每个提示技术中提供了示例和提示公式。通过本书,您将学习如何使用提示工程技术来控制ChatGPT的输出,并生成符合您特定需求的文本。
+
+在整本书中,我们还提供了如何结合不同的提示技术以实现更具体结果的示例。我希望您能像我写作时一样,享受阅读本书并从中获得知识。
+
+