2026-01-19 21:37:20 +08:00
332 changed files with 3769 additions and 34755 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -1,19 +0,0 @@
-.idea
-# Langchain-Chatchat
-docs
-.github
-tests
-Dockerfile
-.dockerignore
-.gitignore
-.gitmodules
-README.md
-README_en.md
-README_ja.md
-LICENSE
-requirements_api.txt
-requirements_lite.txt
-requirements_webui.txt
-# bge-large-zh-v1.5
-bge-large-zh-v1.5/README.md
-# chatglm3-6b
--- a/.github/workflows/close-issue.yml
+++ b/.github/workflows/close-issue.yml
@ -1,22 +0,0 @@
-name: Close inactive issues
-on:
-  schedule:
-    - cron: "30 21 * * *"
-
-jobs:
-  close-issues:
-    runs-on: ubuntu-latest
-    permissions:
-      issues: write
-      pull-requests: write
-    steps:
-      - uses: actions/stale@v5
-        with:
-          days-before-issue-stale: 30
-          days-before-issue-close: 14
-          stale-issue-label: "stale"
-          stale-issue-message: "这个问题已经被标记为 `stale` ，因为它已经超过 30 天没有任何活动。"
-          close-issue-message: "这个问题已经被自动关闭，因为它被标为 `stale` 后超过 14 天没有任何活动。"
-          days-before-pr-stale: -1
-          days-before-pr-close: -1
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/docker-build.yaml
+++ b/.github/workflows/docker-build.yaml
@ -1,155 +0,0 @@
-name: docker-build
-on:
-  push:
-    branches:
-      - master
-    paths-ignore:
-      - 'README.md'
-      - 'README_en.md'
-      - 'README_ja.md'
-env:
-  TZ: Asia/Shanghai
-jobs:
-  docker-build:
-    runs-on: ubuntu-latest
-    # if: github.event.pull_request.merged == true
-    steps:
-      - name: Optimize Disk Space
-        uses: hugoalh/disk-space-optimizer-ghaction@v0.8.0
-        with:
-          operate_sudo: "True"
-          general_include: ".+"
-          general_exclude: |-
-            ^GCC$
-            ^G\+\+$
-            Clang
-            LLVM
-          docker_include: ".+"
-          docker_prune: "True"
-          docker_clean: "True"
-          apt_prune: "True"
-          apt_clean: "True"
-          homebrew_prune: "True"
-          homebrew_clean: "True"
-          npm_prune: "True"
-          npm_clean: "True"
-          os_swap: "True"
-      - name: Remove Unnecessary Tools And Files
-        env:
-          DEBIAN_FRONTEND: noninteractive
-        run: |
-          sudo apt-get remove -y '^dotnet-.*' '^llvm-.*' 'php.*' azure-cli google-chrome-stable firefox powershell mono-devel
-          sudo apt-get autoremove --purge -y
-          sudo find /var/log -name "*.gz" -type f -delete
-          sudo rm -rf /var/cache/apt/archives
-          sudo rm -rf /tmp/*
-          sudo rm -rf /etc/apt/sources.list.d/* /usr/share/dotnet /usr/local/lib/android /opt/ghc /etc/mysql /etc/php
-          sudo -E apt-get -y purge azure-cli* docker* ghc* zulu* hhvm* llvm* firefox* google* dotnet* aspnetcore* powershell* openjdk* adoptopenjdk* mysql* php* mongodb* moby* snap* || true
-          sudo rm -rf /etc/apt/sources.list.d/* /usr/local/lib/android /opt/ghc /usr/share/dotnet /usr/local/graalvm /usr/local/.ghcup \
-          /usr/local/share/powershell /usr/local/share/chromium /usr/local/lib/node_modules
-          sudo rm -rf /etc/apt/sources.list.d/* /usr/share/dotnet /usr/local/lib/android /opt/ghc /etc/mysql /etc/php
-          sudo -E apt-get -y purge azure-cli* docker* ghc* zulu* hhvm* llvm* firefox* google* dotnet* aspnetcore* powershell* openjdk* adoptopenjdk* mysql* php* mongodb* moby* snap* || true
-          sudo -E apt-get -qq update
-          sudo -E apt-get -qq install libfuse-dev $(curl -fsSL git.io/depends-ubuntu-2204)
-          sudo -E apt-get -qq autoremove --purge
-          sudo -E apt-get -qq clean
-          sudo apt-get clean
-          rm -rf /opt/hostedtoolcache
-          sudo timedatectl set-timezone "$TZ"
-      - name: Free Up Disk Space
-        uses: easimon/maximize-build-space@master
-        with:
-          root-reserve-mb: 62464 # 给 / 预留 61GiB 空间( docker 预留)
-          swap-size-mb: 1
-          remove-dotnet: 'true'
-          remove-android: 'true'
-          remove-haskell: 'true'
-          remove-codeql: 'true'
-          remove-docker-images: 'true'
-      - name: Checkout Repository
-        uses: actions/checkout@v4
-      - name: Get Latest Release
-        id: get_version
-        run: |
-          VERSION=$(curl --silent "https://api.github.com/repos/${{ github.repository }}/releases/latest" | jq -r .tag_name)
-          echo "RELEASE_VERSION=${VERSION}" >> $GITHUB_ENV
-      - name: Set Image Tag
-        id: imageTag
-        run: echo "::set-output name=image_tag::$RELEASE_VERSION-$(date +%Y%m%d)-$(git rev-parse --short HEAD)"
-      - name: Set Up QEMU
-        uses: docker/setup-qemu-action@v2
-      - name: Set Up Docker Buildx
-        uses: docker/setup-buildx-action@v2
-      - name: Clone Model
-        run: |
-          sudo mkdir -p $GITHUB_WORKSPACE/bge-large-zh-v1.5
-          cd $GITHUB_WORKSPACE/bge-large-zh-v1.5
-          sudo wget https://huggingface.co/BAAI/bge-large-zh-v1.5/resolve/main/.gitattributes &> /dev/null
-          sudo wget https://huggingface.co/BAAI/bge-large-zh-v1.5/resolve/main/config.json &> /dev/null
-          sudo wget https://huggingface.co/BAAI/bge-large-zh-v1.5/resolve/main/config_sentence_transformers.json &> /dev/null
-          sudo wget https://huggingface.co/BAAI/bge-large-zh-v1.5/resolve/main/modules.json &> /dev/null
-          sudo wget https://huggingface.co/BAAI/bge-large-zh-v1.5/resolve/main/pytorch_model.bin &> /dev/null
-          sudo wget https://huggingface.co/BAAI/bge-large-zh-v1.5/resolve/main/sentence_bert_config.json &> /dev/null
-          sudo wget https://huggingface.co/BAAI/bge-large-zh-v1.5/resolve/main/special_tokens_map.json &> /dev/null
-          sudo wget https://huggingface.co/BAAI/bge-large-zh-v1.5/resolve/main/tokenizer.json &> /dev/null
-          sudo wget https://huggingface.co/BAAI/bge-large-zh-v1.5/resolve/main/tokenizer_config.json &> /dev/null
-          sudo wget https://huggingface.co/BAAI/bge-large-zh-v1.5/resolve/main/vocab.txt &> /dev/null
-          sudo mkdir -p $GITHUB_WORKSPACE/bge-large-zh-v1.5/1_Pooling
-          cd $GITHUB_WORKSPACE/bge-large-zh-v1.5/1_Pooling
-          sudo wget https://huggingface.co/BAAI/bge-large-zh-v1.5/resolve/main/1_Pooling/config.json &> /dev/null
-          sudo mkdir -p $GITHUB_WORKSPACE/chatglm3-6b
-          cd $GITHUB_WORKSPACE/chatglm3-6b
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/config.json &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/configuration_chatglm.py &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/model-00001-of-00007.safetensors &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/model-00002-of-00007.safetensors &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/model-00003-of-00007.safetensors &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/model-00004-of-00007.safetensors &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/model-00005-of-00007.safetensors &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/model-00006-of-00007.safetensors &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/model-00007-of-00007.safetensors &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/model.safetensors.index.json &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/modeling_chatglm.py &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/pytorch_model.bin.index.json &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/quantization.py &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/special_tokens_map.json &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/tokenization_chatglm.py &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/tokenizer.model &> /dev/null
-          sudo wget https://huggingface.co/THUDM/chatglm3-6b/resolve/main/tokenizer_config.json &> /dev/null
-          du -sh $GITHUB_WORKSPACE
-          du -sh $GITHUB_WORKSPACE/*
-          du -sh $GITHUB_WORKSPACE/bge-large-zh-v1.5/*       
-          du -sh $GITHUB_WORKSPACE/chatglm3-6b/*
-      - name: Show Runner Disk
-        run: df -hT
-      - name: Docker Build
-        run: |
-          docker build -t uswccr.ccs.tencentyun.com/chatchat/chatchat:${{ steps.imageTag.outputs.image_tag }} -f Dockerfile .
-      - name: Show Images Size
-        run: docker images
-      - name: Login To Tencent CCR
-        uses: docker/login-action@v2
-        with:
-          registry: uswccr.ccs.tencentyun.com
-          username: ${{ secrets.CCR_REGISTRY_USERNAME }}
-          password: ${{ secrets.CCR_REGISTRY_PASSWORD }}
-      - name: Docker Push
-        run: docker push uswccr.ccs.tencentyun.com/chatchat/chatchat:${{ steps.imageTag.outputs.image_tag }}
-#      - name: Login to Docker Hub
-#        uses: docker/login-action@v2
-#        with:
-#          username: ${{ secrets.DOCKERHUB_USERNAME }}
-#          password: ${{ secrets.DOCKERHUB_TOKEN }}
-      - name: Update README.md
-        run: |
-          sed -i "s|uswccr.ccs.tencentyun.com/chatchat/chatchat:[^ ]*|uswccr.ccs.tencentyun.com/chatchat/chatchat:${{ steps.imageTag.outputs.image_tag }}|g" README.md
-          sed -i "s|uswccr.ccs.tencentyun.com/chatchat/chatchat:[^ ]*|uswccr.ccs.tencentyun.com/chatchat/chatchat:${{ steps.imageTag.outputs.image_tag }}|g" README_en.md
-          sed -i "s|uswccr.ccs.tencentyun.com/chatchat/chatchat:[^ ]*|uswccr.ccs.tencentyun.com/chatchat/chatchat:${{ steps.imageTag.outputs.image_tag }}|g" README_ja.md
-          git config --local user.email "action@github.com"
-          git config --local user.name "GitHub Action"
-          git commit -am "feat:update docker image:tag"
-      - name: Push README.md
-        uses: ad-m/github-push-action@master
-        with:
-          github_token: ${{ secrets.GH_PAT }}
-          branch: ${{ github.ref }}
--- a/.gitignore
+++ b/.gitignore
@ -1,180 +1,9 @@
 *.log
 *.log.*
-*.bak
 logs
-/knowledge_base/*
-!/knowledge_base/samples
-/knowledge_base/samples/vector_store
-
-/configs/*.py
-.vscode/
-
-# below are standard python ignore files
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-.pybuilder/
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-.python-version
-
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-Pipfile.lock
-
-# poetry
-#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-poetry.lock
-
-# pdm
-#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
-#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
-#   in version control.
-#   https://pdm.fming.dev/#use-with-ide
-.pdm.toml
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# pytype static type analyzer
-.pytype/
-
-# Cython debug symbols
-cython_debug/
-
-# PyCharm
-#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-#  and can be added to the global gitignore or merged into this file.  For a more nuclear
-#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/
-.pytest_cache
-.DS_Store
-
-# Test File
-test.py
+__pycache__/
+/knowledge_base/
 configs/*.py
-
-
+.vscode/
+.pytest_cache/
--- a/.gitmodules
+++ b/.gitmodules
@ -1,3 +0,0 @@
-[submodule "knowledge_base/samples/content/wiki"]
-	path = knowledge_base/samples/content/wiki
-	url = https://github.com/chatchat-space/Langchain-Chatchat.wiki.git
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -0,0 +1,22 @@
+# 贡献指南
+
+欢迎！我们是一个非常友好的社区，非常高兴您想要帮助我们让这个应用程序变得更好。但是，请您遵循一些通用准则以保持组织有序。
+
+1. 确保为您要修复的错误或要添加的功能创建了一个[问题](https://github.com/imClumsyPanda/langchain-ChatGLM/issues)，尽可能保持它们小。
+2. 请使用 `git pull --rebase` 来拉取和衍合上游的更新。
+3. 将提交合并为格式良好的提交。在提交说明中单独一行提到要解决的问题，如`Fix #<bug>`（有关更多可以使用的关键字，请参见[将拉取请求链接到问题](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue)）。
+4. 推送到`dev`。在说明中提到正在解决的问题。
+
+---
+
+# Contribution Guide
+
+Welcome! We're a pretty friendly community, and we're thrilled that you want to help make this app even better. However, we ask that you follow some general guidelines to keep things organized around here.
+
+1. Make sure an [issue](https://github.com/imClumsyPanda/langchain-ChatGLM/issues) is created for the bug you're about to fix, or feature you're about to add. Keep them as small as possible.
+
+2. Please use `git pull --rebase` to fetch upstream updates and rebase your work onto them.
+
+3. Rebase commits into well-formatted commits. Mention the issue being resolved in the commit message on a line all by itself like `Fixes #<bug>` (refer to [Linking a pull request to an issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue) for more keywords you can use).
+
+4. Push into `dev`.  Mention which bug is being resolved in the description.
--- a/27
+++ b/27
@ -1,27 +0,0 @@
-# Base Image
-FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
-# Labels
-LABEL maintainer=chatchat
-# Environment Variables
-ENV HOME=/Langchain-Chatchat
-# Commands
-WORKDIR /
-RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
-    echo "Asia/Shanghai" > /etc/timezone && \
-    apt-get update -y && \
-    apt-get install -y --no-install-recommends python3.11 python3-pip curl libgl1 libglib2.0-0 jq && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* && \
-    rm -f /usr/bin/python3 && \
-    ln -s /usr/bin/python3.11 /usr/bin/python3 && \
-    mkdir -p $HOME
-# Copy the application files
-COPY . $HOME
-WORKDIR $HOME
-# Install dependencies from requirements.txt
-RUN pip3 install -r requirements.txt -i https://pypi.org/simple && \
-    python3 copy_config_example.py && \
-    sed -i 's|MODEL_ROOT_PATH = ""|MODEL_ROOT_PATH = "/Langchain-Chatchat"|' configs/model_config.py && \
-    python3 init_database.py --recreate-vs
-EXPOSE 22 7861 8501
-ENTRYPOINT ["python3", "startup.py", "-a"]
--- a/README.md
+++ b/README.md
@ -1,49 +1,36 @@
 ![](img/logo-long-chatchat-trans-v2.png)

-🌍 [READ THIS IN ENGLISH](README_en.md)
-🌍 [日本語で読む](README_ja.md)
-
-📃 **LangChain-Chatchat** (原 Langchain-ChatGLM)
-
-基于 ChatGLM 等大语言模型与 Langchain 等应用框架实现，开源、可离线部署的检索增强生成(RAG)大模型知识库项目。
-
-### ⚠️ 重要提示
-
-`0.2.10`将会是`0.2.x`系列的最后一个版本，`0.2.x`系列版本将会停止更新和技术支持，全力研发具有更强应用性的 `Langchain-Chatchat 0.3.x`。
-`0.2.10` 的后续 bug 修复将会直接推送到`master`分支，而不再进行版本更新。
-
---
+**LangChain-Chatchat** (原 Langchain-ChatGLM):  基于 Langchain 与 ChatGLM 等大语言模型的本地知识库问答应用实现。

 ## 目录

 * [介绍](README.md#介绍)
-* [解决的痛点](README.md#解决的痛点)
-* [快速上手](README.md#快速上手)
-    * [1. 环境配置](README.md#1-环境配置)
-    * [2. 模型下载](README.md#2-模型下载)
-    * [3. 初始化知识库和配置文件](README.md#3-初始化知识库和配置文件)
-    * [4. 一键启动](README.md#4-一键启动)
-    * [5. 启动界面示例](README.md#5-启动界面示例)
-* [联系我们](README.md#联系我们)
+* [变更日志](README.md#变更日志)
+* [模型支持](README.md#模型支持)
+* [Docker 部署](README.md#Docker-部署)
+* [开发部署](README.md#开发部署)
+  * [软件需求](README.md#软件需求)
+  * [1. 开发环境准备](README.md#1.-开发环境准备)
+  * [2. 下载模型至本地](README.md#2.-下载模型至本地)
+  * [3. 设置配置项](README.md#3.-设置配置项)
+  * [4. 知识库初始化与迁移](README.md#4.-知识库初始化与迁移)
+  * [5. 一键启动API服务或WebUI服务](README.md#5.-一键启动API-服务或-Web-UI)
+  * [6. 分步启动 API 服务或 Web UI](README.md#6-分步启动-API-服务或-Web-UI)
+* [常见问题](README.md#常见问题)
+* [路线图](README.md#路线图)
+* [项目交流群](README.md#项目交流群)
+
+---

 ## 介绍

-🤖️ 一种利用 [langchain](https://github.com/langchain-ai/langchain)
-思想实现的基于本地知识库的问答应用，目标期望建立一套对中文场景与开源模型支持友好、可离线运行的知识库问答解决方案。
+🤖️ 一种利用 [langchain](https://github.com/hwchase17/langchain) 思想实现的基于本地知识库的问答应用，目标期望建立一套对中文场景与开源模型支持友好、可离线运行的知识库问答解决方案。

-💡 受 [GanymedeNil](https://github.com/GanymedeNil) 的项目 [document.ai](https://github.com/GanymedeNil/document.ai)
-和 [AlexZhangji](https://github.com/AlexZhangji)
-创建的 [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216)
-启发，建立了全流程可使用开源模型实现的本地知识库问答应用。本项目的最新版本中通过使用 [FastChat](https://github.com/lm-sys/FastChat)
-接入 Vicuna, Alpaca, LLaMA, Koala, RWKV 等模型，依托于 [langchain](https://github.com/langchain-ai/langchain)
-框架支持通过基于 [FastAPI](https://github.com/tiangolo/fastapi) 提供的 API
-调用服务，或使用基于 [Streamlit](https://github.com/streamlit/streamlit) 的 WebUI 进行操作。
+💡 受 [GanymedeNil](https://github.com/GanymedeNil) 的项目 [document.ai](https://github.com/GanymedeNil/document.ai) 和 [AlexZhangji](https://github.com/AlexZhangji) 创建的 [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) 启发，建立了全流程可使用开源模型实现的本地知识库问答应用。本项目的最新版本中通过使用 [FastChat](https://github.com/lm-sys/FastChat) 接入 Vicuna, Alpaca, LLaMA, Koala, RWKV 等模型，依托于 [langchain](https://github.com/langchain-ai/langchain) 框架支持通过基于 [FastAPI](https://github.com/tiangolo/fastapi) 提供的 API 调用服务，或使用基于 [Streamlit](https://github.com/streamlit/streamlit) 的 WebUI 进行操作。

-✅ 依托于本项目支持的开源 LLM 与 Embedding 模型，本项目可实现全部使用**开源**模型**离线私有部署**。与此同时，本项目也支持
-OpenAI GPT API 的调用，并将在后续持续扩充对各类模型及模型 API 的接入。
+✅ 依托于本项目支持的开源 LLM 与 Embedding 模型，本项目可实现全部使用**开源**模型**离线私有部署**。与此同时，本项目也支持 OpenAI GPT API 的调用，并将在后续持续扩充对各类模型及模型 API 的接入。

-⛓️ 本项目实现原理如下图所示，过程包括加载文件 -> 读取文本 -> 文本分割 -> 文本向量化 -> 问句向量化 ->
-在文本向量中匹配出与问句向量最相似的 `top k`个 -> 匹配出的文本作为上下文和问题一起添加到 `prompt`中 -> 提交给 `LLM`生成回答。
+⛓️ 本项目实现原理如下图所示，过程包括加载文件 -> 读取文本 -> 文本分割 -> 文本向量化 -> 问句向量化 -> 在文本向量中匹配出与问句向量最相似的 `top k`个 -> 匹配出的文本作为上下文和问题一起添加到 `prompt`中 -> 提交给 `LLM`生成回答。

 📺 [原理介绍视频](https://www.bilibili.com/video/BV13M4y1e7cN/?share_source=copy_web&vd_source=e6c5aafe684f30fbe41925d61ca6d514)

@ -55,152 +42,446 @@ OpenAI GPT API 的调用，并将在后续持续扩充对各类模型及模型 A

 🚩 本项目未涉及微调、训练过程，但可利用微调或训练对本项目效果进行优化。

-🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/chatchat-space/Langchain-Chatchat/Langchain-Chatchat) 中 `0.2.10`
+🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/imClumsyPanda/langchain-ChatGLM/Langchain-Chatchat) 中 `v6` 版本所使用代码已更新至本项目 `0.2.2` 版本。

-版本所使用代码已更新至本项目 `v0.2.10` 版本。
+🐳 [Docker 镜像](registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.0)

-🐳 [Docker 镜像](isafetech/chatchat:0.2.10) 已经更新到 ```0.2.10``` 版本。
-
-🌲 本次更新后同时支持DockerHub、阿里云、腾讯云镜像源：
+💻 一行命令运行 Docker：

 ```shell
-docker run -d --gpus all -p 80:8501 isafetech/chatchat:0.2.10
-docker run -d --gpus all -p 80:8501 uswccr.ccs.tencentyun.com/chatchat/chatchat:0.2.10
-docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.10
+docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.0
 ```

-🧩 本项目有一个非常完整的[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/) ， README只是一个简单的介绍，_
-_仅仅是入门教程，能够基础运行__。
-如果你想要更深入的了解本项目，或者想对本项目做出贡献。请移步 [Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
-界面
+---

-## 解决的痛点
+## 变更日志

-该项目是一个可以实现 __完全本地化__推理的知识库增强方案, 重点解决数据安全保护，私域化部署的企业痛点。
-本开源方案采用```Apache License```，可以免费商用，无需付费。
+参见 [版本更新日志](https://github.com/imClumsyPanda/langchain-ChatGLM/releases)。

-我们支持市面上主流的本地大语言模型和Embedding模型，支持开源的本地向量数据库。
-支持列表详见[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
+从 `0.1.x` 升级过来的用户请注意，需要按照[开发部署](README.md#开发部署)过程操作，将现有知识库迁移到新格式，具体见[知识库初始化与迁移](docs/INSTALL.md#知识库初始化与迁移)。

-## 快速上手
+### `0.2.0` 版本与 `0.1.x` 版本区别

-### 1. 环境配置
+1. 使用 [FastChat](https://github.com/lm-sys/FastChat) 提供开源 LLM 模型的 API，以 OpenAI API 接口形式接入，提升 LLM 模型加载效果；
+2. 使用 [langchain](https://github.com/langchain-ai/langchain) 中已有 Chain 的实现，便于后续接入不同类型 Chain，并将对 Agent 接入开展测试；
+3. 使用 [FastAPI](https://github.com/tiangolo/fastapi) 提供 API 服务，全部接口可在 FastAPI 自动生成的 docs 中开展测试，且所有对话接口支持通过参数设置流式或非流式输出；
+4. 使用 [Streamlit](https://github.com/streamlit/streamlit) 提供 WebUI 服务，可选是否基于 API 服务启动 WebUI，增加会话管理，可以自定义会话主题并切换，且后续可支持不同形式输出内容的显示；
+5. 项目中默认 LLM 模型改为 [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b)，默认 Embedding 模型改为 [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base)，文件加载方式与文段划分方式也有调整，后续将重新实现上下文扩充，并增加可选设置；
+6. 项目中扩充了对不同类型向量库的支持，除支持 [FAISS](https://github.com/facebookresearch/faiss) 向量库外，还提供 [Milvus](https://github.com/milvus-io/milvus), [PGVector](https://github.com/pgvector/pgvector) 向量库的接入；
+7. 项目中搜索引擎对话，除 Bing 搜索外，增加 DuckDuckGo 搜索选项，DuckDuckGo 搜索无需配置 API Key，在可访问国外服务环境下可直接使用。

-+ 首先，确保你的机器安装了 Python 3.8 - 3.11 (我们强烈推荐使用 Python3.11)。
+---

-```
-$ python --version
-Python 3.11.7
-```
+## 模型支持

-接着，创建一个虚拟环境，并在虚拟环境内安装项目的依赖
+本项目中默认使用的 LLM 模型为 [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b)，默认使用的 Embedding 模型为 [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base)。
+
+### LLM 模型支持
+
+本项目最新版本中基于 [FastChat](https://github.com/lm-sys/FastChat) 进行本地 LLM 模型接入，支持模型如下：
+
+- [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)
+- Vicuna, Alpaca, LLaMA, Koala
+- [BlinkDL/RWKV-4-Raven](https://huggingface.co/BlinkDL/rwkv-4-raven)
+- [camel-ai/CAMEL-13B-Combined-Data](https://huggingface.co/camel-ai/CAMEL-13B-Combined-Data)
+- [databricks/dolly-v2-12b](https://huggingface.co/databricks/dolly-v2-12b)
+- [FreedomIntelligence/phoenix-inst-chat-7b](https://huggingface.co/FreedomIntelligence/phoenix-inst-chat-7b)
+- [h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b](https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b)
+- [lcw99/polyglot-ko-12.8b-chang-instruct-chat](https://huggingface.co/lcw99/polyglot-ko-12.8b-chang-instruct-chat)
+- [lmsys/fastchat-t5-3b-v1.0](https://huggingface.co/lmsys/fastchat-t5)
+- [mosaicml/mpt-7b-chat](https://huggingface.co/mosaicml/mpt-7b-chat)
+- [Neutralzz/BiLLa-7B-SFT](https://huggingface.co/Neutralzz/BiLLa-7B-SFT)
+- [nomic-ai/gpt4all-13b-snoozy](https://huggingface.co/nomic-ai/gpt4all-13b-snoozy)
+- [NousResearch/Nous-Hermes-13b](https://huggingface.co/NousResearch/Nous-Hermes-13b)
+- [openaccess-ai-collective/manticore-13b-chat-pyg](https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg)
+- [OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5](https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5)
+- [project-baize/baize-v2-7b](https://huggingface.co/project-baize/baize-v2-7b)
+- [Salesforce/codet5p-6b](https://huggingface.co/Salesforce/codet5p-6b)
+- [StabilityAI/stablelm-tuned-alpha-7b](https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b)
+- [THUDM/chatglm-6b](https://huggingface.co/THUDM/chatglm-6b)
+- [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b)
+- [tiiuae/falcon-40b](https://huggingface.co/tiiuae/falcon-40b)
+- [timdettmers/guanaco-33b-merged](https://huggingface.co/timdettmers/guanaco-33b-merged)
+- [togethercomputer/RedPajama-INCITE-7B-Chat](https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Chat)
+- [WizardLM/WizardLM-13B-V1.0](https://huggingface.co/WizardLM/WizardLM-13B-V1.0)
+- [WizardLM/WizardCoder-15B-V1.0](https://huggingface.co/WizardLM/WizardCoder-15B-V1.0)
+- [baichuan-inc/baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B)
+- [internlm/internlm-chat-7b](https://huggingface.co/internlm/internlm-chat-7b)
+- [Qwen/Qwen-7B-Chat](https://huggingface.co/Qwen/Qwen-7B-Chat)
+- [HuggingFaceH4/starchat-beta](https://huggingface.co/HuggingFaceH4/starchat-beta)
+- 任何 [EleutherAI](https://huggingface.co/EleutherAI) 的 pythia 模型，如 [pythia-6.9b](https://huggingface.co/EleutherAI/pythia-6.9b)
+- 在以上模型基础上训练的任何 [Peft](https://github.com/huggingface/peft) 适配器。为了激活，模型路径中必须有 `peft` 。注意：如果加载多个peft模型，你可以通过在任何模型工作器中设置环境变量 `PEFT_SHARE_BASE_WEIGHTS=true` 来使它们共享基础模型的权重。
+
+以上模型支持列表可能随 [FastChat](https://github.com/lm-sys/FastChat) 更新而持续更新，可参考 [FastChat 已支持模型列表](https://github.com/lm-sys/FastChat/blob/main/docs/model_support.md)。
+
+除本地模型外，本项目也支持直接接入 OpenAI API，具体设置可参考 `configs/model_config.py.example` 中的 `llm_model_dict` 的 `openai-chatgpt-3.5` 配置信息。
+
+### Embedding 模型支持
+
+本项目支持调用 [HuggingFace](https://huggingface.co/models?pipeline_tag=sentence-similarity) 中的 Embedding 模型，已支持的 Embedding 模型如下：
+
+- [moka-ai/m3e-small](https://huggingface.co/moka-ai/m3e-small)
+- [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base)
+- [moka-ai/m3e-large](https://huggingface.co/moka-ai/m3e-large)
+- [BAAI/bge-small-zh](https://huggingface.co/BAAI/bge-small-zh)
+- [BAAI/bge-base-zh](https://huggingface.co/BAAI/bge-base-zh)
+- [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh)
+- [BAAI/bge-large-zh-noinstruct](https://huggingface.co/BAAI/bge-large-zh-noinstruct)
+- [text2vec-base-chinese-sentence](https://huggingface.co/shibing624/text2vec-base-chinese-sentence)
+- [text2vec-base-chinese-paraphrase](https://huggingface.co/shibing624/text2vec-base-chinese-paraphrase)
+- [text2vec-base-multilingual](https://huggingface.co/shibing624/text2vec-base-multilingual)
+- [shibing624/text2vec-base-chinese](https://huggingface.co/shibing624/text2vec-base-chinese)
+- [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese)
+- [nghuyong/ernie-3.0-nano-zh](https://huggingface.co/nghuyong/ernie-3.0-nano-zh)
+- [nghuyong/ernie-3.0-base-zh](https://huggingface.co/nghuyong/ernie-3.0-base-zh)
+- [OpenAI/text-embedding-ada-002](https://platform.openai.com/docs/guides/embeddings)
+
+---
+
+## Docker 部署
+
+🐳 Docker 镜像地址: `registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.0`

 ```shell
-
-# 拉取仓库
-$ git clone https://github.com/chatchat-space/Langchain-Chatchat.git
-
-# 进入目录
-$ cd Langchain-Chatchat
-
-# 安装全部依赖
-$ pip install -r requirements.txt 
-$ pip install -r requirements_api.txt
-$ pip install -r requirements_webui.txt  
-
-# 默认依赖包括基本运行环境（FAISS向量库）。如果要使用 milvus/pg_vector 等向量库，请将 requirements.txt 中相应依赖取消注释再安装。
+docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.0
 ```

-请注意，LangChain-Chatchat `0.2.x` 系列是针对 Langchain `0.0.x` 系列版本的，如果你使用的是 Langchain `0.1.x`
-系列版本，需要降级您的`Langchain`版本。
+- 该版本镜像大小 `33.9GB`，使用 `v0.2.0`，以 `nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04` 为基础镜像
+- 该版本内置一个 `embedding` 模型：`m3e-large`，内置 `chatglm2-6b-32k`
+- 该版本目标为方便一键部署使用，请确保您已经在Linux发行版上安装了NVIDIA驱动程序
+- 请注意，您不需要在主机系统上安装CUDA工具包，但需要安装 `NVIDIA Driver` 以及 `NVIDIA Container Toolkit`，请参考[安装指南](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+- 首次拉取和启动均需要一定时间，首次启动时请参照下图使用 `docker logs -f <container id>` 查看日志
+- 如遇到启动过程卡在 `Waiting..` 步骤，建议使用 `docker exec -it <container id> bash` 进入 `/logs/` 目录查看对应阶段日志

-### 2. 模型下载
+---

-如需在本地或离线环境下运行本项目，需要首先将项目所需的模型下载至本地，通常开源 LLM 与 Embedding
-模型可以从 [HuggingFace](https://huggingface.co/models) 下载。
+## 开发部署

-以本项目中默认使用的 LLM 模型 [THUDM/ChatGLM3-6B](https://huggingface.co/THUDM/chatglm3-6b) 与 Embedding
-模型 [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh) 为例：
+### 软件需求

-下载模型需要先[安装 Git LFS](https://docs.github.com/zh/repositories/working-with-files/managing-large-files/installing-git-large-file-storage)
-，然后运行
+本项目已在 Python 3.8.1 - 3.10，CUDA 11.7 环境下完成测试。已在 Windows、ARM 架构的 macOS、Linux 系统中完成测试。
+
+### 1. 开发环境准备
+
+参见 [开发环境准备](docs/INSTALL.md)。
+
+**请注意：** `0.2.0` 及更新版本的依赖包与 `0.1.x` 版本依赖包可能发生冲突，强烈建议新建环境后重新安装依赖包。
+
+### 2. 下载模型至本地
+
+如需在本地或离线环境下运行本项目，需要首先将项目所需的模型下载至本地，通常开源 LLM 与 Embedding 模型可以从 [HuggingFace](https://huggingface.co/models) 下载。
+
+以本项目中默认使用的 LLM 模型 [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b) 与 Embedding 模型 [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base) 为例：
+
+下载模型需要先[安装Git LFS](https://docs.github.com/zh/repositories/working-with-files/managing-large-files/installing-git-large-file-storage)，然后运行

 ```Shell
-$ git lfs install
-$ git clone https://huggingface.co/THUDM/chatglm3-6b
-$ git clone https://huggingface.co/BAAI/bge-large-zh
+$ git clone https://huggingface.co/THUDM/chatglm2-6b
+
+$ git clone https://huggingface.co/moka-ai/m3e-base
 ```

-### 3. 初始化知识库和配置文件
+### 3. 设置配置项

-按照下列方式初始化自己的知识库和简单的复制配置文件
+复制模型相关参数配置模板文件 [configs/model_config.py.example](configs/model_config.py.example) 存储至项目路径下 `./configs` 路径下，并重命名为 `model_config.py`。

-```shell
-$ python copy_config_example.py
-$ python init_database.py --recreate-vs
- ```
+复制服务相关参数配置模板文件 [configs/server_config.py.example](configs/server_config.py.example) 存储至项目路径下 `./configs` 路径下，并重命名为 `server_config.py`。

-### 4. 一键启动
+在开始执行 Web UI 或命令行交互前，请先检查 `configs/model_config.py` 和 `configs/server_config.py` 中的各项模型参数设置是否符合需求：

-按照以下命令启动项目
+- 请确认已下载至本地的 LLM 模型本地存储路径写在 `llm_model_dict` 对应模型的 `local_model_path` 属性中，如:
+
+```python
+llm_model_dict={
+                "chatglm2-6b": {
+                        "local_model_path": "/Users/xxx/Downloads/chatglm2-6b",
+                        "api_base_url": "http://localhost:8888/v1",  # "name"修改为 FastChat 服务中的"api_base_url"
+                        "api_key": "EMPTY"
+                    },
+                }
+```
+
+- 请确认已下载至本地的 Embedding 模型本地存储路径写在 `embedding_model_dict` 对应模型位置，如：
+
+```python
+embedding_model_dict = {
+                        "m3e-base": "/Users/xxx/Downloads/m3e-base",
+                       }
+```
+
+如果你选择使用OpenAI的Embedding模型，请将模型的 ``key``写入 `embedding_model_dict`中。使用该模型，你需要能够访问OpenAI官方的API，或设置代理。
+
+### 4. 知识库初始化与迁移
+
+当前项目的知识库信息存储在数据库中，在正式运行项目之前请先初始化数据库（我们强烈建议您在执行操作前备份您的知识文件）。
+
+- 如果您是从 `0.1.x` 版本升级过来的用户，针对已建立的知识库，请确认知识库的向量库类型、Embedding 模型与 `configs/model_config.py` 中默认设置一致，如无变化只需以下命令将现有知识库信息添加到数据库即可：
+
+  ```shell
+  $ python init_database.py
+  ```
+- 如果您是第一次运行本项目，知识库尚未建立，或者配置文件中的知识库类型、嵌入模型发生变化，或者之前的向量库没有开启 `normalize_L2`，需要以下命令初始化或重建知识库：
+
+  ```shell
+  $ python init_database.py --recreate-vs
+  ```
+
+### 5. 一键启动API 服务或 Web UI
+
+#### 5.1 启动命令
+
+一键启动脚本 startup.py,一键启动所有 Fastchat 服务、API 服务、WebUI 服务，示例代码：

 ```shell
 $ python startup.py -a
 ```

-### 5. 启动界面示例
+并可使用 `Ctrl + C` 直接关闭所有运行服务。如果一次结束不了，可以多按几次。

-如果正常启动，你将能看到以下界面
+可选参数包括 `-a (或--all-webui)`, `--all-api`, `--llm-api`, `-c (或--controller)`, `--openai-api`,
+`-m (或--model-worker)`, `--api`, `--webui`，其中：

-1. FastAPI Docs 界面
+- `--all-webui` 为一键启动 WebUI 所有依赖服务；
+- `--all-api` 为一键启动 API 所有依赖服务；
+- `--llm-api` 为一键启动 Fastchat 所有依赖的 LLM 服务；
+- `--openai-api` 为仅启动 FastChat 的 controller 和 openai-api-server 服务；
+- 其他为单独服务启动选项。

-![](img/fastapi_docs_026.png)
+#### 5.2 启动非默认模型

-2. Web UI 启动界面示例：
+若想指定非默认模型，需要用 `--model-name` 选项，示例：
+
+```shell
+$ python startup.py --all-webui --model-name Qwen-7B-Chat
+```
+
+更多信息可通过 `python startup.py -h`查看。
+
+#### 5.3 多卡加载
+
+项目支持多卡加载，需在 startup.py 中的 create_model_worker_app 函数中，修改如下三个参数:
+
+```python
+gpus=None, 
+num_gpus=1, 
+max_gpu_memory="20GiB"
+```
+
+其中，`gpus` 控制使用的显卡的ID，例如 "0,1";
+
+`num_gpus` 控制使用的卡数;
+
+`max_gpu_memory` 控制每个卡使用的显存容量。
+
+注1：server_config.py的FSCHAT_MODEL_WORKERS字典中也增加了相关配置，如有需要也可通过修改FSCHAT_MODEL_WORKERS字典中对应参数实现多卡加载。
+
+注2：少数情况下，gpus参数会不生效，此时需要通过设置环境变量CUDA_VISIBLE_DEVICES来指定torch可见的gpu,示例代码：
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1 python startup.py -a
+```
+
+#### 5.4 PEFT 加载(包括lora,p-tuning,prefix tuning, prompt tuning,ia3等)
+
+本项目基于 FastChat 加载 LLM 服务，故需以 FastChat 加载 PEFT 路径，即保证路径名称里必须有 peft 这个词，配置文件的名字为 adapter_config.json，peft 路径下包含.bin 格式的 PEFT 权重，peft路径在startup.py中create_model_worker_app函数的args.model_names中指定，并开启环境变量PEFT_SHARE_BASE_WEIGHTS=true参数。
+
+注：如果上述方式启动失败，则需要以标准的 FastChat 服务启动方式分步启动，分步启动步骤参考第六节，PEFT 加载详细步骤参考[加载lora微调后模型失效](https://github.com/chatchat-space/Langchain-Chatchat/issues/1130#issuecomment-1685291822)。
+
+#### **5.5 注意事项：**
+
+**1. startup 脚本用多进程方式启动各模块的服务，可能会导致打印顺序问题，请等待全部服务发起后再调用，并根据默认或指定端口调用服务（默认 LLM API 服务端口：`127.0.0.1:8888`,默认 API 服务端口：`127.0.0.1:7861`,默认 WebUI 服务端口：`本机IP：8501`)**
+
+**2. 服务启动时间视设备不同而不同，约 3-10 分钟，如长时间没有启动请前往 `./logs` 目录下监控日志，定位问题。**
+
+**3. 在Linux上使用ctrl+C退出可能会由于linux的多进程机制导致multiprocessing遗留孤儿进程，可通过shutdown_all.sh进行退出**
+
+#### 5.6 启动界面示例：
+
+1. FastAPI docs 界面
+
+![](img/fastapi_docs_020_0.png)
+
+2. webui启动界面示例：
+
+- Web UI 对话界面：
+  ![img](img/webui_0813_0.png)
+- Web UI 知识库管理页面：
+  ![](img/webui_0813_1.png)
+
+### 6 分步启动 API 服务或 Web UI
+
+注意：如使用了一键启动方式，可忽略本节。
+
+#### 6.1 启动 LLM 服务
+
+如需使用开源模型进行本地部署，需首先启动 LLM 服务，启动方式分为三种：
+
+- [基于多进程脚本 llm_api.py 启动 LLM 服务](README.md#6.1.1-基于多进程脚本-llm_api.py-启动-LLM-服务)
+- [基于命令行脚本 llm_api_stale.py 启动 LLM 服务](README.md#6.1.2-基于命令行脚本-llm_api_stale.py-启动-LLM-服务)
+- [PEFT 加载](README.md#6.1.3-PEFT-加载)
+
+三种方式只需选择一个即可，具体操作方式详见 6.1.1 - 6.1.3。
+
+如果启动在线的 API 服务（如 OpenAI 的 API 接口），则无需启动 LLM 服务，即 6.1 小节的任何命令均无需执行。
+
+##### 6.1.1 基于多进程脚本 llm_api.py 启动 LLM 服务
+
+在项目根目录下，执行 [server/llm_api.py](server/llm_api.py) 脚本启动 **LLM 模型**服务：
+
+```shell
+$ python server/llm_api.py
+```
+
+项目支持多卡加载，需在 llm_api.py 中的 create_model_worker_app 函数中，修改如下三个参数:
+
+```python
+gpus=None, 
+num_gpus=1, 
+max_gpu_memory="20GiB"
+```
+
+其中，`gpus` 控制使用的显卡的ID，例如 "0,1";
+
+`num_gpus` 控制使用的卡数;
+
+`max_gpu_memory` 控制每个卡使用的显存容量。
+
+##### 6.1.2 基于命令行脚本 llm_api_stale.py 启动 LLM 服务
+
+⚠️ **注意:**
+
+**1.llm_api_stale.py脚本原生仅适用于linux,mac设备需要安装对应的linux命令,win平台请使用wsl;**
+
+**2.加载非默认模型需要用命令行参数--model-path-address指定模型，不会读取model_config.py配置;**
+
+在项目根目录下，执行 [server/llm_api_stale.py](server/llm_api_stale.py) 脚本启动 **LLM 模型**服务：
+
+```shell
+$ python server/llm_api_stale.py
+```
+
+该方式支持启动多个worker，示例启动方式：
+
+```shell
+$ python server/llm_api_stale.py --model-path-address model1@host1@port1 model2@host2@port2
+```
+
+如果出现 server 端口占用情况，需手动指定 server 端口，并同步修改 model_config.py 下对应模型的 `api_base_url` 为指定端口:
+
+```shell
+$ python server/llm_api_stale.py --server-port 8887
+```
+
+如果要启动多卡加载，示例命令如下：
+
+```shell
+$ python server/llm_api_stale.py --gpus 0,1 --num-gpus 2 --max-gpu-memory 10GiB
+```
+
+注：以如上方式启动LLM服务会以nohup命令在后台运行 FastChat 服务，如需停止服务，可以运行如下命令：
+
+```shell
+$ python server/llm_api_shutdown.py --serve all 
+```
+
+亦可单独停止一个 FastChat 服务模块，可选 [`all`, `controller`, `model_worker`, `openai_api_server`]
+
+##### 6.1.3 PEFT 加载(包括lora,p-tuning,prefix tuning, prompt tuning,ia3等)
+
+本项目基于 FastChat 加载 LLM 服务，故需以 FastChat 加载 PEFT 路径，即保证路径名称里必须有 peft 这个词，配置文件的名字为 adapter_config.json，peft 路径下包含 model.bin 格式的 PEFT 权重。
+详细步骤参考[加载lora微调后模型失效](https://github.com/chatchat-space/Langchain-Chatchat/issues/1130#issuecomment-1685291822)
+
+![image](https://github.com/chatchat-space/Langchain-Chatchat/assets/22924096/4e056c1c-5c4b-4865-a1af-859cd58a625d)
+
+#### 6.2 启动 API 服务
+
+本地部署情况下，按照 [6.1 节](README.md#6.1-启动-LLM-服务)**启动 LLM 服务后**，再执行 [server/api.py](server/api.py) 脚本启动 **API** 服务；
+
+在线调用 API 服务的情况下，直接执行 [server/api.py](server/api.py) 脚本启动 **API** 服务；
+
+调用命令示例：
+
+```shell
+$ python server/api.py
+```
+
+启动 API 服务后，可访问 `localhost:7861` 或 `{API 所在服务器 IP}:7861` FastAPI 自动生成的 docs 进行接口查看与测试。
+
+- FastAPI docs 界面
+
+  ![](img/fastapi_docs_020_0.png)
+
+#### 6.3 启动 Web UI 服务
+
+按照 [6.2 节](README.md#6.2-启动-API-服务)**启动 API 服务后**，执行 [webui.py](webui.py) 启动 **Web UI** 服务（默认使用端口 `8501`）
+
+```shell
+$ streamlit run webui.py
+```
+
+使用 Langchain-Chatchat 主题色启动 **Web UI** 服务（默认使用端口 `8501`）
+
+```shell
+$ streamlit run webui.py --theme.base "light" --theme.primaryColor "#165dff" --theme.secondaryBackgroundColor "#f5f5f5" --theme.textColor "#000000"
+```
+
+或使用以下命令指定启动 **Web UI** 服务并指定端口号
+
+```shell
+$ streamlit run webui.py --server.port 666
+```

 - Web UI 对话界面：

-![img](img/LLM_success.png)
-
+  ![](img/webui_0813_0.png)
 - Web UI 知识库管理页面：

-![](img/init_knowledge_base.jpg)
-
-### 注意
-
-以上方式只是为了快速上手，如果需要更多的功能和自定义启动方式
-，请参考[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
-
+  ![](img/webui_0813_1.png)

 ---

-## 项目里程碑
+## 常见问题

-+ `2023年4月`: `Langchain-ChatGLM 0.1.0` 发布，支持基于 ChatGLM-6B 模型的本地知识库问答。
-+ `2023年8月`: `Langchain-ChatGLM` 改名为 `Langchain-Chatchat`，`0.2.0` 发布，使用 `fastchat` 作为模型加载方案，支持更多的模型和数据库。
-+ `2023年10月`: `Langchain-Chatchat 0.2.5` 发布，推出 Agent 内容，开源项目在`Founder Park & Zhipu AI & Zilliz`
-  举办的黑客马拉松获得三等奖。
-+ `2023年12月`: `Langchain-Chatchat` 开源项目获得超过 **20K** stars.
-+ `2024年1月`: `LangChain 0.1.x` 推出，`Langchain-Chatchat 0.2.x` 发布稳定版本`0.2.10`
-  后将停止更新和技术支持，全力研发具有更强应用性的 `Langchain-Chatchat 0.3.x`。
-
-+ 🔥 让我们一起期待未来 Chatchat 的故事 ···
+参见 [常见问题](docs/FAQ.md)。

 ---

-## 联系我们
+## 路线图

-### Telegram
+- [X] Langchain 应用
+  - [X] 本地数据接入
+    - [X] 接入非结构化文档
+      - [X] .md
+      - [X] .txt
+      - [X] .docx
+    - [ ] 结构化数据接入
+      - [X] .csv
+      - [ ] .xlsx
+    - [ ] 分词及召回
+      - [ ] 接入不同类型 TextSplitter
+      - [ ] 优化依据中文标点符号设计的 ChineseTextSplitter
+      - [ ] 重新实现上下文拼接召回
+    - [ ] 本地网页接入
+    - [ ] SQL 接入
+    - [ ] 知识图谱/图数据库接入
+  - [X] 搜索引擎接入
+    - [X] Bing 搜索
+    - [X] DuckDuckGo 搜索
+  - [ ] Agent 实现
+- [X] LLM 模型接入
+  - [X] 支持通过调用 [FastChat](https://github.com/lm-sys/fastchat) api 调用 llm
+  - [ ] 支持 ChatGLM API 等 LLM API 的接入
+- [X] Embedding 模型接入
+  - [X] 支持调用 HuggingFace 中各开源 Embedding 模型
+  - [ ] 支持 OpenAI Embedding API 等 Embedding API 的接入
+- [X] 基于 FastAPI 的 API 方式调用
+- [X] Web UI
+  - [X] 基于 Streamlit 的 Web UI

-[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white "langchain-chatglm")](https://t.me/+RjliQ3jnJ1YyN2E9)
+---

-### 项目交流群
-<img src="img/qr_code_108.jpg" alt="二维码" width="300" />
+## 项目交流群

-🎉 Langchain-Chatchat 项目微信交流群，如果你也对本项目感兴趣，欢迎加入群聊参与讨论交流。
+<img src="img/qr_code_58.jpg" alt="二维码" width="300" height="300" />

-### 公众号
-
-<img src="img/official_wechat_mp_account.png" alt="二维码" width="300" />
-
-🎉 Langchain-Chatchat 项目官方公众号，欢迎扫码关注。
+🎉 langchain-ChatGLM 项目微信交流群，如果你也对本项目感兴趣，欢迎加入群聊参与讨论交流。
--- a/README_en.md
+++ b/README_en.md
@ -1,207 +0,0 @@
-![](img/logo-long-chatchat-trans-v2.png)
-
-🌍 [中文文档](README.md)
-🌍 [日本語で読む](README_ja.md)
-
-📃 **LangChain-Chatchat** (formerly Langchain-ChatGLM):
-
-A LLM application aims to implement knowledge and search engine based QA based on Langchain and open-source or remote
-LLM API.
-
-⚠️`0.2.10` will be the last version of the `0.2.x` series. The `0.2.x` series will stop updating and technical support,
-and strive to develop `Langchain-Chachat 0.3.x` with stronger applicability.
-Subsequent bug fixes for `0.2.10` will be pushed directly to the `master` branch without version updates.
-
-
---
-
-## Table of Contents
-
- [Introduction](README.md#Introduction)
- [Pain Points Addressed](README.md#Pain-Points-Addressed)
- [Quick Start](README.md#Quick-Start)
-    - [1. Environment Setup](README.md#1-Environment-Setup)
-    - [2. Model Download](README.md#2-Model-Download)
-    - [3. Initialize Knowledge Base and Configuration Files](README.md#3-Initialize-Knowledge-Base-and-Configuration-Files)
-    - [4. One-Click Startup](README.md#4-One-Click-Startup)
-    - [5. Startup Interface Examples](README.md#5-Startup-Interface-Examples)
- [Contact Us](README.md#Contact-Us)
-
-## Introduction
-
-🤖️ A Q&A application based on local knowledge base implemented using the idea
-of [langchain](https://github.com/langchain-ai/langchain). The goal is to build a KBQA(Knowledge based Q&A) solution
-that
-is friendly to Chinese scenarios and open source models and can run both offline and online.
-
-💡 Inspired by [document.ai](https://github.com/GanymedeNil/document.ai)
-and [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) , we build a local knowledge base question
-answering application that can be implemented using an open source model or remote LLM api throughout the process. In
-the latest version of this project, [FastChat](https://github.com/lm-sys/FastChat) is used to access Vicuna, Alpaca,
-LLaMA, Koala, RWKV and many other models. Relying on [langchain](https://github.com/langchain-ai/langchain) , this
-project supports calling services through the API provided based on [FastAPI](https://github.com/tiangolo/fastapi), or
-using the WebUI based on [Streamlit](https://github.com/streamlit/streamlit).
-
-✅ Relying on the open source LLM and Embedding models, this project can realize full-process **offline private
-deployment**. At the same time, this project also supports the call of OpenAI GPT API- and Zhipu API, and will continue
-to expand the access to various models and remote APIs in the future.
-
-⛓️ The implementation principle of this project is shown in the graph below. The main process includes: loading files ->
-reading text -> text segmentation -> text vectorization -> question vectorization -> matching the `top-k` most similar
-to the question vector in the text vector -> The matched text is added to `prompt `as context and question -> submitte
-to `LLM` to generate an answer.
-
-📺[video introduction](https://www.bilibili.com/video/BV13M4y1e7cN/?share_source=copy_web&vd_source=e6c5aafe684f30fbe41925d61ca6d514)
-
-![实现原理图](img/langchain+chatglm.png)
-
-The main process analysis from the aspect of document process:
-
-![实现原理图2](img/langchain+chatglm2.png)
-
-🚩 The training or fine-tuning are not involved in the project, but still, one always can improve performance by do
-these.
-
-🌐 [AutoDL image](https://www.codewithgpu.com/i/chatchat-space/Langchain-Chatchat/Langchain-Chatchat) is supported, and in `0.2.10` the codes are update to v0.2.10.
-
-🐳 [Docker image](isafetech/chatchat:0.2.10) is supported to ```0.2.10```.
-
-🌲 The latest update also provides support for image sources from DockerHub, Ali Cloud, and Tencent Cloud:
-
-```shell
-docker run -d --gpus all -p 80:8501 isafetech/chatchat:0.2.10
-docker run -d --gpus all -p 80:8501 uswccr.ccs.tencentyun.com/chatchat/chatchat:0.2.10
-docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.10
-```
-
-## Pain Points Addressed
-
-This project is a solution for enhancing knowledge bases with fully localized inference, specifically addressing the
-pain points of data security and private deployments for businesses.
-This open-source solution is under the Apache License and can be used for commercial purposes for free, with no fees
-required.
-We support mainstream local large prophecy models and Embedding models available in the market, as well as open-source
-local vector databases. For a detailed list of supported models and databases, please refer to
-our [Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
-
-## Quick Start
-
-### Environment Setup
-
-First, make sure your machine has Python 3.10 installed.
-
-```
-$ python --version
-Python 3.10.12
-```
-
-Then, create a virtual environment and install the project's dependencies within the virtual environment.
-
-```shell
-
-# 拉取仓库
-$ git clone https://github.com/chatchat-space/Langchain-Chatchat.git
-
-# 进入目录
-$ cd Langchain-Chatchat
-
-# 安装全部依赖
-$ pip install -r requirements.txt 
-$ pip install -r requirements_api.txt
-$ pip install -r requirements_webui.txt  
-
-# 默认依赖包括基本运行环境（FAISS向量库）。如果要使用 milvus/pg_vector 等向量库，请将 requirements.txt 中相应依赖取消注释再安装。
-```
-
-Please note that the LangChain-Chachat `0.2.x` series is for the Langchain `0.0.x` series version. If you are using the
-Langchain `0.1.x` series version, you need to downgrade.
-
-### Model Download
-
-If you need to run this project locally or in an offline environment, you must first download the required models for
-the project. Typically, open-source LLM and Embedding models can be downloaded from HuggingFace.
-
-Taking the default LLM model used in this project, [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b), and
-the Embedding model [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base) as examples:
-
-To download the models, you need to first
-install [Git LFS](https://docs.github.com/zh/repositories/working-with-files/managing-large-files/installing-git-large-file-storage)
-and then run:
-
-```Shell
-$ git lfs install
-$ git clone https://huggingface.co/THUDM/chatglm2-6b
-$ git clone https://huggingface.co/moka-ai/m3e-base
-```
-
-### Initializing the Knowledge Base and Config File
-
-Follow the steps below to initialize your own knowledge base and config file:
-
-```shell
-$ python copy_config_example.py
-$ python init_database.py --recreate-vs
- ```
-
-### One-Click Launch
-
-To start the project, run the following command:
-
-```shell
-$ python startup.py -a
-```
-
-### Example of Launch Interface
-
-1. FastAPI docs interface
-
-![](img/fastapi_docs_026.png)
-
-2. webui page
-
- Web UI dialog page:
-
-![img](img/LLM_success.png)
-
- Web UI knowledge base management page:
-
-![](img/init_knowledge_base.jpg)
-
-### Note
-
-The above instructions are provided for a quick start. If you need more features or want to customize the launch method,
-please refer to the [Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/).
-
---
-
-## Project Milestones
-
-+ `April 2023`: `Langchain-ChatGLM 0.1.0` released, supporting local knowledge base question and answer based on the
-  ChatGLM-6B model.
-+ `August 2023`: `Langchain-ChatGLM` was renamed to `Langchain-Chatchat`, `0.2.0` was released, using `fastchat` as the
-  model loading solution, supporting more models and databases.
-+ `October 2023`: `Langchain-Chachat 0.2.5` was released, Agent content was launched, and the open source project won
-  the third prize in the hackathon held by `Founder Park & Zhipu AI & Zilliz`.
-+ `December 2023`: `Langchain-Chachat` open source project received more than **20K** stars.
-+ `January 2024`: `LangChain 0.1.x` is launched, `Langchain-Chachat 0.2.x` is released. After the stable
-  version `0.2.10` is released, updates and technical support will be stopped, and all efforts will be made to
-  develop `Langchain with stronger applicability -Chat 0.3.x`.
-
-
-+ 🔥 Let’s look forward to the future Chatchat stories together···
-
---
-
-## Contact Us
-
-### Telegram
-
-[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white "langchain-chatglm")](https://t.me/+RjliQ3jnJ1YyN2E9)
-
-### WeChat Group
-
-<img src="img/qr_code_90.jpg" alt="二维码" width="300" height="300" />
-
-### WeChat Official Account
-
-<img src="img/official_wechat_mp_account.png" alt="图片" width="900" height="300" />
--- a/README_ja.md
+++ b/README_ja.md
@ -1,200 +0,0 @@
-![](img/logo-long-chatchat-trans-v2.png)
-
-🌍 [中文文档](README.md)
-🌍 [READ THIS IN ENGLISH](README_en.md)
-
-📃 **LangChain-Chatchat** (旧名 Langchain-ChatGLM)
-
-ChatGLM などの大規模な言語モデルや Langchain などのアプリケーション フレームワークに基づいた、オープン
-ソースのオフライン展開可能な検索拡張生成 (RAG) 大規模モデル ナレッジ ベース プロジェクトです。
-
-⚠️`0.2.10` は `0.2.x` シリーズの最終バージョンとなり、`0.2.x`
-シリーズはアップデートと技術サポートを終了し、より適用性の高い `Langchain-Chachat 0.3.x` の開発に努めます。 。
-`0.2.10` のその後のバグ修正は、バージョン更新なしで `master` ブランチに直接プッシュされます。
-
---
-
-## 目次
-
- [イントロ](README_ja.md#イントロ)
- [ペインポイントへの対応](README_ja.md#ペインポイントへの対応)
- [クイックスタート](README_ja.md#クイックスタート)
-    - [1. 環境セットアップ](README_ja.md#環境セットアップ)
-    - [2. モデルをダウンロード](README_ja.md#モデルをダウンロード)
-    - [3. ナレッジベースと設定ファイルの初期化](README_ja.md#ナレッジベースと設定ファイルの初期化)
-    - [4. ワンクリック起動](README_ja.md#ワンクリック起動)
-    - [5. 起動インターフェースの例](README_ja.md#起動インターフェースの例)
- [お問い合わせ](README_ja.md#お問い合わせ)
-
-## イントロ
-
-🤖️ [langchain](https://github.com/hwchase17/langchain) のアイデアを用いて実装された、ローカルナレッジベースに基づく Q&A
-アプリケーション。
-目標は、中国のシナリオとオープンソースモデルに親和性があり、オフラインとオンラインの両方で実行可能な KBQA（ナレッジベースの
-Q&A）ソリューションを構築することです。
-
-💡 [document.ai](https://github.com/GanymedeNil/document.ai)
-と [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) に触発され、
-プロセス全体を通してオープンソースモデルまたはリモート LLM api を使用して実装することができるローカルナレッジベースの質問応答アプリケーションを構築します。
-このプロジェクトの最新バージョンでは、[FastChat](https://github.com/lm-sys/FastChat)
-を使用して、Vicuna、Alpaca、LLaMA、Koala、RWKV、その他多くのモデルにアクセスしています。
-このプロジェクトは [langchain](https://github.com/langchain-ai/langchain)
-に依存し、[FastAPI](https://github.com/tiangolo/fastapi) に基づいて提供されるAPIを通してサービスを呼び出したり、
-[Streamlit](https://github.com/streamlit/streamlit) に基づいて WebUI を使ったりすることをサポートしています。
-
-✅ オープンソースの LLM と Embedding モデルに依存して、このプロジェクトはフルプロセスの **オフラインプライベートデプロイメント
-** を実現することができます。
-同時に、本プロジェクトは OpenAI GPT API や Zhipu API の呼び出しにも対応しており、今後も様々な機種やリモート API
-へのアクセスを拡大していきます。
-
-⛓️ このプロジェクトの実施原則を下のグラフに示します。主なプロセスは以下の通りです:
-ファイルの読み込み -> テキストの読み込み -> テキストのセグメンテーション -> テキストのベクトル化 -> 質問のベクトル化 ->
-質問ベクトルと最も似ている `top-k` をテキストベクトルでマッチング ->
-マッチしたテキストをコンテキストと質問として `prompt` に追加 -> 回答を生成するために `LLM` に送信。
-
-📺[video introduction](https://www.bilibili.com/video/BV13M4y1e7cN/?share_source=copy_web&vd_source=e6c5aafe684f30fbe41925d61ca6d514)
-
-![实现原理图](img/langchain+chatglm.png)
-
-文書プロセスの側面からの主なプロセス分析:
-
-![实现原理图2](img/langchain+chatglm2.png)
-
-🚩 トレーニングやファインチューニングはプロジェクトには含まれないが、これらを行うことで必ずパフォーマンスを向上させることができます。
-
-🌐 [AutoDL イメージ](registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.5)がサポートされ、`0.2.10` では v0.2.10
-にアップデートされました。
-
-🐳 [Docker イメージ](isafetech/chatchat:0.2.10)
-
-🌲 今回のアップデートにより、DockerHub、阿里雲、騰訊のクラウドにも対応しました。より広範なクラウド環境で利用可能となりました。
-
-```shell
-docker run -d --gpus all -p 80:8501 isafetech/chatchat:0.2.10
-docker run -d --gpus all -p 80:8501 uswccr.ccs.tencentyun.com/chatchat/chatchat:0.2.10
-docker run -d --gpus all -p 80:8501 registry.cn-beijing.aliyuncs.com/chatchat/chatchat:0.2.10
-```
-
-## ペインポイントへの対応
-
-このプロジェクトは、完全にローカライズされた推論によってナレッジベースを強化するソリューションであり、特にデータセキュリティと企業向けのプライベートな展開の問題に取り組んでいます。
-このオープンソースソリューションは Apache ライセンスに基づき、無償で商用利用できます。
-私たちは、市場で入手可能な主流のローカル大予言モデルや Embedding モデル、オープンソースのローカルベクターデータベースをサポートしています。
-対応機種とデータベースの詳細については、[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/) をご参照ください。
-
-## クイックスタート
-
-### 環境セットアップ
-
-まず、マシンにPython 3.10がインストールされていることを確認してください。
-
-```
-$ python --version
-Python 3.11.7
-```
-
-次に、仮想環境を作成し、プロジェクトの依存関係を仮想環境内にインストールする。
-
-```shell
-
-# リポジトリをクローン
-$ git clone https://github.com/chatchat-space/Langchain-Chatchat.git
-
-# ディレクトリに移動
-$ cd Langchain-Chatchat
-
-# すべての依存関係をインストール
-$ pip install -r requirements.txt
-$ pip install -r requirements_api.txt
-$ pip install -r requirements_webui.txt
-
-# デフォルトの依存関係には、基本的な実行環境(FAISS ベクターライブラリ)が含まれます。milvus/pg_vector などのベクターライブラリを使用する場合は、requirements.txt 内の対応する依存関係のコメントを解除してからインストールしてください。
-```
-
-LangChain-Chachat `0.2.x` シリーズは Langchain `0.0.x` シリーズ用です。Langchain `0.1.x` シリーズをお使いの場合は、ダウングレードする必要があります。
-
-### モデルをダウンロード
-
-このプロジェクトをローカルまたはオフライン環境で実行する必要がある場合は、まずプロジェクトに必要なモデルをダウンロードする必要があります。
-通常、オープンソースの LLM と Embedding モデルは Hugging Face からダウンロードできる。
-
-このプロジェクトで使用されているデフォルトの LLM
-モデルである [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b)と、Embedding
-モデル [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base) を例にとると、次のようになります:
-
-モデルをダウンロードするには、まず [Git LFS](https://docs.github.com/zh/repositories/working-with-files/managing-large-files/installing-git-large-file-storage)
-をインストールし、次のように実行する必要があります:
-
-```Shell
-$ git lfs install
-$ git clone https://huggingface.co/THUDM/chatglm2-6b
-$ git clone https://huggingface.co/moka-ai/m3e-base
-```
-
-### ナレッジベースと設定ファイルの初期化
-
-以下の手順に従って、ナレッジベースと設定ファイルを初期化してください:
-
-```shell
-$ python copy_config_example.py
-$ python init_database.py --recreate-vs
- ```
-
-### ワンクリック起動
-
-プロジェクトを開始するには、次のコマンドを実行します:
-
-```shell
-$ python startup.py -a
-```
-
-### 起動インターフェースの例
-
-1. FastAPI docs インターフェース
-
-![](img/fastapi_docs_026.png)
-
-2. webui ページ
-
- Web UI ダイアログページ:
-
-![img](img/LLM_success.png)
-
- Web UI ナレッジベースマネジメントページ:
-
-![](img/init_knowledge_base.jpg)
-
-### 注
-
-上記の手順はクイックスタートのために提供されています。より多くの機能が必要な場合や、起動方法をカスタマイズしたい場合は、[Wiki](https://github.com/chatchat-space/Langchain-Chatchat/wiki/)
-を参照してください。
-
---
-
-## プロジェクトのマイルストーン
-
-+ `2023 年 4 月`: `Langchain-ChatGLM 0.1.0` がリリースされ、ChatGLM-6B モデルに基づくローカル ナレッジ ベースの質問と回答がサポートされました。
-+ `2023 年 8 月`: `Langchain-ChatGLM` は `Langchain-Chatchat` に名前変更され、モデル読み込みソリューションとして `fastchat` を使用し、より多くのモデルとデータベースをサポートする `0.2.0` がリリースされました。
-+ `2023 年 10 月`: `Langchain-Chachat 0.2.5` リリース、エージェント コンテンツ、オープンソース プロジェクトを`Founder Park & Zhipu AI & Zilliz`で開始
-  開催したハッカソンでは3位に入賞しました。
-+ `2023 年 12 月`: `Langchain-Chachat`オープンソース プロジェクトは **20,000** つ以上のスターを獲得しました。
-+ `2024 年 1 月`: `LangChain 0.1.x` がリリースされ、`Langchain-Chachat 0.2.x` が安定版 `0.2.10` をリリースしました。
-  今後はアップデートと技術サポートを停止し、より適用性の高い`Langchain-Chachat 0.3.x`の開発に努める予定です。
-
-+ 🔥 これからのChatchatストーリーを一緒に楽しみにしましょう···
-
---
-
-## お問い合わせ
-
-### Telegram
-
-[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white "langchain-chatglm")](https://t.me/+RjliQ3jnJ1YyN2E9)
-
-### WeChat グループ
-
-<img src="img/qr_code_90.jpg" alt="二维码" width="300" height="300" />
-
-### WeChat 公式アカウント
-
-<img src="img/official_wechat_mp_account.png" alt="图片" width="900" height="300" />
--- a/chains/llmchain_with_history.py
+++ b/chains/llmchain_with_history.py
@ -1,12 +1,19 @@
-from server.utils import get_ChatOpenAI
-from configs.model_config import LLM_MODELS, TEMPERATURE
-from langchain.chains import LLMChain
+from langchain.chat_models import ChatOpenAI
+from configs.model_config import llm_model_dict, LLM_MODEL
+from langchain import LLMChain
 from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
 )

-model = get_ChatOpenAI(model_name=LLM_MODELS[0], temperature=TEMPERATURE)
+model = ChatOpenAI(
+    streaming=True,
+    verbose=True,
+    # callbacks=[callback],
+    openai_api_key=llm_model_dict[LLM_MODEL]["api_key"],
+    openai_api_base=llm_model_dict[LLM_MODEL]["api_base_url"],
+    model_name=LLM_MODEL
+)


 human_prompt = "{input}"
--- a/configs/init.py
+++ b/configs/init.py
@ -1,8 +1,4 @@
-from .basic_config import *
 from .model_config import *
-from .kb_config import *
 from .server_config import *
-from .prompt_config import *

-
-VERSION = "v0.2.10"
+VERSION = "v0.2.3"
--- a/configs/basic_config.py.example
+++ b/configs/basic_config.py.example
@ -1,32 +0,0 @@
-import logging
-import os
-import langchain
-import tempfile
-import shutil
-
-
-# 是否显示详细日志
-log_verbose = False
-langchain.verbose = False
-
-# 通常情况下不需要更改以下内容
-
-# 日志格式
-LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
-logging.basicConfig(format=LOG_FORMAT)
-
-
-# 日志存储路径
-LOG_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs")
-if not os.path.exists(LOG_PATH):
-    os.mkdir(LOG_PATH)
-
-# 临时文件目录，主要用于文件对话
-BASE_TEMP_DIR = os.path.join(tempfile.gettempdir(), "chatchat")
-try:
-    shutil.rmtree(BASE_TEMP_DIR)
-except Exception:
-    pass
-os.makedirs(BASE_TEMP_DIR, exist_ok=True)
--- a/configs/kb_config.py.example
+++ b/configs/kb_config.py.example
@ -1,146 +0,0 @@
-import os
-
-# 默认使用的知识库
-DEFAULT_KNOWLEDGE_BASE = "samples"
-
-# 默认向量库/全文检索引擎类型。可选：faiss, milvus(离线) & zilliz(在线), pgvector, chromadb 全文检索引擎es
-DEFAULT_VS_TYPE = "faiss"
-
-# 缓存向量库数量（针对FAISS）
-CACHED_VS_NUM = 1
-
-# 缓存临时向量库数量（针对FAISS），用于文件对话
-CACHED_MEMO_VS_NUM = 10
-
-# 知识库中单段文本长度(不适用MarkdownHeaderTextSplitter)
-CHUNK_SIZE = 250
-
-# 知识库中相邻文本重合长度(不适用MarkdownHeaderTextSplitter)
-OVERLAP_SIZE = 50
-
-# 知识库匹配向量数量
-VECTOR_SEARCH_TOP_K = 3
-
-# 知识库匹配的距离阈值，一般取值范围在0-1之间，SCORE越小，距离越小从而相关度越高。
-# 但有用户报告遇到过匹配分值超过1的情况，为了兼容性默认设为1，在WEBUI中调整范围为0-2
-SCORE_THRESHOLD = 1.0
-
-# 默认搜索引擎。可选：bing, duckduckgo, metaphor
-DEFAULT_SEARCH_ENGINE = "duckduckgo"
-
-# 搜索引擎匹配结题数量
-SEARCH_ENGINE_TOP_K = 3
-
-
-# Bing 搜索必备变量
-# 使用 Bing 搜索需要使用 Bing Subscription Key,需要在azure port中申请试用bing search
-# 具体申请方式请见
-# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource
-# 使用python创建bing api 搜索实例详见:
-# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python
-BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
-# 注意不是bing Webmaster Tools的api key，
-
-# 此外，如果是在服务器上，报Failed to establish a new connection: [Errno 110] Connection timed out
-# 是因为服务器加了防火墙，需要联系管理员加白名单，如果公司的服务器的话，就别想了GG
-BING_SUBSCRIPTION_KEY = ""
-
-# metaphor搜索需要KEY
-METAPHOR_API_KEY = ""
-
-# 心知天气 API KEY，用于天气Agent。申请：https://www.seniverse.com/
-SENIVERSE_API_KEY = ""
-
-# 是否开启中文标题加强，以及标题增强的相关配置
-# 通过增加标题判断，判断哪些文本为标题，并在metadata中进行标记；
-# 然后将文本与往上一级的标题进行拼合，实现文本信息的增强。
-ZH_TITLE_ENHANCE = False
-
-# PDF OCR 控制：只对宽高超过页面一定比例（图片宽/页面宽，图片高/页面高）的图片进行 OCR。
-# 这样可以避免 PDF 中一些小图片的干扰，提高非扫描版 PDF 处理速度
-PDF_OCR_THRESHOLD = (0.6, 0.6)
-
-# 每个知识库的初始化介绍，用于在初始化知识库时显示和Agent调用，没写则没有介绍，不会被Agent调用。
-KB_INFO = {
-    "知识库名称": "知识库介绍",
-    "samples": "关于本项目issue的解答",
-}
-
-
-# 通常情况下不需要更改以下内容
-
-# 知识库默认存储路径
-KB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base")
-if not os.path.exists(KB_ROOT_PATH):
-    os.mkdir(KB_ROOT_PATH)
-# 数据库默认存储路径。
-# 如果使用sqlite，可以直接修改DB_ROOT_PATH；如果使用其它数据库，请直接修改SQLALCHEMY_DATABASE_URI。
-DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db")
-SQLALCHEMY_DATABASE_URI = f"sqlite:///{DB_ROOT_PATH}"
-
-# 可选向量库类型及对应配置
-kbs_config = {
-    "faiss": {
-    },
-    "milvus": {
-        "host": "127.0.0.1",
-        "port": "19530",
-        "user": "",
-        "password": "",
-        "secure": False,
-    },
-    "zilliz": {
-        "host": "in01-a7ce524e41e3935.ali-cn-hangzhou.vectordb.zilliz.com.cn",
-        "port": "19530",
-        "user": "",
-        "password": "",
-        "secure": True,
-        },
-    "pg": {
-        "connection_uri": "postgresql://postgres:postgres@127.0.0.1:5432/langchain_chatchat",
-    },
-
-    "es": {
-        "host": "127.0.0.1",
-        "port": "9200",
-        "index_name": "test_index",
-        "user": "",
-        "password": ""
-    },
-    "milvus_kwargs":{
-        "search_params":{"metric_type": "L2"}, #在此处增加search_params
-        "index_params":{"metric_type": "L2","index_type": "HNSW"} # 在此处增加index_params
-    },
-    "chromadb": {}
-}
-
-# TextSplitter配置项，如果你不明白其中的含义，就不要修改。
-text_splitter_dict = {
-    "ChineseRecursiveTextSplitter": {
-        "source": "huggingface",   # 选择tiktoken则使用openai的方法
-        "tokenizer_name_or_path": "",
-    },
-    "SpacyTextSplitter": {
-        "source": "huggingface",
-        "tokenizer_name_or_path": "gpt2",
-    },
-    "RecursiveCharacterTextSplitter": {
-        "source": "tiktoken",
-        "tokenizer_name_or_path": "cl100k_base",
-    },
-    "MarkdownHeaderTextSplitter": {
-        "headers_to_split_on":
-            [
-                ("#", "head1"),
-                ("##", "head2"),
-                ("###", "head3"),
-                ("####", "head4"),
-            ]
-    },
-}
-
-# TEXT_SPLITTER 名称
-TEXT_SPLITTER_NAME = "ChineseRecursiveTextSplitter"
-
-# Embedding模型定制词语的词表文件
-EMBEDDING_KEYWORD_FILE = "embedding_keywords.txt"
--- a/configs/model_config.py.example
+++ b/configs/model_config.py.example
@ -1,327 +1,177 @@
 import os
+import logging
+# 日志格式
+LOG_FORMAT = "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s"
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+logging.basicConfig(format=LOG_FORMAT)

-# 可以指定一个绝对路径，统一存放所有的Embedding和LLM模型。
-# 每个模型可以是一个单独的目录，也可以是某个目录下的二级子目录。
-# 如果模型目录名称和 MODEL_PATH 中的 key 或 value 相同，程序会自动检测加载，无需修改 MODEL_PATH 中的路径。
-MODEL_ROOT_PATH = ""
+
+# 在以下字典中修改属性值，以指定本地embedding模型存储位置
+# 如将 "text2vec": "GanymedeNil/text2vec-large-chinese" 修改为 "text2vec": "User/Downloads/text2vec-large-chinese"
+# 此处请写绝对路径
+embedding_model_dict = {
+    "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
+    "ernie-base": "nghuyong/ernie-3.0-base-zh",
+    "text2vec-base": "shibing624/text2vec-base-chinese",
+    "text2vec": "GanymedeNil/text2vec-large-chinese",
+    "text2vec-paraphrase": "shibing624/text2vec-base-chinese-paraphrase",
+    "text2vec-sentence": "shibing624/text2vec-base-chinese-sentence",
+    "text2vec-multilingual": "shibing624/text2vec-base-multilingual",
+    "m3e-small": "moka-ai/m3e-small",
+    "m3e-base": "moka-ai/m3e-base",
+    "m3e-large": "moka-ai/m3e-large",
+    "bge-small-zh": "BAAI/bge-small-zh",
+    "bge-base-zh": "BAAI/bge-base-zh",
+    "bge-large-zh": "BAAI/bge-large-zh",
+    "bge-large-zh-noinstruct": "BAAI/bge-large-zh-noinstruct",
+    "text-embedding-ada-002": os.environ.get("OPENAI_API_KEY")
+}

 # 选用的 Embedding 名称
-EMBEDDING_MODEL = "bge-large-zh-v1.5"
+EMBEDDING_MODEL = "m3e-base"

-# Embedding 模型运行设备。设为 "auto" 会自动检测(会有警告)，也可手动设定为 "cuda","mps","cpu","xpu" 其中之一。
+# Embedding 模型运行设备。设为"auto"会自动检测，也可手动设定为"cuda","mps","cpu"其中之一。
 EMBEDDING_DEVICE = "auto"

-# 选用的reranker模型
-RERANKER_MODEL = "bge-reranker-large"
-# 是否启用reranker模型
-USE_RERANKER = False
-RERANKER_MAX_LENGTH = 1024
+llm_model_dict = {
+    "chatglm-6b": {
+        "local_model_path": "THUDM/chatglm-6b",
+        "api_base_url": "http://localhost:8888/v1",  # "name"修改为fastchat服务中的"api_base_url"
+        "api_key": "EMPTY"
+    },

-# 如果需要在 EMBEDDING_MODEL 中增加自定义的关键字时配置
-EMBEDDING_KEYWORD_FILE = "keywords.txt"
-EMBEDDING_MODEL_OUTPUT_PATH = "output"
+    "chatglm2-6b": {
+        "local_model_path": "THUDM/chatglm2-6b",
+        "api_base_url": "http://localhost:8888/v1",  # URL需要与运行fastchat服务端的server_config.FSCHAT_OPENAI_API一致
+        "api_key": "EMPTY"
+    },

-# 要运行的 LLM 名称，可以包括本地模型和在线模型。列表中本地模型将在启动项目时全部加载。
-# 列表中第一个模型将作为 API 和 WEBUI 的默认模型。
-# 在这里，我们使用目前主流的两个离线模型，其中，chatglm3-6b 为默认加载模型。
-# 如果你的显存不足，可使用 Qwen-1_8B-Chat, 该模型 FP16 仅需 3.8G显存。
+    "chatglm2-6b-32k": {
+        "local_model_path": "THUDM/chatglm2-6b-32k",  # "THUDM/chatglm2-6b-32k",
+        "api_base_url": "http://localhost:8888/v1",  # "URL需要与运行fastchat服务端的server_config.FSCHAT_OPENAI_API一致
+        "api_key": "EMPTY"
+    },

-LLM_MODELS = ["chatglm3-6b", "zhipu-api", "openai-api"]
-Agent_MODEL = None
+    # 调用chatgpt时如果报出： urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='api.openai.com', port=443):
+    #  Max retries exceeded with url: /v1/chat/completions
+    # 则需要将urllib3版本修改为1.25.11
+    # 如果依然报urllib3.exceptions.MaxRetryError: HTTPSConnectionPool，则将https改为http
+    # 参考https://zhuanlan.zhihu.com/p/350015032

-# LLM 模型运行设备。设为"auto"会自动检测(会有警告)，也可手动设定为 "cuda","mps","cpu","xpu" 其中之一。
-LLM_DEVICE = "auto"
+    # 如果报出：raise NewConnectionError(
+    # urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x000001FE4BDB85E0>:
+    # Failed to establish a new connection: [WinError 10060]
+    # 则是因为内地和香港的IP都被OPENAI封了，需要切换为日本、新加坡等地

+    # 如果出现WARNING: Retrying langchain.chat_models.openai.acompletion_with_retry.<locals>._completion_with_retry in
+    # 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI.
+    # 需要添加代理访问(正常开的代理软件可能会拦截不上)，需要设置配置项 openai_proxy，或者使用环境变量 OPENAI_PROXY 进行设置
+    # 比如: "openai_proxy": 'http://127.0.0.1:4780'
+    "gpt-3.5-turbo": {
+        "api_base_url": "https://api.openai.com/v1",
+        "api_key": os.environ.get("OPENAI_API_KEY"),
+        "openai_proxy": os.environ.get("OPENAI_PROXY")
+    },
+    # 线上模型。当前支持智谱AI。
+    # 如果没有设置有效的local_model_path，则认为是在线模型API。
+    # 请在server_config中为每个在线API设置不同的端口
+    # 具体注册及api key获取请前往 http://open.bigmodel.cn
+    "chatglm-api": {
+        "api_base_url": "http://127.0.0.1:8888/v1",
+        "api_key": os.environ.get("ZHIPUAI_API_KEY"),
+        "provider": "ChatGLMWorker",
+        "version": "chatglm_pro",  # 可选包括 "chatglm_lite", "chatglm_std", "chatglm_pro"
+    },
+}
+
+# LLM 名称
+LLM_MODEL = "chatglm2-6b"
+
+# 历史对话轮数
 HISTORY_LEN = 3

-MAX_TOKENS = 2048
+# LLM 运行设备。设为"auto"会自动检测，也可手动设定为"cuda","mps","cpu"其中之一。
+LLM_DEVICE = "auto"

-TEMPERATURE = 0.7
+# 日志存储路径
+LOG_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs")
+if not os.path.exists(LOG_PATH):
+    os.mkdir(LOG_PATH)

-ONLINE_LLM_MODEL = {
-    "openai-api": {
-        "model_name": "gpt-4",
-        "api_base_url": "https://api.openai.com/v1",
-        "api_key": "",
-        "openai_proxy": "",
+# 知识库默认存储路径
+KB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base")
+
+# 数据库默认存储路径。
+# 如果使用sqlite，可以直接修改DB_ROOT_PATH；如果使用其它数据库，请直接修改SQLALCHEMY_DATABASE_URI。
+DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db")
+SQLALCHEMY_DATABASE_URI = f"sqlite:///{DB_ROOT_PATH}"
+
+# 可选向量库类型及对应配置
+kbs_config = {
+    "faiss": {
    },
-
-    # 智谱AI API,具体注册及api key获取请前往 http://open.bigmodel.cn
-    "zhipu-api": {
-        "api_key": "",
-        "version": "glm-4",
-        "provider": "ChatGLMWorker",
+    "milvus": {
+        "host": "127.0.0.1",
+        "port": "19530",
+        "user": "",
+        "password": "",
+        "secure": False,
    },
-
-    # 具体注册及api key获取请前往 https://api.minimax.chat/
-    "minimax-api": {
-        "group_id": "",
-        "api_key": "",
-        "is_pro": False,
-        "provider": "MiniMaxWorker",
-    },
-
-    # 具体注册及api key获取请前往 https://xinghuo.xfyun.cn/
-    "xinghuo-api": {
-        "APPID": "",
-        "APISecret": "",
-        "api_key": "",
-        "version": "v3.5", # 你使用的讯飞星火大模型版本，可选包括 "v3.5","v3.0", "v2.0", "v1.5"
-        "provider": "XingHuoWorker",
-    },
-
-    # 百度千帆 API，申请方式请参考 https://cloud.baidu.com/doc/WENXINWORKSHOP/s/4lilb2lpf
-    "qianfan-api": {
-        "version": "ERNIE-Bot",  # 注意大小写。当前支持 "ERNIE-Bot" 或 "ERNIE-Bot-turbo"， 更多的见官方文档。
-        "version_url": "",  # 也可以不填写version，直接填写在千帆申请模型发布的API地址
-        "api_key": "",
-        "secret_key": "",
-        "provider": "QianFanWorker",
-    },
-
-    # 火山方舟 API，文档参考 https://www.volcengine.com/docs/82379
-    "fangzhou-api": {
-        "version": "", # 对应火山方舟的 endpoint_id
-        "version_url": "",
-        "api_key": "",
-        "secret_key": "",
-        "provider": "FangZhouWorker",
-    },
-
-    # 阿里云通义千问 API，文档参考 https://help.aliyun.com/zh/dashscope/developer-reference/api-details
-    "qwen-api": {
-        "version": "qwen-max",
-        "api_key": "",
-        "provider": "QwenWorker",
-        "embed_model": "text-embedding-v1"  # embedding 模型名称
-    },
-
-    # 百川 API，申请方式请参考 https://www.baichuan-ai.com/home#api-enter
-    "baichuan-api": {
-        "version": "Baichuan2-53B",
-        "api_key": "",
-        "secret_key": "",
-        "provider": "BaiChuanWorker",
-    },
-
-    # Azure API
-    "azure-api": {
-        "deployment_name": "",  # 部署容器的名字
-        "resource_name": "",  # https://{resource_name}.openai.azure.com/openai/ 填写resource_name的部分，其他部分不要填写
-        "api_version": "",  # API的版本，不是模型版本
-        "api_key": "",
-        "provider": "AzureWorker",
-    },
-
-    # 昆仑万维天工 API https://model-platform.tiangong.cn/
-    "tiangong-api": {
-        "version": "SkyChat-MegaVerse",
-        "api_key": "",
-        "secret_key": "",
-        "provider": "TianGongWorker",
-    },
-
-    # Gemini API https://makersuite.google.com/app/apikey
-    "gemini-api": {
-        "api_key": "",
-        "provider": "GeminiWorker",
-    },
-
-    # Claude API : https://www.anthropic.com/api
-    # Available models: 
-    # Claude 3 Opus:    claude-3-opus-20240229 
-    # Claude 3 Sonnet	claude-3-sonnet-20240229
-    # Claude 3 Haiku	claude-3-haiku-20240307
-    "claude-api": {
-        "api_key": "",
-        "version": "2023-06-01",
-        "model_name":"claude-3-opus-20240229", 
-        "provider": "ClaudeWorker",
-    }
-
-}
-
-# 在以下字典中修改属性值，以指定本地embedding模型存储位置。支持3种设置方法：
-# 1、将对应的值修改为模型绝对路径
-# 2、不修改此处的值（以 text2vec 为例）：
-#       2.1 如果{MODEL_ROOT_PATH}下存在如下任一子目录：
-#           - text2vec
-#           - GanymedeNil/text2vec-large-chinese
-#           - text2vec-large-chinese
-#       2.2 如果以上本地路径不存在，则使用huggingface模型
-
-MODEL_PATH = {
-    "embed_model": {
-        "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
-        "ernie-base": "nghuyong/ernie-3.0-base-zh",
-        "text2vec-base": "shibing624/text2vec-base-chinese",
-        "text2vec": "GanymedeNil/text2vec-large-chinese",
-        "text2vec-paraphrase": "shibing624/text2vec-base-chinese-paraphrase",
-        "text2vec-sentence": "shibing624/text2vec-base-chinese-sentence",
-        "text2vec-multilingual": "shibing624/text2vec-base-multilingual",
-        "text2vec-bge-large-chinese": "shibing624/text2vec-bge-large-chinese",
-        "m3e-small": "moka-ai/m3e-small",
-        "m3e-base": "moka-ai/m3e-base",
-        "m3e-large": "moka-ai/m3e-large",
-
-        "bge-small-zh": "BAAI/bge-small-zh",
-        "bge-base-zh": "BAAI/bge-base-zh",
-        "bge-large-zh": "BAAI/bge-large-zh",
-        "bge-large-zh-noinstruct": "BAAI/bge-large-zh-noinstruct",
-        "bge-base-zh-v1.5": "BAAI/bge-base-zh-v1.5",
-        "bge-large-zh-v1.5": "BAAI/bge-large-zh-v1.5",
-
-        "bge-m3": "BAAI/bge-m3",
-
-        "piccolo-base-zh": "sensenova/piccolo-base-zh",
-        "piccolo-large-zh": "sensenova/piccolo-large-zh",
-        "nlp_gte_sentence-embedding_chinese-large": "damo/nlp_gte_sentence-embedding_chinese-large",
-        "text-embedding-ada-002": "your OPENAI_API_KEY",
-    },
-
-    "llm_model": {
-        "chatglm2-6b": "THUDM/chatglm2-6b",
-        "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k",
-        "chatglm3-6b": "THUDM/chatglm3-6b",
-        "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k",
-
-        "Orion-14B-Chat": "OrionStarAI/Orion-14B-Chat",
-        "Orion-14B-Chat-Plugin": "OrionStarAI/Orion-14B-Chat-Plugin",
-        "Orion-14B-LongChat": "OrionStarAI/Orion-14B-LongChat",
-
-        "Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf",
-        "Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf",
-        "Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf",
-
-        "Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat",
-        "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
-        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
-        "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat",
-
-        # Qwen1.5 模型 VLLM可能出现问题
-        "Qwen1.5-0.5B-Chat": "Qwen/Qwen1.5-0.5B-Chat",
-        "Qwen1.5-1.8B-Chat": "Qwen/Qwen1.5-1.8B-Chat",
-        "Qwen1.5-4B-Chat": "Qwen/Qwen1.5-4B-Chat",
-        "Qwen1.5-7B-Chat": "Qwen/Qwen1.5-7B-Chat",
-        "Qwen1.5-14B-Chat": "Qwen/Qwen1.5-14B-Chat",
-        "Qwen1.5-72B-Chat": "Qwen/Qwen1.5-72B-Chat",
-
-        "baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat",
-        "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat",
-        "baichuan2-7b-chat": "baichuan-inc/Baichuan2-7B-Chat",
-        "baichuan2-13b-chat": "baichuan-inc/Baichuan2-13B-Chat",
-
-        "internlm-7b": "internlm/internlm-7b",
-        "internlm-chat-7b": "internlm/internlm-chat-7b",
-        "internlm2-chat-7b": "internlm/internlm2-chat-7b",
-        "internlm2-chat-20b": "internlm/internlm2-chat-20b",
-
-        "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat",
-        "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k",
-
-        "Yi-34B-Chat": "https://huggingface.co/01-ai/Yi-34B-Chat",
-
-        "agentlm-7b": "THUDM/agentlm-7b",
-        "agentlm-13b": "THUDM/agentlm-13b",
-        "agentlm-70b": "THUDM/agentlm-70b",
-
-        "falcon-7b": "tiiuae/falcon-7b",
-        "falcon-40b": "tiiuae/falcon-40b",
-        "falcon-rw-7b": "tiiuae/falcon-rw-7b",
-
-        "aquila-7b": "BAAI/Aquila-7B",
-        "aquilachat-7b": "BAAI/AquilaChat-7B",
-        "open_llama_13b": "openlm-research/open_llama_13b",
-        "vicuna-13b-v1.5": "lmsys/vicuna-13b-v1.5",
-        "koala": "young-geng/koala",
-        "mpt-7b": "mosaicml/mpt-7b",
-        "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter",
-        "mpt-30b": "mosaicml/mpt-30b",
-        "opt-66b": "facebook/opt-66b",
-        "opt-iml-max-30b": "facebook/opt-iml-max-30b",
-        "gpt2": "gpt2",
-        "gpt2-xl": "gpt2-xl",
-        "gpt-j-6b": "EleutherAI/gpt-j-6b",
-        "gpt4all-j": "nomic-ai/gpt4all-j",
-        "gpt-neox-20b": "EleutherAI/gpt-neox-20b",
-        "pythia-12b": "EleutherAI/pythia-12b",
-        "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
-        "dolly-v2-12b": "databricks/dolly-v2-12b",
-        "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b",
-    },
-
-    "reranker": {
-        "bge-reranker-large": "BAAI/bge-reranker-large",
-        "bge-reranker-base": "BAAI/bge-reranker-base",
+    "pg": {
+        "connection_uri": "postgresql://postgres:postgres@127.0.0.1:5432/langchain_chatchat",
    }
 }

-# 通常情况下不需要更改以下内容
+# 默认向量库类型。可选：faiss, milvus, pg.
+DEFAULT_VS_TYPE = "faiss"
+
+# 缓存向量库数量
+CACHED_VS_NUM = 1
+
+# 知识库中单段文本长度
+CHUNK_SIZE = 250
+
+# 知识库中相邻文本重合长度
+OVERLAP_SIZE = 50
+
+# 知识库匹配向量数量
+VECTOR_SEARCH_TOP_K = 5
+
+# 知识库匹配相关度阈值，取值范围在0-1之间，SCORE越小，相关度越高，取到1相当于不筛选，建议设置在0.5左右
+SCORE_THRESHOLD = 1
+
+# 搜索引擎匹配结果数量
+SEARCH_ENGINE_TOP_K = 5

 # nltk 模型存储路径
 NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data")

-# 使用VLLM可能导致模型推理能力下降，无法完成Agent任务
-VLLM_MODEL_DICT = {
-    "chatglm2-6b": "THUDM/chatglm2-6b",
-    "chatglm2-6b-32k": "THUDM/chatglm2-6b-32k",
-    "chatglm3-6b": "THUDM/chatglm3-6b",
-    "chatglm3-6b-32k": "THUDM/chatglm3-6b-32k",
+# 基于本地知识问答的提示词模版（使用Jinja2语法，简单点就是用双大括号代替f-string的单大括号）
+PROMPT_TEMPLATE = """<指令>根据已知信息，简洁和专业的来回答问题。如果无法从中得到答案，请说 “根据已知信息无法回答该问题”，不允许在答案中添加编造成分，答案请使用中文。 </指令>

-    "Llama-2-7b-chat-hf": "meta-llama/Llama-2-7b-chat-hf",
-    "Llama-2-13b-chat-hf": "meta-llama/Llama-2-13b-chat-hf",
-    "Llama-2-70b-chat-hf": "meta-llama/Llama-2-70b-chat-hf",
+<已知信息>{{ context }}</已知信息>

-    "Qwen-1_8B-Chat": "Qwen/Qwen-1_8B-Chat",
-    "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
-    "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
-    "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat",
+<问题>{{ question }}</问题>"""

-    "baichuan-7b-chat": "baichuan-inc/Baichuan-7B-Chat",
-    "baichuan-13b-chat": "baichuan-inc/Baichuan-13B-Chat",
-    "baichuan2-7b-chat": "baichuan-inc/Baichuan-7B-Chat",
-    "baichuan2-13b-chat": "baichuan-inc/Baichuan-13B-Chat",
+# API 是否开启跨域，默认为False，如果需要开启，请设置为True
+# is open cross domain
+OPEN_CROSS_DOMAIN = False

-    "BlueLM-7B-Chat": "vivo-ai/BlueLM-7B-Chat",
-    "BlueLM-7B-Chat-32k": "vivo-ai/BlueLM-7B-Chat-32k",
+# Bing 搜索必备变量
+# 使用 Bing 搜索需要使用 Bing Subscription Key，需要在 Azure Portal 中申请试用 Bing Search
+# 具体申请方式请见
+# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource
+# 使用python创建bing api 搜索实例详见:
+# https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python
+BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
+# 注意不是bing Webmaster Tools的api key，

-    "internlm-7b": "internlm/internlm-7b",
-    "internlm-chat-7b": "internlm/internlm-chat-7b",
-    "internlm2-chat-7b": "internlm/Models/internlm2-chat-7b",
-    "internlm2-chat-20b": "internlm/Models/internlm2-chat-20b",
+# 此外，如果是在服务器上，报Failed to establish a new connection: [Errno 110] Connection timed out
+# 是因为服务器加了防火墙，需要联系管理员加白名单，如果公司的服务器的话，就别想了GG
+BING_SUBSCRIPTION_KEY = ""

-    "aquila-7b": "BAAI/Aquila-7B",
-    "aquilachat-7b": "BAAI/AquilaChat-7B",
-
-    "falcon-7b": "tiiuae/falcon-7b",
-    "falcon-40b": "tiiuae/falcon-40b",
-    "falcon-rw-7b": "tiiuae/falcon-rw-7b",
-    "gpt2": "gpt2",
-    "gpt2-xl": "gpt2-xl",
-    "gpt-j-6b": "EleutherAI/gpt-j-6b",
-    "gpt4all-j": "nomic-ai/gpt4all-j",
-    "gpt-neox-20b": "EleutherAI/gpt-neox-20b",
-    "pythia-12b": "EleutherAI/pythia-12b",
-    "oasst-sft-4-pythia-12b-epoch-3.5": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
-    "dolly-v2-12b": "databricks/dolly-v2-12b",
-    "stablelm-tuned-alpha-7b": "stabilityai/stablelm-tuned-alpha-7b",
-    "open_llama_13b": "openlm-research/open_llama_13b",
-    "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3",
-    "koala": "young-geng/koala",
-    "mpt-7b": "mosaicml/mpt-7b",
-    "mpt-7b-storywriter": "mosaicml/mpt-7b-storywriter",
-    "mpt-30b": "mosaicml/mpt-30b",
-    "opt-66b": "facebook/opt-66b",
-    "opt-iml-max-30b": "facebook/opt-iml-max-30b",
-
-}
-
-SUPPORT_AGENT_MODEL = [
-    "openai-api",  # GPT4 模型
-    "qwen-api",  # Qwen Max模型
-    "zhipu-api",  # 智谱AI GLM4模型
-    "Qwen",  # 所有Qwen系列本地模型
-    "chatglm3-6b",
-    "internlm2-chat-20b",
-    "Orion-14B-Chat-Plugin",
-]
+# 是否开启中文标题加强，以及标题增强的相关配置
+# 通过增加标题判断，判断哪些文本为标题，并在metadata中进行标记；
+# 然后将文本与往上一级的标题进行拼合，实现文本信息的增强。
+ZH_TITLE_ENHANCE = False
--- a/configs/prompt_config.py.example
+++ b/configs/prompt_config.py.example
@ -1,127 +0,0 @@
-# prompt模板使用Jinja2语法，简单点就是用双大括号代替f-string的单大括号
-# 本配置文件支持热加载，修改prompt模板后无需重启服务。
-
-# LLM对话支持的变量：
-#   - input: 用户输入内容
-
-# 知识库和搜索引擎对话支持的变量：
-#   - context: 从检索结果拼接的知识文本
-#   - question: 用户提出的问题
-
-# Agent对话支持的变量：
-
-#   - tools: 可用的工具列表
-#   - tool_names: 可用的工具名称列表
-#   - history: 用户和Agent的对话历史
-#   - input: 用户输入内容
-#   - agent_scratchpad: Agent的思维记录
-
-PROMPT_TEMPLATES = {
-    "llm_chat": {
-        "default":
-            '{{ input }}',
-
-        "with_history":
-            'The following is a friendly conversation between a human and an AI. '
-            'The AI is talkative and provides lots of specific details from its context. '
-            'If the AI does not know the answer to a question, it truthfully says it does not know.\n\n'
-            'Current conversation:\n'
-            '{history}\n'
-            'Human: {input}\n'
-            'AI:',
-
-        "py":
-            '你是一个聪明的代码助手，请你给我写出简单的py代码。 \n'
-            '{{ input }}',
-    },
-
-
-    "knowledge_base_chat": {
-        "default":
-            '<指令>根据已知信息，简洁和专业的来回答问题。如果无法从中得到答案，请说 “根据已知信息无法回答该问题”，'
-            '不允许在答案中添加编造成分，答案请使用中文。 </指令>\n'
-            '<已知信息>{{ context }}</已知信息>\n'
-            '<问题>{{ question }}</问题>\n',
-
-        "text":
-            '<指令>根据已知信息，简洁和专业的来回答问题。如果无法从中得到答案，请说 “根据已知信息无法回答该问题”，答案请使用中文。 </指令>\n'
-            '<已知信息>{{ context }}</已知信息>\n'
-            '<问题>{{ question }}</问题>\n',
-
-        "empty":  # 搜不到知识库的时候使用
-            '请你回答我的问题:\n'
-            '{{ question }}\n\n',
-    },
-
-
-    "search_engine_chat": {
-        "default":
-            '<指令>这是我搜索到的互联网信息，请你根据这些信息进行提取并有调理，简洁的回答问题。'
-            '如果无法从中得到答案，请说 “无法搜索到能回答问题的内容”。 </指令>\n'
-            '<已知信息>{{ context }}</已知信息>\n'
-            '<问题>{{ question }}</问题>\n',
-
-        "search":
-            '<指令>根据已知信息，简洁和专业的来回答问题。如果无法从中得到答案，请说 “根据已知信息无法回答该问题”，答案请使用中文。 </指令>\n'
-            '<已知信息>{{ context }}</已知信息>\n'
-            '<问题>{{ question }}</问题>\n',
-    },
-
-
-    "agent_chat": {
-        "default":
-            'Answer the following questions as best you can. If it is in order, you can use some tools appropriately. '
-            'You have access to the following tools:\n\n'
-            '{tools}\n\n'
-            'Use the following format:\n'
-            'Question: the input question you must answer1\n'
-            'Thought: you should always think about what to do and what tools to use.\n'
-            'Action: the action to take, should be one of [{tool_names}]\n'
-            'Action Input: the input to the action\n'
-            'Observation: the result of the action\n'
-            '... (this Thought/Action/Action Input/Observation can be repeated zero or more times)\n'
-            'Thought: I now know the final answer\n'
-            'Final Answer: the final answer to the original input question\n'
-            'Begin!\n\n'
-            'history: {history}\n\n'
-            'Question: {input}\n\n'
-            'Thought: {agent_scratchpad}\n',
-
-        "ChatGLM3":
-            'You can answer using the tools, or answer directly using your knowledge without using the tools. '
-            'Respond to the human as helpfully and accurately as possible.\n'
-            'You have access to the following tools:\n'
-            '{tools}\n'
-            'Use a json blob to specify a tool by providing an action key (tool name) '
-            'and an action_input key (tool input).\n'
-            'Valid "action" values: "Final Answer" or  [{tool_names}]'
-            'Provide only ONE action per $JSON_BLOB, as shown:\n\n'
-            '```\n'
-            '{{{{\n'
-            '  "action": $TOOL_NAME,\n'
-            '  "action_input": $INPUT\n'
-            '}}}}\n'
-            '```\n\n'
-            'Follow this format:\n\n'
-            'Question: input question to answer\n'
-            'Thought: consider previous and subsequent steps\n'
-            'Action:\n'
-            '```\n'
-            '$JSON_BLOB\n'
-            '```\n'
-            'Observation: action result\n'
-            '... (repeat Thought/Action/Observation N times)\n'
-            'Thought: I know what to respond\n'
-            'Action:\n'
-            '```\n'
-            '{{{{\n'
-            '  "action": "Final Answer",\n'
-            '  "action_input": "Final response to human"\n'
-            '}}}}\n'
-            'Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. '
-            'Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.\n'
-            'history: {history}\n\n'
-            'Question: {input}\n\n'
-            'Thought: {agent_scratchpad}',
-    }
-}
--- a/configs/server_config.py.example
+++ b/configs/server_config.py.example
@ -1,5 +1,5 @@
-import sys
-from configs.model_config import LLM_DEVICE
+from .model_config import LLM_MODEL, llm_model_dict, LLM_DEVICE
+import httpx

 # httpx 请求默认超时时间（秒）。如果加载模型或对话较慢，出现超时错误，可以适当加大该值。
 HTTPX_DEFAULT_TIMEOUT = 300.0
@ -8,8 +8,8 @@ HTTPX_DEFAULT_TIMEOUT = 300.0
 # is open cross domain
 OPEN_CROSS_DOMAIN = False

-# 各服务器默认绑定host。如改为"0.0.0.0"需要修改下方所有XX_SERVER的host
-DEFAULT_BIND_HOST = "0.0.0.0" if sys.platform != "win32" else "127.0.0.1"
+# 各服务器默认绑定host
+DEFAULT_BIND_HOST = "127.0.0.1"

 # webui.py server
 WEBUI_SERVER = {
@ -26,28 +26,25 @@ API_SERVER = {
 # fastchat openai_api server
 FSCHAT_OPENAI_API = {
    "host": DEFAULT_BIND_HOST,
-    "port": 20000,
+    "port": 8888,  # model_config.llm_model_dict中模型配置的api_base_url需要与这里一致。
 }

 # fastchat model_worker server
-# 这些模型必须是在model_config.MODEL_PATH或ONLINE_MODEL中正确配置的。
-# 在启动startup.py时，可用通过`--model-name xxxx yyyy`指定模型，不指定则为LLM_MODELS
+# 这些模型必须是在model_config.llm_model_dict中正确配置的。
+# 在启动startup.py时，可以通过`--model-worker --model-name xxxx`指定模型，不指定则为LLM_MODEL
 FSCHAT_MODEL_WORKERS = {
-    # 所有模型共用的默认配置，可在模型专项配置中进行覆盖。
+    # 所有模型共用的默认配置，可在模型专项配置或llm_model_dict中进行覆盖。
    "default": {
        "host": DEFAULT_BIND_HOST,
        "port": 20002,
        "device": LLM_DEVICE,
-        # False,'vllm',使用的推理加速框架,使用vllm如果出现HuggingFace通信问题，参见doc/FAQ
-        # vllm对一些模型支持还不成熟，暂时默认关闭
-        "infer_turbo": False,

-        # model_worker多卡加载需要配置的参数
-        # "gpus": None, # 使用的GPU，以str的格式指定，如"0,1"，如失效请使用CUDA_VISIBLE_DEVICES="0,1"等形式指定
+        # 多卡加载需要配置的参数
+        # "gpus": None, # 使用的GPU，以str的格式指定，如"0,1"
        # "num_gpus": 1, # 使用GPU的数量
        # "max_gpu_memory": "20GiB", # 每个GPU占用的最大显存

-        # 以下为model_worker非常用参数，可根据需要配置
+        # 以下为非常用参数，可根据需要配置
        # "load_8bit": False, # 开启8bit量化
        # "cpu_offloading": None,
        # "gptq_ckpt": None,
@ -57,81 +54,26 @@ FSCHAT_MODEL_WORKERS = {
        # "awq_ckpt": None,
        # "awq_wbits": 16,
        # "awq_groupsize": -1,
-        # "model_names": LLM_MODELS,
+        # "model_names": [LLM_MODEL],
        # "conv_template": None,
        # "limit_worker_concurrency": 5,
        # "stream_interval": 2,
        # "no_register": False,
-        # "embed_in_truncate": False,
-
-        # 以下为vllm_worker配置参数,注意使用vllm必须有gpu，仅在Linux测试通过
-
-        # tokenizer = model_path # 如果tokenizer与model_path不一致在此处添加
-        # 'tokenizer_mode':'auto',
-        # 'trust_remote_code':True,
-        # 'download_dir':None,
-        # 'load_format':'auto',
-        # 'dtype':'auto',
-        # 'seed':0,
-        # 'worker_use_ray':False,
-        # 'pipeline_parallel_size':1,
-        # 'tensor_parallel_size':1,
-        # 'block_size':16,
-        # 'swap_space':4 , # GiB
-        # 'gpu_memory_utilization':0.90,
-        # 'max_num_batched_tokens':2560,
-        # 'max_num_seqs':256,
-        # 'disable_log_stats':False,
-        # 'conv_template':None,
-        # 'limit_worker_concurrency':5,
-        # 'no_register':False,
-        # 'num_gpus': 1
-        # 'engine_use_ray': False,
-        # 'disable_log_requests': False
-
    },
-    "chatglm3-6b": {
-        "device": "cuda",
+    "baichuan-7b": { # 使用default中的IP和端口
+        "device": "cpu",
    },
-    "Qwen1.5-0.5B-Chat": {
-        "device": "cuda",
-    },
-    # 以下配置可以不用修改，在model_config中设置启动的模型
-    "zhipu-api": {
-        "port": 21001,
-    },
-    "minimax-api": {
-        "port": 21002,
-    },
-    "xinghuo-api": {
-        "port": 21003,
-    },
-    "qianfan-api": {
-        "port": 21004,
-    },
-    "fangzhou-api": {
-        "port": 21005,
-    },
-    "qwen-api": {
-        "port": 21006,
-    },
-    "baichuan-api": {
-        "port": 21007,
-    },
-    "azure-api": {
-        "port": 21008,
-    },
-    "tiangong-api": {
-        "port": 21009,
-    },
-    "gemini-api": {
-        "port": 21010,
-    },
-    "claude-api": {
-        "port": 21011,
+    "chatglm-api": { # 请为每个在线API设置不同的端口
+        "port": 20003,
    },
 }

+# fastchat multi model worker server
+FSCHAT_MULTI_MODEL_WORKERS = {
+    # TODO:
+}
+
+# fastchat controller server
 FSCHAT_CONTROLLER = {
    "host": DEFAULT_BIND_HOST,
    "port": 20001,
--- a/copy_config_example.py
+++ b/copy_config_example.py
@ -1,12 +0,0 @@
-# 用于批量将configs下的.example文件复制并命名为.py文件
-import os
-import shutil
-
-if __name__ == "__main__":
-    files = os.listdir("configs")
-
-    src_files = [os.path.join("configs", file) for file in files if ".example" in file]
-
-    for src_file in src_files:
-        tar_file = src_file.replace(".example", "")
-        shutil.copy(src_file, tar_file)
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -0,0 +1,32 @@
+## 变更日志
+
+**[2023/04/15]**
+
+1. 重构项目结构，在根目录下保留命令行 Demo [cli_demo.py](../cli_demo.py) 和 Web UI Demo [webui.py](../webui.py)；
+2. 对 Web UI 进行改进，修改为运行 Web UI 后首先按照 [configs/model_config.py](../configs/model_config.py) 默认选项加载模型，并增加报错提示信息等；
+3. 对常见问题进行补充说明。
+
+**[2023/04/12]**
+
+1. 替换 Web UI 中的样例文件，避免出现 Ubuntu 中出现因文件编码无法读取的问题；
+2. 替换`knowledge_based_chatglm.py`中的 prompt 模版，避免出现因 prompt 模版包含中英双语导致 chatglm 返回内容错乱的问题。
+
+**[2023/04/11]** 
+
+1. 加入 Web UI V0.1 版本（感谢 [@liangtongt](https://github.com/liangtongt)）；
+2. `README.md`中增加常见问题（感谢 [@calcitem](https://github.com/calcitem) 和 [@bolongliu](https://github.com/bolongliu)）；
+3. 增加 LLM 和 Embedding 模型运行设备是否可用`cuda`、`mps`、`cpu`的自动判断。
+4. 在`knowledge_based_chatglm.py`中增加对`filepath`的判断，在之前支持单个文件导入的基础上，现支持单个文件夹路径作为输入，输入后将会遍历文件夹中各个文件，并在命令行中显示每个文件是否成功加载。
+
+**[2023/04/09]**
+
+1. 使用`langchain`中的`RetrievalQA`替代之前选用的`ChatVectorDBChain`，替换后可以有效减少提问 2-3 次后因显存不足而停止运行的问题；
+2. 在`knowledge_based_chatglm.py`中增加`EMBEDDING_MODEL`、`VECTOR_SEARCH_TOP_K`、`LLM_MODEL`、`LLM_HISTORY_LEN`、`REPLY_WITH_SOURCE`参数值设置；
+3. 增加 GPU 显存需求更小的`chatglm-6b-int4`、`chatglm-6b-int4-qe`作为 LLM 模型备选项；
+4. 更正`README.md`中的代码错误（感谢 [@calcitem](https://github.com/calcitem)）。
+
+**[2023/04/07]** 
+
+1. 解决加载 ChatGLM 模型时发生显存占用为双倍的问题 (感谢 [@suc16](https://github.com/suc16) 和 [@myml](https://github.com/myml)) ；
+2. 新增清理显存机制；
+3. 新增`nghuyong/ernie-3.0-nano-zh`和`nghuyong/ernie-3.0-base-zh`作为 Embedding 模型备选项，相比`GanymedeNil/text2vec-large-chinese`占用显存资源更少 (感谢 [@lastrei](https://github.com/lastrei))。
--- a/docs/ES部署指南.md
+++ b/docs/ES部署指南.md
@ -1,29 +0,0 @@
-
-# 实现基于ES的数据插入、检索、删除、更新
-```shell
-author: 唐国梁Tommy
-e-mail: flytang186@qq.com
-
-如果遇到任何问题，可以与我联系，我这边部署后服务是没有问题的。
-```
-
-## 第1步：ES docker部署
-```shell
-docker network create elastic
-docker run -id --name elasticsearch --net elastic -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" -e "xpack.security.enabled=false" -e "xpack.security.http.ssl.enabled=false" -t docker.elastic.co/elasticsearch/elasticsearch:8.8.2
-```
-
-### 第2步：Kibana docker部署
-**注意：Kibana版本与ES保持一致**
-```shell
-docker pull docker.elastic.co/kibana/kibana:{version} 
-docker run --name kibana --net elastic -p 5601:5601 docker.elastic.co/kibana/kibana:{version}
-```
-
-### 第3步：核心代码
-```shell
-1. 核心代码路径
-server/knowledge_base/kb_service/es_kb_service.py
-
-2. 需要在 configs/model_config.py 中 配置 ES参数（IP， PORT）等；
-```
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@ -0,0 +1,185 @@
+### 常见问题
+
+Q1: 本项目支持哪些文件格式？
+
+A1: 目前已测试支持 txt、docx、md、pdf 格式文件，更多文件格式请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)。目前已知文档中若含有特殊字符，可能存在文件无法加载的问题。
+
+---
+
+Q2: 使用过程中 Python 包 `nltk`发生了 `Resource punkt not found.`报错，该如何解决？
+
+A2: 方法一：下载 https://github.com/nltk/nltk_data/raw/gh-pages/packages/tokenizers/punkt.zip ，将其中的 `packages/tokenizers` 解压，放到 `nltk_data/tokenizers` 存储路径下。
+
+`nltk_data` 存储路径可以通过 `nltk.data.path` 查询。
+
+方法二：执行python代码
+
+```
+import nltk
+nltk.download()
+```
+
+---
+
+Q3: 使用过程中 Python 包 `nltk`发生了 `Resource averaged_perceptron_tagger not found.`报错，该如何解决？
+
+A3: 方法一：将 https://github.com/nltk/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip 下载，解压放到 `nltk_data/taggers` 存储路径下。
+
+`nltk_data` 存储路径可以通过 `nltk.data.path` 查询。
+
+方法二：执行python代码
+
+```
+import nltk
+nltk.download()
+```
+
+---
+
+Q4: 本项目可否在 colab 中运行？
+
+A4: 可以尝试使用 chatglm-6b-int4 模型在 colab 中运行，需要注意的是，如需在 colab 中运行 Web UI，需将 `webui.py`中 `demo.queue(concurrency_count=3).launch( server_name='0.0.0.0', share=False, inbrowser=False)`中参数 `share`设置为 `True`。
+
+---
+
+Q5: 在 Anaconda 中使用 pip 安装包无效如何解决？
+
+A5: 此问题是系统环境问题，详细见  [在Anaconda中使用pip安装包无效问题](在Anaconda中使用pip安装包无效问题.md)
+
+---
+
+Q6: 本项目中所需模型如何下载至本地？
+
+A6: 本项目中使用的模型均为 `huggingface.co` 中可下载的开源模型，以默认选择的 `chatglm-6b`和 `text2vec-large-chinese`模型为例，下载模型可执行如下代码：
+
+```shell
+# 安装 git lfs
+$ git lfs install
+
+# 下载 LLM 模型
+$ git clone https://huggingface.co/THUDM/chatglm-6b /your_path/chatglm-6b
+
+# 下载 Embedding 模型
+$ git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese /your_path/text2vec
+
+# 模型需要更新时，可打开模型所在文件夹后拉取最新模型文件/代码
+$ git pull
+```
+
+---
+
+Q7: `huggingface.co` 中模型下载速度较慢怎么办？
+
+A7: 可使用本项目用到的模型权重文件百度网盘地址：
+
+- ernie-3.0-base-zh.zip 链接: https://pan.baidu.com/s/1CIvKnD3qzE-orFouA8qvNQ?pwd=4wih
+- ernie-3.0-nano-zh.zip 链接: https://pan.baidu.com/s/1Fh8fgzVdavf5P1omAJJ-Zw?pwd=q6s5
+- text2vec-large-chinese.zip 链接: https://pan.baidu.com/s/1sMyPzBIXdEzHygftEoyBuA?pwd=4xs7
+- chatglm-6b-int4-qe.zip 链接: https://pan.baidu.com/s/1DDKMOMHtNZccOOBGWIOYww?pwd=22ji
+- chatglm-6b-int4.zip 链接: https://pan.baidu.com/s/1pvZ6pMzovjhkA6uPcRLuJA?pwd=3gjd
+- chatglm-6b.zip 链接: https://pan.baidu.com/s/1B-MpsVVs1GHhteVBetaquw?pwd=djay
+
+---
+
+Q8: 下载完模型后，如何修改代码以执行本地模型？
+
+A8: 模型下载完成后，请在 [configs/model_config.py](../configs/model_config.py) 文件中，对 `embedding_model_dict`和 `llm_model_dict`参数进行修改，如把 `embedding_model_dict`从
+
+```python
+embedding_model_dict = {
+    "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
+    "ernie-base": "nghuyong/ernie-3.0-base-zh",
+    "text2vec": "GanymedeNil/text2vec-large-chinese"
+}
+```
+
+修改为
+
+```python
+embedding_model_dict = {
+                        "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
+                        "ernie-base": "nghuyong/ernie-3.0-base-zh",
+                        "text2vec": "/Users/liuqian/Downloads/ChatGLM-6B/text2vec-large-chinese"
+}
+```
+
+---
+
+Q9: 执行 `python cli_demo.py`过程中，显卡内存爆了，提示 "OutOfMemoryError: CUDA out of memory"
+
+A9: 将 `VECTOR_SEARCH_TOP_K` 和 `LLM_HISTORY_LEN` 的值调低，比如 `VECTOR_SEARCH_TOP_K = 5` 和 `LLM_HISTORY_LEN = 2`，这样由 `query` 和 `context` 拼接得到的 `prompt` 会变短，会减少内存的占用。或者打开量化，请在 [configs/model_config.py](../configs/model_config.py) 文件中，对`LOAD_IN_8BIT`参数进行修改
+
+---
+
+Q10: 执行 `pip install -r requirements.txt` 过程中遇到 python 包，如 langchain 找不到对应版本的问题
+
+A10: 更换 pypi 源后重新安装，如阿里源、清华源等，网络条件允许时建议直接使用 pypi.org 源，具体操作命令如下：
+
+```shell
+# 使用 pypi 源
+$ pip install -r requirements.txt -i https://pypi.python.org/simple
+```
+
+或
+
+```shell
+# 使用阿里源
+$ pip install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/
+```
+
+或
+
+```shell
+# 使用清华源
+$ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
+```
+
+---
+
+Q11: 启动 api.py 时 upload_file 接口抛出 `partially initialized module 'charset_normalizer' has no attribute 'md__mypyc' (most likely due to a circular import)`
+
+A11: 这是由于 charset_normalizer 模块版本过高导致的，需要降低 charset_normalizer 的版本，测试在 charset_normalizer==2.1.0 上可用。
+
+---
+
+Q12: 调用api中的 `bing_search_chat` 接口时，报出 `Failed to establish a new connection: [Errno 110] Connection timed out`
+
+A12: 这是因为服务器加了防火墙，需要联系管理员加白名单，如果公司的服务器的话，就别想了GG--!
+
+---
+
+Q13: 加载 chatglm-6b-int8 或 chatglm-6b-int4 抛出 `RuntimeError: Only Tensors of floating point and complex dtype can require gradients`
+
+A13: 疑为 chatglm 的 quantization 的问题或 torch 版本差异问题，针对已经变为 Parameter 的 torch.zeros 矩阵也执行 Parameter 操作，从而抛出 `RuntimeError: Only Tensors of floating point and complex dtype can require gradients`。解决办法是在 chatglm 项目的原始文件中的 quantization.py 文件 374 行改为：
+
+```
+    try:
+        self.weight =Parameter(self.weight.to(kwargs["device"]), requires_grad=False)
+    except Exception as e:
+        pass
+```
+
+    如果上述方式不起作用，则在.cache/huggingface/modules/目录下针对chatglm项目的原始文件中的quantization.py文件执行上述操作，若软链接不止一个，按照错误提示选择正确的路径。
+
+注：虽然模型可以顺利加载但在cpu上仍存在推理失败的可能：即针对每个问题，模型一直输出gugugugu。
+
+    因此，最好不要试图用cpu加载量化模型，原因可能是目前python主流量化包的量化操作是在gpu上执行的,会天然地存在gap。
+
+---
+
+Q14: 修改配置中路径后，加载 text2vec-large-chinese 依然提示 `WARNING: No sentence-transformers model found with name text2vec-large-chinese. Creating a new one with MEAN pooling.`
+
+A14: 尝试更换 embedding，如 text2vec-base-chinese，请在 [configs/model_config.py](../configs/model_config.py) 文件中，修改 `text2vec-base`参数为本地路径，绝对路径或者相对路径均可
+
+
+---
+
+Q15: 使用pg向量库建表报错
+
+A15: 需要手动安装对应的vector扩展(连接pg执行 CREATE EXTENSION IF NOT EXISTS vector)
+
+---
+
+Q16: pymilvus 连接超时
+
+A16: pymilvus 版本需要与 milvus 版本对应，否则会连接超时，可参考 pymilvus==2.1.3
--- a/docs/INSTALL.md
+++ b/docs/INSTALL.md
@ -0,0 +1,63 @@
+# 安装
+
+## 环境检查
+
+```shell
+# 首先，确信你的机器安装了 Python 3.8 - 3.10 版本
+$ python --version
+Python 3.8.13
+
+# 如果低于这个版本，可使用conda安装环境
+$ conda create -p /your_path/env_name python=3.8
+
+# 激活环境
+$ conda activate /your_path/env_name
+
+# 或，conda安装，不指定路径, 注意以下，都将/your_path/env_name替换为env_name
+$ conda create -n env_name python=3.8
+$ conda activate env_name # Activate the environment
+
+# 更新py库
+$ pip3 install --upgrade pip
+
+# 关闭环境
+$ conda deactivate
+
+# 删除环境
+$ conda env remove -p  /your_path/env_name
+```
+
+## 项目依赖
+
+```shell
+# 拉取仓库
+$ git clone https://github.com/chatchat-space/Langchain-Chatchat.git
+
+# 进入目录
+$ cd Langchain-Chatchat
+
+# 安装全部依赖
+$ pip install -r requirements.txt
+
+# 默认依赖包括基本运行环境（FAISS向量库）。如果要使用 milvus/pg_vector 等向量库，请将 requirements.txt 中相应依赖取消注释再安装。
+```
+
+此外，为方便用户 API 与 webui 分离运行，可单独根据运行需求安装依赖包。
+
+- 如果只需运行 API，可执行：
+    ```shell
+    $ pip install -r requirements_api.txt
+    
+    # 默认依赖包括基本运行环境（FAISS向量库）。如果要使用 milvus/pg_vector 等向量库，请将 requirements.txt 中相应依赖取消注释再安装。
+    ```
+
+- 如果只需运行 WebUI，可执行：
+    ```shell
+    $ pip install -r requirements_webui.txt
+    ```
+
+
+
+注：使用 `langchain.document_loaders.UnstructuredFileLoader` 进行 `.docx` 等格式非结构化文件接入时，可能需要依据文档进行其他依赖包的安装，请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)。
+
+
--- a/docs/Issue-with-Installing-Packages-Using-pip-in-Anaconda.md
+++ b/docs/Issue-with-Installing-Packages-Using-pip-in-Anaconda.md
@ -0,0 +1,114 @@
+## Issue with Installing Packages Using pip in Anaconda
+
+## Problem
+
+Recently, when running open-source code, I encountered an issue: after creating a virtual environment with conda and switching to the new environment, using pip to install packages would be "ineffective." Here, "ineffective" means that the packages installed with pip are not in this new environment.
+
+------
+
+## Analysis
+
+1. First, create a test environment called test: `conda create -n test`
+2. Activate the test environment: `conda activate test`
+3. Use pip to install numpy: `pip install numpy`. You'll find that numpy already exists in the default environment.
+
+```powershell
+Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
+Requirement already satisfied: numpy in c:\programdata\anaconda3\lib\site-packages (1.20.3)
+```
+
+4. Check the information of pip: `pip show pip`
+
+```powershell
+Name: pip
+Version: 21.2.4
+Summary: The PyPA recommended tool for installing Python packages.
+Home-page: https://pip.pypa.io/
+Author: The pip developers
+Author-email: distutils-sig@python.org
+License: MIT
+Location: c:\programdata\anaconda3\lib\site-packages
+Requires:
+Required-by:
+```
+
+5. We can see that the current pip is in the default conda environment. This explains why the package is not in the new virtual environment when we directly use pip to install packages - because the pip being used belongs to the default environment, the installed package either already exists or is installed directly into the default environment.
+
+------
+
+## Solution
+
+1. We can directly use the conda command to install new packages, but sometimes conda may not have certain packages/libraries, so we still need to use pip to install.
+2. We can first use the conda command to install the pip package for the current virtual environment, and then use pip to install new packages.
+
+```powershell
+# Use conda to install the pip package
+(test) PS C:\Users\Administrator> conda install pip
+Collecting package metadata (current_repodata.json): done
+Solving environment: done
+....
+done
+
+# Display the information of the current pip, and find that pip is in the test environment
+(test) PS C:\Users\Administrator> pip show pip
+Name: pip
+Version: 21.2.4
+Summary: The PyPA recommended tool for installing Python packages.
+Home-page: https://pip.pypa.io/
+Author: The pip developers
+Author-email: distutils-sig@python.org
+License: MIT
+Location: c:\programdata\anaconda3\envs\test\lib\site-packages
+Requires:
+Required-by:
+
+# Now use pip to install the numpy package, and it is installed successfully
+(test) PS C:\Users\Administrator> pip install numpy
+Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
+Collecting numpy
+  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/4b/23/140ec5a509d992fe39db17200e96c00fd29603c1531ce633ef93dbad5e9e/numpy-1.22.2-cp39-cp39-win_amd64.whl (14.7 MB)
+Installing collected packages: numpy
+Successfully installed numpy-1.22.2
+
+# Use pip list to view the currently installed packages, no problem
+(test) PS C:\Users\Administrator> pip list
+Package      Version
+------------ ---------
+certifi      2021.10.8
+numpy        1.22.2
+pip          21.2.4
+setuptools   58.0.4
+wheel        0.37.1
+wincertstore 0.2
+```
+
+## Supplement
+
+1. The reason I didn't notice this problem before might be because the packages installed in the virtual environment were of a specific version, which overwrote the packages in the default environment. The main issue was actually a lack of careful observation:), otherwise, I could have noticed `Successfully uninstalled numpy-xxx` **default version** and `Successfully installed numpy-1.20.3` **specified version**.
+2. During testing, I found that if the Python version is specified when creating a new environment, there shouldn't be this issue. I guess this is because pip will be installed in the virtual environment, while in our case, including pip, no packages were installed, so the default environment's pip was used.
+3. There's a question: I should have specified the Python version when creating a new virtual environment before, but I still used the default environment's pip package. However, I just couldn't reproduce the issue successfully on two different machines, which led to the second point mentioned above.
+4. After encountering the problem mentioned in point 3, I solved it by using `python -m pip install package-name`, adding `python -m` before pip. As for why, you can refer to the answer on [StackOverflow](https://stackoverflow.com/questions/41060382/using-pip-to-install-packages-to-anaconda-environment):
+
+>1. If you have a non-conda pip as your default pip but conda python as your default python (as below):
+>
+>```shell
+>>which -a pip
+>/home/<user>/.local/bin/pip   
+>/home/<user>/.conda/envs/newenv/bin/pip
+>/usr/bin/pip
+>
+>>which -a python
+>/home/<user>/.conda/envs/newenv/bin/python
+>/usr/bin/python
+>```
+>
+>2. Then, instead of calling `pip install <package>` directly, you can use the module flag -m in python so that it installs with the anaconda python
+>
+>```shell
+>python -m pip install <package>
+>```
+>
+>3. This will install the package to the anaconda library directory rather than the library directory associated with the (non-anaconda) pip
+>4. The reason for doing this is as follows: the pip command references a specific pip file/shortcut (which -a pip will tell you which one). Similarly, the python command references a specific python file (which -a python will tell you which one). For one reason or another, these two commands can become out of sync, so your "default" pip is in a different folder than your default python and therefore is associated with different versions of python.
+>5. In contrast, the python -m pip construct does not use the shortcut that the pip command points to. Instead, it asks python to find its pip version and use that version to install a package.
--- a/docs/docker/vector_db/milvus/docker-compose.yml
+++ b/docs/docker/vector_db/milvus/docker-compose.yml
@ -0,0 +1,49 @@
+version: '3.5'
+
+services:  # three containers: etcd, minio, and the Milvus standalone server that connects to both
+  etcd:
+    container_name: milvus-etcd
+    image: quay.io/coreos/etcd:v3.5.0
+    environment:
+      - ETCD_AUTO_COMPACTION_MODE=revision
+      - ETCD_AUTO_COMPACTION_RETENTION=1000
+      - ETCD_QUOTA_BACKEND_BYTES=4294967296  # 4 GiB
+      - ETCD_SNAPSHOT_COUNT=50000
+    volumes:
+      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd  # host path falls back to "." when DOCKER_VOLUME_DIRECTORY is unset or empty
+    command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
+
+  minio:
+    container_name: milvus-minio
+    image: minio/minio:RELEASE.2022-03-17T06-34-49Z
+    environment:
+      MINIO_ACCESS_KEY: minioadmin  # NOTE(review): default-looking credentials; change before exposing this service
+      MINIO_SECRET_KEY: minioadmin
+    volumes:
+      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data  # same host-path fallback as the etcd volume
+    command: minio server /minio_data
+    healthcheck:  # polls MinIO's liveness URL with curl every 30s, up to 3 retries
+      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
+      interval: 30s
+      timeout: 20s
+      retries: 3
+
+  standalone:
+    container_name: milvus-standalone
+    image: milvusdb/milvus:v2.1.3
+    command: ["milvus", "run", "standalone"]
+    environment:
+      ETCD_ENDPOINTS: etcd:2379  # addresses the etcd service above by its service name
+      MINIO_ADDRESS: minio:9000  # addresses the minio service above by its service name
+    volumes:
+      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
+    ports:  # the only ports published to the host in this file
+      - "19530:19530"
+      - "9091:9091"
+    depends_on:  # start etcd and minio before this container (start order only)
+      - "etcd"
+      - "minio"
+
+networks:
+  default:
+    name: milvus  # gives the project's default network the fixed name "milvus"
--- a/docs/docker/vector_db/pg/docker-compose.yml
+++ b/docs/docker/vector_db/pg/docker-compose.yml
@ -0,0 +1,13 @@
+version: "3.8"
+services:  # single PostgreSQL container for the pg vector store
+  postgresql:
+    image: ankane/pgvector:v0.4.1
+    container_name: langchain_chatchat-pg-db
+    environment:  # NOTE(review): default-looking credentials; change before exposing this service
+      POSTGRES_DB: langchain_chatchat
+      POSTGRES_USER: postgres
+      POSTGRES_PASSWORD: postgres
+    ports:
+      - "5432:5432"  # host:container; quoted so YAML never parses the mapping as a number
+    volumes:
+      - ./data:/var/lib/postgresql/data  # keeps database files in ./data next to this compose file
--- a/docs/向量库环境docker.md
+++ b/docs/向量库环境docker.md
@ -0,0 +1,8 @@
+向量库环境 docker-compose.yml 文件在 docs/docker/vector_db 中
+
+以 milvus 为例
+```shell
+cd docs/docker/vector_db/milvus
+docker-compose up -d
+```
+
--- a/docs/启动API服务.md
+++ b/docs/启动API服务.md
@ -0,0 +1,37 @@
+# 启动API服务
+
+## 通过py文件启动
+可以通过直接执行`api.py`文件启动API服务，默认以ip:0.0.0.0和port:7861启动http和ws服务。
+```shell
+python api.py
+```
+同时，启动时支持StartOption所列的模型加载参数，同时还支持IP和端口设置。
+```shell
+python api.py --model-name chatglm-6b-int8 --port 7862 
+```
+
+## 通过cli.bat/cli.sh启动
+也可以通过命令行控制文件进行启动。
+```shell
+cli.sh api --help
+```
+其他可设置参数和上述py文件启动方式相同。
+
+
+# 以https、wss启动API服务
+## 本地创建ssl相关证书文件
+如果没有正式签发的CA证书，可以[安装mkcert](https://github.com/FiloSottile/mkcert#installation)工具， 然后用如下指令生成本地CA证书：
+```shell
+mkcert -install
+mkcert api.example.com 47.123.123.123 localhost 127.0.0.1 ::1
+```
+默认回车保存在当前目录下，会有以生成指令第一个域名命名为前缀命名的两个pem文件。
+
+附带两个文件参数启动即可。
+````shell
+python api.py --port 7862 --ssl_keyfile api.example.com+4-key.pem --ssl_certfile api.example.com+4.pem
+
+./cli.sh api --port 7862 --ssl_keyfile api.example.com+4-key.pem --ssl_certfile api.example.com+4.pem
+````
+
+此外可以通过前置Nginx转发实现类似效果，可另行查阅相关资料。
--- a/docs/在Anaconda中使用pip安装包无效问题.md
+++ b/docs/在Anaconda中使用pip安装包无效问题.md
@ -0,0 +1,125 @@
+##  在 Anaconda 中使用 pip 安装包无效问题
+
+##  问题
+
+最近在跑开源代码的时候遇到的问题：使用 conda 创建虚拟环境并切换到新的虚拟环境后，再使用 pip 来安装包会“无效”。这里的“无效”指的是使用 pip 安装的包不在这个新的环境中。
+
+------
+
+## 分析
+
+1、首先创建一个测试环境 test，`conda create -n test`
+
+2、激活该测试环境，`conda activate test`
+
+3、使用 pip 安装 numpy，`pip install numpy`，会发现 numpy 已经存在默认的环境中
+
+```powershell
+Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
+Requirement already satisfied: numpy in c:\programdata\anaconda3\lib\site-packages (1.20.3)
+```
+
+4、这时候看一下 pip 的信息，`pip show pip`
+
+```powershell
+Name: pip
+Version: 21.2.4
+Summary: The PyPA recommended tool for installing Python packages.
+Home-page: https://pip.pypa.io/
+Author: The pip developers
+Author-email: distutils-sig@python.org
+License: MIT
+Location: c:\programdata\anaconda3\lib\site-packages
+Requires:
+Required-by:
+```
+
+5、可以发现当前 pip 是在默认的 conda 环境中。这也就解释了当我们直接使用 pip 安装包时为什么包不在这个新的虚拟环境中，因为使用的 pip 属于默认环境，安装的包要么已经存在，要么直接装到默认环境中去了。
+
+------
+
+## 解决
+
+1、我们可以直接使用 conda 命令安装新的包，但有些时候 conda 可能没有某些包/库，所以还是得用 pip 安装
+
+2、我们可以先使用 conda 命令为当前虚拟环境安装 pip 包，再使用 pip 安装新的包
+
+```powershell
+# 使用 conda 安装 pip 包
+(test) PS C:\Users\Administrator> conda install pip
+Collecting package metadata (current_repodata.json): done
+Solving environment: done
+....
+done
+
+# 显示当前 pip 的信息，发现 pip 在测试环境 test 中
+(test) PS C:\Users\Administrator> pip show pip
+Name: pip
+Version: 21.2.4
+Summary: The PyPA recommended tool for installing Python packages.
+Home-page: https://pip.pypa.io/
+Author: The pip developers
+Author-email: distutils-sig@python.org
+License: MIT
+Location: c:\programdata\anaconda3\envs\test\lib\site-packages
+Requires:
+Required-by:
+
+# 再使用 pip 安装 numpy 包，成功安装
+(test) PS C:\Users\Administrator> pip install numpy
+Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
+Collecting numpy
+  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/4b/23/140ec5a509d992fe39db17200e96c00fd29603c1531ce633ef93dbad5e9e/numpy-1.22.2-cp39-cp39-win_amd64.whl (14.7 MB)
+Installing collected packages: numpy
+Successfully installed numpy-1.22.2
+
+# 使用 pip list 查看当前安装的包，没有问题
+(test) PS C:\Users\Administrator> pip list
+Package      Version
+------------ ---------
+certifi      2021.10.8
+numpy        1.22.2
+pip          21.2.4
+setuptools   58.0.4
+wheel        0.37.1
+wincertstore 0.2
+```
+
+------
+
+## 补充
+
+1、之前没有发现这个问题可能是因为在虚拟环境中安装的包是指定版本的，覆盖了默认环境中的包。其实主要还是观察不仔细：），不然可以发现 `Successfully uninstalled numpy-xxx`【默认版本】 以及 `Successfully installed numpy-1.20.3`【指定版本】
+
+2、测试时发现如果在新建环境的时候指定了 python 版本的话应该是没有这个问题的，猜测是因为会在虚拟环境中安装好 pip ，而我们这里包括 pip 在内啥包也没有装，所以使用的是默认环境的 pip
+
+3、有个问题，之前我在创建新的虚拟环境时应该指定了 python 版本，但还是使用的默认环境的 pip 包，但是刚才在两台机器上都没有复现成功，于是有了上面的第 2 点
+
+4、出现了第 3 点的问题后，我当时是使用 `python -m pip install package-name` 解决的，在 pip 前面加上了 python -m。至于为什么，可以参考 [StackOverflow](https://stackoverflow.com/questions/41060382/using-pip-to-install-packages-to-anaconda-environment) 上的回答：
+
+> 1、如果你有一个非 conda 的 pip 作为你的默认 pip，但是 conda 的 python 是你的默认 python（如下）：
+>
+> ```shell
+> >which -a pip
+> /home/<user>/.local/bin/pip   
+> /home/<user>/.conda/envs/newenv/bin/pip
+> /usr/bin/pip
+> 
+> >which -a python
+> /home/<user>/.conda/envs/newenv/bin/python
+> /usr/bin/python
+> ```
+>
+> 2、然后，你可以不直接调用 `pip install <package>`，而是在 python 中使用模块标志 -m，以便它使用 anaconda python 进行安装
+>
+> ```shell
+>python -m pip install <package>
+> ```
+>
+> 3、这将把包安装到 anaconda 库目录，而不是与（非anaconda） pip 关联的库目录
+> 
+> 4、这样做的原因如下：命令 pip 引用了一个特定的 pip 文件 / 快捷方式（which -a pip 会告诉你是哪一个）。类似地，命令 python 引用一个特定的 python 文件（which -a python 会告诉你是哪个）。由于这样或那样的原因，这两个命令可能变得不同步，因此你的“默认” pip 与你的默认 python 位于不同的文件夹中，因此与不同版本的 python 相关联。
+>
+> 5、与此相反，python -m pip 构造不使用 pip 命令指向的快捷方式。相反，它要求 python 找到它的pip 版本，并使用该版本安装一个包。
+
+-   
--- a/document_loaders/FilteredCSVloader.py
+++ b/document_loaders/FilteredCSVloader.py
@ -1,84 +0,0 @@
-## 指定制定列的csv文件加载器
-
-from langchain.document_loaders import CSVLoader
-import csv
-from io import TextIOWrapper
-from typing import Dict, List, Optional
-from langchain.docstore.document import Document
-from langchain.document_loaders.helpers import detect_file_encodings
-
-
-class FilteredCSVLoader(CSVLoader):
-    def __init__(
-            self,
-            file_path: str,
-            columns_to_read: List[str],
-            source_column: Optional[str] = None,
-            metadata_columns: List[str] = [],
-            csv_args: Optional[Dict] = None,
-            encoding: Optional[str] = None,
-            autodetect_encoding: bool = False,
-    ):
-        super().__init__(
-            file_path=file_path,
-            source_column=source_column,
-            metadata_columns=metadata_columns,
-            csv_args=csv_args,
-            encoding=encoding,
-            autodetect_encoding=autodetect_encoding,
-        )
-        self.columns_to_read = columns_to_read
-
-    def load(self) -> List[Document]:
-        """Load data into document objects."""
-
-        docs = []
-        try:
-            with open(self.file_path, newline="", encoding=self.encoding) as csvfile:
-                docs = self.__read_file(csvfile)
-        except UnicodeDecodeError as e:
-            if self.autodetect_encoding:
-                detected_encodings = detect_file_encodings(self.file_path)
-                for encoding in detected_encodings:
-                    try:
-                        with open(
-                            self.file_path, newline="", encoding=encoding.encoding
-                        ) as csvfile:
-                            docs = self.__read_file(csvfile)
-                            break
-                    except UnicodeDecodeError:
-                        continue
-            else:
-                raise RuntimeError(f"Error loading {self.file_path}") from e
-        except Exception as e:
-            raise RuntimeError(f"Error loading {self.file_path}") from e
-
-        return docs
-
-    def __read_file(self, csvfile: TextIOWrapper) -> List[Document]:
-        docs = []
-        csv_reader = csv.DictReader(csvfile, **self.csv_args)  # type: ignore
-        for i, row in enumerate(csv_reader):
-            content = []
-            for col in self.columns_to_read:
-                if col in row:
-                    content.append(f'{col}:{str(row[col])}')
-                else:
-                    raise ValueError(f"Column '{self.columns_to_read[0]}' not found in CSV file.")
-            content = '\n'.join(content)
-            # Extract the source if available
-            source = (
-                row.get(self.source_column, None)
-                if self.source_column is not None
-                else self.file_path
-            )
-            metadata = {"source": source, "row": i}
-
-            for col in self.metadata_columns:
-                if col in row:
-                    metadata[col] = row[col]
-
-            doc = Document(page_content=content, metadata=metadata)
-            docs.append(doc)
-
-        return docs
--- a/document_loaders/init.py
+++ b/document_loaders/init.py
@ -1,4 +1,2 @@
 from .mypdfloader import RapidOCRPDFLoader
-from .myimgloader import RapidOCRLoader
-from .mydocloader import RapidOCRDocLoader
-from .mypptloader import RapidOCRPPTLoader
+from .myimgloader import RapidOCRLoader
--- a/document_loaders/mydocloader.py
+++ b/document_loaders/mydocloader.py
@ -1,71 +0,0 @@
-from langchain.document_loaders.unstructured import UnstructuredFileLoader
-from typing import List
-import tqdm
-
-
-class RapidOCRDocLoader(UnstructuredFileLoader):
-    def _get_elements(self) -> List:
-        def doc2text(filepath):
-            from docx.table import _Cell, Table
-            from docx.oxml.table import CT_Tbl
-            from docx.oxml.text.paragraph import CT_P
-            from docx.text.paragraph import Paragraph
-            from docx import Document, ImagePart
-            from PIL import Image
-            from io import BytesIO
-            import numpy as np
-            from rapidocr_onnxruntime import RapidOCR
-            ocr = RapidOCR()
-            doc = Document(filepath)
-            resp = ""
-
-            def iter_block_items(parent):
-                from docx.document import Document
-                if isinstance(parent, Document):
-                    parent_elm = parent.element.body
-                elif isinstance(parent, _Cell):
-                    parent_elm = parent._tc
-                else:
-                    raise ValueError("RapidOCRDocLoader parse fail")
-
-                for child in parent_elm.iterchildren():
-                    if isinstance(child, CT_P):
-                        yield Paragraph(child, parent)
-                    elif isinstance(child, CT_Tbl):
-                        yield Table(child, parent)
-
-            b_unit = tqdm.tqdm(total=len(doc.paragraphs)+len(doc.tables),
-                               desc="RapidOCRDocLoader block index: 0")
-            for i, block in enumerate(iter_block_items(doc)):
-                b_unit.set_description(
-                    "RapidOCRDocLoader  block index: {}".format(i))
-                b_unit.refresh()
-                if isinstance(block, Paragraph):
-                    resp += block.text.strip() + "\n"
-                    images = block._element.xpath('.//pic:pic')  # 获取所有图片
-                    for image in images:
-                        for img_id in image.xpath('.//a:blip/@r:embed'):  # 获取图片id
-                            part = doc.part.related_parts[img_id]  # 根据图片id获取对应的图片
-                            if isinstance(part, ImagePart):
-                                image = Image.open(BytesIO(part._blob))
-                                result, _ = ocr(np.array(image))
-                                if result:
-                                    ocr_result = [line[1] for line in result]
-                                    resp += "\n".join(ocr_result)
-                elif isinstance(block, Table):
-                    for row in block.rows:
-                        for cell in row.cells:
-                            for paragraph in cell.paragraphs:
-                                resp += paragraph.text.strip() + "\n"
-                b_unit.update(1)
-            return resp
-
-        text = doc2text(self.file_path)
-        from unstructured.partition.text import partition_text
-        return partition_text(text=text, **self.unstructured_kwargs)
-
-
-if __name__ == '__main__':
-    loader = RapidOCRDocLoader(file_path="../tests/samples/ocr_test.docx")
-    docs = loader.load()
-    print(docs)
--- a/document_loaders/myimgloader.py
+++ b/document_loaders/myimgloader.py
@ -1,13 +1,13 @@
 from typing import List
 from langchain.document_loaders.unstructured import UnstructuredFileLoader
-from document_loaders.ocr import get_ocr


 class RapidOCRLoader(UnstructuredFileLoader):
    def _get_elements(self) -> List:
        def img2text(filepath):
+            from rapidocr_onnxruntime import RapidOCR
            resp = ""
-            ocr = get_ocr()
+            ocr = RapidOCR()
            result, _ = ocr(filepath)
            if result:
                ocr_result = [line[1] for line in result]
--- a/document_loaders/mypdfloader.py
+++ b/document_loaders/mypdfloader.py
@ -1,79 +1,29 @@
 from typing import List
 from langchain.document_loaders.unstructured import UnstructuredFileLoader
-import cv2
-from PIL import Image
-import numpy as np
-from configs import PDF_OCR_THRESHOLD
-from document_loaders.ocr import get_ocr
-import tqdm


 class RapidOCRPDFLoader(UnstructuredFileLoader):
    def _get_elements(self) -> List:
-        def rotate_img(img, angle):
-            '''
-            img   --image
-            angle --rotation angle
-            return--rotated img
-            '''
-            
-            h, w = img.shape[:2]
-            rotate_center = (w/2, h/2)
-            #获取旋转矩阵
-            # 参数1为旋转中心点;
-            # 参数2为旋转角度,正值-逆时针旋转;负值-顺时针旋转
-            # 参数3为各向同性的比例因子,1.0原图，2.0变成原来的2倍，0.5变成原来的0.5倍
-            M = cv2.getRotationMatrix2D(rotate_center, angle, 1.0)
-            #计算图像新边界
-            new_w = int(h * np.abs(M[0, 1]) + w * np.abs(M[0, 0]))
-            new_h = int(h * np.abs(M[0, 0]) + w * np.abs(M[0, 1]))
-            #调整旋转矩阵以考虑平移
-            M[0, 2] += (new_w - w) / 2
-            M[1, 2] += (new_h - h) / 2
-
-            rotated_img = cv2.warpAffine(img, M, (new_w, new_h))
-            return rotated_img
-        
        def pdf2text(filepath):
-            import fitz # pyMuPDF里面的fitz包，不要与pip install fitz混淆
+            import fitz
+            from rapidocr_onnxruntime import RapidOCR
            import numpy as np
-            ocr = get_ocr()
+            ocr = RapidOCR()
            doc = fitz.open(filepath)
            resp = ""
-
-            b_unit = tqdm.tqdm(total=doc.page_count, desc="RapidOCRPDFLoader context page index: 0")
-            for i, page in enumerate(doc):
-                b_unit.set_description("RapidOCRPDFLoader context page index: {}".format(i))
-                b_unit.refresh()
+            for page in doc:
+                # TODO: 依据文本与图片顺序调整处理方式
                text = page.get_text("")
                resp += text + "\n"

-                img_list = page.get_image_info(xrefs=True)
+                img_list = page.get_images()
                for img in img_list:
-                    if xref := img.get("xref"):
-                        bbox = img["bbox"]
-                        # 检查图片尺寸是否超过设定的阈值
-                        if ((bbox[2] - bbox[0]) / (page.rect.width) < PDF_OCR_THRESHOLD[0]
-                            or (bbox[3] - bbox[1]) / (page.rect.height) < PDF_OCR_THRESHOLD[1]):
-                            continue
-                        pix = fitz.Pixmap(doc, xref)
-                        samples = pix.samples
-                        if int(page.rotation)!=0:  #如果Page有旋转角度，则旋转图片
-                            img_array = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, -1)
-                            tmp_img = Image.fromarray(img_array);
-                            ori_img = cv2.cvtColor(np.array(tmp_img),cv2.COLOR_RGB2BGR)
-                            rot_img = rotate_img(img=ori_img, angle=360-page.rotation)
-                            img_array = cv2.cvtColor(rot_img, cv2.COLOR_RGB2BGR)
-                        else:
-                            img_array = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, -1)
-
-                        result, _ = ocr(img_array)
-                        if result:
-                            ocr_result = [line[1] for line in result]
-                            resp += "\n".join(ocr_result)
-
-                # 更新进度
-                b_unit.update(1)
+                    pix = fitz.Pixmap(doc, img[0])
+                    img_array = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, -1)
+                    result, _ = ocr(img_array)
+                    if result:
+                        ocr_result = [line[1] for line in result]
+                        resp += "\n".join(ocr_result)
            return resp

        text = pdf2text(self.file_path)
@ -82,6 +32,6 @@ class RapidOCRPDFLoader(UnstructuredFileLoader):


 if __name__ == "__main__":
-    loader = RapidOCRPDFLoader(file_path="/Users/tonysong/Desktop/test.pdf")
+    loader = RapidOCRPDFLoader(file_path="../tests/samples/ocr_test.pdf")
    docs = loader.load()
    print(docs)
--- a/document_loaders/mypptloader.py
+++ b/document_loaders/mypptloader.py
@ -1,59 +0,0 @@
-from langchain.document_loaders.unstructured import UnstructuredFileLoader
-from typing import List
-import tqdm
-
-
-class RapidOCRPPTLoader(UnstructuredFileLoader):
-    def _get_elements(self) -> List:
-        def ppt2text(filepath):
-            from pptx import Presentation
-            from PIL import Image
-            import numpy as np
-            from io import BytesIO
-            from rapidocr_onnxruntime import RapidOCR
-            ocr = RapidOCR()
-            prs = Presentation(filepath)
-            resp = ""
-
-            def extract_text(shape):
-                nonlocal resp
-                if shape.has_text_frame:
-                    resp += shape.text.strip() + "\n"
-                if shape.has_table:
-                    for row in shape.table.rows:
-                        for cell in row.cells:
-                            for paragraph in cell.text_frame.paragraphs:
-                                resp += paragraph.text.strip() + "\n"
-                if shape.shape_type == 13:  # 13 表示图片
-                    image = Image.open(BytesIO(shape.image.blob))
-                    result, _ = ocr(np.array(image))
-                    if result:
-                        ocr_result = [line[1] for line in result]
-                        resp += "\n".join(ocr_result)
-                elif shape.shape_type == 6:  # 6 表示组合
-                    for child_shape in shape.shapes:
-                        extract_text(child_shape)
-
-            b_unit = tqdm.tqdm(total=len(prs.slides),
-                               desc="RapidOCRPPTLoader slide index: 1")
-            # 遍历所有幻灯片
-            for slide_number, slide in enumerate(prs.slides, start=1):
-                b_unit.set_description(
-                    "RapidOCRPPTLoader slide index: {}".format(slide_number))
-                b_unit.refresh()
-                sorted_shapes = sorted(slide.shapes,
-                                       key=lambda x: (x.top, x.left))  # 从上到下、从左到右遍历
-                for shape in sorted_shapes:
-                    extract_text(shape)
-                b_unit.update(1)
-            return resp
-
-        text = ppt2text(self.file_path)
-        from unstructured.partition.text import partition_text
-        return partition_text(text=text, **self.unstructured_kwargs)
-
-
-if __name__ == '__main__':
-    loader = RapidOCRPPTLoader(file_path="../tests/samples/ocr_test.pptx")
-    docs = loader.load()
-    print(docs)
--- a/document_loaders/ocr.py
+++ b/document_loaders/ocr.py
@ -1,18 +0,0 @@
-from typing import TYPE_CHECKING
-
-
-if TYPE_CHECKING:
-    try:
-        from rapidocr_paddle import RapidOCR
-    except ImportError:
-        from rapidocr_onnxruntime import RapidOCR
-
-
-def get_ocr(use_cuda: bool = True) -> "RapidOCR":
-    try:
-        from rapidocr_paddle import RapidOCR
-        ocr = RapidOCR(det_use_cuda=use_cuda, cls_use_cuda=use_cuda, rec_use_cuda=use_cuda)
-    except ImportError:
-        from rapidocr_onnxruntime import RapidOCR
-        ocr = RapidOCR()
-    return ocr
--- a/embeddings/add_embedding_keywords.py
+++ b/embeddings/add_embedding_keywords.py
@ -1,79 +0,0 @@
-'''
-该功能是为了将关键词加入到embedding模型中，以便于在embedding模型中进行关键词的embedding
-该功能的实现是通过修改embedding模型的tokenizer来实现的
-该功能仅仅对EMBEDDING_MODEL参数对应的的模型有效，输出后的模型保存在原本模型
-感谢@CharlesJu1和@charlesyju的贡献提出了想法和最基础的PR
-
-保存的模型的位置位于原本嵌入模型的目录下，模型的名称为原模型名称+Merge_Keywords_时间戳
-'''
-import sys
-
-sys.path.append("..")
-import os
-import torch
-
-from datetime import datetime
-from configs import (
-    MODEL_PATH,
-    EMBEDDING_MODEL,
-    EMBEDDING_KEYWORD_FILE,
-)
-
-from safetensors.torch import save_model
-from sentence_transformers import SentenceTransformer
-from langchain_core._api import deprecated
-
-
-@deprecated(
-        since="0.3.0",
-        message="自定义关键词 Langchain-Chatchat 0.3.x 重写, 0.2.x中相关功能将废弃",
-        removal="0.3.0"
-    )
-def get_keyword_embedding(bert_model, tokenizer, key_words):
-    tokenizer_output = tokenizer(key_words, return_tensors="pt", padding=True, truncation=True)
-    input_ids = tokenizer_output['input_ids']
-    input_ids = input_ids[:, 1:-1]
-
-    keyword_embedding = bert_model.embeddings.word_embeddings(input_ids)
-    keyword_embedding = torch.mean(keyword_embedding, 1)
-    return keyword_embedding
-
-
-def add_keyword_to_model(model_name=EMBEDDING_MODEL, keyword_file: str = "", output_model_path: str = None):
-    key_words = []
-    with open(keyword_file, "r") as f:
-        for line in f:
-            key_words.append(line.strip())
-
-    st_model = SentenceTransformer(model_name)
-    key_words_len = len(key_words)
-    word_embedding_model = st_model._first_module()
-    bert_model = word_embedding_model.auto_model
-    tokenizer = word_embedding_model.tokenizer
-    key_words_embedding = get_keyword_embedding(bert_model, tokenizer, key_words)
-
-    embedding_weight = bert_model.embeddings.word_embeddings.weight
-    embedding_weight_len = len(embedding_weight)
-    tokenizer.add_tokens(key_words)
-    bert_model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=32)
-    embedding_weight = bert_model.embeddings.word_embeddings.weight
-    with torch.no_grad():
-        embedding_weight[embedding_weight_len:embedding_weight_len + key_words_len, :] = key_words_embedding
-
-    if output_model_path:
-        os.makedirs(output_model_path, exist_ok=True)
-        word_embedding_model.save(output_model_path)
-        safetensors_file = os.path.join(output_model_path, "model.safetensors")
-        metadata = {'format': 'pt'}
-        save_model(bert_model, safetensors_file, metadata)
-        print("save model to {}".format(output_model_path))
-
-
-def add_keyword_to_embedding_model(path: str = EMBEDDING_KEYWORD_FILE):
-    keyword_file = os.path.join(path)
-    model_name = MODEL_PATH["embed_model"][EMBEDDING_MODEL]
-    model_parent_directory = os.path.dirname(model_name)
-    current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
-    output_model_name = "{}_Merge_Keywords_{}".format(EMBEDDING_MODEL, current_time)
-    output_model_path = os.path.join(model_parent_directory, output_model_name)
-    add_keyword_to_model(model_name, keyword_file, output_model_path)
--- a/embeddings/embedding_keywords.txt
+++ b/embeddings/embedding_keywords.txt
@ -1,3 +0,0 @@
-Langchain-Chatchat
-数据科学与大数据技术
-人工智能与先进计算
--- a/img/LLM_success.png
+++ b/img/LLM_success.png
--- a/img/agent_continue.png
+++ b/img/agent_continue.png
--- a/img/agent_success.png
+++ b/img/agent_success.png
--- a/img/chatchat-qrcode.jpg
+++ b/img/chatchat-qrcode.jpg
--- a/img/fastapi_docs_020_0.png
+++ b/img/fastapi_docs_020_0.png
--- a/img/fastapi_docs_026.png
+++ b/img/fastapi_docs_026.png
--- a/img/init_knowledge_base.jpg
+++ b/img/init_knowledge_base.jpg
--- a/img/knowledge_base_success.jpg
+++ b/img/knowledge_base_success.jpg
--- a/img/official_account_qr.png
+++ b/img/official_account_qr.png
--- a/img/official_wechat_mp_account.png
+++ b/img/official_wechat_mp_account.png
--- a/img/partners/autodl.svg
+++ b/img/partners/autodl.svg
--- a/img/partners/aws.svg
+++ b/img/partners/aws.svg
--- a/img/partners/chatglm.svg
+++ b/img/partners/chatglm.svg
--- a/img/partners/zhenfund.svg
+++ b/img/partners/zhenfund.svg
@ -1,9 +0,0 @@
-<svg width="654" height="213" viewBox="0 0 654 213" fill="none" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<rect x="654" width="213" height="654" transform="rotate(90 654 0)" fill="url(#pattern0)"/>
-<defs>
-<pattern id="pattern0" patternContentUnits="objectBoundingBox" width="1" height="1">
-<use xlink:href="#image0_237_57" transform="matrix(0.0204695 0 0 0.00666667 -0.00150228 0)"/>
-</pattern>
-<image id="image0_237_57" width="49" height="150" xlink:href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADEAAACWCAYAAAB3hWBKAAAMyElEQVR4Ae1de5AcRRk/AR88C2MgIdzdfB0jIioFIoggGkoeCbnt3lAVtUALygdCUSWI0X8M3LcHiUHkEQoIuem7aKR4pCJSFFqQ6cvyCMnMXngVBORhBaG0ohaiIgqCrPX1bO/OzM7uzcztZAO1VzU1szPf1/39+uvn933d19fX+9vNSgAFuwo53J/rVYDzcoWNAtaiYNVcLw5X5guCA+YKgAoodxAFODdXEByeKhVA5KyJ/nlI2mh1CfZaAOSzLeka/GMB+uqIGDgpVwBTJX7VabP2DQqEAlZNxUPfUcCWOh9nz685pu/9SfhyoRnh1tF1YQSrljhckCSjUsFaGORDbn0zCV8uNCVuLQoLw3iSjLCvbw/k7I0GL9yehC8XmpKArzYEYdWSsBYnzQgFbG/wwmNJ+TpOh8I6qyGI7ipXJMkEl/R9ADm8XuflbHMSvlxoRoasz9cFaQyIG1CwpSXOTsUizG+6CoNDyOGuEB+H1bkImCRRnN+3Fwp4NSRQA0ziUb40NPjlJPnlRlPi7IfTAsHZxtyES5qw7mkEjGcCwmErnt4/I2leudONFAePGebwIxRsHQoot5/1snuwAMVqX9/7chesl0GvBHolkK0E/JEbHsvUQ+mJI7s0W84d4kLBrsgqvOEr8S6CuJLP3D88G822Fu8qCBSDp5jSnM69uyA4403CF+C8pklf3EQw8O6KM5jVodqdPpkR0f/pCAgvfSpd5qBZ7LBgf68D4ewPXRYpW/YoGK0fGtPuxXMPy5ZSF7lKnH0nBEKw5V0UJ1vWP1k0+GEU7M06EM52rl/St2e21LrERdNp5DBRB6HNkmyy/VQ8YqAuwLldEt/PFgV7KQQg2D6SPnPA7oLg7K0eCF39uqwJmjIkMCK3NkiTobkI87tanXqZ90qggyWAwjoh7ay1iZ73z+ugSOmTQs52TruLFdb16XPuIEcPRH1Ef7drgrNfYhGO6mDlyD8p5OzqYBvK3f2bB6QrxJyBIAgU8Os88sk9TW01r7cH9iaZfnLPtNMZlDhcFNRGiVtndzqP3NPDAhwfBIEcbss9005ngBzOD4EQsL3TeaRKjxpmqqVoyH9dt5K8nCrTThOjYH8Ol2pdsIYZp9GI499x9lCn5UqVXidAkNknVaadJp4WCA7vIGc3dN3EQ9EDWZanJW79AAtweKcLtZfebl8COhClEf6GyNkXd3uhowKWBHw/1I1ytixKQ79rJs1ADC27IY6uK+90Iwz09a3cU9TLhMAK2NIVgeMy7YGIK5VuvOtpohulHpdnTxNxpdKNd+8NTTQNdjDWZE8twvwSt04OjRMcnoqja3q3aHBu7sqJaiIkaGAQzPw+7/0TVEI9EEk01dNEwtaECw/tb2qMgVCfaX/bFQ07IdYeWa8EeiVQKwHPkWd5jv2Yp2Q1y+UqO1VcrA5jHbI+E1XANUv6976cs49H30/521P28iyCB3nSgKDdAcjhGdrysGLxoR8pcRg2/g3kek/g2JRCBwk2bx7b33PsN4ICZXlOCuL6hfM+iII9iQXrKvKfLz/TOgQ53IHcGiH3gJ67cUgHYlKNn5JF6ChPUhBITkoBt+s9S4IVDIgVC2cfhILdqCPg0oJwHVtEBfIc+1bPsTHNVdk0/qWghuOekVujyOFvtXC9paUACEM/wgdPxNQglH1kGIT9rEmwk3cSGDn7J1UhSrdm/73N98TC9rotWMAqFDCeKu9yubyXq+zX6kAcO5e42BJnS5DD76khN4GgNYuxTgrr+tQgKEHPkffUQShZdTeu/ViqkkhAjEsO2g85PIoCtmFhzj6hNiEaWz5x8dzDUlcnDULJc4IgqMtNIFdqkuWczdJBkxzWxIEg+zAKeDg
TCNe95QBX2W8ZIK6yd1aruEdqKRMw4BA7DnXgpLW83sVSb0VB9gJeoB1jmUBQ3p6S6w0IuruOPZ6md5p07MQxgCVh/RiFv+EW+cAc3eUW4Sg9htBgl7ZhEwDXketcZf87CCL1s2MnjovVRmsOjw4PDX4hqjzaeJgpViRYlVILb+ZaKUBEBe/I7/cKCMdz7Punc1UmxrobK94RdfYS6ZVAcwnQIDfpyKOp349envqFnvc0c+0mb6rr1+/pKftbniNfatXNViZkwYi7rbzmcG9ibNhVtuMq2f0dYdu2rdnHVbLcSvjG+9EhA0Kvyc0YoWS1smn0k+ZbqztNAmk2G/yOHH5LE0K9VY4miNMY7G5oCNraUBDUxBP3rTs4yOM6csoTVPAMmE1L0RAIAb/Rs1oOm0pF9o1Ma+xtD645xHXk2xGB3nYd6dbGjf+Zb0EQJIir5IvmmzchrwkKF33WEz86/4azv4QCxAS8ghyuq81uz/dBMBnlb/vbVfapdUH0xE8+75btfsMUnFNNbhoLHRHjOnKz4XWVbBsDSNUEuXUmctiEgt2NBbgYOdw6zFkFOXvePxsHVmbShKdk0QhC94qyv24A0N1z5L8a3xttwv9m32u+UQMP8sU96+qkjQRwszZgC1iJAu7V6wwdGNkpEBtHQ+HSSUFQ1YsTPPiuFQgU7EF/1ad3mXmp1xNRTXhKnhPM2HPs101pR9uE58hJ88117LuCfHHPbUAsR84kcrYZs6yxPWWfZAShu+vYz2wv37ifEaKVJlw1dnKIT9lTHm7VGoR1gh9nW6tOaRdFNQtgvQfygcintWmTbE/K/m9dWLJHKblS91pKvlN/T+DV2MkGeNy91uv8DMmyQcYCDj/3Sx5eoLN0/P2vGdsEZegp+86gQBmeH48TPPguBMKYZ/SdPYcF9hXk8B9/fU222JRdLGXkKnuWp+xnMwhfpTaTZLSmfHR14mx9GJw/YlNb8Pf7ZVxjayDa4iE3pAHiKrmjokaPDQrV7lkfbCUGjwjSoICV2kCweNbByNktmU37wUQrE6MnuI5c6jpylPr+6GqP5lgVx76WeiqyHgZ5e8+9EnivlQD1MtHVXOLfajz/KJt2BV4pr53tKntjmp6pmdb+abs8cv/mOfbWZqFaL47iabsIgqpQvFC7BoQ+wm+6Gw1p/bCrQGARDtQW8YUzDjDVCzl8DTmbJDcwXSjgMhrZzfdE94oaOzsKgpyRiZhTEmERgLyjwwKepg2HtZDuJ5Gzh5DDtTWX1x30nCrpaHUiB0uqBFIQ+yDgduSwAgXs8H3X7GrtFi4CUFKXFwc+Smc+p0i2r69mb/qr0QZZyLdsWb93qkQSEl9esBg5VIgcOawmrdQOyaINuNlBUIKekmMGRO1eTChXYrIRAZ/THiEOt2FhzkwdFsFhgg7ZpcMTzflQmTRBUjQ55B05mVi6hIR+xABc45/WAjv02qEIByJnzyGHX+lzaykSjo5+TeuMJxl2lNd+yHXkK0Ft0Gw2oXypyHDhjAP0sZXcuoQYa435d/6GXCjTnY62bErUc6QKCpjPc7LBjsyVly6YfSyFP5SEdSEumjWXutkmoaMvPEfelI/gwQGxPQhtPKPqQtE0ZLqkSBs6Pj8mmDI23slV8qJug9ANm8P9NEaEo6Dh1ZBZk2jiqlNFyQXdBmFqhx6hBazS1g2KuqGD2Ws9lqGJvZMdKfGUOsa5kog34VTcN+EPfNYISoez0wZ16mp7Z8+aUundd0UJuI59fDDgpOLYF0wnkmZbec3MYHrk58gdh+vYF0d7KPIGPVKWmU4D2urYnwin136c6AjAOBAkhO8JGrskrVZ2KxCmNEkrW8tr9XQ4SantliA0GDIOK/t71Wp1yrPCOw0CFw3OHSlA+7DUmOr0qNFC891+cCqtaGd8wI/tqanbBA1qtDCK07Q+fJezZTRvarlXPDp/Ik8oRSe7jvxjMwhZJQ+Rq+wLW2kliyboQAZcNPApWi8gHdAQ2au
kTfv6ZHl2ZBxQ8jmHJoHGnas9Rc0ru3rkPrl5Kw+MD0QTnSaIZ7RVQx99DDf7R73qY5BfjjrrQ/lGq1PUotFOK7Xg3+8GE8wGAm5Czo6subkuQw4LdOg1nVdLz4K9aIJ/g3nVn6cCQYSkFfJLxFYv3R3LstFKJhCC3ehPAMlLCpdhgZ1mQJQEnN5qt34qEIaYZq0to20c+Q9vQn67uWHLtv/2KvQPbDg80hEQUZ+0AWDuNa/q6lZa8ZSM9m5tQVC6tLXAaEI3Yq0JHa28gbRCFkCTf+w9SXWKY2yrlZRdLPKp2gT8qa0JMysIAvbEfev2dZVN4UQh33VYS1OPE4Eu9nHqZmv7Ye9Gzh7wn+FhFHBnXGHqd9MBYRKd3CRPJI9pWHhjLEgAol6dAv8aKDhWcLZMN/QhdpzJM3TvBAhKkEycrrJXNWslAQgKD2qxvdPvbtkyvculZtIMAcjjh6vGjwitvROusVvJQiYdY8psRfOuff9/T15+hUcbNtcAAAAASUVORK5CYII="/>
-</defs>
-</svg>
--- a/img/qr_code_100.jpg
+++ b/img/qr_code_100.jpg
--- a/img/qr_code_101.jpg
+++ b/img/qr_code_101.jpg
--- a/img/qr_code_102.jpg
+++ b/img/qr_code_102.jpg
--- a/img/qr_code_103.jpg
+++ b/img/qr_code_103.jpg
--- a/img/qr_code_104.jpg
+++ b/img/qr_code_104.jpg
--- a/img/qr_code_105.jpg
+++ b/img/qr_code_105.jpg
--- a/img/qr_code_106.jpg
+++ b/img/qr_code_106.jpg
--- a/img/qr_code_106_2.jpg
+++ b/img/qr_code_106_2.jpg
--- a/img/qr_code_107.jpg
+++ b/img/qr_code_107.jpg
--- a/img/qr_code_108.jpg
+++ b/img/qr_code_108.jpg
--- a/img/qr_code_58.jpg
+++ b/img/qr_code_58.jpg
--- a/img/qr_code_90.jpg
+++ b/img/qr_code_90.jpg
--- a/img/qr_code_90.png
+++ b/img/qr_code_90.png
--- a/img/qr_code_91.jpg
+++ b/img/qr_code_91.jpg
--- a/img/qr_code_92.jpg
+++ b/img/qr_code_92.jpg
--- a/img/qr_code_93.jpg
+++ b/img/qr_code_93.jpg
--- a/img/qr_code_94.jpg
+++ b/img/qr_code_94.jpg
--- a/img/qr_code_95.jpg
+++ b/img/qr_code_95.jpg
--- a/img/qr_code_96.jpg
+++ b/img/qr_code_96.jpg
--- a/img/qr_code_97.jpg
+++ b/img/qr_code_97.jpg
--- a/img/qr_code_98.jpg
+++ b/img/qr_code_98.jpg
--- a/img/qr_code_99.jpg
+++ b/img/qr_code_99.jpg
--- a/img/qrcode_90_2.jpg
+++ b/img/qrcode_90_2.jpg
--- a/img/webui_020_0.png
+++ b/img/webui_020_0.png
--- a/img/webui_020_1.png
+++ b/img/webui_020_1.png
--- a/img/webui_0813_0.png
+++ b/img/webui_0813_0.png
--- a/img/webui_0813_1.png
+++ b/img/webui_0813_1.png
--- a/init_database.py
+++ b/init_database.py
@ -1,120 +1,44 @@
-import sys
-sys.path.append(".")
-from server.knowledge_base.migrate import (create_tables, reset_tables, import_from_db,
-                                           folder2db, prune_db_docs, prune_folder_files)
-from configs.model_config import NLTK_DATA_PATH, EMBEDDING_MODEL
+from server.knowledge_base.migrate import create_tables, reset_tables, folder2db, recreate_all_vs, list_kbs_from_folder
+from configs.model_config import NLTK_DATA_PATH
 import nltk
 nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
+from startup import dump_server_info
 from datetime import datetime


 if __name__ == "__main__":
    import argparse
    
-    parser = argparse.ArgumentParser(description="please specify only one operate method once time.")
+    parser = argparse.ArgumentParser()
+    parser.formatter_class = argparse.RawTextHelpFormatter

    parser.add_argument(
-        "-r",
        "--recreate-vs",
        action="store_true",
        help=('''
-            recreate vector store.
+            recreate all vector store.
            use this option if you have copied document files to the content folder, but vector store has not been populated or DEFAUL_VS_TYPE/EMBEDDING_MODEL changed.
+            if your vector store is ready with the configs, just skip this option to fill info to database only.
            '''
        )
    )
-    parser.add_argument(
-        "--create-tables",
-        action="store_true",
-        help=("create empty tables if not existed")
-    )
-    parser.add_argument(
-        "--clear-tables",
-        action="store_true",
-        help=("create empty tables, or drop the database tables before recreate vector stores")
-    )
-    parser.add_argument(
-        "--import-db",
-        help="import tables from specified sqlite database"
-    )
-    parser.add_argument(
-        "-u",
-        "--update-in-db",
-        action="store_true",
-        help=('''
-            update vector store for files exist in database.
-            use this option if you want to recreate vectors for files exist in db and skip files exist in local folder only.
-            '''
-        )
-    )
-    parser.add_argument(
-        "-i",
-        "--increment",
-        action="store_true",
-        help=('''
-            update vector store for files exist in local folder and not exist in database.
-            use this option if you want to create vectors incrementally.
-            '''
-        )
-    )
-    parser.add_argument(
-        "--prune-db",
-        action="store_true",
-        help=('''
-            delete docs in database that not existed in local folder.
-            it is used to delete database docs after user deleted some doc files in file browser
-            '''
-        )
-    )
-    parser.add_argument(
-        "--prune-folder",
-        action="store_true",
-        help=('''
-            delete doc files in local folder that not existed in database.
-            is is used to free local disk space by delete unused doc files.
-            '''
-        )
-    )
-    parser.add_argument(
-        "-n",
-        "--kb-name",
-        type=str,
-        nargs="+",
-        default=[],
-        help=("specify knowledge base names to operate on. default is all folders exist in KB_ROOT_PATH.")
-    )
-    parser.add_argument(
-        "-e",
-        "--embed-model",
-        type=str,
-        default=EMBEDDING_MODEL,
-        help=("specify embeddings model.")
-    )
-
    args = parser.parse_args()
+
+    dump_server_info()
+
    start_time = datetime.now()

-    if args.create_tables:
-        create_tables() # confirm tables exist
-
-    if args.clear_tables:
-        reset_tables()
-        print("database tables reset")
-
    if args.recreate_vs:
-        create_tables()
+        reset_tables()
+        print("database talbes reseted")
        print("recreating all vector stores")
-        folder2db(kb_names=args.kb_name, mode="recreate_vs", embed_model=args.embed_model)
-    elif args.import_db:
-        import_from_db(args.import_db)
-    elif args.update_in_db:
-        folder2db(kb_names=args.kb_name, mode="update_in_db", embed_model=args.embed_model)
-    elif args.increment:
-        folder2db(kb_names=args.kb_name, mode="increment", embed_model=args.embed_model)
-    elif args.prune_db:
-        prune_db_docs(args.kb_name)
-    elif args.prune_folder:
-        prune_folder_files(args.kb_name)
+        recreate_all_vs()
+    else:
+        create_tables()
+        print("database talbes created")
+        print("filling kb infos to database")
+        for kb in list_kbs_from_folder():
+            folder2db(kb, "fill_info_only")

    end_time = datetime.now()
    print(f"总计用时： {end_time-start_time}")
--- a/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-124076-270516.jpg
+++ b/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-124076-270516.jpg
--- a/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-20096-279847.jpg
+++ b/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-20096-279847.jpg
--- a/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-220157-552735.jpg
+++ b/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-220157-552735.jpg
--- a/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-36114-765327.jpg
+++ b/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-36114-765327.jpg
--- a/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-392521-261326.jpg
+++ b/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-392521-261326.jpg
--- a/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-42284-124759.jpg
+++ b/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-42284-124759.jpg
--- a/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-57107-679259.jpg
+++ b/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-57107-679259.jpg
--- a/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-618350-869132.jpg
+++ b/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-618350-869132.jpg
--- a/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-838373-426344.jpg
+++ b/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-838373-426344.jpg
--- a/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-906937-836104.jpg
+++ b/knowledge_base/samples/content/llm/img/分布式训练技术原理-幕布图片-906937-836104.jpg
--- a/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-108319-429731.jpg
+++ b/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-108319-429731.jpg
--- a/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-580318-260070.jpg
+++ b/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-580318-260070.jpg
--- a/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-793118-735987.jpg
+++ b/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-793118-735987.jpg
--- a/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-918388-323086.jpg
+++ b/knowledge_base/samples/content/llm/img/大模型应用技术原理-幕布图片-918388-323086.jpg
--- a/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-19929-302935.jpg
+++ b/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-19929-302935.jpg
--- a/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-299768-254064.jpg
+++ b/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-299768-254064.jpg
--- a/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-454007-940199.jpg
+++ b/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-454007-940199.jpg
--- a/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-628857-182232.jpg
+++ b/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-628857-182232.jpg
--- a/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-729151-372321.jpg
+++ b/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-729151-372321.jpg
--- a/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-81470-404273.jpg
+++ b/knowledge_base/samples/content/llm/img/大模型技术栈-算法与原理-幕布图片-81470-404273.jpg
--- a/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-17565-176537.jpg
+++ b/knowledge_base/samples/content/llm/img/大模型指令对齐训练原理-幕布图片-17565-176537.jpg
--- a/Show More
+++ b/Show More