Mirror of https://github.com/RYDE-WORK/Langchain-Chatchat.git (synced 2026-01-19 13:23:16 +08:00)
New features:
- Improve OCR for PDF files by filtering out meaningless small images by @liunux4odoo #2525
- Support the Gemini online model by @yhfgyyf #2630
- Support the GLM4 online model by @zRzRzRzRzRzRzR
- Update Elasticsearch to use HTTPS connections by @xldistance #2390
- Enhance OCR recognition of PPT and DOC knowledge-base files by @596192804 #2013
- Update the Agent dialogue feature by @zRzRzRzRzRzRzR
- Obtain a connection from the connection pool when an object is created, instead of opening a new connection on every method call by @Lijia0 #2480
- Let ChatOpenAI check whether the token count exceeds the model's context length by @glide-the
- Update database runtime error reporting and project milestones by @zRzRzRzRzRzRzR #2659
- Update configuration files, documentation, and dependencies by @imClumsyPanda @zRzRzRzRzRzRzR
- Add a Japanese README by @eltociear #2787

Fixes:
- PGVector vector store connection error after the langchain update by @HALIndex #2591
- Minimax's model worker error by @xyhshen
- Elasticsearch store could not perform vector search; add mappings to create the vector index by MSZheng20 #2688
52 lines
1.6 KiB
Python
from typing import (
    TYPE_CHECKING,
    Any,
    Tuple,
)
import sys
import logging

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    import tiktoken


class MinxChatOpenAI:
    """Helpers for resolving the tiktoken encoding used by a ChatOpenAI-style object."""

    @staticmethod
    def import_tiktoken() -> Any:
        # Import tiktoken lazily so the dependency is only required when
        # token counting is actually used.
        try:
            import tiktoken
        except ImportError:
            raise ValueError(
                "Could not import tiktoken python package. "
                "This is needed in order to calculate get_token_ids. "
                "Please install it with `pip install tiktoken`."
            )
        return tiktoken

    @staticmethod
    def get_encoding_model(self) -> Tuple[str, "tiktoken.Encoding"]:
        # Declared as a staticmethod but written to be bound onto a
        # ChatOpenAI-like instance, hence the explicit `self` parameter
        # (it reads self.tiktoken_model_name and self.model_name).
        tiktoken_ = MinxChatOpenAI.import_tiktoken()
        if self.tiktoken_model_name is not None:
            model = self.tiktoken_model_name
        else:
            model = self.model_name
            if model == "gpt-3.5-turbo":
                # gpt-3.5-turbo may change over time.
                # Returning num tokens assuming gpt-3.5-turbo-0301.
                model = "gpt-3.5-turbo-0301"
            elif model == "gpt-4":
                # gpt-4 may change over time.
                # Returning num tokens assuming gpt-4-0314.
                model = "gpt-4-0314"
        # Look up the encoding for the resolved model name; fall back to
        # cl100k_base if tiktoken does not recognise the model.
        try:
            encoding = tiktoken_.encoding_for_model(model)
        except Exception:
            logger.warning("Warning: model not found. Using cl100k_base encoding.")
            model = "cl100k_base"
            encoding = tiktoken_.get_encoding(model)
        return model, encoding
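
The snippet below is a minimal usage sketch, not code from the repository. It assumes, in line with the changelog entry about ChatOpenAI context-length checking, that the project binds MinxChatOpenAI.get_encoding_model onto langchain's ChatOpenAI in place of its private _get_encoding_model, and that ChatOpenAI exposes the model_name and tiktoken_model_name attributes the helper reads via self. The import path server.minx_chat_openai, the 4096-token context window, and the placeholder API key are all illustrative assumptions.

from langchain.chat_models import ChatOpenAI

from server.minx_chat_openai import MinxChatOpenAI  # hypothetical import path

# Bind the helper onto ChatOpenAI so instances use the patched encoding lookup
# (the private _get_encoding_model target is an assumption, not shown above).
ChatOpenAI._get_encoding_model = MinxChatOpenAI.get_encoding_model

llm = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key="EMPTY")  # placeholder key
model, encoding = llm._get_encoding_model()

# Rough pre-flight check: compare the prompt's token count against an assumed
# context window before sending the request.
prompt = "Summarise the knowledge base entry about OCR for PDF files."
num_tokens = len(encoding.encode(prompt))
MAX_CONTEXT_TOKENS = 4096  # illustrative value, not read from the model
if num_tokens > MAX_CONTEXT_TOKENS:
    raise ValueError(
        f"Prompt uses {num_tokens} tokens, exceeding the assumed "
        f"{MAX_CONTEXT_TOKENS}-token context window for {model}."
    )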