Langchain-Chatchat/configs/server_config.py.example
liunux4odoo 9c525b7fa5
publish 0.2.10 (#2797)
新功能:
- 优化 PDF 文件的 OCR,过滤无意义的小图片 by @liunux4odoo #2525
- 支持 Gemini 在线模型 by @yhfgyyf #2630
- 支持 GLM4 在线模型 by @zRzRzRzRzRzRzR
- elasticsearch更新https连接 by @xldistance #2390
- 增强对PPT、DOC知识库文件的OCR识别 by @596192804 #2013
- 更新 Agent 对话功能 by @zRzRzRzRzRzRzR
- 每次创建对象时从连接池获取连接,避免每次执行方法时都新建连接 by @Lijia0 #2480
- 实现 ChatOpenAI 判断token有没有超过模型的context上下文长度 by @glide-the
- 更新运行数据库报错和项目里程碑 by @zRzRzRzRzRzRzR #2659
- 更新配置文件/文档/依赖 by @imClumsyPanda @zRzRzRzRzRzRzR
- 添加日文版 readme by @eltociear #2787

修复:
- langchain 更新后,PGVector 向量库连接错误 by @HALIndex #2591
- Minimax's model worker 错误 by @xyhshen 
- ES库无法向量检索.添加mappings创建向量索引 by MSZheng20 #2688
2024-01-26 06:58:49 +08:00

138 lines
3.9 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import sys
from configs.model_config import LLM_DEVICE
# httpx 请求默认超时时间(秒)。如果加载模型或对话较慢,出现超时错误,可以适当加大该值。
HTTPX_DEFAULT_TIMEOUT = 300.0
# API 是否开启跨域默认为False如果需要开启请设置为True
# is open cross domain
OPEN_CROSS_DOMAIN = False
# 各服务器默认绑定host。如改为"0.0.0.0"需要修改下方所有XX_SERVER的host
DEFAULT_BIND_HOST = "0.0.0.0" if sys.platform != "win32" else "127.0.0.1"
# webui.py server
WEBUI_SERVER = {
"host": DEFAULT_BIND_HOST,
"port": 8501,
}
# api.py server
API_SERVER = {
"host": DEFAULT_BIND_HOST,
"port": 7861,
}
# fastchat openai_api server
FSCHAT_OPENAI_API = {
"host": DEFAULT_BIND_HOST,
"port": 20000,
}
# fastchat model_worker server
# 这些模型必须是在model_config.MODEL_PATH或ONLINE_MODEL中正确配置的。
# 在启动startup.py时可用通过`--model-name xxxx yyyy`指定模型不指定则为LLM_MODELS
FSCHAT_MODEL_WORKERS = {
# 所有模型共用的默认配置,可在模型专项配置中进行覆盖。
"default": {
"host": DEFAULT_BIND_HOST,
"port": 20002,
"device": LLM_DEVICE,
# False,'vllm',使用的推理加速框架,使用vllm如果出现HuggingFace通信问题参见doc/FAQ
# vllm对一些模型支持还不成熟暂时默认关闭
"infer_turbo": False,
# model_worker多卡加载需要配置的参数
# "gpus": None, # 使用的GPU以str的格式指定如"0,1"如失效请使用CUDA_VISIBLE_DEVICES="0,1"等形式指定
# "num_gpus": 1, # 使用GPU的数量
# "max_gpu_memory": "20GiB", # 每个GPU占用的最大显存
# 以下为model_worker非常用参数可根据需要配置
# "load_8bit": False, # 开启8bit量化
# "cpu_offloading": None,
# "gptq_ckpt": None,
# "gptq_wbits": 16,
# "gptq_groupsize": -1,
# "gptq_act_order": False,
# "awq_ckpt": None,
# "awq_wbits": 16,
# "awq_groupsize": -1,
# "model_names": LLM_MODELS,
# "conv_template": None,
# "limit_worker_concurrency": 5,
# "stream_interval": 2,
# "no_register": False,
# "embed_in_truncate": False,
# 以下为vllm_worker配置参数,注意使用vllm必须有gpu仅在Linux测试通过
# tokenizer = model_path # 如果tokenizer与model_path不一致在此处添加
# 'tokenizer_mode':'auto',
# 'trust_remote_code':True,
# 'download_dir':None,
# 'load_format':'auto',
# 'dtype':'auto',
# 'seed':0,
# 'worker_use_ray':False,
# 'pipeline_parallel_size':1,
# 'tensor_parallel_size':1,
# 'block_size':16,
# 'swap_space':4 , # GiB
# 'gpu_memory_utilization':0.90,
# 'max_num_batched_tokens':2560,
# 'max_num_seqs':256,
# 'disable_log_stats':False,
# 'conv_template':None,
# 'limit_worker_concurrency':5,
# 'no_register':False,
# 'num_gpus': 1
# 'engine_use_ray': False,
# 'disable_log_requests': False
},
"Qwen-1_8B-Chat": {
"device": "cpu",
},
"chatglm3-6b": {
"device": "cuda",
},
# 以下配置可以不用修改在model_config中设置启动的模型
"zhipu-api": {
"port": 21001,
},
"minimax-api": {
"port": 21002,
},
"xinghuo-api": {
"port": 21003,
},
"qianfan-api": {
"port": 21004,
},
"fangzhou-api": {
"port": 21005,
},
"qwen-api": {
"port": 21006,
},
"baichuan-api": {
"port": 21007,
},
"azure-api": {
"port": 21008,
},
"tiangong-api": {
"port": 21009,
},
"gemini-api": {
"port": 21010,
},
}
FSCHAT_CONTROLLER = {
"host": DEFAULT_BIND_HOST,
"port": 20001,
"dispatch_method": "shortest_queue",
}