vllm参数配置

This commit is contained in:
glide-the 2024-01-20 19:51:41 +08:00 committed by liunux4odoo
parent 547b5b9206
commit b4702720a8

View File

@ -23,7 +23,7 @@ plugins:
- imitater: - imitater:
name: "imitater" name: "imitater"
logdir: "/logs" logdir: "logs"
worker_name: "qwen-worker1" worker_name: "qwen-worker1"
run_openai_api: run_openai_api:
host: "127.0.0.1" host: "127.0.0.1"
@ -44,7 +44,6 @@ plugins:
embed_model_device: "0" embed_model_device: "0"
embed_batch_size: 16 embed_batch_size: 16
- fastchat: - fastchat:
name: "fastchat" name: "fastchat"
logdir: "logs" logdir: "logs"
@ -91,26 +90,30 @@ plugins:
# 以下为vllm_worker配置参数,注意使用vllm必须有gpu,仅在Linux测试通过 # 以下为vllm_worker配置参数,注意使用vllm必须有gpu,仅在Linux测试通过
# tokenizer = model_path # 如果tokenizer与model_path不一致,在此处添加 # tokenizer = model_path # 如果tokenizer与model_path不一致,在此处添加
# 'tokenizer_mode':'auto', # 'max_model_len': 1024
# 'trust_remote_code':True, # 'max_parallel_loading_workers': 1
# 'download_dir':None, # 'max_context_len_to_capture': 1024
# 'load_format':'auto', # 'enforce_eager': False
# 'dtype':'auto', # 'tokenizer_mode': 'auto'
# 'seed':0, # 'trust_remote_code': True
# 'worker_use_ray':False, # 'download_dir': None
# 'pipeline_parallel_size':1, # 'load_format': 'auto'
# 'tensor_parallel_size':1, # 'dtype': 'auto'
# 'block_size':16, # 'seed': 0
# 'swap_space':4 , # GiB # 'worker_use_ray': False
# 'gpu_memory_utilization':0.90, # 'pipeline_parallel_size': 1
# 'max_num_batched_tokens':2560, # 'tensor_parallel_size': 1
# 'max_num_seqs':256, # 'block_size': 16
# 'disable_log_stats':False, # 'swap_space': 4 # GiB
# 'conv_template':None, # 'gpu_memory_utilization': 0.90
# 'limit_worker_concurrency':5, # 'max_num_batched_tokens': 2560
# 'no_register':False, # 'max_num_seqs': 256
# 'disable_log_stats': False
# 'conv_template': 'qwen-7b-chat'
# 'limit_worker_concurrency': 5
# 'no_register': False
# 'num_gpus': 1 # 'num_gpus': 1
# 'engine_use_ray': False, # 'engine_use_ray': False
# 'disable_log_requests': False # 'disable_log_requests': False
- chatglm3-6b: - chatglm3-6b: