From b4702720a8a167474ad3e5f8647527537b324538 Mon Sep 17 00:00:00 2001
From: glide-the <2533736852@qq.com>
Date: Sat, 20 Jan 2024 19:51:41 +0800
Subject: [PATCH] =?UTF-8?q?vllm=E5=8F=82=E6=95=B0=E9=85=8D=E7=BD=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (free text between the "---" line and the first "diff --git"
line is not part of the commit):

* Purpose: fix the imitater plugin's logdir ("/logs" -> "logs", relative
  like the fastchat entry), drop one surplus blank line before the
  fastchat plugin, and rewrite the commented-out vllm worker options from
  Python-dict syntax into YAML mapping syntax, adding max_model_len,
  max_parallel_loading_workers, max_context_len_to_capture and
  enforce_eager.
* The commented vllm options now use YAML-native literals (true / false /
  null). A bare `None` is not a YAML null and would load as the string
  "None" once the line is uncommented.
* The commented vllm options are indented like the comment lines above
  them instead of starting at column 0.
* NOTE(review): this patch was recovered from a whitespace-collapsed copy.
  Leading indentation and the positions of blank context lines are
  reconstructed - confirm against configs/loom.yaml.example before
  applying. The post-image blob id on the "index" line predates the
  literal/indentation adjustments above.

 configs/loom.yaml.example | 49 +++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/configs/loom.yaml.example b/configs/loom.yaml.example
index 262c2e06..aedd7a6c 100644
--- a/configs/loom.yaml.example
+++ b/configs/loom.yaml.example
@@ -23,7 +23,7 @@ plugins:

   - imitater:
       name: "imitater"
-      logdir: "/logs"
+      logdir: "logs"
       worker_name: "qwen-worker1"
       run_openai_api:
         host: "127.0.0.1"
@@ -44,7 +44,6 @@ plugins:
               embed_model_device: "0"
               embed_batch_size: 16

-
   - fastchat:
       name: "fastchat"
       logdir: "logs"
@@ -91,27 +90,31 @@ plugins:
             # 以下为vllm_worker配置参数,注意使用vllm必须有gpu,仅在Linux测试通过

             # tokenizer = model_path # 如果tokenizer与model_path不一致在此处添加
-            # 'tokenizer_mode':'auto',
-            # 'trust_remote_code':True,
-            # 'download_dir':None,
-            # 'load_format':'auto',
-            # 'dtype':'auto',
-            # 'seed':0,
-            # 'worker_use_ray':False,
-            # 'pipeline_parallel_size':1,
-            # 'tensor_parallel_size':1,
-            # 'block_size':16,
-            # 'swap_space':4 , # GiB
-            # 'gpu_memory_utilization':0.90,
-            # 'max_num_batched_tokens':2560,
-            # 'max_num_seqs':256,
-            # 'disable_log_stats':False,
-            # 'conv_template':None,
-            # 'limit_worker_concurrency':5,
-            # 'no_register':False,
-            # 'num_gpus': 1
-            # 'engine_use_ray': False,
-            # 'disable_log_requests': False
+            # 'max_model_len': 1024
+            # 'max_parallel_loading_workers': 1
+            # 'max_context_len_to_capture': 1024
+            # 'enforce_eager': false
+            # 'tokenizer_mode': 'auto'
+            # 'trust_remote_code': true
+            # 'download_dir': null
+            # 'load_format': 'auto'
+            # 'dtype': 'auto'
+            # 'seed': 0
+            # 'worker_use_ray': false
+            # 'pipeline_parallel_size': 1
+            # 'tensor_parallel_size': 1
+            # 'block_size': 16
+            # 'swap_space': 4  # GiB
+            # 'gpu_memory_utilization': 0.90
+            # 'max_num_batched_tokens': 2560
+            # 'max_num_seqs': 256
+            # 'disable_log_stats': false
+            # 'conv_template': 'qwen-7b-chat'
+            # 'limit_worker_concurrency': 5
+            # 'no_register': false
+            # 'num_gpus': 1
+            # 'engine_use_ray': false
+            # 'disable_log_requests': false

         - chatglm3-6b:
             host: "127.0.0.1"