LICENSE MANIFEST.in README.md pyproject.toml requirements.txt setup.py csrc/activation.cpp csrc/activation_kernels.cu csrc/attention.cpp csrc/cache.cpp csrc/cache_kernels.cu csrc/cuda_utils.cpp csrc/cuda_utils_kernels.cu csrc/dispatch_utils.h csrc/layernorm.cpp csrc/layernorm_kernels.cu csrc/pos_encoding.cpp csrc/pos_encoding_kernels.cu csrc/quantization.cpp csrc/reduction_utils.cuh csrc/attention/attention_dtypes.h csrc/attention/attention_generic.cuh csrc/attention/attention_kernels.cu csrc/attention/attention_utils.cuh csrc/attention/dtype_bfloat16.cuh csrc/attention/dtype_float16.cuh csrc/attention/dtype_float32.cuh csrc/quantization/awq/dequantize.cuh csrc/quantization/awq/gemm_kernels.cu csrc/quantization/squeezellm/quant_cuda_kernel.cu tests/test_regression.py vllm/__init__.py vllm/block.py vllm/config.py vllm/logger.py vllm/outputs.py vllm/py.typed vllm/sampling_params.py vllm/sequence.py vllm/utils.py vllm.egg-info/PKG-INFO vllm.egg-info/SOURCES.txt vllm.egg-info/dependency_links.txt vllm.egg-info/requires.txt vllm.egg-info/top_level.txt vllm/core/__init__.py vllm/core/block_manager.py vllm/core/policy.py vllm/core/scheduler.py vllm/engine/__init__.py vllm/engine/arg_utils.py vllm/engine/async_llm_engine.py vllm/engine/llm_engine.py vllm/engine/ray_utils.py vllm/entrypoints/__init__.py vllm/entrypoints/api_server.py vllm/entrypoints/llm.py vllm/entrypoints/openai/__init__.py vllm/entrypoints/openai/api_server.py vllm/entrypoints/openai/protocol.py vllm/model_executor/__init__.py vllm/model_executor/input_metadata.py vllm/model_executor/model_loader.py vllm/model_executor/utils.py vllm/model_executor/weight_utils.py vllm/model_executor/layers/__init__.py vllm/model_executor/layers/activation.py vllm/model_executor/layers/attention.py vllm/model_executor/layers/layernorm.py vllm/model_executor/layers/linear.py vllm/model_executor/layers/rotary_embedding.py vllm/model_executor/layers/sampler.py vllm/model_executor/layers/vocab_parallel_embedding.py vllm/model_executor/layers/quantization/__init__.py vllm/model_executor/layers/quantization/awq.py vllm/model_executor/layers/quantization/base_config.py vllm/model_executor/layers/quantization/squeezellm.py vllm/model_executor/models/__init__.py vllm/model_executor/models/aquila.py vllm/model_executor/models/baichuan.py vllm/model_executor/models/bloom.py vllm/model_executor/models/chatglm.py vllm/model_executor/models/cpm.py vllm/model_executor/models/cpm_mistral.py vllm/model_executor/models/cpm_old.py vllm/model_executor/models/cpmmistral.py vllm/model_executor/models/falcon.py vllm/model_executor/models/gpt2.py vllm/model_executor/models/gpt_bigcode.py vllm/model_executor/models/gpt_j.py vllm/model_executor/models/gpt_neox.py vllm/model_executor/models/internlm.py vllm/model_executor/models/llama.py vllm/model_executor/models/mistral.py vllm/model_executor/models/mpt.py vllm/model_executor/models/opt.py vllm/model_executor/models/phi_1_5.py vllm/model_executor/models/qwen.py vllm/model_executor/models/yi.py vllm/model_executor/parallel_utils/__init__.py vllm/model_executor/parallel_utils/communication_op.py vllm/model_executor/parallel_utils/parallel_state.py vllm/model_executor/parallel_utils/utils.py vllm/transformers_utils/__init__.py vllm/transformers_utils/config.py vllm/transformers_utils/tokenizer.py vllm/transformers_utils/configs/__init__.py vllm/transformers_utils/configs/aquila.py vllm/transformers_utils/configs/baichuan.py vllm/transformers_utils/configs/chatglm.py vllm/transformers_utils/configs/cpm.py vllm/transformers_utils/configs/cpm_mistral.py vllm/transformers_utils/configs/cpmmistral.py vllm/transformers_utils/configs/falcon.py vllm/transformers_utils/configs/mpt.py vllm/transformers_utils/configs/qwen.py vllm/transformers_utils/configs/yi.py vllm/worker/__init__.py vllm/worker/cache_engine.py vllm/worker/worker.py