diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 61078dcc7517b..c2a2a35c5bb8a 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -106,27 +106,30 @@ steps:
   source_file_dependencies:
   - vllm/
   commands:
-  - pip install -e ./plugins/vllm_add_dummy_model
   - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_generate_multiple_loras.py --ignore=entrypoints/llm/test_guided_generate.py
   - pytest -v -s entrypoints/llm/test_lazy_outlines.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py
-  - pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process
+  - pytest -v -s entrypoints/openai
   - pytest -v -s entrypoints/test_chat_utils.py
   - pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
 
-# platform plugin test need a single pipeline, since it needs to install a new fake platform plugin
-- label: Platform Plugin Test # < 1min
+# Plugin tests need a single pipeline, since they install a new fake platform plugin.
+- label: Generic Plugin Test
   working_dir: "/vllm-workspace/tests"
   fast_check: true
   mirror_hardwares: [amd]
   source_file_dependencies:
   - vllm/
+  - tests/models
   commands:
+  # Test the model plugin first, since installing the platform plugin switches to a new fake platform.
+  - pip install -e ./plugins/vllm_add_dummy_model
+  - pytest -v -s plugins/test_model_plugin.py
+  # Test the platform plugin second.
   - pip install -e ./plugins/vllm_add_dummy_platform
-  - pytest -v -s platform/test_platform_plugin.py
+  - pytest -v -s plugins/test_platform_plugin.py
 
 - label: Distributed Tests (4 GPUs) # 10min
   working_dir: "/vllm-workspace/tests"
@@ -344,8 +347,6 @@ steps:
   - vllm/
   - tests/models
   commands:
-  - pip install -e ./plugins/vllm_add_dummy_model
-  - pytest -v -s models/test_oot_registration.py # it needs a clean process
   - pytest -v -s models/test_registry.py
   - pytest -v -s models/test_initialization.py
 
@@ -480,6 +481,7 @@ steps:
   - pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m 'distributed(num_gpus=2)'
   - pytest models/decoder_only/vision_language/test_models.py -v -s -m 'distributed(num_gpus=2)'
   - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
+  # This distributed test needs to run on 2 GPUs; move it to the plugin tests once those run on 2 GPUs.
   - pip install -e ./plugins/vllm_add_dummy_model
   - pytest -v -s distributed/test_distributed_oot.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
diff --git a/tests/distributed/test_distributed_oot.py b/tests/distributed/test_distributed_oot.py
index 62e77a2f77597..2d157b4c13828 100644
--- a/tests/distributed/test_distributed_oot.py
+++ b/tests/distributed/test_distributed_oot.py
@@ -1,5 +1,4 @@
-from ..entrypoints.openai.test_oot_registration import (
-    run_and_test_dummy_opt_api_server)
+from ..plugins.test_model_plugin import run_and_test_dummy_opt_api_server
 
 
 def test_distributed_oot(dummy_opt_path: str):
diff --git a/tests/entrypoints/openai/test_oot_registration.py b/tests/entrypoints/openai/test_oot_registration.py
deleted file mode 100644
index b25cb1d0e7222..0000000000000
--- a/tests/entrypoints/openai/test_oot_registration.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from ...utils import VLLM_PATH, RemoteOpenAIServer
-
-chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja"
-assert chatml_jinja_path.exists()
-
-
-def run_and_test_dummy_opt_api_server(model, tp=1):
-    # the model is registered through the plugin
-    server_args = [
-        "--gpu-memory-utilization",
-        "0.10",
-        "--dtype",
-        "float32",
-        "--chat-template",
-        str(chatml_jinja_path),
-        "--load-format",
-        "dummy",
-        "-tp",
-        f"{tp}",
-    ]
-    with RemoteOpenAIServer(model, server_args) as server:
-        client = server.get_client()
-        completion = client.chat.completions.create(
-            model=model,
-            messages=[{
-                "role": "system",
-                "content": "You are a helpful assistant."
-            }, {
-                "role": "user",
-                "content": "Hello!"
-            }],
-            temperature=0,
-        )
-        generated_text = completion.choices[0].message.content
-        assert generated_text is not None
-        # make sure only the first token is generated
-        rest = generated_text.replace("<s>", "")
-        assert rest == ""
-
-
-def test_oot_registration_for_api_server(dummy_opt_path: str):
-    run_and_test_dummy_opt_api_server(dummy_opt_path)
diff --git a/tests/plugins/__init__.py b/tests/plugins/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/tests/models/test_oot_registration.py b/tests/plugins/test_model_plugin.py
similarity index 65%
rename from tests/models/test_oot_registration.py
rename to tests/plugins/test_model_plugin.py
index 2c413a633896a..8ee7436621a09 100644
--- a/tests/models/test_oot_registration.py
+++ b/tests/plugins/test_model_plugin.py
@@ -5,7 +5,8 @@
 from vllm import LLM, SamplingParams
 from vllm.assets.image import ImageAsset
 
-from ..utils import fork_new_process_for_each_test
+from ..utils import (VLLM_PATH, RemoteOpenAIServer,
+                     fork_new_process_for_each_test)
 
 
 @fork_new_process_for_each_test
@@ -78,3 +79,45 @@ def test_oot_registration_multimodal(dummy_llava_path):
     # make sure only the first token is generated
     rest = generated_text.replace(first_token, "")
     assert rest == ""
+
+
+chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja"
+assert chatml_jinja_path.exists()
+
+
+def run_and_test_dummy_opt_api_server(model, tp=1):
+    # the model is registered through the plugin
+    server_args = [
+        "--gpu-memory-utilization",
+        "0.10",
+        "--dtype",
+        "float32",
+        "--chat-template",
+        str(chatml_jinja_path),
+        "--load-format",
+        "dummy",
+        "-tp",
+        f"{tp}",
+    ]
+    with RemoteOpenAIServer(model, server_args) as server:
+        client = server.get_client()
+        completion = client.chat.completions.create(
+            model=model,
+            messages=[{
+                "role": "system",
+                "content": "You are a helpful assistant."
+            }, {
+                "role": "user",
+                "content": "Hello!"
+            }],
+            temperature=0,
+        )
+        generated_text = completion.choices[0].message.content
+        assert generated_text is not None
+        # make sure only the first token is generated
+        rest = generated_text.replace("<s>", "")
+        assert rest == ""
+
+
+def test_oot_registration_for_api_server(dummy_opt_path: str):
+    run_and_test_dummy_opt_api_server(dummy_opt_path)
diff --git a/tests/platform/test_platform_plugin.py b/tests/plugins/test_platform_plugin.py
similarity index 100%
rename from tests/platform/test_platform_plugin.py
rename to tests/plugins/test_platform_plugin.py
diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py
index 5e7e94acdd36e..7f00d15f6a4d8 100644
--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
@@ -36,6 +36,10 @@ class CurrentPlatform(Platform):
 
     def __getattribute__(self, name: str) -> Any:
         """If the attribute is not found, go pass to the current platform."""
+        # Use __getattribute__ here to fetch the attribute from the current
+        # platform. __getattr__ does not work because it is only called when
+        # normal lookup fails, and CurrentPlatform inherits every attribute
+        # from Platform, so the lookup never fails.
         global _current_platform
         # Go pass to the current platform.
         return _current_platform.__getattribute__(name)
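
A minimal standalone sketch of the __getattr__ vs __getattribute__ behavior that the new comment in vllm/platforms/__init__.py describes. This is an illustration only, not part of the diff; the subclass names with "WithGetattr"/"WithGetattribute" suffixes are made up for the example and the Platform class here is a toy stand-in, not the real vLLM class.

from typing import Any


class Platform:
    # A toy base class standing in for vllm.platforms.Platform.
    device_type: str = "generic"


class CurrentPlatformWithGetattr(Platform):

    def __getattr__(self, name: str) -> Any:
        # Only invoked when normal attribute lookup fails. Every Platform
        # attribute is found by normal lookup, so this is never reached
        # for inherited attributes.
        return f"delegated:{name}"


class CurrentPlatformWithGetattribute(Platform):

    def __getattribute__(self, name: str) -> Any:
        # Invoked for every attribute access, so all lookups can be
        # forwarded to another object (here just a marker string).
        return f"delegated:{name}"


# __getattr__ is skipped because device_type exists on the base class.
assert CurrentPlatformWithGetattr().device_type == "generic"
# __getattribute__ intercepts the access and delegates it.
assert CurrentPlatformWithGetattribute().device_type == "delegated:device_type"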