Update plugin test
Signed-off-by: wangxiyuan <[email protected]>
wangxiyuan committed Dec 24, 2024
1 parent f524313 commit 735c0d1
Showing 7 changed files with 59 additions and 53 deletions.
18 changes: 10 additions & 8 deletions .buildkite/test-pipeline.yaml
@@ -106,27 +106,30 @@ steps:
   source_file_dependencies:
   - vllm/
   commands:
-  - pip install -e ./plugins/vllm_add_dummy_model
   - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_generate_multiple_loras.py --ignore=entrypoints/llm/test_guided_generate.py
   - pytest -v -s entrypoints/llm/test_lazy_outlines.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py
-  - pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process
+  - pytest -v -s entrypoints/openai
   - pytest -v -s entrypoints/test_chat_utils.py
   - pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
 
-# platform plugin test need a single pipeline, since it needs to install a new fake platform plugin
-- label: Platform Plugin Test # < 1min
+# Plugin tests need their own pipeline, since they install a new fake platform plugin.
+- label: Generic Plugin Test
   working_dir: "/vllm-workspace/tests"
   fast_check: true
   mirror_hardwares: [amd]
   source_file_dependencies:
   - vllm/
+  - tests/models
   commands:
+  # Test the model plugin first: once the platform plugin is installed, a new fake platform takes over.
+  - pip install -e ./plugins/vllm_add_dummy_model
+  - pytest -v -s plugins/test_model_plugin.py
+  # Then test the platform plugin.
   - pip install -e ./plugins/vllm_add_dummy_platform
-  - pytest -v -s platform/test_platform_plugin.py
+  - pytest -v -s plugins/test_platform_plugin.py
 
 - label: Distributed Tests (4 GPUs) # 10min
   working_dir: "/vllm-workspace/tests"
@@ -344,8 +347,6 @@ steps:
   - vllm/
   - tests/models
   commands:
-  - pip install -e ./plugins/vllm_add_dummy_model
-  - pytest -v -s models/test_oot_registration.py # it needs a clean process
   - pytest -v -s models/test_registry.py
   - pytest -v -s models/test_initialization.py

@@ -480,6 +481,7 @@ steps:
   - pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m 'distributed(num_gpus=2)'
   - pytest models/decoder_only/vision_language/test_models.py -v -s -m 'distributed(num_gpus=2)'
   - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
+  # This distributed test needs 2 GPUs; move it into the plugin tests once those run on 2 GPUs.
   - pip install -e ./plugins/vllm_add_dummy_model
   - pytest -v -s distributed/test_distributed_oot.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
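Note on the plugin mechanism: the dummy packages installed above with pip install -e hook into vLLM through Python entry points that vLLM scans when loading plugins. Below is a minimal sketch of how such a dummy model package can be wired up; it is not the actual plugin source, the entry-point group name vllm.general_plugins and the register helper follow vLLM's plugin convention, and MyOPTForCausalLM is an illustrative name.

# setup.py of the dummy model plugin (sketch)
from setuptools import setup

setup(
    name="vllm_add_dummy_model",
    version="0.1",
    packages=["vllm_add_dummy_model"],
    entry_points={
        # vLLM calls every function registered under this group at startup.
        "vllm.general_plugins": [
            "register_dummy_model = vllm_add_dummy_model:register",
        ],
    },
)

# vllm_add_dummy_model/__init__.py (sketch)
def register():
    from vllm import ModelRegistry
    # Map an architecture name to an out-of-tree implementation, given as a
    # lazy "module:class" string so the model is only imported when used.
    ModelRegistry.register_model(
        "MyOPTForCausalLM", "vllm_add_dummy_model.my_opt:MyOPTForCausalLM")

A platform plugin is packaged the same way but registers under the vllm.platform_plugins group, which is why installing vllm_add_dummy_platform changes which platform vLLM resolves and forces these tests into their own pipeline.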
3 changes: 1 addition & 2 deletions tests/distributed/test_distributed_oot.py
@@ -1,5 +1,4 @@
-from ..entrypoints.openai.test_oot_registration import (
-    run_and_test_dummy_opt_api_server)
+from ..plugins.test_model_plugin import run_and_test_dummy_opt_api_server
 
 
 def test_distributed_oot(dummy_opt_path: str):
42 changes: 0 additions & 42 deletions tests/entrypoints/openai/test_oot_registration.py

This file was deleted.

Empty file added tests/plugins/__init__.py
45 changes: 44 additions & 1 deletion tests/plugins/test_model_plugin.py
@@ -5,7 +5,8 @@
 from vllm import LLM, SamplingParams
 from vllm.assets.image import ImageAsset
 
-from ..utils import fork_new_process_for_each_test
+from ..utils import (VLLM_PATH, RemoteOpenAIServer,
+                     fork_new_process_for_each_test)
 
 
 @fork_new_process_for_each_test
@@ -78,3 +79,45 @@ def test_oot_registration_multimodal(dummy_llava_path):
     # make sure only the first token is generated
     rest = generated_text.replace(first_token, "")
     assert rest == ""
+
+
+chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja"
+assert chatml_jinja_path.exists()
+
+
+def run_and_test_dummy_opt_api_server(model, tp=1):
+    # the model is registered through the plugin
+    server_args = [
+        "--gpu-memory-utilization",
+        "0.10",
+        "--dtype",
+        "float32",
+        "--chat-template",
+        str(chatml_jinja_path),
+        "--load-format",
+        "dummy",
+        "-tp",
+        f"{tp}",
+    ]
+    with RemoteOpenAIServer(model, server_args) as server:
+        client = server.get_client()
+        completion = client.chat.completions.create(
+            model=model,
+            messages=[{
+                "role": "system",
+                "content": "You are a helpful assistant."
+            }, {
+                "role": "user",
+                "content": "Hello!"
+            }],
+            temperature=0,
+        )
+        generated_text = completion.choices[0].message.content
+        assert generated_text is not None
+        # make sure only the first token is generated
+        rest = generated_text.replace("<s>", "")
+        assert rest == ""
+
+
+def test_oot_registration_for_api_server(dummy_opt_path: str):
+    run_and_test_dummy_opt_api_server(dummy_opt_path)
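The recurring "it needs a clean process" notes and the @fork_new_process_for_each_test decorator above exist because out-of-tree registration mutates process-global state (the model registry, the resolved platform), and that state cannot be unwound within a test session. Below is a minimal sketch of what such a fork-per-test helper can look like; the real one lives in tests/utils.py and may differ in details.

import functools
import os
import traceback


def fork_new_process_for_each_test(f):
    """Run the wrapped test in a forked child process so that any global
    state it mutates cannot leak into the tests that follow."""

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        pid = os.fork()
        if pid == 0:
            # Child: run the test, then exit with a success/failure code.
            try:
                f(*args, **kwargs)
                os._exit(0)
            except BaseException:
                traceback.print_exc()
                os._exit(1)
        # Parent: wait for the child and assert that it succeeded.
        _, status = os.waitpid(pid, 0)
        assert os.WEXITSTATUS(status) == 0, f"{f.__name__} failed in child"

    return wrapper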
tests/{platform → plugins}/test_platform_plugin.py
File renamed without changes.
4 changes: 4 additions & 0 deletions vllm/platforms/__init__.py
@@ -36,6 +36,10 @@ class CurrentPlatform(Platform):

     def __getattribute__(self, name: str) -> Any:
         """If the attribute is not found here, pass the lookup on to the current platform."""
+        # Use __getattribute__ here to fetch the attribute from the current
+        # platform. __getattr__ would not work: it is only called when the
+        # attribute is not found, and since CurrentPlatform inherits from
+        # Platform, the lookup would already be satisfied by the base class.
         global _current_platform
         # Pass the lookup on to the current platform.
         return _current_platform.__getattribute__(name)
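For illustration, a standalone toy (not vLLM code) of the lookup rule the comment describes: __getattr__ fires only when normal lookup fails, and attributes inherited from the base class are found by normal lookup, so only __getattribute__ reliably forwards every access.

class Base:
    device_name = "generic"


class ViaGetattr(Base):
    def __getattr__(self, name):
        # Never reached for inherited attributes like device_name.
        return "forwarded"


class ViaGetattribute(Base):
    def __getattribute__(self, name):
        # Intercepts every lookup; keep dunder access on the normal path.
        if name.startswith("__"):
            return object.__getattribute__(self, name)
        return "forwarded"


assert ViaGetattr().device_name == "generic"          # base class wins
assert ViaGetattribute().device_name == "forwarded"   # always delegated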
