Make score a separate task

Signed-off-by: DarkLight1337 <[email protected]>
vllm-project · Dec 2, 2024 · 3d149e7
1 parent a536fc8
commit 3d149e7
Show file tree

Hide file tree

Showing 4 changed files with 17 additions and 11 deletions.
diff --git a/docs/source/models/pooling_models.rst b/docs/source/models/pooling_models.rst
@@ -24,13 +24,15 @@ See :ref:`Engine Arguments <engine_args>` for a list of options when initializin
 For pooling models, we support the following :code:`task` options:
 
 - Embedding (:code:`"embed"` / :code:`"embedding"`)
-- Classification (:code:`"classify"`/ :code:`"score"`) -- reranking models fall under this category.
+- Classification (:code:`"classify"`)
+- Sentence Pair Scoring (:code:`"score"`)
 - Reward Modeling (:code:`"reward"`)
 
 The selected task determines the default :class:`~vllm.model_executor.layers.Pooler` that is used:
 
 - Embedding: Extract only the hidden states corresponding to the last token, and apply normalization.
 - Classification: Extract only the hidden states corresponding to the last token, and apply softmax.
+- Sentence Pair Scoring: Extract only the hidden states corresponding to the last token, and apply softmax.
 - Reward Modeling: Extract all of the hidden states and return them directly.
 
 When loading `Sentence Transformers <https://huggingface.co/sentence-transformers>`__ models,

diff --git a/tests/test_config.py b/tests/test_config.py
@@ -13,6 +13,7 @@
         ("facebook/opt-125m", "generate", "generate"),
         ("intfloat/e5-mistral-7b-instruct", "pooling", "embed"),
         ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
+        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "score"),
         ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "reward"),
     ],
 )

diff --git a/vllm/config.py b/vllm/config.py
@@ -47,15 +47,16 @@
 _MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120
 
 TaskOption = Literal["auto", "generate", "embedding", "embed", "classify",
-                     "score", "reward", ]
+                     "score", "reward"]
 
-_ResolvedTask = Literal["generate", "embed", "classify", "reward", "draft"]
+_ResolvedTask = Literal["generate", "embed", "classify", "score", "reward",
+                        "draft"]
 
 RunnerType = Literal["generate", "pooling", "draft"]
 
 _RUNNER_TASKS: Dict[RunnerType, List[_ResolvedTask]] = {
     "generate": ["generate"],
-    "pooling": ["embed", "classify", "reward"],
+    "pooling": ["embed", "classify", "score", "reward"],
 }
 
 _TASK_RUNNER: Dict[_ResolvedTask, RunnerType] = {
@@ -368,8 +369,11 @@ def _get_preferred_task(
         architectures: List[str],
         supported_tasks: Set[_ResolvedTask],
     ) -> Optional[_ResolvedTask]:
-        if get_pooling_config(self.model, self.revision):
+        model_id = self.model
+        if get_pooling_config(model_id, self.revision):
             return "embed"
+        if model_id.startswith("cross-encoder/") or "-reranker" in model_id:
+            return "score"
 
         suffix_to_preferred_task: List[Tuple[str, _ResolvedTask]] = [
             # Other models follow this pattern
@@ -437,13 +441,12 @@ def _resolve_task(
                 if preferred_task != "embed":
                     msg = ("The 'embedding' task will be restricted to "
                            "embedding models in a future release. Please "
-                           "pass `--task classify` or `--task reward` "
-                           "explicitly for other types of pooling models.")
+                           "pass `--task classify`, `--task score`, or "
+                           "`--task reward` explicitly for other pooling "
+                           "models.")
                     warnings.warn(msg, DeprecationWarning, stacklevel=2)
 
                 task_option = preferred_task or "embed"
-            if task_option == "score":
-                task_option = "classify"
 
             if task_option not in supported_tasks:
                 msg = (

diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
@@ -798,7 +798,7 @@ def encode(
                     "Your model supports the 'pooling' runner, but is "
                     f"currently initialized for the '{runner_type}' runner. "
                     "Please initialize vLLM using `--task embed`, "
-                    "`--task classify`, `--task reward` etc.")
+                    "`--task classify`, `--task score` etc.")
 
             raise ValueError(" ".join(messages))
 
@@ -871,7 +871,7 @@ def score(
                     "Your model supports the 'pooling' runner, but is "
                     f"currently initialized for the '{runner_type}' runner. "
                     "Please initialize vLLM using `--task embed`, "
-                    "`--task classify`, `--task reward` etc.")
+                    "`--task classify`, `--task score` etc.")
 
             raise ValueError(" ".join(messages))