Skip to content

Commit

Permalink
Make score a separate task
Browse files (browse the repository at this point in the history)
Signed-off-by: DarkLight1337 <[email protected]>
  • Loading branch information
DarkLight1337 committed Dec 2, 2024
1 parent a536fc8 commit 3d149e7
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 11 deletions.
4 changes: 3 additions & 1 deletion docs/source/models/pooling_models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ See :ref:`Engine Arguments <engine_args>` for a list of options when initializin
For pooling models, we support the following :code:`task` options:

- Embedding (:code:`"embed"` / :code:`"embedding"`)
- Classification (:code:`"classify"`/ :code:`"score"`) -- reranking models fall under this category.
- Classification (:code:`"classify"`)
- Sentence Pair Scoring (:code:`"score"`)
- Reward Modeling (:code:`"reward"`)

The selected task determines the default :class:`~vllm.model_executor.layers.Pooler` that is used:

- Embedding: Extract only the hidden states corresponding to the last token, and apply normalization.
- Classification: Extract only the hidden states corresponding to the last token, and apply softmax.
- Sentence Pair Scoring: Extract only the hidden states corresponding to the last token, and apply softmax.
- Reward Modeling: Extract all of the hidden states and return them directly.

When loading `Sentence Transformers <https://huggingface.co/sentence-transformers>`__ models,
Expand Down
1 change: 1 addition & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
("facebook/opt-125m", "generate", "generate"),
("intfloat/e5-mistral-7b-instruct", "pooling", "embed"),
("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "score"),
("Qwen/Qwen2.5-Math-RM-72B", "pooling", "reward"),
],
)
Expand Down
19 changes: 11 additions & 8 deletions vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,16 @@
_MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120

TaskOption = Literal["auto", "generate", "embedding", "embed", "classify",
"score", "reward", ]
"score", "reward"]

_ResolvedTask = Literal["generate", "embed", "classify", "reward", "draft"]
_ResolvedTask = Literal["generate", "embed", "classify", "score", "reward",
"draft"]

RunnerType = Literal["generate", "pooling", "draft"]

_RUNNER_TASKS: Dict[RunnerType, List[_ResolvedTask]] = {
"generate": ["generate"],
"pooling": ["embed", "classify", "reward"],
"pooling": ["embed", "classify", "score", "reward"],
}

_TASK_RUNNER: Dict[_ResolvedTask, RunnerType] = {
Expand Down Expand Up @@ -368,8 +369,11 @@ def _get_preferred_task(
architectures: List[str],
supported_tasks: Set[_ResolvedTask],
) -> Optional[_ResolvedTask]:
if get_pooling_config(self.model, self.revision):
model_id = self.model
if get_pooling_config(model_id, self.revision):
return "embed"
if model_id.startswith("cross-encoder/") or "-reranker" in model_id:
return "score"

suffix_to_preferred_task: List[Tuple[str, _ResolvedTask]] = [
# Other models follow this pattern
Expand Down Expand Up @@ -437,13 +441,12 @@ def _resolve_task(
if preferred_task != "embed":
msg = ("The 'embedding' task will be restricted to "
"embedding models in a future release. Please "
"pass `--task classify` or `--task reward` "
"explicitly for other types of pooling models.")
"pass `--task classify`, `--task score`, or "
"`--task reward` explicitly for other pooling "
"models.")
warnings.warn(msg, DeprecationWarning, stacklevel=2)

task_option = preferred_task or "embed"
if task_option == "score":
task_option = "classify"

if task_option not in supported_tasks:
msg = (
Expand Down
4 changes: 2 additions & 2 deletions vllm/entrypoints/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,7 @@ def encode(
"Your model supports the 'pooling' runner, but is "
f"currently initialized for the '{runner_type}' runner. "
"Please initialize vLLM using `--task embed`, "
"`--task classify`, `--task reward` etc.")
"`--task classify`, `--task score` etc.")

raise ValueError(" ".join(messages))

Expand Down Expand Up @@ -871,7 +871,7 @@ def score(
"Your model supports the 'pooling' runner, but is "
f"currently initialized for the '{runner_type}' runner. "
"Please initialize vLLM using `--task embed`, "
"`--task classify`, `--task reward` etc.")
"`--task classify`, `--task score` etc.")

raise ValueError(" ".join(messages))

Expand Down

0 comments on commit 3d149e7

Please sign in to comment.