Commit e364516: conftests

Signed-off-by: s.kochetkov <[email protected]>
Committed by s.kochetkov on Jan 3, 2025
Parent: 412590a

Showing 1 changed file with 83 additions and 9 deletions.

tests/conftest.py (92 changes: 83 additions & 9 deletions; file mode 100644 → 100755)
@@ -12,6 +12,7 @@
import torch.nn as nn
import torch.nn.functional as F
from huggingface_hub import snapshot_download
from peft import PeftModel
from PIL import Image
from transformers import (AutoModelForCausalLM, AutoTokenizer, BatchEncoding,
BatchFeature)
@@ -30,6 +31,7 @@
from vllm.inputs import (ExplicitEncoderDecoderPrompt, TextPrompt,
to_enc_dec_tuple_list, zip_enc_dec_prompts)
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.outputs import RequestOutput
from vllm.platforms import current_platform
from vllm.sampling_params import BeamSearchParams
@@ -643,11 +645,68 @@ def __exit__(self, exc_type, exc_value, traceback):
cleanup_dist_env_and_memory()


class PeftRunner(HfRunner):

def __init__(
self,
model_name: str,
adapter_name: str,
dtype: str = "half",
*,
model_kwargs: Optional[Dict[str, Any]] = None,
auto_cls=AutoModelForCausalLM,
postprocess_inputs: Callable[[BatchEncoding],
BatchEncoding] = identity,
) -> None:
super().__init__(model_name,
dtype,
model_kwargs=model_kwargs,
is_embedding_model=False,
auto_cls=auto_cls,
postprocess_inputs=postprocess_inputs)

        self.model = PeftModel.from_pretrained(self.model,
                                               model_id=adapter_name)

Check failure on line 668 in tests/conftest.py (GitHub Actions, mypy 3.9 / 3.10 / 3.11 / 3.12): Unexpected keyword argument "is_embedding_model" for "__init__" of "HfRunner" [call-arg]


Check failure on line 679 in tests/conftest.py (GitHub Actions, mypy 3.9 / 3.10 / 3.11 / 3.12): Name "PeftRunner" already defined on line 655 [no-redef]
Check failure on line 679 in tests/conftest.py (GitHub Actions, ruff 3.12): F811 Redefinition of unused `PeftRunner` from line 655

class PeftRunner(HfRunner):

    def __init__(
        self,
        model_name: str,
        adapter_name: str,
        dtype: str = "half",
        *,
model_kwargs: Optional[Dict[str, Any]] = None,
auto_cls=AutoModelForCausalLM,
postprocess_inputs: Callable[[BatchEncoding],
BatchEncoding] = identity,
) -> None:
super().__init__(model_name,
dtype,
model_kwargs=model_kwargs,
is_embedding_model=False,
auto_cls=auto_cls,
postprocess_inputs=postprocess_inputs)

        self.model = PeftModel.from_pretrained(self.model,
                                               model_id=adapter_name)

Check failure on line 692 in tests/conftest.py (GitHub Actions, mypy 3.9 / 3.10 / 3.11 / 3.12): Unexpected keyword argument "is_embedding_model" for "__init__" of "HfRunner" [call-arg]


@pytest.fixture(scope="session")
def hf_runner():
return HfRunner


@pytest.fixture(scope="session")
def peft_runner():
return PeftRunner

@pytest.fixture(scope="session")
def peft_runner():
return PeftRunner
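
For context, a test would presumably combine the runner and fixture along these lines; the sketch below is hypothetical (the model and adapter identifiers are placeholders), and it assumes PeftRunner inherits HfRunner's context-manager protocol and its generate_greedy helper:

# Hypothetical usage sketch; "org/base-model" and "org/lora-adapter" are
# placeholders, not models referenced by this commit.
def test_peft_adapter_greedy(peft_runner):
    prompts = ["Hello, my name is"]
    # PeftRunner loads the base model through HfRunner, then wraps it with
    # PeftModel.from_pretrained so the LoRA adapter weights are applied.
    with peft_runner("org/base-model",
                     adapter_name="org/lora-adapter") as hf_model:
        outputs = hf_model.generate_greedy(prompts, max_tokens=32)
    assert len(outputs) == len(prompts)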


class VllmRunner:

def __init__(

@@ -666,6 +725,8 @@ def __init__(
enable_chunked_prefill: bool = False,
swap_space: int = 4,
enforce_eager: Optional[bool] = False,
enable_lora: bool = False,
max_loras: int = 4,
**kwargs,
) -> None:
self.model = LLM(
@@ -682,6 +743,8 @@ def __init__(
max_model_len=max_model_len,
block_size=block_size,
enable_chunked_prefill=enable_chunked_prefill,
enable_lora=enable_lora,
max_loras=max_loras,
**kwargs,
)
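
The two new flags pass straight through to the LLM engine; a minimal sketch of how they might combine with the lora_requests plumbing added below (the model name and adapter path are hypothetical placeholders):

# Hypothetical sketch: VllmRunner with LoRA enabled.
with VllmRunner("org/base-model", enable_lora=True, max_loras=4) as vllm_model:
    lora_req = LoRARequest(
        "my-adapter",             # adapter name
        1,                        # unique positive integer id
        "/path/to/lora/adapter",  # local adapter directory (placeholder)
    )
    outputs = vllm_model.generate_greedy(["Hello, my name is"],
                                         max_tokens=32,
                                         lora_requests=[lora_req])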

@@ -726,13 +789,15 @@ def generate(
images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None,
lora_requests: Optional[List[LoRARequest]] = None,
) -> List[Tuple[List[List[int]], List[str]]]:
inputs = self.get_inputs(prompts,
images=images,
videos=videos,
audios=audios)

req_outputs = self.model.generate(inputs,
lora_request=lora_requests,
sampling_params=sampling_params)

outputs: List[Tuple[List[List[int]], List[str]]] = []
Expand Down Expand Up @@ -771,6 +836,7 @@ def generate_w_logprobs(
images: Optional[PromptImageInput] = None,
audios: Optional[PromptAudioInput] = None,
videos: Optional[PromptVideoInput] = None,
lora_requests: Optional[List[LoRARequest]] = None,
) -> Union[List[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]:
inputs = self.get_inputs(prompts,
@@ -779,6 +845,7 @@
audios=audios)

req_outputs = self.model.generate(inputs,
lora_request=lora_requests,
sampling_params=sampling_params)

toks_str_logsprobs_prompt_logprobs = (
@@ -792,6 +859,7 @@ def generate_encoder_decoder_w_logprobs(
self,
encoder_decoder_prompts: List[ExplicitEncoderDecoderPrompt[str, str]],
sampling_params: SamplingParams,
lora_requests: Optional[List[LoRARequest]] = None,
) -> Union[List[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]:
'''
@@ -800,6 +868,7 @@ def generate_encoder_decoder_w_logprobs(

assert sampling_params.logprobs is not None
req_outputs = self.model.generate(encoder_decoder_prompts,
lora_request=lora_requests,
sampling_params=sampling_params)
toks_str_logsprobs_prompt_logprobs = (
self._final_steps_generate_w_logprobs(req_outputs))
@@ -815,13 +884,15 @@ def generate_greedy(
images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None,
lora_requests: Optional[List[LoRARequest]] = None,
) -> List[Tuple[List[int], str]]:
greedy_params = SamplingParams(temperature=0.0, max_tokens=max_tokens)
outputs = self.generate(prompts,
greedy_params,
images=images,
videos=videos,
-                                audios=audios)
+                                audios=audios,
+                                lora_requests=lora_requests)
return [(output_ids[0], output_str[0])
for output_ids, output_str in outputs]
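
Taken together, PeftRunner on the HF side and the LoRA flags on the vLLM side suggest comparison tests; a rough sketch, with every identifier hypothetical and exact-match on decoded text used purely for illustration:

# Hypothetical HF-vs-vLLM LoRA comparison; identifiers are placeholders.
def test_lora_matches_peft(peft_runner, vllm_runner):
    prompts = ["The capital of France is"]
    with peft_runner("org/base-model",
                     adapter_name="org/lora-adapter") as hf_model:
        hf_outputs = hf_model.generate_greedy(prompts, max_tokens=16)
    with vllm_runner("org/base-model", enable_lora=True) as vllm_model:
        # lora_path would normally be a local directory, e.g. obtained via
        # snapshot_download("org/lora-adapter").
        lora_req = LoRARequest("org/lora-adapter", 1, "/path/to/lora/adapter")
        vllm_outputs = vllm_model.generate_greedy(prompts, 16,
                                                  lora_requests=[lora_req])
    # Greedy decoding keeps the two backends broadly comparable.
    assert hf_outputs[0][1] == vllm_outputs[0][1]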

@@ -834,6 +905,7 @@ def generate_greedy_logprobs(
images: Optional[PromptImageInput] = None,
audios: Optional[PromptAudioInput] = None,
videos: Optional[PromptVideoInput] = None,
lora_requests: Optional[List[LoRARequest]] = None,
stop_token_ids: Optional[List[int]] = None,
stop: Optional[List[str]] = None,
) -> Union[List[TokensTextLogprobs],
@@ -850,14 +922,16 @@ def generate_greedy_logprobs(
greedy_logprobs_params,
images=images,
audios=audios,
-                                        videos=videos)
+                                        videos=videos,
+                                        lora_requests=lora_requests)

def generate_encoder_decoder_greedy_logprobs(
self,
encoder_decoder_prompts: List[ExplicitEncoderDecoderPrompt[str, str]],
max_tokens: int,
num_logprobs: int,
num_prompt_logprobs: Optional[int] = None,
lora_requests: Optional[List[LoRARequest]] = None,
) -> Union[List[TokensTextLogprobs],
List[TokensTextLogprobsPromptLogprobs]]:
greedy_logprobs_params = SamplingParams(
@@ -871,14 +945,13 @@ def generate_encoder_decoder_greedy_logprobs(
'''

return self.generate_encoder_decoder_w_logprobs(
-            encoder_decoder_prompts, greedy_logprobs_params)
+            encoder_decoder_prompts,
+            greedy_logprobs_params,
+            lora_requests=lora_requests)

def generate_beam_search(
-        self,
-        prompts: Union[List[str], List[List[int]]],
-        beam_width: int,
-        max_tokens: int,
-    ) -> List[Tuple[List[List[int]], List[str]]]:
+        self, prompts: Union[List[str], List[List[int]]], beam_width: int,
+        max_tokens: int) -> List[Tuple[List[List[int]], List[str]]]:
outputs = self.model.beam_search(
prompts,
BeamSearchParams(beam_width=beam_width, max_tokens=max_tokens))
Expand All @@ -899,13 +972,14 @@ def encode(
images: Optional[PromptImageInput] = None,
videos: Optional[PromptVideoInput] = None,
audios: Optional[PromptAudioInput] = None,
lora_requests: Optional[List[LoRARequest]] = None
) -> List[List[float]]:
inputs = self.get_inputs(prompts,
images=images,
videos=videos,
audios=audios)

-        req_outputs = self.model.embed(inputs)
+        req_outputs = self.model.embed(inputs, lora_request=lora_requests)
return [req_output.outputs.embedding for req_output in req_outputs]
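
encode gains the same per-request hook; a short sketch, assuming the served model is an embedding model (identifiers again hypothetical):

# Hypothetical sketch: embeddings with a per-request LoRA adapter.
with vllm_runner("org/embedding-model", enable_lora=True) as vllm_model:
    req = LoRARequest("embed-adapter", 1, "/path/to/adapter")
    embeddings = vllm_model.encode(["what is vLLM?"], lora_requests=[req])
    assert len(embeddings) == 1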

def score(
