
Commit f3e7077: fix
IlyasMoutawwakil committed Dec 12, 2024
1 parent 4c23a0d commit f3e7077
Showing 3 changed files with 10 additions and 4 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test_cli_cuda_tensorrt_llm.yaml
@@ -44,7 +44,7 @@ jobs:

       - name: Install dependencies
         run: |
-          pip install -e .[testing,tesnsorrt-llm]
+          pip install -e .[testing]
 
       - name: Run tests
         run: |
@@ -84,7 +84,7 @@ jobs:

       - name: Install dependencies
         run: |
-          pip install -e .[testing,tesnsorrt-llm]
+          pip install -e .[testing]
 
       - name: Run tests (sequential)
         run: |
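Note on the workflow fix: pip does not error on an unknown extra, it only warns and skips it, so the misspelled "tesnsorrt-llm" extra meant the TensorRT-LLM dependencies were never actually installed in CI. A minimal sketch for catching this class of typo, assuming the package is installed under the distribution name "optimum-benchmark":

# Sketch: list the extras a locally installed package actually declares,
# so a misspelled extra like "tesnsorrt-llm" is caught before it reaches CI.
# Assumes the distribution "optimum-benchmark" is installed.
from importlib.metadata import metadata

extras = metadata("optimum-benchmark").get_all("Provides-Extra") or []
print(sorted(extras))  # a typo'd extra simply will not appear in this list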
8 changes: 6 additions & 2 deletions optimum_benchmark/backends/tensorrt_llm/backend.py
@@ -55,7 +55,6 @@ def download_pretrained_model(self) -> None:
     def prepare_generation_config(self) -> None:
         self.generation_config.eos_token_id = None
         self.generation_config.pad_token_id = None
-
         model_cache_folder = f"models/{self.config.model}".replace("/", "--")
         model_cache_path = f"{HUGGINGFACE_HUB_CACHE}/{model_cache_folder}"
         snapshot_file = f"{model_cache_path}/refs/{self.config.model_kwargs.get('revision', 'main')}"
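For context, the path built above follows the Hugging Face hub cache layout: refs/<revision> is a small file holding the commit hash of the cached snapshot. A minimal sketch of that resolution, with a hypothetical model id and revision:

# Sketch: resolve a cached snapshot directory from the hub cache layout,
# mirroring the model_cache_path / refs logic in the hunk above.
import os
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE

model, revision = "gpt2", "main"  # hypothetical values
model_cache_folder = f"models/{model}".replace("/", "--")  # -> "models--gpt2"
model_cache_path = os.path.join(HUGGINGFACE_HUB_CACHE, model_cache_folder)

with open(os.path.join(model_cache_path, "refs", revision)) as f:
    commit_hash = f.read().strip()  # refs/<revision> stores the snapshot's commit hash

snapshot_path = os.path.join(model_cache_path, "snapshots", commit_hash)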
@@ -91,7 +90,6 @@ def create_no_weights_model(self) -> None:
         self.logger.info("\t+ Modifying generation config for fixed length generation")
         self.generation_config.eos_token_id = None
         self.generation_config.pad_token_id = None
-        self.logger.info("\t+ Saving new pretrained generation config")
         self.generation_config.save_pretrained(save_directory=self.no_weights_model)

     def load_trtllm_with_no_weights(self) -> None:
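Both backend.py hunks rely on the same trick: with eos_token_id and pad_token_id set to None, generation can never stop at an end-of-sequence token, so every request produces exactly max_new_tokens tokens, which keeps benchmark runs at a fixed length. A standalone sketch with transformers, using a placeholder model id and output directory:

# Sketch: force fixed-length generation by removing the stop tokens,
# then persist the modified config, as create_no_weights_model does above.
from transformers import GenerationConfig

generation_config = GenerationConfig.from_pretrained("gpt2")  # placeholder model id
generation_config.eos_token_id = None  # no EOS means generation never stops early
generation_config.pad_token_id = None
generation_config.save_pretrained(save_directory="./no_weights_model")  # placeholder path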
@@ -128,6 +126,12 @@ def trtllm_kwargs(self):
         if self.config.gpus_per_node is not None:
             kwargs["gpus_per_node"] = self.config.gpus_per_node
 
+        if self.config.max_input_len is not None:
+            kwargs["max_input_len"] = self.config.max_input_len
+
+        if self.config.max_output_len is not None:
+            kwargs["max_output_len"] = self.config.max_output_len
+
         if self.config.max_batch_size is not None:
             kwargs["max_batch_size"] = self.config.max_batch_size
 
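The two added blocks follow the property's existing pattern: a field left as None is treated as "not set by the user" and omitted from the kwargs, so TensorRT-LLM's own defaults stay in effect. A self-contained sketch of that pattern; the dataclass below is a hypothetical stand-in, not the real TRTLLMConfig:

# Sketch of the optional-kwargs pattern used by trtllm_kwargs: None means
# "unset", so the key is omitted and the engine default applies.
from dataclasses import dataclass
from typing import Any, Dict, Optional

@dataclass
class StubConfig:  # hypothetical stand-in for TRTLLMConfig
    max_input_len: Optional[int] = None
    max_output_len: Optional[int] = None
    max_batch_size: Optional[int] = None

def build_kwargs(config: StubConfig) -> Dict[str, Any]:
    kwargs: Dict[str, Any] = {}
    for name in ("max_input_len", "max_output_len", "max_batch_size"):
        value = getattr(config, name)
        if value is not None:
            kwargs[name] = value
    return kwargs

assert build_kwargs(StubConfig(max_input_len=1024)) == {"max_input_len": 1024}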
2 changes: 2 additions & 0 deletions optimum_benchmark/backends/tensorrt_llm/config.py
@@ -22,6 +22,8 @@ class TRTLLMConfig(BackendConfig):
     use_fp8: Optional[bool] = None
     world_size: Optional[int] = None
     gpus_per_node: Optional[int] = None
+    max_input_len: Optional[int] = None
+    max_output_len: Optional[int] = None
     max_batch_size: Optional[int] = None
     max_new_tokens: Optional[int] = None
     max_prompt_length: Optional[int] = None
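With the two new fields in place, a benchmark configuration can cap prompt and completion lengths at engine-build time. A hypothetical usage sketch; only the max_input_len / max_output_len fields come from this diff, while the remaining arguments and any required fields of TRTLLMConfig are assumptions:

# Hypothetical usage of the new fields (import path taken from the file path above).
from optimum_benchmark.backends.tensorrt_llm.config import TRTLLMConfig

backend_config = TRTLLMConfig(
    model="gpt2",        # placeholder model id
    max_input_len=1024,  # longest prompt the built engine accepts
    max_output_len=512,  # longest completion the engine produces
    max_batch_size=8,
)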
