Fix trt llm (#308)
IlyasMoutawwakil authored Dec 12, 2024
1 parent 4c23a0d commit f12f642
Showing 7 changed files with 31 additions and 26 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/test_cli_cuda_tensorrt_llm.yaml
@@ -44,7 +44,7 @@ jobs:

- name: Install dependencies
run: |
pip install -e .[testing,tesnsorrt-llm]
pip install -e .[testing]
- name: Run tests
run: |
@@ -57,7 +57,6 @@ jobs:
}}
name: Run examples
run: |
rm -rf /root/.cache/huggingface
pytest tests/test_examples.py -x -s -k "cli and cuda and trt"
cli_cuda_tensorrt_llm_multi_gpu_tests:
@@ -84,7 +83,7 @@ jobs:

- name: Install dependencies
run: |
pip install -e .[testing,tesnsorrt-llm]
pip install -e .[testing]
- name: Run tests (sequential)
run: |
4 changes: 2 additions & 2 deletions examples/cuda_tgi_llama.yaml
@@ -16,12 +16,12 @@ backend:
device: cuda
device_ids: 0
cuda_graphs: 0 # remove for better perf but bigger memory footprint
no_weights: false # investigate later
no_weights: true
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0

scenario:
input_shapes:
batch_size: 4
batch_size: 1
sequence_length: 64

generate_kwargs:
2 changes: 1 addition & 1 deletion examples/cuda_trt_llama.yaml
@@ -23,7 +23,7 @@ backend:

scenario:
input_shapes:
batch_size: 4
batch_size: 1
sequence_length: 64

generate_kwargs:
2 changes: 1 addition & 1 deletion examples/cuda_vllm_llama.yaml
@@ -23,7 +23,7 @@ backend:

scenario:
input_shapes:
batch_size: 4
batch_size: 1
sequence_length: 64

generate_kwargs:
8 changes: 6 additions & 2 deletions optimum_benchmark/backends/tensorrt_llm/backend.py
@@ -55,7 +55,6 @@ def download_pretrained_model(self) -> None:
def prepare_generation_config(self) -> None:
self.generation_config.eos_token_id = None
self.generation_config.pad_token_id = None

model_cache_folder = f"models/{self.config.model}".replace("/", "--")
model_cache_path = f"{HUGGINGFACE_HUB_CACHE}/{model_cache_folder}"
snapshot_file = f"{model_cache_path}/refs/{self.config.model_kwargs.get('revision', 'main')}"
@@ -91,7 +90,6 @@ def create_no_weights_model(self) -> None:
self.logger.info("\t+ Modifying generation config for fixed length generation")
self.generation_config.eos_token_id = None
self.generation_config.pad_token_id = None
self.logger.info("\t+ Saving new pretrained generation config")
self.generation_config.save_pretrained(save_directory=self.no_weights_model)

def load_trtllm_with_no_weights(self) -> None:
@@ -128,6 +126,12 @@ def trtllm_kwargs(self):
if self.config.gpus_per_node is not None:
kwargs["gpus_per_node"] = self.config.gpus_per_node

if self.config.max_input_len is not None:
kwargs["max_input_len"] = self.config.max_input_len

if self.config.max_output_len is not None:
kwargs["max_output_len"] = self.config.max_output_len

if self.config.max_batch_size is not None:
kwargs["max_batch_size"] = self.config.max_batch_size

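Note on the trtllm_kwargs change above: engine options are only forwarded when they are explicitly set in the backend config, so anything left unset falls back to the engine's own defaults. A minimal, self-contained sketch of that pattern (the helper below is illustrative, not the actual optimum-benchmark code):

from typing import Any, Dict, Optional


def build_trtllm_kwargs(
    max_input_len: Optional[int] = None,
    max_output_len: Optional[int] = None,
    max_batch_size: Optional[int] = None,
) -> Dict[str, Any]:
    # collect only the engine options that were explicitly provided
    kwargs: Dict[str, Any] = {}
    if max_input_len is not None:
        kwargs["max_input_len"] = max_input_len
    if max_output_len is not None:
        kwargs["max_output_len"] = max_output_len
    if max_batch_size is not None:
        kwargs["max_batch_size"] = max_batch_size
    return kwargs


print(build_trtllm_kwargs(max_input_len=1024))  # {'max_input_len': 1024}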
2 changes: 2 additions & 0 deletions optimum_benchmark/backends/tensorrt_llm/config.py
@@ -22,6 +22,8 @@ class TRTLLMConfig(BackendConfig):
use_fp8: Optional[bool] = None
world_size: Optional[int] = None
gpus_per_node: Optional[int] = None
max_input_len: Optional[int] = None
max_output_len: Optional[int] = None
max_batch_size: Optional[int] = None
max_new_tokens: Optional[int] = None
max_prompt_length: Optional[int] = None
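Because the two new fields added above default to None, configs written before this change stay valid and simply leave the new limits unset. A hedged sketch of that behavior (ToyTRTLLMConfig is an illustrative stand-in for TRTLLMConfig, reduced to the fields discussed here):

from dataclasses import dataclass
from typing import Optional


@dataclass
class ToyTRTLLMConfig:
    max_input_len: Optional[int] = None   # new field, optional
    max_output_len: Optional[int] = None  # new field, optional
    max_batch_size: Optional[int] = None  # pre-existing field


# an older config that never mentions the new fields still loads
legacy = ToyTRTLLMConfig(max_batch_size=1)
print(legacy.max_input_len, legacy.max_output_len)  # None None -> kwargs omitted downstream

# a newer config can cap input and output lengths explicitly
capped = ToyTRTLLMConfig(max_input_len=64, max_output_len=128, max_batch_size=1)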
34 changes: 17 additions & 17 deletions optimum_benchmark/task_utils.py
@@ -242,13 +242,13 @@ def infer_task_from_model_name_or_path(
elif library_name == "timm":
inferred_task_name = "image-classification"

elif library_name == "diffusers":
diffusers_config = get_repo_config(model_name_or_path, "model_index.json", token=token, revision=revision)
target_class_name = diffusers_config["_class_name"]
elif library_name == "transformers":
transformers_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision)
target_class_name = transformers_config["architectures"][0]

for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items():
for _, pipeline_class_name in pipeline_mapping.items():
if target_class_name == pipeline_class_name:
for task_name, model_mapping in TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES.items():
for _, model_class_name in model_mapping.items():
if target_class_name == model_class_name:
inferred_task_name = task_name
break
if inferred_task_name is not None:
if inferred_task_name is None:
raise KeyError(f"Could not find the proper task name for target class name {target_class_name}.")

elif library_name == "transformers":
transformers_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision)
target_class_name = transformers_config["architectures"][0]
elif library_name == "diffusers":
diffusers_config = get_repo_config(model_name_or_path, "model_index.json", token=token, revision=revision)
target_class_name = diffusers_config["_class_name"]

for task_name, model_mapping in TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES.items():
for _, model_class_name in model_mapping.items():
if target_class_name == model_class_name:
for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items():
for _, pipeline_class_name in pipeline_mapping.items():
if target_class_name == pipeline_class_name or (pipeline_class_name in target_class_name):
inferred_task_name = task_name
break
if inferred_task_name is not None:
@@ -293,13 +293,17 @@ def infer_model_type_from_model_name_or_path(
timm_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision)
inferred_model_type = timm_config["architecture"]

elif library_name == "transformers":
transformers_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision)
inferred_model_type = transformers_config["model_type"]

elif library_name == "diffusers":
diffusers_config = get_repo_config(model_name_or_path, "model_index.json", token=token, revision=revision)
target_class_name = diffusers_config["_class_name"]

for _, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items():
for pipeline_type, pipeline_class_name in pipeline_mapping.items():
if target_class_name == pipeline_class_name:
if target_class_name == pipeline_class_name or (pipeline_class_name in target_class_name):
inferred_model_type = pipeline_type
break
if inferred_model_type is not None:
if inferred_model_type is None:
raise KeyError(f"Could not find the proper model type for target class name {target_class_name}.")

elif library_name == "transformers":
transformers_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision)
inferred_model_type = transformers_config["model_type"]

return inferred_model_type
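The diffusers branches above now accept a substring match (pipeline_class_name in target_class_name) in addition to an exact match, so pipeline variants whose class names embed a known pipeline name still resolve to a task or pipeline type. A small sketch of that lookup (the mapping below is a toy stand-in for TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES):

from typing import Dict, Optional

# toy stand-in for TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES
TOY_PIPELINE_MAPPING: Dict[str, Dict[str, str]] = {
    "text-to-image": {"stable-diffusion": "StableDiffusionPipeline"},
}


def infer_task(target_class_name: str) -> Optional[str]:
    for task_name, pipeline_mapping in TOY_PIPELINE_MAPPING.items():
        for _, pipeline_class_name in pipeline_mapping.items():
            # exact match, or a known class name embedded in a variant's class name
            if target_class_name == pipeline_class_name or pipeline_class_name in target_class_name:
                return task_name
    return None


print(infer_task("StableDiffusionPipeline"))      # text-to-image (exact match)
print(infer_task("FlaxStableDiffusionPipeline"))  # text-to-image (substring match)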
