Add t4 for llm perf leaderboard #238

Merged · 18 commits · Aug 19, 2024
Changes from 14 commits
14 changes: 9 additions & 5 deletions .github/workflows/update_llm_perf_cuda_pytorch.yaml
@@ -1,9 +1,9 @@
 name: Update LLM Perf Benchmarks - CUDA PyTorch

-on:
-  workflow_dispatch:
-  schedule:
-    - cron: "0 0 * * *"
+on:
+  workflow_dispatch: # Manual trigger
+  release: # Trigger on new release
+    types: [published]
IlyasMoutawwakil (Member) · Aug 15, 2024:
I don't think this needs commenting, and why on release?

Collaborator (PR author) replied:
OK, I can remove the comments.

Good question. I think it would be more efficient to run the full benchmark with each release of the pip package rather than on a daily basis. Running it daily seems wasteful, as the hardware remains unchanged and we're simply repeating the benchmark for every code change. Since users are likely to benchmark using the PyPI package, it makes more sense to align this workflow with each release. We could also run it manually if we discover any issues with our benchmarks. However, if you prefer running the benchmark daily, I can revert to that schedule. Just let me know your preference.

Member replied:
I guess there's a misunderstanding: the daily trigger runs different benchmarks (different model + optimization + quantization combinations) each time because it skips already-benchmarked configurations. It is also a way to benchmark all configurations without being limited by the 6-hour time constraint of the runners.

Collaborator (PR author) replied:
Thanks for the explanation, it makes much more sense now. I removed the release trigger and kept the original schedule.
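
A minimal sketch of the skip-already-benchmarked sweep described in the comment above. The list contents and the `is_benchmarked` helper are hypothetical stand-ins, not the script's actual names; the real logic lives in llm_perf/update_llm_perf_cuda_pytorch.py.

```python
# Sketch only: a scheduled run covers new (model, attn, weights) combinations
# by skipping configurations that already have published results.
from itertools import product

MODELS = ["gpt2", "mistralai/Mistral-7B-v0.1"]  # illustrative model IDs
ATTN_IMPLEMENTATIONS = ["eager", "sdpa", "flash_attention_2"]
WEIGHTS_CONFIGS = ["float16", "4bit-awq-gemm", "4bit-gptq-exllama-v2"]


def is_benchmarked(model: str, attn_implementation: str, weights_config: str) -> bool:
    """Hypothetical check against already-pushed benchmark reports."""
    return False


def benchmark_stub(model: str, attn_implementation: str, weights_config: str) -> None:
    """Stand-in for the real benchmark_cuda_pytorch(...) shown in the diff below."""
    print(f"benchmarking {model} / {attn_implementation} / {weights_config}")


def run_scheduled_sweep() -> None:
    for model, attn_implementation, weights_config in product(
        MODELS, ATTN_IMPLEMENTATIONS, WEIGHTS_CONFIGS
    ):
        if is_benchmarked(model, attn_implementation, weights_config):
            continue  # already covered by a previous scheduled run
        benchmark_stub(model, attn_implementation, weights_config)
```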


 concurrency:
   cancel-in-progress: true
@@ -18,7 +18,11 @@ jobs:
       fail-fast: false
       matrix:
         subset: [unquantized, bnb, awq, gptq]
-        machine: [{ name: 1xA10, runs-on: [single-gpu, nvidia-gpu, a10, ci] }]
+
+        machine: [
+          {name: 1xA10, runs-on: {group: 'aws-g5-4xlarge-plus'}},
+          {name: 1xT4, runs-on: {group: 'aws-g4dn-2xlarge'}}
+        ]

     runs-on: ${{ matrix.machine.runs-on }}

2 changes: 1 addition & 1 deletion llm_perf/update_llm_perf_cuda_pytorch.py
@@ -134,7 +134,7 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
         quantization_scheme=quant_scheme,
         quantization_config=quant_config,
         attn_implementation=attn_implementation,
-        hub_kwargs={"trust_remote_code": True},
+        model_kwargs={"trust_remote_code": True},
     )

     benchmark_config = BenchmarkConfig(
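
A hedged usage sketch of the renamed argument, assuming the kwargs in the hunk above are fields of optimum-benchmark's PyTorchConfig and BenchmarkConfig (as in the library's README-style API); the model ID is hypothetical.

```python
# Sketch only: trust_remote_code is now passed through model_kwargs
# (previously hub_kwargs). Field names are assumed from the hunk above.
from optimum_benchmark import BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig

backend_config = PyTorchConfig(
    model="some-org/custom-remote-code-model",  # hypothetical model ID
    device="cuda",
    device_ids="0",
    model_kwargs={"trust_remote_code": True},  # was hub_kwargs before this change
)

benchmark_config = BenchmarkConfig(
    name="trust-remote-code-example",
    launcher=ProcessConfig(),
    scenario=InferenceConfig(latency=True, memory=True),
    backend=backend_config,
)
```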
2 changes: 1 addition & 1 deletion llm_perf/update_llm_perf_leaderboard.py
@@ -32,7 +32,7 @@ def gather_benchmarks(subset: str, machine: str):

 def update_perf_dfs():
     for subset in ["unquantized", "bnb", "awq", "gptq"]:
-        for machine in ["1xA10", "1xA100"]:
+        for machine in ["1xA10", "1xA100", "1xT4"]:
             try:
                 gather_benchmarks(subset, machine)
             except Exception:
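
A hedged sketch of what the per-machine gather could look like; the dataset naming scheme below is illustrative, not the real one used by gather_benchmarks.

```python
# Sketch only: one Hub dataset per (subset, machine) pair, pulled with the
# `datasets` library. The repo_id pattern is an assumption for illustration.
from datasets import load_dataset


def gather_benchmarks_sketch(subset: str, machine: str):
    repo_id = f"optimum-benchmark/llm-perf-pytorch-cuda-{subset}-{machine}"  # illustrative ID
    return load_dataset(repo_id, split="train")


# Newly added 1xT4 reports may not exist yet for every subset, which is why
# update_perf_dfs above wraps gather_benchmarks in a try/except.
```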
1 change: 1 addition & 0 deletions optimum_benchmark/backends/config.py
@@ -73,6 +73,7 @@ def __post_init__(self):
             self.library,
             revision=self.model_kwargs.get("revision", None),
             token=self.model_kwargs.get("token", None),
+            trust_remote_code=self.model_kwargs.get("trust_remote_code", False),
         )

         if self.device is None:
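
A minimal sketch of the flow this hunk enables, mirroring the same `.get(...)` defaults: a backend config whose model_kwargs carry trust_remote_code now forwards the flag when the model type is inferred in `__post_init__`. The model ID is hypothetical; the called function is the one patched in the next file.

```python
# Sketch only: forwarding trust_remote_code from model_kwargs to model-type inference.
from optimum_benchmark.task_utils import infer_model_type_from_model_name_or_path

model_kwargs = {"revision": "main", "trust_remote_code": True}

model_type = infer_model_type_from_model_name_or_path(
    "some-org/custom-remote-code-model",  # hypothetical model ID
    revision=model_kwargs.get("revision", None),
    token=model_kwargs.get("token", None),
    trust_remote_code=model_kwargs.get("trust_remote_code", False),  # defaults to False
)
print(model_type)
```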
5 changes: 4 additions & 1 deletion optimum_benchmark/task_utils.py
@@ -190,6 +190,7 @@ def infer_model_type_from_model_name_or_path(
     library_name: Optional[str] = None,
     revision: Optional[str] = None,
     token: Optional[str] = None,
+    trust_remote_code: bool = False,
 ) -> str:
     if library_name is None:
         library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token)
@@ -216,7 +217,9 @@
             break

     else:
-        transformers_config = get_transformers_pretrained_config(model_name_or_path, revision=revision, token=token)
+        transformers_config = get_transformers_pretrained_config(
+            model_name_or_path, revision=revision, token=token, trust_remote_code=trust_remote_code
+        )
         inferred_model_type = transformers_config.model_type

     if inferred_model_type is None:
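
A hedged sketch of the final hop, assuming get_transformers_pretrained_config is essentially a thin wrapper around transformers.AutoConfig.from_pretrained (which does accept revision, token, and trust_remote_code); this is illustrative, not the library's exact implementation.

```python
# Sketch only: how the forwarded trust_remote_code flag reaches transformers.
from transformers import AutoConfig


def get_transformers_pretrained_config_sketch(model_name_or_path: str, **kwargs):
    # kwargs now include trust_remote_code in addition to revision/token
    return AutoConfig.from_pretrained(model_name_or_path, **kwargs)


config = get_transformers_pretrained_config_sketch(
    "some-org/custom-remote-code-model",  # hypothetical model ID
    revision="main",
    trust_remote_code=True,  # allow loading a config class defined in the model repo
)
print(config.model_type)
```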