Llm perf update (#206)

huggingface · May 16, 2024 · 1c6d941
1 parent b92e3e6
commit 1c6d941
Show file tree

Hide file tree

Showing 5 changed files with 50 additions and 88 deletions.
diff --git a/.github/workflows/update_llm_perf_leaderboard.yaml b/.github/workflows/update_llm_perf_leaderboard.yaml
@@ -27,7 +27,7 @@ jobs:
           pip install pandas huggingface_hub[hf_transfer]
           pip install .
 
-      - name: Update Open LLM Leaderboard
+      - name: Update Open LLM Data
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           HF_HUB_ENABLE_HF_TRANSFER: 1

diff --git a/.github/workflows/update_open_llm_leaderboard.yaml b/.github/workflows/update_open_llm_leaderboard.yaml
diff --git a/llm_perf/update_llm_perf_cuda_pytorch.py b/llm_perf/update_llm_perf_cuda_pytorch.py
@@ -165,7 +165,7 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
 
 
 if __name__ == "__main__":
-    setup_logging(level="INFO", format_prefix="MAIN-PROCESS")
+    setup_logging(level="INFO", prefix="MAIN-PROCESS")
 
     models_attentions_weights = list(
         product(CANONICAL_PRETRAINED_OPEN_LLM_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys())

diff --git a/llm_perf/update_llm_perf_leaderboard.py b/llm_perf/update_llm_perf_leaderboard.py
@@ -1,37 +1,69 @@
+import subprocess
 from glob import glob
-from tempfile import TemporaryDirectory
 
 import pandas as pd
 from huggingface_hub import create_repo, snapshot_download, upload_file
 from tqdm import tqdm
 
 from optimum_benchmark import Benchmark
 
+REPO_TYPE = "dataset"
+REPO_ID = "optimum-benchmark/llm-perf-leaderboard"
+
+PERF_REPO_ID = "optimum-benchmark/llm-perf-pytorch-cuda-{subset}-{machine}"
+
+PERF_DF = "perf-df-{subset}-{machine}.csv"
+LLM_DF = "llm-df.csv"
+
 
 def gather_benchmarks(subset: str, machine: str):
-    pull_repo_id = f"optimum-benchmark/llm-perf-pytorch-cuda-{subset}-{machine}"
-    snapshot = snapshot_download(repo_type="dataset", repo_id=pull_repo_id, allow_patterns=["**/benchmark.json"])
+    perf_repo_id = PERF_REPO_ID.format(subset=subset, machine=machine)
+    snapshot = snapshot_download(repo_type=REPO_TYPE, repo_id=perf_repo_id, allow_patterns=["**/benchmark.json"])
 
     dfs = []
     for file in tqdm(glob(f"{snapshot}/**/benchmark.json", recursive=True)):
         dfs.append(Benchmark.from_json(file).to_dataframe())
     benchmarks = pd.concat(dfs, ignore_index=True)
 
-    tmp_dir = TemporaryDirectory()
-    push_repo_id = "optimum-benchmark/llm-perf-leaderboard"
-    file_name = f"llm-perf-leaderboard-{subset}-{machine}.csv"
-    benchmarks.to_csv(f"{tmp_dir.name}/{file_name}", index=False)
+    perf_df = PERF_DF.format(subset=subset, machine=machine)
+    benchmarks.to_csv(perf_df, index=False)
+    create_repo(repo_id=REPO_ID, repo_type=REPO_TYPE, private=False, exist_ok=True)
+    upload_file(
+        repo_id=REPO_ID,
+        repo_type=REPO_TYPE,
+        path_in_repo=perf_df,
+        path_or_fileobj=perf_df,
+    )
+
+
+def update_perf_dfs():
+    for subset in ["unquantized", "bnb", "awq", "gptq"]:
+        for machine in ["1xA10", "1xA100"]:
+            try:
+                gather_benchmarks(subset, machine)
+            except Exception:
+                print(f"Subset {subset} for machine {machine} not found")
+
+
+scrapping_script = """
+git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
+pip install -r scrape-open-llm-leaderboard/requirements.txt
+python scrape-open-llm-leaderboard/main.py
+rm -rf scrape-open-llm-leaderboard
+"""
+
 
-    create_repo(repo_id=push_repo_id, repo_type="dataset", private=False, exist_ok=True)
+def update_llm_df():
+    subprocess.run(scrapping_script, shell=True)
+    create_repo(repo_id=REPO_ID, repo_type=REPO_TYPE, exist_ok=True, private=False)
     upload_file(
-        path_or_fileobj=f"{tmp_dir.name}/{file_name}", path_in_repo=file_name, repo_id=push_repo_id, repo_type="dataset"
+        repo_id=REPO_ID,
+        repo_type=REPO_TYPE,
+        path_in_repo="llm-df.csv",
+        path_or_fileobj="llm-df.csv",
     )
-    tmp_dir.cleanup()
 
 
-for subset in ["unquantized", "bnb", "awq", "gptq"]:
-    for machine in ["1xA10", "1xA100"]:
-        try:
-            gather_benchmarks(subset, machine)
-        except Exception:
-            print(f"Subset {subset} for machine {machine} not found")
+if __name__ == "__main__":
+    update_llm_df()
+    update_perf_dfs()
diff --git a/llm_perf/update_open_llm_leaderboard.py b/llm_perf/update_open_llm_leaderboard.py