Skip to content

Commit

Permalink
Llm perf update (#206)
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil authored May 16, 2024
1 parent b92e3e6 commit 1c6d941
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 88 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/update_llm_perf_leaderboard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
pip install pandas huggingface_hub[hf_transfer]
pip install .
- name: Update Open LLM Leaderboard
- name: Update Open LLM Data
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
HF_HUB_ENABLE_HF_TRANSFER: 1
Expand Down
33 changes: 0 additions & 33 deletions .github/workflows/update_open_llm_leaderboard.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion llm_perf/update_llm_perf_cuda_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):


if __name__ == "__main__":
setup_logging(level="INFO", format_prefix="MAIN-PROCESS")
setup_logging(level="INFO", prefix="MAIN-PROCESS")

models_attentions_weights = list(
product(CANONICAL_PRETRAINED_OPEN_LLM_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys())
Expand Down
64 changes: 48 additions & 16 deletions llm_perf/update_llm_perf_leaderboard.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,69 @@
import subprocess
from glob import glob
from tempfile import TemporaryDirectory

import pandas as pd
from huggingface_hub import create_repo, snapshot_download, upload_file
from tqdm import tqdm

from optimum_benchmark import Benchmark

# Hub repo type passed as `repo_type` to snapshot_download, create_repo and upload_file.
REPO_TYPE = "dataset"
# Destination repo that the generated CSV files are uploaded to.
REPO_ID = "optimum-benchmark/llm-perf-leaderboard"

# Template for the source repo holding the benchmark.json reports; formatted with `subset` and `machine`.
PERF_REPO_ID = "optimum-benchmark/llm-perf-pytorch-cuda-{subset}-{machine}"

# Template for the per-subset/machine CSV file name; formatted with `subset` and `machine`.
PERF_DF = "perf-df-{subset}-{machine}.csv"
# Same value as the literal file name that update_llm_df uploads.
LLM_DF = "llm-df.csv"


def gather_benchmarks(subset: str, machine: str):
    """Merge one subset/machine's benchmark reports into a CSV and upload it.

    Downloads every ``benchmark.json`` from the repo named by ``PERF_REPO_ID``
    (formatted with ``subset`` and ``machine``), concatenates the reports into
    a single dataframe, writes it under the ``PERF_DF`` file name in the
    current working directory and uploads that file to ``REPO_ID``.

    Args:
        subset: Value substituted for ``{subset}`` in the repo and file name templates.
        machine: Value substituted for ``{machine}`` in the repo and file name templates.

    Raises:
        ValueError: If the downloaded snapshot contains no ``benchmark.json`` file.
    """
    perf_repo_id = PERF_REPO_ID.format(subset=subset, machine=machine)
    snapshot = snapshot_download(repo_type=REPO_TYPE, repo_id=perf_repo_id, allow_patterns=["**/benchmark.json"])

    benchmark_files = glob(f"{snapshot}/**/benchmark.json", recursive=True)
    if not benchmark_files:
        # pd.concat raises ValueError on an empty list as well, but its message
        # does not say which repo was empty.
        raise ValueError(f"No benchmark.json files found in {perf_repo_id}")

    dfs = [Benchmark.from_json(file).to_dataframe() for file in tqdm(benchmark_files)]
    benchmarks = pd.concat(dfs, ignore_index=True)

    # The CSV is written once, under the same name it gets in the destination repo.
    perf_df = PERF_DF.format(subset=subset, machine=machine)
    benchmarks.to_csv(perf_df, index=False)
    create_repo(repo_id=REPO_ID, repo_type=REPO_TYPE, private=False, exist_ok=True)
    upload_file(
        repo_id=REPO_ID,
        repo_type=REPO_TYPE,
        path_in_repo=perf_df,
        path_or_fileobj=perf_df,
    )


def update_perf_dfs():
    """Rebuild and upload the perf CSV for every subset/machine combination.

    Best effort: when one combination fails, the failure is printed and the
    remaining combinations are still processed.
    """
    for subset in ["unquantized", "bnb", "awq", "gptq"]:
        for machine in ["1xA10", "1xA100"]:
            try:
                gather_benchmarks(subset, machine)
            except Exception as error:
                # Keep the original "not found" report but append the real cause,
                # so other failures are not mistaken for a missing subset.
                print(f"Subset {subset} for machine {machine} not found ({type(error).__name__}: {error})")


# Shell commands executed by update_llm_df through subprocess.run(..., shell=True):
# clone the scraper repository, install its requirements, run its main.py,
# then delete the clone.
scrapping_script = """
git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
pip install -r scrape-open-llm-leaderboard/requirements.txt
python scrape-open-llm-leaderboard/main.py
rm -rf scrape-open-llm-leaderboard
"""


create_repo(repo_id=push_repo_id, repo_type="dataset", private=False, exist_ok=True)
def update_llm_df():
    """Run the leaderboard scraper and upload its CSV to ``REPO_ID``.

    Executes ``scrapping_script`` in a shell, then uploads the ``LLM_DF`` file
    from the current working directory (presumably written there by the
    scraper; confirm against the scraper's output path).

    Raises:
        subprocess.CalledProcessError: If the shell reports a non-zero exit status.
    """
    # check=True stops the upload when the shell reports a failure.
    subprocess.run(scrapping_script, shell=True, check=True)
    create_repo(repo_id=REPO_ID, repo_type=REPO_TYPE, exist_ok=True, private=False)
    upload_file(
        repo_id=REPO_ID,
        repo_type=REPO_TYPE,
        path_in_repo=LLM_DF,
        path_or_fileobj=LLM_DF,
    )


if __name__ == "__main__":
    # All work happens behind the main guard so that importing this module
    # has no side effects; update_perf_dfs() already iterates every
    # subset/machine combination.
    update_llm_df()
    update_perf_dfs()
37 changes: 0 additions & 37 deletions llm_perf/update_open_llm_leaderboard.py

This file was deleted.

0 comments on commit 1c6d941

Please sign in to comment.