Skip to content

Commit

Permalink
Automatically download dataset if not present locally
Browse files: browse the repository at this point in the history
  • Loading branch information
fdalvi committed Oct 25, 2023
1 parent ce68bcb commit 98e974c
Showing 1 changed file with 8 additions and 0 deletions.
8 changes: 8 additions & 0 deletions llmebench/benchmark.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
dataset_args = config.get("dataset_args", {})
if "data_dir" not in dataset_args:
dataset_args["data_dir"] = data_dir
self.data_dir = dataset_args["data_dir"]
self.dataset = config["dataset"](**dataset_args)

task_args = config.get("task_args", {})
Expand All @@ -55,6 +56,7 @@ def __init__(

# Data parameters
self.data_paths = utils.get_data_paths(config, "test")
self.should_download = "custom_test_split" not in config

self.zeroshot = True
if utils.is_fewshot_asset(config, prompt_fn):
Expand Down Expand Up @@ -143,6 +145,12 @@ def run_benchmark(self, dry_run=False):
base_name = self.name
base_cache_dir = self.cache_dir

# Download dataset if not already present on disk and custom splits are not specified
if self.should_download:
self.dataset.download_dataset(
self.data_dir, default_url="https://llmebench.qcri.org/data/"
)

# Create sub-directory for few shot experiments
if not self.is_zeroshot():
base_name = f"{self.name}/{self.n_shots}_shot"
Expand Down

0 comments on commit 98e974c

Please sign in to comment.