diff --git a/scripts/tool_benchmarks.py b/scripts/tool_benchmarks.py index 0f97e2b..918d88f 100644 --- a/scripts/tool_benchmarks.py +++ b/scripts/tool_benchmarks.py @@ -161,13 +161,13 @@ for model_name, model in tests[:-1]: rate_limiter = RateLimiter(requests_per_second=1) - print(f"Benchmarking {task.name} with model: {model_name}") - eval_config = task.get_eval_config() + print(f"Benchmarking {task.name} with model: {model_name}") + eval_config = task.get_eval_config() - for prompt, prompt_name in prompts: - agent_factory = StandardAgentFactory( - task, llm, prompt, rate_limiter=rate_limiter - ) + for prompt, prompt_name in prompts: + agent_factory = StandardAgentFactory( + task, model, prompt, rate_limiter=rate_limiter + ) client.run_on_dataset( dataset_name=dataset_name,