From 20b9435f41bcacaad72ad2399787f7d0e7464c43 Mon Sep 17 00:00:00 2001
From: Andrew Zhu
Date: Wed, 24 Apr 2024 13:22:23 -0400
Subject: [PATCH] leaderboard: add info on function calling finetunes

---
 README.md                                          | 2 ++
 leaderboard-submissions/metadata/gemma-7b-it.json  | 2 ++
 leaderboard-submissions/metadata/llama-3.json      | 4 +++-
 leaderboard-submissions/results/claude.json        | 3 ++-
 leaderboard-submissions/results/gpt-3.5-turbo.json | 4 +++-
 leaderboard-submissions/results/gpt-4-turbo.json   | 4 +++-
 leaderboard-submissions/results/gpt-4.json         | 4 +++-
 leaderboard-submissions/results/llama-chat.json    | 5 +++--
 leaderboard-submissions/results/mistral-chat.json  | 6 ++++--
 leaderboard-submissions/results/mixtral.json       | 6 ++++--
 10 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index ce8ea29..491669b 100644
--- a/README.md
+++ b/README.md
@@ -193,6 +193,8 @@ You will also need to write a metadata file for your model. Your metadata file s
   "citation": "The list of authors and year, in citation format (e.g. `Zhu et al., 2024`)",
   "type": "FOUNDATION | FINETUNE | PROMPT | OTHER",
   "context": "The context length of the model your system uses (as an int)",
+  "is_trained_for_function_calling": "Whether your model was trained for function calling specifically (true/false)",
+  "details": "Additional model details (e.g. API model revision or Hugging Face model ID) - optional",
   "closedbook_generations": "YOUR-SYSTEM-NAME.jsonl",
   "openbook_generations": "YOUR-SYSTEM-NAME.jsonl",
   "evidenceprovided_generations": "YOUR-SYSTEM-NAME.jsonl"
diff --git a/leaderboard-submissions/metadata/gemma-7b-it.json b/leaderboard-submissions/metadata/gemma-7b-it.json
index d0eb34e..f183163 100644
--- a/leaderboard-submissions/metadata/gemma-7b-it.json
+++ b/leaderboard-submissions/metadata/gemma-7b-it.json
@@ -5,6 +5,8 @@
   "citation": "Gemma Team, 2024",
   "type": "FOUNDATION",
   "context": 8192,
+  "is_trained_for_function_calling": false,
+  "details": "google/gemma-1.1-7b-it",
   "closedbook_generations": "results-closedbook-gemma.jsonl",
   "openbook_generations": "results-openbook-gemma.jsonl",
   "evidenceprovided_generations": "results-wiki-provided-gemma.jsonl"
diff --git a/leaderboard-submissions/metadata/llama-3.json b/leaderboard-submissions/metadata/llama-3.json
index e35cbf5..a31cdfd 100644
--- a/leaderboard-submissions/metadata/llama-3.json
+++ b/leaderboard-submissions/metadata/llama-3.json
@@ -1,10 +1,12 @@
 {
-  "name": "Llama 3 70B-Instruct",
+  "name": "Llama 3 70B Instruct",
   "authors": "Meta",
   "url": "https://ai.meta.com/blog/meta-llama-3/",
   "citation": "Meta, 2024",
   "type": "FOUNDATION",
   "context": 8192,
+  "is_trained_for_function_calling": false,
+  "details": "meta-llama/Meta-Llama-3-70B-Instruct",
   "closedbook_generations": "results-closedbook-llama3.jsonl",
   "openbook_generations": "results-openbook-llama3.jsonl",
   "evidenceprovided_generations": "results-wiki-provided-llama3.jsonl"
diff --git a/leaderboard-submissions/results/claude.json b/leaderboard-submissions/results/claude.json
index 675be13..c69536a 100644
--- a/leaderboard-submissions/results/claude.json
+++ b/leaderboard-submissions/results/claude.json
@@ -7,7 +7,8 @@
     "url": "https://www.anthropic.com/news/claude-2-1",
     "citation": "Anthropic, 2023",
     "type": "FOUNDATION",
-    "context": 200000
+    "context": 200000,
+    "is_trained_for_function_calling": false
   },
   "closedbook": {
     "acc": {
diff --git a/leaderboard-submissions/results/gpt-3.5-turbo.json b/leaderboard-submissions/results/gpt-3.5-turbo.json
index ea1e9dd..bbda945 100644
--- a/leaderboard-submissions/results/gpt-3.5-turbo.json
+++ b/leaderboard-submissions/results/gpt-3.5-turbo.json
@@ -7,7 +7,9 @@
     "url": "https://platform.openai.com/docs/models/gpt-3-5-turbo",
     "citation": "OpenAI, 2023",
     "type": "FOUNDATION",
-    "context": 16384
+    "context": 16384,
+    "is_trained_for_function_calling": true,
+    "details": "gpt-3.5-turbo-1106 (February 2024)"
   },
   "closedbook": {
     "acc": {
diff --git a/leaderboard-submissions/results/gpt-4-turbo.json b/leaderboard-submissions/results/gpt-4-turbo.json
index 9e3297c..f8372ba 100644
--- a/leaderboard-submissions/results/gpt-4-turbo.json
+++ b/leaderboard-submissions/results/gpt-4-turbo.json
@@ -7,7 +7,9 @@
     "url": "https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo",
     "citation": "OpenAI, 2023",
     "type": "FOUNDATION",
-    "context": 128000
+    "context": 128000,
+    "is_trained_for_function_calling": true,
+    "details": "gpt-4-0125-preview (February 2024)"
   },
   "closedbook": {
     "acc": {
diff --git a/leaderboard-submissions/results/gpt-4.json b/leaderboard-submissions/results/gpt-4.json
index 305e62a..5134292 100644
--- a/leaderboard-submissions/results/gpt-4.json
+++ b/leaderboard-submissions/results/gpt-4.json
@@ -7,7 +7,9 @@
     "url": "https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo",
     "citation": "OpenAI, 2023",
     "type": "FOUNDATION",
-    "context": 8192
+    "context": 8192,
+    "is_trained_for_function_calling": true,
+    "details": "gpt-4-0613 (February 2024)"
   },
   "closedbook": {
     "acc": {
diff --git a/leaderboard-submissions/results/llama-chat.json b/leaderboard-submissions/results/llama-chat.json
index f5209b3..bfb54c1 100644
--- a/leaderboard-submissions/results/llama-chat.json
+++ b/leaderboard-submissions/results/llama-chat.json
@@ -2,12 +2,13 @@
   "_submission_hash": "",
   "_results_hash": "",
   "metadata": {
-    "name": "LLaMA 2 70B",
+    "name": "LLaMA 2 Chat 70B",
     "authors": "Meta",
     "url": "https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/",
     "citation": "Touvron et al., 2023",
     "type": "FOUNDATION",
-    "context": 4096
+    "context": 4096,
+    "is_trained_for_function_calling": false
   },
   "closedbook": {
     "acc": {
diff --git a/leaderboard-submissions/results/mistral-chat.json b/leaderboard-submissions/results/mistral-chat.json
index f2f0b1f..454850b 100644
--- a/leaderboard-submissions/results/mistral-chat.json
+++ b/leaderboard-submissions/results/mistral-chat.json
@@ -2,12 +2,14 @@
   "_submission_hash": "",
   "_results_hash": "",
   "metadata": {
-    "name": "Mistral-7B",
+    "name": "Mistral-7B Instruct",
     "authors": "Mistral AI",
     "url": "https://mistral.ai/news/announcing-mistral-7b/",
     "citation": "Jiang et al., 2023",
     "type": "FOUNDATION",
-    "context": 32000
+    "context": 32000,
+    "is_trained_for_function_calling": false,
+    "details": "mistralai/Mistral-7B-Instruct-v0.2"
   },
   "closedbook": {
     "acc": {
diff --git a/leaderboard-submissions/results/mixtral.json b/leaderboard-submissions/results/mixtral.json
index c708cc5..7d5a9d9 100644
--- a/leaderboard-submissions/results/mixtral.json
+++ b/leaderboard-submissions/results/mixtral.json
@@ -2,12 +2,14 @@
   "_submission_hash": "",
   "_results_hash": "",
   "metadata": {
-    "name": "Mixtral-8x7B",
+    "name": "Mixtral-8x7B Instruct",
     "authors": "Mistral AI",
     "url": "https://mistral.ai/news/mixtral-of-experts/",
     "citation": "Jiang et al., 2024",
     "type": "FOUNDATION",
-    "context": 32000
+    "context": 32000,
+    "is_trained_for_function_calling": false,
+    "details": "mistralai/Mixtral-8x7B-Instruct-v0.1"
   },
   "closedbook": {
     "acc": {
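
For reference, a complete metadata file using the two fields introduced by this patch might look like the sketch below. The system name, URL, and generation file names are hypothetical placeholders, not a real submission; only keys documented in the README schema above are used, and "details" may be omitted since it is optional.

  {
    "name": "Example-7B Instruct",
    "authors": "Example Lab",
    "url": "https://example.com/example-7b-instruct",
    "citation": "Example Lab, 2024",
    "type": "FINETUNE",
    "context": 8192,
    "is_trained_for_function_calling": true,
    "details": "example-lab/example-7b-instruct-v0.1",
    "closedbook_generations": "results-closedbook-example.jsonl",
    "openbook_generations": "results-openbook-example.jsonl",
    "evidenceprovided_generations": "results-wiki-provided-example.jsonl"
  }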