Skip to content

Commit

Permalink
leaderboard: add info on function calling finetunes
Browse files Browse the repository at this point in the history
  • Loading branch information
zhudotexe committed Apr 24, 2024
1 parent 1341142 commit 20b9435
Show file tree
Hide file tree
Showing 10 changed files with 29 additions and 11 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@ You will also need to write a metadata file for your model. Your metadata file s
"citation": "The list of authors and year, in citation format (e.g. `Zhu et al., 2024`)",
"type": "FOUNDATION | FINETUNE | PROMPT | OTHER",
"context": "The context length of the model your system uses (as an int)",
"is_trained_for_function_calling": "Whether your model was trained for function calling specifically (true/false)",
"details": "Additional model details (e.g. API model revision or Hugging Face model ID) - optional",
"closedbook_generations": "YOUR-SYSTEM-NAME.jsonl",
"openbook_generations": "YOUR-SYSTEM-NAME.jsonl",
"evidenceprovided_generations": "YOUR-SYSTEM-NAME.jsonl"
Expand Down
2 changes: 2 additions & 0 deletions leaderboard-submissions/metadata/gemma-7b-it.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
"citation": "Gemma Team, 2024",
"type": "FOUNDATION",
"context": 8192,
"is_trained_for_function_calling": false,
"details": "google/gemma-1.1-7b-it",
"closedbook_generations": "results-closedbook-gemma.jsonl",
"openbook_generations": "results-openbook-gemma.jsonl",
"evidenceprovided_generations": "results-wiki-provided-gemma.jsonl"
Expand Down
4 changes: 3 additions & 1 deletion leaderboard-submissions/metadata/llama-3.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
{

Check failure on line 1 in leaderboard-submissions/metadata/llama-3.json

View workflow job for this annotation

GitHub Actions / evaluate

Invalid metadata file

SubmissionMetadata.__init__() got an unexpected keyword argument 'is_trained_for_function_calling'
"name": "Llama 3 70B-Instruct",
"name": "Llama 3 70B Instruct",
"authors": "Meta",
"url": "https://ai.meta.com/blog/meta-llama-3/",
"citation": "Meta, 2024",
"type": "FOUNDATION",
"context": 8192,
"is_trained_for_function_calling": false,
"details": "meta-llama/Meta-Llama-3-70B-Instruct",
"closedbook_generations": "results-closedbook-llama3.jsonl",
"openbook_generations": "results-openbook-llama3.jsonl",
"evidenceprovided_generations": "results-wiki-provided-llama3.jsonl"
Expand Down
3 changes: 2 additions & 1 deletion leaderboard-submissions/results/claude.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
"url": "https://www.anthropic.com/news/claude-2-1",
"citation": "Anthropic, 2023",
"type": "FOUNDATION",
"context": 200000
"context": 200000,
"is_trained_for_function_calling": false
},
"closedbook": {
"acc": {
Expand Down
4 changes: 3 additions & 1 deletion leaderboard-submissions/results/gpt-3.5-turbo.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
"url": "https://platform.openai.com/docs/models/gpt-3-5-turbo",
"citation": "OpenAI, 2023",
"type": "FOUNDATION",
"context": 16384
"context": 16384,
"is_trained_for_function_calling": true,
"details": "gpt-3.5-turbo-1106 (February 2024)"
},
"closedbook": {
"acc": {
Expand Down
4 changes: 3 additions & 1 deletion leaderboard-submissions/results/gpt-4-turbo.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
"url": "https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo",
"citation": "OpenAI, 2023",
"type": "FOUNDATION",
"context": 128000
"context": 128000,
"is_trained_for_function_calling": true,
"details": "gpt-4-0125-preview (February 2024)"
},
"closedbook": {
"acc": {
Expand Down
4 changes: 3 additions & 1 deletion leaderboard-submissions/results/gpt-4.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
"url": "https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo",
"citation": "OpenAI, 2023",
"type": "FOUNDATION",
"context": 8192
"context": 8192,
"is_trained_for_function_calling": true,
"details": "gpt-4-0613 (February 2024)"
},
"closedbook": {
"acc": {
Expand Down
5 changes: 3 additions & 2 deletions leaderboard-submissions/results/llama-chat.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
"_submission_hash": "",
"_results_hash": "",
"metadata": {
"name": "LLaMA 2 70B",
"name": "LLaMA 2 Chat 70B",
"authors": "Meta",
"url": "https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/",
"citation": "Touvron et al., 2023",
"type": "FOUNDATION",
"context": 4096
"context": 4096,
"is_trained_for_function_calling": false
},
"closedbook": {
"acc": {
Expand Down
6 changes: 4 additions & 2 deletions leaderboard-submissions/results/mistral-chat.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
"_submission_hash": "",
"_results_hash": "",
"metadata": {
"name": "Mistral-7B",
"name": "Mistral-7B Instruct",
"authors": "Mistral AI",
"url": "https://mistral.ai/news/announcing-mistral-7b/",
"citation": "Jiang et al., 2023",
"type": "FOUNDATION",
"context": 32000
"context": 32000,
"is_trained_for_function_calling": false,
"details": "mistralai/Mistral-7B-Instruct-v0.2"
},
"closedbook": {
"acc": {
Expand Down
6 changes: 4 additions & 2 deletions leaderboard-submissions/results/mixtral.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
"_submission_hash": "",
"_results_hash": "",
"metadata": {
"name": "Mixtral-8x7B",
"name": "Mixtral-8x7B Instruct",
"authors": "Mistral AI",
"url": "https://mistral.ai/news/mixtral-of-experts/",
"citation": "Jiang et al., 2024",
"type": "FOUNDATION",
"context": 32000
"context": 32000,
"is_trained_for_function_calling": false,
"details": "mistralai/Mixtral-8x7B-Instruct-v0.1"
},
"closedbook": {
"acc": {
Expand Down

0 comments on commit 20b9435

Please sign in to comment.