Merge pull request #11 from zhudotexe/mistral-updates
leaderboard: add mistral-small-2409 and mistral-large-2407
zhudotexe authored Oct 16, 2024
2 parents ccf127b + cee3cb3 commit 0a74f05
Showing 10 changed files with 4,553 additions and 0 deletions.

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions leaderboard-submissions/metadata/mistral-large-2407.json
@@ -0,0 +1,13 @@
{
"name": "Mistral-Large-Instruct-2407 (123B)",
"authors": "Mistral AI",
"url": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407",
"citation": "Mistral AI, 2024",
"type": "FOUNDATION",
"context": 128000,
"is_trained_for_function_calling": true,
"details": "mistralai/Mistral-Large-Instruct-2407",
"closedbook_generations": "results-closedbook-mistral-large-2407.jsonl",
"openbook_generations": "results-openbook-mistral-large-2407.jsonl",
"evidenceprovided_generations": "results-wiki-provided-mistral-large-2407.jsonl"
}
13 changes: 13 additions & 0 deletions leaderboard-submissions/metadata/mistral-small-2409.json
@@ -0,0 +1,13 @@
{
"name": "Mistral-Small-Instruct-2409 (22B)",
"authors": "Mistral AI",
"url": "https://huggingface.co/mistralai/Mistral-Small-Instruct-2409",
"citation": "Mistral AI, 2024",
"type": "FOUNDATION",
"context": 32000,
"is_trained_for_function_calling": true,
"details": "mistralai/Mistral-Small-Instruct-2409",
"closedbook_generations": "results-closedbook-mistral-small-2409.jsonl",
"openbook_generations": "results-openbook-mistral-small-2409.jsonl",
"evidenceprovided_generations": "results-wiki-provided-mistral-small-2409.jsonl"
}
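
Both metadata files added in this commit have the same shape. As a reading aid only, the following is a minimal Python sketch that loads one of these files and checks for the fields visible in the diff above; it is not code from this repository, and the key list is simply copied from the JSON shown here.

import json
from pathlib import Path

# Keys copied from the metadata files in this diff.
REQUIRED_KEYS = {
    "name", "authors", "url", "citation", "type", "context",
    "is_trained_for_function_calling", "details",
    "closedbook_generations", "openbook_generations", "evidenceprovided_generations",
}

def load_metadata(path):
    """Load a submission metadata JSON and check that the expected keys are present."""
    meta = json.loads(Path(path).read_text())
    missing = REQUIRED_KEYS - meta.keys()
    if missing:
        raise ValueError(f"{path} is missing keys: {sorted(missing)}")
    return meta

meta = load_metadata("leaderboard-submissions/metadata/mistral-small-2409.json")
print(meta["name"], "- context:", meta["context"])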

Large diffs are not rendered by default.

89 changes: 89 additions & 0 deletions leaderboard-submissions/results/mistral-large-2407.json
@@ -0,0 +1,89 @@
{
"_submission_hash": "502e4d50e21a828650ee76a974b4504b13fbe3a2b826f885a8737810ba6cf7c4",
"_results_hash": "dd8cec8421b6afd58ac7e41fd46131fd902f3a0810e32c99cddaf7194f026c25",
"metadata": {
"name": "Mistral-Large-Instruct-2407 (123B)",
"authors": "Mistral AI",
"url": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407",
"citation": "Mistral AI, 2024",
"type": "FOUNDATION",
"context": 128000,
"is_trained_for_function_calling": true,
"details": "mistralai/Mistral-Large-Instruct-2407"
},
"closedbook": {
"acc": {
"loose": 0.47449874682486926,
"strict": 0.0856353591160221
},
"rouge": {
"rouge1": {
"precision": 0.4864243147808971,
"recall": 0.5366503254559003,
"fscore": 0.4722998986947336
},
"rouge2": {
"precision": 0.2636329382913684,
"recall": 0.3014700111537019,
"fscore": 0.2658354015590046
},
"rougeL": {
"precision": 0.41073744565002507,
"recall": 0.453494414004631,
"fscore": 0.39730944466764906
}
},
"bleurt": 0.4876885742710605,
"gpt": 0.20994475138121546
},
"openbook": {
"acc": {
"loose": 0.4044278886547418,
"strict": 0.07044198895027624
},
"rouge": {
"rouge1": {
"precision": 0.2557227715677404,
"recall": 0.46883375014495277,
"fscore": 0.27940024078902215
},
"rouge2": {
"precision": 0.12390861981624171,
"recall": 0.22520634201691972,
"fscore": 0.13809281378874866
},
"rougeL": {
"precision": 0.22428492555925336,
"recall": 0.4121212010870503,
"fscore": 0.24510882975840717
}
},
"bleurt": 0.4298844895124929,
"gpt": 0.18646408839779005
},
"evidenceprovided": {
"acc": {
"loose": 0.5162282511877617,
"strict": 0.08977900552486189
},
"rouge": {
"rouge1": {
"precision": 0.30008253611396407,
"recall": 0.584874437443051,
"fscore": 0.3591704026551226
},
"rouge2": {
"precision": 0.15353835199713906,
"recall": 0.2940244400309352,
"fscore": 0.1841714868990497
},
"rougeL": {
"precision": 0.24842762182621592,
"recall": 0.4850152873888482,
"fscore": 0.2966229987248017
}
},
"bleurt": 0.4837934568380289,
"gpt": 0.19751381215469613
}
}
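
The "_submission_hash" and "_results_hash" fields above are 64-character hexadecimal strings, the usual format of SHA-256 digests. What exactly is hashed is not specified in this diff, so the sketch below only illustrates computing such a digest over a file's raw bytes; the input is an assumption, and the result is not claimed to reproduce the stored values.

import hashlib
from pathlib import Path

def sha256_hex(path):
    """SHA-256 hex digest of a file's raw bytes (64 hex characters)."""
    return hashlib.sha256(Path(path).read_bytes()).hexdigest()

# Filename taken from the metadata's "closedbook_generations" field above;
# its directory is not shown in this diff.
print(sha256_hex("results-closedbook-mistral-large-2407.jsonl"))
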
89 changes: 89 additions & 0 deletions leaderboard-submissions/results/mistral-small-2409.json
@@ -0,0 +1,89 @@
{
"_submission_hash": "b0c33b6049f25a156a7bb36445669697538a3542f7a68d33f2c19bb3c03ff28c",
"_results_hash": "f1b6be08b07d743b1160e478cdffcc0aa000d996777ccffe4149d39e4cd21f56",
"metadata": {
"name": "Mistral-Small-Instruct-2409 (22B)",
"authors": "Mistral AI",
"url": "https://huggingface.co/mistralai/Mistral-Small-Instruct-2409",
"citation": "Mistral AI, 2024",
"type": "FOUNDATION",
"context": 32000,
"is_trained_for_function_calling": true,
"details": "mistralai/Mistral-Small-Instruct-2409"
},
"closedbook": {
"acc": {
"loose": 0.45009737596509086,
"strict": 0.0718232044198895
},
"rouge": {
"rouge1": {
"precision": 0.40413593124020636,
"recall": 0.5203778032263678,
"fscore": 0.4289685522275595
},
"rouge2": {
"precision": 0.22677952683741726,
"recall": 0.28246582033023643,
"fscore": 0.23970226499726682
},
"rougeL": {
"precision": 0.33815109507904656,
"recall": 0.4376086364815907,
"fscore": 0.35935635298293217
}
},
"bleurt": 0.47831919019558156,
"gpt": 0.16988950276243095
},
"openbook": {
"acc": {
"loose": 0.1648043800345083,
"strict": 0.024861878453038673
},
"rouge": {
"rouge1": {
"precision": 0.08807170710615721,
"recall": 0.19938182128421078,
"fscore": 0.10339134253355729
},
"rouge2": {
"precision": 0.034980458138033084,
"recall": 0.08356692517473442,
"fscore": 0.04381925675910583
},
"rougeL": {
"precision": 0.07920926275369623,
"recall": 0.17572237714942518,
"fscore": 0.09205489092446673
}
},
"bleurt": 0.3076735539927832,
"gpt": 0.05939226519337017
},
"evidenceprovided": {
"acc": {
"loose": 0.5412839161496934,
"strict": 0.11602209944751381
},
"rouge": {
"rouge1": {
"precision": 0.5434171449488068,
"recall": 0.5991728084304127,
"fscore": 0.5257063117207197
},
"rouge2": {
"precision": 0.3034771259186124,
"recall": 0.33855921563246183,
"fscore": 0.3014339606975428
},
"rougeL": {
"precision": 0.4337592194646311,
"recall": 0.48099067004018287,
"fscore": 0.41983527281644906
}
},
"bleurt": 0.521161766897743,
"gpt": 0.2223756906077348
}
}
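
The two results files above share the same structure: for each setting ("closedbook", "openbook", "evidenceprovided") they record "loose" and "strict" accuracy, ROUGE-1/2/L precision, recall, and F-score, a BLEURT score, and a "gpt" score. As an illustration only, here is a small Python sketch that loads both files as they appear in this commit and prints the accuracies per setting, assuming it is run from the repository root; it is not part of the repository's tooling.

import json
from pathlib import Path

RESULTS_DIR = Path("leaderboard-submissions/results")
SETTINGS = ("closedbook", "openbook", "evidenceprovided")

for fname in ("mistral-large-2407.json", "mistral-small-2409.json"):
    results = json.loads((RESULTS_DIR / fname).read_text())
    print(results["metadata"]["name"])
    for setting in SETTINGS:
        acc = results[setting]["acc"]
        print(f"  {setting:>16}: loose acc = {acc['loose']:.3f}, strict acc = {acc['strict']:.3f}")

On the openbook setting, for example, the loose accuracies recorded in this commit are roughly 0.40 for the 123B model and 0.16 for the 22B model.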
