Skip to content

Commit

Permalink
Add evaluator weighted_alpaca_eval_gpt-4o-mini-2024-07-18 (#401)
Browse files Browse the repository at this point in the history
* Add evaluator weighted_alpaca_eval_gpt-4o-mini-2024-07-18

* Fix price for gpt-4o-mini-2024-07-18
  • Loading branch information
tongyx361 authored Aug 26, 2024
1 parent f1f6f9c commit 9136c7f
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 1 deletion.
6 changes: 5 additions & 1 deletion src/alpaca_eval/decoders/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,11 @@ def _get_price_per_token(model, price_per_token=None):
"""Returns the price per token for a given model"""
if price_per_token is not None:
return float(price_per_token)
if "gpt-4-turbo" in model:
if "gpt-4o-mini-2024-07-18" in model:
return (
0.15 / 1_000_000
) # approximation: this is the input (context) price; output tokens cost 0.60 / 1_000_000, but close enough given that most tokens are context
elif "gpt-4-turbo" in model:
return 0.01 / 1000
elif "gpt-4-1106" in model:
return (
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Evaluator configuration "weighted_alpaca_eval_gpt-4o-mini-2024-07-18".
# NOTE(review): nesting indentation appears to have been lost in this view; the
# keys below presumably nest under the evaluator name (and under
# completions_kwargs / completion_parser_kwargs) -- confirm against the repository file.
weighted_alpaca_eval_gpt-4o-mini-2024-07-18:
# Reuses the prompt template stored under the alpaca_eval_clf_gpt4_turbo directory.
prompt_template: "alpaca_eval_clf_gpt4_turbo/alpaca_eval_clf.txt"
fn_completions: "openai_completions"
completions_kwargs:
model_name: "gpt-4o-mini-2024-07-18"
# Only one token is generated; its log-probabilities are requested below.
max_tokens: 1
temperature: 1 # temperature only affects sampling, so it should have no effect on the logprobs used here.
logprobs: true
top_logprobs: 5
fn_completion_parser: "logprob_parser"
completion_parser_kwargs:
# Presumably the score is the probability of "m" relative to the combined
# probability of "m" and "M" -- TODO confirm against logprob_parser.
numerator_token: "m"
denominator_tokens: ["m", "M"]
is_binarize: false
completion_key: "completions_all"
batch_size: 1
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ gpt4_turbo_cot_logprob,67.86974910317902,5.397145061728395,1568.9484159171295,0.
gpt4_turbo_cot_clf,67.59689922480621,5.3972248062015495,1528.4046718706977,0.6666666666666667,0.6326057742256878,,,0.5936794582392777,0.5855855855855856,0.5255813953488373,645,verified
claude_ranking,67.5925925925926,4.954578395061729,218.4230414438272,0.9,0.90848221004591,,,0.7303370786516854,0.6576576576576577,0.4552469135802468,648,verified
alpaca_eval_llama3_70b_fn,67.53091913784353,0.41207197526091993,208.69685160402955,0.9,0.8577236113497642,32.25308641975309,8.204334365325078,0.7910112359550562,0.6576576576576577,0.47931967529957475,2587,minimal
weighted_alpaca_eval_gpt-4o-mini-2024-07-18,0.33674775667807266,12.90736111111111,93.54821923706267,0.9833333333333333,0.9389828560875118,32.24432530355238,14.380747136032564,0.7094594594594594,0.6306306306306306,0.5017959384038282,2592,minimal

This comment has been minimized.

Copy link
@kygguo

kygguo Dec 26, 2024

The human agreement value seems missing, could you please check this?

gpt4,66.93672839506173,12.452592592592593,1036.788589334915,0.8833333333333333,0.8668599990267735,31.481481481481488,14.621913580246911,0.647191011235955,0.6666666666666666,0.5397376543209877,2592,minimal
alpaca_farm_greedy_gpt4,66.43518518518519,15.28163425925926,877.6250469425926,0.8499999999999999,0.7481465609199582,30.246913580246915,19.290123456790123,0.597752808988764,0.6486486486486487,0.5362654320987654,2592,minimal
weighted_alpaca_eval_gpt4_turbo,65.73198824263118,4.323981481481481,227.7462866895061,0.7833333333333333,0.7688872243700914,33.89896126543981,23.652705035108028,0.6058558558558559,0.5727272727272728,0.5282783420419752,2592,minimal
Expand Down

0 comments on commit 9136c7f

Please sign in to comment.