Commit

Fix price for gpt-4o-mini-2024-07-18
tongyx361 committed Aug 24, 2024
1 parent bb9d03a commit 3bd2098
Showing 2 changed files with 76 additions and 23 deletions.
97 changes: 75 additions & 22 deletions src/alpaca_eval/decoders/openai.py
@@ -87,7 +87,9 @@ def openai_completions(
logging.info("No samples to annotate.")
return []
else:
logging.info(f"Using `openai_completions` on {n_examples} prompts using {model_name}.")
logging.info(
f"Using `openai_completions` on {n_examples} prompts using {model_name}."
)

if tokens_to_avoid or tokens_to_favor:
tokenizer = tiktoken.encoding_for_model(model_name)
@@ -97,7 +99,9 @@
for t in tokens_to_avoid:
curr_tokens = tokenizer.encode(t)
if len(curr_tokens) != 1 and is_skip_multi_tokens_to_avoid:
logging.warning(f"'{t}' has more than one token, skipping because `is_skip_multi_tokens_to_avoid`.")
logging.warning(
f"'{t}' has more than one token, skipping because `is_skip_multi_tokens_to_avoid`."
)
continue
for tok_id in curr_tokens:
logit_bias[tok_id] = -100 # avoids certain tokens
@@ -113,15 +117,19 @@
if is_strip:
prompts = [p.strip() for p in prompts]

requires_chatml = decoding_kwargs.pop("requires_chatml", _requires_chatml(model_name))
requires_chatml = decoding_kwargs.pop(
"requires_chatml", _requires_chatml(model_name)
)
decoding_kwargs["is_chat"] = decoding_kwargs.get("is_chat", requires_chatml)
if requires_chatml:
prompts = [utils.prompt_to_chatml(prompt) for prompt in prompts]
num_procs = num_procs or 2
batch_size = batch_size or 1

if batch_size > 1:
logging.warning("batch_size > 1 is not supported yet for chat models. Setting to 1")
logging.warning(
"batch_size > 1 is not supported yet for chat models. Setting to 1"
)
batch_size = 1

else:
@@ -130,7 +138,10 @@

n_batches = int(math.ceil(n_examples / batch_size))

prompt_batches = [prompts[batch_id * batch_size : (batch_id + 1) * batch_size] for batch_id in range(n_batches)]
prompt_batches = [
prompts[batch_id * batch_size : (batch_id + 1) * batch_size]
for batch_id in range(n_batches)
]

if isinstance(max_tokens, int):
max_tokens = [max_tokens] * n_examples
@@ -149,7 +160,9 @@
]
else:
with ThreadPoolExecutor(max_workers=num_procs) as p:
partial_completion_helper = functools.partial(_openai_completion_helper, **kwargs)
partial_completion_helper = functools.partial(
_openai_completion_helper, **kwargs
)
completions = list(
tqdm.tqdm(
p.map(partial_completion_helper, inputs),
@@ -160,7 +173,11 @@
logging.info(f"Completed {n_examples} examples in {t}.")

# flatten the list and select only the text
completions_all = [completion for completion_batch in completions for completion in completion_batch]
completions_all = [
completion
for completion_batch in completions
for completion in completion_batch
]
completions_text = [completion["text"] for completion in completions_all]

price = [
@@ -186,9 +203,15 @@ def _openai_completion_helper(
temperature: Optional[float] = 0.7,
client_config_path: utils.AnyPath = constants.OPENAI_CLIENT_CONFIG_PATH, # see `client_configs/README.md`
# following is only for backward compatibility and should be avoided
openai_organization_ids: Optional[Sequence[str]] = constants.OPENAI_ORGANIZATION_IDS,
openai_organization_ids: Optional[
Sequence[str]
] = constants.OPENAI_ORGANIZATION_IDS,
openai_api_keys: Optional[Sequence[str]] = constants.OPENAI_API_KEYS,
openai_api_base: Optional[str] = os.getenv("OPENAI_API_BASE") if os.getenv("OPENAI_API_BASE") else openai.base_url,
openai_api_base: Optional[str] = (
os.getenv("OPENAI_API_BASE")
if os.getenv("OPENAI_API_BASE")
else openai.base_url
),
############################
client_kwargs: Optional[dict[str, Any]] = None,
n_retries: Optional[int] = 10,
@@ -212,7 +235,9 @@
# randomly select the client
client_idcs = range(len(all_clients))
curr_client_idx = random.choice(client_idcs)
logging.info(f"Using OAI client number {curr_client_idx+1} out of {len(client_idcs)}.")
logging.info(
f"Using OAI client number {curr_client_idx+1} out of {len(client_idcs)}."
)
client = all_clients[curr_client_idx]

# copy shared_kwargs to avoid modifying it
@@ -224,7 +249,9 @@
for _ in range(n_retries):
try:
if is_chat:
completion_batch = client.chat.completions.create(messages=prompt_batch[0], **curr_kwargs)
completion_batch = client.chat.completions.create(
messages=prompt_batch[0], **curr_kwargs
)

choices = completion_batch.choices
for i, choice in enumerate(choices):
@@ -244,24 +271,34 @@

if choice.message.tool_calls is not None:
# currently we only use function calls to get a JSON object => return raw text of json
choices[i]["text"] = choice.message.tool_calls[0].function.arguments
choices[i]["text"] = choice.message.tool_calls[
0
].function.arguments

else:
completion_batch = client.completions.create(prompt=prompt_batch, **curr_kwargs)
completion_batch = client.completions.create(
prompt=prompt_batch, **curr_kwargs
)
choices = completion_batch.choices
for i, choice in enumerate(choices):
choices[i] = choice.model_dump()

for choice in choices:
choice["total_tokens"] = completion_batch.usage.total_tokens / len(prompt_batch)
choice["total_tokens"] = completion_batch.usage.total_tokens / len(
prompt_batch
)
break
except openai.OpenAIError as e:
logging.warning(f"OpenAIError: {e}.")
if "Please reduce" in str(e):
kwargs["max_tokens"] = int(kwargs["max_tokens"] * 0.8)
logging.warning(f"Reducing target length to {kwargs['max_tokens']}, Retrying...")
logging.warning(
f"Reducing target length to {kwargs['max_tokens']}, Retrying..."
)
if kwargs["max_tokens"] == 0:
logging.exception("Prompt is already longer than max context length. Error:")
logging.exception(
"Prompt is already longer than max context length. Error:"
)
raise e
elif "Please try again with a different prompt." in str(e):
logging.warning(
@@ -276,12 +313,20 @@
if "rate limit" in str(e).lower():
logging.warning(f"Hit request rate limit; retrying...")
else:
logging.warning(f"Unknown error. \n It's likely a rate limit so we are retrying...")
logging.warning(
f"Unknown error. \n It's likely a rate limit so we are retrying..."
)
if len(all_clients) > 1:
curr_client_idx = random.choice([idx for idx in client_idcs if idx != curr_client_idx])
curr_client_idx = random.choice(
[idx for idx in client_idcs if idx != curr_client_idx]
)
client = all_clients[curr_client_idx]
logging.info(f"Switching OAI client to client number {curr_client_idx}.")
logging.info(f"Sleeping {sleep_time} before retrying to call openai API...")
logging.info(
f"Switching OAI client to client number {curr_client_idx}."
)
logging.info(
f"Sleeping {sleep_time} before retrying to call openai API..."
)
time.sleep(sleep_time) # Annoying rate limit on requests.

if choices is None:
@@ -295,15 +340,23 @@
def _requires_chatml(model: str) -> bool:
"""Whether a model requires the ChatML format."""
# TODO: this should ideally be an OpenAI function... Maybe it already exists?
not_chatml = ("instruct" in model) or ("gpt-3" in model and "turbo" not in model) or (model.startswith("text-"))
not_chatml = (
("instruct" in model)
or ("gpt-3" in model and "turbo" not in model)
or (model.startswith("text-"))
)
return not not_chatml


def _get_price_per_token(model, price_per_token=None):
"""Returns the price per token for a given model"""
if price_per_token is not None:
return float(price_per_token)
if "gpt-4-turbo" in model:
if "gpt-4o-mini-2024-07-18" in model:
return (
0.15 / 1_000_000
) # that's not completely true because decoding is 0.03 but close enough given that most is context
elif "gpt-4-turbo" in model:
return 0.01 / 1000
elif "gpt-4-1106" in model:
return (
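The substantive change in this file is the new `gpt-4o-mini-2024-07-18` branch of `_get_price_per_token`; the other hunks are formatting-only. A rough sketch of how that per-token price turns into an estimated annotation cost — the helper below is a simplified stand-in for the function in the diff (the real one covers many more models), and the token counts are hypothetical:

```python
def _get_price_per_token(model: str, price_per_token=None) -> float:
    """Simplified stand-in for the `_get_price_per_token` shown in the diff above."""
    if price_per_token is not None:
        return float(price_per_token)
    if "gpt-4o-mini-2024-07-18" in model:
        # prompt-side price; completion tokens cost more, but most tokens are context
        return 0.15 / 1_000_000
    elif "gpt-4-turbo" in model:
        return 0.01 / 1000
    raise ValueError(f"No known price for model {model!r}")


# hypothetical `usage.total_tokens` values for three annotated examples
total_tokens = [850, 920, 790]
price = sum(_get_price_per_token("gpt-4o-mini-2024-07-18") * t for t in total_tokens)
print(f"~${price:.6f} for {sum(total_tokens)} tokens")  # ~$0.000384 for 2560 tokens
```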
@@ -11,7 +11,7 @@ gpt4_turbo_cot_logprob,67.86974910317902,5.397145061728395,1568.9484159171295,0.
gpt4_turbo_cot_clf,67.59689922480621,5.3972248062015495,1528.4046718706977,0.6666666666666667,0.6326057742256878,,,0.5936794582392777,0.5855855855855856,0.5255813953488373,645,verified
claude_ranking,67.5925925925926,4.954578395061729,218.4230414438272,0.9,0.90848221004591,,,0.7303370786516854,0.6576576576576577,0.4552469135802468,648,verified
alpaca_eval_llama3_70b_fn,67.53091913784353,0.41207197526091993,208.69685160402955,0.9,0.8577236113497642,32.25308641975309,8.204334365325078,0.7910112359550562,0.6576576576576577,0.47931967529957475,2587,minimal
weighted_alpaca_eval_gpt-4o-mini-2024-07-18,67.34955133561454,12.90736111111111,93.54821923706267,0.9833333333333333,0.9389828560875118,32.24432530355238,14.380747136032564,0.7094594594594594,0.6306306306306306,0.5017959384038282,2592,minimal
weighted_alpaca_eval_gpt-4o-mini-2024-07-18,0.33674775667807266,12.90736111111111,93.54821923706267,0.9833333333333333,0.9389828560875118,32.24432530355238,14.380747136032564,0.7094594594594594,0.6306306306306306,0.5017959384038282,2592,minimal
gpt4,66.93672839506173,12.452592592592593,1036.788589334915,0.8833333333333333,0.8668599990267735,31.481481481481488,14.621913580246911,0.647191011235955,0.6666666666666666,0.5397376543209877,2592,minimal
alpaca_farm_greedy_gpt4,66.43518518518519,15.28163425925926,877.6250469425926,0.8499999999999999,0.7481465609199582,30.246913580246915,19.290123456790123,0.597752808988764,0.6486486486486487,0.5362654320987654,2592,minimal
weighted_alpaca_eval_gpt4_turbo,65.73198824263118,4.323981481481481,227.7462866895061,0.7833333333333333,0.7688872243700914,33.89896126543981,23.652705035108028,0.6058558558558559,0.5727272727272728,0.5282783420419752,2592,minimal
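Most of the Python hunks above only re-wrap lines, but they all sit in the decoder's fan-out path: `openai_completions` slices the prompts into batches and maps `_openai_completion_helper` over them with a thread pool. A minimal standalone sketch of that pattern, using a dummy worker in place of the real OpenAI call (function and variable names here are illustrative, not the repository's):

```python
import functools
import math
from concurrent.futures import ThreadPoolExecutor


def _worker(prompt_batch, model_name="gpt-4o-mini-2024-07-18"):
    # stand-in for `_openai_completion_helper`: one dict per prompt in the batch
    return [{"text": f"reply to {p!r}", "model": model_name} for p in prompt_batch]


prompts = ["a", "b", "c", "d", "e"]
batch_size = 2
n_batches = math.ceil(len(prompts) / batch_size)
prompt_batches = [
    prompts[i * batch_size : (i + 1) * batch_size] for i in range(n_batches)
]

with ThreadPoolExecutor(max_workers=2) as pool:
    partial_worker = functools.partial(_worker, model_name="gpt-4o-mini-2024-07-18")
    completions = list(pool.map(partial_worker, prompt_batches))

# flatten the per-batch results, as in the diff
completions_all = [c for batch in completions for c in batch]
print(len(completions_all))  # 5
```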
