
Commit 1c6445d
Merge pull request #171 from ag2ai/openaitestmodel
Update of OpenAI (and Azure) models and versions
qingyun-wu authored Dec 15, 2024
2 parents 5e7758b + 07759f3 commit 1c6445d
Showing 28 changed files with 114 additions and 134 deletions.
5 changes: 2 additions & 3 deletions test/agentchat/contrib/agent_eval/test_agent_eval.py
@@ -38,13 +38,12 @@ def remove_ground_truth(test_case: str):
     filter_dict={
         "api_type": ["openai"],
         "model": [
+            "gpt-4o-mini",
+            "gpt-4o",
             "gpt-4-turbo",
             "gpt-4-turbo-preview",
             "gpt-4-0125-preview",
             "gpt-4-1106-preview",
-            "gpt-3.5-turbo",
-            "gpt-3.5-turbo-0125",
-            "gpt-3.5-turbo-1106",
         ],
     },
 )
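
The filter above controls which entries from OAI_CONFIG_LIST the test may use, so retiring the gpt-3.5 names simply means those configs stop matching. The standalone sketch below illustrates only that matching rule; the helper name and the sample configs are invented for illustration, and this is not the library's implementation.

# A minimal sketch of the rule a filter_dict applies, assuming a list-valued
# filter means "accept any of these values". Hypothetical helper, not autogen's.
from typing import Any, Dict, List


def filter_config_list(configs: List[Dict[str, Any]], filter_dict: Dict[str, List[str]]) -> List[Dict[str, Any]]:
    """Keep only the configs whose values satisfy every key in filter_dict."""

    def matches(cfg: Dict[str, Any]) -> bool:
        for key, accepted in filter_dict.items():
            value = cfg.get(key)
            values = value if isinstance(value, list) else [value]  # e.g. "tags" holds a list
            if not any(v in accepted for v in values):
                return False
        return True

    return [cfg for cfg in configs if matches(cfg)]


# Only the first entry survives the filter used in the test above.
sample_configs = [
    {"api_type": "openai", "model": "gpt-4o", "api_key": "sk-placeholder"},
    {"api_type": "openai", "model": "gpt-3.5-turbo", "api_key": "sk-placeholder"},
]
print(filter_config_list(sample_configs, {"api_type": ["openai"], "model": ["gpt-4o-mini", "gpt-4o"]}))
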
@@ -17,12 +17,8 @@
 from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST  # noqa: E402

 # Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input.
-filter_dict = {"model": ["gpt-4-0125-preview"]}
-# filter_dict = {"model": ["gpt-3.5-turbo-1106"]}
-# filter_dict = {"model": ["gpt-4-0613"]}
-# filter_dict = {"model": ["gpt-3.5-turbo"]}
-# filter_dict = {"model": ["gpt-4"]}
-# filter_dict = {"model": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
+filter_dict = {"model": ["gpt-4o-mini"]}
+# filter_dict = {"model": ["gpt-4-0125-preview"]}


 def create_teachable_agent(reset_db=False):
@@ -32,7 +32,7 @@
 sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
 from conftest import MOCK_OPEN_AI_API_KEY, skip_openai  # noqa: E402

-filter_dict = {"model": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
+filter_dict = {"model": ["gpt-4o-mini"]}

 RESOLUTIONS = ["256x256", "512x512", "1024x1024"]
 QUALITIES = ["standard", "hd"]
@@ -67,21 +67,21 @@ def api_key():

 @pytest.fixture
 def dalle_config() -> Dict[str, Any]:
-    config_list = openai_utils.config_list_from_models(model_list=["dall-e-2"], exclude="aoai")
+    config_list = openai_utils.config_list_from_models(model_list=["dall-e-3"], exclude="aoai")
     if not config_list:
-        config_list = [{"model": "dall-e-2", "api_key": api_key()}]
+        config_list = [{"model": "dall-e-3", "api_key": api_key()}]
     return {"config_list": config_list, "timeout": 120, "cache_seed": None}


 @pytest.fixture
-def gpt3_config() -> Dict[str, Any]:
+def gpt4_config() -> Dict[str, Any]:
     config_list = [
         {
-            "model": "gpt-35-turbo-16k",
+            "model": "gpt-4o-mini",
             "api_key": api_key(),
         },
         {
-            "model": "gpt-3.5-turbo-16k",
+            "model": "gpt-4o",
             "api_key": api_key(),
         },
     ]
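
The DALL-E fixture keeps its lookup-then-fallback shape: ask openai_utils for matching entries first, then hand-build one if nothing is found. Below is a compressed sketch of that pattern; reading the key from the OPENAI_API_KEY environment variable is an assumption standing in for the test's api_key() helper.

# Sketch of the "library lookup first, manual fallback second" fixture pattern.
import os

from autogen.oai.openai_utils import config_list_from_models


def dalle_config() -> dict:
    config_list = config_list_from_models(model_list=["dall-e-3"], exclude="aoai")
    if not config_list:
        # No matching entry found on disk: fall back to a single hand-written config.
        config_list = [{"model": "dall-e-3", "api_key": os.environ.get("OPENAI_API_KEY", "")}]
    return {"config_list": config_list, "timeout": 120, "cache_seed": None}
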
4 changes: 1 addition & 3 deletions test/agentchat/contrib/capabilities/test_teachable_agent.py
@@ -31,10 +31,8 @@
 # Specify the model to use by uncommenting one of the following lines.
 # filter_dict={"model": ["gpt-4-1106-preview"]}
 # filter_dict={"model": ["gpt-4-0613"]}
-# filter_dict={"model": ["gpt-3.5-turbo-1106"]}
-# filter_dict={"model": ["gpt-3.5-turbo-0613"]}
 # filter_dict={"model": ["gpt-4"]}
-filter_dict = {"tags": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
+filter_dict = {"tags": ["gpt-4o-mini"]}


 def create_teachable_agent(reset_db=False, verbosity=0):
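
Filtering on "tags" only works if the entries in OAI_CONFIG_LIST actually carry a tags field. The excerpt below is hypothetical file content that the new filter would match; the keys and tags are placeholders, not taken from the repository's real config.

# Hypothetical OAI_CONFIG_LIST contents matched by filter_dict = {"tags": ["gpt-4o-mini"]}.
import json

example_oai_config_list = [
    {"model": "gpt-4o-mini", "api_key": "sk-placeholder", "tags": ["gpt-4o-mini"]},
    {"model": "gpt-4o", "api_key": "sk-placeholder", "tags": ["gpt-4o"]},
]

# Written to disk as OAI_CONFIG_LIST, only the first entry carries the requested tag.
print(json.dumps(example_oai_config_list, indent=2))
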
@@ -33,7 +33,7 @@ def test_transform_messages_capability():
         OAI_CONFIG_LIST,
         KEY_LOC,
         filter_dict={
-            "model": "gpt-3.5-turbo",
+            "model": "gpt-4o-mini",
         },
     )

20 changes: 10 additions & 10 deletions test/agentchat/contrib/test_agent_builder.py
@@ -51,8 +51,8 @@ def test_build():
     builder = AgentBuilder(
         config_file_or_env=OAI_CONFIG_LIST,
         config_file_location=KEY_LOC,
-        builder_model_tags=["gpt-4"],
-        agent_model_tags=["gpt-4"],
+        builder_model_tags=["gpt-4o"],
+        agent_model_tags=["gpt-4o"],
     )
     building_task = (
         "Find a paper on arxiv by programming, and analyze its application in some domain. "
@@ -83,8 +83,8 @@ def test_build_from_library():
     builder = AgentBuilder(
         config_file_or_env=OAI_CONFIG_LIST,
         config_file_location=KEY_LOC,
-        builder_model_tags=["gpt-4"],
-        agent_model_tags=["gpt-4"],
+        builder_model_tags=["gpt-4o"],
+        agent_model_tags=["gpt-4o"],
     )
     building_task = (
         "Find a paper on arxiv by programming, and analyze its application in some domain. "
@@ -136,8 +136,8 @@ def test_save():
     builder = AgentBuilder(
         config_file_or_env=OAI_CONFIG_LIST,
         config_file_location=KEY_LOC,
-        builder_model_tags=["gpt-4"],
-        agent_model_tags=["gpt-4"],
+        builder_model_tags=["gpt-4o"],
+        agent_model_tags=["gpt-4o"],
     )
     building_task = (
         "Find a paper on arxiv by programming, and analyze its application in some domain. "
@@ -175,8 +175,8 @@ def test_load():
         config_file_location=KEY_LOC,
         # builder_model=["gpt-4", "gpt-4-1106-preview"],
         # agent_model=["gpt-4", "gpt-4-1106-preview"],
-        builder_model_tags=["gpt-4"],
-        agent_model_tags=["gpt-4"],
+        builder_model_tags=["gpt-4o"],
+        agent_model_tags=["gpt-4o"],
     )

     config_save_path = f"{here}/example_test_agent_builder_config.json"
@@ -204,8 +204,8 @@ def test_clear_agent():
     builder = AgentBuilder(
         config_file_or_env=OAI_CONFIG_LIST,
         config_file_location=KEY_LOC,
-        builder_model_tags=["gpt-4"],
-        agent_model_tags=["gpt-4"],
+        builder_model_tags=["gpt-4o"],
+        agent_model_tags=["gpt-4o"],
     )

     config_save_path = f"{here}/example_test_agent_builder_config.json"
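
All of these tests now select the builder and agent models through tags instead of hard-coded names. The sketch below mirrors the constructor arguments visible in the hunks; it assumes an OAI_CONFIG_LIST file in the working directory whose gpt-4o entries are tagged "gpt-4o", and the build() call is written from memory of the builder's interface, so treat its parameter names and return values as assumptions.

# Sketch of the builder pattern exercised above, under the assumptions stated in
# the lead-in: builder_model_tags selects the model that plans the agent team,
# agent_model_tags selects the model handed to the generated agents.
from autogen.agentchat.contrib.agent_builder import AgentBuilder

builder = AgentBuilder(
    config_file_or_env="OAI_CONFIG_LIST",
    config_file_location=".",
    builder_model_tags=["gpt-4o"],
    agent_model_tags=["gpt-4o"],
)
agent_list, agent_configs = builder.build(
    building_task="Find a paper on arxiv by programming, and analyze its application in some domain.",
    default_llm_config={"temperature": 0},
)
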
3 changes: 0 additions & 3 deletions test/agentchat/contrib/test_gpt_assistant.py
@@ -40,9 +40,6 @@
             "gpt-4-turbo-preview",
             "gpt-4-0125-preview",
             "gpt-4-1106-preview",
-            "gpt-3.5-turbo",
-            "gpt-3.5-turbo-0125",
-            "gpt-3.5-turbo-1106",
         ],
     },
 )
@@ -48,7 +48,7 @@ def test_group_chat_with_llama_index_conversable_agent(chat_mock: MagicMock) ->
     Each agent is set to describe an image in a unique style, but the chat should not exceed the specified max_rounds.
     """
     llm = OpenAI(
-        model="gpt-4",
+        model="gpt-4o",
         temperature=0.0,
         api_key=openaiKey,
     )
7 changes: 5 additions & 2 deletions test/agentchat/contrib/test_reasoning_agent.py
@@ -43,7 +43,7 @@ def think_node():
 @pytest.fixture
 def reasoning_agent():
     """Create a ReasoningAgent instance for testing"""
-    config_list = [{"model": "gpt-4", "api_key": "fake_key"}]
+    config_list = [{"model": "gpt-4o", "api_key": "fake_key"}]
     llm_config = {"config_list": config_list, "temperature": 0}
     return ReasoningAgent("reasoning_agent", llm_config=llm_config)

@@ -164,7 +164,10 @@ def test_reasoning_agent_answer():

 def helper_test_reasoning_agent_answer(max_depth, beam_size, answer_approach):
     """Test that ReasoningAgent properly terminates when TERMINATE is received"""
-    mock_config = {"config_list": [{"model": "gpt-4", "api_key": "fake", "base_url": "0.0.0.0:8000"}], "temperature": 0}
+    mock_config = {
+        "config_list": [{"model": "gpt-4o", "api_key": "fake", "base_url": "0.0.0.0:8000"}],
+        "temperature": 0,
+    }
     with patch("autogen.agentchat.conversable_agent.ConversableAgent.generate_oai_reply") as mock_oai_reply:
         agent = ReasoningAgent(
             "test_agent",
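
The helper above never reaches a real endpoint: ConversableAgent.generate_oai_reply is patched, which is why a fake key and a local base_url are harmless. A condensed sketch of that offline pattern follows; the import path and the canned TERMINATE reply are assumptions based on this test module, not a documented recipe.

# Offline-test sketch: patch generate_oai_reply so no request ever leaves the process.
from unittest.mock import patch

from autogen.agentchat.contrib.reasoning_agent import ReasoningAgent

mock_config = {
    "config_list": [{"model": "gpt-4o", "api_key": "fake", "base_url": "0.0.0.0:8000"}],
    "temperature": 0,
}

with patch("autogen.agentchat.conversable_agent.ConversableAgent.generate_oai_reply") as mock_oai_reply:
    mock_oai_reply.return_value = (True, "TERMINATE")  # canned reply instead of a model call
    agent = ReasoningAgent("test_agent", llm_config=mock_config)
    # Anything the agent "generates" now comes from the mock, not the fake endpoint.
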
2 changes: 1 addition & 1 deletion test/agentchat/test_agent_logging.py
@@ -50,7 +50,7 @@
 config_list = autogen.config_list_from_json(
     OAI_CONFIG_LIST,
     filter_dict={
-        "tags": ["gpt-3.5-turbo"],
+        "tags": ["gpt-4o-mini"],
     },
     file_location=KEY_LOC,
 )
18 changes: 9 additions & 9 deletions test/agentchat/test_agent_usage.py
@@ -32,44 +32,44 @@ def test_gathering():
         system_message="You are a helpful assistant.",
         llm_config={
             "config_list": config_list,
-            "model": "gpt-3.5-turbo-0613",
+            "model": "gpt-4o-mini",
         },
     )
     assistant2 = AssistantAgent(
         "assistant",
         system_message="You are a helpful assistant.",
         llm_config={
             "config_list": config_list,
-            "model": "gpt-3.5-turbo-0613",
+            "model": "gpt-4o-mini",
         },
     )
     assistant3 = AssistantAgent(
         "assistant",
         system_message="You are a helpful assistant.",
         llm_config={
             "config_list": config_list,
-            "model": "gpt-3.5-turbo-0613",
+            "model": "gpt-4o",
         },
     )

     assistant1.client.total_usage_summary = {
         "total_cost": 0.1,
-        "gpt-35-turbo": {"cost": 0.1, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
+        "gpt-4o-mini": {"cost": 0.1, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
     }
     assistant2.client.total_usage_summary = {
         "total_cost": 0.2,
-        "gpt-35-turbo": {"cost": 0.2, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
+        "gpt-4o-mini": {"cost": 0.2, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
     }
     assistant3.client.total_usage_summary = {
         "total_cost": 0.3,
-        "gpt-4": {"cost": 0.3, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
+        "gpt-4o": {"cost": 0.3, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
     }

     total_usage = gather_usage_summary([assistant1, assistant2, assistant3])

     assert round(total_usage["usage_including_cached_inference"]["total_cost"], 8) == 0.6
-    assert round(total_usage["usage_including_cached_inference"]["gpt-35-turbo"]["cost"], 8) == 0.3
-    assert round(total_usage["usage_including_cached_inference"]["gpt-4"]["cost"], 8) == 0.3
+    assert round(total_usage["usage_including_cached_inference"]["gpt-4o-mini"]["cost"], 8) == 0.3
+    assert round(total_usage["usage_including_cached_inference"]["gpt-4o"]["cost"], 8) == 0.3

     # test when agent doesn't have client
     user_proxy = UserProxyAgent(
@@ -91,7 +91,7 @@ def test_agent_usage():
     config_list = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"tags": ["gpt-3.5-turbo"]},
+        filter_dict={"tags": ["gpt-4o-mini"]},
     )
     assistant = AssistantAgent(
         "assistant",
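
The updated asserts encode what gather_usage_summary is expected to do: sum total_cost across agents and merge per-model entries by model name. The plain-Python sketch below reproduces only that arithmetic, including the float rounding the asserts guard against; it is not the library's implementation.

# Arithmetic behind the asserts above: merge per-model costs by key, sum the totals.
from collections import defaultdict

summaries = [
    {"total_cost": 0.1, "gpt-4o-mini": {"cost": 0.1}},
    {"total_cost": 0.2, "gpt-4o-mini": {"cost": 0.2}},
    {"total_cost": 0.3, "gpt-4o": {"cost": 0.3}},
]

total_cost = sum(s["total_cost"] for s in summaries)
per_model_cost = defaultdict(float)
for summary in summaries:
    for model, usage in summary.items():
        if model != "total_cost":
            per_model_cost[model] += usage["cost"]

# round(..., 8) matters: 0.1 + 0.2 is 0.30000000000000004 in binary floating point.
assert round(total_cost, 8) == 0.6
assert round(per_model_cost["gpt-4o-mini"], 8) == 0.3
assert round(per_model_cost["gpt-4o"], 8) == 0.3
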
8 changes: 4 additions & 4 deletions test/agentchat/test_assistant_agent.py
@@ -33,7 +33,7 @@ def test_ai_user_proxy_agent():
     config_list = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"tags": ["gpt-3.5-turbo"]},
+        filter_dict={"tags": ["gpt-4o-mini"]},
     )
     assistant = AssistantAgent(
         "assistant",
@@ -72,7 +72,7 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5):
     config_list = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"tags": ["gpt-3.5-turbo", "gpt-3.5-turbo-16k"]},
+        filter_dict={"tags": ["gpt-4o-mini"]},
     )
     llm_config = {
         "cache_seed": 42,
@@ -116,7 +116,7 @@ def test_create_execute_script(human_input_mode="NEVER", max_consecutive_auto_re
     config_list = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"tags": ["gpt-3.5-turbo"]},
+        filter_dict={"tags": ["gpt-4o-mini"]},
     )
     conversations = {}
     # autogen.ChatCompletion.start_logging(conversations)
@@ -170,7 +170,7 @@ def test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=2):
         OAI_CONFIG_LIST,
         file_location=KEY_LOC,
         filter_dict={
-            "tags": ["gpt-4", "gpt-4-32k"],
+            "tags": ["gpt-4o"],
         },
     )
     hard_questions = [
10 changes: 5 additions & 5 deletions test/agentchat/test_async_chats.py
@@ -23,10 +23,10 @@
 @pytest.mark.skipif(skip_openai, reason="requested to skip openai tests")
 @pytest.mark.asyncio
 async def test_async_chats():
-    config_list_35 = autogen.config_list_from_json(
+    config_list_4omini = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={"tags": ["gpt-3.5-turbo"]},
+        filter_dict={"tags": ["gpt-4o-mini"]},
     )

     financial_tasks = [
@@ -39,16 +39,16 @@ async def test_async_chats():

     financial_assistant_1 = AssistantAgent(
         name="Financial_assistant_1",
-        llm_config={"config_list": config_list_35},
+        llm_config={"config_list": config_list_4omini},
         system_message="You are a knowledgeable AI Assistant. Reply TERMINATE when everything is done.",
     )
     financial_assistant_2 = AssistantAgent(
         name="Financial_assistant_2",
-        llm_config={"config_list": config_list_35},
+        llm_config={"config_list": config_list_4omini},
     )
     writer = AssistantAgent(
         name="Writer",
-        llm_config={"config_list": config_list_35},
+        llm_config={"config_list": config_list_4omini},
         is_termination_msg=lambda x: x.get("content", "").find("TERMINATE") >= 0,
         system_message="""
         You are a professional writer, known for
4 changes: 2 additions & 2 deletions test/agentchat/test_async_get_human_input.py
@@ -23,7 +23,7 @@
 @pytest.mark.skipif(skip_openai, reason=reason)
 @pytest.mark.asyncio
 async def test_async_get_human_input():
-    config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-3.5-turbo"]})
+    config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-4o-mini"]})

     # create an AssistantAgent instance named "assistant"
     assistant = autogen.AssistantAgent(
@@ -50,7 +50,7 @@ async def test_async_get_human_input():
 @pytest.mark.skipif(skip_openai, reason=reason)
 @pytest.mark.asyncio
 async def test_async_max_turn():
-    config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-3.5-turbo"]})
+    config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-4o-mini"]})

     # create an AssistantAgent instance named "assistant"
     assistant = autogen.AssistantAgent(
4 changes: 2 additions & 2 deletions test/agentchat/test_cache_agent.py
@@ -120,7 +120,7 @@ def run_conversation(cache_seed, human_input_mode="NEVER", max_consecutive_auto_
         OAI_CONFIG_LIST,
         file_location=KEY_LOC,
         filter_dict={
-            "tags": ["gpt-3.5-turbo", "gpt-3.5-turbo-16k"],
+            "tags": ["gpt-4o-mini"],
         },
     )
     llm_config = {
@@ -167,7 +167,7 @@ def run_groupchat_conversation(cache, human_input_mode="NEVER", max_consecutive_
         OAI_CONFIG_LIST,
         file_location=KEY_LOC,
         filter_dict={
-            "tags": ["gpt-3.5-turbo", "gpt-3.5-turbo-16k"],
+            "tags": ["gpt-4o-mini"],
        },
     )
     llm_config = {
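
Both cache tests pin cache_seed, so identical requests on a rerun are served from the local cache instead of the API; only the tag filter changes in this file. A minimal llm_config of the shape these tests build is sketched below; the config entry is a placeholder, not the repository's real configuration.

# Minimal llm_config shape used by the cache tests: a fixed cache_seed makes
# repeated identical calls hit the on-disk cache. Placeholder credentials only.
llm_config = {
    "cache_seed": 42,
    "config_list": [{"model": "gpt-4o-mini", "api_key": "sk-placeholder", "tags": ["gpt-4o-mini"]}],
}
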