From f8b5e55118320fe119f9d578110222bd4c627949 Mon Sep 17 00:00:00 2001 From: Mark Sze Date: Sun, 8 Dec 2024 22:21:35 +0000 Subject: [PATCH 1/6] Sample test of update to 4o-mini for tests Signed-off-by: Mark Sze --- test/agentchat/test_async_chats.py | 2 +- test/agentchat/test_chats.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/agentchat/test_async_chats.py b/test/agentchat/test_async_chats.py index d2587ff273..25f974d1b1 100755 --- a/test/agentchat/test_async_chats.py +++ b/test/agentchat/test_async_chats.py @@ -26,7 +26,7 @@ async def test_async_chats(): config_list_35 = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={"tags": ["gpt-3.5-turbo"]}, + filter_dict={"tags": ["gpt-4o-mini"]}, ) financial_tasks = [ diff --git a/test/agentchat/test_chats.py b/test/agentchat/test_chats.py index 8f243c1664..9fb7512a35 100755 --- a/test/agentchat/test_chats.py +++ b/test/agentchat/test_chats.py @@ -36,7 +36,7 @@ else autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={"tags": ["gpt-3.5-turbo"]}, + filter_dict={"tags": ["gpt-4o-mini"]}, ) ) From a898cc97055bd28e3f58da5fa349146c7b48b26e Mon Sep 17 00:00:00 2001 From: Mark Sze Date: Sun, 8 Dec 2024 22:42:11 +0000 Subject: [PATCH 2/6] Additional updates from 3.5 to 4o-mini and 4 to 4o Signed-off-by: Mark Sze --- .../contrib/agent_eval/test_agent_eval.py | 5 ++-- .../capabilities/chat_with_teachable_agent.py | 8 ++----- .../test_image_generation_capability.py | 12 +++++----- .../capabilities/test_teachable_agent.py | 4 +--- .../capabilities/test_transform_messages.py | 2 +- test/agentchat/contrib/test_agent_builder.py | 20 ++++++++-------- test/agentchat/contrib/test_gpt_assistant.py | 3 --- .../test_llamaindex_conversable_agent.py | 2 +- .../agentchat/contrib/test_reasoning_agent.py | 7 ++++-- test/agentchat/test_agent_logging.py | 2 +- test/agentchat/test_agent_usage.py | 18 +++++++------- test/agentchat/test_assistant_agent.py | 8 +++---- test/agentchat/test_async_get_human_input.py | 4 ++-- test/agentchat/test_cache_agent.py | 4 ++-- test/agentchat/test_conversable_agent.py | 11 ++++----- test/agentchat/test_function_call.py | 4 ++-- .../agentchat/test_function_call_groupchat.py | 2 +- test/agentchat/test_human_input.py | 2 +- test/agentchat/test_math_user_proxy_agent.py | 2 +- test/agentchat/test_nested.py | 2 +- test/agentchat/test_tool_calls.py | 2 +- test/io/test_websockets.py | 10 ++------ test/oai/_test_completion.py | 24 ++++++------------- test/test_code_utils.py | 2 +- test/test_logging.py | 12 +++++----- test/test_token_count.py | 2 ++ 26 files changed, 75 insertions(+), 99 deletions(-) diff --git a/test/agentchat/contrib/agent_eval/test_agent_eval.py b/test/agentchat/contrib/agent_eval/test_agent_eval.py index e871b9e347..65e03af36e 100644 --- a/test/agentchat/contrib/agent_eval/test_agent_eval.py +++ b/test/agentchat/contrib/agent_eval/test_agent_eval.py @@ -38,13 +38,12 @@ def remove_ground_truth(test_case: str): filter_dict={ "api_type": ["openai"], "model": [ + "gpt-4o-mini", + "gpt-4o", "gpt-4-turbo", "gpt-4-turbo-preview", "gpt-4-0125-preview", "gpt-4-1106-preview", - "gpt-3.5-turbo", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-1106", ], }, ) diff --git a/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py b/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py index 8dc0e2256e..38b526ea25 100755 --- a/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py +++ b/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py @@ -17,12 +17,8 @@ from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402 # Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input. -filter_dict = {"model": ["gpt-4-0125-preview"]} -# filter_dict = {"model": ["gpt-3.5-turbo-1106"]} -# filter_dict = {"model": ["gpt-4-0613"]} -# filter_dict = {"model": ["gpt-3.5-turbo"]} -# filter_dict = {"model": ["gpt-4"]} -# filter_dict = {"model": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]} +filter_dict = {"model": ["gpt-4o-mini"]} +# filter_dict = {"model": ["gpt-4-0125-preview"]} def create_teachable_agent(reset_db=False): diff --git a/test/agentchat/contrib/capabilities/test_image_generation_capability.py b/test/agentchat/contrib/capabilities/test_image_generation_capability.py index c0cb6fc1a9..abd73d52ce 100644 --- a/test/agentchat/contrib/capabilities/test_image_generation_capability.py +++ b/test/agentchat/contrib/capabilities/test_image_generation_capability.py @@ -32,7 +32,7 @@ sys.path.append(os.path.join(os.path.dirname(__file__), "../..")) from conftest import MOCK_OPEN_AI_API_KEY, skip_openai # noqa: E402 -filter_dict = {"model": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]} +filter_dict = {"model": ["gpt-4o-mini"]} RESOLUTIONS = ["256x256", "512x512", "1024x1024"] QUALITIES = ["standard", "hd"] @@ -67,21 +67,21 @@ def api_key(): @pytest.fixture def dalle_config() -> Dict[str, Any]: - config_list = openai_utils.config_list_from_models(model_list=["dall-e-2"], exclude="aoai") + config_list = openai_utils.config_list_from_models(model_list=["dall-e-3"], exclude="aoai") if not config_list: - config_list = [{"model": "dall-e-2", "api_key": api_key()}] + config_list = [{"model": "dall-e-3", "api_key": api_key()}] return {"config_list": config_list, "timeout": 120, "cache_seed": None} @pytest.fixture -def gpt3_config() -> Dict[str, Any]: +def gpt4_config() -> Dict[str, Any]: config_list = [ { - "model": "gpt-35-turbo-16k", + "model": "gpt-4o-mini", "api_key": api_key(), }, { - "model": "gpt-3.5-turbo-16k", + "model": "gpt-4o", "api_key": api_key(), }, ] diff --git a/test/agentchat/contrib/capabilities/test_teachable_agent.py b/test/agentchat/contrib/capabilities/test_teachable_agent.py index ade6aa1e7f..82252f07f6 100755 --- a/test/agentchat/contrib/capabilities/test_teachable_agent.py +++ b/test/agentchat/contrib/capabilities/test_teachable_agent.py @@ -31,10 +31,8 @@ # Specify the model to use by uncommenting one of the following lines. # filter_dict={"model": ["gpt-4-1106-preview"]} # filter_dict={"model": ["gpt-4-0613"]} -# filter_dict={"model": ["gpt-3.5-turbo-1106"]} -# filter_dict={"model": ["gpt-3.5-turbo-0613"]} # filter_dict={"model": ["gpt-4"]} -filter_dict = {"tags": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]} +filter_dict = {"tags": ["gpt-4o-mini"]} def create_teachable_agent(reset_db=False, verbosity=0): diff --git a/test/agentchat/contrib/capabilities/test_transform_messages.py b/test/agentchat/contrib/capabilities/test_transform_messages.py index 9121a8d8bb..b3dee65d1b 100644 --- a/test/agentchat/contrib/capabilities/test_transform_messages.py +++ b/test/agentchat/contrib/capabilities/test_transform_messages.py @@ -33,7 +33,7 @@ def test_transform_messages_capability(): OAI_CONFIG_LIST, KEY_LOC, filter_dict={ - "model": "gpt-3.5-turbo", + "model": "gpt-4o-mini", }, ) diff --git a/test/agentchat/contrib/test_agent_builder.py b/test/agentchat/contrib/test_agent_builder.py index 9dee05766e..cab4a051b5 100755 --- a/test/agentchat/contrib/test_agent_builder.py +++ b/test/agentchat/contrib/test_agent_builder.py @@ -51,8 +51,8 @@ def test_build(): builder = AgentBuilder( config_file_or_env=OAI_CONFIG_LIST, config_file_location=KEY_LOC, - builder_model_tags=["gpt-4"], - agent_model_tags=["gpt-4"], + builder_model_tags=["gpt-4o"], + agent_model_tags=["gpt-4o"], ) building_task = ( "Find a paper on arxiv by programming, and analyze its application in some domain. " @@ -83,8 +83,8 @@ def test_build_from_library(): builder = AgentBuilder( config_file_or_env=OAI_CONFIG_LIST, config_file_location=KEY_LOC, - builder_model_tags=["gpt-4"], - agent_model_tags=["gpt-4"], + builder_model_tags=["gpt-4o"], + agent_model_tags=["gpt-4o"], ) building_task = ( "Find a paper on arxiv by programming, and analyze its application in some domain. " @@ -136,8 +136,8 @@ def test_save(): builder = AgentBuilder( config_file_or_env=OAI_CONFIG_LIST, config_file_location=KEY_LOC, - builder_model_tags=["gpt-4"], - agent_model_tags=["gpt-4"], + builder_model_tags=["gpt-4o"], + agent_model_tags=["gpt-4o"], ) building_task = ( "Find a paper on arxiv by programming, and analyze its application in some domain. " @@ -175,8 +175,8 @@ def test_load(): config_file_location=KEY_LOC, # builder_model=["gpt-4", "gpt-4-1106-preview"], # agent_model=["gpt-4", "gpt-4-1106-preview"], - builder_model_tags=["gpt-4"], - agent_model_tags=["gpt-4"], + builder_model_tags=["gpt-4o"], + agent_model_tags=["gpt-4o"], ) config_save_path = f"{here}/example_test_agent_builder_config.json" @@ -204,8 +204,8 @@ def test_clear_agent(): builder = AgentBuilder( config_file_or_env=OAI_CONFIG_LIST, config_file_location=KEY_LOC, - builder_model_tags=["gpt-4"], - agent_model_tags=["gpt-4"], + builder_model_tags=["gpt-4o"], + agent_model_tags=["gpt-4o"], ) config_save_path = f"{here}/example_test_agent_builder_config.json" diff --git a/test/agentchat/contrib/test_gpt_assistant.py b/test/agentchat/contrib/test_gpt_assistant.py index c67130f77a..ce29c7651c 100755 --- a/test/agentchat/contrib/test_gpt_assistant.py +++ b/test/agentchat/contrib/test_gpt_assistant.py @@ -40,9 +40,6 @@ "gpt-4-turbo-preview", "gpt-4-0125-preview", "gpt-4-1106-preview", - "gpt-3.5-turbo", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-1106", ], }, ) diff --git a/test/agentchat/contrib/test_llamaindex_conversable_agent.py b/test/agentchat/contrib/test_llamaindex_conversable_agent.py index 6fd74d4d18..bc9bec4fd1 100644 --- a/test/agentchat/contrib/test_llamaindex_conversable_agent.py +++ b/test/agentchat/contrib/test_llamaindex_conversable_agent.py @@ -48,7 +48,7 @@ def test_group_chat_with_llama_index_conversable_agent(chat_mock: MagicMock) -> Each agent is set to describe an image in a unique style, but the chat should not exceed the specified max_rounds. """ llm = OpenAI( - model="gpt-4", + model="gpt-4o", temperature=0.0, api_key=openaiKey, ) diff --git a/test/agentchat/contrib/test_reasoning_agent.py b/test/agentchat/contrib/test_reasoning_agent.py index 323a661f76..ff732adc99 100644 --- a/test/agentchat/contrib/test_reasoning_agent.py +++ b/test/agentchat/contrib/test_reasoning_agent.py @@ -43,7 +43,7 @@ def think_node(): @pytest.fixture def reasoning_agent(): """Create a ReasoningAgent instance for testing""" - config_list = [{"model": "gpt-4", "api_key": "fake_key"}] + config_list = [{"model": "gpt-4o", "api_key": "fake_key"}] llm_config = {"config_list": config_list, "temperature": 0} return ReasoningAgent("reasoning_agent", llm_config=llm_config) @@ -164,7 +164,10 @@ def test_reasoning_agent_answer(): def helper_test_reasoning_agent_answer(max_depth, beam_size, answer_approach): """Test that ReasoningAgent properly terminates when TERMINATE is received""" - mock_config = {"config_list": [{"model": "gpt-4", "api_key": "fake", "base_url": "0.0.0.0:8000"}], "temperature": 0} + mock_config = { + "config_list": [{"model": "gpt-4o", "api_key": "fake", "base_url": "0.0.0.0:8000"}], + "temperature": 0, + } with patch("autogen.agentchat.conversable_agent.ConversableAgent.generate_oai_reply") as mock_oai_reply: agent = ReasoningAgent( "test_agent", diff --git a/test/agentchat/test_agent_logging.py b/test/agentchat/test_agent_logging.py index 4e17487382..c8418bd8a6 100644 --- a/test/agentchat/test_agent_logging.py +++ b/test/agentchat/test_agent_logging.py @@ -50,7 +50,7 @@ config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, filter_dict={ - "tags": ["gpt-3.5-turbo"], + "tags": ["gpt-4o-mini"], }, file_location=KEY_LOC, ) diff --git a/test/agentchat/test_agent_usage.py b/test/agentchat/test_agent_usage.py index 88b686a1a2..02df221a3f 100755 --- a/test/agentchat/test_agent_usage.py +++ b/test/agentchat/test_agent_usage.py @@ -32,7 +32,7 @@ def test_gathering(): system_message="You are a helpful assistant.", llm_config={ "config_list": config_list, - "model": "gpt-3.5-turbo-0613", + "model": "gpt-4o-mini", }, ) assistant2 = AssistantAgent( @@ -40,7 +40,7 @@ def test_gathering(): system_message="You are a helpful assistant.", llm_config={ "config_list": config_list, - "model": "gpt-3.5-turbo-0613", + "model": "gpt-4o-mini", }, ) assistant3 = AssistantAgent( @@ -48,28 +48,28 @@ def test_gathering(): system_message="You are a helpful assistant.", llm_config={ "config_list": config_list, - "model": "gpt-3.5-turbo-0613", + "model": "gpt-4o", }, ) assistant1.client.total_usage_summary = { "total_cost": 0.1, - "gpt-35-turbo": {"cost": 0.1, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300}, + "gpt-4o-mini": {"cost": 0.1, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300}, } assistant2.client.total_usage_summary = { "total_cost": 0.2, - "gpt-35-turbo": {"cost": 0.2, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300}, + "gpt-4o-mini": {"cost": 0.2, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300}, } assistant3.client.total_usage_summary = { "total_cost": 0.3, - "gpt-4": {"cost": 0.3, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300}, + "gpt-4o": {"cost": 0.3, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300}, } total_usage = gather_usage_summary([assistant1, assistant2, assistant3]) assert round(total_usage["usage_including_cached_inference"]["total_cost"], 8) == 0.6 - assert round(total_usage["usage_including_cached_inference"]["gpt-35-turbo"]["cost"], 8) == 0.3 - assert round(total_usage["usage_including_cached_inference"]["gpt-4"]["cost"], 8) == 0.3 + assert round(total_usage["usage_including_cached_inference"]["gpt-4o-mini"]["cost"], 8) == 0.3 + assert round(total_usage["usage_including_cached_inference"]["gpt-4o"]["cost"], 8) == 0.3 # test when agent doesn't have client user_proxy = UserProxyAgent( @@ -91,7 +91,7 @@ def test_agent_usage(): config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={"tags": ["gpt-3.5-turbo"]}, + filter_dict={"tags": ["gpt-4o-mini"]}, ) assistant = AssistantAgent( "assistant", diff --git a/test/agentchat/test_assistant_agent.py b/test/agentchat/test_assistant_agent.py index 672ff59bd6..ee7f5b88bd 100755 --- a/test/agentchat/test_assistant_agent.py +++ b/test/agentchat/test_assistant_agent.py @@ -33,7 +33,7 @@ def test_ai_user_proxy_agent(): config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={"tags": ["gpt-3.5-turbo"]}, + filter_dict={"tags": ["gpt-4o-mini"]}, ) assistant = AssistantAgent( "assistant", @@ -72,7 +72,7 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5): config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={"tags": ["gpt-3.5-turbo", "gpt-3.5-turbo-16k"]}, + filter_dict={"tags": ["gpt-4o-mini"]}, ) llm_config = { "cache_seed": 42, @@ -116,7 +116,7 @@ def test_create_execute_script(human_input_mode="NEVER", max_consecutive_auto_re config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={"tags": ["gpt-3.5-turbo"]}, + filter_dict={"tags": ["gpt-4o-mini"]}, ) conversations = {} # autogen.ChatCompletion.start_logging(conversations) @@ -170,7 +170,7 @@ def test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=2): OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={ - "tags": ["gpt-4", "gpt-4-32k"], + "tags": ["gpt-4o"], }, ) hard_questions = [ diff --git a/test/agentchat/test_async_get_human_input.py b/test/agentchat/test_async_get_human_input.py index 555ed63866..e68c3c6892 100755 --- a/test/agentchat/test_async_get_human_input.py +++ b/test/agentchat/test_async_get_human_input.py @@ -23,7 +23,7 @@ @pytest.mark.skipif(skip_openai, reason=reason) @pytest.mark.asyncio async def test_async_get_human_input(): - config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-3.5-turbo"]}) + config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-4o-mini"]}) # create an AssistantAgent instance named "assistant" assistant = autogen.AssistantAgent( @@ -50,7 +50,7 @@ async def test_async_get_human_input(): @pytest.mark.skipif(skip_openai, reason=reason) @pytest.mark.asyncio async def test_async_max_turn(): - config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-3.5-turbo"]}) + config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-4o-mini"]}) # create an AssistantAgent instance named "assistant" assistant = autogen.AssistantAgent( diff --git a/test/agentchat/test_cache_agent.py b/test/agentchat/test_cache_agent.py index 723355b6e1..805a5be9f3 100644 --- a/test/agentchat/test_cache_agent.py +++ b/test/agentchat/test_cache_agent.py @@ -120,7 +120,7 @@ def run_conversation(cache_seed, human_input_mode="NEVER", max_consecutive_auto_ OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={ - "tags": ["gpt-3.5-turbo", "gpt-3.5-turbo-16k"], + "tags": ["gpt-4o-mini"], }, ) llm_config = { @@ -167,7 +167,7 @@ def run_groupchat_conversation(cache, human_input_mode="NEVER", max_consecutive_ OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={ - "tags": ["gpt-3.5-turbo", "gpt-3.5-turbo-16k"], + "tags": ["gpt-4o-mini"], }, ) llm_config = { diff --git a/test/agentchat/test_conversable_agent.py b/test/agentchat/test_conversable_agent.py index 320cfb324b..fd1f3bd3fb 100755 --- a/test/agentchat/test_conversable_agent.py +++ b/test/agentchat/test_conversable_agent.py @@ -32,9 +32,6 @@ here = os.path.abspath(os.path.dirname(__file__)) gpt4_config_list = [ - {"model": "gpt-4"}, - {"model": "gpt-4-turbo"}, - {"model": "gpt-4-32k"}, {"model": "gpt-4o"}, {"model": "gpt-4o-mini"}, ] @@ -856,7 +853,7 @@ def test_register_for_llm_without_model_name(): def test_register_for_execution(): with pytest.MonkeyPatch.context() as mp: mp.setenv("OPENAI_API_KEY", MOCK_OPEN_AI_API_KEY) - agent = ConversableAgent(name="agent", llm_config={"config_list": [{"model": "gpt-4"}]}) + agent = ConversableAgent(name="agent", llm_config={"config_list": [{"model": "gpt-4o"}]}) user_proxy_1 = UserProxyAgent(name="user_proxy_1") user_proxy_2 = UserProxyAgent(name="user_proxy_2") @@ -1015,7 +1012,7 @@ async def test_function_registration_e2e_async() -> None: config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, filter_dict={ - "tags": ["gpt-4", "gpt-4-0314", "gpt4", "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-v0314"], + "tags": ["gpt-4o"], }, file_location=KEY_LOC, ) @@ -1086,7 +1083,7 @@ def stopwatch(num_seconds: Annotated[str, "Number of seconds in the stopwatch."] @pytest.mark.skipif(skip_openai, reason=reason) def test_max_turn(): - config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-3.5-turbo"]}) + config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-4o-mini"]}) # create an AssistantAgent instance named "assistant" assistant = autogen.AssistantAgent( @@ -1174,7 +1171,7 @@ def get_random_number(self): return str(random.randint(0, 100)) config_list = autogen.config_list_from_json( - OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={"tags": ["gpt-3.5-turbo"]} + OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={"tags": ["gpt-4o-mini"]} ) def my_message_play(sender, recipient, context): diff --git a/test/agentchat/test_function_call.py b/test/agentchat/test_function_call.py index 5dfcd839f3..1567a31fc4 100755 --- a/test/agentchat/test_function_call.py +++ b/test/agentchat/test_function_call.py @@ -33,7 +33,7 @@ def test_eval_math_responses(): config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, filter_dict={ - "tags": ["gpt-4", "gpt-3.5-turbo", "gpt-3.5-turbo-16k"], + "tags": ["gpt-4o-mini", "gpt-4o"], }, file_location=KEY_LOC, ) @@ -238,7 +238,7 @@ def test_update_function(): config_list_gpt4 = autogen.config_list_from_json( OAI_CONFIG_LIST, filter_dict={ - "tags": ["gpt-4", "gpt-4-32k", "gpt-4o", "gpt-4o-mini"], + "tags": ["gpt-4o", "gpt-4o-mini"], }, file_location=KEY_LOC, ) diff --git a/test/agentchat/test_function_call_groupchat.py b/test/agentchat/test_function_call_groupchat.py index 7e09bbd365..5639a86115 100755 --- a/test/agentchat/test_function_call_groupchat.py +++ b/test/agentchat/test_function_call_groupchat.py @@ -55,7 +55,7 @@ def get_random_number(self): config_list_35 = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={"tags": ["gpt-3.5-turbo", "gpt-3.5-turbo-16k"]}, + filter_dict={"tags": ["gpt-4o-mini"]}, ) llm_config_no_function = {"config_list": config_list_35} config_list_tool = autogen.filter_config(config_list_35, {"tags": ["tool"]}) diff --git a/test/agentchat/test_human_input.py b/test/agentchat/test_human_input.py index beca99033c..826aed609c 100755 --- a/test/agentchat/test_human_input.py +++ b/test/agentchat/test_human_input.py @@ -21,7 +21,7 @@ @pytest.mark.skipif(skip_openai, reason=reason) def test_get_human_input(): - config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-3.5-turbo"]}) + config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, KEY_LOC, filter_dict={"tags": ["gpt-4o-mini"]}) # create an AssistantAgent instance named "assistant" assistant = autogen.AssistantAgent( diff --git a/test/agentchat/test_math_user_proxy_agent.py b/test/agentchat/test_math_user_proxy_agent.py index 83c6662ce2..5248fac233 100755 --- a/test/agentchat/test_math_user_proxy_agent.py +++ b/test/agentchat/test_math_user_proxy_agent.py @@ -44,7 +44,7 @@ def test_math_user_proxy_agent(): OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={ - "tags": ["gpt-3.5-turbo"], + "tags": ["gpt-4o-mini"], }, ) assistant = AssistantAgent( diff --git a/test/agentchat/test_nested.py b/test/agentchat/test_nested.py index 9995aa6ed6..d095135b12 100755 --- a/test/agentchat/test_nested.py +++ b/test/agentchat/test_nested.py @@ -44,7 +44,7 @@ def test_nested(): config_list_35 = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={"tags": ["gpt-3.5-turbo"]}, + filter_dict={"tags": ["gpt-4o-mini"]}, ) llm_config = {"config_list": config_list} diff --git a/test/agentchat/test_tool_calls.py b/test/agentchat/test_tool_calls.py index eb2cbe7c35..3583e541bc 100755 --- a/test/agentchat/test_tool_calls.py +++ b/test/agentchat/test_tool_calls.py @@ -150,7 +150,7 @@ def test_update_tool(): config_list_gpt4 = autogen.config_list_from_json( OAI_CONFIG_LIST, filter_dict={ - "tags": ["gpt-4"], + "tags": ["gpt-4o"], }, file_location=KEY_LOC, ) diff --git a/test/io/test_websockets.py b/test/io/test_websockets.py index c6d8494461..6c4b4662e3 100644 --- a/test/io/test_websockets.py +++ b/test/io/test_websockets.py @@ -103,14 +103,8 @@ def on_connect(iostream: IOWebsockets, success_dict: Dict[str, bool] = success_d OAI_CONFIG_LIST, filter_dict={ "model": [ - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-4", - "gpt-4-0314", - "gpt4", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-v0314", + "gpt-4o-mini", + "gpt-4o", ], }, file_location=KEY_LOC, diff --git a/test/oai/_test_completion.py b/test/oai/_test_completion.py index 5e92149d41..fece9ed42c 100755 --- a/test/oai/_test_completion.py +++ b/test/oai/_test_completion.py @@ -51,7 +51,7 @@ def test_filter(): print(exc) return config_list = autogen.config_list_from_models( - KEY_LOC, exclude="aoai", model_list=["text-ada-001", "gpt-3.5-turbo", "text-davinci-003"] + KEY_LOC, exclude="aoai", model_list=["text-ada-001", "gpt-4o-mini", "text-davinci-003"] ) response = autogen.Completion.create( context={"yes_or_no_choice": True}, @@ -95,7 +95,7 @@ def test_chatcompletion(): assert "messages" not in params params = autogen.Completion._construct_params( context=None, - config={"model": "gpt-4"}, + config={"model": "gpt-4o"}, prompt="hi", ) assert "messages" in params @@ -149,13 +149,8 @@ def test_nocontext(): file_location=KEY_LOC, filter_dict={ "model": { - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-16k-0613", - "gpt-3.5-turbo-0301", - "chatgpt-35-turbo-0301", - "gpt-35-turbo-v0301", - "gpt", + "gpt-4o-mini", + "gpt-4o", }, }, ), @@ -185,13 +180,8 @@ def test_humaneval(num_samples=1): env_or_file=OAI_CONFIG_LIST, filter_dict={ "model": { - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-16k-0613", - "gpt-3.5-turbo-0301", - "chatgpt-35-turbo-0301", - "gpt-35-turbo-v0301", - "gpt", + "gpt-4o-mini", + "gpt-4o", }, }, file_location=KEY_LOC, @@ -233,7 +223,7 @@ def test_humaneval(num_samples=1): # no error should be raised response = autogen.Completion.create( context=test_data[0], - config_list=autogen.config_list_from_models(KEY_LOC, model_list=["gpt-3.5-turbo"]), + config_list=autogen.config_list_from_models(KEY_LOC, model_list=["gpt-4o-mini"]), prompt="", max_tokens=1, max_retry_period=0, diff --git a/test/test_code_utils.py b/test/test_code_utils.py index 8fb2f44a97..5871799d28 100755 --- a/test/test_code_utils.py +++ b/test/test_code_utils.py @@ -54,7 +54,7 @@ # OAI_CONFIG_LIST, # file_location=KEY_LOC, # filter_dict={ -# "model": ["gpt-4", "gpt4", "gpt-4-32k", "gpt-4-32k-0314"], +# "model": ["gpt-4o", "gpt4", "gpt-4-32k", "gpt-4-32k-0314"], # }, # ) # # config_list = autogen.config_list_from_json( diff --git a/test/test_logging.py b/test/test_logging.py index 0a27160e2d..ca2db497ee 100644 --- a/test/test_logging.py +++ b/test/test_logging.py @@ -35,7 +35,7 @@ "role": "user" } ], - "model": "gpt-4" + "model": "gpt-4o" } """ ) @@ -58,7 +58,7 @@ } ], "created": 1705993480, - "model": "gpt-4", + "model": "gpt-4o", "object": "chat.completion", "system_fingerprint": "fp_6d044fb900", "usage": { @@ -159,7 +159,7 @@ def test_log_new_agent(db_connection): cur = db_connection.cursor() agent_name = "some_assistant" - config_list = [{"model": "gpt-4", "api_key": "some_key"}] + config_list = [{"model": "gpt-4o", "api_key": "some_key"}] agent = AssistantAgent(agent_name, llm_config={"config_list": config_list}) init_args = {"foo": "bar", "baz": {"other_key": "other_val"}, "a": None} @@ -184,7 +184,7 @@ def test_log_oai_wrapper(db_connection): cur = db_connection.cursor() - llm_config = {"config_list": [{"model": "gpt-4", "api_key": "some_key", "base_url": "some url"}]} + llm_config = {"config_list": [{"model": "gpt-4o", "api_key": "some_key", "base_url": "some url"}]} init_args = {"llm_config": llm_config, "base_config": {}} wrapper = OpenAIWrapper(**llm_config) @@ -210,8 +210,8 @@ def test_log_oai_client(db_connection): openai_config = { "api_key": "some_key", - "api_version": "2024-02-01", - "azure_deployment": "gpt-4", + "api_version": "2024-08-06", + "azure_deployment": "gpt-4o", "azure_endpoint": "https://foobar.openai.azure.com/", } client = AzureOpenAI(**openai_config) diff --git a/test/test_token_count.py b/test/test_token_count.py index e37324932c..c009a65110 100755 --- a/test/test_token_count.py +++ b/test/test_token_count.py @@ -141,6 +141,8 @@ def test_model_aliases(): assert get_max_token_limit("gpt-35-turbo") == get_max_token_limit("gpt-3.5-turbo") assert get_max_token_limit("gpt4") == get_max_token_limit("gpt-4") assert get_max_token_limit("gpt4-32k") == get_max_token_limit("gpt-4-32k") + assert get_max_token_limit("gpt4o") == get_max_token_limit("gpt-4o") + assert get_max_token_limit("gpt4omini") == get_max_token_limit("gpt-4o-mini") if __name__ == "__main__": From 3b04874c678d0701a4b69c1163d6b4122ca8cf63 Mon Sep 17 00:00:00 2001 From: Mark Sze Date: Sun, 8 Dec 2024 22:54:22 +0000 Subject: [PATCH 3/6] Fix for gpt-4o-mini in token count test Signed-off-by: Mark Sze --- test/test_token_count.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_token_count.py b/test/test_token_count.py index c009a65110..de25b8a49d 100755 --- a/test/test_token_count.py +++ b/test/test_token_count.py @@ -142,7 +142,7 @@ def test_model_aliases(): assert get_max_token_limit("gpt4") == get_max_token_limit("gpt-4") assert get_max_token_limit("gpt4-32k") == get_max_token_limit("gpt-4-32k") assert get_max_token_limit("gpt4o") == get_max_token_limit("gpt-4o") - assert get_max_token_limit("gpt4omini") == get_max_token_limit("gpt-4o-mini") + assert get_max_token_limit("gpt4o-mini") == get_max_token_limit("gpt-4o-mini") if __name__ == "__main__": From fe18acb0ecf570d14ffe50352863dc9d1c9d9439 Mon Sep 17 00:00:00 2001 From: Mark Sze Date: Sun, 8 Dec 2024 23:56:20 +0000 Subject: [PATCH 4/6] Renaming and removal of tools filtering where not necessary Signed-off-by: Mark Sze --- test/agentchat/test_async_chats.py | 8 ++--- test/agentchat/test_chats.py | 30 +++++++++---------- .../agentchat/test_function_call_groupchat.py | 10 +++---- test/agentchat/test_nested.py | 8 ++--- 4 files changed, 26 insertions(+), 30 deletions(-) diff --git a/test/agentchat/test_async_chats.py b/test/agentchat/test_async_chats.py index 25f974d1b1..1167ff4d07 100755 --- a/test/agentchat/test_async_chats.py +++ b/test/agentchat/test_async_chats.py @@ -23,7 +23,7 @@ @pytest.mark.skipif(skip_openai, reason="requested to skip openai tests") @pytest.mark.asyncio async def test_async_chats(): - config_list_35 = autogen.config_list_from_json( + config_list_4omini = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={"tags": ["gpt-4o-mini"]}, @@ -39,16 +39,16 @@ async def test_async_chats(): financial_assistant_1 = AssistantAgent( name="Financial_assistant_1", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, system_message="You are a knowledgeable AI Assistant. Reply TERMINATE when everything is done.", ) financial_assistant_2 = AssistantAgent( name="Financial_assistant_2", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, ) writer = AssistantAgent( name="Writer", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, is_termination_msg=lambda x: x.get("content", "").find("TERMINATE") >= 0, system_message=""" You are a professional writer, known for diff --git a/test/agentchat/test_chats.py b/test/agentchat/test_chats.py index 9fb7512a35..a39162debf 100755 --- a/test/agentchat/test_chats.py +++ b/test/agentchat/test_chats.py @@ -30,7 +30,7 @@ ) ) -config_list_35 = ( +config_list_4omini = ( [] if skip_openai else autogen.config_list_from_json( @@ -40,8 +40,6 @@ ) ) -config_list_tool = filter_config(config_list_35, {"tags": ["tool"]}) - def test_chat_messages_for_summary(): assistant = UserProxyAgent(name="assistant", human_input_mode="NEVER", code_execution_config={"use_docker": False}) @@ -87,12 +85,12 @@ def test_chats_group(): financial_assistant = AssistantAgent( name="Financial_assistant", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, ) writer = AssistantAgent( name="Writer", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, system_message=""" You are a professional writer, known for your insightful and engaging articles. @@ -106,7 +104,7 @@ def test_chats_group(): system_message="""Critic. Double check plan, claims, code from other agents and provide feedback. Check whether the plan includes adding verifiable info such as source URL. Reply "TERMINATE" in the end when everything is done. """, - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, ) groupchat_1 = GroupChat(agents=[user_proxy, financial_assistant, critic], messages=[], max_round=3) @@ -116,7 +114,7 @@ def test_chats_group(): manager_1 = GroupChatManager( groupchat=groupchat_1, name="Research_manager", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, code_execution_config={ "last_n_messages": 1, "work_dir": "groupchat", @@ -127,7 +125,7 @@ def test_chats_group(): manager_2 = GroupChatManager( groupchat=groupchat_2, name="Writing_manager", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, code_execution_config={ "last_n_messages": 1, "work_dir": "groupchat", @@ -201,17 +199,17 @@ def luck_number_message(sender, recipient, context): func = Function() financial_assistant_1 = AssistantAgent( name="Financial_assistant_1", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, function_map={"get_random_number": func.get_random_number}, ) financial_assistant_2 = AssistantAgent( name="Financial_assistant_2", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, function_map={"get_random_number": func.get_random_number}, ) writer = AssistantAgent( name="Writer", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, is_termination_msg=lambda x: x.get("content", "").find("TERMINATE") >= 0, system_message=""" You are a professional writer, known for @@ -315,15 +313,15 @@ def test_chats_general(): financial_assistant_1 = AssistantAgent( name="Financial_assistant_1", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, ) financial_assistant_2 = AssistantAgent( name="Financial_assistant_2", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, ) writer = AssistantAgent( name="Writer", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, is_termination_msg=lambda x: x.get("content", "").find("TERMINATE") >= 0, system_message=""" You are a professional writer, known for @@ -494,7 +492,7 @@ def test_chats_exceptions(): @pytest.mark.skipif(skip_openai, reason=reason) def test_chats_w_func(): llm_config = { - "config_list": config_list_tool, + "config_list": config_list_4omini, "timeout": 120, } @@ -549,7 +547,7 @@ def currency_calculator( @pytest.mark.skipif(skip_openai, reason=reason) def test_udf_message_in_chats(): - llm_config_35 = {"config_list": config_list_35} + llm_config_35 = {"config_list": config_list_4omini} research_task = """ ## NVDA (NVIDIA Corporation) diff --git a/test/agentchat/test_function_call_groupchat.py b/test/agentchat/test_function_call_groupchat.py index 5639a86115..4a17221d77 100755 --- a/test/agentchat/test_function_call_groupchat.py +++ b/test/agentchat/test_function_call_groupchat.py @@ -52,15 +52,13 @@ def get_random_number(self): return random.randint(0, 100) # llm_config without functions - config_list_35 = autogen.config_list_from_json( + config_list_4omini = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={"tags": ["gpt-4o-mini"]}, ) - llm_config_no_function = {"config_list": config_list_35} - config_list_tool = autogen.filter_config(config_list_35, {"tags": ["tool"]}) llm_config = { - "config_list": config_list_tool, + "config_list": config_list_4omini, key: value, } @@ -81,7 +79,7 @@ def get_random_number(self): name="Observer", system_message="You observe the the player's actions and results. Summarize in 1 sentence.", description="An observer.", - llm_config=llm_config_no_function, + llm_config=llm_config, ) groupchat = autogen.GroupChat( agents=[player, user_proxy, observer], messages=[], max_round=7, speaker_selection_method="round_robin" @@ -94,7 +92,7 @@ def get_random_number(self): ): manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config) - manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config_no_function) + manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config) if sync: res = observer.initiate_chat(manager, message="Let's start the game!", summary_method="reflection_with_llm") diff --git a/test/agentchat/test_nested.py b/test/agentchat/test_nested.py index d095135b12..24c86a7fed 100755 --- a/test/agentchat/test_nested.py +++ b/test/agentchat/test_nested.py @@ -41,7 +41,7 @@ def mock_reply(recipient, messages, sender, config): @pytest.mark.skipif(skip_openai, reason=reason) def test_nested(): config_list = autogen.config_list_from_json(env_or_file=OAI_CONFIG_LIST, file_location=KEY_LOC) - config_list_35 = autogen.config_list_from_json( + config_list_4omini = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={"tags": ["gpt-4o-mini"]}, @@ -96,7 +96,7 @@ def test_nested(): assistant_2 = autogen.AssistantAgent( name="Assistant", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, # is_termination_msg=lambda x: x.get("content", "") == "", ) @@ -124,7 +124,7 @@ def test_nested(): writer = autogen.AssistantAgent( name="Writer", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, system_message=""" You are a professional writer, known for your insightful and engaging articles. @@ -135,7 +135,7 @@ def test_nested(): autogen.AssistantAgent( name="Reviewer", - llm_config={"config_list": config_list_35}, + llm_config={"config_list": config_list_4omini}, system_message=""" You are a compliance reviewer, known for your thoroughness and commitment to standards. Your task is to scrutinize content for any harmful elements or regulatory violations, ensuring From d625b093cf60f01db34cfa0cf2decda6950373ea Mon Sep 17 00:00:00 2001 From: Mark Sze Date: Mon, 9 Dec 2024 00:51:19 +0000 Subject: [PATCH 5/6] Fix groupchat function test Signed-off-by: Mark Sze --- test/agentchat/test_function_call_groupchat.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/agentchat/test_function_call_groupchat.py b/test/agentchat/test_function_call_groupchat.py index 4a17221d77..8a45ea7c06 100755 --- a/test/agentchat/test_function_call_groupchat.py +++ b/test/agentchat/test_function_call_groupchat.py @@ -52,6 +52,14 @@ def get_random_number(self): return random.randint(0, 100) # llm_config without functions + config_list_4omini_no_tools = autogen.config_list_from_json( + OAI_CONFIG_LIST, + file_location=KEY_LOC, + filter_dict={"tags": ["gpt-4o-mini"]}, + ) + llm_config_no_function = {"config_list": config_list_4omini_no_tools} + + # llm_config with functions config_list_4omini = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, @@ -79,7 +87,7 @@ def get_random_number(self): name="Observer", system_message="You observe the the player's actions and results. Summarize in 1 sentence.", description="An observer.", - llm_config=llm_config, + llm_config=llm_config_no_function, ) groupchat = autogen.GroupChat( agents=[player, user_proxy, observer], messages=[], max_round=7, speaker_selection_method="round_robin" @@ -92,7 +100,7 @@ def get_random_number(self): ): manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config) - manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config) + manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config_no_function) if sync: res = observer.initiate_chat(manager, message="Let's start the game!", summary_method="reflection_with_llm") From 07759f3f011924976d26fa0a6ad62908817e011f Mon Sep 17 00:00:00 2001 From: Mark Sze Date: Mon, 9 Dec 2024 03:24:56 +0000 Subject: [PATCH 6/6] Update tests for pydantic 2+, tool config set to 4o-mini Signed-off-by: Mark Sze --- test/agentchat/test_conversable_agent.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/agentchat/test_conversable_agent.py b/test/agentchat/test_conversable_agent.py index fd1f3bd3fb..93866c81a0 100755 --- a/test/agentchat/test_conversable_agent.py +++ b/test/agentchat/test_conversable_agent.py @@ -589,8 +589,8 @@ def test__wrap_function_sync(): CurrencySymbol = Literal["USD", "EUR"] class Currency(BaseModel): - currency: Annotated[CurrencySymbol, Field(..., description="Currency code")] - amount: Annotated[float, Field(100.0, description="Amount of money in the currency")] + currency: CurrencySymbol = Field(description="Currency code") + amount: Annotated[float, Field(default=100.0, description="Amount of money in the currency")] Currency(currency="USD", amount=100.0) @@ -627,8 +627,8 @@ async def test__wrap_function_async(): CurrencySymbol = Literal["USD", "EUR"] class Currency(BaseModel): - currency: Annotated[CurrencySymbol, Field(..., description="Currency code")] - amount: Annotated[float, Field(100.0, description="Amount of money in the currency")] + currency: CurrencySymbol = Field(description="Currency code") + amount: Annotated[float, Field(default=100.0, description="Amount of money in the currency")] Currency(currency="USD", amount=100.0) @@ -934,7 +934,7 @@ def test_function_registration_e2e_sync() -> None: config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, filter_dict={ - "tags": ["tool"], + "tags": ["gpt-4o-mini"], }, file_location=KEY_LOC, )