Merge pull request #227 from henrytwo/henrytu/cerebras_llama_3p3

Migrate Cerebras from Llama 3.1 70b to Llama 3.3 70b
ag2ai · Dec 17, 2024 · b493b35
2 parents 2d996c0 + 469800f
commit b493b35
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 11 deletions.
diff --git a/autogen/oai/cerebras.py b/autogen/oai/cerebras.py
@@ -42,7 +42,7 @@
 CEREBRAS_PRICING_1K = {
     # Convert pricing per million to per thousand tokens.
     "llama3.1-8b": (0.10 / 1000, 0.10 / 1000),
-    "llama3.1-70b": (0.60 / 1000, 0.60 / 1000),
+    "llama-3.3-70b": (0.85 / 1000, 1.20 / 1000),
 }
 
 

diff --git a/test/oai/test_cerebras.py b/test/oai/test_cerebras.py
@@ -142,7 +142,7 @@ def test_cost_calculation(mock_response):
         choices=[{"message": "Test message 1"}],
         usage={"prompt_tokens": 500, "completion_tokens": 300, "total_tokens": 800},
         cost=None,
-        model="llama3.1-70b",
+        model="llama-3.3-70b",
     )
     calculated_cost = calculate_cerebras_cost(
         response.usage["prompt_tokens"], response.usage["completion_tokens"], response.model
@@ -166,15 +166,15 @@ def test_create_response(mock_chat, cerebras_client):
         MagicMock(finish_reason="stop", message=MagicMock(content="Example Cerebras response", tool_calls=None))
     ]
     mock_cerebras_response.id = "mock_cerebras_response_id"
-    mock_cerebras_response.model = "llama3.1-70b"
+    mock_cerebras_response.model = "llama-3.3-70b"
     mock_cerebras_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20)  # Example token usage
 
     mock_chat.return_value = mock_cerebras_response
 
     # Test parameters
     params = {
         "messages": [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "World"}],
-        "model": "llama3.1-70b",
+        "model": "llama-3.3-70b",
     }
 
     # Call the create method
@@ -185,7 +185,7 @@ def test_create_response(mock_chat, cerebras_client):
         response.choices[0].message.content == "Example Cerebras response"
     ), "Response content should match expected output"
     assert response.id == "mock_cerebras_response_id", "Response ID should match the mocked response ID"
-    assert response.model == "llama3.1-70b", "Response model should match the mocked response model"
+    assert response.model == "llama-3.3-70b", "Response model should match the mocked response model"
     assert response.usage.prompt_tokens == 10, "Response prompt tokens should match the mocked response usage"
     assert response.usage.completion_tokens == 20, "Response completion tokens should match the mocked response usage"
 
@@ -217,7 +217,7 @@ def test_create_response_with_tool_call(mock_chat, cerebras_client):
             )
         ],
         id="mock_cerebras_response_id",
-        model="llama3.1-70b",
+        model="llama-3.3-70b",
         usage=MagicMock(prompt_tokens=10, completion_tokens=20),
     )
 
@@ -245,7 +245,7 @@ def test_create_response_with_tool_call(mock_chat, cerebras_client):
 
     # Call the create method
     response = cerebras_client.create(
-        {"messages": cerebras_messages, "tools": converted_functions, "model": "llama3.1-70b"}
+        {"messages": cerebras_messages, "tools": converted_functions, "model": "llama-3.3-70b"}
     )
 
     # Assertions to check if the functions and content are included in the response

diff --git a/website/docs/topics/non-openai-models/cloud-cerebras.ipynb b/website/docs/topics/non-openai-models/cloud-cerebras.ipynb
@@ -47,7 +47,7 @@
     "        \"api_type\": \"cerebras\"\n",
     "    },\n",
     "    {\n",
-    "        \"model\": \"llama3.1-70b\",\n",
+    "        \"model\": \"llama-3.3-70b\",\n",
     "        \"api_key\": \"your Cerebras API Key goes here\",\n",
     "        \"api_type\": \"cerebras\"\n",
     "    }\n",
@@ -86,7 +86,7 @@
     "```python\n",
     "[\n",
     "    {\n",
-    "        \"model\": \"llama3.1-70b\",\n",
+    "        \"model\": \"llama-3.3-70b\",\n",
     "        \"api_key\": \"your Cerebras API Key goes here\",\n",
     "        \"api_type\": \"cerebras\"\n",
     "        \"max_tokens\": 10000,\n",
@@ -120,7 +120,7 @@
     "\n",
     "from autogen.oai.cerebras import CerebrasClient, calculate_cerebras_cost\n",
     "\n",
-    "config_list = [{\"model\": \"llama3.1-70b\", \"api_key\": os.environ.get(\"CEREBRAS_API_KEY\"), \"api_type\": \"cerebras\"}]"
+    "config_list = [{\"model\": \"llama-3.3-70b\", \"api_key\": os.environ.get(\"CEREBRAS_API_KEY\"), \"api_type\": \"cerebras\"}]"
    ]
   },
   {
@@ -270,7 +270,7 @@
     "\n",
     "config_list = [\n",
     "    {\n",
-    "        \"model\": \"llama3.1-70b\",\n",
+    "        \"model\": \"llama-3.3-70b\",\n",
     "        \"api_key\": os.environ.get(\"CEREBRAS_API_KEY\"),\n",
     "        \"api_type\": \"cerebras\",\n",
     "    }\n",