Fix Exception #21

Merged 7 commits on Sep 13, 2024
8 changes: 1 addition & 7 deletions autogen/oai/bedrock.py
@@ -204,13 +204,7 @@ def create(self, params):
if len(tool_config["tools"]) > 0:
request_args["toolConfig"] = tool_config

try:
response = self.bedrock_runtime.converse(
**request_args,
)
except Exception as e:
raise RuntimeError(f"Failed to get response from Bedrock: {e}")

response = self.bedrock_runtime.converse(**request_args)
if response is None:
raise RuntimeError(f"Failed to get response from Bedrock after retrying {self._retries} times.")

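Note on this hunk: the local try/except around `converse()` is removed so that `botocore` errors propagate to the centralized handling added in `client.py` below, instead of being re-wrapped as `RuntimeError`. A minimal sketch of the resulting call, assuming `bedrock_runtime` is a boto3 `bedrock-runtime` client and `request_args` is the prepared request dict:

```python
def call_converse(bedrock_runtime, request_args: dict):
    """Minimal sketch: call Bedrock's Converse API and let boto3 errors propagate."""
    # No local try/except: botocore's BotoCoreError / ClientError bubble up to
    # OpenAIWrapper.create, which logs the failure and falls back to the next
    # configured client (see the client.py hunk below).
    response = bedrock_runtime.converse(**request_args)
    if response is None:
        raise RuntimeError("Failed to get response from Bedrock.")
    return response
```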
71 changes: 70 additions & 1 deletion autogen/oai/client.py
@@ -49,59 +49,106 @@
ERROR = None

try:
from google.api_core.exceptions import ( # noqa
InternalServerError as gemini_InternalServerError,
ResourceExhausted as gemini_ResourceExhausted,
)

from autogen.oai.gemini import GeminiClient

gemini_import_exception: Optional[ImportError] = None
except ImportError as e:
gemini_InternalServerError = gemini_ResourceExhausted = Exception
gemini_import_exception = e

try:
from anthropic import ( # noqa
InternalServerError as anthorpic_InternalServerError,
RateLimitError as anthorpic_RateLimitError,
)

from autogen.oai.anthropic import AnthropicClient

anthropic_import_exception: Optional[ImportError] = None
except ImportError as e:
anthorpic_InternalServerError = anthorpic_RateLimitError = Exception
anthropic_import_exception = e

try:
from mistralai.models import ( # noqa
HTTPValidationError as mistral_HTTPValidationError,
SDKError as mistral_SDKError,
)

from autogen.oai.mistral import MistralAIClient

mistral_import_exception: Optional[ImportError] = None
except ImportError as e:
mistral_SDKError = mistral_HTTPValidationError = Exception
mistral_import_exception = e

try:
from together.error import TogetherException as together_TogetherException

from autogen.oai.together import TogetherClient

together_import_exception: Optional[ImportError] = None
except ImportError as e:
together_TogetherException = Exception
together_import_exception = e

try:
from groq import ( # noqa
APIConnectionError as groq_APIConnectionError,
InternalServerError as groq_InternalServerError,
RateLimitError as groq_RateLimitError,
)

from autogen.oai.groq import GroqClient

groq_import_exception: Optional[ImportError] = None
except ImportError as e:
groq_InternalServerError = groq_RateLimitError = groq_APIConnectionError = Exception
groq_import_exception = e

try:
from cohere.errors import ( # noqa
InternalServerError as cohere_InternalServerError,
ServiceUnavailableError as cohere_ServiceUnavailableError,
TooManyRequestsError as cohere_TooManyRequestsError,
)

from autogen.oai.cohere import CohereClient

cohere_import_exception: Optional[ImportError] = None
except ImportError as e:
cohere_InternalServerError = cohere_TooManyRequestsError = cohere_ServiceUnavailableError = Exception
cohere_import_exception = e

try:
from ollama import ( # noqa
RequestError as ollama_RequestError,
ResponseError as ollama_ResponseError,
)

from autogen.oai.ollama import OllamaClient

ollama_import_exception: Optional[ImportError] = None
except ImportError as e:
ollama_RequestError = ollama_ResponseError = Exception
ollama_import_exception = e

try:
from botocore.exceptions import ( # noqa
BotoCoreError as bedrock_BotoCoreError,
ClientError as bedrock_ClientError,
)

from autogen.oai.bedrock import BedrockClient

bedrock_import_exception: Optional[ImportError] = None
except ImportError as e:
bedrock_BotoCoreError = bedrock_ClientError = Exception
bedrock_import_exception = e

logger = logging.getLogger(__name__)
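
The import block above follows one pattern per optional provider: import the SDK's error types together with the AutoGen client, and on `ImportError` alias every error name to the builtin `Exception` while recording the original `ImportError`, so the module still imports and the consolidated `except` tuple further down stays valid. A minimal sketch of the pattern for a hypothetical provider (the names below are placeholders, not a real SDK):

```python
from typing import Optional

try:
    # Real providers export their own error types (see the hunk above);
    # `someprovider` and these names are placeholders for illustration only.
    from someprovider import RateLimitError as someprovider_RateLimitError  # noqa

    from autogen.oai.someprovider import SomeProviderClient

    someprovider_import_exception: Optional[ImportError] = None
except ImportError as e:
    # Alias the error name to Exception so the module (and the shared
    # `except (...)` tuple) keeps working without the optional dependency,
    # and keep the ImportError so a helpful install message can be raised
    # when this provider is actually selected.
    someprovider_RateLimitError = Exception
    someprovider_import_exception = e
```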
@@ -544,7 +591,7 @@ def _register_default_client(self, config: Dict[str, Any], openai_config: Dict[s
self._clients.append(client)
elif api_type is not None and api_type.startswith("ollama"):
if ollama_import_exception:
raise ImportError("Please install `ollama` to use the Ollama API.")
raise ImportError("Please install `ollama` and `fix-busted-json` to use the Ollama API.")
client = OllamaClient(**openai_config)
self._clients.append(client)
elif api_type is not None and api_type.startswith("bedrock"):
Expand Down Expand Up @@ -791,6 +838,28 @@ def yes_or_no_filter(context, response):
logger.debug(f"config {i} failed", exc_info=True)
if i == last:
raise
except (
gemini_InternalServerError,
gemini_ResourceExhausted,
anthorpic_InternalServerError,
anthorpic_RateLimitError,
mistral_SDKError,
mistral_HTTPValidationError,
together_TogetherException,
groq_InternalServerError,
groq_RateLimitError,
groq_APIConnectionError,
cohere_InternalServerError,
cohere_TooManyRequestsError,
cohere_ServiceUnavailableError,
ollama_RequestError,
ollama_ResponseError,
bedrock_BotoCoreError,
bedrock_ClientError,
):
logger.debug(f"config {i} failed", exc_info=True)
if i == last:
raise
else:
# add cost calculation before caching no matter filter is passed or not
if price is not None:
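The new `except` clause mirrors the existing OpenAI error handling in the same loop: any of the listed provider errors logs the failure for config `i` and falls through to the next config, re-raising only once the last config has been tried. A rough sketch of that control flow with simplified names (the real loop is `OpenAIWrapper.create`):

```python
import logging

logger = logging.getLogger(__name__)

# Stand-in for the per-provider error aliases imported at module level;
# builtin exceptions are used here only to keep the sketch runnable.
PROVIDER_ERRORS = (TimeoutError, ConnectionError)

def create_with_fallback(configs, call):
    """Try each client config in order; skip past transient provider errors."""
    last = len(configs) - 1
    for i, config in enumerate(configs):
        try:
            return call(config)
        except PROVIDER_ERRORS:
            logger.debug(f"config {i} failed", exc_info=True)
            if i == last:
                raise  # nothing left to fall back to, surface the error
```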
127 changes: 48 additions & 79 deletions autogen/oai/cohere.py
@@ -172,82 +172,22 @@ def create(self, params: Dict) -> ChatCompletion:

# Stream if in parameters
streaming = True if "stream" in params and params["stream"] else False
cohere_finish = ""

max_retries = 5
for attempt in range(max_retries):
ans = None
try:
if streaming:
response = client.chat_stream(**cohere_params)
else:
response = client.chat(**cohere_params)
except CohereRateLimitError as e:
raise RuntimeError(f"Cohere exception occurred: {e}")
else:

if streaming:
# Streaming...
ans = ""
for event in response:
if event.event_type == "text-generation":
ans = ans + event.text
elif event.event_type == "tool-calls-generation":
# When streaming, tool calls are compiled at the end into a single event_type
ans = event.text
cohere_finish = "tool_calls"
tool_calls = []
for tool_call in event.tool_calls:
tool_calls.append(
ChatCompletionMessageToolCall(
id=str(random.randint(0, 100000)),
function={
"name": tool_call.name,
"arguments": (
"" if tool_call.parameters is None else json.dumps(tool_call.parameters)
),
},
type="function",
)
)

# Not using billed_units, but that may be better for cost purposes
prompt_tokens = event.response.meta.tokens.input_tokens
completion_tokens = event.response.meta.tokens.output_tokens
total_tokens = prompt_tokens + completion_tokens

response_id = event.response.response_id
else:
# Non-streaming finished
ans: str = response.text

# Not using billed_units, but that may be better for cost purposes
prompt_tokens = response.meta.tokens.input_tokens
completion_tokens = response.meta.tokens.output_tokens
total_tokens = prompt_tokens + completion_tokens

response_id = response.response_id
break

if response is not None:

response_content = ans

if streaming:
# Streaming response
if cohere_finish == "":
cohere_finish = "stop"
tool_calls = None
else:
# Non-streaming response
# If we have tool calls as the response, populate completed tool calls for our return OAI response
if response.tool_calls is not None:
cohere_finish = "stop"
tool_calls = None
ans = None
if streaming:
response = client.chat_stream(**cohere_params)
# Streaming...
ans = ""
for event in response:
if event.event_type == "text-generation":
ans = ans + event.text
elif event.event_type == "tool-calls-generation":
# When streaming, tool calls are compiled at the end into a single event_type
ans = event.text
cohere_finish = "tool_calls"
tool_calls = []
for tool_call in response.tool_calls:

# if parameters are null, clear them out (Cohere can return a string "null" if no parameter values)

for tool_call in event.tool_calls:
tool_calls.append(
ChatCompletionMessageToolCall(
id=str(random.randint(0, 100000)),
@@ -260,16 +200,45 @@ def create(self, params: Dict) -> ChatCompletion:
type="function",
)
)
else:
cohere_finish = "stop"
tool_calls = None

# Not using billed_units, but that may be better for cost purposes
prompt_tokens = event.response.meta.tokens.input_tokens
completion_tokens = event.response.meta.tokens.output_tokens
total_tokens = prompt_tokens + completion_tokens
response_id = event.response.response_id
else:
raise RuntimeError(f"Failed to get response from Cohere after retrying {attempt + 1} times.")
response = client.chat(**cohere_params)
ans: str = response.text

# Not using billed_units, but that may be better for cost purposes
prompt_tokens = response.meta.tokens.input_tokens
completion_tokens = response.meta.tokens.output_tokens
total_tokens = prompt_tokens + completion_tokens

response_id = response.response_id
# If we have tool calls as the response, populate completed tool calls for our return OAI response
if response.tool_calls is not None:
cohere_finish = "tool_calls"
tool_calls = []
for tool_call in response.tool_calls:

# if parameters are null, clear them out (Cohere can return a string "null" if no parameter values)

tool_calls.append(
ChatCompletionMessageToolCall(
id=str(random.randint(0, 100000)),
function={
"name": tool_call.name,
"arguments": ("" if tool_call.parameters is None else json.dumps(tool_call.parameters)),
},
type="function",
)
)

# 3. convert output
message = ChatCompletionMessage(
role="assistant",
content=response_content,
content=ans,
function_call=None,
tool_calls=tool_calls,
)
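The cohere.py rewrite drops the `max_retries` loop and the `try/except CohereRateLimitError` wrapper: `chat_stream`/`chat` are now called directly, and Cohere's own errors (`TooManyRequestsError`, `InternalServerError`, `ServiceUnavailableError`) propagate to the new `except` clause in `client.py`. A condensed sketch of the reworked branch structure, with the tool-call construction and token accounting elided:

```python
def cohere_respond(client, cohere_params: dict, streaming: bool):
    """Condensed sketch of the streaming / non-streaming paths after this PR."""
    cohere_finish = "stop"
    tool_calls = None
    if streaming:
        ans = ""
        for event in client.chat_stream(**cohere_params):
            if event.event_type == "text-generation":
                ans += event.text
            elif event.event_type == "tool-calls-generation":
                # When streaming, tool calls arrive compiled into one final event.
                ans = event.text
                cohere_finish = "tool_calls"
                tool_calls = list(event.tool_calls)  # turned into OAI tool calls in the real code
    else:
        response = client.chat(**cohere_params)
        ans = response.text
        if response.tool_calls is not None:
            cohere_finish = "tool_calls"
            tool_calls = list(response.tool_calls)
    return ans, cohere_finish, tool_calls
```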
26 changes: 2 additions & 24 deletions autogen/oai/gemini.py
@@ -51,7 +51,6 @@
import requests
import vertexai
from google.ai.generativelanguage import Content, Part
from google.api_core.exceptions import InternalServerError
from google.auth.credentials import Credentials
from openai.types.chat import ChatCompletion
from openai.types.chat.chat_completion import ChatCompletionMessage, Choice
@@ -222,30 +221,9 @@ def create(self, params: Dict) -> ChatCompletion:
)
genai.configure(api_key=self.api_key)
chat = model.start_chat(history=gemini_messages[:-1])
max_retries = 5
for attempt in range(max_retries):
ans = None
try:
response = chat.send_message(
gemini_messages[-1].parts, stream=stream, safety_settings=safety_settings
)
except InternalServerError:
delay = 5 * (2**attempt)
warnings.warn(
f"InternalServerError `500` occurs when calling Gemini's chat model. Retry in {delay} seconds...",
UserWarning,
)
time.sleep(delay)
except Exception as e:
raise RuntimeError(f"Google GenAI exception occurred while calling Gemini API: {e}")
else:
# `ans = response.text` is unstable. Use the following code instead.
ans: str = chat.history[-1].parts[0].text
break

if ans is None:
raise RuntimeError(f"Fail to get response from Google AI after retrying {attempt + 1} times.")

response = chat.send_message(gemini_messages[-1].parts, stream=stream, safety_settings=safety_settings)
ans: str = chat.history[-1].parts[0].text
prompt_tokens = model.count_tokens(chat.history[:-1]).total_tokens
completion_tokens = model.count_tokens(ans).total_tokens
elif model_name == "gemini-pro-vision":
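gemini.py likewise loses its private exponential-backoff loop and the now-unused `InternalServerError` import: `send_message` is called once, and `google.api_core` errors (`InternalServerError`, `ResourceExhausted`) reach the wrapper's shared `except` clause instead. A sketch of the simplified call, assuming `chat` is a started google.generativeai chat session and `gemini_messages` is the converted history:

```python
def gemini_send(chat, gemini_messages, stream, safety_settings):
    """Sketch of the single, unretried Gemini call after this PR."""
    # Errors such as InternalServerError / ResourceExhausted are no longer
    # retried here; they propagate to OpenAIWrapper.create's shared handler,
    # which logs them and falls back to the next configured client.
    response = chat.send_message(
        gemini_messages[-1].parts, stream=stream, safety_settings=safety_settings
    )
    # Read the reply from chat.history rather than response.text, which the
    # original code notes is unstable.
    ans = chat.history[-1].parts[0].text
    return response, ans
```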