Merge branch 'main' into cookbook
qingyun-wu authored Sep 5, 2024
2 parents 8bb7645 + c996b65 commit 4e5768d
Showing 21 changed files with 2,173 additions and 228 deletions.
417 changes: 209 additions & 208 deletions .github/workflows/contrib-openai.yml

Large diffs are not rendered by default.

40 changes: 40 additions & 0 deletions .github/workflows/contrib-tests.yml
@@ -613,6 +613,46 @@ jobs:
file: ./coverage.xml
flags: unittests

OllamaTest:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
python-version: ["3.9", "3.10", "3.11", "3.12"]
exclude:
- os: macos-latest
python-version: "3.9"
steps:
- uses: actions/checkout@v4
with:
lfs: true
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install packages and dependencies for all tests
run: |
python -m pip install --upgrade pip wheel
pip install "pytest-cov>=5"
- name: Install packages and dependencies for Ollama
run: |
pip install -e .[ollama,test]
- name: Set AUTOGEN_USE_DOCKER based on OS
shell: bash
run: |
if [[ ${{ matrix.os }} != ubuntu-latest ]]; then
echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV
fi
- name: Coverage
run: |
pytest test/oai/test_ollama.py --skip-openai
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: unittests

BedrockTest:
runs-on: ${{ matrix.os }}
strategy:
22 changes: 16 additions & 6 deletions .github/workflows/python-package.yml
@@ -43,14 +43,24 @@ jobs:
# - name: Conda list
# shell: pwsh
# run: conda list
-     - name: Build
+     - name: Build autogen
        shell: pwsh
        run: |
          pip install twine
-         python setup.py sdist bdist_wheel
+         python setup.py sdist bdist_wheel --name "autogen"
-     - name: Publish to PyPI
+     - name: Publish autogen to PyPI
        env:
-         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
-         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+         TWINE_USERNAME: ${{ secrets.AUTOGEN_PYPI_USERNAME }}
+         TWINE_PASSWORD: ${{ secrets.AUTOGEN_PYPI_PASSWORD }}
        shell: pwsh
-       run: twine upload dist/*
+       run: twine upload dist/autogen*
+     - name: Build pyautogen
+       shell: pwsh
+       run: |
+         python setup.py sdist bdist_wheel --name "pyautogen"
+     - name: Publish pyautogen to PyPI
+       env:
+         TWINE_USERNAME: ${{ secrets.PYAUTOGEN_PYPI_USERNAME }}
+         TWINE_PASSWORD: ${{ secrets.PYAUTOGEN_PYPI_PASSWORD }}
+       shell: pwsh
+       run: twine upload dist/*pyautogen*
108 changes: 108 additions & 0 deletions autogen/agentchat/contrib/capabilities/transforms.py
@@ -451,3 +451,111 @@ def _compress_text(self, text: str) -> Tuple[str, int]:
def _validate_min_tokens(self, min_tokens: Optional[int]):
if min_tokens is not None and min_tokens <= 0:
raise ValueError("min_tokens must be greater than 0 or None")


class TextMessageContentName:
"""A transform for including the agent's name in the content of a message.
How to create and apply the transform:
# Imports
from autogen.agentchat.contrib.capabilities import transform_messages, transforms
# Create Transform
name_transform = transforms.TextMessageContentName(position="start", format_string="'{name}' said:\n")
# Create the TransformMessages
context_handling = transform_messages.TransformMessages(
transforms=[
name_transform
]
)
# Add it to an agent so that the transform is applied to the messages when the agent runs inference
context_handling.add_to_agent(my_agent)
"""

def __init__(
self,
position: str = "start",
format_string: str = "{name}:\n",
deduplicate: bool = True,
filter_dict: Optional[Dict] = None,
exclude_filter: bool = True,
):
"""
Args:
position (str): The position to add the name to the content. The possible options are 'start' or 'end'. Defaults to 'start'.
format_string (str): The f-string to format the message name with. Use '{name}' as a placeholder for the agent's name. Defaults to '{name}:\n' and must contain '{name}'.
deduplicate (bool): Whether to deduplicate the formatted string so it doesn't appear twice (sometimes the LLM will add it to new messages itself). Defaults to True.
filter_dict (None or dict): A dictionary to filter out messages that you want/don't want to transform.
    If None, no filters will be applied.
exclude_filter (bool): If exclude filter is True (the default value), messages that match the filter will be
    excluded from the transformation. If False, messages that match the filter will be transformed.
"""

assert isinstance(position, str) and position in ["start", "end"]
assert isinstance(format_string, str) and "{name}" in format_string
assert isinstance(deduplicate, bool) and deduplicate is not None

self._position = position
self._format_string = format_string
self._deduplicate = deduplicate
self._filter_dict = filter_dict
self._exclude_filter = exclude_filter

# Track the number of messages changed for logging
self._messages_changed = 0

def apply_transform(self, messages: List[Dict]) -> List[Dict]:
"""Applies the name change to the message based on the position and format string.
Args:
messages (List[Dict]): A list of message dictionaries.
Returns:
List[Dict]: A list of dictionaries with the message content updated with names.
"""
# Make sure there is at least one message
if not messages:
return messages

messages_changed = 0
processed_messages = copy.deepcopy(messages)
for message in processed_messages:
# Some messages may not have content.
if not transforms_util.is_content_right_type(
message.get("content")
) or not transforms_util.is_content_right_type(message.get("name")):
continue

if not transforms_util.should_transform_message(message, self._filter_dict, self._exclude_filter):
continue

if transforms_util.is_content_text_empty(message["content"]) or transforms_util.is_content_text_empty(
message["name"]
):
continue

# Get and format the name in the content
content = message["content"]
formatted_name = self._format_string.format(name=message["name"])

if self._position == "start":
if not self._deduplicate or not content.startswith(formatted_name):
message["content"] = f"{formatted_name}{content}"

messages_changed += 1
else:
if not self._deduplicate or not content.endswith(formatted_name):
message["content"] = f"{content}{formatted_name}"

messages_changed += 1

self._messages_changed = messages_changed
return processed_messages

def get_logs(self, pre_transform_messages: List[Dict], post_transform_messages: List[Dict]) -> Tuple[str, bool]:
if self._messages_changed > 0:
return f"{self._messages_changed} message(s) changed to incorporate name.", True
else:
return "No messages changed to incorporate name.", False
Empty file.
24 changes: 24 additions & 0 deletions autogen/agentchat/contrib/graph_rag/document.py
@@ -0,0 +1,24 @@
from dataclasses import dataclass
from enum import Enum, auto
from typing import Optional


class DocumentType(Enum):
"""
Enum of supported document types.
"""

TEXT = auto()
HTML = auto()
PDF = auto()


@dataclass
class Document:
"""
A wrapper of input document data used to build the graph.
"""

doctype: DocumentType
data: Optional[object] = None
path_or_url: Optional[str] = ""
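A minimal construction example for the new dataclass (the file path is a placeholder):

from autogen.agentchat.contrib.graph_rag.document import Document, DocumentType

# Hypothetical input file used to build the graph.
input_doc = Document(doctype=DocumentType.TEXT, path_or_url="./data/the_matrix.txt")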
51 changes: 51 additions & 0 deletions autogen/agentchat/contrib/graph_rag/graph_query_engine.py
@@ -0,0 +1,51 @@
from dataclasses import dataclass, field
from typing import List, Optional, Protocol

from .document import Document


@dataclass
class GraphStoreQueryResult:
"""
A wrapper of graph store query results.
answer: human-readable answer to the question/query.
results: intermediate results for the question/query, e.g. node entities.
"""

answer: Optional[str] = None
results: list = field(default_factory=list)


class GraphQueryEngine(Protocol):
"""An abstract base class that represents a graph query engine on top of a underlying graph database.
This interface defines the basic methods for graph rag.
"""

def init_db(self, input_doc: Optional[List[Document]] = None):
    """
    This method initializes the graph database with the input documents or records.
    Usually, it takes the following steps:
    1. connect to a graph database.
    2. extract graph nodes and edges from the input data, based on the graph schema.
    3. build indexes, etc.
    Args:
        input_doc: a list of input documents used to build the graph in the database.
    Returns: GraphStore
"""
pass

def add_records(self, new_records: List) -> bool:
"""
Add new records to the underlying database and add to the graph if required.
"""
pass

def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult:
"""
This method transforms a string-format question into a database query and returns the result.
"""
pass
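Since GraphQueryEngine is a typing.Protocol, any class with structurally matching methods conforms; no inheritance is required. Below is a minimal, hypothetical in-memory sketch to show the expected shapes; the keyword matching stands in for a real graph database and is not part of this commit:

from typing import List, Optional

from autogen.agentchat.contrib.graph_rag.document import Document
from autogen.agentchat.contrib.graph_rag.graph_query_engine import GraphStoreQueryResult


class KeywordGraphQueryEngine:
    """Hypothetical engine: stores documents and answers by keyword match."""

    def init_db(self, input_doc: Optional[List[Document]] = None):
        # Stand-in for connecting to a graph database and building indexes.
        self._docs = list(input_doc or [])

    def add_records(self, new_records: List) -> bool:
        self._docs.extend(new_records)
        return True

    def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult:
        hits = [d for d in self._docs if d.data and question.lower() in str(d.data).lower()]
        answer = str(hits[0].data) if hits else None
        return GraphStoreQueryResult(answer=answer, results=hits[:n_results])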
56 changes: 56 additions & 0 deletions autogen/agentchat/contrib/graph_rag/graph_rag_capability.py
@@ -0,0 +1,56 @@
from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability
from autogen.agentchat.conversable_agent import ConversableAgent

from .graph_query_engine import GraphQueryEngine


class GraphRagCapability(AgentCapability):
"""
A graph RAG capability uses a graph query engine to give a conversable agent the graph RAG ability.
An agent class with graph RAG capability can:
1. create a graph in the underlying database with input documents.
2. retrieve relevant information based on messages received by the agent.
3. generate answers from the retrieved information and send messages back.
For example,
graph_query_engine = GraphQueryEngine(...)
graph_query_engine.init_db([Document(doc1), Document(doc2), ...])
graph_rag_agent = ConversableAgent(
name="graph_rag_agent",
max_consecutive_auto_reply=3,
...
)
graph_rag_capability = GraphRagCapability(graph_query_engine)
graph_rag_capability.add_to_agent(graph_rag_agent)
user_proxy = UserProxyAgent(
name="user_proxy",
code_execution_config=False,
is_termination_msg=lambda msg: "TERMINATE" in msg["content"],
human_input_mode="ALWAYS",
)
user_proxy.initiate_chat(graph_rag_agent, message="Name a few actors who've played in 'The Matrix'")
# ChatResult(
# chat_id=None,
# chat_history=[
# {'content': 'Name a few actors who've played in \'The Matrix\'', 'role': 'graph_rag_agent'},
# {'content': 'A few actors who have played in The Matrix are:
# - Keanu Reeves
# - Laurence Fishburne
# - Carrie-Anne Moss
# - Hugo Weaving',
# 'role': 'user_proxy'},
# ...)
"""

def __init__(self, query_engine: GraphQueryEngine):
"""
Initialize graph RAG capability with a graph query engine.
"""
...

def add_to_agent(self, agent: ConversableAgent): ...
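The commit leaves the capability methods as stubs. A hedged sketch of one possible concrete implementation, assuming ConversableAgent.register_hook accepts the "process_last_received_message" hookable method (that hook name is an assumption, not something this commit confirms):

from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability
from autogen.agentchat.conversable_agent import ConversableAgent
from autogen.agentchat.contrib.graph_rag.graph_query_engine import GraphQueryEngine, GraphStoreQueryResult


class SimpleGraphRagCapability(AgentCapability):
    """Hypothetical capability: answers each received message from the graph."""

    def __init__(self, query_engine: GraphQueryEngine):
        self._query_engine = query_engine

    def add_to_agent(self, agent: ConversableAgent):
        # Assumed hook name; replaces the last received message with the graph answer.
        agent.register_hook("process_last_received_message", self._answer_from_graph)

    def _answer_from_graph(self, message: str) -> str:
        result: GraphStoreQueryResult = self._query_engine.query(message)
        return result.answer if result.answer is not None else message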
2 changes: 2 additions & 0 deletions autogen/logger/file_logger.py
@@ -29,6 +29,7 @@
from autogen.oai.gemini import GeminiClient
from autogen.oai.groq import GroqClient
from autogen.oai.mistral import MistralAIClient
from autogen.oai.ollama import OllamaClient
from autogen.oai.together import TogetherClient

logger = logging.getLogger(__name__)
@@ -222,6 +223,7 @@ def log_new_client(
| TogetherClient
| GroqClient
| CohereClient
| OllamaClient
| BedrockClient
),
wrapper: OpenAIWrapper,
2 changes: 2 additions & 0 deletions autogen/logger/sqlite_logger.py
@@ -30,6 +30,7 @@
from autogen.oai.gemini import GeminiClient
from autogen.oai.groq import GroqClient
from autogen.oai.mistral import MistralAIClient
from autogen.oai.ollama import OllamaClient
from autogen.oai.together import TogetherClient

logger = logging.getLogger(__name__)
@@ -409,6 +410,7 @@ def log_new_client(
TogetherClient,
GroqClient,
CohereClient,
OllamaClient,
BedrockClient,
],
wrapper: OpenAIWrapper,
2 changes: 1 addition & 1 deletion autogen/oai/anthropic.py
@@ -320,7 +320,7 @@ def oai_messages_to_anthropic_messages(params: Dict[str, Any]) -> list[dict[str,
last_tool_result_index = -1
for message in params["messages"]:
if message["role"] == "system":
params["system"] = message["content"]
params["system"] = params.get("system", "") + ("\n" if "system" in params else "") + message["content"]
else:
# New messages will be added here, manage role alternations
expected_role = "user" if len(processed_messages) % 2 == 0 else "assistant"
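A minimal sketch of the merge behavior the changed line introduces: successive system messages are now concatenated with newlines rather than each one overwriting the previous value.

# Illustrative messages; the loop mirrors the logic of the changed line above.
messages = [
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "Hi"},
    {"role": "system", "content": "Answer in French."},
]

system = ""
for message in messages:
    if message["role"] == "system":
        system = system + ("\n" if system else "") + message["content"]

# system == "You are terse.\nAnswer in French."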
12 changes: 12 additions & 0 deletions autogen/oai/client.py
@@ -90,6 +90,13 @@
except ImportError as e:
cohere_import_exception = e

try:
from autogen.oai.ollama import OllamaClient

ollama_import_exception: Optional[ImportError] = None
except ImportError as e:
ollama_import_exception = e

try:
from autogen.oai.bedrock import BedrockClient

@@ -535,6 +542,11 @@ def _register_default_client(self, config: Dict[str, Any], openai_config: Dict[s
raise ImportError("Please install `cohere` to use the Cohere API.")
client = CohereClient(**openai_config)
self._clients.append(client)
elif api_type is not None and api_type.startswith("ollama"):
if ollama_import_exception:
raise ImportError("Please install `ollama` to use the Ollama API.")
client = OllamaClient(**openai_config)
self._clients.append(client)
elif api_type is not None and api_type.startswith("bedrock"):
self._configure_openai_config_for_bedrock(config, openai_config)
if bedrock_import_exception:
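A hedged sketch of a config entry that would route through the new branch (the model name is an assumption; any locally pulled Ollama model applies):

import autogen

config_list = [
    {
        "api_type": "ollama",  # matched by the api_type.startswith("ollama") branch above
        "model": "llama3.1",   # assumed model name
    }
]

agent = autogen.ConversableAgent(name="assistant", llm_config={"config_list": config_list})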