[DEMO][Swarm of MultiModalityRobots][sequential_workflow with images]

kyegomez · Nov 26, 2023 · b457511 · b457511
1 parent a56b0b6
commit b457511
Show file tree

Hide file tree

Showing 11 changed files with 396 additions and 50 deletions.
diff --git a/playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg b/playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg
diff --git a/playground/demos/swarm_of_mma_manufacturing/flow_iter.py b/playground/demos/swarm_of_mma_manufacturing/flow_iter.py
@@ -0,0 +1,129 @@
+"""
+Swarm of multi modal autonomous agents for manufacturing!
+---------------------------------------------------------    
+Health Security agent: Agent that monitors the health of working conditions: input image of factory output: health safety index 0.0 - 1.0 being the highest
+Quality Control agent: Agent that monitors the quality of the product: input image of product output: quality index 0.0 - 1.0 being the highest
+Productivity agent: Agent that monitors the productivity of the factory: input image of factory output: productivity index 0.0 - 1.0 being the highest
+Safety agent: Agent that monitors the safety of the factory: input image of factory output: safety index 0.0 - 1.0 being the highest
+Security agent: Agent that monitors the security of the factory: input image of factory output: security index 0.0 - 1.0 being the highest
+Sustainability agent: Agent that monitors the sustainability of the factory: input image of factory output: sustainability index 0.0 - 1.0 being the highest
+Efficiency agent: Agent that monitors the efficiency of the factory: input image of factory output: efficiency index 0.0 - 1.0 being the highest    
+
+
+Flow:
+health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent 
+"""
+from swarms.structs import Flow, SequentialWorkflow
+import os
+from dotenv import load_dotenv
+from swarms.models import GPT4VisionAPI
+from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
+    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+)
+
+load_dotenv()
+api_key = os.getenv("OPENAI_API_KEY")
+
+llm = GPT4VisionAPI(
+    openai_api_key=api_key
+)
+
+assembly_line = "playground/demos/swarm_of_mma_manufacturing/assembly_line.jpg"
+red_robots = "playground/demos/swarm_of_mma_manufacturing/red_robots.jpg"
+robots = "playground/demos/swarm_of_mma_manufacturing/robots.jpg"
+tesla_assembly_line = "playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg"
+
+
+# Define detailed prompts for each agent
+tasks = {
+    "health_safety": (
+        "Analyze the factory's working environment for health safety. Focus on"
+        " cleanliness, ventilation, spacing between workstations, and personal"
+        " protective equipment availability."
+    ),
+    "productivity": (
+        "Review the factory's workflow efficiency, machine utilization, and"
+        " employee engagement. Identify operational delays or bottlenecks."
+    ),
+    "safety": (
+        "Analyze the factory's safety measures, including fire exits, safety"
+        " signage, and emergency response equipment."
+    ),
+    "security": (
+        "Evaluate the factory's security systems, entry/exit controls, and"
+        " potential vulnerabilities."
+    ),
+    "sustainability": (
+        "Inspect the factory's sustainability practices, including waste"
+        " management, energy usage, and eco-friendly processes."
+    ),
+    "efficiency": (
+        "Assess the manufacturing process's efficiency, considering the layout,"
+        " logistics, and automation level."
+    ),
+}
+
+
+# Define prompts for each agent
+health_safety_prompt = tasks["health_safety"]
+productivity_prompt = tasks["productivity"]
+safety_prompt = tasks["safety"]
+security_prompt = tasks["security"]
+sustainability_prompt = tasks["sustainability"]
+efficiency_prompt = tasks["efficiency"]
+
+
+# Health security agent: the single prompt string goes in `sop`, same as the other agents below
+health_security_agent = Flow(
+    llm=llm,
+    sop=health_safety_prompt,
+    max_loops=2,
+    multi_modal=True
+)
+
+# Productivity agent
+productivity_check_agent = Flow(
+    llm=llm,
+    sop=productivity_prompt,
+    max_loops=2,
+    multi_modal=True
+)
+
+# Security agent
+security_check_agent = Flow(
+    llm=llm,
+    sop=security_prompt,
+    max_loops=2,
+    multi_modal=True
+)
+
+# Efficiency agent
+efficiency_check_agent = Flow(
+    llm=llm,
+    sop=efficiency_prompt,
+    max_loops=2,
+    multi_modal=True
+)
+
+
+# Run the health security agent on the robots image
+health_check = health_security_agent.run(
+    "Analyze the safety of this factory",
+    robots
+)
+
+# Run the productivity check agent on the assembly line image, passing the health check output as the task
+productivity_check = productivity_check_agent.run(
+    health_check, assembly_line
+)
+
+# Run the security check agent on the red robots image, passing the productivity check output as the task
+security_check = security_check_agent.run(
+    productivity_check, red_robots
+)
+
+# Run the efficiency check agent on the Tesla assembly line image, passing the security check output as the task
+efficiency_check = efficiency_check_agent.run(
+    security_check, tesla_assembly_line
+)
+
diff --git a/playground/demos/swarm_of_mma_manufacturing/main.py b/playground/demos/swarm_of_mma_manufacturing/main.py
@@ -13,3 +13,125 @@
 Flow:
 health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent 
 """
+from swarms.structs import Flow, SequentialWorkflow
+from swarms.models import GPT4VisionAPI
+from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
+    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+)
+
+
+llm = GPT4VisionAPI()
+
+assembly_line = "assembly_line.jpg"
+red_robots = "red_robots.jpg"
+robots = "robots.jpg"
+tesla_assembly_line = "tesla_assembly.jpg"
+
+
+# Define detailed prompts for each agent
+tasks = {
+    "health_safety": (
+        "Analyze the factory's working environment for health safety. Focus on"
+        " cleanliness, ventilation, spacing between workstations, and personal"
+        " protective equipment availability."
+    ),
+    "productivity": (
+        "Review the factory's workflow efficiency, machine utilization, and"
+        " employee engagement. Identify operational delays or bottlenecks."
+    ),
+    "safety": (
+        "Analyze the factory's safety measures, including fire exits, safety"
+        " signage, and emergency response equipment."
+    ),
+    "security": (
+        "Evaluate the factory's security systems, entry/exit controls, and"
+        " potential vulnerabilities."
+    ),
+    "sustainability": (
+        "Inspect the factory's sustainability practices, including waste"
+        " management, energy usage, and eco-friendly processes."
+    ),
+    "efficiency": (
+        "Assess the manufacturing process's efficiency, considering the layout,"
+        " logistics, and automation level."
+    ),
+}
+
+
+# Define prompts for each agent
+health_safety_prompt = tasks["health_safety"]
+productivity_prompt = tasks["productivity"]
+safety_prompt = tasks["safety"]
+security_prompt = tasks["security"]
+sustainability_prompt = tasks["sustainability"]
+efficiency_prompt = tasks["efficiency"]
+
+
+# Health security agent
+health_security_agent = Flow(
+    llm=llm,
+    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + health_safety_prompt,
+    max_loops=2,
+)
+
+# Quality control agent
+quality_control_agent = Flow(
+    llm=llm,
+    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+    max_loops=2,
+)
+
+# Productivity agent
+productivity_check_agent = Flow(
+    llm=llm,
+    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + productivity_prompt,
+    max_loops=2,
+)
+
+# Security agent
+security_check_agent = Flow(
+    llm=llm,
+    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + security_prompt,
+    max_loops=2,
+)
+
+# Efficiency agent
+efficiency_check_agent = Flow(
+    llm=llm,
+    sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + efficiency_prompt,
+    max_loops=2,
+)
+
+
+# Sequential workflow
+workflow = SequentialWorkflow(
+    max_loops=4,
+    name="Swarm of multi modal autonomous agents for manufacturing!",
+    description="Swarm of multi modal autonomous agents for manufacturing!",
+)
+
+# Add the first task to the health_security_agent
+health_check = workflow.add(
+    health_security_agent,
+    "Analyze the safety of this factory",
+    robots
+)
+
+# Add the second task to the productivity_check_agent
+productivity_check = workflow.add(
+    productivity_check_agent, health_check, assembly_line
+)
+
+# Add the third task to the security_check_agent
+security_check = workflow.add(
+    security_check_agent, productivity_check, red_robots
+)
+
+# Add the fourth task to the efficiency_check_agent
+efficiency_check = workflow.add(
+    efficiency_check_agent, security_check, tesla_assembly_line
+)
+
+
+# Run the workflow
+workflow.run()
diff --git a/playground/demos/swarm_of_mma_manufacturing/red_robots.jpg b/playground/demos/swarm_of_mma_manufacturing/red_robots.jpg
diff --git a/playground/demos/swarm_of_mma_manufacturing/robots.jpg b/playground/demos/swarm_of_mma_manufacturing/robots.jpg
diff --git a/playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg b/playground/demos/swarm_of_mma_manufacturing/tesla_assembly.jpg
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "swarms"
-version = "2.4.3"
+version = "2.4.5"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez <[email protected]>"]

diff --git a/swarms/models/base_multimodal_model.py b/swarms/models/base_multimodal_model.py
@@ -13,6 +13,49 @@
 
 
 class BaseMultiModalModel:
+    """
+    Base class for multimodal models
+    
+    
+    Args:
+        model_name (Optional[str], optional): Model name. Defaults to None.
+        temperature (Optional[int], optional): Temperature. Defaults to 0.5.
+        max_tokens (Optional[int], optional): Max tokens. Defaults to 500.
+        max_workers (Optional[int], optional): Max workers. Defaults to 10.
+        top_p (Optional[int], optional): Top p. Defaults to 1.
+        top_k (Optional[int], optional): Top k. Defaults to 50.
+        beautify (Optional[bool], optional): Beautify. Defaults to False.
+        device (Optional[str], optional): Device. Defaults to "cuda".
+        max_new_tokens (Optional[int], optional): Max new tokens. Defaults to 500.
+        retries (Optional[int], optional): Retries. Defaults to 3.
+        
+    Examples:
+        >>> from swarms.models.base_multimodal_model import BaseMultiModalModel
+        >>> model = BaseMultiModalModel()
+        >>> model.run("Generate a summary of this text")
+        >>> model.run("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")
+        >>> model.run_batch(["Generate a summary of this text", "Generate a summary of this text"])
+        >>> model.run_batch([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+        >>> model.run_batch_async(["Generate a summary of this text", "Generate a summary of this text"])
+        >>> model.run_batch_async([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+        >>> model.run_batch_async_with_retries(["Generate a summary of this text", "Generate a summary of this text"])
+        >>> model.run_batch_async_with_retries([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+        >>> model.generate_summary("Generate a summary of this text")
+        >>> model.set_temperature(0.5)
+        >>> model.set_max_tokens(500)
+        >>> model.get_generation_time()
+        >>> model.get_chat_history()
+        >>> model.get_unique_chat_history()
+        >>> model.get_chat_history_length()
+        >>> model.get_unique_chat_history_length()
+        >>> model.get_chat_history_tokens()
+        >>> model.print_beautiful("Print this beautifully")
+        >>> model.stream("Stream this")
+        >>> model.unique_chat_history()
+        >>> model.clear_chat_history()
+        >>> model.get_img_from_web("https://www.google.com/images/branding/googlelogo/")
+    
+    """
     def __init__(
         self,
         model_name: Optional[str],

diff --git a/swarms/models/gpt4_vision_api.py b/swarms/models/gpt4_vision_api.py
@@ -1,18 +1,16 @@
-import logging
 import asyncio
 import base64
-from typing import Optional
 import concurrent.futures
-from termcolor import colored
 import json
+import logging
 import os
 from concurrent.futures import ThreadPoolExecutor
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
 import aiohttp
 import requests
 from dotenv import load_dotenv
-
+from termcolor import colored
 
 try:
     import cv2
@@ -94,9 +92,10 @@ def encode_image(self, img: str):
 
     def download_img_then_encode(self, img: str):
         """Download image from URL then encode image to base64 using requests"""
+        pass
 
     # Function to handle vision tasks
-    def run(self, task: str, img: str):
+    def run(self, task: Optional[str] = None, img: Optional[str] = None, *args, **kwargs):
         """Run the model."""
         try:
             base64_image = self.encode_image(img)
@@ -131,6 +130,7 @@ def run(self, task: str, img: str):
             )
 
             out = response.json()
+            content = print(out)
             content = out["choices"][0]["message"]["content"]
 
             if self.streaming_enabled:
@@ -263,6 +263,7 @@ def __call__(self, task: str, img: str):
             )
 
             out = response.json()
+            content = print(out)
             content = out["choices"][0]["message"]["content"]
 
             if self.streaming_enabled:
@@ -287,6 +288,14 @@ def run_many(
     ):
         """
         Run the model on multiple tasks and images all at once using concurrent
+        
+        Args:
+            tasks (List[str]): List of tasks
+            imgs (List[str]): List of image paths
+            
+        Returns:
+            List[str]: List of responses
+        
 
         """
         # Instantiate the thread pool executor
@@ -301,8 +310,8 @@ def run_many(
 
     async def arun(
         self,
-        task: str,
-        img: str,
+        task: Optional[str] = None,
+        img: Optional[str] = None,
     ):
         """
         Asynchronously run the model