Skip to content

Commit

Permalink
[DEMO][Swarm of MultiModalityRobots][sequential_workflow with images
Browse files Browse the repository at this point in the history
  • Loading branch information
Kye committed Nov 26, 2023
1 parent a56b0b6 commit b457511
Show file tree
Hide file tree
Showing 11 changed files with 396 additions and 50 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
129 changes: 129 additions & 0 deletions playground/demos/swarm_of_mma_manufacturing/flow_iter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""
Swarm of multi modal autonomous agents for manufacturing!
---------------------------------------------------------
Health Security agent: monitors the health of working conditions. Input: image of the factory. Output: health safety index from 0.0 to 1.0 (1.0 is best).
Quality Control agent: monitors the quality of the product. Input: image of the product. Output: quality index from 0.0 to 1.0 (1.0 is best).
Productivity agent: monitors the productivity of the factory. Input: image of the factory. Output: productivity index from 0.0 to 1.0 (1.0 is best).
Safety agent: monitors the safety of the factory. Input: image of the factory. Output: safety index from 0.0 to 1.0 (1.0 is best).
Security agent: monitors the security of the factory. Input: image of the factory. Output: security index from 0.0 to 1.0 (1.0 is best).
Sustainability agent: monitors the sustainability of the factory. Input: image of the factory. Output: sustainability index from 0.0 to 1.0 (1.0 is best).
Efficiency agent: monitors the efficiency of the factory. Input: image of the factory. Output: efficiency index from 0.0 to 1.0 (1.0 is best).
Flow:
health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
"""
from swarms.structs import Flow, SequentialWorkflow
import os
from dotenv import load_dotenv
from swarms.models import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)

# Load variables from a .env file into the process environment, then read
# the OpenAI key from there.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

# Single vision-model client that every agent in this script shares.
llm = GPT4VisionAPI(openai_api_key=api_key)

# Demo images. The paths are relative, so they resolve against the current
# working directory (they start at the `playground/` folder).
_IMAGE_DIR = "playground/demos/swarm_of_mma_manufacturing"
assembly_line = f"{_IMAGE_DIR}/assembly_line.jpg"
red_robots = f"{_IMAGE_DIR}/red_robots.jpg"
robots = f"{_IMAGE_DIR}/robots.jpg"
tesla_assembly_line = f"{_IMAGE_DIR}/tesla_assembly.jpg"


# Analysis prompts, keyed by the concern each agent is responsible for.
tasks = dict(
    health_safety=(
        "Analyze the factory's working environment for health safety."
        " Focus on cleanliness, ventilation, spacing between workstations,"
        " and personal protective equipment availability."
    ),
    productivity=(
        "Review the factory's workflow efficiency, machine utilization,"
        " and employee engagement."
        " Identify operational delays or bottlenecks."
    ),
    safety=(
        "Analyze the factory's safety measures, including fire exits,"
        " safety signage, and emergency response equipment."
    ),
    security=(
        "Evaluate the factory's security systems, entry/exit controls,"
        " and potential vulnerabilities."
    ),
    sustainability=(
        "Inspect the factory's sustainability practices, including"
        " waste management, energy usage, and eco-friendly processes."
    ),
    efficiency=(
        "Assess the manufacturing process's efficiency, considering the"
        " layout, logistics, and automation level."
    ),
)


# Expose every prompt under its own module-level name.
(
    health_safety_prompt,
    productivity_prompt,
    safety_prompt,
    security_prompt,
    sustainability_prompt,
    efficiency_prompt,
) = (
    tasks[key]
    for key in (
        "health_safety",
        "productivity",
        "safety",
        "security",
        "sustainability",
        "efficiency",
    )
)


# Every agent shares the same vision model, runs at most two loops, and is
# flagged as multi-modal; only the prompt passed as `sop` differs.

# Health and safety agent.
# Uses `sop=` like the other agents: the prompt is a single string, not a
# list of SOPs.
health_security_agent = Flow(
    llm=llm,
    sop=health_safety_prompt,
    max_loops=2,
    multi_modal=True,
)

# Productivity agent
productivity_check_agent = Flow(
    llm=llm,
    sop=productivity_prompt,
    max_loops=2,
    multi_modal=True,
)

# Security agent
security_check_agent = Flow(
    llm=llm,
    sop=security_prompt,
    max_loops=2,
    multi_modal=True,
)

# Efficiency agent
efficiency_check_agent = Flow(
    llm=llm,
    sop=efficiency_prompt,
    max_loops=2,
    multi_modal=True,
)


# The agents are chained by hand: each call receives the previous agent's
# result as its task, paired with a different factory image.

# Step 1: health/safety analysis of the `robots` image.
health_check = health_security_agent.run(
    "Analyze the safety of this factory",
    robots,
)

# Step 2: productivity review of the assembly-line image, driven by the
# health/safety result.
productivity_check = productivity_check_agent.run(
    health_check,
    assembly_line,
)

# Step 3: security evaluation of the `red_robots` image. This must be
# `run(...)` like every other step, because its result is fed to the next
# agent as the task.
security_check = security_check_agent.run(
    productivity_check,
    red_robots,
)

# Step 4: efficiency assessment of the Tesla assembly-line image.
efficiency_check = efficiency_check_agent.run(
    security_check,
    tesla_assembly_line,
)

122 changes: 122 additions & 0 deletions playground/demos/swarm_of_mma_manufacturing/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,125 @@
Flow:
health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
"""
from swarms.structs import Flow, SequentialWorkflow
from swarms.models import GPT4VisionAPI
from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
)


# Vision model shared by every agent below; constructed with its default
# arguments (no API key is passed explicitly here).
# NOTE(review): presumably the key is picked up from the environment — confirm.
llm = GPT4VisionAPI()

# Demo images, given as bare file names (resolved against the current
# working directory).
assembly_line, red_robots, robots, tesla_assembly_line = (
    "assembly_line.jpg",
    "red_robots.jpg",
    "robots.jpg",
    "tesla_assembly.jpg",
)


# One prompt per concern; each is bound to its own name first and then
# collected into the `tasks` lookup table below.
health_safety_prompt = (
    "Analyze the factory's working environment for health safety. "
    "Focus on cleanliness, ventilation, spacing between workstations, "
    "and personal protective equipment availability."
)
productivity_prompt = (
    "Review the factory's workflow efficiency, machine utilization, "
    "and employee engagement. "
    "Identify operational delays or bottlenecks."
)
safety_prompt = (
    "Analyze the factory's safety measures, including fire exits, "
    "safety signage, and emergency response equipment."
)
security_prompt = (
    "Evaluate the factory's security systems, entry/exit controls, "
    "and potential vulnerabilities."
)
sustainability_prompt = (
    "Inspect the factory's sustainability practices, including "
    "waste management, energy usage, and eco-friendly processes."
)
efficiency_prompt = (
    "Assess the manufacturing process's efficiency, considering the "
    "layout, logistics, and automation level."
)


# Prompts indexed by concern name.
tasks = {
    "health_safety": health_safety_prompt,
    "productivity": productivity_prompt,
    "safety": safety_prompt,
    "security": security_prompt,
    "sustainability": sustainability_prompt,
    "efficiency": efficiency_prompt,
}


def _new_agent(sop):
    """Return a Flow bound to the shared `llm`, limited to two loops."""
    return Flow(llm=llm, sop=sop, max_loops=2)


# Health and safety agent
health_security_agent = _new_agent(
    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + health_safety_prompt
)

# Quality control agent (base system prompt only, no task-specific prompt)
quality_control_agent = _new_agent(MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1)

# Productivity agent
productivity_check_agent = _new_agent(
    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + productivity_prompt
)

# Security agent
security_check_agent = _new_agent(
    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + security_prompt
)

# Efficiency agent
efficiency_check_agent = _new_agent(
    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1 + efficiency_prompt
)


# Sequential workflow that the four agents below are registered on, in order.
workflow = SequentialWorkflow(
max_loops=4,
name="Swarm of multi modal autonomous agents for manufacturing!",
description="Swarm of multi modal autonomous agents for manufacturing!",
)

# NOTE(review): each `workflow.add(...)` result is passed on as the task of
# the next step. Confirm that `add` returns something usable as a task
# (rather than None) in the installed swarms version.
# NOTE(review): `quality_control_agent` is defined above but never added here.

# Step 1: health/safety agent on the `robots` image.
health_check = workflow.add(
health_security_agent,
"Analyze the safety of this factory",
robots
)

# Step 2: productivity agent on the assembly-line image.
productivity_check = workflow.add(
productivity_check_agent, health_check, assembly_line
)

# Step 3: security agent on the `red_robots` image.
security_check = workflow.add(
security_check_agent, productivity_check, red_robots
)

# Step 4: efficiency agent on the Tesla assembly-line image.
efficiency_check = workflow.add(
efficiency_check_agent, security_check, tesla_assembly_line
)


# Run the workflow
workflow.run()
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "swarms"
version = "2.4.3"
version = "2.4.5"
description = "Swarms - Pytorch"
license = "MIT"
authors = ["Kye Gomez <[email protected]>"]
Expand Down
43 changes: 43 additions & 0 deletions swarms/models/base_multimodal_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,49 @@


class BaseMultiModalModel:
"""
Base class for multimodal models
Args:
model_name (Optional[str], optional): Model name. Defaults to None.
temperature (Optional[int], optional): Temperature. Defaults to 0.5.
max_tokens (Optional[int], optional): Max tokens. Defaults to 500.
max_workers (Optional[int], optional): Max workers. Defaults to 10.
top_p (Optional[int], optional): Top p. Defaults to 1.
top_k (Optional[int], optional): Top k. Defaults to 50.
beautify (Optional[bool], optional): Beautify. Defaults to False.
device (Optional[str], optional): Device. Defaults to "cuda".
max_new_tokens (Optional[int], optional): Max new tokens. Defaults to 500.
retries (Optional[int], optional): Retries. Defaults to 3.
Examples:
>>> from swarms.models.base_multimodal_model import BaseMultiModalModel
>>> model = BaseMultiModalModel()
>>> model.run("Generate a summary of this text")
>>> model.run("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")
>>> model.run_batch(["Generate a summary of this text", "Generate a summary of this text"])
>>> model.run_batch([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
>>> model.run_batch_async(["Generate a summary of this text", "Generate a summary of this text"])
>>> model.run_batch_async([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
>>> model.run_batch_async_with_retries(["Generate a summary of this text", "Generate a summary of this text"])
>>> model.run_batch_async_with_retries([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
>>> model.generate_summary("Generate a summary of this text")
>>> model.set_temperature(0.5)
>>> model.set_max_tokens(500)
>>> model.get_generation_time()
>>> model.get_chat_history()
>>> model.get_unique_chat_history()
>>> model.get_chat_history_length()
>>> model.get_unique_chat_history_length()
>>> model.get_chat_history_tokens()
>>> model.print_beautiful("Print this beautifully")
>>> model.stream("Stream this")
>>> model.unique_chat_history()
>>> model.clear_chat_history()
>>> model.get_img_from_web("https://www.google.com/images/branding/googlelogo/")
"""
def __init__(
self,
model_name: Optional[str],
Expand Down
25 changes: 17 additions & 8 deletions swarms/models/gpt4_vision_api.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
import logging
import asyncio
import base64
from typing import Optional
import concurrent.futures
from termcolor import colored
import json
import logging
import os
from concurrent.futures import ThreadPoolExecutor
from typing import List, Tuple
from typing import List, Optional, Tuple

import aiohttp
import requests
from dotenv import load_dotenv

from termcolor import colored

try:
import cv2
Expand Down Expand Up @@ -94,9 +92,10 @@ def encode_image(self, img: str):

def download_img_then_encode(self, img: str):
"""Download image from URL then encode image to base64 using requests"""
pass

# Function to handle vision tasks
def run(self, task: str, img: str):
def run(self, task: Optional[str] = None, img: Optional[str] = None, *args, **kwargs):
"""Run the model."""
try:
base64_image = self.encode_image(img)
Expand Down Expand Up @@ -131,6 +130,7 @@ def run(self, task: str, img: str):
)

out = response.json()
content = print(out)
content = out["choices"][0]["message"]["content"]

if self.streaming_enabled:
Expand Down Expand Up @@ -263,6 +263,7 @@ def __call__(self, task: str, img: str):
)

out = response.json()
content = print(out)
content = out["choices"][0]["message"]["content"]

if self.streaming_enabled:
Expand All @@ -287,6 +288,14 @@ def run_many(
):
"""
Run the model on multiple tasks and images all at once using concurrent
Args:
tasks (List[str]): List of tasks
imgs (List[str]): List of image paths
Returns:
List[str]: List of responses
"""
# Instantiate the thread pool executor
Expand All @@ -301,8 +310,8 @@ def run_many(

async def arun(
self,
task: str,
img: str,
task: Optional[str] = None,
img: Optional[str] = None,
):
"""
Asynchronously run the model
Expand Down
Loading

0 comments on commit b457511

Please sign in to comment.