diff --git a/autogen/agentchat/contrib/captainagent.py b/autogen/agentchat/contrib/captainagent.py index 0ee7496939..9c8cdab6d3 100644 --- a/autogen/agentchat/contrib/captainagent.py +++ b/autogen/agentchat/contrib/captainagent.py @@ -138,6 +138,7 @@ def __init__( human_input_mode: Optional[str] = "NEVER", code_execution_config: Optional[Union[Dict, Literal[False]]] = False, nested_config: Optional[Dict] = None, + agent_config_save_path: Optional[str] = None, description: Optional[str] = DEFAULT_DESCRIPTION, **kwargs, ): @@ -154,6 +155,9 @@ def __init__( max_consecutive_auto_reply (int): the maximum number of consecutive auto replies. default to None (no limit provided, class attribute MAX_CONSECUTIVE_AUTO_REPLY will be used as the limit in this case). The limit only plays a role when human_input_mode is not "ALWAYS". + nested_config (dict): the configuration for the nested chat instantiated by CaptainAgent. + A full list of keys and their functionalities can be found in [docs](https://ag2ai.github.io/ag2/docs/topics/captainagent/configurations). + agent_config_save_path (str): the path to save the generated or retrieved agent configuration. **kwargs (dict): Please refer to other kwargs in [ConversableAgent](https://github.com/ag2ai/ag2/blob/main/autogen/agentchat/conversable_agent.py#L74). 
""" @@ -174,17 +178,14 @@ def __init__( if nested_config["group_chat_llm_config"] is None: nested_config["group_chat_llm_config"] = llm_config.copy() - self.assistant = ConversableAgent( - name="CaptainAgent", - system_message=system_message, - llm_config=llm_config, - ) + self.assistant = ConversableAgent(name="CaptainAgent", system_message=system_message, llm_config=llm_config) self.assistant.update_tool_signature(self.AUTOBUILD_TOOL, is_remove=False) self.executor = CaptainUserProxyAgent( name="Expert_summoner", code_execution_config=code_execution_config, nested_config=nested_config, + is_termination_msg=lambda x: x.get("content", "") and "terminate" in x.get("content", "").lower(), human_input_mode="NEVER", ) @@ -201,7 +202,6 @@ def __init__( } ], trigger=UserProxyAgent, - # reply_func_from_nested_chats=None, position=0, ) @@ -230,7 +230,8 @@ class CaptainUserProxyAgent(ConversableAgent): CONVERSATION_REVIEW_PROMPT = """# Your task Briefly summarize the conversation history derived from an experts' group chat by following the answer format. -If you found non-trivial errors or issues in the conversation, point it out with a detailed reason and mark the "Need double-check" as "Yes." if you think it is worth further verification. +If you found non-trivial errors or issues in the conversation, point it out with a detailed reason, if you think it is worth further verification, mark the "Need double-check" as "Yes" +If you find the conversation ends with TERMINATE and the task is solved, this is normal situation, you can mark the "Need double-check" as "No". 
# Conversation history: {chat_history} @@ -373,7 +374,7 @@ def _run_autobuild(self, group_name: str, execution_task: str, building_task: st tool_root_dir = self.tool_root_dir tool_builder = ToolBuilder( corpus_path=os.path.join(tool_root_dir, "tool_description.tsv"), - retriever=self._nested_config["autobuild_tool_config"]["retriever"], + retriever=self._nested_config["autobuild_tool_config"].get("retriever", "all-mpnet-base-v2"), ) for idx, agent in enumerate(agent_list): if idx == len(self.tool_history[group_name]): @@ -404,7 +405,7 @@ def _run_autobuild(self, group_name: str, execution_task: str, building_task: st # Retrieve and build tools based on the smilarities between the skills and the tool description tool_builder = ToolBuilder( corpus_path=os.path.join(tool_root_dir, "tool_description.tsv"), - retriever=self._nested_config["autobuild_tool_config"]["retriever"], + retriever=self._nested_config["autobuild_tool_config"].get("retriever", "all-mpnet-base-v2"), ) for idx, skill in enumerate(skills): tools = tool_builder.retrieve(skill) diff --git a/autogen/agentchat/contrib/captainagent/tools/README.md b/autogen/agentchat/contrib/captainagent/tools/README.md new file mode 100644 index 0000000000..717c924a60 --- /dev/null +++ b/autogen/agentchat/contrib/captainagent/tools/README.md @@ -0,0 +1,44 @@ +# Introduction + +This directory contains a library of manually created python tools. These tools have three categories: math, data_analysis and information_retrieval. + +# Directory Layout +``` +tools +├── README.md +├── data_analysis +│ ├── calculate_correlation.py +│ └── ... +├── information_retrieval +│ ├── arxiv_download.py +│ ├── arxiv_search.py +│ └── ... +├── math +│ ├── calculate_circle_area_from_diameter.py +│ └── ... +└── tool_description.tsv +``` + +Tools can be imported from `tools/{category}/{tool_name}.py` with exactly the same function name. + +`tool_description.tsv` contains descriptions of tools for retrieval. 
def calculate_correlation(csv_path: str, column1: str, column2: str, method: str = "pearson") -> float:
    """
    Calculate the correlation between two columns in a CSV file.

    Args:
        csv_path (str): The path to the CSV file.
        column1 (str): The name of the first column.
        column2 (str): The name of the second column.
        method (str or callable, optional): The method used to calculate the correlation.
            - 'pearson' (default): Pearson correlation coefficient.
            - 'kendall': Kendall Tau correlation coefficient.
            - 'spearman': Spearman rank correlation coefficient.
            - callable: A custom correlation function that takes two arrays and returns a scalar.

    Returns:
        float: The correlation coefficient between the two columns.

    Raises:
        ValueError: If `method` is neither one of the supported names nor a callable.
    """
    import pandas as pd

    # Validate first so an unsupported method fails before any file I/O happens.
    if not (callable(method) or method in ("pearson", "kendall", "spearman")):
        raise ValueError("Invalid correlation method. Please choose 'pearson', 'kendall', 'spearman', or a callable.")

    df = pd.read_csv(csv_path)

    # corr() over the two selected columns yields a 2x2 matrix; the
    # off-diagonal entry [0, 1] is the coefficient between column1 and column2.
    return df[[column1, column2]].corr(method=method).iloc[0, 1]
+ + Returns: + tuple: (skewness, kurtosis) + """ + import pandas as pd + from scipy.stats import kurtosis, skew + + # Read the CSV file into a pandas DataFrame + df = pd.read_csv(csv_file) + + # Extract the specified column + column = df[column_name] + + # Calculate the skewness and kurtosis + skewness = skew(column) + kurt = kurtosis(column) + + return skewness, kurt diff --git a/autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py b/autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py new file mode 100644 index 0000000000..1fcb493224 --- /dev/null +++ b/autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py @@ -0,0 +1,26 @@ +def detect_outlier_iqr(csv_file: str, column_name: str): + """ + Detect outliers in a specified column of a CSV file using the IQR method. + + Args: + csv_file (str): The path to the CSV file. + column_name (str): The name of the column to detect outliers in. + + Returns: + list: A list of row indices that correspond to the outliers. 
def detect_outlier_zscore(csv_file, column_name, threshold=3):
    """
    Detect outliers in a CSV file based on a specified column. The outliers are determined by
    calculating the z-score of the data points in the column.

    Args:
        csv_file (str): The path to the CSV file.
        column_name (str): The name of the column to calculate z-scores for.
        threshold (float, optional): The threshold value for determining outliers. By default set to 3.

    Returns:
        list: A list of 0-based row positions whose absolute z-score is above the threshold.
    """
    import numpy as np
    import pandas as pd

    # Read the CSV file into a pandas DataFrame
    df = pd.read_csv(csv_file)
    column = df[column_name]

    # Absolute z-score of every value, using the column's own mean and standard deviation
    z_scores = np.abs((column - column.mean()) / column.std())

    # flatnonzero gives the positions as a NumPy array; convert it so the
    # function really returns a plain list of ints, as documented.
    return np.flatnonzero((z_scores > threshold).to_numpy()).tolist()
@with_requirements(["arxiv"], ["arxiv"])
def arxiv_download(id_list: list, download_dir="./"):
    """
    Download the PDF of every requested arXiv paper into a directory.

    Args:
        id_list (list): The arXiv paper IDs to fetch, e.g. [2302.00006v1].
        download_dir (str, optional): Directory the PDFs are written to. Defaults to './'.

    Returns:
        list: The paths of the downloaded PDF files, in the order the results were returned.
    """
    client = arxiv.Client()
    lookup = arxiv.Search(id_list=id_list)

    downloaded = []
    for entry in client.results(lookup):
        # Each file is named after the paper's short id.
        saved_to = entry.download_pdf(download_dir, filename=entry.get_short_id() + ".pdf")
        downloaded.append(saved_to)
        print("Paper id:", entry.get_short_id(), "Downloaded to:", saved_to)
    return downloaded
@with_requirements(["PyMuPDF"], ["os"])
def extract_pdf_image(pdf_path: str, output_dir: str, page_number=None):
    """
    Extracts images from a PDF file and saves them to the specified output directory.

    Each image is written to ``<output_dir>/image_<i>.png``, where ``i`` counts the
    extracted images from 0. The bytes are written exactly as returned by PyMuPDF.

    Args:
        pdf_path (str): The path to the PDF file.
        output_dir (str): The directory to save the extracted images. Created if missing.
        page_number (int, optional): The 1-based page number to extract images from.
            If not provided, extract images from all pages.
    """
    import fitz  # PyMuPDF library

    # Open the PDF file
    doc = fitz.open(pdf_path)
    try:
        # Create the output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        # Callers count pages from 1; the document is indexed from 0.
        pages = doc if page_number is None else [doc[page_number - 1]]

        # Collect the raw bytes of every image referenced by the selected pages
        images = []
        for page in pages:
            for img in page.get_images():
                xref = img[0]
                images.append(doc.extract_image(xref)["image"])

        # Save the extracted images
        for i, image_bytes in enumerate(images):
            image_path = os.path.join(output_dir, f"image_{i}.png")
            with open(image_path, "wb") as f:
                f.write(image_bytes)

        # Print the total number of images saved
        print(f"Saved a total of {len(images)} images")
    finally:
        # Always release the PDF handle, even if extraction or writing failed.
        doc.close()
def get_youtube_caption(videoId):
    """
    Retrieves the captions for a YouTube video.

    Args:
        videoId (str): The ID of the YouTube video.

    Returns:
        str: The captions of the YouTube video in text format.

    Raises:
        KeyError: If the RAPID_API_KEY environment variable is not set, or if the
            service response contains no "transcript" field.
        requests.HTTPError: If the transcript service answers with an HTTP error status.
    """
    import os

    import requests

    RAPID_API_KEY = os.environ["RAPID_API_KEY"]
    video_url = f"https://www.youtube.com/watch?v={videoId}"
    url = "https://youtube-transcript3.p.rapidapi.com/api/transcript-with-url"

    querystring = {"url": video_url, "lang": "en", "flat_text": "true"}

    headers = {"X-RapidAPI-Key": RAPID_API_KEY, "X-RapidAPI-Host": "youtube-transcript3.p.rapidapi.com"}

    response = requests.get(url, headers=headers, params=querystring)
    # Surface HTTP-level failures directly instead of letting them show up
    # later as a JSON decoding error or a missing "transcript" key.
    response.raise_for_status()

    payload = response.json()
    print(payload)
    return payload["transcript"]
@with_requirements(["easyocr"], ["os"])
def optical_character_recognition(image):
    """
    Perform optical character recognition (OCR) on the given image.

    Args:
        image (Union[str, Image.Image]): The image to perform OCR on. It can be either a file path or an Image object.

    Returns:
        str: The extracted text from the image, with the recognized fragments joined by ", ".

    Raises:
        FileNotFoundError: If the image file path does not exist.
        TypeError: If `image` is neither a file path nor a PIL Image.
    """
    import io

    import easyocr
    from PIL import Image

    # Validate and normalise the input before paying for the model load.
    if isinstance(image, str):
        if not os.path.exists(image):
            raise FileNotFoundError(image)
        ocr_input = image
    elif isinstance(image, Image.Image):
        buffer = io.BytesIO()
        image.convert("RGB").save(buffer, format="JPEG")
        # readtext() takes a path, a numpy array or raw bytes, not a file-like
        # object, so hand it the encoded bytes rather than the BytesIO itself.
        ocr_input = buffer.getvalue()
    else:
        raise TypeError(f"image must be a file path or a PIL Image, got {type(image).__name__}")

    reader = easyocr.Reader(["en"])  # Load the OCR model into memory

    # Each result entry is a 3-tuple; only its middle element (the text) is kept.
    result = reader.readtext(ocr_input)
    return ", ".join(text for _, text, _ in result)
+ """ + import os + + import requests + + # Get the Bing API key from the environment variable + bing_api_key = os.getenv("BING_API_KEY") + + # Check if the API key is available + if not bing_api_key: + raise ValueError("Bing API key not found in environment variable") + + # Set up the API request + url = "https://api.bing.microsoft.com/v7.0/search" + headers = { + "Ocp-Apim-Subscription-Key": bing_api_key, + } + params = { + "q": query, + "count": count, # Number of search results to retrieve + "offset": offset, # Offset of the first search result + } + + # Send the API request + response = requests.get(url, headers=headers, params=params) + response.raise_for_status() + + # Process the search results + search_results = response.json() + for index, result in enumerate(search_results["webPages"]["value"]): + print(f"Search Result {index+1}:") + print(result["name"]) + print(result["url"]) + print(result["snippet"]) diff --git a/autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py b/autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py new file mode 100644 index 0000000000..6fb24d7e17 --- /dev/null +++ b/autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py @@ -0,0 +1,31 @@ +def scrape_wikipedia_tables(url: str, header_keyword: str): + """ + Scrapes Wikipedia tables based on a given URL and header keyword. + + Args: + url: The URL of the Wikipedia page to scrape. + header_keyword: The keyword to search for in the headers of the page. + + Returns: + list: A list of lists representing the scraped table data. Each inner list represents a row in the table, + with each element representing a cell value. 
def youtube_download(url: str):
    """
    Requests a download link for the audio of a YouTube video.

    The RapidAPI key is read from the RAPID_API_KEY environment variable (the name
    used by the tools README); the older RAPIDAPI_KEY name is still accepted as a
    fallback.

    Args:
        url: The URL of the YouTube video.

    Returns:
        str or None: The download link for the audio, or None when the service
        response contains no link (the response is printed in that case).
    """
    import os

    import requests

    endpoint = "https://youtube-mp3-downloader2.p.rapidapi.com/ytmp3/ytmp3/"

    querystring = {"url": url}

    # Prefer the documented variable name; keep the previous spelling working.
    api_key = os.environ.get("RAPID_API_KEY") or os.environ.get("RAPIDAPI_KEY")

    headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "youtube-mp3-downloader2.p.rapidapi.com",
    }

    response = requests.get(endpoint, headers=headers, params=querystring)
    payload = response.json()

    if "link" in payload:
        return payload["link"]

    print("Error: Unable to retrieve download link.")
    print(payload)
    return None
# File: autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py
def calculate_day_of_the_week(total_days: int, starting_day: str):
    """
    Calculates the day of the week after a given number of days starting from a specified day.

    Args:
        total_days: The number of days to calculate. Negative values count backwards.
        starting_day: The starting day of the week, should be one of 'Monday', 'Tuesday', 'Wednesday', etc.
            Matching ignores letter case and surrounding whitespace.

    Returns:
        str: The day of the week after the specified number of days.

    Raises:
        ValueError: If ``starting_day`` is not the name of a weekday.
    """
    days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    # Tool arguments come from model-generated text, so tolerate 'monday' / ' Monday '.
    normalized_day = starting_day.strip().capitalize()
    if normalized_day not in days_of_week:
        raise ValueError(f"Unknown starting day {starting_day!r}; expected one of {days_of_week}.")

    start_index = days_of_week.index(normalized_day)
    # Python's % always yields 0..6 here, so negative offsets wrap correctly.
    return days_of_week[(start_index + total_days) % 7]


# File: autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py
def calculate_fraction_sum(
    fraction1_numerator: int, fraction1_denominator: int, fraction2_numerator: int, fraction2_denominator: int
):
    """
    Calculates the sum of two fractions and returns the result as a mixed number.

    Args:
        fraction1_numerator: The numerator of the first fraction.
        fraction1_denominator: The denominator of the first fraction.
        fraction2_numerator: The numerator of the second fraction.
        fraction2_denominator: The denominator of the second fraction.

    Returns:
        str: The sum of the two fractions as a mixed number in the format 'a b/c'.
        A whole-number sum is returned as 'a'. A positive proper fraction keeps the
        zero whole part ('0 b/c'). Negative sums carry a single leading minus sign
        ('-a b/c'); a negative proper fraction is returned as '-b/c'.

    Raises:
        ZeroDivisionError: If either denominator is zero.
    """
    from fractions import Fraction

    total = Fraction(fraction1_numerator, fraction1_denominator) + Fraction(
        fraction2_numerator, fraction2_denominator
    )

    # Split the magnitude, not the signed value: floor division on a negative
    # numerator would turn -3/2 into "-2 1/2".
    sign = "-" if total < 0 else ""
    whole, remainder = divmod(abs(total.numerator), total.denominator)
    if remainder == 0:
        return f"{sign}{whole}"

    fractional_part = Fraction(remainder, total.denominator)
    if sign and whole == 0:
        # There is no whole part to carry the sign, so attach it to the fraction.
        return f"-{fractional_part}"
    return f"{sign}{whole} {fractional_part}"
from autogen.coding.func_with_reqs import with_requirements


# File: autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py
@with_requirements(["sympy"])
def calculate_matrix_power(matrix, power):
    """
    Calculate the power of a given matrix.

    Args:
        matrix (list): An array of numbers that represents the matrix.
        power (int): The power to which the matrix is raised.

    Returns:
        Matrix: The resulting matrix after raising to power. For ``power == 0`` this
        is the identity matrix with as many rows as ``matrix``.

    Raises:
        ValueError: If the power is negative and the matrix is not invertible.
    """
    from sympy import Matrix, eye

    m = Matrix(matrix)
    if power == 0:
        return eye(m.shape[0])
    if power < 0:
        # det()/inv() are the documented dense-Matrix API for this check.
        # NOTE(review): sympy raises its own error from det() for a non-square matrix.
        if m.det() == 0:
            raise ValueError("Matrix is not invertible.")
        return m.inv() ** (-power)
    # Every remaining case is a positive power; never fall through to an implicit None.
    return m**power


# File: autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py
def calculate_reflected_point(point):
    """
    Calculates the reflection point of a given point about the line y=x.

    Args:
        point (dict): A dictionary representing the coordinates of the point.
            The dictionary should have keys 'x' and 'y' representing the x and y coordinates respectively.

    Returns:
        dict: A new dictionary representing the coordinates of the reflected point. Its keys are 'x' and 'y'.
    """
    # Reflecting about y=x simply exchanges the two coordinates.
    return {"x": point["y"], "y": point["x"]}


# File: autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py
@with_requirements(["sympy"])
def complex_numbers_product(complex_numbers):
    """
    Calculates the product of a list of complex numbers.

    Args:
        complex_numbers (list): A list of dictionaries representing complex numbers.
            Each dictionary should have 'real' and 'imag' keys representing the real
            and imaginary parts of the complex number.

    Returns:
        sympy.Expr: The simplified product of the complex numbers, built with
        sympy's imaginary unit ``I``. An empty list yields the multiplicative
        identity 1.
    """
    from sympy import I, simplify

    product = 1
    for number in complex_numbers:
        product *= number["real"] + I * number["imag"]
    return simplify(product)


# File: autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py
@with_requirements(["sympy"])
def compute_currency_conversion(amount, exchange_rate):
    """
    Compute the currency conversion of the given amount using the provided exchange rate.

    The result is ``amount / exchange_rate``, computed exactly with sympy's
    ``Rational`` and then converted to a float.

    NOTE(review): with the rate defined as "units of the second currency per one
    unit of the first", dividing converts an amount expressed in the *second*
    currency back into the first. If ``amount`` is meant to be in the first
    currency this should be a multiplication -- confirm the intended direction
    with callers before changing the arithmetic.

    Args:
        amount (float): The amount to be converted.
        exchange_rate (float): The exchange rate to use for the conversion, represented as the amount of second currency equivalent to one unit of the first currency.

    Returns:
        float: The converted amount.
    """
    from sympy import Rational

    converted_amount = Rational(amount, exchange_rate)
    return float(converted_amount)
# File: autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py
def count_distinct_permutations(sequence):
    """
    Counts the number of distinct permutations of a sequence where items may be indistinguishable.

    This is the multinomial coefficient n! / (c1! * c2! * ...), where n is the
    number of items and each c is the multiplicity of one distinct item.

    Args:
        sequence (iterable): The sequence for which to count the distinct permutations.
            Its items must be hashable.

    Returns:
        int: The number of distinct permutations. An empty sequence has exactly one.

    Example:
        >>> count_distinct_permutations('aab')
        3
        >>> count_distinct_permutations([1, 2, 2])
        3
    """
    from collections import Counter
    from math import factorial

    multiplicities = Counter(sequence).values()
    arrangements = factorial(sum(multiplicities))
    for multiplicity in multiplicities:
        # Each division is exact, so integer division loses nothing.
        arrangements //= factorial(multiplicity)
    return arrangements


# File: autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py
def evaluate_expression(expression):
    """
    Evaluates a mathematical expression with support for floor function notation and power notation.

    ``^`` is rewritten to ``**`` and LaTeX ``\\lfloor ... \\rfloor`` to ``floor(...)``
    before the text is handed to ``sympy.sympify``. Free symbols such as 'x' are
    created by sympify itself, so no symbol has to be declared beforehand.

    Args:
        expression (str): The mathematical expression to evaluate.

    Returns:
        Union[sympy.Expr, str]: The evaluated result as a sympy expression if successful,
        otherwise an error message as a string.
    """
    from sympy import sympify

    normalized = expression.replace("^", "**")
    normalized = normalized.replace("\\lfloor", "floor(").replace("\\rfloor", ")")
    try:
        # SECURITY: sympify evaluates the string; only pass expressions from a trusted source.
        return sympify(normalized)
    except Exception as e:
        # Deliberately broad: this tool reports any parse failure as text instead of raising.
        return str(e)
# File: autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py
def find_continuity_point(f_leq, f_gt, x_value):
    """
    Find the value 'a' that ensures the continuity of a piecewise function at a given point.

    Both branch expressions are evaluated at ``x_value`` and the resulting
    equation ``f_leq(x_value) == f_gt(x_value)`` is solved for the symbol 'a'.

    Args:
        f_leq (str): The function expression for f(x) when x is less than or equal to the continuity point, in the form of a string.
        f_gt (str): The function expression for f(x) when x is greater than the continuity point, in the form of a string.
        x_value (float): The x-value at which continuity is to be ensured.

    Returns:
        The first solution for 'a' reported by sympy's ``solve`` (a sympy number or
        expression), or None if no such value exists.
    """
    from sympy import Eq, solve, symbols, sympify

    x, a = symbols("x a")

    left_at_point = sympify(f_leq).subs(x, x_value)
    right_at_point = sympify(f_gt).subs(x, x_value)

    solutions = solve(Eq(left_at_point, right_at_point), a)
    return solutions[0] if solutions else None


# File: autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py
def fraction_to_mixed_numbers(numerator, denominator):
    """
    Simplifies a fraction to its lowest terms and returns it as a mixed number.

    Args:
        numerator (int): The numerator of the fraction.
        denominator (int): The denominator of the fraction.

    Returns:
        str: The simplified fraction as a string.
            If the value is a whole number, that integer is returned ('2').
            Otherwise the mixed number 'a b/c' is returned; a positive proper fraction
            keeps the zero whole part ('0 1/2').
            Negative values carry a single leading minus sign ('-3 1/2'); a negative
            proper fraction is returned as '-b/c'.
            If the numerator or denominator is not an integer, it returns an error message.
            If the denominator is zero, it returns an error message.
    """
    from fractions import Fraction

    if not isinstance(numerator, int) or not isinstance(denominator, int):
        return "Error: Numerator and denominator must be integers."
    if denominator == 0:
        return "Error: Denominator cannot be zero."

    # Fraction reduces to lowest terms and moves any sign onto the numerator.
    value = Fraction(numerator, denominator)

    # Work on the magnitude so the sign is printed once, not on both parts.
    sign = "-" if value < 0 else ""
    whole, remainder = divmod(abs(value.numerator), value.denominator)
    if remainder == 0:
        return f"{sign}{whole}"

    fractional_part = Fraction(remainder, value.denominator)
    if sign and whole == 0:
        return f"-{fractional_part}"
    return f"{sign}{whole} {fractional_part}"


# File: autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py
def modular_inverse_sum(expressions, modulus):
    """
    Calculates the sum of modular inverses of the given expressions modulo the specified modulus.

    Args:
        expressions (list): A list of numbers for which the modular inverses need to be calculated.
        modulus (int): The modulus value.

    Returns:
        int: The sum of modular inverses modulo the specified modulus. Numbers that
        have no inverse for this modulus contribute nothing to the sum.
    """
    from sympy import mod_inverse

    total = 0
    for number in expressions:
        try:
            total += mod_inverse(number, modulus)
        except ValueError:
            # No inverse exists for this term; by design it is left out of the sum.
            continue
    return total % modulus
# File: autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py
def simplify_mixed_numbers(numerator1, denominator1, numerator2, denominator2, whole_number1, whole_number2):
    """
    Simplifies the sum of two mixed numbers and returns the result as a string in the format 'a b/c'.

    Args:
        numerator1 (int): The numerator of the first fraction.
        denominator1 (int): The denominator of the first fraction.
        numerator2 (int): The numerator of the second fraction.
        denominator2 (int): The denominator of the second fraction.
        whole_number1 (int): The whole number part of the first mixed number.
            A negative whole part makes the entire mixed number negative
            (-1 with 1/2 means -1.5).
        whole_number2 (int): The whole number part of the second mixed number.

    Returns:
        str: The simplified sum of the two mixed numbers as a string in the format 'a b/c'.
        A whole-number sum is returned as 'a'. A positive proper fraction keeps the
        zero whole part ('0 b/c'). Negative sums carry a single leading minus sign
        ('-a b/c'); a negative proper fraction is returned as '-b/c'.

    Raises:
        ZeroDivisionError: If either denominator is zero.
    """
    from fractions import Fraction

    def to_fraction(whole, numerator, denominator):
        """Convert one mixed number (whole part plus fraction) to a single Fraction."""
        fractional_part = Fraction(numerator, denominator)
        if whole < 0 and fractional_part > 0:
            # "-1 1/2" denotes -(1 + 1/2); plain addition would yield -1/2.
            return whole - fractional_part
        return whole + fractional_part

    total = to_fraction(whole_number1, numerator1, denominator1) + to_fraction(
        whole_number2, numerator2, denominator2
    )

    # Split the magnitude so a negative total is not floor-divided into "-2 1/2".
    sign = "-" if total < 0 else ""
    whole, remainder = divmod(abs(total.numerator), total.denominator)
    if remainder == 0:
        return f"{sign}{whole}"

    mixed_fraction = Fraction(remainder, total.denominator)
    if sign and whole == 0:
        return f"-{mixed_fraction}"
    return f"{sign}{whole} {mixed_fraction}"


# File: autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py
def sum_of_digit_factorials(number):
    """
    Calculates the sum of the factorial of each digit in a number, often used in problems involving curious numbers like 145.

    Args:
        number (int): The number for which to calculate the sum of digit factorials.
            The sign is ignored, so -145 is treated like 145.

    Returns:
        int: The sum of the factorials of the digits in the given number.
    """
    from math import factorial

    # abs() drops a leading '-', which int() could not parse as a digit.
    return sum(factorial(int(digit)) for digit in str(abs(number)))


# File: autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py
def sum_of_primes_below(threshold):
    """
    Calculates the sum of all prime numbers below a given threshold.

    Args:
        threshold (int): The maximum number (exclusive) up to which primes are summed.

    Returns:
        int: The sum of all prime numbers below the threshold.
    """
    from sympy import primerange

    primes = primerange(2, threshold)
    return sum(primes)


# ---------------------------------------------------------------------------
# Non-Python content that shared these lines in the collapsed diff, kept verbatim
# so that nothing is lost when the lines are replaced.
#
# File: autogen/agentchat/contrib/captainagent/tools/requirements.txt
#   markdownify
#   arxiv
#   pymupdf
#   wikipedia-api
#   easyocr
#   python-pptx
#   openai-whisper
#   pandas
#   scipy
#   sentence-transformers
#
# File: autogen/agentchat/contrib/captainagent/tools/tool_description.tsv (first rows; rows 5+ follow)
#   docid document_content
#   1 math complex_numbers_product Calculates the product of a list of complex numbers.
#   2 math calculate_matrix_power Calculate the power of a given matrix.
#   3 math calculate_day_of_the_week Calculates the day of the week after a given number of days starting from a specified day.
#   4 math modular_inverse_sum Calculates the sum of modular inverses of the given expressions modulo the specified modulus.
# ---------------------------------------------------------------------------
+5 math sum_of_digit_factorials Calculates the sum of the factorial of each digit in a number, often used in problems involving curious numbers like 145. +6 math sum_of_primes_below Calculates the sum of all prime numbers below a given threshold. +7 math evaluate_expression Evaluates a mathematical expression with support for floor function notation and power notation. +8 math compute_currency_conversion Compute the currency conversion of the given amount using the provided exchange rate. +9 math find_continuity_point Find the value 'a' that ensures the continuity of a piecewise function at a given point. +10 math simplify_mixed_numbers Simplifies the sum of two mixed numbers and returns the result as a string in the format 'a b/c'. +11 math fraction_to_mixed_numbers Simplifies a fraction to its lowest terms and returns it as a mixed number. +12 math calculate_fraction_sum Calculates the sum of two fractions and returns the result as a mixed number. +13 math count_distinct_permutations Counts the number of distinct permutations of a sequence where items may be indistinguishable. +14 math calculate_circle_area_from_diameter Calculate the area of a circle given its diameter. +15 math calculate_reflected_point Calculates the reflection point of a given point about the line y=x. +16 data_analysis explore_csv Reads a CSV file and prints the column names, shape, data types, and the first few lines of data. +17 data_analysis calculate_correlation Calculate the correlation between two columns in a CSV file. +18 data_analysis detect_outlier_zscore Detect outliers in a CSV file based on a specified column. The outliers are determined by calculating the z-score of the data points in the column. +19 data_analysis detect_outlier_iqr Detect outliers in a specified column of a CSV file using the IQR method. +20 data_analysis shapiro_wilk_test Perform the Shapiro-Wilk test on a specified column of a CSV file. 
+21 data_analysis calculate_skewness_and_kurtosis Calculate the skewness and kurtosis of a specified column in a CSV file. The kurtosis is calculated using the Fisher definition. The two metrics are computed using scipy.stats functions. +22 information_retrieval perform_web_search Perform a web search using Bing API. +23 information_retrieval transcribe_audio_file Transcribes the audio file located at the given file path. +24 information_retrieval arxiv_search Search for articles on arXiv based on the given query. +25 information_retrieval arxiv_download Downloads PDF files from ArXiv based on a list of arxiv paper IDs. +26 information_retrieval scrape_wikipedia_tables Scrapes Wikipedia tables based on a given URL and header keyword. +27 information_retrieval extract_pdf_text Extracts text from a specified page or the entire PDF file. +28 information_retrieval extract_pdf_image Extracts images from a PDF file and saves them to the specified output directory. +29 information_retrieval image_qa Perform question answering on an image using a pre-trained VQA model. +30 information_retrieval optical_character_recognition Perform optical character recognition (OCR) on the given image. +31 information_retrieval get_youtube_caption Retrieves the captions for a YouTube video. +32 information_retrieval youtube_download Downloads a YouTube video and returns the download link. +33 information_retrieval get_wikipedia_text Retrieves the text content of a Wikipedia page. It does not support tables and other complex formatting. 
diff --git a/notebook/captainagent_expert_library.json b/notebook/captainagent_expert_library.json index f2e67bc583..d4ef91ca1e 100644 --- a/notebook/captainagent_expert_library.json +++ b/notebook/captainagent_expert_library.json @@ -9,11 +9,6 @@ "name": "APIProcessing_Expert", "system_message": "## Your role\nAPIProcessing_Expert is a specialist in the realm of video content analysis and transcription, focused on harnessing the capabilities of advanced APIs to streamline video processing and data extraction. With a strong foundation in Python scripting, APIProcessing_Expert ensures efficient automation and sophisticated data handling. Their expertise in natural language processing is crucial for accurately understanding and transcribing spoken content. Moreover, APIProcessing_Expert possesses meticulous verification skills that underpin the reliability of every transcription process they oversee.\n\n## Task and skill instructions\n- As an APIProcessing_Expert, your primary task is to perform comprehensive video content analysis, which involves dissecting and understanding video data to extract meaningful insights. This complexity is managed by utilizing a variety of APIs that offer advanced video processing functionalities, enabling you to handle large volumes of video content with precision and ease.\n- Your proficiency in Python scripting is crucial, as it allows you to automate the video processing workflow, making data extraction and handling both efficient and scalable. Scripts that you write are expected to optimize the workflow, reduce manual intervention, and ensure that data is processed in a secure and organized manner.\n- A core aspect of your expertise lies in natural language processing (NLP), which is instrumental in understanding and transcribing spoken content within videos. 
Your role involves implementing NLP techniques to decipher language, accents, dialects, and semantic meaning, thereby transforming auditory information into accurate written text.\n- Another key skill in your role is your verification ability, which involves rigorously checking the accuracy of the transcriptions generated. You are tasked with ensuring that transcriptions are error-free and faithfully represent the spoken words in the video. This might include cross-referencing transcripts with source material, employing quality control measures, and making necessary corrections to uphold high standards of transcription fidelity.\n\n(Optional) In addition to these responsibilities, APIProcessing_Expert is expected to stay abreast of the latest developments in video processing technology, NLP, and API services to continuously enhance the quality and speed of the transcription service offered. They may also contribute to the development of new tools and techniques for improving video content analysis.\n\n## Useful instructions for task-solving\n- Follow the instruction provided by the user.\n- Solve the task step by step if you need to.\n- If a plan is not provided, explain your plan first.\n- If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.\n- When you find an answer, verify the answer carefully. 
\n- Include verifiable evidence in your response if possible.\n \n## How to use code?\n- Suggest python code (in a python coding block) or shell script (in a sh coding block) for the Computer_terminal to execute.\n- When using code, you must indicate the script type in the code block.\n- Do not suggest incomplete code which requires users to modify.\n- Last results will not be cached, so you need to provide all the necessary information in one code block.\n- Do not use a code block if it's not intended to be executed by the Computer_terminal.\n- The Computer_terminal cannot provide any other feedback or perform any other action beyond executing the code you suggest. \n- The Computer_terminal can't modify your code.\n- Use 'print' function for the output when relevant. \n- Check the execution result returned by the user.\n- Do not ask users to copy and paste the result.\n- If the result indicates there is an error, fix the error and output the code again. \n- If you want the Computer_terminal to save the code in a file before executing it, put # filename: inside the code block as the first line. " }, - { - "name": "WebNavigation_Expert", - "system_message": "## Your role \nAs the WebNavigation_Expert, your skills are crucial in navigating complex, multi-modal data across the internet. Your expertise is not just confined to finding relevant information but also includes analyzing and utilizing that information to solve intricate real-world problems through collaborative efforts.\n\n## Task and skill instructions\n- Your primary task involves engaging in comprehensive online research, parsing through various forms of data, and discerning the most pertinent information for the problems at hand.\n- Your skill set should encompass proficiency in reasoning and critical thinking to ensure that the information sourced is reliable and applicable. 
Equipped with the ability to handle multi-modal data proficiently, you will often be required to sift through text, images, videos, and datasets.\n- Collaboration is key in your role. You will be expected to work alongside a group of experts, contributing your unique insights while also verifying and refining each other's findings.\n- When necessary, you are to leverage your coding abilities to write Python scripts. Your coding skills should help automate parts of the research process, analyze data more efficiently, or scrape web content that could be essential for problem-solving.\n\n(Optional) Other information:\n\n- You must remain adaptable and ready to learn new web tools and technologies as the tasks may require the use of specific or specialized web platforms.\n- Attention to detail and the ability to document and communicate your research process clearly to the team are imperative, ensuring that solutions are not only reached but are also well-understood and replicable by peers.\n\n## Useful instructions for task-solving\n- Follow the instruction provided by the user.\n- Solve the task step by step if you need to.\n- If a plan is not provided, explain your plan first.\n- If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.\n- When you find an answer, verify the answer carefully. 
\n- Include verifiable evidence in your response if possible.\n \n## How to use code?\n- Suggest python code (in a python coding block) or shell script (in a sh coding block) for the Computer_terminal to execute.\n- When using code, you must indicate the script type in the code block.\n- Do not suggest incomplete code which requires users to modify.\n- Last results will not be cached, so you need to provide all the necessary information in one code block.\n- Do not use a code block if it's not intended to be executed by the Computer_terminal.\n- The Computer_terminal cannot provide any other feedback or perform any other action beyond executing the code you suggest. \n- The Computer_terminal can't modify your code.\n- Use 'print' function for the output when relevant. \n- Check the execution result returned by the user.\n- Do not ask users to copy and paste the result.\n- If the result indicates there is an error, fix the error and output the code again. \n- If you want the Computer_terminal to save the code in a file before executing it, put # filename: inside the code block as the first line. ", - "description": "The WebNavigation_Expert is skilled in thorough online research, critical analysis of diverse data, and applying their findings collaboratively to resolve complex problems, with proficiencies in coding for data handling and the flexibility to learn new technologies." - }, { "name": "Reasoning_Expert", "system_message": "## Your role\nAs a Reasoning_Expert, you are responsible for providing critical analytical skills and logical problem-solving techniques to approach complex, real-world issues. Your capacity to reason through challenging scenarios and synthesize information from various sources is crucial in devising effective solutions.\n\n## Task and skill instructions\n- You will be tasked with deciphering multifaceted problems that demand extensive reasoning capabilities. 
You should expect to interact with multi-modal data, which could include text, images, audio, and video, necessitating a comprehensive understanding and integration of diverse data formats.\n- Your skills will also be put to the test in browsing the web efficiently for information, facts, or data that might contribute to problem-solving. Your proficiency in using digital tools and platforms will be pivotal in facilitating your tasks. Additionally, you will be collaborating with a team of other experts. Hence, your ability to work in a team, cross-verify solutions, and give and receive constructive feedback is essential.\n- Given the complexity of the tasks, you may encounter scenarios where writing Python code can streamline or automate parts of the problem-solving process. You are expected to have the capability to write and understand Python code and apply it whenever necessary to aid in your analytical endeavors.\n\nYour unique role is integral to the team's success, as your reasoning strengths will provide the backbone for strategizing and driving forward towards practical solutions. It is through the collaborative synergy of various skills including yours that complex problems can be solved effectively.\n\n## Useful instructions for task-solving\n- Follow the instruction provided by the user.\n- Solve the task step by step if you need to.\n- If a plan is not provided, explain your plan first.\n- If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.\n- When you find an answer, verify the answer carefully. 
\n- Include verifiable evidence in your response if possible.\n \n## How to use code?\n- Suggest python code (in a python coding block) or shell script (in a sh coding block) for the Computer_terminal to execute.\n- When using code, you must indicate the script type in the code block.\n- Do not suggest incomplete code which requires users to modify.\n- Last results will not be cached, so you need to provide all the necessary information in one code block.\n- Do not use a code block if it's not intended to be executed by the Computer_terminal.\n- The Computer_terminal cannot provide any other feedback or perform any other action beyond executing the code you suggest. \n- The Computer_terminal can't modify your code.\n- Use 'print' function for the output when relevant. \n- Check the execution result returned by the user.\n- Do not ask users to copy and paste the result.\n- If the result indicates there is an error, fix the error and output the code again. \n- If you want the Computer_terminal to save the code in a file before executing it, put # filename: inside the code block as the first line. 
", diff --git a/website/blog/2024-11-15-CaptainAgent/index.mdx b/website/blog/2024-11-15-CaptainAgent/index.mdx index 3c88da5893..d48c923d0d 100644 --- a/website/blog/2024-11-15-CaptainAgent/index.mdx +++ b/website/blog/2024-11-15-CaptainAgent/index.mdx @@ -4,6 +4,7 @@ authors: - jialeliu - LinxinS97 - jieyuz2 + - skzhang1 tags: [LLM, GPT, AutoBuild] --- ![Illustration of how CaptainAgent build a team](img/overall.png) @@ -31,39 +32,22 @@ Without the agent library and tool library, CaptainAgent will automatically gene from autogen.agentchat.contrib.captain_agent import CaptainAgent from autogen import UserProxyAgent -general_llm_config = { +llm_config = { "temperature": 0, - "config_list": autogen.config_list_from_json("OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-1106-preview"]}), -} - -nested_mode_config = { - "autobuild_init_config": { - "config_file_or_env": "OAI_CONFIG_LIST", - "builder_model": "gpt-4-1106-preview", - "agent_model": "gpt-4-1106-preview", - }, - # this is used to configure the autobuild building process - "autobuild_build_config": { - "default_llm_config": {"temperature": 1, "top_p": 0.95}, - "code_execution_config": {"timeout": 300, "work_dir": "groupchat", "last_n_messages": 1}, - "coding": True, - }, - "group_chat_config": {"max_round": 15}, - "group_chat_llm_config": general_llm_config.copy(), - "max_turns": 3, + "config_list": autogen.config_list_from_json("OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4o-mini"]}), } ## build agents captain_agent = CaptainAgent( name="captain_agent", - llm_config=general_llm_config, - nested_mode_config=nested_mode_config, + llm_config=llm_config, + code_execution_config={"use_docker": False, "work_dir": "groupchat"}, ) user_proxy = UserProxyAgent( name="user_proxy", - code_execution_config={"use_docker": False}, + human_input_mode="NEVER" ) -query = "Let's play game of 24. Given 4 numbers, you need to use +, -, *, / to get 24. The numbers are 2, 2, 7, 12." 
+query = "Search arxiv for the latest paper about large language models and discuss its potential application in software engineering." result = user_proxy.initiate_chat(captain_agent, message=query) ``` @@ -83,25 +67,14 @@ general_llm_config = { } nested_mode_config = { - "autobuild_init_config": { - "config_file_or_env": "OAI_CONFIG_LIST", - "builder_model": "gpt-4-1106-preview", - "agent_model": "gpt-4-1106-preview", - }, - # this is used to configure the autobuild building process + # this is used to configure the building process "autobuild_build_config": { - "default_llm_config": {"temperature": 1, "top_p": 0.95}, - "code_execution_config": {"timeout": 300, "work_dir": "groupchat", "last_n_messages": 1}, - "coding": True, "library_path": "captainagent_expert_library.json" }, + # this is used to configure tool library "autobuild_tool_config": { "tool_root": "default", # this will use the tool library we provide - "retriever": "all-mpnet-base-v2", - }, - "group_chat_config": {"max_round": 10}, - "group_chat_llm_config": general_llm_config.copy(), - "max_turns": 3 + } } ## build agents @@ -109,12 +82,13 @@ captain_agent = CaptainAgent( name="captain_agent", llm_config=general_llm_config, nested_mode_config=nested_mode_config, + code_execution_config={"use_docker": False, "work_dir": "groupchat"}, ) user_proxy = UserProxyAgent( name="user_proxy", - code_execution_config={"use_docker": False}, + human_input_mode="NEVER" ) -query = 'Find the stock price of Microsoft in the past 1 year and plot a line chart to show the trend. Save the line chart as "microsoft_stock_price.png".' +query = "Search arxiv for the latest paper about large language models and discuss its potential application in software engineering." 
result = user_proxy.initiate_chat(captain_agent, message=query) ``` @@ -125,7 +99,7 @@ Please refer to our [paper](https://arxiv.org/pdf/2405.19425) for more details a If you find this blog useful, please consider citing: ``` -@misc{song2024adaptiveinconversationteambuilding, +@article{song2024adaptive, title={Adaptive In-conversation Team Building for Language Model Agents}, author={Linxin Song and Jiale Liu and Jieyu Zhang and Shaokun Zhang and Ao Luo and Shijian Wang and Qingyun Wu and Chi Wang}, year={2024}, diff --git a/website/docs/topics/captainagent/agent_library.mdx b/website/docs/topics/captainagent/agent_library.mdx index c1d06a19e5..0864a131c7 100644 --- a/website/docs/topics/captainagent/agent_library.mdx +++ b/website/docs/topics/captainagent/agent_library.mdx @@ -1,8 +1,11 @@ # Agent Library +## Library Structure A simple agent in agent library requires three fields: - description: This describes the functionality of the agent. - system_message: This provides the system message of the agent for initialization. - name: The name of the agent. +- model (optional): The backbone model of the agent. +- tags (optional): The tag of the backbone model to use. An example of the agent library is as follows. ``` @@ -16,7 +19,7 @@ An example of the agent library is as follows. We provide a predefined agent library in `notebook/captainagent_expert_library.json`. ## Adding advanced agents -We also support adding agents with advanced capability to the library, aside from agents with different system message. Just need to add a `model_path` field and any other arguments that needs to pass while initialization. For example, to add a WebSurferAgent: +We also support adding agents with advanced capabilities to the library, aside from agents with a customized system message. Just add an `agent_path` field and any other arguments that need to be passed during initialization.
For example, to add a WebSurferAgent: ``` [ @@ -37,3 +40,13 @@ We also support adding agents with advanced capability to the library, aside fro } ] ``` + +Under the hood, the code will import the module according to the `agent_path`. The core code that implements this feature is: +``` +module_path, model_class_name = agent_path.replace("/", ".").rsplit(".", 1) +module = importlib.import_module(module_path) +model_class = getattr(module, model_class_name) +agent = model_class(**kwargs) +``` + +Make sure the correct path is provided to the config according to the code. diff --git a/website/docs/topics/captainagent/configurations.mdx b/website/docs/topics/captainagent/configurations.mdx new file mode 100644 index 0000000000..a834df3bef --- /dev/null +++ b/website/docs/topics/captainagent/configurations.mdx @@ -0,0 +1,79 @@ +# Details of all the available configurations +Captain Agent requires `nested_config` for configuration. Below is an example, we will break it down and provide a detailed explanation. + +``` +nested_config = { + "autobuild_init_config": { + "config_file_or_env": "OAI_CONFIG_LIST", + "builder_model": "gpt-4o", + "agent_model": "gpt-4o", + }, + "autobuild_build_config": { + "default_llm_config": {"temperature": 1, "top_p": 0.95, "max_tokens": 1500, "seed": 52}, + "code_execution_config": {"timeout": 300, "work_dir": "groupchat", "last_n_messages": 1}, + "coding": True, + "library_path_or_json": "captainagent_expert_library.json", + }, + "autobuild_tool_config": { + "tool_root": "default", # this will use the tool library we provide + "retriever": "all-mpnet-base-v2", + }, + "group_chat_config": {"max_round": 15}, + "group_chat_llm_config": llm_config.copy(), +} +``` + + +## `autobuild_init_config` +This section is used to configure the initial setup of autobuild. + +### `config_file_or_env` +Configures the path to API key config. Defaults to `OAI_CONFIG_LIST`. + +### `builder_model` +Configures the backbone of agent builder. 
The builder is used for agent selection from the library. Defaults to `gpt-4o-mini`. + +### `agent_model` +Configures the backbone of agents in the group chat. Defaults to `gpt-4o-mini`. + +### `kwargs` +`autobuild_init_config` takes in arguments from `AgentBuilder.__init__()`. Check the full list of arguments [here](https://github.com/ag2ai/ag2/blob/main/autogen/agentchat/contrib/agent_builder.py#L181). + +## `autobuild_build_config` +This section is used to configure the building process of autobuild. + +### `default_llm_config` +Configures the default parameters for the builder during the autobuild process. Defaults to `{"temperature": 1, "top_p": 0.95, "max_tokens": 2048}`. `config_list` is **not** needed here. + +### `code_execution_config` +Configures how the user proxy executes code within the nested chat. Defaults to `{"timeout": 300, "work_dir": "groupchat", "last_n_messages": 1, "use_docker": False}`. Full configuration docs [here](https://ag2ai.github.io/ag2/docs/reference/agentchat/user_proxy_agent). + +### `coding` +Controls whether the user proxy is added to the nested chat. Defaults to `True`. + +### `library_path_or_json` +Specifies the path to the agent library file. For details on customizing your own agent library, refer to the [agent library page](https://ag2ai.github.io/ag2/docs/topics/captainagent/agent_library). + +### `kwargs` +`autobuild_build_config` takes in arguments for `AgentBuilder.build()`. Check the full list of arguments [here](https://github.com/ag2ai/ag2/blob/main/autogen/agentchat/contrib/agent_builder.py#L365). + +## `autobuild_tool_config` +This section is used to configure how to retrieve the tool library for the agents in the group chat. For details on customizing your own tool library, refer to the [tool library page](https://ag2ai.github.io/ag2/docs/topics/captainagent/tool_library). + +### `tool_root` +Specifies the root directory of the tool library.
When set to `'default'`, it will load the default library we provide. + +### `retriever` +Configures the retriever model used for fetching relevant tools from the library. Defaults to `all-mpnet-base-v2`. The value is valid as long as it is found by [SentenceTransformers library](https://huggingface.co/sentence-transformers). + +## `group_chat_config` +This section is used to configure the group chat settings. + +### `max_round` +Specifies the maximum number of rounds in a group chat session. Defaults to `10`. + +### `kwargs` +`group_chat_config` also takes in arguments for initializing `autogen.GroupChat`. Refer to all the configurables [here](https://ag2ai.github.io/ag2/docs/reference/agentchat/groupchat). + +## `group_chat_llm_config` +Specifies the LLM config of the `GroupChatManager`. diff --git a/website/docs/topics/captainagent/tool_library.mdx b/website/docs/topics/captainagent/tool_library.mdx index ad4edc0b07..d308642ba2 100644 --- a/website/docs/topics/captainagent/tool_library.mdx +++ b/website/docs/topics/captainagent/tool_library.mdx @@ -1,11 +1,14 @@ # Tool Library In CaptainAgent, tools are in the form of python functions. The agents can write code to import functions and call them according to their needs. This can significantly enhance the functionality and capabilities of the agents. -We provide a list of tools that comes with the release of CaptainAgent. +We provide a list of tools that comes with the release of CaptainAgent. Its full content can be found [here](https://github.com/ag2ai/ag2/tree/main/autogen/agentchat/contrib/captainagent/tools/README.md) ## Using the Built-in Tool Library ### Install dependencies -First install the requirements for running tools via pip. The requirements file is located in `autogen/agentchat/contrib/captainagent/tools/requirements.txt`. +First install the requirements for running the tools via pip. 
+``` +pip install -r https://raw.githubusercontent.com/ag2ai/ag2/refs/heads/main/autogen/agentchat/contrib/captainagent/tools/requirements.txt +``` ### Subscribe to Certain APIs To use the provided built-in tools, it is required to obtain a Bing Search API key and RapidAPI key. @@ -57,3 +60,42 @@ After candidates are retrieved, the agent's system message will be updated with A user proxy with the ability to execute the code will be added to the nested chat. Under the hood, this is achieved by leveraging the [User Defined Functions](/docs/topics/code-execution/user-defined-functions) feature. A `LocalCommandLineCodeExecutor` equipped with all the functions serves as code executor for the user proxy. + +# Building your own Tool Library +Building your own tool library is simple: follow the same directory layout as the one we provided. The python files should follow the layout `tools/{category}/{tool_name}.py`. +The tool you'd like to add should be imported in the following fashion: + +```python +from tools.category.tool_name import tool_name +``` + +The `tool_description.tsv` file should be a tab-separated file with two columns `docid` and `document_content`. The `document_content` should always follow +the format `"category tool_name tool_description"`. The category and tool_name should always be one word with no space in between. The document_content is +used to calculate semantic similarity for retrieval. + +Once your library is ready, specify the path in `nested_config` of CaptainAgent. +```python +nested_config = { + ... + "autobuild_tool_config": { + "tool_root": "Your tool root here", + "retriever": "all-mpnet-base-v2", # This is the default embedding model, you can remove this line if you do not intend to change it + }, + ... +} +``` + +By following these steps, you can easily customize the tools library of CaptainAgent and empower your agents with new tools and capabilities.
+ +## Note on Adding Customized Tools +Due to the implementation of [User Defined Functions](/docs/topics/code-execution/user-defined-functions), when writing your own tool, you need to place your import statements inside the function body. For example, adding an audio transcription tool: + +```python +def audio_transcription(audio_file): + import whisper + model = whisper.load_model("base") + result = model.transcribe(audio_file) + return result["text"] +``` + +There is also a decorator `with_requirements` that may come in handy for [adding dependencies](/docs/topics/code-execution/user-defined-functions).