Merge pull request #54 from ag2ai/captain1
CaptainAgent PR part 3
qingyun-wu authored Nov 21, 2024
2 parents 344c1b2 + 5625e1b commit 996f923
Show file tree
Hide file tree
Showing 42 changed files with 1,218 additions and 57 deletions.
19 changes: 10 additions & 9 deletions autogen/agentchat/contrib/captainagent.py
@@ -138,6 +138,7 @@ def __init__(
human_input_mode: Optional[str] = "NEVER",
code_execution_config: Optional[Union[Dict, Literal[False]]] = False,
nested_config: Optional[Dict] = None,
+agent_config_save_path: Optional[str] = None,
description: Optional[str] = DEFAULT_DESCRIPTION,
**kwargs,
):
@@ -154,6 +155,9 @@ def __init__(
max_consecutive_auto_reply (int): the maximum number of consecutive auto replies.
default to None (no limit provided, class attribute MAX_CONSECUTIVE_AUTO_REPLY will be used as the limit in this case).
The limit only plays a role when human_input_mode is not "ALWAYS".
+nested_config (dict): the configuration for the nested chat instantiated by CaptainAgent.
+A full list of keys and their functionalities can be found in [docs](https://ag2ai.github.io/ag2/docs/topics/captainagent/configurations).
+agent_config_save_path (str): the path to save the generated or retrieved agent configuration.
**kwargs (dict): Please refer to other kwargs in
[ConversableAgent](https://github.com/ag2ai/ag2/blob/main/autogen/agentchat/conversable_agent.py#L74).
"""
@@ -174,17 +178,14 @@ def __init__(
if nested_config["group_chat_llm_config"] is None:
nested_config["group_chat_llm_config"] = llm_config.copy()

-self.assistant = ConversableAgent(
-    name="CaptainAgent",
-    system_message=system_message,
-    llm_config=llm_config,
-)
+self.assistant = ConversableAgent(name="CaptainAgent", system_message=system_message, llm_config=llm_config)
self.assistant.update_tool_signature(self.AUTOBUILD_TOOL, is_remove=False)

self.executor = CaptainUserProxyAgent(
name="Expert_summoner",
code_execution_config=code_execution_config,
nested_config=nested_config,
+is_termination_msg=lambda x: x.get("content", "") and "terminate" in x.get("content", "").lower(),
human_input_mode="NEVER",
)

@@ -201,7 +202,6 @@ def __init__(
}
],
trigger=UserProxyAgent,
-# reply_func_from_nested_chats=None,
position=0,
)

@@ -230,7 +230,8 @@ class CaptainUserProxyAgent(ConversableAgent):

CONVERSATION_REVIEW_PROMPT = """# Your task
Briefly summarize the conversation history derived from an experts' group chat by following the answer format.
-If you found non-trivial errors or issues in the conversation, point it out with a detailed reason and mark the "Need double-check" as "Yes." if you think it is worth further verification.
+If you find non-trivial errors or issues in the conversation, point them out with a detailed reason; if you think they are worth further verification, mark "Need double-check" as "Yes".
+If the conversation ends with TERMINATE and the task is solved, this is a normal situation; mark "Need double-check" as "No".
# Conversation history:
{chat_history}
@@ -373,7 +374,7 @@ def _run_autobuild(self, group_name: str, execution_task: str, building_task: st
tool_root_dir = self.tool_root_dir
tool_builder = ToolBuilder(
corpus_path=os.path.join(tool_root_dir, "tool_description.tsv"),
-retriever=self._nested_config["autobuild_tool_config"]["retriever"],
+retriever=self._nested_config["autobuild_tool_config"].get("retriever", "all-mpnet-base-v2"),
)
for idx, agent in enumerate(agent_list):
if idx == len(self.tool_history[group_name]):
@@ -404,7 +405,7 @@ def _run_autobuild(self, group_name: str, execution_task: str, building_task: st
# Retrieve and build tools based on the similarities between the skills and the tool description
tool_builder = ToolBuilder(
corpus_path=os.path.join(tool_root_dir, "tool_description.tsv"),
-retriever=self._nested_config["autobuild_tool_config"]["retriever"],
+retriever=self._nested_config["autobuild_tool_config"].get("retriever", "all-mpnet-base-v2"),
)
for idx, skill in enumerate(skills):
tools = tool_builder.retrieve(skill)
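For context on the two `retriever` changes above, a hypothetical `nested_config` fragment; only the `group_chat_llm_config` behavior and the retriever fallback are taken from the diff, and the key layout otherwise follows the linked configuration docs:

```python
# Hypothetical nested_config fragment (exact key set is an assumption).
nested_config = {
    "autobuild_tool_config": {
        # With the change above, "retriever" may now be omitted; ToolBuilder
        # then falls back to the "all-mpnet-base-v2" sentence-transformers model.
        "retriever": "all-mpnet-base-v2",
    },
    # None makes CaptainAgent copy its own llm_config (see the hunk at -174 above).
    "group_chat_llm_config": None,
}
```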
44 changes: 44 additions & 0 deletions autogen/agentchat/contrib/captainagent/tools/README.md
@@ -0,0 +1,44 @@
# Introduction

This directory contains a library of manually created Python tools. The tools fall into three categories: math, data_analysis, and information_retrieval.

# Directory Layout
```
tools
├── README.md
├── data_analysis
│ ├── calculate_correlation.py
│ └── ...
├── information_retrieval
│ ├── arxiv_download.py
│ ├── arxiv_search.py
│ └── ...
├── math
│ ├── calculate_circle_area_from_diameter.py
│ └── ...
└── tool_description.tsv
```

Each tool can be imported from `tools/{category}/{tool_name}.py`, where the function name matches the file name; see the sketch below.
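For example, a hypothetical import of the math tool listed in the tree above (the package path assumes this directory's location inside `autogen`):

```python
from autogen.agentchat.contrib.captainagent.tools.math.calculate_circle_area_from_diameter import (
    calculate_circle_area_from_diameter,
)

area = calculate_circle_area_from_diameter(2.0)  # presumably ≈ 3.14159 for a diameter of 2
```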

`tool_description.tsv` contains descriptions of tools for retrieval.

# How to use
Some tools require a Bing Search API key and a RapidAPI key. For Bing, see the [Bing Web Search API](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api) page for how to obtain an API key. For RapidAPI, [sign up](https://rapidapi.com/auth/sign-up) and subscribe to these two APIs: [link1](https://rapidapi.com/solid-api-solid-api-default/api/youtube-transcript3) and [link2](https://rapidapi.com/420vijay47/api/youtube-mp3-downloader2). Both offer free plans, so there is no need to worry about extra costs.

To install the requirements for running the tools, use pip:
```bash
pip install -r autogen/agentchat/contrib/captainagent/tools/requirements.txt
```

Whenever you run tool-related code, remember to export the API keys as environment variables:
```bash
export BING_API_KEY=""
export RAPID_API_KEY=""
```
or
```python
import os
os.environ["BING_API_KEY"] = ""
os.environ["RAPID_API_KEY"] = ""
```
38 changes: 38 additions & 0 deletions autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py
@@ -0,0 +1,38 @@
def calculate_correlation(csv_path: str, column1: str, column2: str, method: str = "pearson") -> float:
"""
Calculate the correlation between two columns in a CSV file.
Args:
csv_path (str): The path to the CSV file.
column1 (str): The name of the first column.
column2 (str): The name of the second column.
method (str or callable, optional): The method used to calculate the correlation.
- 'pearson' (default): Pearson correlation coefficient.
- 'kendall': Kendall Tau correlation coefficient.
- 'spearman': Spearman rank correlation coefficient.
- callable: A custom correlation function that takes two arrays and returns a scalar.
Returns:
float: The correlation coefficient between the two columns.
"""
import pandas as pd

# Read the CSV file into a pandas DataFrame
df = pd.read_csv(csv_path)

# Select the specified columns
selected_columns = df[[column1, column2]]

# Calculate the correlation based on the specified method
if method == "pearson":
correlation = selected_columns.corr().iloc[0, 1]
elif method == "kendall":
correlation = selected_columns.corr(method="kendall").iloc[0, 1]
elif method == "spearman":
correlation = selected_columns.corr(method="spearman").iloc[0, 1]
elif callable(method):
correlation = selected_columns.corr(method=method).iloc[0, 1]
else:
raise ValueError("Invalid correlation method. Please choose 'pearson', 'kendall', 'spearman', or a callable.")

return correlation
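A hypothetical usage sketch, assuming a `data.csv` with numeric columns `height` and `weight`; the callable branch accepts any function mapping two 1-D arrays to a scalar:

```python
import numpy as np

r = calculate_correlation("data.csv", "height", "weight")                       # Pearson (default)
rho = calculate_correlation("data.csv", "height", "weight", method="spearman")  # rank-based

# Custom callable: pandas passes two 1-D ndarrays and expects a scalar back.
r_custom = calculate_correlation(
    "data.csv", "height", "weight", method=lambda x, y: np.corrcoef(x, y)[0, 1]
)
```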
26 changes: 26 additions & 0 deletions autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py
@@ -0,0 +1,26 @@
def calculate_skewness_and_kurtosis(csv_file: str, column_name: str) -> tuple:
"""
Calculate the skewness and kurtosis of a specified column in a CSV file. The kurtosis is calculated using the Fisher definition.
The two metrics are computed using scipy.stats functions.
Args:
csv_file (str): The path to the CSV file.
column_name (str): The name of the column to calculate skewness and kurtosis for.
Returns:
tuple: (skewness, kurtosis)
"""
import pandas as pd
from scipy.stats import kurtosis, skew

# Read the CSV file into a pandas DataFrame
df = pd.read_csv(csv_file)

# Extract the specified column
column = df[column_name]

# Calculate the skewness and kurtosis
skewness = skew(column)
kurt = kurtosis(column)

return skewness, kurt
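A hypothetical usage sketch, assuming a `sales.csv` with a numeric `revenue` column; the interpretation note reflects the Fisher definition mentioned in the docstring:

```python
skewness, kurt = calculate_skewness_and_kurtosis("sales.csv", "revenue")

# Fisher definition: a normal distribution has kurtosis 0; positive values
# indicate heavier tails, negative values lighter tails.
print(f"skewness={skewness:.3f}, kurtosis={kurt:.3f}")
```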
26 changes: 26 additions & 0 deletions autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py
@@ -0,0 +1,26 @@
def detect_outlier_iqr(csv_file: str, column_name: str):
"""
Detect outliers in a specified column of a CSV file using the IQR method.
Args:
csv_file (str): The path to the CSV file.
column_name (str): The name of the column to detect outliers in.
Returns:
list: A list of row indices that correspond to the outliers.
"""
import pandas as pd

# Read the CSV file into a pandas DataFrame
df = pd.read_csv(csv_file)

# Calculate the quartiles and IQR for the specified column
q1 = df[column_name].quantile(0.25)
q3 = df[column_name].quantile(0.75)
iqr = q3 - q1

# Find the outliers based on the defined criteria
outliers = df[(df[column_name] < q1 - 1.5 * iqr) | (df[column_name] > q3 + 1.5 * iqr)]

# Return the row indices of the outliers
return outliers.index.tolist()
26 changes: 26 additions & 0 deletions autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py
@@ -0,0 +1,26 @@
def detect_outlier_zscore(csv_file, column_name, threshold=3):
"""
Detect outliers in a CSV file based on a specified column. The outliers are determined by calculating the z-score of the data points in the column.
Args:
csv_file (str): The path to the CSV file.
column_name (str): The name of the column to calculate z-scores for.
threshold (float, optional): The threshold value for determining outliers. Defaults to 3.
Returns:
list: A list of row indices where the z-score is above the threshold.
"""
import numpy as np
import pandas as pd

# Read the CSV file into a pandas DataFrame
df = pd.read_csv(csv_file)

# Calculate the z-score for the specified column
z_scores = np.abs((df[column_name] - df[column_name].mean()) / df[column_name].std())

# Find the row indices where the z-score is above the threshold
outlier_indices = np.where(z_scores > threshold)[0]

# Return the row indices of the outliers
return outlier_indices.tolist()
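A hypothetical sketch comparing the two outlier detectors above on the same (assumed) `data.csv` with a numeric `price` column:

```python
iqr_rows = detect_outlier_iqr("data.csv", "price")                  # robust; no normality assumption
z_rows = detect_outlier_zscore("data.csv", "price", threshold=2.5)  # assumes roughly normal data

# Note: the IQR variant returns DataFrame index labels, while the z-score
# variant returns positional indices from np.where; the two coincide only
# when the DataFrame has a default RangeIndex.
```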
19 changes: 19 additions & 0 deletions autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py
@@ -0,0 +1,19 @@
def explore_csv(file_path, num_lines=5):
"""
Reads a CSV file and prints the column names, shape, data types, and the first few lines of data.
Args:
file_path (str): The path to the CSV file.
num_lines (int, optional): The number of lines to print. Defaults to 5.
"""
import pandas as pd

df = pd.read_csv(file_path)
header = df.columns
print("Columns:")
print(", ".join(header))
print("Shape:", df.shape)
print("Data Types:")
print(df.dtypes)
print("First", num_lines, "lines:")
print(df.head(num_lines))
28 changes: 28 additions & 0 deletions autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py
@@ -0,0 +1,28 @@
from autogen.coding.func_with_reqs import with_requirements


@with_requirements(["pandas", "scipy"])
def shapiro_wilk_test(csv_file, column_name):
"""
Perform the Shapiro-Wilk test on a specified column of a CSV file.
Args:
csv_file (str): The path to the CSV file.
column_name (str): The name of the column to perform the test on.
Returns:
float: The p-value resulting from the Shapiro-Wilk test.
"""
import pandas as pd
from scipy.stats import shapiro

# Read the CSV file into a pandas DataFrame
df = pd.read_csv(csv_file)

# Extract the specified column as a numpy array
column_data = df[column_name].values

# Perform the Shapiro-Wilk test
_, p_value = shapiro(column_data)

return p_value
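A hypothetical usage sketch, assuming a `measurements.csv` with a numeric `weight` column; the 5% threshold is a conventional choice, not part of the tool:

```python
p_value = shapiro_wilk_test("measurements.csv", "weight")

if p_value < 0.05:
    print("Reject normality at the 5% level.")
else:
    print("No significant evidence against normality.")
```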
23 changes: 23 additions & 0 deletions autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py
@@ -0,0 +1,23 @@
import arxiv

from autogen.coding.func_with_reqs import with_requirements


@with_requirements(["arxiv"], ["arxiv"])
def arxiv_download(id_list: list, download_dir="./"):
"""
Downloads PDF files from ArXiv based on a list of arxiv paper IDs.
Args:
id_list (list): A list of paper IDs to download. e.g. [2302.00006v1]
download_dir (str, optional): The directory to save the downloaded PDF files. Defaults to './'.
Returns:
list: A list of paths to the downloaded PDF files.
"""
paths = []
for paper in arxiv.Client().results(arxiv.Search(id_list=id_list)):
path = paper.download_pdf(download_dir, filename=paper.get_short_id() + ".pdf")
paths.append(path)
print("Paper id:", paper.get_short_id(), "Downloaded to:", path)
return paths
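A usage sketch with the ID from the docstring example; the PDF lands in the current directory under the paper's short ID:

```python
paths = arxiv_download(["2302.00006v1"])  # saves ./2302.00006v1.pdf and returns its path
```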
52 changes: 52 additions & 0 deletions autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py
@@ -0,0 +1,52 @@
import arxiv

from autogen.coding.func_with_reqs import with_requirements


@with_requirements(["arxiv"], ["arxiv"])
def arxiv_search(query, max_results=10, sortby="relevance"):
"""
Search for articles on arXiv based on the given query.
Args:
query (str): The search query.
max_results (int, optional): The maximum number of results to retrieve. Defaults to 10.
sortby (str, optional): The sorting criterion for the search results. Can be 'relevance' or 'submittedDate'. Defaults to 'relevance'.
Returns:
list: A list of dictionaries containing information about the search results. Each dictionary contains the following keys:
- 'title': The title of the article.
- 'authors': The authors of the article.
- 'summary': The summary of the article.
- 'entry_id': The entry ID of the article.
- 'doi': The DOI of the article (If applicable).
- 'published': The publication date of the article in the format 'Y-M'.
"""

def get_author(r):
return ", ".join(a.name for a in r.authors)

criterion = {"relevance": arxiv.SortCriterion.Relevance, "submittedDate": arxiv.SortCriterion.SubmittedDate}[sortby]

client = arxiv.Client()
search = arxiv.Search(query=query, max_results=max_results, sort_by=criterion)
res = []
results = client.results(search)
for r in results:
print("Entry id:", r.entry_id)
print("Title:", r.title)
print("Authors:", get_author(r))
print("DOI:", r.doi)
print("Published:", r.published.strftime("%Y-%m"))
# print("Summary:", r.summary)
res.append(
{
"title": r.title,
"authors": get_author(r),
"summary": r.summary,
"entry_id": r.entry_id,
"doi": r.doi,
"published": r.published.strftime("%Y-%m"),
}
)
return res
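A hypothetical usage sketch; the query string is illustrative:

```python
results = arxiv_search("multi-agent LLM systems", max_results=3, sortby="submittedDate")
for r in results:
    print(r["published"], "-", r["title"])
```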