Skip to content

Commit

Permalink
tools deprecation and new format support (#89)
Browse files Browse the repository at this point in the history
* tools deprecation and new format support

* Add tool for arxiv (#88)

* enable advanced=True for Google search

* add ConfigurableAction in base

* yahoo tool added

* pubmed tool added

---------

Co-authored-by: lucifertrj <[email protected]>
Co-authored-by: Adhvaith Hundi <[email protected]>
  • Loading branch information
3 people authored Nov 7, 2024
1 parent b41648a commit ad65789
Show file tree
Hide file tree
Showing 16 changed files with 529 additions and 147 deletions.
21 changes: 18 additions & 3 deletions src/openagi/actions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from openagi.llms.base import LLMBaseModel
from openagi.memory.memory import Memory

from typing import ClassVar, Dict, Any

class BaseAction(BaseModel):
"""Base Actions class to be inherited by other actions, providing basic functionality and structure."""
Expand All @@ -24,8 +24,7 @@ class BaseAction(BaseModel):
)

def execute(self):
"""Executes the action
"""
"""Executes the action"""
raise NotImplementedError("Subclasses must implement this method.")

@classmethod
Expand All @@ -43,3 +42,19 @@ def cls_doc(cls):
if field_name not in default_exclude_doc_fields
},
}

class ConfigurableAction(BaseAction):
    """Base class for actions that read settings from a class-level ``config`` dict.

    Settings are written with :meth:`set_config` and read with
    :meth:`get_config`. Each class owns its own dictionary: the first time
    ``set_config`` is called on a class, that class receives a private copy of
    whatever configuration it was inheriting, so configuring one action never
    overwrites the settings of a sibling action.
    """

    # Class-level settings store (declared as a ClassVar, not an instance field).
    config: ClassVar[Dict[str, Any]] = {}

    @classmethod
    def set_config(cls, *args, **kwargs):
        """Merge settings into this class's configuration.

        Args:
            *args: Optionally a single ``dict`` of settings to merge.
            **kwargs: Individual settings to merge; applied after the
                positional dictionary, so they win on key collisions.

        Raises:
            ValueError: If positional arguments are given and they are not
                exactly one dictionary. Nothing is modified in that case.
        """
        if args and not (len(args) == 1 and isinstance(args[0], dict)):
            raise ValueError("If using positional arguments, a single dictionary must be provided.")

        # Without this, every subclass would mutate the one dict defined on the
        # base class and settings such as ``filename`` or ``api_key`` would
        # leak between unrelated actions.
        if "config" not in vars(cls):
            cls.config = dict(cls.config)

        if args:
            cls.config.update(args[0])
        cls.config.update(kwargs)

    @classmethod
    def get_config(cls, key: str, default: Any = None) -> Any:
        """Return the setting stored under ``key``, or ``default`` if it is absent."""
        return cls.config.get(key, default)
1 change: 0 additions & 1 deletion src/openagi/actions/files.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
from pathlib import Path
from typing import Dict, Optional

from pydantic import Field

from openagi.actions.base import BaseAction
Expand Down
57 changes: 57 additions & 0 deletions src/openagi/actions/tools/arxiv_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from openagi.actions.base import ConfigurableAction
from pydantic import Field
from openagi.exception import OpenAGIException
from typing import ClassVar, Dict, Any

try:
import arxiv
except ImportError:
raise OpenAGIException("Install arxiv with cmd `pip install arxiv`")


class ArxivSearch(ConfigurableAction):
    """
    Arxiv Search is a tool used to search articles in Physics, Mathematics, Computer Science, Quantitative Biology, Quantitative Finance, and Statistics
    """
    # NOTE(review): the class docstring above is left untouched because it may be
    # surfaced as the tool description (see BaseAction.cls_doc) - confirm before rewording.

    query: str = Field(..., description="User query or question")
    max_results: int = Field(10, description="Total results, in int, to be executed from the search. Defaults to 10.")

    def execute(self):
        """Run the arXiv search and return the results as one text block.

        Builds an ``arxiv.Search`` from ``query`` and ``max_results``, iterates
        the results of an ``arxiv.Client``, and renders the title, summary,
        publication date, authors, PDF URL and entry id of each result.

        Returns:
            str: The concatenated entries with surrounding whitespace stripped;
            an empty string when the search yields nothing.
        """
        search = arxiv.Search(
            query=self.query,
            max_results=self.max_results,
        )
        entries = []
        for result in arxiv.Client().results(search):
            # Formatting the authors list directly would embed object reprs;
            # join the plain author names instead.
            authors = ", ".join(author.name for author in result.authors)
            entries.append(
                f"title : {result.title}\n "
                f"summary : {result.summary}\n "
                f"published : {result.published}\n "
                f"authors : {authors}\n "
                f"pdf_url : {result.pdf_url}\n "
                f"entry_id : {result.entry_id}\n\n "
            )
        return "".join(entries).strip()























16 changes: 3 additions & 13 deletions src/openagi/actions/tools/ddg_search.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import json
from typing import Any
from openagi.actions.base import BaseAction
from openagi.actions.base import ConfigurableAction
from pydantic import Field
from duckduckgo_search import DDGS
import logging

class DuckDuckGoSearch(BaseAction):
class DuckDuckGoSearch(ConfigurableAction):
"""Use this Action to search DuckDuckGo for a query."""

name: str = Field(
Expand Down Expand Up @@ -39,14 +39,4 @@ def execute(self):
self.query,
max_results=self.max_results,
)
return json.dumps(result)


class DuckDuckGoNewsSearch(DuckDuckGoSearch):
"""Use this Action to get the latest news from DuckDuckGo."""

def execute(self):
ddgs = self._get_ddgs()
return json.dumps(
ddgs.news(keywords=self.query, max_results=(self.max_results)), indent=2
)
return json.dumps(result)
91 changes: 58 additions & 33 deletions src/openagi/actions/tools/document_loader.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,64 @@
from openagi.actions.base import BaseAction
from typing import Any
from openagi.actions.base import ConfigurableAction
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.document_loaders.pdf import PyPDFLoader
from pydantic import Field

class TextLoaderTool(ConfigurableAction):
    """Load content from a text file.
    This action loads and processes content from .txt files, combining
    metadata and content into a single context string.
    """

    def execute(self) -> str:
        """Load the configured text file and return ``"<source> <content>"``.

        The path is read from the ``filename`` configuration key.

        Returns:
            str: The source path and the first document's content separated by
            a space, or an empty string when the loader returns no documents.

        Raises:
            ValueError: If no ``filename`` has been configured.
        """
        file_path: str = self.get_config('filename')
        if not file_path:
            # Fail early with an actionable message instead of handing None to the loader.
            raise ValueError(
                "No file configured. Use TextLoaderTool.set_config(filename='path/to/file.txt') to set it."
            )

        documents = TextLoader(file_path=file_path).load()
        if not documents:
            return ""

        first = documents[0]
        source = first.metadata["source"]
        return f"{source} {first.page_content}"

class DocumentLoader(BaseAction):
"""Use this Action to extract content from documents"""
class PDFLoaderTool(ConfigurableAction):
    """Load content from a PDF file.
    This action loads and processes content from .pdf files, combining
    metadata and content into a single context string.
    """

    def execute(self) -> str:
        """Load the configured PDF and return ``"<source> <content>"``.

        The path is read from the ``filename`` configuration key. The content
        of every document returned by the loader is included, joined with
        newlines, so pages after the first are not dropped.

        Returns:
            str: The source path followed by the combined content, or an empty
            string when the loader returns no documents.

        Raises:
            ValueError: If no ``filename`` has been configured.
        """
        file_path: str = self.get_config('filename')
        if not file_path:
            # Fail early with an actionable message instead of handing None to the loader.
            raise ValueError(
                "No file configured. Use PDFLoaderTool.set_config(filename='path/to/file.pdf') to set it."
            )

        documents = PyPDFLoader(file_path=file_path).load()
        if not documents:
            return ""

        source = documents[0].metadata["source"]
        # Returning only documents[0] would silently discard the rest of the file.
        page_content = "\n".join(doc.page_content for doc in documents)
        return f"{source} {page_content}"

file_path: str = Field(
default_factory=str,
description="File from which content is extracted",
)

def text_loader(self):
loader = TextLoader(file_path=self.file_path)
data = loader.load()
page_content = data[0].page_content
meta_data = data[0].metadata["source"]
context = meta_data + " " + page_content
return context

def csv_loader(self):
content = ""
loader = CSVLoader(file_path=self.file_path)
data = loader.load()

for i in range(len(data)):
row_content = data[i].page_content
row_no = data[i].metadata["row"]
content += "row_no" + " " + str(row_no) + ": " + str(row_content)
return content

def execute(self):
if self.file_path.endswith(".txt"):
context = self.text_loader()
elif self.file_path.endswith(".csv"):
context = self.csv_loader()
return context
class CSVLoaderTool(ConfigurableAction):
    """Load content from a CSV file.
    This action loads and processes content from .csv files, combining
    row numbers and content into a formatted string representation.
    """

    def execute(self) -> str:
        """Load the configured CSV file and return one labelled line group per row.

        The path is read from the ``filename`` configuration key. Each loaded
        document is rendered as ``"row_no <row>: <content>"`` and the rows are
        joined with newlines so the end of one row cannot run into the label
        of the next.

        Returns:
            str: The formatted rows, or an empty string when the loader
            returns no documents.

        Raises:
            ValueError: If no ``filename`` has been configured.
        """
        file_path: str = self.get_config('filename')
        if not file_path:
            # Fail early with an actionable message instead of handing None to the loader.
            raise ValueError(
                "No file configured. Use CSVLoaderTool.set_config(filename='path/to/file.csv') to set it."
            )

        documents = CSVLoader(file_path=file_path).load()
        return "\n".join(
            f"row_no {doc.metadata['row']}: {doc.page_content}"
            for doc in documents
        )
75 changes: 54 additions & 21 deletions src/openagi/actions/tools/exasearch.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,63 @@
from openagi.actions.base import BaseAction
import os
from openagi.actions.base import ConfigurableAction
from pydantic import Field
from openagi.exception import OpenAGIException
import os
import warnings

try:
from exa_py import Exa
from exa_py import Exa
except ImportError:
raise OpenAGIException("Install Exa Py with cmd `pip install exa_py`")
raise OpenAGIException("Install Exa Py with cmd `pip install exa_py`")

class ExaSearch(BaseAction):
"""
Exa Search is a tool used when user needs to ask the question in terms of query to get response
class ExaSearch(ConfigurableAction):
"""Exa Search tool for querying and retrieving information.
This action uses the Exa API to perform searches and retrieve relevant content
based on user queries. Requires an API key to be configured before use.
"""
query: str = Field(..., description="User query or question ")

def execute(self):
api_key = os.environ["EXA_API_KEY"]
query: str = Field(..., description="User query or question")

def __init__(self, **data):
super().__init__(**data)
self._check_deprecated_usage()

def _check_deprecated_usage(self):
if 'EXA_API_KEY' in os.environ and not self.get_config('api_key'):
warnings.warn(
"Using environment variables for API keys is deprecated and will be removed in a future version. "
"Please use ExaSearch.set_config(api_key='your_key') instead of setting environment variables.",
DeprecationWarning,
stacklevel=2
)
self.set_config(api_key=os.environ['EXA_API_KEY'])


def execute(self) -> str:
api_key: str = self.get_config('api_key')
if not api_key:
if 'EXA_API_KEY' in os.environ:
api_key = os.environ['EXA_API_KEY']
warnings.warn(
"Using environment variables for API keys is deprecated and will be removed in a future version. "
"Please use ExaSearch.set_config(api_key='your_key') instead of setting environment variables.",
DeprecationWarning,
stacklevel=2
)
else:
raise OpenAGIException("API KEY NOT FOUND. Use ExaSearch.set_config(api_key='your_key') to set the API key.")

exa = Exa(api_key=api_key)
results = exa.search_and_contents(
self.query,
text={"max_characters": 512},
)

exa = Exa(api_key = api_key)
results = exa.search_and_contents(self.query,
text={"max_characters": 512},
)
content = ""
for idx in results.results:
content += idx.text.strip()
content_parts = []
for result in results.results:
content_parts.append(result.text.strip())

content = content.replace("<|endoftext|>","")
content = content.replace("NaN","")
return content
content = "".join(content_parts)
return (
content.replace("<|endoftext|>", "")
.replace("NaN", "")
)
38 changes: 38 additions & 0 deletions src/openagi/actions/tools/google_search_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from openagi.actions.base import ConfigurableAction
from pydantic import Field
from openagi.exception import OpenAGIException
import logging

try:
from googlesearch import search
except ImportError:
raise OpenAGIException("Install googlesearch-python with cmd `pip install googlesearch-python`")

class GoogleSearchTool(ConfigurableAction):
    """
    Google Search is a tool used for scraping the Google search engine. Extract information from Google search results.
    """
    # NOTE(review): the class docstring and field descriptions are left untouched
    # because they may be surfaced as tool metadata - confirm before rewording.

    query: str = Field(..., description="User query or question ")

    # NOTE(review): the description below says the limit is 10, while execute()
    # only caps values above 15 - confirm which limit is intended.
    max_results: int = Field(
        default=10,
        description="Total results, in int, to be executed from the search. Defaults to 10. The limit should be 10 and not execeed more than 10",
    )

    lang: str = Field(
        default="en",
        description = "specify the langauge for your search results."
    )

    def execute(self):
        """Run the Google search and return one line of text per result.

        ``max_results`` is capped at 15 (the cap is written back to the
        instance and logged). Each result is rendered as
        ``"Title: ... Description: ... URL: ..."`` and results are joined with
        newlines so a URL never runs into the following title.

        Returns:
            str: The formatted results, or an empty string when the search
            yields nothing.
        """
        if self.max_results > 15:
            logging.info("Over threshold value... Limiting the Max results to 15")
            self.max_results = 15

        search_results = search(
            self.query,
            num_results=self.max_results,
            lang=self.lang,
            advanced=True,
        )
        return "\n".join(
            f"Title: {info.title}. Description: {info.description}. URL: {info.url}"
            for info in search_results
        )

Loading

0 comments on commit ad65789

Please sign in to comment.