Skip to content

Commit

Permalink
Merge branch 'main' into refactoring-parse-html
Browse files Browse the repository at this point in the history
  • Loading branch information
PeriniM authored Feb 28, 2024
2 parents 89d1933 + a46f45d commit 5735f3e
Show file tree
Hide file tree
Showing 10 changed files with 169 additions and 14 deletions.
7 changes: 5 additions & 2 deletions scrapegraphai/nodes/fetch_html_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@ class FetchHTMLNode(BaseNode):
to succeed.
"""

def __init__(self, node_name: str, node_type: str = "node"):
def __init__(self, node_name: str, node_type: str = "fetch_node"):
"""
Initializes the FetchHTMLNode with a node name and node type.
Args:
node_name (str): name of the node
node_type (str, optional): type of the node
"""
super().__init__(node_name, node_type)

Expand All @@ -54,7 +57,7 @@ def execute(self, state: dict) -> dict:
KeyError: If the 'url' key is not found in the state, indicating that the
necessary information to perform the operation is missing.
"""
print("---FETCH DATA---")
print("---FETCHING HTML CODE---")
try:
url = state["url"]
except KeyError as e:
Expand Down
10 changes: 7 additions & 3 deletions scrapegraphai/nodes/generate_answer_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,15 @@ class GenerateAnswerNode(BaseNode):
updating the state with the generated answer under the 'answer' key.
"""

def __init__(self, llm, node_name: str = "GenerateAnswerNode"):
def __init__(self, llm, node_name: str, node_type: str = "GenerateAnswerNode"):
"""
Initializes the GenerateAnswerNode with a language model client and a node name.
Args:
llm: language model client used to generate the answer
node_name (str): name of the node
node_type (str, optional): type of the node
"""
super().__init__(node_name, "node")
super().__init__(node_name, node_type)
self.llm = llm

def execute(self, state: dict) -> dict:
Expand All @@ -58,7 +62,7 @@ def execute(self, state: dict) -> dict:
that the necessary information for generating an answer is missing.
"""

print("---GENERATE ANSWER---")
print("---GENERATING ANSWER---")
try:
user_input = state["user_input"]
document = state["document_chunks"]
Expand Down
10 changes: 7 additions & 3 deletions scrapegraphai/nodes/get_probable_tags_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,15 @@ class GetProbableTagsNode(BaseNode):
probable HTML tags, updating the state with these tags under the 'tags' key.
"""

def __init__(self, llm, node_name: str = "GetProbableTagsNode"):
def __init__(self, llm, node_name: str, node_type: str = "GetPropbableTagsNode"):
"""
Initializes the GetProbableTagsNode with a language model client and a node name.
Args:
llm: language model client used to predict the probable tags
node_name (str): name of the node
node_type (str, optional): type of the node
"""
super().__init__(node_name, "node")
super().__init__(node_name, node_type)
self.llm = llm

def execute(self, state: dict):
Expand All @@ -54,7 +58,7 @@ def execute(self, state: dict):
necessary information for generating tag predictions is missing.
"""

print("---GET PROBABLE TAGS---")
print("---GETTING PROBABLE TAGS---")
try:
user_input = state["user_input"]
url = state["url"]
Expand Down
6 changes: 4 additions & 2 deletions scrapegraphai/nodes/image_to_text_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from .base_node import BaseNode


class ImageToTextNode(BaseNode):
"""
A class representing a node that processes an image and returns the text description.
Expand All @@ -15,13 +16,14 @@ class ImageToTextNode(BaseNode):
execute(state, url): Execute the node's logic and return the updated state.
"""

def __init__(self, llm, node_name: str = "ParseImageToText"):
def __init__(self, llm, node_name: str, node_type: str = "ImageToTextNode"):
"""
Initializes an instance of the ImageToTextNode class.
Args:
llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
node_name (str, optional): The name of the node. Defaults to "ParseImageToText".
node_name (str): name of the node
node_type (str, optional): type of the node
"""
super().__init__(node_name, "node")
self.llm = llm
Expand Down
9 changes: 6 additions & 3 deletions scrapegraphai/nodes/parse_html_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,14 @@ class ParseHTMLNode(BaseNode):
the specified tags, if provided, and updates the state with the parsed content.
"""

def __init__(self, node_name="ParseHTMLNode"):
def __init__(self, node_name: str, node_type: str = "ParseHTMLNode"):
"""
Initializes the ParseHTMLNode with a node name.
Args:
node_name (str): name of the node
node_type (str, optional): type of the node
"""
super().__init__(node_name, "node")
super().__init__(node_name, node_type)

def execute(self, state):
"""
Expand All @@ -54,7 +57,7 @@ def execute(self, state):
information for parsing is missing.
"""

print("---PARSE HTML DOCUMENT---")
print("---PARSING HTML DOCUMENT---")
try:
document = state["document"]
except KeyError as e:
Expand Down
2 changes: 1 addition & 1 deletion scrapegraphai/nodes/rag_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def execute(self, state):
information for parsing is missing.
"""

print("---PARSE HTML DOCUMENT---")
print("---PARSING HTML DOCUMENT---")
try:
user_input = state["user_input"]
document = state["document"]
Expand Down
1 change: 1 addition & 0 deletions tests/graphs/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
OPENAI_APIKEY="your openai.com api key"
58 changes: 58 additions & 0 deletions tests/graphs/custom_graph_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""
Module for testing the custom graph class
"""
import unittest
import os
from dotenv import load_dotenv
from scrapegraphai.models import OpenAI
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import FetchHTMLNode, ParseHTMLNode, GenerateAnswerNode


class TestCustomGraph(unittest.TestCase):
    """
    Test case for a graph assembled by hand from individual nodes.

    The graph chains a FetchHTMLNode, a ParseHTMLNode and a
    GenerateAnswerNode, and is built once for the whole class.
    """

    @classmethod
    def setUpClass(cls):
        """
        Create the model, the three nodes and the graph shared by the tests.

        The API key is read from the OPENAI_APIKEY environment variable
        after load_dotenv() has been called.
        """
        load_dotenv()
        cls.model = OpenAI({
            "api_key": os.getenv("OPENAI_APIKEY"),
            "model_name": "gpt-3.5-turbo",
            "temperature": 0,
            "streaming": True,
        })

        # Build the nodes under short local names, then publish them on the
        # class so individual tests can reach them as well.
        fetch = FetchHTMLNode("fetch_html")
        parse = ParseHTMLNode("parse_document")
        generate = GenerateAnswerNode(cls.model, "generate_answer")
        cls.fetch_html_node = fetch
        cls.parse_document_node = parse
        cls.generate_answer_node = generate

        # Nodes and edges are handed to BaseGraph as sets; each edge is a
        # (source, destination) pair of node objects.
        cls.graph = BaseGraph(
            nodes={fetch, parse, generate},
            edges={(fetch, parse), (parse, generate)},
            entry_point=fetch,
        )

    def test_execution(self):
        """
        Run the graph and check that the final state carries an answer.
        """
        fallback = "No answer found."
        final_state = self.graph.execute({
            "user_input": "Give me the news",
            "url": "https://www.ansa.it/sito/notizie/topnews/index.shtml",
        })
        answer = final_state.get("answer", fallback)
        self.assertIsNotNone(answer)
        # Getting the fallback back means the "answer" key was never set.
        self.assertNotEqual(answer, fallback)


# Allow this test module to be run directly as a script.
if __name__ == '__main__':
    unittest.main()
38 changes: 38 additions & 0 deletions tests/graphs/smart_scraper_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
Module for testing the class SmartScraperGraph
"""
import unittest
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph


class TestSmartScraperGraph(unittest.TestCase):
    """
    Test case for the SmartScraperGraph class.

    A single graph instance is created for the whole class and then run
    by the test method.
    """

    @classmethod
    def setUpClass(cls):
        """
        Prepare the prompt, the target URL, the LLM configuration and the graph.

        The API key is read from the OPENAI_APIKEY environment variable
        after load_dotenv() has been called.
        """
        cls.URL = "https://perinim.github.io/projects/"
        cls.PROMPT = "List me all the titles and project descriptions and give me an audio"

        load_dotenv()
        cls.llm_config = dict(
            api_key=os.getenv("OPENAI_APIKEY"),
            model_name="gpt-3.5-turbo",
        )

        cls.smart_scraper_graph = SmartScraperGraph(
            cls.PROMPT,
            cls.URL,
            cls.llm_config,
        )

    def test_scraper_execution(self):
        """
        Run the scraper and check that it returns a non-empty result.
        """
        result = self.smart_scraper_graph.run()
        self.assertIsNotNone(result)
        self.assertNotEqual(result, "")


# Allow this test module to be run directly as a script.
if __name__ == '__main__':
    unittest.main()
42 changes: 42 additions & 0 deletions tests/graphs/speech_summary_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""
Module for testing the class SpeechSummaryGraph
"""
import unittest
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SpeechSummaryGraph


class TestSpeechSummaryGraph(unittest.TestCase):
    """
    Test case for the SpeechSummaryGraph class.

    The graph is given an output path for "website_summary.mp3" next to this
    test module; the file is removed again after each test so that test runs
    do not leave generated audio in the source tree.
    """

    def setUp(self):
        """
        Prepare the LLM configuration and the output path for the audio file.

        The API key is read from the OPENAI_APIKEY environment variable
        after load_dotenv() has been called.
        """
        load_dotenv()
        openai_key = os.getenv("OPENAI_APIKEY")
        self.llm_config = {
            "api_key": openai_key,
        }
        self.curr_dir = os.path.dirname(os.path.realpath(__file__))
        self.output_file_path = os.path.join(
            self.curr_dir, "website_summary.mp3")
        # Registered here rather than in tearDown so it also runs when the
        # test body raises.
        self.addCleanup(self._remove_output_file)

    def _remove_output_file(self):
        """
        Delete the audio file at output_file_path, if one is present.
        """
        try:
            os.remove(self.output_file_path)
        except FileNotFoundError:
            # Nothing was written (e.g. the run failed early).
            pass

    def test_summary_generation(self):
        """
        Run the graph and check that the final state carries an answer.
        """
        # Continuation lines start at column 0 so that no source indentation
        # leaks into the prompt text.
        speech_summary_graph = SpeechSummaryGraph("""Make a summary of the news to be
converted to audio for
blind people.""",
                                                  "https://www.wired.com/category/science/",
                                                  self.llm_config,
                                                  self.output_file_path)
        final_state = speech_summary_graph.run()
        result = final_state.get("answer", "No answer found.")
        self.assertIsNotNone(result)
        # Getting the fallback back means the "answer" key was never set.
        self.assertNotEqual(result, "No answer found.")


# Allow this test module to be run directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 5735f3e

Please sign in to comment.