Skip to content

Commit f8bb329

Browse files
committed
refactoring of the example and add 2 new nodes
1 parent ef7d575 commit f8bb329

File tree

5 files changed

+146
-154
lines changed

5 files changed

+146
-154
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,52 @@
1-
"""
2-
Example of custom graph using existing nodes
3-
"""
4-
5-
import os
6-
from dotenv import load_dotenv
7-
8-
from langchain_openai import ChatOpenAI
9-
from scrapegraphai.graphs import BaseGraph
10-
from scrapegraphai.nodes import FetchHTMLNode, ParseHTMLNode, GenerateAnswerNode
11-
12-
# load the environment variables
13-
load_dotenv()
14-
openai_key = os.getenv("API_KEY")
15-
if not openai_key:
16-
print("Error: OpenAI API key not found in environment variables.")
17-
18-
# Define the configuration for the language model
19-
llm_config = {
20-
"api_key": openai_key,
21-
"model_name": "gpt-3.5-turbo",
22-
"temperature": 0,
23-
"streaming": True
24-
}
25-
model = ChatOpenAI(**llm_config)
26-
27-
# define the nodes for the graph
28-
fetch_html_node = FetchHTMLNode("fetch_html")
29-
parse_document_node = ParseHTMLNode("parse_document")
30-
generate_answer_node = GenerateAnswerNode(model, "generate_answer")
31-
32-
# create the graph
33-
graph = BaseGraph(
34-
nodes={
35-
fetch_html_node,
36-
parse_document_node,
37-
generate_answer_node
38-
},
39-
edges={
40-
(fetch_html_node, parse_document_node),
41-
(parse_document_node, generate_answer_node)
42-
},
43-
entry_point=fetch_html_node
44-
)
45-
46-
# execute the graph
47-
inputs = {"keys": {"user_input": "What is the title of the page?", "url": "https://example.com"}}
48-
result = graph.execute(inputs)
49-
50-
# get the answer from the result
51-
answer = result["keys"].get("answer", "No answer found.")
52-
print(answer)
1+
"""
2+
Example of custom graph using existing nodes
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
8+
from langchain_openai import ChatOpenAI
9+
from scrapegraphai.graphs import BaseGraph
10+
from scrapegraphai.nodes import FetchHTMLNode, ParseHTMLNode, GenerateAnswerNode
11+
12+
# load the environment variables
13+
load_dotenv()
14+
openai_key = os.getenv("API_KEY")
15+
if not openai_key:
16+
print("Error: OpenAI API key not found in environment variables.")
17+
18+
# Define the configuration for the language model
19+
llm_config = {
20+
"api_key": openai_key,
21+
"model_name": "gpt-3.5-turbo",
22+
"temperature": 0,
23+
"streaming": True
24+
}
25+
model = ChatOpenAI(**llm_config)
26+
27+
# define the nodes for the graph
28+
fetch_html_node = FetchHTMLNode("fetch_html")
29+
parse_document_node = ParseHTMLNode("parse_document")
30+
generate_answer_node = GenerateAnswerNode(model, "generate_answer")
31+
32+
# create the graph
33+
graph = BaseGraph(
34+
nodes={
35+
fetch_html_node,
36+
parse_document_node,
37+
generate_answer_node
38+
},
39+
edges={
40+
(fetch_html_node, parse_document_node),
41+
(parse_document_node, generate_answer_node)
42+
},
43+
entry_point=fetch_html_node
44+
)
45+
46+
# execute the graph
47+
inputs = {"keys": {"user_input": "What is the title of the page?", "url": "https://example.com"}}
48+
result = graph.execute(inputs)
49+
50+
# get the answer from the result
51+
answer = result["keys"].get("answer", "No answer found.")
52+
print(answer)

examples/graph_example.py

-22
This file was deleted.
+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
"""
2+
Module for creating the basic node
3+
"""
4+
from abc import ABC, abstractmethod
5+
from openai import OpenAI
6+
7+
8+
class ImageToTextNode(ABC):
9+
"""
10+
An abstract base class for nodes in a graph-based workflow. Each node is
11+
intended to perform a specific action when executed as part of the graph's
12+
processing flow.
13+
14+
Attributes:
15+
node_name (str): A unique identifier for the node.
16+
node_type (str): Specifies the node's type, which influences how the
17+
node interacts within the graph. Valid values are
18+
"node" for standard nodes and "conditional_node" for
19+
nodes that determine the flow based on conditions.
20+
21+
Methods:
22+
execute(state): An abstract method that subclasses must implement. This
23+
method should contain the logic that the node executes
24+
when it is reached in the graph's flow. It takes the
25+
graph's current state as input and returns the updated
26+
state after execution.
27+
28+
Args:
29+
node_name (str): The unique identifier name for the node. This name is
30+
used to reference the node within the graph.
31+
node_type (str): The type of the node, limited to "node" or
32+
"conditional_node". This categorization helps in
33+
determining the node's role and behavior within the
34+
graph.
35+
36+
Raises:
37+
ValueError: If the provided `node_type` is not one of the allowed
38+
values ("node" or "conditional_node"), a ValueError is
39+
raised to indicate the incorrect usage.
40+
"""
41+
42+
def __init__(self, llm, node_name: str = "ParseImageToText"):
43+
"""
44+
Initialize the node with a unique identifier and a specified node type.
45+
46+
Args:
47+
node_name (str): The unique identifier name for the node.
48+
node_type (str): The type of the node, limited to "node" or "conditional_node".
49+
50+
Raises:
51+
ValueError: If node_type is not "node" or "conditional_node".
52+
"""
53+
super().__init__(node_name, "node")
54+
self.llm = llm
55+
56+
@abstractmethod
57+
def execute(self, state: dict, url: str) -> str:
58+
"""
59+
Execute the node's logic and return the updated state.
60+
Args:
61+
state (dict): The current state of the graph.
62+
url (str): URL of the image to be described by the model.
63+
:return: The updated state after executing this node.
64+
"""
65+
# TODO: to be fixed
66+
client = OpenAI(api_key=self.llm.openai_api_key)
67+
68+
if not self.llm.model_name == "gpt-4-vision-preview":
69+
raise ValueError("Model is not gpt-4-vision-preview")
70+
71+
response = client.chat.completions.create(
72+
model=self.llm.model_name,
73+
messages=[
74+
{
75+
"role": "user",
76+
"content": [
77+
{"type": "text", "text": "What’s in this image?"},
78+
{
79+
"type": "image_url",
80+
"image_url": {
81+
"url": url,
82+
},
83+
},
84+
],
85+
}
86+
],
87+
max_tokens=300,
88+
)
89+
90+
return response.choices[0]

scrapegraphai/nodes/parse_html_node.py

-74
This file was deleted.

scrapegraphai/nodes/text_to_speach_node.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@
99

1010
class TextToSpeachNode(BaseNode):
1111
"""
12-
A node responsible for parsing HTML content from a document using specified tags.
13-
It uses BeautifulSoupTransformer for parsing, providing flexibility in extracting
14-
specific parts of an HTML document based on the tags provided in the state.
15-
12+
A node responsible for parsing text content from a document using specified tags and reading
13+
it with the selected voice.
1614
This node enhances the scraping workflow by allowing for targeted extraction of
1715
content, thereby optimizing the processing of large HTML documents.
1816
@@ -29,14 +27,14 @@ class TextToSpeachNode(BaseNode):
2927
the specified tags, if provided, and updates the state with the parsed content.
3028
"""
3129

32-
def __init__(self, llm, node_name="ParseTextToSpeach"):
30+
def __init__(self, llm, node_name: str = "ParseTextToSpeach"):
3331
"""
3432
Initializes the TextToSpeachNode with a node name.
3533
"""
3634
super().__init__(node_name, "node")
3735
self.llm = llm
3836

39-
def execute(self, state: dict, text: str, output_path: str = str, model: str = "tts-1", voice="alloy"):
37+
def execute(self, state: dict, text: str, output_path: str = str, model: str = "tts-1", voice="alloy") -> dict:
4038
"""
4139
Executes the node's logic to parse the HTML document based on specified tags.
4240
If tags are provided in the state, the document is parsed accordingly; otherwise,

0 commit comments

Comments
 (0)