Skip to content

Commit dc442b9

Browse files
committed
refactoring of fetch node
1 parent f8bb329 commit dc442b9

File tree

3 files changed

+22
-21
lines changed

3 files changed

+22
-21
lines changed

scrapegraphai/nodes/fetch_html_node.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
Module for fetching the HTML node
33
"""
44

5-
from langchain_community.document_loaders import AsyncHtmlLoader
65
from .base_node import BaseNode
6+
from ..utils.getter import _get_function
77

88

99
class FetchHTMLNode(BaseNode):
@@ -60,8 +60,9 @@ def execute(self, state: dict) -> dict:
6060
except KeyError as e:
6161
print(f"Error: {e} not found in state.")
6262
raise
63-
loader = AsyncHtmlLoader(url)
64-
document = loader.load()
63+
64+
document = _get_function(url)
65+
6566
state["keys"]["document"] = document
6667

6768
return state

scrapegraphai/nodes/image_to_text_node.py

+16-17
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
"""
22
Module for the image-to-text node
33
"""
4-
from abc import ABC, abstractmethod
5-
from openai import OpenAI
4+
from langchain_openai import ChatOpenAI
5+
from .base_node import BaseNode
66

7+
from langchain_core.messages import HumanMessage, SystemMessage
78

8-
class ImageToTextNode(ABC):
9+
10+
class ImageToTextNode(BaseNode):
911
"""
1012
An abstract base class for nodes in a graph-based workflow. Each node is
1113
intended to perform a specific action when executed as part of the graph's
@@ -53,7 +55,6 @@ def __init__(self, llm, node_name: str = "ParseImageToText"):
5355
super().__init__(node_name, "node")
5456
self.llm = llm
5557

56-
@abstractmethod
5758
def execute(self, state: dict, url: str) -> str:
5859
"""
5960
Execute the node's logic and return the updated state.
@@ -63,28 +64,26 @@ def execute(self, state: dict, url: str) -> str:
6364
:return: The updated state after executing this node.
6465
"""
6566
# TODO: to be fixed
66-
client = OpenAI(api_key=self.llm.openai_api_key)
6767

6868
if not self.llm.model_name == "gpt-4-vision-preview":
6969
raise ValueError("Model is not gpt-4-vision-preview")
7070

71-
response = client.chat.completions.create(
72-
model=self.llm.model_name,
73-
messages=[
74-
{
75-
"role": "user",
76-
"content": [
77-
{"type": "text", "text": "What’s in this image?"},
71+
chat = ChatOpenAI(model=self.llm.model_name, max_tokens=256)
72+
result = chat.invoke(
73+
[
74+
HumanMessage(
75+
content=[
76+
{"type": "text", "text": "What is this image showing"},
7877
{
7978
"type": "image_url",
8079
"image_url": {
8180
"url": url,
81+
"detail": "auto",
8282
},
8383
},
84-
],
85-
}
86-
],
87-
max_tokens=300,
84+
]
85+
)
86+
]
8887
)
8988

90-
return response.choices[0]
89+
return result

scrapegraphai/utils/getter.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,6 @@ def _get_function(link: str) -> str:
1414
Returns:
1515
str: The content of the response as a string.
1616
"""
17+
1718
loader = AsyncHtmlLoader(link)
18-
return str(loader.load())
19+
return loader.load()

0 commit comments

Comments
 (0)