1
1
"""
2
2
Module for the image-to-text node
3
3
"""
4
- from abc import ABC , abstractmethod
5
- from openai import OpenAI
4
+ from langchain_openai import ChatOpenAI
5
+ from . base_node import BaseNode
6
6
7
+ from langchain_core .messages import HumanMessage , SystemMessage
7
8
8
- class ImageToTextNode (ABC ):
9
+
10
+ class ImageToTextNode (BaseNode ):
9
11
"""
10
12
An abstract base class for nodes in a graph-based workflow. Each node is
11
13
intended to perform a specific action when executed as part of the graph's
@@ -53,7 +55,6 @@ def __init__(self, llm, node_name: str = "ParseImageToText"):
53
55
super ().__init__ (node_name , "node" )
54
56
self .llm = llm
55
57
56
- @abstractmethod
57
58
def execute (self , state : dict , url : str ) -> str :
58
59
"""
59
60
Execute the node's logic and return the updated state.
@@ -63,28 +64,26 @@ def execute(self, state: dict, url: str) -> str:
63
64
:return: The updated state after executing this node.
64
65
"""
65
66
# TODO: needs fixing
66
- client = OpenAI (api_key = self .llm .openai_api_key )
67
67
68
68
if not self .llm .model_name == "gpt-4-vision-preview" :
69
69
raise ValueError ("Model is not gpt-4-vision-preview" )
70
70
71
- response = client .chat .completions .create (
72
- model = self .llm .model_name ,
73
- messages = [
74
- {
75
- "role" : "user" ,
76
- "content" : [
77
- {"type" : "text" , "text" : "What’s in this image?" },
71
+ chat = ChatOpenAI (model = self .llm .model_name , max_tokens = 256 )
72
+ result = chat .invoke (
73
+ [
74
+ HumanMessage (
75
+ content = [
76
+ {"type" : "text" , "text" : "What is this image showing" },
78
77
{
79
78
"type" : "image_url" ,
80
79
"image_url" : {
81
80
"url" : url ,
81
+ "detail" : "auto" ,
82
82
},
83
83
},
84
- ],
85
- }
86
- ],
87
- max_tokens = 300 ,
84
+ ]
85
+ )
86
+ ]
88
87
)
89
88
90
- return response . choices [ 0 ]
89
+ return result
0 commit comments