@@ -3,14 +3,13 @@
 """

 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.document_transformers import Html2TextTransformer
 from langchain.docstore.document import Document
+from langchain.retrievers import ContextualCompressionRetriever
+from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
+from langchain_community.document_transformers import Html2TextTransformer, EmbeddingsRedundantFilter
 from langchain_community.vectorstores import FAISS
 from langchain_openai import OpenAIEmbeddings
-from langchain.retrievers import ContextualCompressionRetriever
-from langchain.retrievers.document_compressors import EmbeddingsFilter
-from langchain.retrievers.document_compressors import DocumentCompressorPipeline
-from langchain_community.document_transformers import EmbeddingsRedundantFilter
+

 from .base_node import BaseNode

@@ -77,7 +76,8 @@ def execute(self, state):
             chunk_overlap=0,
         )

-        docs_transformed = Html2TextTransformer().transform_documents(document)[0]
+        docs_transformed = Html2TextTransformer(
+        ).transform_documents(document)[0]

         chunks = text_splitter.split_text(docs_transformed.page_content)
         chunked_docs = []
@@ -90,12 +90,15 @@ def execute(self, state):
                 },
             )
             chunked_docs.append(doc)
-
+
         openai_key = self.llm.openai_api_key
-        retriever = FAISS.from_documents(chunked_docs, OpenAIEmbeddings(api_key=openai_key)).as_retriever()
-        embeddings = OpenAIEmbeddings(api_key=openai_key)  # could be any embedding of your choice
+        retriever = FAISS.from_documents(chunked_docs,
+                                         OpenAIEmbeddings(api_key=openai_key)).as_retriever()
+        # could be any embedding of your choice
+        embeddings = OpenAIEmbeddings(api_key=openai_key)
         redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
-        relevant_filter = EmbeddingsFilter(embeddings=embeddings)  # similarity_threshold could be set, now k=20
+        # similarity_threshold could be set, now k=20
+        relevant_filter = EmbeddingsFilter(embeddings=embeddings)
         pipeline_compressor = DocumentCompressorPipeline(
             transformers=[redundant_filter, relevant_filter]
         )
@@ -104,7 +107,8 @@ def execute(self, state):
             base_compressor=pipeline_compressor, base_retriever=retriever
         )

-        compressed_docs = compression_retriever.get_relevant_documents(user_input)
+        compressed_docs = compression_retriever.get_relevant_documents(
+            user_input)
         print("Documents compressed and stored in a vector database.")
         state.update({"relevant_chunks": compressed_docs})
         return state
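For reference, the contextual compression retrieval that these hunks rearrange can be reproduced standalone. Below is a minimal sketch, assuming the same LangChain classes the diff imports; the sample texts, the query, and the API key are hypothetical placeholders rather than values from this node.

from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(api_key="sk-...")  # hypothetical key

# Plain FAISS retriever over a few toy chunks (one near-duplicate on purpose).
retriever = FAISS.from_texts(
    ["LangChain splits documents.", "LangChain splits documents!", "FAISS stores vectors."],
    embeddings,
).as_retriever()

# Stage 1 drops near-duplicate chunks; stage 2 keeps only chunks similar to the query.
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
relevant_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.75)
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[redundant_filter, relevant_filter]
)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor, base_retriever=retriever
)
compressed_docs = compression_retriever.get_relevant_documents("How are documents split?")

Dropping redundant chunks before the relevance filter keeps duplicate text from crowding out distinct results, which is why both filters are wired into a single DocumentCompressorPipeline here.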