Skip to content

Commit 71bd53d

Browse files
committed
refactoring of the code, add tests and examples
1 parent 98a3598 commit 71bd53d

28 files changed

+384
-105
lines changed

.github/workflows/pylint.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@ jobs:
2020
pip install pylint
2121
pip install -r requirements.txt
2222
- name: Analysing the code with pylint
23-
run: pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py scrapegraphai/*.py examples/*.py tests/*.py
23+
run: pylint scrapegraphai/**/*.py scrapegraphai/*.py examples/*.py tests/**/*.py

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Try out ScrapeGraphAI in your browser:
2424
## 📖 Documentation
2525

2626
The documentation for ScrapeGraphAI can be found [here](https://scrapegraph-ai.readthedocs.io/en/latest/).
27-
Behind this there is also the docusaurus documentation [here]([https://scrapegraph-ai.readthedocs.io/en/latest/](https://scrapegraph-doc.onrender.com/)).
27+
Behind this there is also the docusaurus documentation [here](https://scrapegraph-doc.onrender.com/).
2828

2929
## Setup the api keys
3030

commit_and_push.sh

+1-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ fi
2020
commit_message="$1"
2121

2222
# Run Pylint on the specified Python files
23-
pylint scrapegraphai/**/*.py scrapegraphai/*.py examples/*.py tests/*.py
24-
23+
pylint scrapegraphai/**/*.py scrapegraphai/*.py examples/*.py tests/**/*.py
2524
# Make the pull
2625
git pull
2726

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""
2+
Test for convert_to_csv
3+
"""
4+
import os
5+
from scrapegraphai.utils.convert_to_csv import convert_to_csv
6+
7+
8+
def main():
9+
"""
10+
Example usage of the convert_to_csv function.
11+
"""
12+
# Example data
13+
data = {
14+
'Name': ['John', 'Alice', 'Bob'],
15+
'Age': [30, 25, 35],
16+
'City': ['New York', 'San Francisco', 'Seattle']
17+
}
18+
19+
# Example filename and position
20+
filename = "example_data"
21+
position = "./output"
22+
23+
try:
24+
# Convert data to CSV and save
25+
convert_to_csv(data, filename, position)
26+
print(
27+
f"Data saved successfully to {os.path.join(position, filename)}.csv")
28+
except ValueError as ve:
29+
print(f"ValueError: {ve}")
30+
except FileNotFoundError as fnfe:
31+
print(f"FileNotFoundError: {fnfe}")
32+
except PermissionError as pe:
33+
print(f"PermissionError: {pe}")
34+
except Exception as e:
35+
print(f"An unexpected error occurred: {e}")
36+
37+
38+
if __name__ == "__main__":
39+
main()
+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""
2+
Example of using convert_to_json function to save data in JSON format.
3+
"""
4+
import os
5+
from scrapegraphai.utils.convert_to_json import convert_to_json
6+
7+
# Data to save in JSON format
8+
data_to_save = {
9+
"name": "John Doe",
10+
"age": 30,
11+
"city": "New York"
12+
}
13+
14+
FILENAME = "example_data"
15+
DIRECTORY = "data_output"
16+
17+
try:
18+
convert_to_json(data_to_save, FILENAME, DIRECTORY)
19+
print(
20+
f"Data has been successfully saved to {os.path.join(DIRECTORY, FILENAME)}.json")
21+
except ValueError as value_error:
22+
print(value_error)
23+
except FileNotFoundError as file_not_found_error:
24+
print(file_not_found_error)
25+
except PermissionError as permission_error:
26+
print(permission_error)
27+
except Exception as exception:
28+
print(f"An error occurred: {exception}")

examples/utils/remover_example.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""
2+
Example of the remover method
3+
"""
4+
from scrapegraphai.utils.remover import remover
5+
6+
HTML_CONTENT = """
7+
<html>
8+
<head>
9+
<title>Test Page</title>
10+
</head>
11+
<body>
12+
<h1>This is a Test</h1>
13+
<p>Hello, World!</p>
14+
<script>alert("This is a script");</script>
15+
</body>
16+
</html>
17+
"""
18+
19+
parsed_content = remover(HTML_CONTENT)
20+
21+
print(parsed_content)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
"""
2+
Example for the file save_audio_from_bytes
3+
"""
4+
from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes
5+
6+
BYTE_RESPONSE = b'\x12\x34\x56\x78\x90'
7+
8+
OUTPUT_PATH = "generated_speech.wav"
9+
10+
save_audio_from_bytes(BYTE_RESPONSE, OUTPUT_PATH)
+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
"""
2+
Example for calculating the tokenizer
3+
"""
4+
from scrapegraphai.utils.token_calculator import truncate_text_tokens
5+
6+
INPUT_TEXT = "http://nba.com"
7+
8+
MODEL_NAME = "gpt-3.5-turbo"
9+
ENCODING_NAME = "EMBEDDING_ENCODING"
10+
11+
tokenized_chunks = truncate_text_tokens(INPUT_TEXT, MODEL_NAME, ENCODING_NAME)
12+
13+
for i, chunk in enumerate(tokenized_chunks):
14+
print(f"Chunk {i+1}: {chunk}")
File renamed without changes.

scrapegraphai/builders/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
__init__.py file for builders folder
33
"""
44

5-
from .graph_builder import GraphBuilder
5+
from .graph_builder import GraphBuilder

scrapegraphai/builders/graph_builder.py

+29-17
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
1+
"""
2+
Module for making the graph building
3+
"""
4+
import graphviz
15
from langchain_core.prompts import ChatPromptTemplate
26
from langchain.chains import create_extraction_chain
37
from ..models import OpenAI
4-
from ..utils import nodes_metadata, graph_schema
8+
from ..helpers import nodes_metadata, graph_schema
9+
510

611
class GraphBuilder:
712
"""
@@ -11,19 +16,24 @@ class GraphBuilder:
1116
1217
Attributes:
1318
prompt (str): The user's natural language prompt for the scraping task.
14-
llm (ChatOpenAI): An instance of the ChatOpenAI class configured with the specified llm_config.
19+
llm (ChatOpenAI): An instance of the ChatOpenAI class configured
20+
with the specified llm_config.
1521
nodes_description (str): A string description of all available nodes and their arguments.
16-
chain (LLMChain): The extraction chain responsible for processing the prompt and creating the graph.
22+
chain (LLMChain): The extraction chain responsible for
23+
processing the prompt and creating the graph.
1724
1825
Methods:
19-
build_graph(): Executes the graph creation process based on the user prompt and returns the graph configuration.
20-
convert_json_to_graphviz(json_data): Converts a JSON graph configuration to a Graphviz object for visualization.
26+
build_graph(): Executes the graph creation process based on the user prompt
27+
and returns the graph configuration.
28+
convert_json_to_graphviz(json_data): Converts a JSON graph configuration
29+
to a Graphviz object for visualization.
2130
2231
Args:
2332
prompt (str): The user's natural language prompt describing the desired scraping operation.
2433
url (str): The target URL from which data is to be scraped.
25-
llm_config (dict): Configuration parameters for the language model, where 'api_key' is mandatory,
26-
and 'model_name', 'temperature', and 'streaming' can be optionally included.
34+
llm_config (dict): Configuration parameters for the
35+
language model, where 'api_key' is mandatory,
36+
and 'model_name', 'temperature', and 'streaming' can be optionally included.
2737
2838
Raises:
2939
ValueError: If 'api_key' is not included in llm_config.
@@ -38,7 +48,7 @@ def __init__(self, user_prompt: str, llm_config: dict):
3848
self.llm = self._create_llm()
3949
self.nodes_description = self._generate_nodes_description()
4050
self.chain = self._create_extraction_chain()
41-
51+
4252
def _create_llm(self):
4353
"""
4454
Creates an instance of the OpenAI class with the provided language model configuration.
@@ -77,7 +87,8 @@ def _generate_nodes_description(self):
7787

7888
def _create_extraction_chain(self):
7989
"""
80-
Creates an extraction chain for processing the user prompt and generating the graph configuration.
90+
Creates an extraction chain for processing the user prompt and
91+
generating the graph configuration.
8192
8293
Returns:
8394
LLMChain: An instance of the LLMChain class.
@@ -90,20 +101,22 @@ def _create_extraction_chain(self):
90101
91102
Based on the user's input: "{input}", identify the essential nodes required for the task and suggest a graph configuration that outlines the flow between the chosen nodes.
92103
""".format(nodes_description=self.nodes_description, input="{input}")
93-
extraction_prompt = ChatPromptTemplate.from_template(create_graph_prompt_template)
104+
extraction_prompt = ChatPromptTemplate.from_template(
105+
create_graph_prompt_template)
94106
return create_extraction_chain(prompt=extraction_prompt, schema=graph_schema, llm=self.llm)
95107

96108
def build_graph(self):
97109
"""
98-
Executes the graph creation process based on the user prompt and returns the graph configuration.
110+
Executes the graph creation process based on the user prompt and
111+
returns the graph configuration.
99112
100113
Returns:
101114
dict: A JSON representation of the graph configuration.
102115
"""
103116
return self.chain.invoke(self.user_prompt)
104-
117+
105118
@staticmethod
106-
def convert_json_to_graphviz(json_data, format='pdf'):
119+
def convert_json_to_graphviz(json_data, format: str = 'pdf'):
107120
"""
108121
Converts a JSON graph configuration to a Graphviz object for visualization.
109122
@@ -113,11 +126,10 @@ def convert_json_to_graphviz(json_data, format='pdf'):
113126
Returns:
114127
graphviz.Digraph: A Graphviz object representing the graph configuration.
115128
"""
116-
import graphviz
117129

118130
graph = graphviz.Digraph(comment='ScrapeGraphAI Generated Graph', format=format,
119-
node_attr={'color': 'lightblue2', 'style': 'filled'})
120-
131+
node_attr={'color': 'lightblue2', 'style': 'filled'})
132+
121133
graph_config = json_data["text"][0]
122134

123135
# Retrieve nodes, edges, and the entry point from the JSON data
@@ -142,4 +154,4 @@ def convert_json_to_graphviz(json_data, format='pdf'):
142154
else:
143155
graph.edge(edge['from'], edge['to'])
144156

145-
return graph
157+
return graph

scrapegraphai/graphs/speech_summary_graph.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
"""
2+
Module for extracting the summary from the speech
3+
"""
4+
from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes
15
from ..models import OpenAI, OpenAITextToSpeech
26
from .base_graph import BaseGraph
37
from ..nodes import (
@@ -7,8 +11,8 @@
711
GenerateAnswerNode,
812
ParseHTMLNode,
913
TextToSpeechNode,
10-
)
11-
from scrapegraphai.utils import save_audio_from_bytes
14+
)
15+
1216

1317
class SpeechSummaryGraph:
1418
"""
@@ -17,7 +21,8 @@ class SpeechSummaryGraph:
1721
1822
Attributes:
1923
url (str): The URL of the web page to scrape and summarize.
20-
llm_config (dict): Configuration parameters for the language model, with 'api_key' mandatory.
24+
llm_config (dict): Configuration parameters for the language model,
25+
with 'api_key' mandatory.
2126
summary_prompt (str): The prompt used to guide the summarization process.
2227
output_path (Path): The path where the generated MP3 file will be saved.
2328
@@ -40,9 +45,9 @@ def __init__(self, prompt: str, url: str, llm_config: dict, output_path: str):
4045
self.llm_config = llm_config
4146
self.llm = self._create_llm()
4247
self.output_path = output_path
43-
self.text_to_speech_model = OpenAITextToSpeech(llm_config, model="tts-1", voice="alloy")
48+
self.text_to_speech_model = OpenAITextToSpeech(
49+
llm_config, model="tts-1", voice="alloy")
4450
self.graph = self._create_graph()
45-
4651

4752
def _create_llm(self):
4853
"""

scrapegraphai/helpers/__init__.py

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""
2+
__init__.py for the helpers folder
3+
4+
"""
5+
from .nodes_metadata import nodes_metadata
6+
from .schemas import graph_schema

scrapegraphai/utils/nodes_metadata.py scrapegraphai/helpers/nodes_metadata.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -43,24 +43,27 @@
4343
"type": "conditional_node",
4444
"args": {
4545
"key_name": "The key in the state to check for a condition.",
46-
"next_nodes": "A list of two nodes specifying the next node to execute based on the condition's outcome."
46+
"next_nodes": """A list of two nodes specifying the next node
47+
to execute based on the condition's outcome."""
4748
},
4849
"returns": "The name of the next node to execute."
4950
},
5051
"ImageToTextNode": {
51-
"description": "Converts image content to text by extracting visual information and interpreting it.",
52+
"description": """Converts image content to text by
53+
extracting visual information and interpreting it.""",
5254
"type": "node",
5355
"args": {
5456
"image_data": "Data of the image to be processed."
5557
},
5658
"returns": "Updated state with the textual description of the image under 'image_text' key."
5759
},
5860
"TextToSpeechNode": {
59-
"description": "Converts text into spoken words, allowing for auditory representation of the text.",
61+
"description": """Converts text into spoken words, allowing
62+
for auditory representation of the text.""",
6063
"type": "node",
6164
"args": {
6265
"text": "The text to be converted into speech."
6366
},
6467
"returns": "Updated state with the speech audio file or data under 'speech_audio' key."
6568
}
66-
}
69+
}

0 commit comments

Comments
 (0)