Merge branch 'main' of https://github.com/ScrapeGraphAI/Scrapegraph-ai

VinciGit00 · VinciGit00 · commit 18dbd057251f · 2025-03-10T11:27:35.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,34 @@
+## [1.41.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.40.1...v1.41.0) (2025-03-09)
+
+
+### Features
+
+* add CLoD integration ([4e0e785](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4e0e78582c3a75e64c5eba26ce40b5ffbf05d58e))
+
+
+### Test
+
+* Add coverage improvement test for tests/test_generate_answer_node.py ([6769c0d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/6769c0d43ab72f1c8b520dd28d19f747b22f9b7c))
+* Add coverage improvement test for tests/test_models_tokens.py ([b21e781](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/b21e781ce340c7fa2c5a99a28b7c23e06e950f1e))
+* Update coverage improvement test for tests/graphs/abstract_graph_test.py ([f296ac4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f296ac4d5088a74d4f50e7262631f202a68b152c))
+
+
+### CI
+
+* **release:** 1.41.0-beta.1 [skip ci] ([7bfe494](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/7bfe494237279d73cefe4161a0b8e95491329ccb))
+
+## [1.41.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.40.1...v1.41.0-beta.1) (2025-03-07)
+
+
+### Features
+
+* add CLoD integration ([4e0e785](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4e0e78582c3a75e64c5eba26ce40b5ffbf05d58e))
+
+
+### Test
+
+* Add coverage improvement test for tests/test_generate_answer_node.py ([6769c0d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/6769c0d43ab72f1c8b520dd28d19f747b22f9b7c))
+
 ## [1.40.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.40.0...v1.40.1) (2025-02-27)
 
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "scrapegraphai"
 
-version = "1.40.1"
+version = "1.41.0"
 
 
 
diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py
@@ -13,7 +13,7 @@
 from pydantic import BaseModel
 
 from ..helpers import models_tokens
-from ..models import DeepSeek, OneApi
+from ..models import CLoD, DeepSeek, OneApi
 from ..utils.logging import set_verbosity_info, set_verbosity_warning
 
 
@@ -164,6 +164,7 @@ def _create_llm(self, llm_config: dict) -> object:
             "deepseek",
             "ernie",
             "fireworks",
+            "clod",
             "togetherai",
         }
 
@@ -218,6 +219,7 @@ def _create_llm(self, llm_config: dict) -> object:
                 "ernie",
                 "deepseek",
                 "togetherai",
+                "clod",
             }:
                 if llm_params["model_provider"] == "bedrock":
                     llm_params["model_kwargs"] = {
@@ -229,6 +231,9 @@ def _create_llm(self, llm_config: dict) -> object:
             else:
                 model_provider = llm_params.pop("model_provider")
 
+                if model_provider == "clod":
+                    return CLoD(**llm_params)
+
                 if model_provider == "deepseek":
                     return DeepSeek(**llm_params)
 
diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py
@@ -261,5 +261,37 @@
         "mixtral-moe-8x22B-instruct": 65536,
         "mixtral-moe-8x7B-instruct": 65536,
     },
+    "clod": {
+        "open-mistral-7b": 32000,
+        "Llama-3.1-70b": 128000,
+        "Llama-3.1-405b": 128000,
+        "Llama-3.3-70b": 128000,
+        "Llama-3.1-8b": 128000,
+        "gpt-4o": 128000,
+        "gpt-4o-mini": 128000,
+        "gpt-4-turbo": 128000,
+        "claude-3-opus-latest": 200000,
+        "gemini-1.5-flash-8b": 128000,
+        "gemini-1.5-flash": 128000,
+        "open-mixtral-8x7b": 32000,
+        "open-mixtral-8x22b": 64000,
+        "claude-3-5-sonnet-latest": 200000,
+        "claude-3-haiku-20240307": 200000,
+        "Qwen-2.5-Coder-32B": 32000,
+        "Deepseek-R1-Distill-Llama-70B": 131072,
+        "Deepseek-V3": 128000,
+        "Qwen-2-VL-72B": 128000,
+        "Deepseek-R1-Distill-Qwen-14B": 131072,
+        "Deepseek-R1-Distill-Qwen-1.5B": 131072,
+        "Deepseek-R1": 128000,
+        "Deepseek-Llm-Chat-67B": 4096,
+        "Qwen-2.5-7B": 131072,
+        "Qwen-2.5-72B": 131072,
+        "Qwen-2-72B": 128000,
+        "o1": 200000,
+        "gemini-2.0-flash-exp": 1000000,
+        "grok-beta": 128000,
+        "grok-2-latest": 128000,
+    },
     "togetherai": {"Meta-Llama-3.1-70B-Instruct-Turbo": 128000},
 }
diff --git a/scrapegraphai/models/__init__.py b/scrapegraphai/models/__init__.py
@@ -2,14 +2,10 @@
 This module contains the model definitions used in the ScrapeGraphAI application.
 """
 
+from .clod import CLoD
 from .deepseek import DeepSeek
 from .oneapi import OneApi
 from .openai_itt import OpenAIImageToText
 from .openai_tts import OpenAITextToSpeech
 
-__all__ = [
-    "DeepSeek",
-    "OneApi",
-    "OpenAIImageToText",
-    "OpenAITextToSpeech",
-]
+__all__ = ["DeepSeek", "OneApi", "OpenAIImageToText", "OpenAITextToSpeech", "CLoD"]
diff --git a/scrapegraphai/models/clod.py b/scrapegraphai/models/clod.py
@@ -0,0 +1,23 @@
+"""
+CLōD Module
+"""
+
+from langchain_openai import ChatOpenAI
+
+
+class CLoD(ChatOpenAI):
+    """
+    A wrapper for the ChatOpenAI class (CLōD uses an OpenAI-like API) that
+    always points the client at the CLōD endpoint.
+
+    Args:
+        **llm_config: Keyword arguments forwarded to ChatOpenAI. "api_key" is
+            renamed to "openai_api_key"; "openai_api_base" is overwritten.
+    """
+
+    def __init__(self, **llm_config):
+        if "api_key" in llm_config:
+            llm_config["openai_api_key"] = llm_config.pop("api_key")
+        llm_config["openai_api_base"] = "https://api.clod.io/v1"
+
+        super().__init__(**llm_config)
diff --git a/tests/graphs/.env.example b/tests/graphs/.env.example
@@ -1,2 +1,3 @@
 OPENAI_API_KEY="YOUR OPENAI API KEY"
 FIREWORKS_APIKEY="YOOUR FIREWORK KEY"
+CLOD_API_KEY="YOUR CLOD API KEY"
diff --git a/tests/graphs/abstract_graph_test.py b/tests/graphs/abstract_graph_test.py
@@ -199,4 +199,37 @@ def test_set_common_params(self):
         test_params = {"param1": "value1", "param2": "value2"}
         graph.set_common_params(test_params)
 
-        # Assert that update_config was called on each node with the correct parameters
+        # Assert that update_config was called on each node with the correct parameters
+    
+    def test_get_state(self):
+        """Test that get_state returns the correct final state with or without a provided key, and raises KeyError for missing keys."""
+        graph = TestGraph("dummy", {"llm": {"model": "openai/gpt-3.5-turbo", "openai_api_key": "sk-test"}})
+        # Set a dummy final state
+        graph.final_state = {"answer": "42", "other": "value"}
+        # Test without a key returns the entire final_state
+        state = graph.get_state()
+        assert state == {"answer": "42", "other": "value"}
+        # Test with a valid key returns the specific value
+        answer = graph.get_state("answer")
+        assert answer == "42"
+        # Test that a missing key raises a KeyError
+        with pytest.raises(KeyError):
+            _ = graph.get_state("nonexistent")
+
+    def test_append_node(self):
+        """Test that append_node correctly delegates to the graph's append_node method."""
+        graph = TestGraph("dummy", {"llm": {"model": "openai/gpt-3.5-turbo", "openai_api_key": "sk-test"}})
+        # Replace the graph object with a mock that has append_node
+        mock_graph = Mock()
+        graph.graph = mock_graph
+        dummy_node = Mock()
+        graph.append_node(dummy_node)
+        mock_graph.append_node.assert_called_once_with(dummy_node)
+
+    def test_get_execution_info(self):
+        """Test that get_execution_info returns the execution info stored in the graph."""
+        graph = TestGraph("dummy", {"llm": {"model": "openai/gpt-3.5-turbo", "openai_api_key": "sk-test"}})
+        dummy_info = {"execution": "info", "status": "ok"}
+        graph.execution_info = dummy_info
+        info = graph.get_execution_info()
+        assert info == dummy_info
diff --git a/tests/graphs/smart_scraper_clod_test.py b/tests/graphs/smart_scraper_clod_test.py
@@ -0,0 +1,55 @@
+"""
+Module for testing the SmartScraperGraph class with a CLoD model
+"""
+
+import os
+
+import pytest
+from dotenv import load_dotenv
+
+from scrapegraphai.graphs import SmartScraperGraph
+
+load_dotenv()
+
+
+@pytest.fixture
+def graph_config():
+    """Graph configuration using a CLoD model and the CLOD_API_KEY environment variable"""
+    clod_api_key = os.getenv("CLOD_API_KEY")
+    return {
+        "llm": {
+            "api_key": clod_api_key,
+            "model": "clod/claude-3-5-sonnet-latest",
+        },
+        "verbose": True,
+        "headless": False,
+    }
+
+
+def test_scraping_pipeline(graph_config):
+    """Run the scraping pipeline and check that the result is a dict"""
+    smart_scraper_graph = SmartScraperGraph(
+        prompt="List me all the projects with their description.",
+        source="https://perinim.github.io/projects/",
+        config=graph_config,
+    )
+
+    result = smart_scraper_graph.run()
+
+    assert result is not None
+    assert isinstance(result, dict)
+
+
+def test_get_execution_info(graph_config):
+    """Run the graph and check that execution info is available afterwards"""
+    smart_scraper_graph = SmartScraperGraph(
+        prompt="List me all the projects with their description.",
+        source="https://perinim.github.io/projects/",
+        config=graph_config,
+    )
+
+    smart_scraper_graph.run()
+
+    graph_exec_info = smart_scraper_graph.get_execution_info()
+
+    assert graph_exec_info is not None
diff --git a/tests/test_models_tokens.py b/tests/test_models_tokens.py
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,3 @@`
`1`	`1`	`OPENAI_API_KEY="YOUR OPENAI API KEY"`
`2`	`2`	`FIREWORKS_APIKEY="YOOUR FIREWORK KEY"`
	`3`	`+CLOD_API_KEY="YOUR CLOD API KEY"`