Skip to content

Commit 18dbd05

Browse files
committed
2 parents ff7b33b + f67b30b commit 18dbd05

11 files changed

+389
-134
lines changed

CHANGELOG.md

+31
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,34 @@
1+
## [1.41.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.40.1...v1.41.0) (2025-03-09)
2+
3+
4+
### Features
5+
6+
* add CLoD integration ([4e0e785](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4e0e78582c3a75e64c5eba26ce40b5ffbf05d58e))
7+
8+
9+
### Test
10+
11+
* Add coverage improvement test for tests/test_generate_answer_node.py ([6769c0d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/6769c0d43ab72f1c8b520dd28d19f747b22f9b7c))
12+
* Add coverage improvement test for tests/test_models_tokens.py ([b21e781](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/b21e781ce340c7fa2c5a99a28b7c23e06e950f1e))
13+
* Update coverage improvement test for tests/graphs/abstract_graph_test.py ([f296ac4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f296ac4d5088a74d4f50e7262631f202a68b152c))
14+
15+
16+
### CI
17+
18+
* **release:** 1.41.0-beta.1 [skip ci] ([7bfe494](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/7bfe494237279d73cefe4161a0b8e95491329ccb))
19+
20+
## [1.41.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.40.1...v1.41.0-beta.1) (2025-03-07)
21+
22+
23+
### Features
24+
25+
* add CLoD integration ([4e0e785](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4e0e78582c3a75e64c5eba26ce40b5ffbf05d58e))
26+
27+
28+
### Test
29+
30+
* Add coverage improvement test for tests/test_generate_answer_node.py ([6769c0d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/6769c0d43ab72f1c8b520dd28d19f747b22f9b7c))
31+
132
## [1.40.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.40.0...v1.40.1) (2025-02-27)
233

334

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "scrapegraphai"
33

4-
version = "1.40.1"
4+
version = "1.41.0"
55

66

77

scrapegraphai/graphs/abstract_graph.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from pydantic import BaseModel
1414

1515
from ..helpers import models_tokens
16-
from ..models import DeepSeek, OneApi
16+
from ..models import CLoD, DeepSeek, OneApi
1717
from ..utils.logging import set_verbosity_info, set_verbosity_warning
1818

1919

@@ -164,6 +164,7 @@ def _create_llm(self, llm_config: dict) -> object:
164164
"deepseek",
165165
"ernie",
166166
"fireworks",
167+
"clod",
167168
"togetherai",
168169
}
169170

@@ -218,6 +219,7 @@ def _create_llm(self, llm_config: dict) -> object:
218219
"ernie",
219220
"deepseek",
220221
"togetherai",
222+
"clod",
221223
}:
222224
if llm_params["model_provider"] == "bedrock":
223225
llm_params["model_kwargs"] = {
@@ -229,6 +231,9 @@ def _create_llm(self, llm_config: dict) -> object:
229231
else:
230232
model_provider = llm_params.pop("model_provider")
231233

234+
if model_provider == "clod":
235+
return CLoD(**llm_params)
236+
232237
if model_provider == "deepseek":
233238
return DeepSeek(**llm_params)
234239

scrapegraphai/helpers/models_tokens.py

+32
Original file line numberDiff line numberDiff line change
@@ -261,5 +261,37 @@
261261
"mixtral-moe-8x22B-instruct": 65536,
262262
"mixtral-moe-8x7B-instruct": 65536,
263263
},
264+
"clod": {
265+
"open-mistral-7b": 32000,
266+
"Llama-3.1-70b": 128000,
267+
"Llama-3.1-405b": 128000,
268+
"Llama-3.3-70b": 128000,
269+
"Llama-3.1-8b": 128000,
270+
"gpt-4o": 128000,
271+
"gpt-4o-mini": 128000,
272+
"gpt-4-turbo": 128000,
273+
"claude-3-opus-latest": 200000,
274+
"gemini-1.5-flash-8b": 128000,
275+
"gemini-1.5-flash": 128000,
276+
"open-mixtral-8x7b": 32000,
277+
"open-mixtral-8x22b": 64000,
278+
"claude-3-5-sonnet-latest": 200000,
279+
"claude-3-haiku-20240307": 200000,
280+
"Qwen-2.5-Coder-32B": 32000,
281+
"Deepseek-R1-Distill-Llama-70B": 131072,
282+
"Deepseek-V3": 128000,
283+
"Qwen-2-VL-72B": 128000,
284+
"Deepseek-R1-Distill-Qwen-14B": 131072,
285+
"Deepseek-R1-Distill-Qwen-1.5B": 131072,
286+
"Deepseek-R1": 128000,
287+
"Deepseek-Llm-Chat-67B": 4096,
288+
"Qwen-2.5-7B": 132072,
289+
"Qwen-2.5-72B": 132072,
290+
"Qwen-2-72B": 128000,
291+
"o1": 200000,
292+
"gemini-2.0-flash-exp": 1000000,
293+
"grok-beta": 128000,
294+
"grok-2-latest": 128000,
295+
},
264296
"togetherai": {"Meta-Llama-3.1-70B-Instruct-Turbo": 128000},
265297
}

scrapegraphai/models/__init__.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,10 @@
22
This module contains the model definitions used in the ScrapeGraphAI application.
33
"""
44

5+
from .clod import CLoD
56
from .deepseek import DeepSeek
67
from .oneapi import OneApi
78
from .openai_itt import OpenAIImageToText
89
from .openai_tts import OpenAITextToSpeech
910

10-
__all__ = [
11-
"DeepSeek",
12-
"OneApi",
13-
"OpenAIImageToText",
14-
"OpenAITextToSpeech",
15-
]
11+
__all__ = ["DeepSeek", "OneApi", "OpenAIImageToText", "OpenAITextToSpeech", "CLoD"]

scrapegraphai/models/clod.py

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""
2+
CLōD Module
3+
"""
4+
5+
from langchain_openai import ChatOpenAI
6+
7+
8+
class CLoD(ChatOpenAI):
9+
"""
10+
A wrapper for the ChatOpenAI class (CLōD uses an OpenAI-like API) that
11+
provides default configuration and could be extended with additional methods
12+
if needed.
13+
14+
Args:
15+
llm_config (dict): Configuration parameters for the language model.
16+
"""
17+
18+
def __init__(self, **llm_config):
19+
if "api_key" in llm_config:
20+
llm_config["openai_api_key"] = llm_config.pop("api_key")
21+
llm_config["openai_api_base"] = "https://api.clod.io/v1"
22+
23+
super().__init__(**llm_config)

tests/graphs/.env.example

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
OPENAI_API_KEY="YOUR OPENAI API KEY"
22
FIREWORKS_APIKEY="YOUR FIREWORKS KEY"
3+
CLOD_API_KEY="YOUR CLOD API KEY"

tests/graphs/abstract_graph_test.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -199,4 +199,37 @@ def test_set_common_params(self):
199199
test_params = {"param1": "value1", "param2": "value2"}
200200
graph.set_common_params(test_params)
201201

202-
# Assert that update_config was called on each node with the correct parameters
202+
# Assert that update_config was called on each node with the correct parameters
203+
204+
def test_get_state(self):
205+
"""Test that get_state returns the correct final state with or without a provided key, and raises KeyError for missing keys."""
206+
graph = TestGraph("dummy", {"llm": {"model": "openai/gpt-3.5-turbo", "openai_api_key": "sk-test"}})
207+
# Set a dummy final state
208+
graph.final_state = {"answer": "42", "other": "value"}
209+
# Test without a key returns the entire final_state
210+
state = graph.get_state()
211+
assert state == {"answer": "42", "other": "value"}
212+
# Test with a valid key returns the specific value
213+
answer = graph.get_state("answer")
214+
assert answer == "42"
215+
# Test that a missing key raises a KeyError
216+
with pytest.raises(KeyError):
217+
_ = graph.get_state("nonexistent")
218+
219+
def test_append_node(self):
220+
"""Test that append_node correctly delegates to the graph's append_node method."""
221+
graph = TestGraph("dummy", {"llm": {"model": "openai/gpt-3.5-turbo", "openai_api_key": "sk-test"}})
222+
# Replace the graph object with a mock that has append_node
223+
mock_graph = Mock()
224+
graph.graph = mock_graph
225+
dummy_node = Mock()
226+
graph.append_node(dummy_node)
227+
mock_graph.append_node.assert_called_once_with(dummy_node)
228+
229+
def test_get_execution_info(self):
230+
"""Test that get_execution_info returns the execution info stored in the graph."""
231+
graph = TestGraph("dummy", {"llm": {"model": "openai/gpt-3.5-turbo", "openai_api_key": "sk-test"}})
232+
dummy_info = {"execution": "info", "status": "ok"}
233+
graph.execution_info = dummy_info
234+
info = graph.get_execution_info()
235+
assert info == dummy_info
+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"""
2+
Module for testing the smart scraper class
3+
"""
4+
5+
import os
6+
7+
import pytest
8+
from dotenv import load_dotenv
9+
10+
from scrapegraphai.graphs import SmartScraperGraph
11+
12+
load_dotenv()
13+
14+
15+
@pytest.fixture
16+
def graph_config():
17+
"""Configuration of the graph"""
18+
clod_api_key = os.getenv("CLOD_API_KEY")
19+
return {
20+
"llm": {
21+
"api_key": clod_api_key,
22+
"model": "clod/claude-3-5-sonnet-latest",
23+
},
24+
"verbose": True,
25+
"headless": False,
26+
}
27+
28+
29+
def test_scraping_pipeline(graph_config):
30+
"""Start of the scraping pipeline"""
31+
smart_scraper_graph = SmartScraperGraph(
32+
prompt="List me all the projects with their description.",
33+
source="https://perinim.github.io/projects/",
34+
config=graph_config,
35+
)
36+
37+
result = smart_scraper_graph.run()
38+
39+
assert result is not None
40+
assert isinstance(result, dict)
41+
42+
43+
def test_get_execution_info(graph_config):
44+
"""Get the execution info"""
45+
smart_scraper_graph = SmartScraperGraph(
46+
prompt="List me all the projects with their description.",
47+
source="https://perinim.github.io/projects/",
48+
config=graph_config,
49+
)
50+
51+
smart_scraper_graph.run()
52+
53+
graph_exec_info = smart_scraper_graph.get_execution_info()
54+
55+
assert graph_exec_info is not None

0 commit comments

Comments
 (0)