Skip to content

Commit 4e0e785

Browse files
committed
feat: add CLoD integration
1 parent 8267e9d commit 4e0e785

File tree

7 files changed

+175
-132
lines changed

7 files changed

+175
-132
lines changed

scrapegraphai/graphs/abstract_graph.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from pydantic import BaseModel
1414

1515
from ..helpers import models_tokens
16-
from ..models import DeepSeek, OneApi
16+
from ..models import CLoD, DeepSeek, OneApi
1717
from ..utils.logging import set_verbosity_info, set_verbosity_warning
1818

1919

@@ -164,6 +164,7 @@ def _create_llm(self, llm_config: dict) -> object:
164164
"deepseek",
165165
"ernie",
166166
"fireworks",
167+
"clod",
167168
"togetherai",
168169
}
169170

@@ -218,6 +219,7 @@ def _create_llm(self, llm_config: dict) -> object:
218219
"ernie",
219220
"deepseek",
220221
"togetherai",
222+
"clod",
221223
}:
222224
if llm_params["model_provider"] == "bedrock":
223225
llm_params["model_kwargs"] = {
@@ -229,6 +231,9 @@ def _create_llm(self, llm_config: dict) -> object:
229231
else:
230232
model_provider = llm_params.pop("model_provider")
231233

234+
if model_provider == "clod":
235+
return CLoD(**llm_params)
236+
232237
if model_provider == "deepseek":
233238
return DeepSeek(**llm_params)
234239

scrapegraphai/helpers/models_tokens.py

+32
Original file line numberDiff line numberDiff line change
@@ -261,5 +261,37 @@
261261
"mixtral-moe-8x22B-instruct": 65536,
262262
"mixtral-moe-8x7B-instruct": 65536,
263263
},
264+
"clod": {
265+
"open-mistral-7b": 32000,
266+
"Llama-3.1-70b": 128000,
267+
"Llama-3.1-405b": 128000,
268+
"Llama-3.3-70b": 128000,
269+
"Llama-3.1-8b": 128000,
270+
"gpt-4o": 128000,
271+
"gpt-4o-mini": 128000,
272+
"gpt-4-turbo": 128000,
273+
"claude-3-opus-latest": 200000,
274+
"gemini-1.5-flash-8b": 128000,
275+
"gemini-1.5-flash": 128000,
276+
"open-mixtral-8x7b": 32000,
277+
"open-mixtral-8x22b": 64000,
278+
"claude-3-5-sonnet-latest": 200000,
279+
"claude-3-haiku-20240307": 200000,
280+
"Qwen-2.5-Coder-32B": 32000,
281+
"Deepseek-R1-Distill-Llama-70B": 131072,
282+
"Deepseek-V3": 128000,
283+
"Qwen-2-VL-72B": 128000,
284+
"Deepseek-R1-Distill-Qwen-14B": 131072,
285+
"Deepseek-R1-Distill-Qwen-1.5B": 131072,
286+
"Deepseek-R1": 128000,
287+
"Deepseek-Llm-Chat-67B": 4096,
288+
"Qwen-2.5-7B": 132072,
289+
"Qwen-2.5-72B": 132072,
290+
"Qwen-2-72B": 128000,
291+
"o1": 200000,
292+
"gemini-2.0-flash-exp": 1000000,
293+
"grok-beta": 128000,
294+
"grok-2-latest": 128000,
295+
},
264296
"togetherai": {"Meta-Llama-3.1-70B-Instruct-Turbo": 128000},
265297
}

scrapegraphai/models/__init__.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,10 @@
22
This module contains the model definitions used in the ScrapeGraphAI application.
33
"""
44

5+
from .clod import CLoD
56
from .deepseek import DeepSeek
67
from .oneapi import OneApi
78
from .openai_itt import OpenAIImageToText
89
from .openai_tts import OpenAITextToSpeech
910

10-
__all__ = [
11-
"DeepSeek",
12-
"OneApi",
13-
"OpenAIImageToText",
14-
"OpenAITextToSpeech",
15-
]
11+
__all__ = ["DeepSeek", "OneApi", "OpenAIImageToText", "OpenAITextToSpeech", "CLoD"]

scrapegraphai/models/clod.py

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""
2+
CLōD Module
3+
"""
4+
5+
from langchain_openai import ChatOpenAI
6+
7+
8+
class CLoD(ChatOpenAI):
    """
    Chat model client for CLōD.

    CLōD uses an OpenAI-like API, so this class simply subclasses ChatOpenAI
    and fills in the provider-specific defaults. It can be extended with
    additional methods if needed.

    Args:
        llm_config (dict): Configuration parameters for the language model.
            A generic ``api_key`` entry, when present, is forwarded under the
            ``openai_api_key`` name; the API base URL is always set to the
            CLōD endpoint.
    """

    def __init__(self, **llm_config):
        # Translate the generic key name into the one ChatOpenAI expects.
        has_generic_key = "api_key" in llm_config
        if has_generic_key:
            generic_key = llm_config.pop("api_key")
            llm_config["openai_api_key"] = generic_key

        # Route every request to the CLōD endpoint.
        llm_config["openai_api_base"] = "https://api.clod.io/v1"

        super().__init__(**llm_config)

tests/graphs/.env.example

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
OPENAI_API_KEY="YOUR OPENAI API KEY"
22
FIREWORKS_APIKEY="YOUR FIREWORK KEY"
3+
CLOD_API_KEY="YOUR CLOD API KEY"
+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"""
2+
Module for testing the smart scraper class
3+
"""
4+
5+
import os
6+
7+
import pytest
8+
from dotenv import load_dotenv
9+
10+
from scrapegraphai.graphs import SmartScraperGraph
11+
12+
load_dotenv()
13+
14+
15+
@pytest.fixture
def graph_config():
    """Build the graph configuration shared by the CLoD tests.

    The API key is read from the ``CLOD_API_KEY`` environment variable.
    """
    llm_settings = {
        "api_key": os.getenv("CLOD_API_KEY"),
        "model": "clod/claude-3-5-sonnet-latest",
    }
    return {"llm": llm_settings, "verbose": True, "headless": False}
27+
28+
29+
def test_scraping_pipeline(graph_config):
    """Run the smart scraper end to end and check that it yields a dict."""
    scraper = SmartScraperGraph(
        config=graph_config,
        source="https://perinim.github.io/projects/",
        prompt="List me all the projects with their description.",
    )

    output = scraper.run()

    assert output is not None
    assert isinstance(output, dict)
41+
42+
43+
def test_get_execution_info(graph_config):
    """Run the smart scraper, then check that execution info is available."""
    scraper = SmartScraperGraph(
        config=graph_config,
        source="https://perinim.github.io/projects/",
        prompt="List me all the projects with their description.",
    )

    # The run result itself is not inspected here; only the execution info is.
    scraper.run()
    exec_info = scraper.get_execution_info()

    assert exec_info is not None

0 commit comments

Comments
 (0)