Skip to content

Commit b21e781

Browse files
test: Add coverage improvement test for tests/test_models_tokens.py
1 parent f296ac4 commit b21e781

File tree

1 file changed

+148
-0
lines changed

1 file changed

+148
-0
lines changed

tests/test_models_tokens.py

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
import pytest
2+
from scrapegraphai.helpers.models_tokens import models_tokens
3+
4+
class TestModelsTokens:
    """Test suite verifying the structure and contents of the models_tokens dictionary.

    models_tokens maps provider name -> {model name -> context-window token limit}.
    The specific numeric expectations below pin the values currently defined in
    scrapegraphai.helpers.models_tokens; if the dict changes, these tests must be
    updated deliberately.
    """

    def test_openai_tokens(self):
        """The 'openai' provider exists and every token limit is a positive integer."""
        openai_models = models_tokens.get("openai")
        assert openai_models is not None, "'openai' key should be present in models_tokens"
        for model, token in openai_models.items():
            assert isinstance(model, str), "Model name should be a string"
            assert isinstance(token, int), "Token limit should be an integer"
            assert token > 0, "Token limit should be positive"

    def test_azure_openai_tokens(self):
        """The 'azure_openai' provider exists and its entries are well-typed."""
        azure_models = models_tokens.get("azure_openai")
        assert azure_models is not None, "'azure_openai' key should be present"
        for model, token in azure_models.items():
            assert isinstance(model, str), "Model name should be a string"
            assert isinstance(token, int), "Token limit should be an integer"

    def test_google_providers(self):
        """Both Google providers exist and contain integer token limits."""
        google_genai = models_tokens.get("google_genai")
        google_vertexai = models_tokens.get("google_vertexai")
        assert google_genai is not None, "'google_genai' key should be present"
        assert google_vertexai is not None, "'google_vertexai' key should be present"
        # Spot-check a well-known model key.
        assert "gemini-pro" in google_genai, "'gemini-pro' should be in google_genai models"
        for provider in (google_genai, google_vertexai):
            for token in provider.values():
                assert isinstance(token, int), "Token limit must be an integer"

    def test_non_existent_provider(self):
        """Looking up an unknown provider returns None rather than raising."""
        assert models_tokens.get("non_existent") is None, "Non-existent provider should return None"

    def test_total_model_keys(self):
        """The dictionary defines a meaningful number of models overall."""
        total_keys = sum(len(details) for details in models_tokens.values())
        assert total_keys > 20, "Expected more than 20 total model tokens defined"

    def test_specific_token_value(self):
        """The token limit for 'gpt-4' matches the defined value."""
        openai = models_tokens.get("openai")
        # Guard first so a missing provider fails with a clear message,
        # not an AttributeError on None.
        assert openai is not None, "'openai' key should be present in models_tokens"
        assert openai.get("gpt-4") == 8192, "Expected token limit for gpt-4 to be 8192"

    def test_non_empty_model_keys(self):
        """Model names are never empty strings."""
        for provider, model_dict in models_tokens.items():
            for model in model_dict.keys():
                assert model != "", f"Model name in provider '{provider}' should not be empty."

    def test_token_limits_range(self):
        """Every token limit falls within a plausible range (1 to 1,100,000)."""
        for provider, model_dict in models_tokens.items():
            for model, token in model_dict.items():
                assert 1 <= token <= 1100000, f"Token limit for {model} in provider {provider} is out of plausible range."

    def test_provider_structure(self):
        """Every provider maps to a dictionary of models."""
        for provider, models in models_tokens.items():
            assert isinstance(models, dict), f"Provider {provider} should map to a dictionary, got {type(models).__name__}"

    def test_non_empty_provider(self):
        """Every provider defines at least one model."""
        for provider, models in models_tokens.items():
            assert len(models) > 0, f"Provider {provider} should contain at least one model."

    def test_specific_model_token_values(self):
        """Selected models across providers have their documented token limits."""
        openai = models_tokens.get("openai")
        assert openai is not None, "'openai' key should be present in models_tokens"
        assert openai.get("gpt-3.5-turbo-0125") == 16385, "Expected token limit for gpt-3.5-turbo-0125 in openai to be 16385"

        azure = models_tokens.get("azure_openai")
        assert azure is not None, "'azure_openai' key should be present"
        assert azure.get("gpt-3.5") == 4096, "Expected token limit for gpt-3.5 in azure_openai to be 4096"

        anthropic = models_tokens.get("anthropic")
        assert anthropic is not None, "'anthropic' key should be present"
        assert anthropic.get("claude_instant") == 100000, "Expected token limit for claude_instant in anthropic to be 100000"

    def test_providers_count(self):
        """There are at least 15 providers defined."""
        assert len(models_tokens) >= 15, "Expected at least 15 providers in models_tokens"

    def test_non_existent_model(self):
        """An unknown model within a valid provider returns None."""
        openai = models_tokens.get("openai")
        assert openai is not None, "'openai' key should be present in models_tokens"
        assert openai.get("non_existent_model") is None, "Non-existent model should return None from a valid provider."

    def test_no_whitespace_in_model_names(self):
        """Model names carry no leading or trailing whitespace."""
        for provider, model_dict in models_tokens.items():
            for model in model_dict.keys():
                # Stripping must be a no-op if the name is clean.
                assert model == model.strip(), f"Model name '{model}' in provider '{provider}' contains leading or trailing whitespace."

    def test_specific_models_additional(self):
        """Additional spot-checks across the ollama, mistralai, deepseek, and ernie providers."""
        ollama = models_tokens.get("ollama")
        assert ollama is not None, "'ollama' provider should exist"
        assert ollama.get("llama2") == 4096, "Expected token limit for 'llama2' in ollama to be 4096"
        assert ollama.get("llama2:70b") == 4096, "Expected token limit for 'llama2:70b' in ollama to be 4096"

        mistralai = models_tokens.get("mistralai")
        assert mistralai is not None, "'mistralai' provider should exist"
        assert mistralai.get("open-codestral-mamba") == 256000, "Expected token limit for 'open-codestral-mamba' in mistralai to be 256000"

        deepseek = models_tokens.get("deepseek")
        assert deepseek is not None, "'deepseek' provider should exist"
        assert deepseek.get("deepseek-chat") == 28672, "Expected token limit for 'deepseek-chat' in deepseek to be 28672"

        ernie = models_tokens.get("ernie")
        assert ernie is not None, "'ernie' provider should exist"
        assert ernie.get("ernie-bot") == 4096, "Expected token limit for 'ernie-bot' in ernie to be 4096"

    def test_nvidia_specific(self):
        """The 'meta/codellama-70b' model in the nvidia provider has its documented limit."""
        nvidia = models_tokens.get("nvidia")
        assert nvidia is not None, "'nvidia' provider should exist"
        assert nvidia.get("meta/codellama-70b") == 16384, "Expected token limit for 'meta/codellama-70b' in nvidia to be 16384"

    def test_groq_specific(self):
        """The groq provider contains the model key with an embedded trailing apostrophe.

        NOTE(review): the key "claude-3-haiku-20240307'" faithfully mirrors what
        is defined in the source dict (apparently a typo there); this test pins
        that exact key on purpose.
        """
        groq = models_tokens.get("groq")
        assert groq is not None, "'groq' provider should exist"
        assert groq.get("claude-3-haiku-20240307'") == 8192, "Expected token limit for 'claude-3-haiku-20240307\\'' in groq to be 8192"

    def test_togetherai_specific(self):
        """A Meta-Llama model in the 'toghetherai' provider has its documented limit.

        NOTE(review): 'toghetherai' (sic) matches the provider key as spelled in
        the source dict.
        """
        togetherai = models_tokens.get("toghetherai")
        assert togetherai is not None, "'toghetherai' provider should exist"
        expected = 128000
        model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
        assert togetherai.get(model_name) == expected, f"Expected token limit for '{model_name}' in toghetherai to be {expected}"

    def test_ernie_all_values(self):
        """Every model in the 'ernie' provider has a token limit of exactly 4096."""
        ernie = models_tokens.get("ernie")
        assert ernie is not None, "'ernie' provider should exist"
        for model, token in ernie.items():
            assert token == 4096, f"Expected token limit for '{model}' in ernie to be 4096, got {token}"

0 commit comments

Comments
 (0)