import pytest

from scrapegraphai.helpers.models_tokens import models_tokens

class TestModelsTokens:
    """Test suite for verifying the models_tokens dictionary content and structure."""

    def test_openai_tokens(self):
        """Test that the 'openai' provider exists and its tokens are valid positive integers."""
        openai_models = models_tokens.get("openai")
        assert openai_models is not None, "'openai' key should be present in models_tokens"
        for model, token in openai_models.items():
            assert isinstance(model, str), "Model name should be a string"
            assert isinstance(token, int), "Token limit should be an integer"
            assert token > 0, "Token limit should be positive"

    def test_azure_openai_tokens(self):
        """Test that the 'azure_openai' provider exists and its tokens are valid."""
        azure_models = models_tokens.get("azure_openai")
        assert azure_models is not None, "'azure_openai' key should be present"
        for model, token in azure_models.items():
            assert isinstance(model, str), "Model name should be a string"
            assert isinstance(token, int), "Token limit should be an integer"

    def test_google_providers(self):
        """Test that Google provider dictionaries ('google_genai' and 'google_vertexai') contain expected entries."""
        google_genai = models_tokens.get("google_genai")
        google_vertexai = models_tokens.get("google_vertexai")
        assert google_genai is not None, "'google_genai' key should be present"
        assert google_vertexai is not None, "'google_vertexai' key should be present"
        # Check a specific key from google_genai
        assert "gemini-pro" in google_genai, "'gemini-pro' should be in google_genai models"
        # Validate token values types
        for provider in [google_genai, google_vertexai]:
            for token in provider.values():
                assert isinstance(token, int), "Token limit must be an integer"

    def test_non_existent_provider(self):
        """Test that a non-existent provider returns None."""
        assert models_tokens.get("non_existent") is None, "Non-existent provider should return None"

    def test_total_model_keys(self):
        """Test that the total number of models across all providers is above an expected count."""
        total_keys = sum(len(details) for details in models_tokens.values())
        assert total_keys > 20, "Expected more than 20 total model tokens defined"

    def test_specific_token_value(self):
        """Test specific expected token value for a known model."""
        openai = models_tokens.get("openai")
        # Verify that the token limit for "gpt-4" is 8192 as defined
        assert openai.get("gpt-4") == 8192, "Expected token limit for gpt-4 to be 8192"

    def test_non_empty_model_keys(self):
        """Ensure that model token names are non-empty strings."""
        for provider, model_dict in models_tokens.items():
            for model in model_dict.keys():
                assert model != "", f"Model name in provider '{provider}' should not be empty."

    def test_token_limits_range(self):
        """Test that token limits for all models fall within a plausible range (1 to 1,100,000)."""
        # NOTE: upper bound accommodates very large context windows (e.g. 1M-token models).
        for provider, model_dict in models_tokens.items():
            for model, token in model_dict.items():
                assert 1 <= token <= 1100000, f"Token limit for {model} in provider {provider} is out of plausible range."

    def test_provider_structure(self):
        """Test that every provider in models_tokens has a dictionary as its value."""
        for provider, models in models_tokens.items():
            assert isinstance(models, dict), f"Provider {provider} should map to a dictionary, got {type(models).__name__}"

    def test_non_empty_provider(self):
        """Test that each provider dictionary is not empty."""
        for provider, models in models_tokens.items():
            assert len(models) > 0, f"Provider {provider} should contain at least one model."

    def test_specific_model_token_values(self):
        """Test specific expected token values for selected models from various providers."""
        # Verify a token for a selected model from the 'openai' provider
        openai = models_tokens.get("openai")
        assert openai.get("gpt-3.5-turbo-0125") == 16385, "Expected token limit for gpt-3.5-turbo-0125 in openai to be 16385"

        # Verify a token for a selected model from the 'azure_openai' provider
        azure = models_tokens.get("azure_openai")
        assert azure.get("gpt-3.5") == 4096, "Expected token limit for gpt-3.5 in azure_openai to be 4096"

        # Verify a token for a selected model from the 'anthropic' provider
        anthropic = models_tokens.get("anthropic")
        assert anthropic.get("claude_instant") == 100000, "Expected token limit for claude_instant in anthropic to be 100000"

    def test_providers_count(self):
        """Test that the total number of providers is as expected (at least 15)."""
        assert len(models_tokens) >= 15, "Expected at least 15 providers in models_tokens"

    def test_non_existent_model(self):
        """Test that a non-existent model within a valid provider returns None."""
        openai = models_tokens.get("openai")
        assert openai.get("non_existent_model") is None, "Non-existent model should return None from a valid provider."

    def test_no_whitespace_in_model_names(self):
        """Test that model names do not contain leading or trailing whitespace."""
        for provider, model_dict in models_tokens.items():
            for model in model_dict.keys():
                # Assert that stripping whitespace does not change the model name
                assert model == model.strip(), f"Model name '{model}' in provider '{provider}' contains leading or trailing whitespace."

    def test_specific_models_additional(self):
        """Test specific token values for additional models across various providers."""
        # Check some models in the 'ollama' provider
        ollama = models_tokens.get("ollama")
        assert ollama.get("llama2") == 4096, "Expected token limit for 'llama2' in ollama to be 4096"
        assert ollama.get("llama2:70b") == 4096, "Expected token limit for 'llama2:70b' in ollama to be 4096"

        # Check a specific model from the 'mistralai' provider
        mistralai = models_tokens.get("mistralai")
        assert mistralai.get("open-codestral-mamba") == 256000, "Expected token limit for 'open-codestral-mamba' in mistralai to be 256000"

        # Check a specific model from the 'deepseek' provider
        deepseek = models_tokens.get("deepseek")
        assert deepseek.get("deepseek-chat") == 28672, "Expected token limit for 'deepseek-chat' in deepseek to be 28672"

        # Check a model from the 'ernie' provider
        ernie = models_tokens.get("ernie")
        assert ernie.get("ernie-bot") == 4096, "Expected token limit for 'ernie-bot' in ernie to be 4096"

    def test_nvidia_specific(self):
        """Test specific token value for 'meta/codellama-70b' in the nvidia provider."""
        nvidia = models_tokens.get("nvidia")
        assert nvidia is not None, "'nvidia' provider should exist"
        # Verify token for 'meta/codellama-70b' equals 16384 as defined in the nvidia dictionary
        assert nvidia.get("meta/codellama-70b") == 16384, "Expected token limit for 'meta/codellama-70b' in nvidia to be 16384"

    def test_groq_specific(self):
        """Test specific token value for the "claude-3-haiku-20240307'" entry in the groq provider."""
        groq = models_tokens.get("groq")
        assert groq is not None, "'groq' provider should exist"
        # NOTE: the model key genuinely ends with an apostrophe in the groq dictionary.
        assert groq.get("claude-3-haiku-20240307'") == 8192, "Expected token limit for \"claude-3-haiku-20240307'\" in groq to be 8192"

    def test_togetherai_specific(self):
        """Test specific token value for 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo' in the toghetherai provider."""
        # NOTE: 'toghetherai' (misspelled) is the actual key used in models_tokens.
        togetherai = models_tokens.get("toghetherai")
        assert togetherai is not None, "'toghetherai' provider should exist"
        expected = 128000
        model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
        assert togetherai.get(model_name) == expected, f"Expected token limit for '{model_name}' in toghetherai to be {expected}"

    def test_ernie_all_values(self):
        """Test that all models in the 'ernie' provider have token values exactly 4096."""
        ernie = models_tokens.get("ernie")
        assert ernie is not None, "'ernie' provider should exist"
        for model, token in ernie.items():
            assert token == 4096, f"Expected token limit for '{model}' in ernie to be 4096, got {token}"