1
+ import urllib .parse
1
2
import pytest
2
-
3
+ from unittest . mock import patch , Mock
3
4
from scrapegraphai .docloaders .scrape_do import scrape_do_fetch
4
- from unittest .mock import Mock , patch
5
-
6
- class TestScrapeDoFetch :
7
- @patch ('scrapegraphai.docloaders.scrape_do.requests.get' )
8
- @patch ('scrapegraphai.docloaders.scrape_do.os.getenv' )
9
- def test_scrape_do_fetch_with_proxy_geocode_and_super_proxy (self , mock_getenv , mock_get ):
10
- """
11
- Test scrape_do_fetch function with proxy mode, geoCode, and super_proxy enabled.
12
- This test verifies that the function correctly handles proxy settings,
13
- geoCode parameter, and super_proxy flag when making a request.
14
- """
15
- # Mock environment variable
16
- mock_getenv .return_value = "proxy.scrape.do:8080"
17
-
18
- # Mock the response
19
- mock_response = Mock ()
20
- mock_response .text = "Mocked response content"
21
- mock_get .return_value = mock_response
22
5
23
- # Test parameters
24
- token = "test_token"
25
- target_url = "https://example.com"
26
- use_proxy = True
27
- geoCode = "US"
28
- super_proxy = True
29
6
30
- # Call the function
31
- result = scrape_do_fetch (token , target_url , use_proxy , geoCode , super_proxy )
7
+ def test_scrape_do_fetch_without_proxy ():
8
+ """
9
+ Test scrape_do_fetch function using API mode (without proxy).
32
10
33
- # Assertions
34
- assert result == "Mocked response content"
35
- mock_get .assert_called_once ()
36
- call_args = mock_get .call_args
11
+ This test verifies that:
12
+ 1. The function correctly uses the API mode when use_proxy is False.
13
+ 2. The correct URL is constructed with the token and encoded target URL.
14
+ 3. The function returns the expected response text.
15
+ """
16
+ token = "test_token"
17
+ target_url = "https://example.com"
18
+ encoded_url = urllib .parse .quote (target_url )
19
+ expected_response = "Mocked API response"
20
+
21
+ with patch ("requests.get" ) as mock_get :
22
+ mock_response = Mock ()
23
+ mock_response .text = expected_response
24
+ mock_get .return_value = mock_response
37
25
38
- # Check if the URL is correct
39
- assert call_args [0 ][0 ] == target_url
26
+ result = scrape_do_fetch (token , target_url , use_proxy = False )
40
27
41
- # Check if proxies are set correctly
42
- assert call_args [1 ]['proxies' ] == {
43
- "http" : f"http://{ token } :@proxy.scrape.do:8080" ,
44
- "https" : f"http://{ token } :@proxy.scrape.do:8080" ,
45
- }
28
+ expected_url = f"http://api.scrape.do?token={ token } &url={ encoded_url } "
29
+ mock_get .assert_called_once_with (expected_url )
46
30
47
- # Check if verify is False
48
- assert call_args [1 ]['verify' ] is False
31
+ assert result == expected_response
49
32
50
- # Check if params are set correctly
51
- assert call_args [1 ]['params' ] == {"geoCode" : "US" , "super" : "true" }
0 commit comments