@@ -3,6 +3,7 @@
 import argparse
 import base64
 import hashlib
+import os
 import re
 import sys
 import requests
@@ -91,10 +92,9 @@ def is_allowlisted(self, netloc):
 
     def get_html(self):
         if self.browser:
-            from selenium import webdriver
-            from selenium.webdriver.chrome.options import Options
+            from seleniumwire import webdriver
 
-            chrome_options = Options()
+            chrome_options = webdriver.ChromeOptions()
             chrome_options.add_argument("--headless")
             chrome_options.add_argument("--no-sandbox")
             chrome_options.add_argument("--disable-dev-shm-usage")
@@ -104,20 +104,31 @@ def get_html(self):
                 }
             }
 
-            browser = webdriver.Chrome(options=chrome_options)
+            browser = webdriver.Chrome(
+                options=chrome_options,
+                seleniumwire_options={
+                    'proxy': {
+                        'http': os.environ.get("http_proxy"),
+                        'https': os.environ.get("https_proxy"),
+                    }
+                }
+            )
 
             def interceptor(request):
-                request.headers.update(self.headers)
+                for key, value in self.headers.items():
+                    del request.headers[key]
+                    request.headers[key] = value
 
             browser.request_interceptor = interceptor
             browser.get(self.url)
-            return browser.execute_script("return document.documentElement.outerHTML;")
+            content = browser.execute_script("return document.documentElement.outerHTML;")
+
+            browser.quit()
+            return content
         else:
             # file deepcode ignore Ssrf: The purpose of the script is to parse remote URLs from the CLI
-
             return requests.get(self.url, headers=self.headers).content
 
-
     def get_remote_resource_tags(self, html):
         soup = BeautifulSoup(html, 'lxml')
 
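The hunk above leans on two selenium-wire features that plain Selenium lacks: the seleniumwire_options argument, used here to route browser traffic through the http_proxy/https_proxy environment proxies, and the request_interceptor hook, used to rewrite outgoing request headers. A minimal, self-contained sketch of the same pattern, assuming selenium-wire and Chrome are installed; the target URL and the User-Agent value are placeholders, and unlike the diff it skips proxy schemes whose environment variable is unset so it also runs on machines with no proxy configured:

    import os

    from seleniumwire import webdriver  # pip install selenium-wire

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")

    # Build the proxy mapping only from environment variables that are set.
    proxies = {
        scheme: os.environ[var]
        for scheme, var in (("http", "http_proxy"), ("https", "https_proxy"))
        if os.environ.get(var)
    }

    driver = webdriver.Chrome(
        options=chrome_options,
        seleniumwire_options={"proxy": proxies} if proxies else {},
    )

    def interceptor(request):
        # request.headers permits duplicate names; delete the existing header
        # first so the assignment replaces it instead of adding a second copy.
        del request.headers["User-Agent"]
        request.headers["User-Agent"] = "my-scraper/1.0"  # hypothetical value

    driver.request_interceptor = interceptor
    driver.get("https://example.com")  # placeholder URL
    html = driver.execute_script("return document.documentElement.outerHTML;")
    driver.quit()

The delete-then-assign step inside the interceptor mirrors the diff and selenium-wire's documented usage: HTTP permits duplicate header names, so assigning without deleting first can append a second copy of a header rather than replace the original, which is why the PR drops the old headers.update() call.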